From db48ea754a5c210c63c0c26e87385fc544987dc1 Mon Sep 17 00:00:00 2001
From: Kim Heejeong
Date: Thu, 22 Oct 2020 20:18:56 +0900
Subject: [PATCH] [RAMEN9610-21777]drivers: gpu: initial code for b_r26p0-01eac0

Change-Id: I97667f2c1896eb0f14d0fa6172f4c996f0ff05cb
Signed-off-by: Kim Heejeong
---
 drivers/gpu/arm/b_r26p0/Kbuild | 222 +
 drivers/gpu/arm/b_r26p0/Kconfig | 314 ++
 drivers/gpu/arm/b_r26p0/Makefile | 38 +
 drivers/gpu/arm/b_r26p0/Makefile.kbase | 23 +
 drivers/gpu/arm/b_r26p0/Mconfig | 278 +
 drivers/gpu/arm/b_r26p0/arbiter/Kbuild | 24 +
 .../arm/b_r26p0/arbiter/mali_kbase_arbif.c | 175 +
 .../arm/b_r26p0/arbiter/mali_kbase_arbif.h | 133 +
 .../b_r26p0/arbiter/mali_kbase_arbiter_defs.h | 95 +
 .../arbiter/mali_kbase_arbiter_interface.h | 181 +
 .../b_r26p0/arbiter/mali_kbase_arbiter_pm.c | 645 +++
 .../b_r26p0/arbiter/mali_kbase_arbiter_pm.h | 159 +
 drivers/gpu/arm/b_r26p0/backend/gpu/Kbuild | 66 +
 .../backend/gpu/mali_kbase_backend_config.h | 31 +
 .../gpu/mali_kbase_cache_policy_backend.c | 34 +
 .../gpu/mali_kbase_cache_policy_backend.h | 39 +
 .../gpu/mali_kbase_clk_rate_trace_mgr.c | 280 +
 .../gpu/mali_kbase_clk_rate_trace_mgr.h | 155 +
 .../gpu/mali_kbase_debug_job_fault_backend.c | 164 +
 .../b_r26p0/backend/gpu/mali_kbase_devfreq.c | 731 +++
 .../b_r26p0/backend/gpu/mali_kbase_devfreq.h | 46 +
 .../backend/gpu/mali_kbase_device_hw.c | 391 ++
 .../backend/gpu/mali_kbase_device_internal.h | 127 +
 .../backend/gpu/mali_kbase_gpuprops_backend.c | 146 +
 .../backend/gpu/mali_kbase_instr_backend.c | 411 ++
 .../backend/gpu/mali_kbase_instr_defs.h | 60 +
 .../backend/gpu/mali_kbase_instr_internal.h | 44 +
 .../backend/gpu/mali_kbase_irq_internal.h | 44 +
 .../backend/gpu/mali_kbase_irq_linux.c | 517 ++
 .../b_r26p0/backend/gpu/mali_kbase_jm_as.c | 243 +
 .../b_r26p0/backend/gpu/mali_kbase_jm_defs.h | 111 +
 .../b_r26p0/backend/gpu/mali_kbase_jm_hw.c | 1485 +++++
 .../backend/gpu/mali_kbase_jm_internal.h | 177 +
 .../b_r26p0/backend/gpu/mali_kbase_jm_rb.c | 1676 ++++++
 .../b_r26p0/backend/gpu/mali_kbase_jm_rb.h | 83 +
 .../backend/gpu/mali_kbase_js_backend.c | 356 ++
 .../backend/gpu/mali_kbase_js_internal.h | 74 +
 .../backend/gpu/mali_kbase_l2_mmu_config.c | 122 +
 .../backend/gpu/mali_kbase_l2_mmu_config.h | 55 +
 .../backend/gpu/mali_kbase_pm_always_on.c | 67 +
 .../backend/gpu/mali_kbase_pm_always_on.h | 81 +
 .../backend/gpu/mali_kbase_pm_backend.c | 763 +++
 .../b_r26p0/backend/gpu/mali_kbase_pm_ca.c | 112 +
 .../b_r26p0/backend/gpu/mali_kbase_pm_ca.h | 89 +
 .../backend/gpu/mali_kbase_pm_ca_devfreq.h | 60 +
 .../backend/gpu/mali_kbase_pm_coarse_demand.c | 66 +
 .../backend/gpu/mali_kbase_pm_coarse_demand.h | 69 +
 .../b_r26p0/backend/gpu/mali_kbase_pm_defs.h | 532 ++
 .../backend/gpu/mali_kbase_pm_driver.c | 2269 ++++++++
 .../backend/gpu/mali_kbase_pm_internal.h | 710 +++
 .../backend/gpu/mali_kbase_pm_l2_states.h | 38 +
 .../backend/gpu/mali_kbase_pm_metrics.c | 331 ++
 .../backend/gpu/mali_kbase_pm_policy.c | 249 +
 .../backend/gpu/mali_kbase_pm_policy.h | 106 +
 .../backend/gpu/mali_kbase_pm_shader_states.h | 43 +
 .../arm/b_r26p0/backend/gpu/mali_kbase_time.c | 77 +
 drivers/gpu/arm/b_r26p0/build.bp | 183 +
 .../context/backend/mali_kbase_context_jm.c | 243 +
 .../arm/b_r26p0/context/mali_kbase_context.c | 338 ++
 .../arm/b_r26p0/context/mali_kbase_context.h | 157 +
 .../context/mali_kbase_context_internal.h | 60 +
 .../mali_kbase_debug_ktrace_codes_jm.h | 170 +
 .../backend/mali_kbase_debug_ktrace_defs_jm.h | 80 +
 .../backend/mali_kbase_debug_ktrace_jm.c | 125 +
 .../backend/mali_kbase_debug_ktrace_jm.h | 362 ++
 .../mali_kbase_debug_linux_ktrace_jm.h | 151 +
 .../b_r26p0/debug/mali_kbase_debug_ktrace.c | 429 ++
 .../b_r26p0/debug/mali_kbase_debug_ktrace.h | 228 +
 .../debug/mali_kbase_debug_ktrace_codes.h | 160 +
 .../debug/mali_kbase_debug_ktrace_defs.h | 157 +
 .../debug/mali_kbase_debug_ktrace_internal.h | 89 +
 .../debug/mali_kbase_debug_linux_ktrace.h | 99 +
 .../device/backend/mali_kbase_device_jm.c | 264 +
 .../arm/b_r26p0/device/mali_kbase_device.c | 434 ++
 .../arm/b_r26p0/device/mali_kbase_device.h | 71 +
 .../device/mali_kbase_device_internal.h | 78 +
 .../gpu/backend/mali_kbase_gpu_fault_jm.c | 181 +
 .../gpu/backend/mali_kbase_gpu_regmap_jm.h | 262 +
 drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.c | 41 +
 drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.h | 31 +
 .../b_r26p0/gpu/mali_kbase_gpu_coherency.h | 31 +
 .../arm/b_r26p0/gpu/mali_kbase_gpu_fault.h | 59 +
 .../gpu/arm/b_r26p0/gpu/mali_kbase_gpu_id.h | 118 +
 .../arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h | 437 ++
 drivers/gpu/arm/b_r26p0/ipa/Kbuild | 28 +
 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.c | 673 +++
 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.h | 253 +
 .../arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.c | 322 ++
 .../arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.h | 68 +
 .../arm/b_r26p0/ipa/mali_kbase_ipa_simple.c | 351 ++
 .../arm/b_r26p0/ipa/mali_kbase_ipa_simple.h | 45 +
 .../ipa/mali_kbase_ipa_vinstr_common.c | 346 ++
 .../ipa/mali_kbase_ipa_vinstr_common.h | 217 +
 .../b_r26p0/ipa/mali_kbase_ipa_vinstr_g7x.c | 456 ++
 .../gpu/arm/b_r26p0/jm/mali_base_jm_kernel.h | 1076 ++++
 .../gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h | 826 +++
 .../gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h | 194 +
 drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_js.h | 892 +++
 .../gpu/arm/b_r26p0/jm/mali_kbase_js_defs.h | 409 ++
 .../arm/b_r26p0/mali_base_hwconfig_features.h | 486 ++
 .../arm/b_r26p0/mali_base_hwconfig_issues.h | 635 +++
 drivers/gpu/arm/b_r26p0/mali_base_kernel.h | 801 +++
 drivers/gpu/arm/b_r26p0/mali_base_mem_priv.h | 57 +
 drivers/gpu/arm/b_r26p0/mali_gpu_mem_trace.h | 73 +
 drivers/gpu/arm/b_r26p0/mali_kbase.h | 634 +++
 .../arm/b_r26p0/mali_kbase_as_fault_debugfs.c | 113 +
 .../arm/b_r26p0/mali_kbase_as_fault_debugfs.h | 50 +
 drivers/gpu/arm/b_r26p0/mali_kbase_bits.h | 41 +
 .../gpu/arm/b_r26p0/mali_kbase_cache_policy.c | 67 +
 .../gpu/arm/b_r26p0/mali_kbase_cache_policy.h | 50 +
 drivers/gpu/arm/b_r26p0/mali_kbase_caps.h | 65 +
 drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.c | 105 +
 drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.h | 97 +
 drivers/gpu/arm/b_r26p0/mali_kbase_config.c | 48 +
 drivers/gpu/arm/b_r26p0/mali_kbase_config.h | 398 ++
 .../arm/b_r26p0/mali_kbase_config_defaults.h | 213 +
 .../gpu/arm/b_r26p0/mali_kbase_core_linux.c | 4763 +++++++++++++++++
 .../arm/b_r26p0/mali_kbase_cs_experimental.h | 51 +
 .../gpu/arm/b_r26p0/mali_kbase_ctx_sched.c | 344 ++
 .../gpu/arm/b_r26p0/mali_kbase_ctx_sched.h | 209 +
 drivers/gpu/arm/b_r26p0/mali_kbase_debug.c | 44 +
 drivers/gpu/arm/b_r26p0/mali_kbase_debug.h | 169 +
 .../arm/b_r26p0/mali_kbase_debug_job_fault.c | 566 ++
 .../arm/b_r26p0/mali_kbase_debug_job_fault.h | 116 +
 .../arm/b_r26p0/mali_kbase_debug_mem_view.c | 313 ++
 .../arm/b_r26p0/mali_kbase_debug_mem_view.h | 40 +
 .../arm/b_r26p0/mali_kbase_debugfs_helper.c | 183 +
 .../arm/b_r26p0/mali_kbase_debugfs_helper.h | 141 +
 drivers/gpu/arm/b_r26p0/mali_kbase_defs.h | 1844 +++++++
 .../arm/b_r26p0/mali_kbase_disjoint_events.c | 81 +
 .../gpu/arm/b_r26p0/mali_kbase_dma_fence.c | 456 ++
 .../gpu/arm/b_r26p0/mali_kbase_dma_fence.h | 136 +
 .../gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c | 442 ++
 .../gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.h | 45 +
 drivers/gpu/arm/b_r26p0/mali_kbase_event.c | 322 ++
 drivers/gpu/arm/b_r26p0/mali_kbase_fence.c | 214 +
 drivers/gpu/arm/b_r26p0/mali_kbase_fence.h | 282 +
 .../gpu/arm/b_r26p0/mali_kbase_fence_defs.h | 71 +
 drivers/gpu/arm/b_r26p0/mali_kbase_gator.h | 53 +
 .../b_r26p0/mali_kbase_gpu_memory_debugfs.c | 110 +
 .../b_r26p0/mali_kbase_gpu_memory_debugfs.h | 42 +
 drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.c | 636 +++
 drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.h | 135 +
 .../arm/b_r26p0/mali_kbase_gpuprops_types.h | 98 +
 drivers/gpu/arm/b_r26p0/mali_kbase_gwt.c | 269 +
 drivers/gpu/arm/b_r26p0/mali_kbase_gwt.h | 55 +
 drivers/gpu/arm/b_r26p0/mali_kbase_hw.c | 425 ++
 drivers/gpu/arm/b_r26p0/mali_kbase_hw.h | 70 +
 .../arm/b_r26p0/mali_kbase_hwaccess_backend.h | 45 +
 .../arm/b_r26p0/mali_kbase_hwaccess_defs.h | 51 +
 .../b_r26p0/mali_kbase_hwaccess_gpuprops.h | 87 +
 .../arm/b_r26p0/mali_kbase_hwaccess_instr.h | 151 +
 .../gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h | 302 ++
 .../gpu/arm/b_r26p0/mali_kbase_hwaccess_pm.h | 229 +
 .../arm/b_r26p0/mali_kbase_hwaccess_time.h | 56 +
 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt.c | 794 +++
 .../b_r26p0/mali_kbase_hwcnt_accumulator.h | 146 +
 .../arm/b_r26p0/mali_kbase_hwcnt_backend.h | 220 +
 .../arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c | 707 +++
 .../arm/b_r26p0/mali_kbase_hwcnt_backend_jm.h | 61 +
 .../arm/b_r26p0/mali_kbase_hwcnt_context.h | 119 +
 .../gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.c | 786 +++
 .../gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.h | 255 +
 .../gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.c | 152 +
 .../gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.h | 94 +
 .../gpu/arm/b_r26p0/mali_kbase_hwcnt_reader.h | 106 +
 .../gpu/arm/b_r26p0/mali_kbase_hwcnt_types.c | 604 +++
 .../gpu/arm/b_r26p0/mali_kbase_hwcnt_types.h | 1142 ++++
 .../b_r26p0/mali_kbase_hwcnt_virtualizer.c | 790 +++
 .../b_r26p0/mali_kbase_hwcnt_virtualizer.h | 145 +
 drivers/gpu/arm/b_r26p0/mali_kbase_ioctl.h | 886 +++
 drivers/gpu/arm/b_r26p0/mali_kbase_jd.c | 1819 +++++++
 .../gpu/arm/b_r26p0/mali_kbase_jd_debugfs.c | 244 +
 .../gpu/arm/b_r26p0/mali_kbase_jd_debugfs.h | 45 +
 drivers/gpu/arm/b_r26p0/mali_kbase_jm.c | 151 +
 drivers/gpu/arm/b_r26p0/mali_kbase_jm.h | 115 +
 drivers/gpu/arm/b_r26p0/mali_kbase_js.c | 3784 +++++++++++++
 drivers/gpu/arm/b_r26p0/mali_kbase_js.h | 40 +
 .../gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.c | 283 +
 .../gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.h | 155 +
 .../gpu/arm/b_r26p0/mali_kbase_kinstr_jm.c | 896 ++++
 .../gpu/arm/b_r26p0/mali_kbase_kinstr_jm.h | 283 +
 .../arm/b_r26p0/mali_kbase_kinstr_jm_reader.h | 70 +
 drivers/gpu/arm/b_r26p0/mali_kbase_linux.h | 48 +
 drivers/gpu/arm/b_r26p0/mali_kbase_mem.c | 4582 ++++++++++++++++
 drivers/gpu/arm/b_r26p0/mali_kbase_mem.h | 1891 +++++++
 .../gpu/arm/b_r26p0/mali_kbase_mem_linux.c | 3072 +++++++++++
 .../gpu/arm/b_r26p0/mali_kbase_mem_linux.h | 467 ++
 .../gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h | 166 +
 drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c | 856 +++
 .../arm/b_r26p0/mali_kbase_mem_pool_debugfs.c | 191 +
 .../arm/b_r26p0/mali_kbase_mem_pool_debugfs.h | 123 +
 .../arm/b_r26p0/mali_kbase_mem_pool_group.c | 115 +
 .../arm/b_r26p0/mali_kbase_mem_pool_group.h | 92 +
 .../b_r26p0/mali_kbase_mem_profile_debugfs.c | 157 +
 .../b_r26p0/mali_kbase_mem_profile_debugfs.h | 64 +
 .../mali_kbase_mem_profile_debugfs_buf_size.h | 39 +
 .../arm/b_r26p0/mali_kbase_mipe_gen_header.h | 217 +
 .../gpu/arm/b_r26p0/mali_kbase_mipe_proto.h | 127 +
 .../gpu/arm/b_r26p0/mali_kbase_native_mgm.c | 153 +
 .../gpu/arm/b_r26p0/mali_kbase_native_mgm.h | 39 +
 .../arm/b_r26p0/mali_kbase_platform_fake.c | 124 +
 drivers/gpu/arm/b_r26p0/mali_kbase_pm.c | 284 +
 drivers/gpu/arm/b_r26p0/mali_kbase_pm.h | 244 +
 .../b_r26p0/mali_kbase_regs_history_debugfs.c | 136 +
 .../b_r26p0/mali_kbase_regs_history_debugfs.h | 55 +
 .../gpu/arm/b_r26p0/mali_kbase_reset_gpu.h | 139 +
 drivers/gpu/arm/b_r26p0/mali_kbase_smc.c | 91 +
 drivers/gpu/arm/b_r26p0/mali_kbase_smc.h | 72 +
 drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c | 1816 +++++++
 drivers/gpu/arm/b_r26p0/mali_kbase_strings.c | 28 +
 drivers/gpu/arm/b_r26p0/mali_kbase_strings.h | 24 +
 drivers/gpu/arm/b_r26p0/mali_kbase_sync.h | 223 +
 .../gpu/arm/b_r26p0/mali_kbase_sync_android.c | 542 ++
 .../gpu/arm/b_r26p0/mali_kbase_sync_common.c | 49 +
 .../gpu/arm/b_r26p0/mali_kbase_sync_file.c | 378 ++
 .../arm/b_r26p0/mali_kbase_trace_gpu_mem.c | 227 +
 .../arm/b_r26p0/mali_kbase_trace_gpu_mem.h | 101 +
 drivers/gpu/arm/b_r26p0/mali_kbase_uku.h | 126 +
 drivers/gpu/arm/b_r26p0/mali_kbase_utility.h | 55 +
 drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.c | 1083 ++++
 drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.h | 91 +
 drivers/gpu/arm/b_r26p0/mali_linux_systrace.h | 110 +
 drivers/gpu/arm/b_r26p0/mali_linux_trace.h | 532 ++
 drivers/gpu/arm/b_r26p0/mali_malisw.h | 109 +
 .../b_r26p0/mali_power_gpu_frequency_trace.c | 27 +
 .../b_r26p0/mali_power_gpu_frequency_trace.h | 69 +
 drivers/gpu/arm/b_r26p0/mali_uk.h | 147 +
 .../b_r26p0/mmu/backend/mali_kbase_mmu_jm.c | 432 ++
 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c | 2249 ++++++++
 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h | 118 +
 .../gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h | 107 +
 .../b_r26p0/mmu/mali_kbase_mmu_hw_direct.c | 272 +
 .../arm/b_r26p0/mmu/mali_kbase_mmu_internal.h | 63 +
 .../b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c | 200 +
 .../b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c | 215 +
 drivers/gpu/arm/b_r26p0/platform/Kbuild | 21 +
 drivers/gpu/arm/b_r26p0/platform/Kconfig | 31 +
 .../arm/b_r26p0/platform/devicetree/Kbuild | 29 +
 .../devicetree/mali_kbase_clk_rate_trace.c | 68 +
 .../devicetree/mali_kbase_config_devicetree.c | 41 +
 .../devicetree/mali_kbase_config_platform.h | 49 +
 .../devicetree/mali_kbase_runtime_pm.c | 185 +
 .../gpu/arm/b_r26p0/platform/exynos/Kbuild | 40 +
 .../gpu/arm/b_r26p0/platform/exynos/Kconfig | 142 +
 .../arm/b_r26p0/platform/exynos/gpu_control.c | 641 +++
 .../arm/b_r26p0/platform/exynos/gpu_control.h | 78 +
 .../platform/exynos/gpu_custom_interface.c | 2224 ++++++++
 .../platform/exynos/gpu_custom_interface.h | 27 +
 .../b_r26p0/platform/exynos/gpu_dvfs_api.c | 654 +++
 .../platform/exynos/gpu_dvfs_governor.c | 377 ++
 .../platform/exynos/gpu_dvfs_governor.h | 38 +
 .../platform/exynos/gpu_dvfs_handler.c | 100 +
 .../platform/exynos/gpu_dvfs_handler.h | 87 +
 .../exynos/gpu_integration_callbacks.c | 716 +++
 .../platform/exynos/gpu_integration_defs.h | 62 +
 .../gpu/arm/b_r26p0/platform/exynos/gpu_ipa.c | 271 +
 .../gpu/arm/b_r26p0/platform/exynos/gpu_ipa.h | 41 +
 .../platform/exynos/gpu_job_fence_debug.c | 344 ++
 .../b_r26p0/platform/exynos/gpu_notifier.c | 511 ++
 .../b_r26p0/platform/exynos/gpu_notifier.h | 23 +
 .../arm/b_r26p0/platform/exynos/gpu_pmqos.c | 206 +
 .../platform/exynos/gpu_protected_mode.c | 251 +
 .../platform/exynos/gpu_protected_mode.h | 42 +
 .../b_r26p0/platform/exynos/gpu_trace_defs.h | 73 +
 .../b_r26p0/platform/exynos/gpu_utilization.c | 144 +
 .../exynos/mali_kbase_clk_rate_trace.c | 92 +
 .../exynos/mali_kbase_config_platform.h | 97 +
 .../platform/exynos/mali_kbase_platform.c | 703 +++
 .../platform/exynos/mali_kbase_platform.h | 366 ++
 .../arm/b_r26p0/platform/exynos/mali_power.h | 61 +
 .../gpu/arm/b_r26p0/platform/vexpress/Kbuild | 24 +
 .../vexpress/mali_kbase_config_platform.h | 39 +
 .../vexpress/mali_kbase_config_vexpress.c | 69 +
 .../b_r26p0/platform/vexpress_1xv7_a57/Kbuild | 24 +
 .../mali_kbase_config_platform.h | 39 +
 .../mali_kbase_config_vexpress.c | 65 +
 .../platform/vexpress_6xvirtex7_10mhz/Kbuild | 25 +
 .../mali_kbase_config_platform.h | 39 +
 .../mali_kbase_config_vexpress.c | 67 +
 .../gpu/arm/b_r26p0/protected_mode_switcher.h | 69 +
 drivers/gpu/arm/b_r26p0/tests/Kbuild | 24 +
 drivers/gpu/arm/b_r26p0/tests/Kconfig | 24 +
 drivers/gpu/arm/b_r26p0/tests/Mconfig | 38 +
 .../b_r26p0/tests/include/kutf/kutf_helpers.h | 85 +
 .../tests/include/kutf/kutf_helpers_user.h | 179 +
 .../arm/b_r26p0/tests/include/kutf/kutf_mem.h | 73 +
 .../tests/include/kutf/kutf_resultset.h | 181 +
 .../b_r26p0/tests/include/kutf/kutf_suite.h | 569 ++
 .../b_r26p0/tests/include/kutf/kutf_utils.h | 60 +
 drivers/gpu/arm/b_r26p0/tests/kutf/Kbuild | 26 +
 drivers/gpu/arm/b_r26p0/tests/kutf/Kconfig | 28 +
 drivers/gpu/arm/b_r26p0/tests/kutf/Makefile | 35 +
 drivers/gpu/arm/b_r26p0/tests/kutf/build.bp | 36 +
 .../gpu/arm/b_r26p0/tests/kutf/kutf_helpers.c | 131 +
 .../b_r26p0/tests/kutf/kutf_helpers_user.c | 468 ++
 drivers/gpu/arm/b_r26p0/tests/kutf/kutf_mem.c | 108 +
 .../arm/b_r26p0/tests/kutf/kutf_resultset.c | 164 +
 .../gpu/arm/b_r26p0/tests/kutf/kutf_suite.c | 1219 +++++
 .../gpu/arm/b_r26p0/tests/kutf/kutf_utils.c | 76 +
 .../mali_kutf_clk_rate_trace/kernel/Kbuild | 26 +
 .../mali_kutf_clk_rate_trace/kernel/Kconfig | 30 +
 .../mali_kutf_clk_rate_trace/kernel/Makefile | 57 +
 .../mali_kutf_clk_rate_trace/kernel/build.bp | 34 +
 .../kernel/mali_kutf_clk_rate_trace_test.c | 886 +++
 .../mali_kutf_clk_rate_trace_test.h | 148 +
 .../b_r26p0/tests/mali_kutf_irq_test/Kbuild | 26 +
 .../b_r26p0/tests/mali_kutf_irq_test/Kconfig | 29 +
 .../b_r26p0/tests/mali_kutf_irq_test/Makefile | 51 +
 .../b_r26p0/tests/mali_kutf_irq_test/build.bp | 35 +
 .../mali_kutf_irq_test_main.c | 278 +
 .../arm/b_r26p0/thirdparty/mali_kbase_mmap.c | 366 ++
 .../tl/backend/mali_kbase_timeline_jm.c | 97 +
 .../gpu/arm/b_r26p0/tl/mali_kbase_timeline.c | 274 +
 .../gpu/arm/b_r26p0/tl/mali_kbase_timeline.h | 121 +
 .../arm/b_r26p0/tl/mali_kbase_timeline_io.c | 329 ++
 .../arm/b_r26p0/tl/mali_kbase_timeline_priv.h | 66 +
 .../arm/b_r26p0/tl/mali_kbase_tl_serialize.h | 125 +
 .../gpu/arm/b_r26p0/tl/mali_kbase_tlstream.c | 298 ++
 .../gpu/arm/b_r26p0/tl/mali_kbase_tlstream.h | 166 +
 .../arm/b_r26p0/tl/mali_kbase_tracepoints.c | 3194 +++++++++++
 .../arm/b_r26p0/tl/mali_kbase_tracepoints.h | 2381 ++++
 322 files changed, 104055 insertions(+)
 create mode 100644 drivers/gpu/arm/b_r26p0/Kbuild
 create mode 100644 drivers/gpu/arm/b_r26p0/Kconfig
 create mode 100644 drivers/gpu/arm/b_r26p0/Makefile
 create mode 100644 drivers/gpu/arm/b_r26p0/Makefile.kbase
 create mode 100644 drivers/gpu/arm/b_r26p0/Mconfig
 create mode 100644 drivers/gpu/arm/b_r26p0/arbiter/Kbuild
 create mode 100644 drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbif.c
 create mode 100644 drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbif.h
 create mode 100644 drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_defs.h
 create mode 100644 drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_interface.h
 create mode 100644 drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_pm.c
 create mode 100644 drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_pm.h
 create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/Kbuild
create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_backend_config.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_cache_policy_backend.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_cache_policy_backend.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_clk_rate_trace_mgr.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_clk_rate_trace_mgr.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_debug_job_fault_backend.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_devfreq.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_devfreq.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_hw.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_gpuprops_backend.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_backend.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_irq_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_irq_linux.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_as.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_hw.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_js_backend.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_js_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_l2_mmu_config.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_l2_mmu_config.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_always_on.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_always_on.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_backend.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca_devfreq.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_coarse_demand.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_coarse_demand.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_driver.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_l2_states.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_metrics.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_policy.c create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_policy.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_shader_states.h create mode 100644 drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_time.c create mode 100644 drivers/gpu/arm/b_r26p0/build.bp create mode 100644 
drivers/gpu/arm/b_r26p0/context/backend/mali_kbase_context_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c create mode 100644 drivers/gpu/arm/b_r26p0/context/mali_kbase_context.h create mode 100644 drivers/gpu/arm/b_r26p0/context/mali_kbase_context_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_codes_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_defs_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_linux_ktrace_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace.c create mode 100644 drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_codes.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_linux_ktrace.h create mode 100644 drivers/gpu/arm/b_r26p0/device/backend/mali_kbase_device_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/device/mali_kbase_device.c create mode 100644 drivers/gpu/arm/b_r26p0/device/mali_kbase_device.h create mode 100644 drivers/gpu/arm/b_r26p0/device/mali_kbase_device_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_fault_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_regmap_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.c create mode 100644 drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.h create mode 100644 drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_coherency.h create mode 100644 drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_fault.h create mode 100644 drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_id.h create mode 100644 drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h create mode 100644 drivers/gpu/arm/b_r26p0/ipa/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.c create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.h create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.c create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.h create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_simple.c create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_simple.h create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_common.c create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_common.h create mode 100644 drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_g7x.c create mode 100644 drivers/gpu/arm/b_r26p0/jm/mali_base_jm_kernel.h create mode 100644 drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h create mode 100644 drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_js.h create mode 100644 drivers/gpu/arm/b_r26p0/jm/mali_kbase_js_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_base_hwconfig_features.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_base_hwconfig_issues.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_base_kernel.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_base_mem_priv.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_gpu_mem_trace.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase.h create mode 100644 
drivers/gpu/arm/b_r26p0/mali_kbase_as_fault_debugfs.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_as_fault_debugfs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_bits.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_cache_policy.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_cache_policy.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_caps.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_config.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_config.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_config_defaults.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_core_linux.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_cs_experimental.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_debug.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_debug.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_debug_job_fault.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_debug_job_fault.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_debug_mem_view.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_debug_mem_view.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_debugfs_helper.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_debugfs_helper.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_disjoint_events.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_dma_fence.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_dma_fence.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_event.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_fence.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_fence.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_fence_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_gator.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_gpu_memory_debugfs.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_gpu_memory_debugfs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops_types.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_gwt.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_gwt.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hw.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hw.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_backend.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_gpuprops.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_instr.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_pm.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_time.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_accumulator.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend.h create mode 
100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_context.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_reader.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_types.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_types.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_virtualizer.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_virtualizer.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_ioctl.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_jd.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_jd_debugfs.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_jd_debugfs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_js.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_js.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm_reader.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_linux.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_debugfs.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_debugfs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_group.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_group.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs_buf_size.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mipe_gen_header.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_mipe_proto.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_native_mgm.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_native_mgm.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_platform_fake.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_pm.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_pm.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_regs_history_debugfs.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_regs_history_debugfs.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_reset_gpu.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_smc.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_smc.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c create mode 100644 
drivers/gpu/arm/b_r26p0/mali_kbase_strings.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_strings.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_sync.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_sync_android.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_sync_common.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_sync_file.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_trace_gpu_mem.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_trace_gpu_mem.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_uku.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_utility.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_linux_systrace.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_linux_trace.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_malisw.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_power_gpu_frequency_trace.c create mode 100644 drivers/gpu/arm/b_r26p0/mali_power_gpu_frequency_trace.h create mode 100644 drivers/gpu/arm/b_r26p0/mali_uk.h create mode 100644 drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c create mode 100644 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h create mode 100644 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h create mode 100644 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw_direct.c create mode 100644 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_internal.h create mode 100644 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c create mode 100644 drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/platform/Kconfig create mode 100644 drivers/gpu/arm/b_r26p0/platform/devicetree/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_clk_rate_trace.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_config_devicetree.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_config_platform.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_runtime_pm.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/Kconfig create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_control.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_control.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_custom_interface.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_custom_interface.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_api.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_governor.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_governor.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_handler.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_handler.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_callbacks.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_ipa.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_ipa.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_job_fence_debug.c create mode 100644 
drivers/gpu/arm/b_r26p0/platform/exynos/gpu_notifier.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_notifier.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_pmqos.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_protected_mode.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_protected_mode.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_trace_defs.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/gpu_utilization.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_clk_rate_trace.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_config_platform.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_platform.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_platform.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/exynos/mali_power.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress/mali_kbase_config_platform.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress/mali_kbase_config_vexpress.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h create mode 100644 drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c create mode 100644 drivers/gpu/arm/b_r26p0/protected_mode_switcher.h create mode 100644 drivers/gpu/arm/b_r26p0/tests/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/tests/Kconfig create mode 100644 drivers/gpu/arm/b_r26p0/tests/Mconfig create mode 100644 drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_helpers.h create mode 100644 drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_helpers_user.h create mode 100644 drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_mem.h create mode 100644 drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_resultset.h create mode 100644 drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_suite.h create mode 100644 drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_utils.h create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/Kconfig create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/Makefile create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/build.bp create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/kutf_helpers.c create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/kutf_helpers_user.c create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/kutf_mem.c create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/kutf_resultset.c create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/kutf_suite.c create mode 100644 drivers/gpu/arm/b_r26p0/tests/kutf/kutf_utils.c create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kconfig create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Makefile create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/build.bp create mode 100644 
drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kbuild create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kconfig create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Makefile create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/build.bp create mode 100644 drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c create mode 100644 drivers/gpu/arm/b_r26p0/thirdparty/mali_kbase_mmap.c create mode 100644 drivers/gpu/arm/b_r26p0/tl/backend/mali_kbase_timeline_jm.c create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.c create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.h create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_io.c create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_priv.h create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_tl_serialize.h create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_tlstream.c create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_tlstream.h create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.c create mode 100644 drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.h diff --git a/drivers/gpu/arm/b_r26p0/Kbuild b/drivers/gpu/arm/b_r26p0/Kbuild new file mode 100644 index 000000000000..2d1be602182c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/Kbuild @@ -0,0 +1,222 @@ +# +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +# Driver version string which is returned to userspace via an ioctl +MALI_RELEASE_NAME ?= "r26p0-01eac0" + +# Paths required for build + +# make $(src) as absolute path if it isn't already, by prefixing $(srctree) +src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) +KBASE_PATH = $(src) +KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy +UMP_PATH = $(src)/../../../base + +# Set up defaults if not defined by build system +MALI_CUSTOMER_RELEASE ?= 1 +MALI_USE_CSF ?= 0 +MALI_UNIT_TEST ?= 0 +MALI_KERNEL_TEST_API ?= 0 +MALI_COVERAGE ?= 0 +MALI_JIT_PRESSURE_LIMIT_BASE ?= 1 +CONFIG_MALI_PLATFORM_NAME ?= "devicetree" +# Experimental features (corresponding -D definition should be appended to +# DEFINES below, e.g. 
for MALI_EXPERIMENTAL_FEATURE, +# -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended) +# +# Experimental features must default to disabled, e.g.: +# MALI_EXPERIMENTAL_FEATURE ?= 0 +MALI_INCREMENTAL_RENDERING ?= 0 + +# Set up our defines, which will be passed to gcc +DEFINES = \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -DMALI_USE_CSF=$(MALI_USE_CSF) \ + -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_COVERAGE=$(MALI_COVERAGE) \ + -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ + -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ + -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) + +# MALI_SEC_INTEGRATION : rename CONFIG_MALI_PLATFORM_NAME to CONFIG_MALI_PLATFORM_THIRDPARTY_NAME +ifeq ($(KBUILD_EXTMOD),) +# in-tree +DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) +else +# out-of-tree +DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) +endif + +DEFINES += -I$(srctree)/drivers/staging/android + +DEFINES += -DMALI_KBASE_BUILD + +# Use our defines when compiling +ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux +subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux + +SRC := \ + context/mali_kbase_context.c \ + debug/mali_kbase_debug_ktrace.c \ + device/mali_kbase_device.c \ + mali_kbase_cache_policy.c \ + mali_kbase_ccswe.c \ + mali_kbase_mem.c \ + mali_kbase_mem_pool_group.c \ + mali_kbase_native_mgm.c \ + mali_kbase_ctx_sched.c \ + mali_kbase_jm.c \ + mali_kbase_gpuprops.c \ + mali_kbase_pm.c \ + mali_kbase_config.c \ + mali_kbase_vinstr.c \ + mali_kbase_hwcnt.c \ + mali_kbase_hwcnt_backend_jm.c \ + mali_kbase_hwcnt_gpu.c \ + mali_kbase_hwcnt_legacy.c \ + mali_kbase_hwcnt_types.c \ + mali_kbase_hwcnt_virtualizer.c \ + mali_kbase_softjobs.c \ + mali_kbase_hw.c \ + mali_kbase_debug.c \ + mali_kbase_gpu_memory_debugfs.c \ + mali_kbase_mem_linux.c \ + mali_kbase_core_linux.c \ + mali_kbase_mem_profile_debugfs.c \ + mmu/mali_kbase_mmu.c \ + mmu/mali_kbase_mmu_hw_direct.c \ + mmu/mali_kbase_mmu_mode_lpae.c \ + mmu/mali_kbase_mmu_mode_aarch64.c \ + mali_kbase_disjoint_events.c \ + mali_kbase_debug_mem_view.c \ + mali_kbase_smc.c \ + mali_kbase_mem_pool.c \ + mali_kbase_mem_pool_debugfs.c \ + mali_kbase_debugfs_helper.c \ + mali_kbase_strings.c \ + mali_kbase_as_fault_debugfs.c \ + mali_kbase_regs_history_debugfs.c \ + mali_power_gpu_frequency_trace.c \ + thirdparty/mali_kbase_mmap.c \ + tl/mali_kbase_timeline.c \ + tl/mali_kbase_timeline_io.c \ + tl/mali_kbase_tlstream.c \ + tl/mali_kbase_tracepoints.c \ + gpu/mali_kbase_gpu.c \ + mali_kbase_trace_gpu_mem.c + +ifeq ($(MALI_USE_CSF),1) + SRC += \ + debug/backend/mali_kbase_debug_ktrace_csf.c \ + device/backend/mali_kbase_device_csf.c \ + gpu/backend/mali_kbase_gpu_fault_csf.c \ + tl/backend/mali_kbase_timeline_csf.c \ + mmu/backend/mali_kbase_mmu_csf.c \ + context/backend/mali_kbase_context_csf.c +else + SRC += \ + mali_kbase_dummy_job_wa.c \ + mali_kbase_debug_job_fault.c \ + mali_kbase_event.c \ + mali_kbase_jd.c \ + mali_kbase_jd_debugfs.c \ + mali_kbase_js.c \ + mali_kbase_js_ctx_attr.c \ + mali_kbase_kinstr_jm.c \ + debug/backend/mali_kbase_debug_ktrace_jm.c \ + device/backend/mali_kbase_device_jm.c \ + gpu/backend/mali_kbase_gpu_fault_jm.c \ + tl/backend/mali_kbase_timeline_jm.c \ + mmu/backend/mali_kbase_mmu_jm.c \ + 
context/backend/mali_kbase_context_jm.c +endif + +ifeq ($(CONFIG_MALI_CINSTR_GWT),y) + SRC += mali_kbase_gwt.c +endif + +ifeq ($(MALI_UNIT_TEST),1) + SRC += tl/mali_kbase_timeline_test.c +endif + +ifeq ($(MALI_CUSTOMER_RELEASE),0) + SRC += mali_kbase_regs_dump_debugfs.c +endif + + +ccflags-y += -I$(KBASE_PATH) -I$(KBASE_PATH)/debug \ + -I$(KBASE_PATH)/debug/backend + +# Tell the Linux build system from which .o file to create the kernel module +obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o + +# Tell the Linux build system to enable building of our .c files +mali_kbase-y := $(SRC:.c=.o) + +# Kconfig passes in the name with quotes for in-tree builds - remove them. +# MALI_SEC_INTEGRATION : rename CONFIG_MALI_PLATFORM_NAME to CONFIG_MALI_PLATFORM_THIRDPARTY_NAME +platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) +MALI_PLATFORM_DIR := platform/$(platform_name) +ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR) +#include $(src)/$(MALI_PLATFORM_DIR)/Kbuild +obj-$(CONFIG_MALI_MIDGARD) += platform/ +#mali_kbase-y += $(PLATFORM_THIRDPARTY:.c=.o) + +ifeq ($(CONFIG_MALI_DEVFREQ),y) + ifeq ($(CONFIG_DEVFREQ_THERMAL),y) + include $(src)/ipa/Kbuild + endif +endif + +ifeq ($(MALI_USE_CSF),1) + include $(src)/csf/Kbuild +else +# empty +endif + +ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) + include $(src)/arbiter/Kbuild +else +# empty +endif + +mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \ + mali_kbase_dma_fence.o \ + mali_kbase_fence.o +mali_kbase-$(CONFIG_SYNC) += \ + mali_kbase_sync_android.o \ + mali_kbase_sync_common.o +mali_kbase-$(CONFIG_SYNC_FILE) += \ + mali_kbase_sync_file.o \ + mali_kbase_sync_common.o \ + mali_kbase_fence.o + +include $(src)/backend/gpu/Kbuild +mali_kbase-y += $(BACKEND:.c=.o) + + +ccflags-y += -I$(src)/backend/gpu +subdir-ccflags-y += -I$(src)/backend/gpu + +# For kutf and mali_kutf_irq_latency_test +obj-$(CONFIG_MALI_KUTF) += tests/ diff --git a/drivers/gpu/arm/b_r26p0/Kconfig b/drivers/gpu/arm/b_r26p0/Kconfig new file mode 100644 index 000000000000..947ed7d34bf2 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/Kconfig @@ -0,0 +1,314 @@ +# +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +menuconfig MALI_MIDGARD + tristate "Mali Midgard series support" + select GPU_TRACEPOINTS if ANDROID + select DMA_SHARED_BUFFER + default n + help + Enable this option to build support for a ARM Mali Midgard GPU. + + To compile this driver as a module, choose M here: + this will generate a single module, called mali_kbase. + +config MALI_GATOR_SUPPORT + bool "Enable Streamline tracing support" + depends on MALI_MIDGARD + default y + help + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. 
+ +config MALI_MIDGARD_DVFS + bool "Enable legacy DVFS" + depends on MALI_MIDGARD && !MALI_DEVFREQ + default n + help + Choose this option to enable legacy DVFS in the Mali Midgard DDK. + +config MALI_MIDGARD_ENABLE_TRACE + bool "Enable kbase tracing" + depends on MALI_MIDGARD + default y if MALI_DEBUG + default n + help + Enables tracing in kbase. Trace log available through + the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled + +config MALI_DEVFREQ + bool "devfreq support for Mali" + depends on MALI_MIDGARD && PM_DEVFREQ + default n + help + Support devfreq for Mali. + + Using the devfreq framework and, by default, the simpleondemand + governor, the frequency of Mali will be dynamically selected from the + available OPPs. + +config MALI_DMA_FENCE + bool "DMA_BUF fence support for Mali" + depends on MALI_MIDGARD + default n + help + Support DMA_BUF fences for Mali. + + This option should only be enabled if the Linux Kernel has built in + support for DMA_BUF fences. + +# MALI_SEC_INTEGRATION +config MALI_PLATFORM_THIRDPARTY + default y + bool "Third Party Platform" + +config MALI_PLATFORM_THIRDPARTY_NAME + depends on MALI_MIDGARD + string "Platform name" + default "exynos" + help + Enter the name of the desired platform configuration directory to + include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must + exist. + +config MALI_ARBITER_SUPPORT + bool "Enable arbiter support for Mali" + depends on MALI_MIDGARD + default n + help + Enable support for the arbiter interface in the driver. + This allows an external arbiter to manage driver access + to GPU hardware in a virtualized environment + + If unsure, say N. + +# MALI_EXPERT configuration options + +menuconfig MALI_EXPERT + depends on MALI_MIDGARD + bool "Enable Expert Settings" + default n + help + Enabling this option and modifying the default settings may produce a driver with performance or + other limitations. + +config MALI_CORESTACK + bool "Support controlling power to the GPU core stack" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Enabling this feature on supported GPUs will let the driver powering + on/off the GPU core stack independently without involving the Power + Domain Controller. This should only be enabled on platforms which + integration of the PDC to the Mali GPU is known to be problematic. + This feature is currently only supported on t-Six and t-HEx GPUs. + + If unsure, say N. + +config MALI_DEBUG + bool "Debug build" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option for increased checking and reporting of errors. + +config MALI_FENCE_DEBUG + bool "Debug sync fence usage" + depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE) + default y + help + Select this option to enable additional checking and reporting on the + use of sync fences in the Mali driver. + + This will add a 3s timeout to all sync fence waits in the Mali + driver, so that when work for Mali has been waiting on a sync fence + for a long time a debug message will be printed, detailing what fence + is causing the block, and which dependent Mali atoms are blocked as a + result of this. + + The timeout can be changed at runtime through the js_soft_timeout + device attribute, where the timeout is specified in milliseconds. + +config MALI_NO_MALI + bool "No Mali" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This can be used to test the driver in a simulated environment + whereby the hardware is not physically present. 
If the hardware is physically + present it will not be used. This can be used to test the majority of the + driver without needing actual hardware or for software benchmarking. + All calls to the simulated hardware will complete immediately as if the hardware + completed the task. + +config MALI_REAL_HW + def_bool !MALI_NO_MALI + +config MALI_ERROR_INJECT + bool "Error injection" + depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI + default n + help + Enables insertion of errors to test module failure and recovery mechanisms. + +config MALI_SYSTEM_TRACE + bool "Enable system event tracing support" + depends on MALI_MIDGARD && MALI_EXPERT + default y if MALI_DEBUG + default n + help + Choose this option to enable system trace events for each + kbase event. This is typically used for debugging but has + minimal overhead when not in use. Enable only if you know what + you are doing. + +config MALI_2MB_ALLOC + bool "Attempt to allocate 2MB pages" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Rather than allocating all GPU memory page-by-page, attempt to + allocate 2MB pages from the kernel. This reduces TLB pressure and + helps to prevent memory fragmentation. + + If in doubt, say N + +config MALI_PWRSOFT_765 + bool "PWRSOFT-765 ticket" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged + in kernel v4.10, however if backported into the kernel then this + option must be manually selected. + + If using kernel >= v4.10 then say N, otherwise if devfreq cooling + changes have been backported say Y to avoid compilation errors. + +config MALI_MEMORY_FULLY_BACKED + bool "Memory fully physically-backed" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This option enables full physical backing of all virtual + memory allocations in the kernel. Notice that this build + option only affects allocations of grow-on-GPU-page-fault + memory. + +config MALI_DMA_BUF_MAP_ON_DEMAND + bool "Map imported dma-bufs on demand" + depends on MALI_MIDGARD + default n + help + This option caused kbase to set up the GPU mapping of imported + dma-buf when needed to run atoms. This is the legacy behaviour. + + This is intended for testing and the option will get removed in the + future. + +config MALI_DMA_BUF_LEGACY_COMPAT + bool "Enable legacy compatibility cache flush on dma-buf map" + depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND + default n + help + This option enables compatibility with legacy dma-buf mapping + behavior, then the dma-buf is mapped on import, by adding cache + maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, + including a cache flush. + + This option might work-around issues related to missing cache + flushes in other drivers. This only has an effect for clients using + UK 11.18 or older. For later UK versions it is not possible. + +config MALI_HW_ERRATA_1485982_NOT_AFFECTED + bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This option disables the default workaround for GPU2017-1336. The + workaround keeps the L2 cache powered up except for powerdown and reset. + + The workaround introduces a limitation that will prevent the running of + protected mode content on fully coherent platforms, as the switch to IO + coherency mode requires the L2 to be turned off. 
+ +config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE + bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" + depends on MALI_MIDGARD && MALI_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED + default n + help + This option uses an alternative workaround for GPU2017-1336. Lowering + the GPU clock to a, platform specific, known good frequeuncy before + powering down the L2 cache. The clock can be specified in the device + tree using the property, opp-mali-errata-1485982. Otherwise the + slowest clock will be selected. + +config MALI_GEM5_BUILD + bool "Enable build of Mali kernel driver for GEM5" + depends on MALI_MIDGARD + default n + help + This option is to do a Mali GEM5 build. + If unsure, say N. + +# Instrumentation options. + +config MALI_JOB_DUMP + bool "Enable system level support needed for job dumping" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Choose this option to enable system level support needed for + job dumping. This is typically used for instrumentation but has + minimal overhead when not in use. Enable only if you know what + you are doing. + +config MALI_PRFCNT_SET_SECONDARY + bool "Use secondary set of performance counters" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option to use secondary set of performance counters. Kernel + features that depend on an access to the primary set of counters may + become unavailable. Enabling this option will prevent power management + from working optimally and may cause instrumentation tools to return + bogus results. + + If unsure, say N. + +config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + bool "Use secondary set of performance counters" + depends on MALI_MIDGARD && MALI_EXPERT && !MALI_PRFCNT_SET_SECONDARY && DEBUG_FS + default n + help + Select this option to make the secondary set of performance counters + available at runtime via debugfs. Kernel features that depend on an + access to the primary set of counters may become unavailable. + + This feature is unsupported and unstable, and may break at any time. + Enabling this option will prevent power management from working + optimally and may cause instrumentation tools to return bogus results. + + If unsure, say N. + +source "drivers/gpu/arm/b_r26p0/platform/Kconfig" +source "drivers/gpu/arm/b_r26p0/tests/Kconfig" diff --git a/drivers/gpu/arm/b_r26p0/Makefile b/drivers/gpu/arm/b_r26p0/Makefile new file mode 100644 index 000000000000..53a12094ec14 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/Makefile @@ -0,0 +1,38 @@ +# +# (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +KDIR ?= /lib/modules/$(shell uname -r)/build + +BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. 
+KBASE_PATH_RELATIVE = $(CURDIR) + +ifeq ($(CONFIG_MALI_BUSLOG),y) +# Add bus logger symbols +EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers +endif + +# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +all: + $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +clean: + $(MAKE) -C $(KDIR) M=$(CURDIR) clean diff --git a/drivers/gpu/arm/b_r26p0/Makefile.kbase b/drivers/gpu/arm/b_r26p0/Makefile.kbase new file mode 100644 index 000000000000..6b0f81ee76e8 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/Makefile.kbase @@ -0,0 +1,23 @@ +# +# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM) + diff --git a/drivers/gpu/arm/b_r26p0/Mconfig b/drivers/gpu/arm/b_r26p0/Mconfig new file mode 100644 index 000000000000..6dd9543d898c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/Mconfig @@ -0,0 +1,278 @@ +# +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + +menuconfig MALI_MIDGARD + bool "Mali Midgard series support" + default y + help + Enable this option to build support for an ARM Mali Midgard GPU. + + To compile this driver as a module, choose M here: + this will generate a single module, called mali_kbase. + +config MALI_GATOR_SUPPORT + bool "Enable Streamline tracing support" + depends on MALI_MIDGARD && !BACKEND_USER + default y + help + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. + +config MALI_MIDGARD_DVFS + bool "Enable legacy DVFS" + depends on MALI_MIDGARD && !MALI_DEVFREQ + default n + help + Choose this option to enable legacy DVFS in the Mali Midgard DDK. + +config MALI_MIDGARD_ENABLE_TRACE + bool "Enable kbase tracing" + depends on MALI_MIDGARD + default y if MALI_DEBUG + default n + help + Enables tracing in kbase. The trace log is available through + the "mali_trace" debugfs file when CONFIG_DEBUG_FS is enabled. + +config MALI_DEVFREQ + bool "devfreq support for Mali" + depends on MALI_MIDGARD + default y if PLATFORM_JUNO + default y if PLATFORM_CUSTOM + help + Support devfreq for Mali.
+ + Using the devfreq framework and, by default, the simpleondemand + governor, the frequency of Mali will be dynamically selected from the + available OPPs. + +config MALI_DMA_FENCE + bool "DMA_BUF fence support for Mali" + depends on MALI_MIDGARD + default n + help + Support DMA_BUF fences for Mali. + + This option should only be enabled if the Linux Kernel has built in + support for DMA_BUF fences. + +config MALI_PLATFORM_NAME + depends on MALI_MIDGARD + string "Platform name" + default "hisilicon" if PLATFORM_HIKEY960 + default "hisilicon" if PLATFORM_HIKEY970 + default "devicetree" + help + Enter the name of the desired platform configuration directory to + include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must + exist. + + When PLATFORM_CUSTOM is set, this needs to be set manually to + pick up the desired platform files. + +config MALI_ARBITER_SUPPORT + bool "Enable arbiter support for Mali" + depends on MALI_MIDGARD + default n + help + Enable support for the arbiter interface in the driver. + This allows an external arbiter to manage driver access + to GPU hardware in a virtualized environment. + + If unsure, say N. + +# MALI_EXPERT configuration options + +menuconfig MALI_EXPERT + depends on MALI_MIDGARD + bool "Enable Expert Settings" + default y + help + Enabling this option and modifying the default settings may produce a driver with performance or + other limitations. + +config MALI_CORESTACK + bool "Support controlling power to the GPU core stack" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Enabling this feature on supported GPUs will let the driver power + the GPU core stack on/off independently without involving the Power + Domain Controller. This should only be enabled on platforms where + integration of the PDC with the Mali GPU is known to be problematic. + This feature is currently only supported on t-Six and t-HEx GPUs. + + If unsure, say N. + +config MALI_DEBUG + bool "Debug build" + depends on MALI_MIDGARD && MALI_EXPERT + default y if DEBUG + default n + help + Select this option for increased checking and reporting of errors. + +config MALI_FENCE_DEBUG + bool "Debug sync fence usage" + depends on MALI_MIDGARD && MALI_EXPERT + default y if MALI_DEBUG + help + Select this option to enable additional checking and reporting on the + use of sync fences in the Mali driver. + + This will add a 3s timeout to all sync fence waits in the Mali + driver, so that when work for Mali has been waiting on a sync fence + for a long time a debug message will be printed, detailing what fence + is causing the block, and which dependent Mali atoms are blocked as a + result of this. + + The timeout can be changed at runtime through the js_soft_timeout + device attribute, where the timeout is specified in milliseconds. + +choice + prompt "Error injection level" + default MALI_ERROR_INJECT_NONE + help + Enables insertion of errors to test module failure and recovery mechanisms. + +config MALI_ERROR_INJECT_NONE + bool "disabled" + help + Error injection is disabled. + +config MALI_ERROR_INJECT_TRACK_LIST + bool "error track list" + depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI + help + Errors to inject are pre-configured by the user. + +config MALI_ERROR_INJECT_RANDOM + bool "random error injection" + depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI + help + Injected errors are random, rather than user-driven.
+ +endchoice + +config MALI_ERROR_INJECT_ON + string + default "0" if MALI_ERROR_INJECT_NONE + default "1" if MALI_ERROR_INJECT_TRACK_LIST + default "2" if MALI_ERROR_INJECT_RANDOM + +config MALI_ERROR_INJECT + bool + default y if !MALI_ERROR_INJECT_NONE + +config MALI_SYSTEM_TRACE + bool "Enable system event tracing support" + depends on MALI_MIDGARD && MALI_EXPERT + default y if MALI_DEBUG + default n + help + Choose this option to enable system trace events for each + kbase event. This is typically used for debugging but has + minimal overhead when not in use. Enable only if you know what + you are doing. + +config MALI_2MB_ALLOC + bool "Attempt to allocate 2MB pages" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Rather than allocating all GPU memory page-by-page, attempt to + allocate 2MB pages from the kernel. This reduces TLB pressure and + helps to prevent memory fragmentation. + + If in doubt, say N. + +config MALI_PWRSOFT_765 + bool "PWRSOFT-765 ticket" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + PWRSOFT-765 fixes devfreq cooling device issues. However, they are + not merged in the mainline kernel yet, so this option helps to guard those + parts of the code. + +config MALI_MEMORY_FULLY_BACKED + bool "Memory fully physically-backed" + default n + help + This option enables full backing of all virtual memory allocations + for the kernel. This only affects grow-on-GPU-page-fault memory. + +config MALI_DMA_BUF_MAP_ON_DEMAND + bool "Map imported dma-bufs on demand" + depends on MALI_MIDGARD + default n + default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED + help + This option causes kbase to set up the GPU mapping of an imported + dma-buf only when it is needed to run atoms. This is the legacy behaviour. + +config MALI_DMA_BUF_LEGACY_COMPAT + bool "Enable legacy compatibility cache flush on dma-buf map" + depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND + default n + help + This option enables compatibility with legacy dma-buf mapping + behaviour, where the dma-buf is mapped on import, by adding cache + maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, + including a cache flush. + +config MALI_REAL_HW + bool + default y + default n if NO_MALI + +config MALI_HW_ERRATA_1485982_NOT_AFFECTED + bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" + depends on MALI_MIDGARD && MALI_EXPERT + default n + default y if PLATFORM_JUNO + help + This option disables the default workaround for GPU2017-1336. The + workaround keeps the L2 cache powered up except for powerdown and reset. + + The workaround introduces a limitation that will prevent the running of + protected mode content on fully coherent platforms, as the switch to IO + coherency mode requires the L2 to be turned off. + +config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE + bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" + depends on MALI_MIDGARD && MALI_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED + default n + help + This option uses an alternative workaround for GPU2017-1336: lowering + the GPU clock to a platform-specific, known-good frequency before + powering down the L2 cache. The clock can be specified in the device + tree using the property opp-mali-errata-1485982. Otherwise, the + slowest clock will be selected. + +config MALI_GEM5_BUILD + bool "Enable build of Mali kernel driver for GEM5" + depends on MALI_MIDGARD + default n + help + This option is for doing a Mali GEM5 build. + If unsure, say N. + +# Instrumentation options.
+ +# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig. +# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. + +source "kernel/drivers/gpu/arm/b_r26p0/tests/Mconfig" diff --git a/drivers/gpu/arm/b_r26p0/arbiter/Kbuild b/drivers/gpu/arm/b_r26p0/arbiter/Kbuild new file mode 100644 index 000000000000..98e47bed223a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/arbiter/Kbuild @@ -0,0 +1,24 @@ +# +# (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + arbiter/mali_kbase_arbif.o \ + arbiter/mali_kbase_arbiter_pm.o diff --git a/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbif.c b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbif.c new file mode 100644 index 000000000000..d193cb99d881 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbif.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_arbif.c + * Mali arbiter interface APIs to share GPU between Virtual Machines + */ + +#include +#include "mali_kbase_arbif.h" +#include +#include +#include +#include "mali_kbase_arbiter_interface.h" + +static void on_gpu_stop(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED(kbdev, kbdev); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); +} + +static void on_gpu_granted(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED(kbdev, kbdev); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); +} + +static void on_gpu_lost(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); +} + +int kbase_arbif_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_OF + struct arbiter_if_arb_vm_ops ops; + struct arbiter_if_dev *arb_if; + struct device_node *arbiter_if_node; + struct platform_device *pdev; + int err; + + dev_dbg(kbdev->dev, "%s\n", __func__); + + arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, + "arbiter_if", 0); + if (!arbiter_if_node) { + dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n"); + /* no arbiter interface defined in device tree */ + kbdev->arb.arb_dev = NULL; + kbdev->arb.arb_if = NULL; + return 0; + } + + pdev = of_find_device_by_node(arbiter_if_node); + if (!pdev) { + dev_err(kbdev->dev, "Failed to find arbiter_if device\n"); + return -EPROBE_DEFER; + } + + if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) { + dev_err(kbdev->dev, "arbiter_if driver not available\n"); + return -EPROBE_DEFER; + } + kbdev->arb.arb_dev = &pdev->dev; + arb_if = platform_get_drvdata(pdev); + if (!arb_if) { + dev_err(kbdev->dev, "arbiter_if driver not ready\n"); + module_put(pdev->dev.driver->owner); + return -EPROBE_DEFER; + } + + kbdev->arb.arb_if = arb_if; + ops.arb_vm_gpu_stop = on_gpu_stop; + ops.arb_vm_gpu_granted = on_gpu_granted; + ops.arb_vm_gpu_lost = on_gpu_lost; + + /* register kbase arbiter_if callbacks */ + if (arb_if->vm_ops.vm_arb_register_dev) { + err = arb_if->vm_ops.vm_arb_register_dev(arb_if, + kbdev->dev, &ops); + if (err) { + dev_err(kbdev->dev, "Arbiter registration failed.\n"); + module_put(pdev->dev.driver->owner); + return err; + } + } +#else /* CONFIG_OF */ + dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); + kbdev->arb.arb_dev = NULL; + kbdev->arb.arb_if = NULL; +#endif + return 0; +} + +void kbase_arbif_destroy(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if); + } + kbdev->arb.arb_if = NULL; + if (kbdev->arb.arb_dev) + module_put(kbdev->arb.arb_dev->driver->owner); + kbdev->arb.arb_dev = NULL; +} + +void kbase_arbif_gpu_request(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_gpu_request(arb_if); + } +} + +void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { + dev_dbg(kbdev->dev, "%s\n", __func__); + KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED(kbdev, kbdev); + 
arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required); + } +} + +void kbase_arbif_gpu_active(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_gpu_active(arb_if); + } +} + +void kbase_arbif_gpu_idle(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) { + dev_dbg(kbdev->dev, "vm_arb_gpu_idle\n"); + arb_if->vm_ops.vm_arb_gpu_idle(arb_if); + } +} diff --git a/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbif.h b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbif.h new file mode 100644 index 000000000000..e7e9de76c94c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbif.h @@ -0,0 +1,133 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * + */ + +/** + * @file + * Mali arbiter interface APIs to share GPU between Virtual Machines + */ + +#ifndef _MALI_KBASE_ARBIF_H_ +#define _MALI_KBASE_ARBIF_H_ + +/** + * enum kbase_arbif_evt - Internal Arbiter event. + * + * @KBASE_VM_GPU_INITIALIZED_EVT: KBase has finished initializing + * and can be stopped + * @KBASE_VM_GPU_STOP_EVT: Stop message received from Arbiter + * @KBASE_VM_GPU_GRANTED_EVT: Grant message received from Arbiter + * @KBASE_VM_GPU_LOST_EVT: Lost message received from Arbiter + * @KBASE_VM_GPU_IDLE_EVENT: KBase has transitioned into an inactive state. + * @KBASE_VM_REF_EVENT: KBase has transitioned into an active state. + * @KBASE_VM_OS_SUSPEND_EVENT: KBase is suspending + * @KBASE_VM_OS_RESUME_EVENT: Kbase is resuming + */ +enum kbase_arbif_evt { + KBASE_VM_GPU_INITIALIZED_EVT = 1, + KBASE_VM_GPU_STOP_EVT, + KBASE_VM_GPU_GRANTED_EVT, + KBASE_VM_GPU_LOST_EVT, + KBASE_VM_GPU_IDLE_EVENT, + KBASE_VM_REF_EVENT, + KBASE_VM_OS_SUSPEND_EVENT, + KBASE_VM_OS_RESUME_EVENT, +}; + +/** + * kbase_arbif_init() - Initialize the arbiter interface functionality. 
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Initialize the arbiter interface and also determines + * if Arbiter functionality is required. + * + * Return: 0 if the Arbiter interface was successfully initialized or the + * Arbiter was not required. + */ +int kbase_arbif_init(struct kbase_device *kbdev); + +/** + * kbase_arbif_destroy() - Cleanups the arbiter interface functionality. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Cleans up the arbiter interface functionality and resets the reference count + * of the arbif module used + */ +void kbase_arbif_destroy(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_request() - Send GPU request message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to request GPU access. + */ +void kbase_arbif_gpu_request(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_stopped() - Send GPU stopped message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @gpu_required: true if GPU access is still required + * (Arbiter will automatically send another grant message) + * + * Sends a message to Arbiter to notify that the GPU has stopped. + * @note Once this call has been made, KBase must not attempt to access the GPU + * until the #KBASE_VM_GPU_GRANTED_EVT event has been received. + */ +void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required); + +/** + * kbase_arbif_gpu_active() - Send a GPU active message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to report that KBase has gone active. + */ +void kbase_arbif_gpu_active(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_idle() - Send a GPU idle message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to report that KBase has gone idle. + */ +void kbase_arbif_gpu_idle(struct kbase_device *kbdev); + +#endif /* _MALI_KBASE_ARBIF_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_defs.h b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_defs.h new file mode 100644 index 000000000000..1f53cbf1a286 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_defs.h @@ -0,0 +1,95 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * + */ + +/** + * @file + * Mali structures define to support arbitration feature + */ + +#ifndef _MALI_KBASE_ARBITER_DEFS_H_ +#define _MALI_KBASE_ARBITER_DEFS_H_ + +#include "mali_kbase_arbiter_pm.h" + +/** + * struct kbase_arbiter_vm_state - Struct representing the state and containing the + * data of pm work + * @kbdev: Pointer to kbase device structure (must be a valid pointer) + * @vm_state_lock: The lock protecting the VM state when arbiter is used. + * This lock must also be held whenever the VM state is being + * transitioned + * @vm_state_wait: Wait queue set when GPU is granted + * @vm_state: Current state of VM + * @vm_arb_wq: Work queue for resuming or stopping work on the GPU for use + * with the Arbiter + * @vm_suspend_work: Work item for vm_arb_wq to stop current work on GPU + * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU + * @vm_arb_starting: Work queue resume in progress + * @vm_arb_stopping: Work queue suspend in progress + * @vm_arb_users_waiting: Count of users waiting for GPU + */ +struct kbase_arbiter_vm_state { + struct kbase_device *kbdev; + struct mutex vm_state_lock; + wait_queue_head_t vm_state_wait; + enum kbase_vm_state vm_state; + struct workqueue_struct *vm_arb_wq; + struct work_struct vm_suspend_work; + struct work_struct vm_resume_work; + bool vm_arb_starting; + bool vm_arb_stopping; + int vm_arb_users_waiting; +}; + +/** + * struct kbase_arbiter_device - Representing an instance of arbiter device, + * allocated from the probe method of Mali driver + * @arb_if: Pointer to the arbiter interface device + * @arb_dev: Pointer to the arbiter device + */ +struct kbase_arbiter_device { + struct arbiter_if_dev *arb_if; + struct device *arb_dev; +}; + +#endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_interface.h b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_interface.h new file mode 100644 index 000000000000..5d5d8a7d2cff --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_interface.h @@ -0,0 +1,181 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * + */ + +/** + * @file + * Defines the Mali arbiter interface + */ + +#ifndef _MALI_KBASE_ARBITER_INTERFACE_H_ +#define _MALI_KBASE_ARBITER_INTERFACE_H_ + +/** + * @brief Mali arbiter interface version + * + * This specifies the current version of the configuration interface. Whenever + * the arbiter interface changes, so that integration effort is required, the + * version number will be increased. Each configuration must make an effort + * to check that it implements the correct version. + * + * Version history: + * 1 - Added the Mali arbiter configuration interface. + * 2 - Strip out reference code from header + * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side) + */ +#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3 + +struct arbiter_if_dev; + +/** + * struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM + * + * This struct contains callbacks used to deliver messages + * from the arbiter to the corresponding VM. + * + * Note that calls into these callbacks may have synchronous calls back into + * the arbiter arbiter_if_vm_arb_ops callbacks below. + * For example vm_arb_gpu_stopped() may be called as a side effect of + * arb_vm_gpu_stop() being called here. + */ +struct arbiter_if_arb_vm_ops { + /** + * arb_vm_gpu_stop() - Ask VM to stop using GPU + * @dev: The arbif kernel module device. + * + * Informs KBase to stop using the GPU as soon as possible. + * @Note: Once the driver is no longer using the GPU, a call to + * vm_arb_gpu_stopped is expected by the arbiter. + */ + void (*arb_vm_gpu_stop)(struct device *dev); + + /** + * arb_vm_gpu_granted() - GPU has been granted to VM + * @dev: The arbif kernel module device. + * + * Informs KBase that the GPU can now be used by the VM. + */ + void (*arb_vm_gpu_granted)(struct device *dev); + + /** + * arb_vm_gpu_lost() - VM has lost the GPU + * @dev: The arbif kernel module device. + * + * This is called if KBase takes too long to respond to the arbiter + * stop request. + * Once this is called, KBase will assume that access to the GPU + * has been lost and will fail all running jobs and reset its + * internal state. + * If successful, will respond with a vm_arb_gpu_stopped message. + */ + void (*arb_vm_gpu_lost)(struct device *dev); +}; + +/** + * struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter + * + * This struct contains callbacks used to request operations + * from the VM to the arbiter + * + * Note that we must not make any synchronous calls back in to the VM + * (via arbiter_if_arb_vm_ops above) in the context of these callbacks. 
+ */ +struct arbiter_if_vm_arb_ops { + /** + * vm_arb_register_dev() - Register VM device driver callbacks. + * @arbif_dev: The arbiter interface we are registering device callbacks + * @dev: The device structure to supply in the callbacks. + * @ops: The callbacks that the device driver supports + * (none are optional). + */ + int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev, + struct device *dev, struct arbiter_if_arb_vm_ops *ops); + + /** + * vm_arb_unregister_dev() - Unregister VM device driver callbacks. + * @arbif_dev: The arbiter interface we are unregistering from. + */ + void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_request() - Ask the arbiter interface for GPU access. + * @arbif_dev: The arbiter interface we want to issue the request. + */ + void (*vm_arb_gpu_request)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_active() - Inform arbiter that the driver has gone active + * @arbif_dev: The arbiter interface device. + */ + void (*vm_arb_gpu_active)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_idle() - Inform the arbiter that the driver has gone idle + * @arbif_dev: The arbiter interface device. + */ + void (*vm_arb_gpu_idle)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_stopped() - Inform the arbiter that the driver has stopped + * using the GPU + * @arbif_dev: The arbiter interface device. + * @gpu_required: The GPU is still needed to do more work. + */ + void (*vm_arb_gpu_stopped)(struct arbiter_if_dev *arbif_dev, + u8 gpu_required); +}; + +/** + * struct arbiter_if_dev - Arbiter Interface + * @vm_ops: Callback functions for connecting KBase with + * arbiter interface device. + * @priv_data: Internal arbif data not used by KBASE. + * + * Arbiter Interface Kernel Module State used for linking KBase + * with an arbiter interface platform device + */ +struct arbiter_if_dev { + struct arbiter_if_vm_arb_ops vm_ops; + void *priv_data; +}; + +#endif /* _MALI_KBASE_ARBITER_INTERFACE_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_pm.c b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_pm.c new file mode 100644 index 000000000000..6c35e165009b --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_pm.c @@ -0,0 +1,645 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_arbiter_pm.c + * Mali arbiter power manager state machine and APIs + */ + +#include +#include +#include +#include +#include +#include +#include + +static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); +static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + struct kbase_device *kbdev); + +static inline const char *kbase_arbiter_pm_vm_state_str( + enum kbase_vm_state state) +{ + switch (state) { + case KBASE_VM_STATE_INITIALIZING: + return "KBASE_VM_STATE_INITIALIZING"; + case KBASE_VM_STATE_INITIALIZING_WITH_GPU: + return "KBASE_VM_STATE_INITIALIZING_WITH_GPU"; + case KBASE_VM_STATE_SUSPENDED: + return "KBASE_VM_STATE_SUSPENDED"; + case KBASE_VM_STATE_STOPPED: + return "KBASE_VM_STATE_STOPPED"; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + return "KBASE_VM_STATE_STOPPED_GPU_REQUESTED"; + case KBASE_VM_STATE_STARTING: + return "KBASE_VM_STATE_STARTING"; + case KBASE_VM_STATE_IDLE: + return "KBASE_VM_STATE_IDLE"; + case KBASE_VM_STATE_ACTIVE: + return "KBASE_VM_STATE_ACTIVE"; + case KBASE_VM_STATE_STOPPING_IDLE: + return "KBASE_VM_STATE_STOPPING_IDLE"; + case KBASE_VM_STATE_STOPPING_ACTIVE: + return "KBASE_VM_STATE_STOPPING_ACTIVE"; + case KBASE_VM_STATE_SUSPEND_PENDING: + return "KBASE_VM_STATE_SUSPEND_PENDING"; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + return "KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT"; + default: + KBASE_DEBUG_ASSERT(false); + return "[UnknownState]"; + } +} + +static inline const char *kbase_arbiter_pm_vm_event_str( + enum kbase_arbif_evt evt) +{ + switch (evt) { + case KBASE_VM_GPU_INITIALIZED_EVT: + return "KBASE_VM_GPU_INITIALIZED_EVT"; + case KBASE_VM_GPU_STOP_EVT: + return "KBASE_VM_GPU_STOP_EVT"; + case KBASE_VM_GPU_GRANTED_EVT: + return "KBASE_VM_GPU_GRANTED_EVT"; + case KBASE_VM_GPU_LOST_EVT: + return "KBASE_VM_GPU_LOST_EVT"; + case KBASE_VM_OS_SUSPEND_EVENT: + return "KBASE_VM_OS_SUSPEND_EVENT"; + case KBASE_VM_OS_RESUME_EVENT: + return "KBASE_VM_OS_RESUME_EVENT"; + case KBASE_VM_GPU_IDLE_EVENT: + return "KBASE_VM_GPU_IDLE_EVENT"; + case KBASE_VM_REF_EVENT: + return "KBASE_VM_REF_EVENT"; + default: + KBASE_DEBUG_ASSERT(false); + return "[UnknownEvent]"; + } +} + +static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, + enum kbase_vm_state new_state) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + dev_dbg(kbdev->dev, "VM set_state %s -> %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), + kbase_arbiter_pm_vm_state_str(new_state)); + lockdep_assert_held(&arb_vm_state->vm_state_lock); + arb_vm_state->vm_state = new_state; + wake_up(&arb_vm_state->vm_state_wait); +} + +static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, + struct kbase_arbiter_vm_state, + vm_suspend_work); + struct kbase_device *kbdev = arb_vm_state->kbdev; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">%s\n", __func__); + if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE || + arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPING_ACTIVE || + arb_vm_state->vm_state == + KBASE_VM_STATE_SUSPEND_PENDING) { + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">kbase_pm_driver_suspend\n"); + kbase_pm_driver_suspend(kbdev); + dev_dbg(kbdev->dev, "<kbase_pm_driver_suspend\n"); + mutex_lock(&arb_vm_state->vm_state_lock); + } + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "<%s\n", __func__); +} + +static void kbase_arbiter_pm_resume_wq(struct
work_struct *data) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, + struct kbase_arbiter_vm_state, + vm_resume_work); + struct kbase_device *kbdev = arb_vm_state->kbdev; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">%s\n", __func__); + arb_vm_state->vm_arb_starting = true; + if (arb_vm_state->vm_state == KBASE_VM_STATE_STARTING) { + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">kbase_pm_driver_resume\n"); + kbase_pm_driver_resume(kbdev, true); + dev_dbg(kbdev->dev, "<kbase_pm_driver_resume\n"); + mutex_lock(&arb_vm_state->vm_state_lock); + } else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_ACTIVE) { + kbase_arbiter_pm_vm_stopped(kbdev); + } + arb_vm_state->vm_arb_starting = false; + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "<%s\n", __func__); +} + +int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) +{ + int err; + struct kbase_arbiter_vm_state *arb_vm_state = NULL; + + arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), + GFP_KERNEL); + if (arb_vm_state == NULL) + return -ENOMEM; + + arb_vm_state->kbdev = kbdev; + arb_vm_state->vm_state = KBASE_VM_STATE_INITIALIZING; + + mutex_init(&arb_vm_state->vm_state_lock); + init_waitqueue_head(&arb_vm_state->vm_state_wait); + arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq", + WQ_HIGHPRI); + if (!arb_vm_state->vm_arb_wq) { + dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n"); + kfree(arb_vm_state); + return -ENOMEM; + } + INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq); + INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); + arb_vm_state->vm_arb_starting = false; + arb_vm_state->vm_arb_users_waiting = 0; + kbdev->pm.arb_vm_state = arb_vm_state; + + err = kbase_arbif_init(kbdev); + if (err) { + goto arbif_init_fail; + } + if (kbdev->arb.arb_if) { + kbase_arbif_gpu_request(kbdev); + dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU); + dev_dbg(kbdev->dev, + "Waiting for initial GPU assignment - done\n"); + } + return 0; + +arbif_init_fail: + destroy_workqueue(arb_vm_state->vm_arb_wq); + kfree(arb_vm_state); + kbdev->pm.arb_vm_state = NULL; + return err; +} + +void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + kbase_arbif_gpu_stopped(kbdev, false); + + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbif_destroy(kbdev); + destroy_workqueue(arb_vm_state->vm_arb_wq); + arb_vm_state->vm_arb_wq = NULL; + kfree(kbdev->pm.arb_vm_state); + kbdev->pm.arb_vm_state = NULL; +} + +void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + if (!kbdev->arb.arb_if || + arb_vm_state->vm_state > + KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + kbase_release_interrupts(kbdev); + + mutex_unlock(&arb_vm_state->vm_state_lock); +} + +void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) +{ + bool request_gpu = false; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + + if (arb_vm_state->vm_arb_users_waiting > 0 && + arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, +
KBASE_VM_STATE_STOPPING_ACTIVE); + + dev_dbg(kbdev->dev, "%s %s\n", __func__, + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_release_interrupts(kbdev); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPING_ACTIVE: + request_gpu = true; + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED); + break; + case KBASE_VM_STATE_SUSPEND_PENDING: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + dev_warn(kbdev->dev, "unexpected pm_stop VM state %u", + arb_vm_state->vm_state); + break; + } + + kbase_arbif_gpu_stopped(kbdev, request_gpu); +} + +static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_INITIALIZING: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_INITIALIZING_WITH_GPU); + break; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); + kbase_install_interrupts(kbdev); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_resume_work); + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + kbase_arbif_gpu_stopped(kbdev, false); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + dev_warn(kbdev->dev, + "GPU_GRANTED when not expected - state %s\n", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + break; + } +} + +static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) { + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + } + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_IDLE); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_STARTING: + dev_dbg(kbdev->dev, "Got GPU_STOP event while STARTING."); + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + if (arb_vm_state->vm_arb_starting) + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_SUSPEND_PENDING: + /* Suspend finishes with a stop so nothing else to do */ + break; + default: + dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + break; + } +} + +static void kbase_gpu_lost(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STARTING: + case KBASE_VM_STATE_ACTIVE: + case KBASE_VM_STATE_IDLE: + dev_warn(kbdev->dev, "GPU lost in state %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_arbiter_pm_vm_gpu_stop(kbdev); + mutex_unlock(&arb_vm_state->vm_state_lock); + 
kbase_pm_handle_gpu_lost(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + case KBASE_VM_STATE_STOPPING_ACTIVE: + case KBASE_VM_STATE_SUSPEND_PENDING: + dev_info(kbdev->dev, "GPU lost while stopping"); + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_pm_handle_gpu_lost(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + dev_info(kbdev->dev, "GPU lost while already stopped"); + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + dev_info(kbdev->dev, "GPU lost while waiting to suspend"); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + break; + } + +} + +static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( + struct kbase_device *kbdev) +{ + switch (kbdev->pm.arb_vm_state->vm_state) { + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_IDLE: + case KBASE_VM_STATE_ACTIVE: + return true; + default: + return false; + } +} + +static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + enum kbase_vm_state prev_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + if (kbdev->arb.arb_if) { + if (kbdev->pm.arb_vm_state->vm_state == + KBASE_VM_STATE_SUSPENDED) + return; + } + /* Block suspend OS function until we are in a stable state + * with vm_state_lock + */ + while (!kbase_arbiter_pm_vm_os_suspend_ready_state(kbdev)) { + prev_state = arb_vm_state->vm_state; + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPING_ACTIVE: + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + break; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT); + break; + case KBASE_VM_STATE_STARTING: + if (!arb_vm_state->vm_arb_starting) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + kbase_arbiter_pm_vm_stopped(kbdev); + } + break; + default: + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state != prev_state); + mutex_lock(&arb_vm_state->vm_state_lock); + } + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPED: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPENDED); + break; + case KBASE_VM_STATE_IDLE: + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + mutex_unlock(&arb_vm_state->vm_state_lock); + /* Ensure resume has completed fully before starting suspend */ + flush_work(&arb_vm_state->vm_resume_work); + kbase_pm_driver_suspend(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_SUSPENDED: + break; + default: + KBASE_DEBUG_ASSERT_MSG(false, "Unexpected state to suspend"); + break; + } +} + +static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state == + KBASE_VM_STATE_SUSPENDED, + "Unexpected state to resume"); + + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); + + /* Release lock and block resume OS function until we have + * asynchronously received the GRANT 
message from the Arbiter and + * fully resumed + */ + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + flush_work(&arb_vm_state->vm_resume_work); + mutex_lock(&arb_vm_state->vm_state_lock); +} + +void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + enum kbase_arbif_evt evt) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + if (!kbdev->arb.arb_if) + return; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "%s %s\n", __func__, + kbase_arbiter_pm_vm_event_str(evt)); + + switch (evt) { + case KBASE_VM_GPU_GRANTED_EVT: + kbase_arbiter_pm_vm_gpu_start(kbdev); + break; + case KBASE_VM_GPU_STOP_EVT: + kbase_arbiter_pm_vm_gpu_stop(kbdev); + break; + case KBASE_VM_GPU_LOST_EVT: + dev_info(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); + kbase_gpu_lost(kbdev); + break; + case KBASE_VM_OS_SUSPEND_EVENT: + kbase_arbiter_pm_vm_os_prepare_suspend(kbdev); + break; + case KBASE_VM_OS_RESUME_EVENT: + kbase_arbiter_pm_vm_os_resume(kbdev); + break; + case KBASE_VM_GPU_IDLE_EVENT: + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_IDLE); + kbase_arbif_gpu_idle(kbdev); + break; + default: + break; + } + break; + + case KBASE_VM_REF_EVENT: + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STARTING: + KBASE_TLSTREAM_TL_EVENT_ARB_STARTED(kbdev, kbdev); + /* FALL THROUGH */ + case KBASE_VM_STATE_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_ACTIVE); + kbase_arbif_gpu_active(kbdev); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + break; + default: + break; + } + break; + + case KBASE_VM_GPU_INITIALIZED_EVT: + lockdep_assert_held(&kbdev->pm.lock); + if (kbdev->pm.active_count > 0) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_ACTIVE); + kbase_arbif_gpu_active(kbdev); + } else { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_IDLE); + kbase_arbif_gpu_idle(kbdev); + } + break; + + default: + dev_alert(kbdev->dev, "Got Unknown Event!"); + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); +} + +static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + dev_dbg(kbdev->dev, "Waiting for GPU assignment...\n"); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || + arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); + dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n"); +} + +static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + return (arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || + arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); +} + +int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + if (kbdev->arb.arb_if) { + mutex_lock(&arb_vm_state->vm_state_lock); + while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) { + /* Update VM state since we have GPU work to do */ + if (arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + else if 
(arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPED) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); + } else if (arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU) + break; + + if (suspend_handler != + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { + if (suspend_handler == + KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED + || + kbdev->pm.active_count > 0) + break; + + mutex_unlock(&arb_vm_state->vm_state_lock); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + return 1; + } + + if (arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU) + break; + + /* Need to synchronously wait for GPU assignment */ + arb_vm_state->vm_arb_users_waiting++; + mutex_unlock(&arb_vm_state->vm_state_lock); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + mutex_lock(&arb_vm_state->vm_state_lock); + arb_vm_state->vm_arb_users_waiting--; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + } + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_pm.h b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_pm.h new file mode 100644 index 000000000000..3c49eb1948c5 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/arbiter/mali_kbase_arbiter_pm.h @@ -0,0 +1,159 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file + * Mali arbiter power manager state machine and APIs + */ + +#ifndef _MALI_KBASE_ARBITER_PM_H_ +#define _MALI_KBASE_ARBITER_PM_H_ + +#include "mali_kbase_arbif.h" + +/** + * enum kbase_vm_state - Current PM Arbitration state. + * + * @KBASE_VM_STATE_INITIALIZING: Special state before arbiter is initialized. + * @KBASE_VM_STATE_INITIALIZING_WITH_GPU: Initialization after GPU + * has been granted. 
+ * @KBASE_VM_STATE_SUSPENDED: KBase is suspended by OS and GPU is not assigned. + * @KBASE_VM_STATE_STOPPED: GPU is not assigned to KBase and is not required. + * @KBASE_VM_STATE_STOPPED_GPU_REQUESTED: GPU is not assigned to KBase + * but a request has been made. + * @KBASE_VM_STATE_STARTING: GPU is assigned and KBase is getting ready to run. + * @KBASE_VM_STATE_IDLE: GPU is assigned but KBase has no work to do + * @KBASE_VM_STATE_ACTIVE: GPU is assigned and KBase is busy using it + * @KBASE_VM_STATE_SUSPEND_PENDING: OS is going into suspend mode. + * @KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: OS is going into suspend mode but GPU + * has already been requested. + * In this situation we must wait for + * the Arbiter to send a GRANTED message + * and respond immediately with + * a STOPPED message before entering + * the suspend mode. + * @KBASE_VM_STATE_STOPPING_IDLE: Arbiter has sent a stopped message and there + * is currently no work to do on the GPU. + * @KBASE_VM_STATE_STOPPING_ACTIVE: Arbiter has sent a stopped message when + * KBase has work to do. + */ +enum kbase_vm_state { + KBASE_VM_STATE_INITIALIZING, + KBASE_VM_STATE_INITIALIZING_WITH_GPU, + KBASE_VM_STATE_SUSPENDED, + KBASE_VM_STATE_STOPPED, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED, + KBASE_VM_STATE_STARTING, + KBASE_VM_STATE_IDLE, + KBASE_VM_STATE_ACTIVE, + KBASE_VM_STATE_SUSPEND_PENDING, + KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT, + KBASE_VM_STATE_STOPPING_IDLE, + KBASE_VM_STATE_STOPPING_ACTIVE +}; + +/** + * kbase_arbiter_pm_early_init() - Initialize arbiter for VM Paravirtualized use + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Initialize the arbiter and other required resources during the runtime + * and request the GPU for the VM for the first time. + * + * Return: 0 if successful, otherwise a standard Linux error code + */ +int kbase_arbiter_pm_early_init(struct kbase_device *kbdev); + +/** + * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Clean up all the resources + */ +void kbase_arbiter_pm_early_term(struct kbase_device *kbdev); + +/** + * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Releases interrupts if needed (GPU is available) otherwise does nothing + */ +void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); + +/** + * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * The state machine function. Receives events and transitions states + * according the event received and the current state + */ +void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + enum kbase_arbif_evt event); + +/** + * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for + * arbitration mode + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @suspend_handler: The handler code for how to handle a suspend + * that might occur + * + * This function handles a suspend event from the driver, + * communicating with the arbiter and waiting synchronously for the GPU + * to be granted again depending on the VM state. 
+ * + * Return: 0 if success, 1 if failure due to system suspending/suspended + */ +int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler); + + +/** + * kbase_arbiter_pm_vm_stopped() - Handle stop event for the VM + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function handles a stop event for the VM. + * It will update the VM state and forward the stop event to the driver. + */ +void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); + +#endif /*_MALI_KBASE_ARBITER_PM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/Kbuild b/drivers/gpu/arm/b_r26p0/backend/gpu/Kbuild new file mode 100644 index 000000000000..0b3e0732261d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/Kbuild @@ -0,0 +1,66 @@ +# +# (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +BACKEND += \ + backend/gpu/mali_kbase_cache_policy_backend.c \ + backend/gpu/mali_kbase_device_hw.c \ + backend/gpu/mali_kbase_gpuprops_backend.c \ + backend/gpu/mali_kbase_irq_linux.c \ + backend/gpu/mali_kbase_instr_backend.c \ + backend/gpu/mali_kbase_js_backend.c \ + backend/gpu/mali_kbase_pm_backend.c \ + backend/gpu/mali_kbase_pm_driver.c \ + backend/gpu/mali_kbase_pm_metrics.c \ + backend/gpu/mali_kbase_pm_ca.c \ + backend/gpu/mali_kbase_pm_always_on.c \ + backend/gpu/mali_kbase_pm_coarse_demand.c \ + backend/gpu/mali_kbase_pm_policy.c \ + backend/gpu/mali_kbase_time.c \ + backend/gpu/mali_kbase_l2_mmu_config.c \ + backend/gpu/mali_kbase_clk_rate_trace_mgr.c + +ifeq ($(MALI_USE_CSF),1) +# empty +else + BACKEND += \ + backend/gpu/mali_kbase_jm_as.c \ + backend/gpu/mali_kbase_debug_job_fault_backend.c \ + backend/gpu/mali_kbase_jm_hw.c \ + backend/gpu/mali_kbase_jm_rb.c +endif + +ifeq ($(MALI_CUSTOMER_RELEASE),0) +BACKEND += \ + backend/gpu/mali_kbase_pm_always_on_demand.c +endif + +ifeq ($(CONFIG_MALI_DEVFREQ),y) +BACKEND += \ + backend/gpu/mali_kbase_devfreq.c +endif + +ifeq ($(CONFIG_MALI_NO_MALI),y) + # Dummy model + BACKEND += backend/gpu/mali_kbase_model_dummy.c + BACKEND += backend/gpu/mali_kbase_model_linux.c + # HW error simulation + BACKEND += backend/gpu/mali_kbase_model_error_generator.c +endif diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_backend_config.h new file mode 100644 index 000000000000..4a61f96c8c7d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_backend_config.h @@ -0,0 +1,31 @@ +/* + * + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend specific configuration + */ + +#ifndef _KBASE_BACKEND_CONFIG_H_ +#define _KBASE_BACKEND_CONFIG_H_ + +#endif /* _KBASE_BACKEND_CONFIG_H_ */ + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_cache_policy_backend.c new file mode 100644 index 000000000000..7378bfd7b397 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_cache_policy_backend.c @@ -0,0 +1,34 @@ +/* + * + * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "backend/gpu/mali_kbase_cache_policy_backend.h" +#include + +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode) +{ + kbdev->current_gpu_coherency_mode = mode; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) + kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); +} + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_cache_policy_backend.h new file mode 100644 index 000000000000..f78ada74f605 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_cache_policy_backend.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +#ifndef _KBASE_CACHE_POLICY_BACKEND_H_ +#define _KBASE_CACHE_POLICY_BACKEND_H_ + +#include "mali_kbase.h" +#include "mali_base_kernel.h" + +/** + * kbase_cache_set_coherency_mode() - Sets the system coherency mode + * in the GPU. + * @kbdev: Device pointer + * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE + */ +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode); + +#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_clk_rate_trace_mgr.c new file mode 100644 index 000000000000..18bb1173a49f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -0,0 +1,280 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Implementation of the GPU clock rate trace manager. + */ + +#include +#include +#include +#include +#include "mali_kbase_clk_rate_trace_mgr.h" + +#ifdef CONFIG_TRACE_POWER_GPU_FREQUENCY +#include +#else +#include "mali_power_gpu_frequency_trace.h" +#endif + +#ifndef CLK_RATE_TRACE_OPS +#define CLK_RATE_TRACE_OPS (NULL) +#endif + +static int gpu_clk_rate_change_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct kbase_gpu_clk_notifier_data *ndata = data; + struct kbase_clk_data *clk_data = + container_of(nb, struct kbase_clk_data, clk_rate_change_nb); + struct kbase_clk_rate_trace_manager *clk_rtm = clk_data->clk_rtm; + unsigned long flags; + + if (WARN_ON_ONCE(clk_data->gpu_clk_handle != ndata->gpu_clk_handle)) + return NOTIFY_BAD; + + spin_lock_irqsave(&clk_rtm->lock, flags); + if (event == POST_RATE_CHANGE) { + if (!clk_rtm->gpu_idle && + (clk_data->clock_val != ndata->new_rate)) { + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, clk_data->index, ndata->new_rate); + } + + clk_data->clock_val = ndata->new_rate; + } + spin_unlock_irqrestore(&clk_rtm->lock, flags); + + return NOTIFY_DONE; +} + +static int gpu_clk_data_init(struct kbase_device *kbdev, + void *gpu_clk_handle, unsigned int index) +{ + struct kbase_clk_rate_trace_op_conf *callbacks = + (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; + struct kbase_clk_data *clk_data; + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + int ret = 0; + + if (WARN_ON(!callbacks) || + WARN_ON(!gpu_clk_handle) || + WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) + return -EINVAL; + + clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); + if (!clk_data) { + dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index); + return -ENOMEM; + } + + clk_data->index = (u8)index; + clk_data->gpu_clk_handle = gpu_clk_handle; + /* Store the initial value of clock */ + 
clk_data->clock_val = + callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); + + { + /* At the initialization time, GPU is powered off. */ + unsigned long flags; + + spin_lock_irqsave(&clk_rtm->lock, flags); + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, clk_data->index, 0); + spin_unlock_irqrestore(&clk_rtm->lock, flags); + } + + clk_data->clk_rtm = clk_rtm; + clk_rtm->clks[index] = clk_data; + + clk_data->clk_rate_change_nb.notifier_call = + gpu_clk_rate_change_notifier; + + ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle, + &clk_data->clk_rate_change_nb); + if (ret) { + dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index); + kfree(clk_data); + } + + return ret; +} + +int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) +{ + struct kbase_clk_rate_trace_op_conf *callbacks = + (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + unsigned int i; + int ret = 0; + + /* Return early if no callbacks provided for clock rate tracing */ + if (!callbacks) + return 0; + + spin_lock_init(&clk_rtm->lock); + INIT_LIST_HEAD(&clk_rtm->listeners); + + clk_rtm->gpu_idle = true; + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + void *gpu_clk_handle = + callbacks->enumerate_gpu_clk(kbdev, i); + + if (!gpu_clk_handle) + break; + + ret = gpu_clk_data_init(kbdev, gpu_clk_handle, i); + if (ret) + goto error; + } + + /* Activate clock rate trace manager if at least one GPU clock was + * enumerated. + */ + if (i) + WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks); + else + dev_info(kbdev->dev, "No clock(s) available for rate tracing"); + + return 0; + +error: + while (i--) { + clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( + kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); + kfree(clk_rtm->clks[i]); + } + + return ret; +} + +void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) +{ + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + unsigned int i; + + WARN_ON(!list_empty(&clk_rtm->listeners)); + + if (!clk_rtm->clk_rate_trace_ops) + return; + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (!clk_rtm->clks[i]) + break; + + clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister( + kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); + kfree(clk_rtm->clks[i]); + } + + WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); +} + +void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) +{ + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + unsigned int i; + unsigned long flags; + + if (!clk_rtm->clk_rate_trace_ops) + return; + + spin_lock_irqsave(&clk_rtm->lock, flags); + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + struct kbase_clk_data *clk_data = clk_rtm->clks[i]; + + if (!clk_data) + break; + + if (unlikely(!clk_data->clock_val)) + continue; + + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, clk_data->index, clk_data->clock_val); + } + + clk_rtm->gpu_idle = false; + spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + +void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) +{ + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + unsigned int i; + unsigned long flags; + + if (!clk_rtm->clk_rate_trace_ops) + return; + + spin_lock_irqsave(&clk_rtm->lock, flags); + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + struct kbase_clk_data *clk_data = 
clk_rtm->clks[i]; + + if (!clk_data) + break; + + if (unlikely(!clk_data->clock_val)) + continue; + + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, clk_data->index, 0); + } + + clk_rtm->gpu_idle = true; + spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + +void kbase_clk_rate_trace_manager_notify_all( + struct kbase_clk_rate_trace_manager *clk_rtm, + u32 clk_index, + unsigned long new_rate) +{ + struct kbase_clk_rate_listener *pos; + struct kbase_device *kbdev; + + lockdep_assert_held(&clk_rtm->lock); + + kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm); + + dev_dbg(kbdev->dev, "GPU clock %u rate changed to %lu", + clk_index, new_rate); + + /* Raise standard `power/gpu_frequency` ftrace event */ + { + unsigned long new_rate_khz = new_rate; + + do_div(new_rate_khz, 1000); + trace_gpu_frequency(new_rate_khz, clk_index); + } + + /* Notify the listeners. */ + list_for_each_entry(pos, &clk_rtm->listeners, node) { + pos->notify(pos, clk_index, new_rate); + } +} +KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all); + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_clk_rate_trace_mgr.h new file mode 100644 index 000000000000..dcafb26ea4c0 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_clk_rate_trace_mgr.h @@ -0,0 +1,155 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_CLK_RATE_TRACE_MGR_ +#define _KBASE_CLK_RATE_TRACE_MGR_ + +/** The index of top clock domain in kbase_clk_rate_trace_manager:clks. */ +#define KBASE_CLOCK_DOMAIN_TOP (0) + +/** The index of shader-cores clock domain in + * kbase_clk_rate_trace_manager:clks. + */ +#define KBASE_CLOCK_DOMAIN_SHADER_CORES (1) + +/** + * struct kbase_clk_data - Data stored per enumerated GPU clock. + * + * @clk_rtm: Pointer to clock rate trace manager object. + * @gpu_clk_handle: Handle unique to the enumerated GPU clock. + * @plat_private: Private data for the platform to store into + * @clk_rate_change_nb: notifier block containing the pointer to callback + * function that is invoked whenever the rate of + * enumerated GPU clock changes. + * @clock_val: Current rate of the enumerated GPU clock. + * @index: Index at which the GPU clock was enumerated. + */ +struct kbase_clk_data { + struct kbase_clk_rate_trace_manager *clk_rtm; + void *gpu_clk_handle; + void *plat_private; + struct notifier_block clk_rate_change_nb; + unsigned long clock_val; + u8 index; +}; + +/** + * kbase_clk_rate_trace_manager_init - Initialize GPU clock rate trace manager. + * + * @kbdev: Device pointer + * + * Return: 0 if success, or an error code on failure. 
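+ *
+ * Once initialized, other parts of the driver can observe rate changes
+ * through the subscribe API declared later in this header. A minimal,
+ * illustrative listener is sketched below; the callback arguments are
+ * assumed to match the (listener, clk_index, new_rate) call made by
+ * kbase_clk_rate_trace_manager_notify_all():
+ *
+ *   static void my_rate_cb(struct kbase_clk_rate_listener *listener,
+ *                          u32 clk_index, unsigned long new_rate)
+ *   {
+ *   }
+ *
+ *   static struct kbase_clk_rate_listener my_listener = {
+ *           .notify = my_rate_cb,
+ *   };
+ *
+ *   kbase_clk_rate_trace_manager_subscribe(&kbdev->pm.clk_rtm,
+ *                                          &my_listener);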
+ */ +int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); + +/** + * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager. + * + * @kbdev: Device pointer + */ +void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev); + +/** + * kbase_clk_rate_trace_manager_gpu_active - Inform GPU clock rate trace + * manager of GPU becoming active. + * + * @kbdev: Device pointer + */ +void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev); + +/** + * kbase_clk_rate_trace_manager_gpu_idle - Inform GPU clock rate trace + * manager of GPU becoming idle. + * @kbdev: Device pointer + */ +void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev); + +/** + * kbase_clk_rate_trace_manager_subscribe_no_lock() - Add freq change listener. + * + * @clk_rtm: Clock rate manager instance. + * @listener: Listener handle + * + * kbase_clk_rate_trace_manager:lock must be held by the caller. + */ +static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( + struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) +{ + lockdep_assert_held(&clk_rtm->lock); + list_add(&listener->node, &clk_rtm->listeners); +} + +/** + * kbase_clk_rate_trace_manager_subscribe() - Add freq change listener. + * + * @clk_rtm: Clock rate manager instance. + * @listener: Listener handle + */ +static inline void kbase_clk_rate_trace_manager_subscribe( + struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_rtm->lock, flags); + kbase_clk_rate_trace_manager_subscribe_no_lock( + clk_rtm, listener); + spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + +/** + * kbase_clk_rate_trace_manager_unsubscribe() - Remove freq change listener. + * + * @clk_rtm: Clock rate manager instance. + * @listener: Listener handle + */ +static inline void kbase_clk_rate_trace_manager_unsubscribe( + struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_rtm->lock, flags); + list_del(&listener->node); + spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + +/** + * kbase_clk_rate_trace_manager_notify_all() - Notify all clock \ + * rate listeners. + * + * @clk_rtm: Clock rate manager instance. + * @clk_index: Clock index. + * @new_rate: New clock frequency(Hz) + * + * kbase_clk_rate_trace_manager:lock must be locked. + * This function is exported to be used by clock rate trace test + * portal. + */ +void kbase_clk_rate_trace_manager_notify_all( + struct kbase_clk_rate_trace_manager *clk_rtm, + u32 clock_index, + unsigned long new_rate); + +#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */ + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_debug_job_fault_backend.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_debug_job_fault_backend.c new file mode 100644 index 000000000000..b05844ef4f50 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -0,0 +1,164 @@ +/* + * + * (C) COPYRIGHT 2012-2015,2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include "mali_kbase_debug_job_fault.h" + +#ifdef CONFIG_DEBUG_FS + +/*GPU_CONTROL_REG(r)*/ +static int gpu_control_reg_snapshot[] = { + GPU_ID, + SHADER_READY_LO, + SHADER_READY_HI, + TILER_READY_LO, + TILER_READY_HI, + L2_READY_LO, + L2_READY_HI +}; + +/* JOB_CONTROL_REG(r) */ +static int job_control_reg_snapshot[] = { + JOB_IRQ_MASK, + JOB_IRQ_STATUS +}; + +/* JOB_SLOT_REG(n,r) */ +static int job_slot_reg_snapshot[] = { + JS_HEAD_LO, + JS_HEAD_HI, + JS_TAIL_LO, + JS_TAIL_HI, + JS_AFFINITY_LO, + JS_AFFINITY_HI, + JS_CONFIG, + JS_STATUS, + JS_HEAD_NEXT_LO, + JS_HEAD_NEXT_HI, + JS_AFFINITY_NEXT_LO, + JS_AFFINITY_NEXT_HI, + JS_CONFIG_NEXT +}; + +/*MMU_REG(r)*/ +static int mmu_reg_snapshot[] = { + MMU_IRQ_MASK, + MMU_IRQ_STATUS +}; + +/* MMU_AS_REG(n,r) */ +static int as_reg_snapshot[] = { + AS_TRANSTAB_LO, + AS_TRANSTAB_HI, + AS_TRANSCFG_LO, + AS_TRANSCFG_HI, + AS_MEMATTR_LO, + AS_MEMATTR_HI, + AS_FAULTSTATUS, + AS_FAULTADDRESS_LO, + AS_FAULTADDRESS_HI, + AS_STATUS +}; + +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, + int reg_range) +{ + int i, j; + int offset = 0; + int slot_number; + int as_number; + + if (kctx->reg_dump == NULL) + return false; + + slot_number = kctx->kbdev->gpu_props.num_job_slots; + as_number = kctx->kbdev->gpu_props.num_address_spaces; + + /* get the GPU control registers*/ + for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job control registers*/ + for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_CONTROL_REG(job_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job Slot registers*/ + for (j = 0; j < slot_number; j++) { + for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); + offset += 2; + } + } + + /* get the MMU registers*/ + for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + offset += 2; + } + + /* get the Address space registers*/ + for (j = 0; j < as_number; j++) { + for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + MMU_AS_REG(j, as_reg_snapshot[i]); + offset += 2; + } + } + + WARN_ON(offset >= (reg_range*2/4)); + + /* set the termination flag*/ + kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; + kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; + + dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", + offset); + + return true; +} + +bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) +{ + int offset = 0; + + if (kctx->reg_dump == NULL) + return false; + + while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { + kctx->reg_dump[offset+1] = + kbase_reg_read(kctx->kbdev, + kctx->reg_dump[offset]); + offset += 2; + } + return true; +} + + +#endif diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_devfreq.c 
b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_devfreq.c new file mode 100644 index 000000000000..2806f05c12c9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_devfreq.c @@ -0,0 +1,731 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include +#include +#include +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#endif + +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) +#include +#else /* Linux >= 3.13 */ +/* In 3.13 the OPP include header file, types, and functions were all + * renamed. Use the old filename for the include, and define the new names to + * the old, when an old kernel is detected. + */ +#include +#define dev_pm_opp opp +#define dev_pm_opp_get_voltage opp_get_voltage +#define dev_pm_opp_get_opp_count opp_get_opp_count +#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil +#define dev_pm_opp_find_freq_floor opp_find_freq_floor +#endif /* Linux >= 3.13 */ + +/** + * opp_translate - Translate nominal OPP frequency from devicetree into real + * frequency and core mask + * @kbdev: Device pointer + * @freq: Nominal frequency + * @core_mask: Pointer to u64 to store core mask to + * @freqs: Pointer to array of frequencies + * @volts: Pointer to array of voltages + * + * This function will only perform translation if an operating-points-v2-mali + * table is present in devicetree. If one is not present then it will return an + * untranslated frequency and all cores enabled. 
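+ *
+ * For example (values purely illustrative): if the table holds an entry
+ * whose nominal opp-hz is 800 MHz, whose per-clock opp-hz-real rates are
+ * 800 MHz and 700 MHz and whose opp-core-mask is 0xf, a call with @freq
+ * equal to that nominal rate fills freqs[] with the two real rates,
+ * volts[] with the entry's opp-microvolt values and *@core_mask with 0xf.
+ * If no entry matches, *@core_mask falls back to shader_present and every
+ * freqs[i] is set to the nominal @freq.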
+ */ +static void opp_translate(struct kbase_device *kbdev, unsigned long freq, + u64 *core_mask, unsigned long *freqs, unsigned long *volts) +{ + unsigned int i; + + for (i = 0; i < kbdev->num_opps; i++) { + if (kbdev->devfreq_table[i].opp_freq == freq) { + unsigned int j; + + *core_mask = kbdev->devfreq_table[i].core_mask; + for (j = 0; j < kbdev->nr_clocks; j++) { + freqs[j] = + kbdev->devfreq_table[i].real_freqs[j]; + volts[j] = + kbdev->devfreq_table[i].opp_volts[j]; + } + + break; + } + } + + /* If failed to find OPP, return all cores enabled + * and nominal frequency + */ + if (i == kbdev->num_opps) { + *core_mask = kbdev->gpu_props.props.raw_props.shader_present; + for (i = 0; i < kbdev->nr_clocks; i++) + freqs[i] = freq; + } +} + +static int +kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + struct dev_pm_opp *opp; + unsigned long nominal_freq; + unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; + unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; + unsigned int i; + u64 core_mask; + + nominal_freq = *target_freq; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_lock(); +#endif + opp = devfreq_recommended_opp(dev, &nominal_freq, flags); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_unlock(); +#endif + if (IS_ERR_OR_NULL(opp)) { + dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); + return PTR_ERR(opp); + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + dev_pm_opp_put(opp); +#endif + + /* + * Only update if there is a change of frequency + */ + if (kbdev->current_nominal_freq == nominal_freq) { + *target_freq = nominal_freq; + return 0; + } + + opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts); + +#ifdef CONFIG_REGULATOR + /* Regulators and clocks work in pairs: every clock has a regulator, + * and we never expect to have more regulators than clocks. + * + * We always need to increase the voltage before increasing + * the frequency of a regulator/clock pair, otherwise the clock + * wouldn't have enough power to perform the transition. + * + * It's always safer to decrease the frequency before decreasing + * voltage of a regulator/clock pair, otherwise the clock could have + * problems operating if it is deprived of the necessary power + * to sustain its current frequency (even if that happens for a short + * transition interval). 
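+ *
+ * In short, the code below applies the safe ordering
+ *
+ *   scaling up:    regulator_set_voltage(), then clk_set_rate()
+ *   scaling down:  clk_set_rate(), then regulator_set_voltage()
+ *
+ * which is why a voltage-raising loop precedes the clock loop and a
+ * voltage-lowering loop follows it.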
+ */ + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->regulators[i] && + kbdev->current_voltages[i] != volts[i] && + kbdev->current_freqs[i] < freqs[i]) { + int err; + + err = regulator_set_voltage(kbdev->regulators[i], + volts[i], volts[i]); + if (!err) { + kbdev->current_voltages[i] = volts[i]; + } else { + dev_err(dev, "Failed to increase voltage (%d) (target %lu)\n", + err, volts[i]); + return err; + } + } + } +#endif + + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->clocks[i]) { + int err; + + err = clk_set_rate(kbdev->clocks[i], freqs[i]); + if (!err) { + kbdev->current_freqs[i] = freqs[i]; + } else { + dev_err(dev, "Failed to set clock %lu (target %lu)\n", + freqs[i], *target_freq); + return err; + } + } + } + +#ifdef CONFIG_REGULATOR + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->regulators[i] && + kbdev->current_voltages[i] != volts[i] && + kbdev->current_freqs[i] > freqs[i]) { + int err; + + err = regulator_set_voltage(kbdev->regulators[i], + volts[i], volts[i]); + if (!err) { + kbdev->current_voltages[i] = volts[i]; + } else { + dev_err(dev, "Failed to decrease voltage (%d) (target %lu)\n", + err, volts[i]); + return err; + } + } + } +#endif + + kbase_devfreq_set_core_mask(kbdev, core_mask); + + *target_freq = nominal_freq; + kbdev->current_nominal_freq = nominal_freq; + kbdev->current_core_mask = core_mask; + + KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)nominal_freq); + + return 0; +} + +void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq) +{ + unsigned long target_freq = freq; + + kbase_devfreq_target(kbdev->dev, &target_freq, 0); +} + +static int +kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + *freq = kbdev->current_nominal_freq; + + return 0; +} + +static int +kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbasep_pm_metrics diff; + + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff); + + stat->busy_time = diff.time_busy; + stat->total_time = diff.time_busy + diff.time_idle; + stat->current_frequency = kbdev->current_nominal_freq; + stat->private_data = NULL; + + return 0; +} + +static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, + struct devfreq_dev_profile *dp) +{ + int count; + int i = 0; + unsigned long freq; + struct dev_pm_opp *opp; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_lock(); +#endif + count = dev_pm_opp_get_opp_count(kbdev->dev); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_unlock(); +#endif + if (count < 0) + return count; + + dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), + GFP_KERNEL); + if (!dp->freq_table) + return -ENOMEM; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_lock(); +#endif + for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { + opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); + if (IS_ERR(opp)) + break; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + dev_pm_opp_put(opp); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */ + + dp->freq_table[i] = freq; + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_unlock(); +#endif + + if (count != i) + dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", + count, i); + + dp->max_state = i; + + /* Have the lowest clock as suspend clock. + * It may be overridden by 'opp-mali-errata-1485982'. 
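+ *
+ * Passing a start frequency of 0 to dev_pm_opp_find_freq_ceil() below
+ * therefore returns the slowest available OPP; that rate becomes
+ * gpu_clock_suspend_freq unless kbase_devfreq_init_core_mask_table()
+ * later finds an OPP node carrying 'opp-mali-errata-1485982' and
+ * overrides it via kbasep_devfreq_read_suspend_clock().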
+ */ + if (kbdev->pm.backend.gpu_clock_slow_down_wa) { + freq = 0; + opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq); + if (IS_ERR(opp)) { + dev_err(kbdev->dev, "failed to find slowest clock"); + return 0; + } + dev_info(kbdev->dev, "suspend clock %lu from slowest", freq); + kbdev->pm.backend.gpu_clock_suspend_freq = freq; + } + + return 0; +} + +static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) +{ + struct devfreq_dev_profile *dp = &kbdev->devfreq_profile; + + kfree(dp->freq_table); +} + +static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev) +{ + kfree(kbdev->devfreq_table); +} + +static void kbase_devfreq_exit(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + kbase_devfreq_term_freq_table(kbdev); +} + +static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, + struct device_node *node) +{ + u64 freq = 0; + int err = 0; + + /* Check if this node is the opp entry having 'opp-mali-errata-1485982' + * to get the suspend clock, otherwise skip it. + */ + if (!of_property_read_bool(node, "opp-mali-errata-1485982")) + return; + + /* In kbase DevFreq, the clock will be read from 'opp-hz' + * and translated into the actual clock by opp_translate. + * + * In customer DVFS, the clock will be read from 'opp-hz-real' + * for clk driver. If 'opp-hz-real' does not exist, + * read from 'opp-hz'. + */ + if (IS_ENABLED(CONFIG_MALI_DEVFREQ)) + err = of_property_read_u64(node, "opp-hz", &freq); + else { + if (of_property_read_u64(node, "opp-hz-real", &freq)) + err = of_property_read_u64(node, "opp-hz", &freq); + } + + if (WARN_ON(err || !freq)) + return; + + kbdev->pm.backend.gpu_clock_suspend_freq = freq; + dev_info(kbdev->dev, + "suspend clock %llu by opp-mali-errata-1485982", freq); +} + +static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) +{ +#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) + /* OPP table initialization requires at least the capability to get + * regulators and clocks from the device tree, as well as parsing + * arrays of unsigned integer values. + * + * The whole initialization process shall simply be skipped if the + * minimum capability is not available. 
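+ *
+ * On kernels where the table is parsed (see below), each child of the
+ * operating-points-v2-mali node is expected to provide a nominal opp-hz,
+ * per-clock opp-hz-real rates, opp-microvolt values when regulators are
+ * in use, and optionally an opp-core-mask or opp-core-count restricting
+ * which shader cores the OPP may run on.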
+ */ + return 0; +#else + struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, + "operating-points-v2", 0); + struct device_node *node; + int i = 0; + int count; + u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present; + + if (!opp_node) + return 0; + if (!of_device_is_compatible(opp_node, "operating-points-v2-mali")) + return 0; + + count = dev_pm_opp_get_opp_count(kbdev->dev); + kbdev->devfreq_table = kmalloc_array(count, + sizeof(struct kbase_devfreq_opp), GFP_KERNEL); + if (!kbdev->devfreq_table) + return -ENOMEM; + + for_each_available_child_of_node(opp_node, node) { + const void *core_count_p; + u64 core_mask, opp_freq, + real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + int err; +#ifdef CONFIG_REGULATOR + u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; +#endif + + /* Read suspend clock from opp table */ + if (kbdev->pm.backend.gpu_clock_slow_down_wa) + kbasep_devfreq_read_suspend_clock(kbdev, node); + + err = of_property_read_u64(node, "opp-hz", &opp_freq); + if (err) { + dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", + err); + continue; + } + + +#if BASE_MAX_NR_CLOCKS_REGULATORS > 1 + err = of_property_read_u64_array(node, "opp-hz-real", + real_freqs, kbdev->nr_clocks); +#else + WARN_ON(kbdev->nr_clocks != 1); + err = of_property_read_u64(node, "opp-hz-real", real_freqs); +#endif + if (err < 0) { + dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", + err); + continue; + } +#ifdef CONFIG_REGULATOR + err = of_property_read_u32_array(node, + "opp-microvolt", opp_volts, kbdev->nr_regulators); + if (err < 0) { + dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n", + err); + continue; + } +#endif + + if (of_property_read_u64(node, "opp-core-mask", &core_mask)) + core_mask = shader_present; + if (core_mask != shader_present && corestack_driver_control) { + + dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", + opp_freq); + continue; + } + + core_count_p = of_get_property(node, "opp-core-count", NULL); + if (core_count_p) { + u64 remaining_core_mask = + kbdev->gpu_props.props.raw_props.shader_present; + int core_count = be32_to_cpup(core_count_p); + + core_mask = 0; + + for (; core_count > 0; core_count--) { + int core = ffs(remaining_core_mask); + + if (!core) { + dev_err(kbdev->dev, "OPP has more cores than GPU\n"); + return -ENODEV; + } + + core_mask |= (1ull << (core-1)); + remaining_core_mask &= ~(1ull << (core-1)); + } + } + + if (!core_mask) { + dev_err(kbdev->dev, "OPP has invalid core mask of 0\n"); + return -ENODEV; + } + + kbdev->devfreq_table[i].opp_freq = opp_freq; + kbdev->devfreq_table[i].core_mask = core_mask; + if (kbdev->nr_clocks > 0) { + int j; + + for (j = 0; j < kbdev->nr_clocks; j++) + kbdev->devfreq_table[i].real_freqs[j] = + real_freqs[j]; + } +#ifdef CONFIG_REGULATOR + if (kbdev->nr_regulators > 0) { + int j; + + for (j = 0; j < kbdev->nr_regulators; j++) + kbdev->devfreq_table[i].opp_volts[j] = + opp_volts[j]; + } +#endif + + dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", + i, opp_freq, core_mask); + + i++; + } + + kbdev->num_opps = i; + + return 0; +#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */ +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + +static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type) +{ + const char *p; + + switch (type) { + case DEVFREQ_WORK_NONE: + p = "devfreq_none"; + break; + case DEVFREQ_WORK_SUSPEND: + p = "devfreq_suspend"; + break; + 
case DEVFREQ_WORK_RESUME: + p = "devfreq_resume"; + break; + default: + p = "Unknown devfreq_type"; + } + return p; +} + +static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) +{ + struct kbase_devfreq_queue_info *info = container_of(work, + struct kbase_devfreq_queue_info, work); + struct kbase_device *kbdev = container_of(info, struct kbase_device, + devfreq_queue); + unsigned long flags; + enum kbase_devfreq_work_type type, acted_type; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + type = kbdev->devfreq_queue.req_type; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + acted_type = kbdev->devfreq_queue.acted_type; + dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n", + kbase_devfreq_req_type_name(type), + kbase_devfreq_req_type_name(acted_type)); + switch (type) { + case DEVFREQ_WORK_SUSPEND: + case DEVFREQ_WORK_RESUME: + if (type != acted_type) { + if (type == DEVFREQ_WORK_RESUME) + devfreq_resume_device(kbdev->devfreq); + else + devfreq_suspend_device(kbdev->devfreq); + dev_dbg(kbdev->dev, "Devfreq transition occured: %s => %s\n", + kbase_devfreq_req_type_name(acted_type), + kbase_devfreq_req_type_name(type)); + kbdev->devfreq_queue.acted_type = type; + } + break; + default: + WARN_ON(1); + } +} + +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */ + +void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, + enum kbase_devfreq_work_type work_type) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + unsigned long flags; + + WARN_ON(work_type == DEVFREQ_WORK_NONE); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->devfreq_queue.req_type = work_type; + queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", + kbase_devfreq_req_type_name(work_type)); +#endif +} + +static int kbase_devfreq_work_init(struct kbase_device *kbdev) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE; + kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME; + + kbdev->devfreq_queue.workq = alloc_ordered_workqueue("devfreq_workq", 0); + if (!kbdev->devfreq_queue.workq) + return -ENOMEM; + + INIT_WORK(&kbdev->devfreq_queue.work, + kbase_devfreq_suspend_resume_worker); +#endif + return 0; +} + +static void kbase_devfreq_work_term(struct kbase_device *kbdev) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + destroy_workqueue(kbdev->devfreq_queue.workq); +#endif +} + +int kbase_devfreq_init(struct kbase_device *kbdev) +{ + struct devfreq_dev_profile *dp; + int err; + unsigned int i; + + if (kbdev->nr_clocks == 0) { + dev_err(kbdev->dev, "Clock not available for devfreq\n"); + return -ENODEV; + } + + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->clocks[i]) + kbdev->current_freqs[i] = + clk_get_rate(kbdev->clocks[i]); + else + kbdev->current_freqs[i] = 0; + } + kbdev->current_nominal_freq = kbdev->current_freqs[0]; + + dp = &kbdev->devfreq_profile; + + dp->initial_freq = kbdev->current_freqs[0]; + dp->polling_ms = 100; + dp->target = kbase_devfreq_target; + dp->get_dev_status = kbase_devfreq_status; + dp->get_cur_freq = kbase_devfreq_cur_freq; + dp->exit = kbase_devfreq_exit; + + if (kbase_devfreq_init_freq_table(kbdev, dp)) + return -EFAULT; + + if (dp->max_state > 0) { + /* Record the maximum frequency possible */ + kbdev->gpu_props.props.core_props.gpu_freq_khz_max = + dp->freq_table[0] / 1000; + }; + + err = kbase_devfreq_init_core_mask_table(kbdev); + 
if (err) { + kbase_devfreq_term_freq_table(kbdev); + return err; + } + + /* Initialise devfreq suspend/resume workqueue */ + err = kbase_devfreq_work_init(kbdev); + if (err) { + kbase_devfreq_term_freq_table(kbdev); + dev_err(kbdev->dev, "Devfreq initialization failed"); + return err; + } + + kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, + "simple_ondemand", NULL); + if (IS_ERR(kbdev->devfreq)) { + err = PTR_ERR(kbdev->devfreq); + kbase_devfreq_work_term(kbdev); + kbase_devfreq_term_freq_table(kbdev); + return err; + } + + /* devfreq_add_device only copies a few of kbdev->dev's fields, so + * set drvdata explicitly so IPA models can access kbdev. */ + dev_set_drvdata(&kbdev->devfreq->dev, kbdev); + + err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); + if (err) { + dev_err(kbdev->dev, + "Failed to register OPP notifier (%d)\n", err); + goto opp_notifier_failed; + } + +#ifdef CONFIG_DEVFREQ_THERMAL + err = kbase_ipa_init(kbdev); + if (err) { + dev_err(kbdev->dev, "IPA initialization failed\n"); + goto cooling_failed; + } + + kbdev->devfreq_cooling = of_devfreq_cooling_register_power( + kbdev->dev->of_node, + kbdev->devfreq, + &kbase_ipa_power_model_ops); + if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { + err = PTR_ERR(kbdev->devfreq_cooling); + dev_err(kbdev->dev, + "Failed to register cooling device (%d)\n", + err); + goto cooling_failed; + } +#endif + + return 0; + +#ifdef CONFIG_DEVFREQ_THERMAL +cooling_failed: + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); +#endif /* CONFIG_DEVFREQ_THERMAL */ +opp_notifier_failed: + if (devfreq_remove_device(kbdev->devfreq)) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + kbase_devfreq_work_term(kbdev); + + return err; +} + +void kbase_devfreq_term(struct kbase_device *kbdev) +{ + int err; + + dev_dbg(kbdev->dev, "Term Mali devfreq\n"); + +#ifdef CONFIG_DEVFREQ_THERMAL + if (kbdev->devfreq_cooling) + devfreq_cooling_unregister(kbdev->devfreq_cooling); + + kbase_ipa_term(kbdev); +#endif + + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); + + err = devfreq_remove_device(kbdev->devfreq); + if (err) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + kbase_devfreq_term_core_mask_table(kbdev); + + kbase_devfreq_work_term(kbdev); +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_devfreq.h new file mode 100644 index 000000000000..8c976b29ec38 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_devfreq.h @@ -0,0 +1,46 @@ +/* + * + * (C) COPYRIGHT 2014, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _BASE_DEVFREQ_H_ +#define _BASE_DEVFREQ_H_ + +int kbase_devfreq_init(struct kbase_device *kbdev); + +void kbase_devfreq_term(struct kbase_device *kbdev); + +/** + * kbase_devfreq_force_freq - Set GPU frequency on L2 power on/off. + * @kbdev: Device pointer + * @freq: GPU frequency in HZ to be set when + * MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE is enabled + */ +void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq); + +/** + * kbase_devfreq_enqueue_work - Enqueue a work item for suspend/resume devfreq. + * @kbdev: Device pointer + * @work_type: The type of the devfreq work item, i.e. suspend or resume + */ +void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, + enum kbase_devfreq_work_type work_type); + +#endif /* _BASE_DEVFREQ_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_hw.c new file mode 100644 index 000000000000..d4e2957f8b24 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_hw.c @@ -0,0 +1,391 @@ +/* + * + * (C) COPYRIGHT 2014-2016, 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * + */ +#include +#include +#include +#include +#include +#include +#include + +#if !defined(CONFIG_MALI_NO_MALI) + +#ifdef CONFIG_DEBUG_FS + +int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) +{ + struct kbase_io_access *old_buf; + struct kbase_io_access *new_buf; + unsigned long flags; + + if (!new_size) + goto out_err; /* The new size must not be 0 */ + + new_buf = vmalloc(new_size * sizeof(*h->buf)); + if (!new_buf) + goto out_err; + + spin_lock_irqsave(&h->lock, flags); + + old_buf = h->buf; + + /* Note: we won't bother with copying the old data over. The dumping + * logic wouldn't work properly as it relies on 'count' both as a + * counter and as an index to the buffer which would have changed with + * the new array. This is a corner case that we don't need to support. 
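+ *
+ * Dropping the old entries is harmless: the history is only a debug aid,
+ * and kbase_io_history_add() below stores each access at
+ * buf[count % size], so resetting the count simply restarts the ring
+ * buffer cleanly in the newly allocated array.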
+ */ + h->count = 0; + h->size = new_size; + h->buf = new_buf; + + spin_unlock_irqrestore(&h->lock, flags); + + vfree(old_buf); + + return 0; + +out_err: + return -1; +} + + +int kbase_io_history_init(struct kbase_io_history *h, u16 n) +{ + h->enabled = false; + spin_lock_init(&h->lock); + h->count = 0; + h->size = 0; + h->buf = NULL; + if (kbase_io_history_resize(h, n)) + return -1; + + return 0; +} + + +void kbase_io_history_term(struct kbase_io_history *h) +{ + vfree(h->buf); + h->buf = NULL; +} + + +/* kbase_io_history_add - add new entry to the register access history + * + * @h: Pointer to the history data structure + * @addr: Register address + * @value: The value that is either read from or written to the register + * @write: 1 if it's a register write, 0 if it's a read + */ +static void kbase_io_history_add(struct kbase_io_history *h, + void __iomem const *addr, u32 value, u8 write) +{ + struct kbase_io_access *io; + unsigned long flags; + + spin_lock_irqsave(&h->lock, flags); + + io = &h->buf[h->count % h->size]; + io->addr = (uintptr_t)addr | write; + io->value = value; + ++h->count; + /* If count overflows, move the index by the buffer size so the entire + * buffer will still be dumped later */ + if (unlikely(!h->count)) + h->count = h->size; + + spin_unlock_irqrestore(&h->lock, flags); +} + + +void kbase_io_history_dump(struct kbase_device *kbdev) +{ + struct kbase_io_history *const h = &kbdev->io_history; + u16 i; + size_t iters; + unsigned long flags; + + if (!unlikely(h->enabled)) + return; + + spin_lock_irqsave(&h->lock, flags); + + dev_err(kbdev->dev, "Register IO History:"); + iters = (h->size > h->count) ? h->count : h->size; + dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 'w' : 'r'; + + dev_err(kbdev->dev, "%6i: %c: reg 0x%016lx val %08x\n", i, + access, (unsigned long)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); +} + + +#endif /* CONFIG_DEBUG_FS */ + + +void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) +{ + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + + writel(value, kbdev->reg + offset); + +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + value, 1); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); +} + +KBASE_EXPORT_TEST_API(kbase_reg_write); + +u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) +{ + u32 val; + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + + val = readl(kbdev->reg + offset); + +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + val, 0); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); + + return val; +} + +KBASE_EXPORT_TEST_API(kbase_reg_read); + +bool kbase_is_gpu_lost(struct kbase_device *kbdev) +{ + u32 val; + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + + return val == 0; +} +#endif /* !defined(CONFIG_MALI_NO_MALI) */ + +/** + * kbase_report_gpu_fault - Report a GPU fault. 
+ * @kbdev: Kbase device pointer + * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS + * was also set + * + * This function is called from the interrupt handler when a GPU fault occurs. + * It reports the details of the fault using dev_warn(). + */ +static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) +{ + u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); + u64 address = (u64) kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; + + address |= kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->update_status) + kbdev->vendor_callbacks->update_status(kbdev, "completion_code", status); + + dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", + status, + kbase_gpu_exception_name(status & 0xFF), + address); + if (multiple) + dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); +} + +static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev, int multiple) +{ + kbase_report_gpu_fault(kbdev, multiple); + return false; +} + +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) +{ + u32 irq_mask; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->cache_clean_in_progress) { + /* If this is called while another clean is in progress, we + * can't rely on the current one to flush any new changes in + * the cache. Instead, trigger another cache clean immediately + * after this one finishes. + */ + kbdev->cache_clean_queued = true; + return; + } + + /* Enable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask | CLEAN_CACHES_COMPLETED); + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + + kbdev->cache_clean_in_progress = true; +} + +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_start_cache_clean_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->cache_clean_queued = false; + kbdev->cache_clean_in_progress = false; + wake_up(&kbdev->cache_clean_wait); +} + +static void kbase_clean_caches_done(struct kbase_device *kbdev) +{ + u32 irq_mask; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->cache_clean_queued) { + kbdev->cache_clean_queued = false; + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + } else { + /* Disable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED); + + kbase_gpu_cache_clean_wait_complete(kbdev); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static inline bool get_cache_clean_flag(struct kbase_device *kbdev) +{ + bool cache_clean_in_progress; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + cache_clean_in_progress = kbdev->cache_clean_in_progress; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return cache_clean_in_progress; +} + +void kbase_gpu_wait_cache_clean(struct 
kbase_device *kbdev) +{ + while (get_cache_clean_flag(kbdev)) { + wait_event_interruptible(kbdev->cache_clean_wait, + !kbdev->cache_clean_in_progress); + } +} + +int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, + unsigned int wait_timeout_ms) +{ + long remaining = msecs_to_jiffies(wait_timeout_ms); + + while (remaining && get_cache_clean_flag(kbdev)) { + remaining = wait_event_timeout(kbdev->cache_clean_wait, + !kbdev->cache_clean_in_progress, + remaining); + } + + return (remaining ? 0 : -ETIMEDOUT); +} + +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +{ + bool clear_gpu_fault = false; + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); + if (val & GPU_FAULT) + clear_gpu_fault = kbase_gpu_fault_interrupt(kbdev, + val & MULTIPLE_GPU_FAULTS); + + if (val & RESET_COMPLETED) + kbase_pm_reset_done(kbdev); + + if (val & PRFCNT_SAMPLE_COMPLETED) + kbase_instr_hwcnt_sample_done(kbdev); + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); + + /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must + * be called after the IRQ has been cleared. This is because it might + * trigger further power transitions and we don't want to miss the + * interrupt raised to notify us that these further transitions have + * finished. The same applies to kbase_clean_caches_done() - if another + * clean was queued, it might trigger another clean, which might + * generate another interrupt which shouldn't be missed. + */ + + if (val & CLEAN_CACHES_COMPLETED) + kbase_clean_caches_done(kbdev); + + if (val & POWER_CHANGED_ALL) { + kbase_pm_power_changed(kbdev); + } else if (val & CLEAN_CACHES_COMPLETED) { + /* If cache line evict messages can be lost when shader cores + * power down then we need to flush the L2 cache before powering + * down cores. When the flush completes, the shaders' state + * machine needs to be re-invoked to proceed with powering down + * cores. + */ + if (kbdev->pm.backend.l2_always_on || + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) + kbase_pm_power_changed(kbdev); + } + + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_internal.h new file mode 100644 index 000000000000..2e1d0112172e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_device_internal.h @@ -0,0 +1,127 @@ +/* + * + * (C) COPYRIGHT 2014,2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Backend-specific HW access device APIs + */ + +#ifndef _KBASE_DEVICE_INTERNAL_H_ +#define _KBASE_DEVICE_INTERNAL_H_ + +/** + * kbase_reg_write - write to GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @value: Value to write + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). + */ +void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); + +/** + * kbase_reg_read - read from GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). + * + * Return: Value in desired register + */ +u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); + +/** + * kbase_is_gpu_lost() - Has the GPU been lost. + * @kbdev: Kbase device pointer + * + * This function will return true if the GPU has been lost. + * When this happens register reads will be zero. A zero GPU_ID is + * invalid so this is used to detect GPU_LOST + * + * Return: True if GPU LOST + */ +bool kbase_is_gpu_lost(struct kbase_device *kbdev); + +/** + * kbase_gpu_start_cache_clean - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. This function will + * take hwaccess_lock. + */ +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); + +/** + * kbase_gpu_start_cache_clean_nolock - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. hwaccess_lock + * must be held by the caller. + */ +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); + +/** + * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish + * @kbdev: Kbase device + * + * This function will take hwaccess_lock, and may sleep. + */ +void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); + +/** + * kbase_gpu_wait_cache_clean_timeout - Wait for certain time for cache + * cleaning to finish + * @kbdev: Kbase device + * @wait_timeout_ms: Time, in milli seconds, to wait for cache clean to complete. + * + * This function will take hwaccess_lock, and may sleep. This is supposed to be + * called from paths (like GPU reset) where an indefinite wait for the completion + * of cache clean operation can cause deadlock, as the operation may never + * complete. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, + unsigned int wait_timeout_ms); + +/** + * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is + * finished. Would also be called after + * the GPU reset. + * @kbdev: Kbase device + * + * Caller must hold the hwaccess_lock. + */ +void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev); + +/** + * kbase_gpu_interrupt - GPU interrupt handler + * @kbdev: Kbase device pointer + * @val: The value of the GPU IRQ status register which triggered the call + * + * This function is called from the interrupt handler when a GPU irq is to be + * handled. 
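+ *
+ * A minimal sketch of the expected call site (it mirrors the GPU IRQ handler
+ * in mali_kbase_irq_linux.c from this same patch; the power-state and locking
+ * checks done there are omitted here for brevity):
+ *
+ *   u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
+ *
+ *   if (val)
+ *           kbase_gpu_interrupt(kbdev, val);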
+ */ +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); + +#endif /* _KBASE_DEVICE_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_gpuprops_backend.c new file mode 100644 index 000000000000..352afa11907a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_gpuprops_backend.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel property query backend APIs + */ + +#include +#include +#include +#include + +int kbase_backend_gpuprops_get(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + int i; + struct kbase_gpuprops_regdump registers; + + /* Fill regdump with the content of the relevant registers */ + registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + + registers.l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES)); + registers.core_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CORE_FEATURES)); + registers.tiler_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_FEATURES)); + registers.mem_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MEM_FEATURES)); + registers.mmu_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MMU_FEATURES)); + registers.as_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(AS_PRESENT)); + registers.js_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_PRESENT)); + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + registers.js_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_FEATURES_REG(i))); + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + registers.texture_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); + + registers.thread_max_threads = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_THREADS)); + registers.thread_max_workgroup_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); + registers.thread_max_barrier_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); + registers.thread_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_FEATURES)); + registers.thread_tls_alloc = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_TLS_ALLOC)); + + registers.shader_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_LO)); + registers.shader_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_HI)); + + registers.tiler_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_LO)); + registers.tiler_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_HI)); + + registers.l2_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_LO)); + registers.l2_present_hi = kbase_reg_read(kbdev, + 
GPU_CONTROL_REG(L2_PRESENT_HI)); + + registers.stack_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_LO)); + registers.stack_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_HI)); + + if (!kbase_is_gpu_lost(kbdev)) { + *regdump = registers; + return 0; + } else + return -EIO; +} + +int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { + u32 coherency_features; + + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + regdump->coherency_features = coherency_features; + + /* We're done accessing the GPU registers for now. */ + kbase_pm_register_access_disable(kbdev); + } else { + /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ + regdump->coherency_features = + COHERENCY_FEATURE_BIT(COHERENCY_NONE) | + COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + } + + return 0; +} + +int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { + u32 l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + regdump->l2_features = l2_features; + } + + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_backend.c new file mode 100644 index 000000000000..f9c2ec7c4135 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_backend.c @@ -0,0 +1,411 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * GPU backend instrumentation APIs. 
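+ *
+ * In outline, a caller that owns the hardware counters is expected to drive
+ * these APIs roughly as below. This is a simplified sketch only: the real
+ * callers live in the hwcnt layers of the driver, hold the locks that each
+ * function documents, and handle the error paths; the position of the clear
+ * step is illustrative. Here 'enable' stands for a populated
+ * struct kbase_instr_hwcnt_enable.
+ *
+ *   kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+ *   kbase_instr_hwcnt_request_dump(kctx);
+ *   kbase_instr_hwcnt_wait_for_dump(kctx);
+ *   kbase_instr_hwcnt_clear(kctx);
+ *   kbase_instr_hwcnt_disable_internal(kctx);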
+ */ + +#include +#include +#include +#include +#include + + +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_instr_hwcnt_enable *enable) +{ + unsigned long flags; + int err = -EINVAL; + u32 irq_mask; + u32 prfcnt_config; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* alignment failure */ + if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) + goto out_err; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is already enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out_err; + } + + /* Enable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | + PRFCNT_SAMPLE_COMPLETED); + + /* In use, this context is the owner */ + kbdev->hwcnt.kctx = kctx; + /* Remember the dump address so we can reprogram it later */ + kbdev->hwcnt.addr = enable->dump_buffer; + kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Configure */ + prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + if (kbdev->hwcnt.backend.use_secondary_override) +#else + if (enable->use_secondary) +#endif + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_OFF); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + enable->dump_buffer & 0xFFFFFFFF); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + enable->dump_buffer >> 32); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), + enable->fe_bm); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), + enable->shader_bm); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), + enable->mmu_l2_bm); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + enable->tiler_bm); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + err = 0; + + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); + return err; + out_err: + return err; +} + +int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) +{ + unsigned long flags, pm_flags; + int err = -EINVAL; + u32 irq_mask; + struct kbase_device *kbdev = kctx->kbdev; + + while (1) { + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is not enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + goto out; + } + + if (kbdev->hwcnt.kctx != kctx) { + /* Instrumentation has been setup for another context */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + goto out; + } + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) + break; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + /* Ongoing dump/setup - wait for its completion */ + 
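+ /* Both hwcnt.lock and hwaccess_lock were dropped above so that the
+  * interrupt/worker path that sets hwcnt.backend.triggered can make
+  * progress; after the wakeup the loop re-takes both locks and re-checks
+  * the state from the top.
+  */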
wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + } + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + kbdev->hwcnt.backend.triggered = 0; + + /* Disable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~PRFCNT_SAMPLE_COMPLETED); + + /* Disable the counters */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); + + kbdev->hwcnt.kctx = NULL; + kbdev->hwcnt.addr = 0ULL; + kbdev->hwcnt.addr_bytes = 0ULL; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + kctx); + + err = 0; + + out: + return err; +} + +int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.kctx != kctx) { + /* The instrumentation has been setup for another context */ + goto unlock; + } + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { + /* HW counters are disabled or another dump is ongoing, or we're + * resetting */ + goto unlock; + } + + kbdev->hwcnt.backend.triggered = 0; + + /* Mark that we're dumping - the PF handler can signal that we faulted + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + + + /* Reconfigure the dump address */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + kbdev->hwcnt.addr & 0xFFFFFFFF); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + kbdev->hwcnt.addr >> 32); + + /* Start dumping */ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, + kbdev->hwcnt.addr); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_SAMPLE); + + dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + + err = 0; + + unlock: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, + bool * const success) +{ + unsigned long flags; + bool complete = false; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { + *success = true; + complete = true; + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + *success = false; + complete = true; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return complete; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + +void kbasep_cache_clean_worker(struct work_struct *data) +{ + struct kbase_device *kbdev; + unsigned long flags, pm_flags; + + kbdev = container_of(data, struct kbase_device, + hwcnt.backend.cache_clean_work); + + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* Clean and invalidate the caches so we're sure the mmu tables for the + * dump buffer is valid. 
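+ * The worker runs in the KBASE_INSTR_STATE_REQUEST_CLEAN state; once the
+ * clean issued below completes, the state moves back to
+ * KBASE_INSTR_STATE_IDLE and any thread sleeping on hwcnt.backend.wait is
+ * woken.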
+ */ + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); + kbase_gpu_start_cache_clean_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + kbase_gpu_wait_cache_clean(kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + + +void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { + if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) { + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else { + int ret; + /* Always clean and invalidate the cache after a successful dump + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + } + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + int err; + + /* Wait for dump & cache clean to complete */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + err = -EINVAL; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } else { + /* Dump done */ + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_IDLE); + err = 0; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return err; +} + +int kbase_instr_hwcnt_clear(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* Check it's the context previously set up and we're not already + * dumping */ + if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_IDLE) + goto out; + + /* Clear the counters */ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_CLEAR); + + err = 0; + +out: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); + +int kbase_instr_backend_init(struct kbase_device *kbdev) +{ + int ret = 0; + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + + init_waitqueue_head(&kbdev->hwcnt.backend.wait); + INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, + kbasep_cache_clean_worker); + + + kbdev->hwcnt.backend.triggered = 0; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + kbdev->hwcnt.backend.use_secondary_override = false; +#endif + + kbdev->hwcnt.backend.cache_clean_wq = + alloc_workqueue("Mali cache cleaning workqueue", 0, 1); + if (NULL == kbdev->hwcnt.backend.cache_clean_wq) + ret = -EINVAL; + + return 
ret; +} + +void kbase_instr_backend_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); +} + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->hwcnt.backend.use_secondary_override); +} +#endif diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_defs.h new file mode 100644 index 000000000000..99309685c4ff --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_defs.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT 2014, 2016, 2018, 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend-specific instrumentation definitions + */ + +#ifndef _KBASE_INSTR_DEFS_H_ +#define _KBASE_INSTR_DEFS_H_ + +/* + * Instrumentation State Machine States + */ +enum kbase_instr_state { + /* State where instrumentation is not active */ + KBASE_INSTR_STATE_DISABLED = 0, + /* State machine is active and ready for a command. */ + KBASE_INSTR_STATE_IDLE, + /* Hardware is currently dumping a frame. */ + KBASE_INSTR_STATE_DUMPING, + /* We've requested a clean to occur on a workqueue */ + KBASE_INSTR_STATE_REQUEST_CLEAN, + /* An error has occured during DUMPING (page fault). */ + KBASE_INSTR_STATE_FAULT +}; + +/* Structure used for instrumentation and HW counters dumping */ +struct kbase_instr_backend { + wait_queue_head_t wait; + int triggered; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + bool use_secondary_override; +#endif + + enum kbase_instr_state state; + struct workqueue_struct *cache_clean_wq; + struct work_struct cache_clean_work; +}; + +#endif /* _KBASE_INSTR_DEFS_H_ */ + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_internal.h new file mode 100644 index 000000000000..2254b9f30d02 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_instr_internal.h @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Backend-specific HW access instrumentation APIs + */ + +#ifndef _KBASE_INSTR_INTERNAL_H_ +#define _KBASE_INSTR_INTERNAL_H_ + +/** + * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning + * @data: a &struct work_struct + */ +void kbasep_cache_clean_worker(struct work_struct *data); + +/** + * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received + * @kbdev: Kbase device + */ +void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev); + +#endif /* _KBASE_INSTR_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_irq_internal.h new file mode 100644 index 000000000000..ca3c048b637a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_irq_internal.h @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend specific IRQ APIs + */ + +#ifndef _KBASE_IRQ_INTERNAL_H_ +#define _KBASE_IRQ_INTERNAL_H_ + +int kbase_install_interrupts(struct kbase_device *kbdev); + +void kbase_release_interrupts(struct kbase_device *kbdev); + +/** + * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed + * execution + * @kbdev: The kbase device + */ +void kbase_synchronize_irqs(struct kbase_device *kbdev); + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev); + +#endif /* _KBASE_IRQ_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_irq_linux.c new file mode 100644 index 000000000000..d0b5c2ca77a3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_irq_linux.c @@ -0,0 +1,517 @@ +/* + * + * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include + +#if !defined(CONFIG_MALI_NO_MALI) + +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +static void *kbase_tag(void *ptr, u32 tag) +{ + return (void *)(((uintptr_t) ptr) | tag); +} + +static void *kbase_untag(void *ptr) +{ + return (void *)(((uintptr_t) ptr) & ~3); +} + +static irqreturn_t kbase_job_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + /* MALI_SEC_INTEGRAION */ + KBASE_KTRACE_ADD(kbdev, LSI_JM_IRQ, NULL, 0); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + + if (!val) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_job_done(kbdev, val); + + /* MALI_SEC_INTEGRAION */ + KBASE_KTRACE_ADD(kbdev, LSI_JM_IRQ_E, NULL, val); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + /* MALI_SEC_INTEGRAION */ + KBASE_KTRACE_ADD(kbdev, LSI_MMU_IRQ, NULL, 0); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + atomic_inc(&kbdev->faults_pending); + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!val) { + atomic_dec(&kbdev->faults_pending); + return IRQ_NONE; + } + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_mmu_interrupt(kbdev, val); + + atomic_dec(&kbdev->faults_pending); + + /* MALI_SEC_INTEGRAION */ + KBASE_KTRACE_ADD(kbdev, LSI_MMU_IRQ_E, NULL, val); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + /* MALI_SEC_INTEGRAION */ + KBASE_KTRACE_ADD(kbdev, LSI_GPU_IRQ, NULL, 0); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + 
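+ /* Dispatch to the backend GPU IRQ handler; kbase_gpu_interrupt() clears
+  * the bits it has handled by writing GPU_IRQ_CLEAR itself.
+  */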
kbase_gpu_interrupt(kbdev, val); + + /* MALI_SEC_INTEGRAION */ + KBASE_KTRACE_ADD(kbdev, LSI_GPU_IRQ_E, NULL, val); + + return IRQ_HANDLED; +} + +static irq_handler_t kbase_handler_table[] = { + [JOB_IRQ_TAG] = kbase_job_irq_handler, + [MMU_IRQ_TAG] = kbase_mmu_irq_handler, + [GPU_IRQ_TAG] = kbase_gpu_irq_handler, +}; + +#ifdef CONFIG_MALI_DEBUG +#define JOB_IRQ_HANDLER JOB_IRQ_TAG +#define MMU_IRQ_HANDLER MMU_IRQ_TAG +#define GPU_IRQ_HANDLER GPU_IRQ_TAG + +/** + * kbase_gpu_irq_test_handler - Variant (for test) of kbase_gpu_irq_handler() + * @irq: IRQ number + * @data: Data associated with this IRQ (i.e. kbdev) + * @val: Value of the GPU_CONTROL_REG(GPU_IRQ_STATUS) + * + * Handle the GPU device interrupt source requests reflected in the + * given source bit-pattern. The test code caller is responsible for + * undertaking the required device power maintenace. + * + * Return: IRQ_HANDLED if the requests are from the GPU device, + * IRQ_NONE otherwise + */ +irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) +{ + struct kbase_device *kbdev = kbase_untag(data); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_gpu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); + +/** + * kbase_set_custom_irq_handler - Set a custom IRQ handler + * @kbdev: Device for which the handler is to be registered + * @custom_handler: Handler to be registered + * @irq_type: Interrupt type + * + * Registers given interrupt handler for requested interrupt type + * In the case where irq handler is not specified, the default handler shall be + * registered + * + * Return: 0 case success, error code otherwise + */ +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + irq_handler_t custom_handler, + int irq_type) +{ + int result = 0; + irq_handler_t requested_irq_handler = NULL; + + KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && + (GPU_IRQ_HANDLER >= irq_type)); + + /* Release previous handler */ + if (kbdev->irqs[irq_type].irq) + free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + + requested_irq_handler = (NULL != custom_handler) ? 
custom_handler : + kbase_handler_table[irq_type]; + + if (0 != request_irq(kbdev->irqs[irq_type].irq, + requested_irq_handler, + kbdev->irqs[irq_type].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { + result = -EINVAL; + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[irq_type].irq, irq_type); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + +/* test correct interrupt assigment and reception by cpu */ +struct kbasep_irq_test { + struct hrtimer timer; + wait_queue_head_t wait; + int triggered; + u32 timeout; +}; + +static struct kbasep_irq_test kbasep_irq_test_data; + +#define IRQ_TEST_TIMEOUT 500 + +static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); + + return IRQ_HANDLED; +} + +static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) +{ + struct kbasep_irq_test *test_data = container_of(timer, + struct kbasep_irq_test, timer); + + test_data->timeout = 1; + test_data->triggered = 1; + wake_up(&test_data->wait); + return HRTIMER_NORESTART; +} + +static int kbasep_common_test_interrupt( + struct kbase_device * const kbdev, u32 tag) +{ + int err = 0; + irq_handler_t test_handler; + + u32 old_mask_val; + u16 mask_offset; + u16 rawstat_offset; + + switch (tag) { + case JOB_IRQ_TAG: + test_handler = kbase_job_irq_test_handler; + rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); + mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); + break; + case MMU_IRQ_TAG: + test_handler = kbase_mmu_irq_test_handler; + rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_REG(MMU_IRQ_MASK); + break; + case GPU_IRQ_TAG: + /* already tested by pm_driver - bail out */ + default: + return 0; + } + + /* store old mask */ + old_mask_val = kbase_reg_read(kbdev, mask_offset); + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0); + + if (kbdev->irqs[tag].irq) { + /* 
release original handler and install test handler */ + if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { + err = -EINVAL; + } else { + kbasep_irq_test_data.timeout = 0; + hrtimer_init(&kbasep_irq_test_data.timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbasep_irq_test_data.timer.function = + kbasep_test_interrupt_timeout; + + /* trigger interrupt */ + kbase_reg_write(kbdev, mask_offset, 0x1); + kbase_reg_write(kbdev, rawstat_offset, 0x1); + + hrtimer_start(&kbasep_irq_test_data.timer, + HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), + HRTIMER_MODE_REL); + + wait_event(kbasep_irq_test_data.wait, + kbasep_irq_test_data.triggered != 0); + + if (kbasep_irq_test_data.timeout != 0) { + dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } else { + dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", + kbdev->irqs[tag].irq, tag); + } + + hrtimer_cancel(&kbasep_irq_test_data.timer); + kbasep_irq_test_data.triggered = 0; + + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0); + + /* release test handler */ + free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); + } + + /* restore original interrupt */ + if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], + kbdev->irqs[tag].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { + dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } + } + /* restore old mask */ + kbase_reg_write(kbdev, mask_offset, old_mask_val); + + return err; +} + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev) +{ + int err; + + init_waitqueue_head(&kbasep_irq_test_data.wait); + kbasep_irq_test_data.triggered = 0; + + /* A suspend won't happen during startup/insmod */ + kbase_pm_context_active(kbdev); + + err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); + goto out; + } + + err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); + goto out; + } + + dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); + + out: + kbase_pm_context_idle(kbdev); + + return err; +} +#endif /* CONFIG_MALI_DEBUG */ + +int kbase_install_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + int err; + u32 i; + + for (i = 0; i < nr; i++) { + err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], + kbdev->irqs[i].flags | IRQF_SHARED, + dev_name(kbdev->dev), + kbase_tag(kbdev, i)); + if (err) { + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[i].irq, i); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + goto release; + } + } + + return 0; + + release: + while (i-- > 0) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + + return err; +} + +void kbase_release_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + } +} + +void kbase_synchronize_irqs(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + synchronize_irq(kbdev->irqs[i].irq); + } +} + +KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); + +#endif /* !defined(CONFIG_MALI_NO_MALI) */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_as.c new file mode 100644 index 000000000000..bb4f548e9a4d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_as.c @@ -0,0 +1,243 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register backend context / address space management + */ + +#include +#include +#include + +/** + * assign_and_activate_kctx_addr_space - Assign an AS to a context + * @kbdev: Kbase device + * @kctx: Kbase context + * @current_as: Address Space to assign + * + * Assign an Address Space (AS) to a context, and add the context to the Policy. + * + * This includes + * setting up the global runpool_irq structure and the context on the AS, + * Activating the MMU on the AS, + * Allowing jobs to be submitted on the AS. 
+ * + * Context: + * kbasep_js_kctx_info.jsctx_mutex held, + * kbasep_js_device_data.runpool_mutex held, + * AS transaction mutex held, + * Runpool IRQ lock held + */ +static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_as *current_as) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Attribute handling */ + kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); + + /* Allow it to run jobs */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + + kbase_js_runpool_inc_context_count(kbdev, kctx); +} + +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + int i; + + if (kbdev->hwaccess.active_kctx[js] == kctx) { + /* Context is already active */ + return true; + } + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + if (kbdev->as_to_kctx[i] == kctx) { + /* Context already has ASID - mark as active */ + return true; + } + } + + /* Context does not have address space assigned */ + return false; +} + +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + int as_nr = kctx->as_nr; + + if (as_nr == KBASEP_AS_NR_INVALID) { + WARN(1, "Attempting to release context without ASID\n"); + return; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (atomic_read(&kctx->refcount) != 1) { + WARN(1, "Attempting to release active ASID\n"); + return; + } + + kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); + + kbase_ctx_sched_release_ctx(kctx); + kbase_js_runpool_dec_context_count(kbdev, kctx); +} + +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +} + +int kbase_backend_find_and_release_free_address_space( + struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + int i; + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbasep_js_kctx_info *as_js_kctx_info; + struct kbase_context *as_kctx; + + as_kctx = kbdev->as_to_kctx[i]; + as_js_kctx_info = &as_kctx->jctx.sched_info; + + /* Don't release privileged or active contexts, or contexts with + * jobs running. + * Note that a context will have at least 1 reference (which + * was previously taken by kbasep_js_schedule_ctx()) until + * descheduled. 
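+ * The check below therefore only considers a context whose refcount is
+ * exactly 1, and takes its own reference via
+ * kbase_ctx_sched_inc_refcount_nolock() before releasing the context from
+ * its address space.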
+ */ + if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && + atomic_read(&as_kctx->refcount) == 1) { + if (!kbase_ctx_sched_inc_refcount_nolock(as_kctx)) { + WARN(1, "Failed to retain active context\n"); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; + } + + kbasep_js_clear_submit_allowed(js_devdata, as_kctx); + + /* Drop and retake locks to take the jsctx_mutex on the + * context we're about to release without violating lock + * ordering + */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + + /* Release context from address space */ + mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); + + if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, + as_kctx, + true); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + return i; + } + + /* Context was retained while locks were dropped, + * continue looking for free AS */ + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; +} + +bool kbase_backend_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int as_nr) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_as *new_address_space = NULL; + int js; + + js_devdata = &kbdev->js_data; + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + if (kbdev->hwaccess.active_kctx[js] == kctx) { + WARN(1, "Context is already scheduled in\n"); + return false; + } + } + + new_address_space = &kbdev->as[as_nr]; + + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); + + if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { + /* We need to retain it to keep the corresponding address space + */ + kbase_ctx_sched_retain_ctx_refcount(kctx); + } + + return true; +} + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_defs.h new file mode 100644 index 000000000000..7cda61ac6cdb --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_defs.h @@ -0,0 +1,111 @@ +/* + * + * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific definitions + */ + +#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ +#define _KBASE_HWACCESS_GPU_DEFS_H_ + +/* SLOT_RB_SIZE must be < 256 */ +#define SLOT_RB_SIZE 2 +#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) + +/** + * struct rb_entry - Ringbuffer entry + * @katom: Atom associated with this entry + */ +struct rb_entry { + struct kbase_jd_atom *katom; +}; + +/** + * struct slot_rb - Slot ringbuffer + * @entries: Ringbuffer entries + * @last_context: The last context to submit a job on this slot + * @read_idx: Current read index of buffer + * @write_idx: Current write index of buffer + * @job_chain_flag: Flag used to implement jobchain disambiguation + */ +struct slot_rb { + struct rb_entry entries[SLOT_RB_SIZE]; + + struct kbase_context *last_context; + + u8 read_idx; + u8 write_idx; + + u8 job_chain_flag; +}; + +/** + * struct kbase_backend_data - GPU backend specific data for HW access layer + * @slot_rb: Slot ringbuffers + * @scheduling_timer: The timer tick used for rescheduling jobs + * @timer_running: Is the timer running? The runpool_mutex must be + * held whilst modifying this. + * @suspend_timer: Is the timer suspended? Set when a suspend + * occurs and cleared on resume. The runpool_mutex + * must be held whilst modifying this. + * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) + * @reset_workq: Work queue for performing the reset + * @reset_work: Work item for performing the reset + * @reset_wait: Wait event signalled when the reset is complete + * @reset_timer: Timeout for soft-stops before the reset + * @timeouts_updated: Have timeout values just been updated? + * + * The hwaccess_lock (a spinlock) must be held when accessing this structure + */ +struct kbase_backend_data { + struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; + + struct hrtimer scheduling_timer; + + bool timer_running; + bool suspend_timer; + + atomic_t reset_gpu; + +/* The GPU reset isn't pending */ +#define KBASE_RESET_GPU_NOT_PENDING 0 +/* kbase_prepare_to_reset_gpu has been called */ +#define KBASE_RESET_GPU_PREPARED 1 +/* kbase_reset_gpu has been called - the reset will now definitely happen + * within the timeout period */ +#define KBASE_RESET_GPU_COMMITTED 2 +/* The GPU reset process is currently occuring (timeout has expired or + * kbasep_try_reset_gpu_early was called) */ +#define KBASE_RESET_GPU_HAPPENING 3 +/* Reset the GPU silently, used when resetting the GPU as part of normal + * behavior (e.g. when exiting protected mode). */ +#define KBASE_RESET_GPU_SILENT 4 + struct workqueue_struct *reset_workq; + struct work_struct reset_work; + wait_queue_head_t reset_wait; + struct hrtimer reset_timer; + + bool timeouts_updated; +}; + +#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_hw.c new file mode 100644 index 000000000000..2ce203e3a31b --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_hw.c @@ -0,0 +1,1485 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel job manager APIs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); + +static u64 kbase_job_write_affinity(struct kbase_device *kbdev, + base_jd_core_req core_req, + int js) +{ + u64 affinity; + + if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == + BASE_JD_REQ_T) { + /* Tiler-only atom */ + /* If the hardware supports XAFFINITY then we'll only enable + * the tiler (which is the default so this is a no-op), + * otherwise enable shader core 0. + */ + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) + affinity = 1; + else + affinity = 0; + } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { + unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; + struct mali_base_gpu_coherent_group_info *coherency_info = + &kbdev->gpu_props.props.coherency_info; + + affinity = kbdev->pm.backend.shaders_avail & + kbdev->pm.debug_core_mask[js]; + + /* JS2 on a dual core group system targets core group 1. All + * other cases target core group 0. + */ + if (js == 2 && num_core_groups > 1) + affinity &= coherency_info->group[1].core_mask; + else + affinity &= coherency_info->group[0].core_mask; + } else { + /* Use all cores */ + affinity = kbdev->pm.backend.shaders_avail & + kbdev->pm.debug_core_mask[js]; + } + + if (unlikely(!affinity)) { +#ifdef CONFIG_MALI_DEBUG + u64 shaders_ready = + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + + WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail)); +#endif + + affinity = kbdev->pm.backend.shaders_avail; + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), + affinity & 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), + affinity >> 32); + + return affinity; +} + +/** + * select_job_chain() - Select which job chain to submit to the GPU + * @katom: Pointer to the atom about to be submitted to the GPU + * + * Selects one of the fragment job chains attached to the special atom at the + * end of a renderpass, or returns the address of the single job chain attached + * to any other type of atom. + * + * Which job chain is selected depends upon whether the tiling phase of the + * renderpass completed normally or was soft-stopped because it used too + * much memory. It also depends upon whether one of the fragment job chains + * has already been run as part of the same renderpass. 
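+ *
+ * In outline (this matches the switch statement in the function body):
+ * KBASE_JD_RP_OOM selects jc_fragment.norm_read_forced_write;
+ * KBASE_JD_RP_START and KBASE_JD_RP_PEND_OOM select
+ * jc_fragment.norm_read_norm_write; KBASE_JD_RP_RETRY_OOM selects
+ * jc_fragment.forced_read_forced_write; KBASE_JD_RP_RETRY and
+ * KBASE_JD_RP_RETRY_PEND_OOM select jc_fragment.forced_read_norm_write.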
+ * + * Return: GPU virtual address of the selected job chain + */ +static u64 select_job_chain(struct kbase_jd_atom *katom) +{ + struct kbase_context *const kctx = katom->kctx; + u64 jc = katom->jc; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) + return jc; + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + /* We can read a subset of renderpass state without holding + * higher-level locks (but not end_katom, for example). + * If the end-of-renderpass atom is running with as-yet indeterminate + * OOM state then assume that the start atom was not soft-stopped. + */ + switch (rp->state) { + case KBASE_JD_RP_OOM: + /* Tiling ran out of memory. + * Start of incremental rendering, used once. + */ + jc = katom->jc_fragment.norm_read_forced_write; + break; + case KBASE_JD_RP_START: + case KBASE_JD_RP_PEND_OOM: + /* Tiling completed successfully first time. + * Single-iteration rendering, used once. + */ + jc = katom->jc_fragment.norm_read_norm_write; + break; + case KBASE_JD_RP_RETRY_OOM: + /* Tiling ran out of memory again. + * Continuation of incremental rendering, used as + * many times as required. + */ + jc = katom->jc_fragment.forced_read_forced_write; + break; + case KBASE_JD_RP_RETRY: + case KBASE_JD_RP_RETRY_PEND_OOM: + /* Tiling completed successfully this time. + * End of incremental rendering, used once. + */ + jc = katom->jc_fragment.forced_read_norm_write; + break; + default: + WARN_ON(1); + break; + } + + dev_dbg(kctx->kbdev->dev, + "Selected job chain 0x%llx for end atom %p in state %d\n", + jc, (void *)katom, (int)rp->state); + + katom->jc = jc; + return jc; +} + +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js) +{ + struct kbase_context *kctx; + u32 cfg; + u64 const jc_head = select_job_chain(katom); + u64 affinity; +#ifdef CONFIG_MALI_SEC_VK_BOOST + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; +#endif + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(katom); + + kctx = katom->kctx; + + /* Command register must be available */ + KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + +#ifdef CONFIG_MALI_SEC_VK_BOOST + if (kctx->ctx_vk_need_qos == true) + platform->ctx_vk_need_qos = true; + else + platform->ctx_vk_need_qos = false; +#endif + + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", + jc_head, (void *)katom); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), + jc_head & 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), + jc_head >> 32); + + affinity = kbase_job_write_affinity(kbdev, katom->core_req, js); + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on + * start */ + cfg = kctx->as_nr; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) + cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; + else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + cfg |= 
JS_CONFIG_END_FLUSH_CLEAN; + else + cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + + cfg |= JS_CONFIG_THREAD_PRI(8); + + if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) || + (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) + cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; + + if (kbase_hw_has_feature(kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { + cfg |= JS_CONFIG_JOB_CHAIN_FLAG; + katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + true; + } else { + katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + false; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), + katom->flush_id); + + /* Write an approximate start timestamp. + * It's approximate because there might be a job in the HEAD register. + */ + katom->start_timestamp = ktime_get(); + + /* GO ! */ + dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", + katom, kctx, js, jc_head); + + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, + (u32)affinity); + + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, + js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START); + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, + affinity, cfg); + KBASE_TLSTREAM_TL_RET_CTX_LPU( + kbdev, + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); + KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_RET_ATOM_LPU( + kbdev, + katom, + &kbdev->gpu_props.props.raw_props.js_features[js], + "ctx_nr,atom_nr"); + kbase_kinstr_jm_atom_hw_submit(katom); +#ifdef CONFIG_GPU_TRACEPOINTS + if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { + /* If this is the only job on the slot, trace it as starting */ + char js_string[16]; + + trace_gpu_sched_switch( + kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(katom->start_timestamp), + (u32)katom->kctx->id, 0, katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; + } +#endif + + trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom), js); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_START); +} + +/** + * kbasep_job_slot_update_head_start_timestamp - Update timestamp + * @kbdev: kbase device + * @js: job slot + * @end_timestamp: timestamp + * + * Update the start_timestamp of the job currently in the HEAD, based on the + * fact that we got an IRQ for the previous set of completed jobs. + * + * The estimate also takes into account the time the job was submitted, to + * work out the best estimate (which might still result in an over-estimate to + * the calculated time spent) + */ +static void kbasep_job_slot_update_head_start_timestamp( + struct kbase_device *kbdev, + int js, + ktime_t end_timestamp) +{ + ktime_t timestamp_diff; + struct kbase_jd_atom *katom; + + /* Checking the HEAD position for the job slot */ + katom = kbase_gpu_inspect(kbdev, js, 0); + if (katom != NULL) { + timestamp_diff = ktime_sub(end_timestamp, + katom->start_timestamp); + if (ktime_to_ns(timestamp_diff) >= 0) { + /* Only update the timestamp if it's a better estimate + * than what's currently stored. 
This is because our + * estimate that accounts for the throttle time may be + * too much of an overestimate */ + katom->start_timestamp = end_timestamp; + } + } +} + +/** + * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline + * tracepoint + * @kbdev: kbase device + * @js: job slot + * + * Make a tracepoint call to the instrumentation module informing that + * softstop happened on given lpu (job slot). + */ +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, + int js) +{ + KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( + kbdev, + &kbdev->gpu_props.props.raw_props.js_features[js]); +} + +void kbase_job_done(struct kbase_device *kbdev, u32 done) +{ + int i; + u32 count = 0; + ktime_t end_timestamp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_DEBUG_ASSERT(kbdev); + + KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done); + + end_timestamp = ktime_get(); + + while (done) { + u32 failed = done >> 16; + + /* treat failed slots as finished slots */ + u32 finished = (done & 0xFFFF) | failed; + + /* Note: This is inherently unfair, as we always check + * for lower numbered interrupts before the higher + * numbered ones.*/ + i = ffs(finished) - 1; + KBASE_DEBUG_ASSERT(i >= 0); + + do { + int nr_done; + u32 active; + u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ + u64 job_tail = 0; + + if (failed & (1u << i)) { + /* read out the job slot status code if the job + * slot reported failure */ + completion_code = kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_STATUS)); + + if (completion_code == BASE_JD_EVENT_STOPPED) { + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( + kbdev, NULL, + i, 0, TL_JS_EVENT_SOFT_STOP); + + kbasep_trace_tl_event_lpu_softstop( + kbdev, i); + + /* Soft-stopped job - read the value of + * JS_TAIL so that the job chain can + * be resumed */ + job_tail = (u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_LO)) | + ((u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_HI)) + << 32); + } else if (completion_code == + BASE_JD_EVENT_NOT_STARTED) { + /* PRLAM-10673 can cause a TERMINATED + * job to come back as NOT_STARTED, but + * the error interrupt helps us detect + * it */ + completion_code = + BASE_JD_EVENT_TERMINATED; + } + + kbase_gpu_irq_evict(kbdev, i, completion_code); + + /* Some jobs that encounter a BUS FAULT may result in corrupted + * state causing future jobs to hang. Reset GPU before + * allowing any other jobs on the slot to continue. */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { + if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } + } + } + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), + done & ((1 << i) | (1 << (i + 16)))); + active = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_JS_STATE)); + + if (((active >> i) & 1) == 0 && + (((done >> (i + 16)) & 1) == 0)) { + /* There is a potential race we must work + * around: + * + * 1. A job slot has a job in both current and + * next registers + * 2. The job in current completes + * successfully, the IRQ handler reads + * RAWSTAT and calls this function with the + * relevant bit set in "done" + * 3. The job in the next registers becomes the + * current job on the GPU + * 4. Sometime before the JOB_IRQ_CLEAR line + * above the job on the GPU _fails_ + * 5. The IRQ_CLEAR clears the done bit but not + * the failed bit. This atomically sets + * JOB_IRQ_JS_STATE. However since both jobs + * have now completed the relevant bits for + * the slot are set to 0. 
+ * + * If we now did nothing then we'd incorrectly + * assume that _both_ jobs had completed + * successfully (since we haven't yet observed + * the fail bit being set in RAWSTAT). + * + * So at this point if there are no active jobs + * left we check to see if RAWSTAT has a failure + * bit set for the job slot. If it does we know + * that there has been a new failure that we + * didn't previously know about, so we make sure + * that we record this in active (but we wait + * for the next loop to deal with it). + * + * If we were handling a job failure (i.e. done + * has the relevant high bit set) then we know + * that the value read back from + * JOB_IRQ_JS_STATE is the correct number of + * remaining jobs because the failed job will + * have prevented any futher jobs from starting + * execution. + */ + u32 rawstat = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); + + if ((rawstat >> (i + 16)) & 1) { + /* There is a failed job that we've + * missed - add it back to active */ + active |= (1u << i); + } + } + + dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", + completion_code); + + nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); + nr_done -= (active >> i) & 1; + nr_done -= (active >> (i + 16)) & 1; + + if (nr_done <= 0) { + dev_warn(kbdev->dev, "Spurious interrupt on slot %d", + i); + + goto spurious; + } + + count += nr_done; + + while (nr_done) { + if (nr_done == 1) { + kbase_gpu_complete_hw(kbdev, i, + completion_code, + job_tail, + &end_timestamp); + kbase_jm_try_kick_all(kbdev); + } else { + /* More than one job has completed. + * Since this is not the last job being + * reported this time it must have + * passed. This is because the hardware + * will not allow further jobs in a job + * slot to complete until the failed job + * is cleared from the IRQ status. + */ + kbase_gpu_complete_hw(kbdev, i, + BASE_JD_EVENT_DONE, + 0, + &end_timestamp); + } + nr_done--; + } + spurious: + done = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); + + failed = done >> 16; + finished = (done & 0xFFFF) | failed; + if (done) + end_timestamp = ktime_get(); + } while (finished & (1 << i)); + + kbasep_job_slot_update_head_start_timestamp(kbdev, i, + end_timestamp); + } + + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_COMMITTED) { + /* If we're trying to reset the GPU then we might be able to do + * it early (without waiting for a timeout) because some jobs + * have completed + */ + kbasep_try_reset_gpu_early_locked(kbdev); + } + KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); +} + +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + int js, + u32 action, + base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom) +{ +#if KBASE_KTRACE_ENABLE + u32 status_reg_before; + u64 job_in_head_before; + u32 status_reg_after; + + KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); + + /* Check the head pointer */ + job_in_head_before = ((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_LO))) + | (((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_HI))) + << 32); + status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); +#endif + + if (action == JS_COMMAND_SOFT_STOP) { + if (kbase_jd_katom_is_protected(target_katom)) { +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kbdev->dev, + "Attempt made to soft-stop a job that cannot be soft-stopped. 
core_reqs = 0x%x", + (unsigned int)core_reqs); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + + /* We are about to issue a soft stop, so mark the atom as having + * been soft stopped */ + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED; + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->update_status) + kbdev->vendor_callbacks->update_status(kbdev, "soft_stop", 0); + + /* Mark the point where we issue the soft-stop command */ + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + } + } else if (action == JS_COMMAND_HARD_STOP) { + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->update_status) + kbdev->vendor_callbacks->update_status(kbdev, "hard_stop", 0); + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); + +#if KBASE_KTRACE_ENABLE + status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); + if (status_reg_after == BASE_JD_EVENT_ACTIVE) { + struct kbase_jd_atom *head; + struct kbase_context *head_kctx; + + head = kbase_gpu_inspect(kbdev, js, 0); + head_kctx = head->kctx; + + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); + else + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); + break; + default: + BUG(); + break; + } + } else { + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js); + else + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); + break; + default: + BUG(); + break; + } + } +#endif +} + +void kbase_backend_jm_kill_running_jobs_from_kctx(struct 
kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_hardstop(kctx, i, NULL); +} + +/** + * kbase_is_existing_atom_submitted_later_than_ready + * @ready: sequence number of the ready atom + * @existing: sequence number of the existing atom + * + * Returns true if the existing atom has been submitted later than the + * ready atom. It is used to understand if an atom that is ready has been + * submitted earlier than the currently running atom, so that the currently + * running atom should be preempted to allow the ready atom to run. + */ +static inline bool kbase_is_existing_atom_submitted_later_than_ready(u64 ready, u64 existing) +{ + /* No seq_nr set? */ + if (!ready || !existing) + return false; + + /* Efficiently handle the unlikely case of wrapping. + * The following code assumes that the delta between the sequence number + * of the two atoms is less than INT64_MAX. + * In the extremely unlikely case where the delta is higher, the comparison + * defaults for no preemption. + * The code also assumes that the conversion from unsigned to signed types + * works because the signed integers are 2's complement. + */ + return (s64)(ready - existing) < 0; +} + +void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + struct kbase_jd_atom *target_katom) +{ + struct kbase_device *kbdev; + int js = target_katom->slot_nr; + int priority = target_katom->sched_priority; + int seq_nr = target_katom->seq_nr; + int i; + bool stop_sent = false; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + if (!katom) + continue; + + if ((kbdev->js_ctx_scheduling_mode == + KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) && + (katom->kctx != kctx)) + continue; + + if ((katom->sched_priority > priority) || + (katom->kctx == kctx && kbase_is_existing_atom_submitted_later_than_ready(seq_nr, katom->seq_nr))) { + if (!stop_sent) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( + kbdev, + target_katom); + + kbase_job_slot_softstop(kbdev, js, katom); + stop_sent = true; + } + } +} + +static int softstop_start_rp_nolock( + struct kbase_context *kctx, struct kbase_va_region *reg) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_atom *katom; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = kbase_gpu_inspect(kbdev, 1, 0); + + if (!katom) { + dev_dbg(kctx->kbdev->dev, "No atom on job slot\n"); + return -ESRCH; + } + + if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { + dev_dbg(kctx->kbdev->dev, + "Atom %p on job slot is not start RP\n", (void *)katom); + return -EPERM; + } + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + if (WARN_ON(rp->state != KBASE_JD_RP_START && + rp->state != KBASE_JD_RP_RETRY)) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", + (int)rp->state, (void *)reg); + + if (WARN_ON(katom != rp->start_katom)) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", + (void *)reg, (void *)&rp->oom_reg_list); + list_move_tail(®->link, 
&rp->oom_reg_list); + dev_dbg(kctx->kbdev->dev, "Added region to list\n"); + + rp->state = (rp->state == KBASE_JD_RP_START ? + KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM); + + kbase_job_slot_softstop(kbdev, 1, katom); + + return 0; +} + +int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ + struct kbase_device *const kbdev = kctx->kbdev; + int err; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + err = softstop_start_rp_nolock(kctx, reg); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return err; +} + +void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT); + + timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, + kctx->jctx.job_nr == 0, timeout); + + if (timeout != 0) + timeout = wait_event_timeout( + kctx->jctx.sched_info.ctx.is_scheduled_wait, + !kbase_ctx_flag(kctx, KCTX_SCHEDULED), + timeout); + + /* Neither wait timed out; all done! */ + if (timeout != 0) + goto exit; + + if (kbase_prepare_to_reset_gpu(kbdev)) { + dev_err(kbdev->dev, + "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", + ZAP_TIMEOUT); + kbase_reset_gpu(kbdev); + } + + /* Wait for the reset to complete */ + kbase_reset_gpu_wait(kbdev); +exit: + dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); + + /* Ensure that the signallers of the waitqs have finished */ + mutex_lock(&kctx->jctx.lock); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kctx->jctx.lock); +} + +u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) +{ + u32 flush_id = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { + mutex_lock(&kbdev->pm.lock); + if (kbdev->pm.backend.gpu_powered) + flush_id = kbase_reg_read(kbdev, + GPU_CONTROL_REG(LATEST_FLUSH)); + mutex_unlock(&kbdev->pm.lock); + } + + return flush_id; +} + +int kbase_job_slot_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); + return 0; +} +KBASE_EXPORT_TEST_API(kbase_job_slot_init); + +void kbase_job_slot_halt(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbase_job_slot_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_job_slot_term); + + +/** + * kbase_job_slot_softstop_swflags - Soft-stop a job with flags + * @kbdev: The kbase device + * @js: The job slot to soft-stop + * @target_katom: The job that should be soft-stopped (or NULL for any job) + * @sw_flags: Flags to pass in about the soft-stop + * + * Context: + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Soft-stop the specified job slot, with extra information about the stop + * + * Where possible any job in the next register is evicted before the soft-stop.
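+ * + * Illustrative call (a sketch only; real call sites may differ): a caller + * holding the required locks that wants the soft-stop to also count as a + * disjoint event can pass the software flag that is later checked by + * kbase_job_check_enter_disjoint(): + * kbase_job_slot_softstop_swflags(kbdev, js, katom, + * JS_COMMAND_SW_CAUSES_DISJOINT);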
+ */ +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) +{ + dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", + target_katom, sw_flags, js); + + KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); + kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, + JS_COMMAND_SOFT_STOP | sw_flags); +} + +/** + * kbase_job_slot_softstop - Soft-stop the specified job slot + * @kbdev: The kbase device + * @js: The job slot to soft-stop + * @target_katom: The job that should be soft-stopped (or NULL for any job) + * Context: + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Where possible any job in the next register is evicted before the soft-stop. + */ +void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom) +{ + kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); +} + +/** + * kbase_job_slot_hardstop - Hard-stop the specified job slot + * @kctx: The kbase context that contains the job(s) that should + * be hard-stopped + * @js: The job slot to hard-stop + * @target_katom: The job that should be hard-stopped (or NULL for all + * jobs from the context) + * Context: + * The job slot lock must be held when calling this function. + */ +void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool stopped; + + stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, + target_katom, + JS_COMMAND_HARD_STOP); +} + +/** + * kbase_job_check_enter_disjoint - potentially enter disjoint mode + * @kbdev: kbase device + * @action: the event which has occurred + * @core_reqs: core requirements of the atom + * @target_katom: the atom which is being affected + * + * For a certain soft-stop action, work out whether to enter disjoint + * state. + * + * This does not register multiple disjoint events if the atom has already + * started a disjoint period + * + * @core_reqs can be supplied as 0 if the atom had not started on the hardware + * (and so a 'real' soft/hard-stop was not required, but it still interrupted + * flow, perhaps on another context) + * + * kbase_job_check_leave_disjoint() should be used to end the disjoint + * state when the soft/hard-stop action is complete + */ +void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) +{ + u32 hw_action = action & JS_COMMAND_MASK; + + /* For soft-stop, don't enter if soft-stop not allowed, or isn't + * causing disjoint. + */ + if (hw_action == JS_COMMAND_SOFT_STOP && + (kbase_jd_katom_is_protected(target_katom) || + (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT)))) + return; + + /* Nothing to do if already logged disjoint state on this atom */ + if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) + return; + + target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT; + kbase_disjoint_state_up(kbdev); +} + +/** + * kbase_job_check_leave_disjoint - potentially leave disjoint state + * @kbdev: kbase device + * @target_katom: atom which is finishing + * + * Work out whether to leave disjoint state when finishing an atom that was + * originated by kbase_job_check_enter_disjoint().
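+ * + * Typical pairing (sketch): the stop path calls + * kbase_job_check_enter_disjoint() when it issues the soft/hard-stop, and the + * completion path for the same atom calls kbase_job_check_leave_disjoint(), + * keeping kbase_disjoint_state_up()/kbase_disjoint_state_down() balanced.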
+ */ +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom) +{ + if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { + target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; + kbase_disjoint_state_down(kbdev); + } +} + +static void kbase_debug_dump_registers(struct kbase_device *kbdev) +{ + int i; + + kbase_io_history_dump(kbdev); + + dev_err(kbdev->dev, "Register state:"); + dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS))); + dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE))); + for (i = 0; i < 3; i++) { + dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)), + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); + } + dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); + dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); + dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG))); + dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG))); +} + +/* MALI_SEC_INTEGRATION */ +void gpu_dump_register_hooks(struct kbase_device *kbdev) +{ + kbase_debug_dump_registers(kbdev); +} +static void kbasep_reset_timeout_worker(struct work_struct *data) +{ + unsigned long flags; + struct kbase_device *kbdev; + ktime_t end_timestamp = ktime_get(); + struct kbasep_js_device_data *js_devdata; + bool silent = false; + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + + KBASE_DEBUG_ASSERT(data); + + kbdev = container_of(data, struct kbase_device, + hwaccess.backend.reset_work); + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_SILENT) + silent = true; + + KBASE_KTRACE_ADD_JM(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); + + /* Disable GPU hardware counters. + * This call will block until counters are disabled. + */ + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + /* Make sure the timer has completed - this cannot be done from + * interrupt context, so this cannot be done within + * kbasep_try_reset_gpu_early. */ + hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* This would re-activate the GPU. 
Since it's already idle, + * there's no need to reset it */ + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + kbase_disjoint_state_down(kbdev); + wake_up(&kbdev->hwaccess.backend.reset_wait); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } + + KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock(&kbdev->mmu_mask_change); + kbase_pm_reset_start_locked(kbdev); + + /* We're about to flush out the IRQs and their bottom halves */ + kbdev->irq_reset_flush = true; + + /* Disable IRQs to avoid IRQ handlers kicking in after releasing the + * spinlock; this also clears any outstanding interrupts */ + kbase_pm_disable_interrupts_nolock(kbdev); + + spin_unlock(&kbdev->mmu_mask_change); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Ensure that any IRQ handlers have finished. + * Must be done without any locks IRQ handlers will take */ + kbase_synchronize_irqs(kbdev); + + /* Flush out any in-flight work items */ + kbase_flush_mmu_wqs(kbdev); + + /* The flush has completed so reset the active indicator */ + kbdev->irq_reset_flush = false; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { + /* Ensure that L2 is not transitioning when we send the reset + * command */ + while (--max_loops && kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2)) + ; + + WARN(!max_loops, "L2 power transition timed out while trying to reset\n"); + } + + mutex_lock(&kbdev->pm.lock); + /* We hold the pm lock, so there ought to be a current policy */ + KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); + + /* All slots have been soft-stopped and we've waited + * SOFT_STOP_RESET_TIMEOUT for the slots to clear; at this point we + * assume that anything that is still left on the GPU is stuck there and + * we'll kill it when we reset the GPU */ + + if (!silent) + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", + RESET_TIMEOUT); + + /* Output the state of some interesting registers to help in the + * debugging of GPU resets */ + if (!silent) + kbase_debug_dump_registers(kbdev); + + /* Complete any jobs that were still on the GPU */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = false; + if (!kbdev->pm.backend.protected_entry_transition_override) + kbase_backend_reset(kbdev, &end_timestamp); + kbase_pm_metrics_update(kbdev, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Reset the GPU */ + kbase_pm_init_hw(kbdev, 0); + + mutex_unlock(&kbdev->pm.lock); + + mutex_lock(&js_devdata->runpool_mutex); + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ctx_sched_restore_all_as(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + kbase_pm_enable_interrupts(kbdev); + + kbase_disjoint_state_down(kbdev); + + mutex_unlock(&js_devdata->runpool_mutex); + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->update_status) + kbdev->vendor_callbacks->update_status(kbdev, "reset_count", 0); + mutex_lock(&kbdev->pm.lock); + + kbase_pm_reset_complete(kbdev); + + /* Find out what cores are required now */ + kbase_pm_update_cores_state(kbdev); + + /* Synchronously request and wait for those cores, because if + * instrumentation is enabled it would need them immediately.
*/ + kbase_pm_wait_for_desired_state(kbdev); + + mutex_unlock(&kbdev->pm.lock); + + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + + wake_up(&kbdev->hwaccess.backend.reset_wait); + if (!silent) + dev_err(kbdev->dev, "Reset complete"); + + /* Try submitting some jobs to restart processing */ + KBASE_KTRACE_ADD_JM(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); + kbase_js_sched_all(kbdev); + + /* Process any pending slot updates */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_backend_slot_update(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_pm_context_idle(kbdev); + + /* Re-enable GPU hardware counters */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + KBASE_KTRACE_ADD_JM(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); +} + +static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) +{ + struct kbase_device *kbdev = container_of(timer, struct kbase_device, + hwaccess.backend.reset_timer); + + KBASE_DEBUG_ASSERT(kbdev); + + /* Reset still pending? */ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == + KBASE_RESET_GPU_COMMITTED) + queue_work(kbdev->hwaccess.backend.reset_workq, + &kbdev->hwaccess.backend.reset_work); + + return HRTIMER_NORESTART; +} + +/* + * If all jobs are evicted from the GPU then we can reset the GPU + * immediately instead of waiting for the timeout to elapse + */ + +static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) +{ + int i; + int pending_jobs = 0; + + KBASE_DEBUG_ASSERT(kbdev); + + /* Count the number of jobs */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); + + if (pending_jobs > 0) { + /* There are still jobs on the GPU - wait */ + return; + } + + /* To prevent getting incorrect registers when dumping failed job, + * skip early reset. + */ + if (atomic_read(&kbdev->job_fault_debug) > 0) + return; + + /* Check that the reset has been committed to (i.e. kbase_reset_gpu has + * been called), and that no other thread beat this thread to starting + * the reset */ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != + KBASE_RESET_GPU_COMMITTED) { + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, LSI_RESET_GPU_EARLY_DUPE, NULL, atomic_read(&kbdev->hwaccess.backend.reset_gpu)); + dev_err(kbdev->dev, + "%s: unexpected reset race\n", __func__); + /* Reset has already occurred */ + return; + } + + queue_work(kbdev->hwaccess.backend.reset_workq, + &kbdev->hwaccess.backend.reset_work); +} + +static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbasep_try_reset_gpu_early_locked(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/** + * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU + * @kbdev: kbase device + * + * This function just soft-stops all the slots to ensure that as many jobs as + * possible are saved. + * + * Return: + * The function returns a boolean which should be interpreted as follows: + * true - Prepared for reset, kbase_reset_gpu_locked should be called. + * false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. 
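+ * + * Expected usage (sketch, mirroring the bus-fault handling in + * kbase_job_done()): + * if (kbase_prepare_to_reset_gpu_locked(kbdev)) + * kbase_reset_gpu_locked(kbdev);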
+ */ +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) +{ + int i; + + KBASE_DEBUG_ASSERT(kbdev); + + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_PREPARED) != + KBASE_RESET_GPU_NOT_PENDING) { + /* Some other thread is already resetting the GPU */ + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, LSI_RESET_RACE_DETECTED_EARLY_OUT, NULL, atomic_read(&kbdev->hwaccess.backend.reset_gpu)); + dev_err(kbdev->dev, + "%s: unexpected reset race\n", __func__); + return false; + } + + kbase_disjoint_state_up(kbdev); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_softstop(kbdev, i, NULL); + + return true; +} + +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = kbase_prepare_to_reset_gpu_locked(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} +KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); + +/* + * This function should be called after kbase_prepare_to_reset_gpu if it + * returns true. It should never be called without a corresponding call to + * kbase_prepare_to_reset_gpu. + * + * After this function is called (or not called if kbase_prepare_to_reset_gpu + * returned false), the caller should wait for + * kbdev->hwaccess.backend.reset_wait to be signalled to know when the reset + * has completed. + */ +void kbase_reset_gpu(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + + /* Note this is an assert/atomic_set because it is a software issue for + * a race to be occurring here */ + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_PREPARED); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_COMMITTED); + + dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", + kbdev->reset_timeout_ms); + + hrtimer_start(&kbdev->hwaccess.backend.reset_timer, + HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), + HRTIMER_MODE_REL); + + /* Try resetting early */ + kbasep_try_reset_gpu_early(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_reset_gpu); + +void kbase_reset_gpu_locked(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + + /* Note this is an assert/atomic_set because it is a software issue for + * a race to be occurring here */ + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_PREPARED); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_COMMITTED); + + dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", + kbdev->reset_timeout_ms); + hrtimer_start(&kbdev->hwaccess.backend.reset_timer, + HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), + HRTIMER_MODE_REL); + + /* Try resetting early */ + kbasep_try_reset_gpu_early_locked(kbdev); +} + +int kbase_reset_gpu_silent(struct kbase_device *kbdev) +{ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_SILENT) != + KBASE_RESET_GPU_NOT_PENDING) { + /* Some other thread is already resetting the GPU */ + return -EAGAIN; + } + + kbase_disjoint_state_up(kbdev); + + queue_work(kbdev->hwaccess.backend.reset_workq, + &kbdev->hwaccess.backend.reset_work); + + return 0; +} + +bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) +{ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_NOT_PENDING) + return false; + + return
true; +} + +int kbase_reset_gpu_wait(struct kbase_device *kbdev) +{ + wait_event(kbdev->hwaccess.backend.reset_wait, + atomic_read(&kbdev->hwaccess.backend.reset_gpu) + == KBASE_RESET_GPU_NOT_PENDING); + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); + +int kbase_reset_gpu_init(struct kbase_device *kbdev) +{ + kbdev->hwaccess.backend.reset_workq = alloc_workqueue( + "Mali reset workqueue", 0, 1); + if (kbdev->hwaccess.backend.reset_workq == NULL) + return -ENOMEM; + + INIT_WORK(&kbdev->hwaccess.backend.reset_work, + kbasep_reset_timeout_worker); + + hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbdev->hwaccess.backend.reset_timer.function = + kbasep_reset_timer_callback; + + return 0; +} + +void kbase_reset_gpu_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->hwaccess.backend.reset_workq); +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_internal.h new file mode 100644 index 000000000000..1419b5987eff --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_internal.h @@ -0,0 +1,177 @@ +/* + * + * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Job Manager backend-specific low-level APIs. + */ + +#ifndef _KBASE_JM_HWACCESS_H_ +#define _KBASE_JM_HWACCESS_H_ + +#include +#include +#include + +#include +#include + +/** + * kbase_job_submit_nolock() - Submit a job to a certain job-slot + * @kbdev: Device pointer + * @katom: Atom to submit + * @js: Job slot to submit on + * + * The caller must check kbasep_jm_is_submit_slots_free() != false before + * calling this. 
+ * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbase_job_submit_nolock(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, int js); + +/** + * kbase_job_done_slot() - Complete the head job on a particular job-slot + * @kbdev: Device pointer + * @s: Job slot + * @completion_code: Completion code of job reported by GPU + * @job_tail: Job tail address reported by GPU + * @end_timestamp: Timestamp of job completion + */ +void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp); + +#ifdef CONFIG_GPU_TRACEPOINTS +static inline char *kbasep_make_job_slot_string(int js, char *js_string, + size_t js_size) +{ + snprintf(js_string, js_size, "job_slot_%i", js); + return js_string; +} +#endif + +static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, + struct kbase_context *kctx) +{ + return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); +} + + +/** + * kbase_job_hw_submit() - Submit a job to the GPU + * @kbdev: Device pointer + * @katom: Atom to submit + * @js: Job slot to submit on + * + * The caller must check kbasep_jm_is_submit_slots_free() != false before + * calling this. + * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js); + +/** + * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop + * on the specified atom + * @kbdev: Device pointer + * @js: Job slot to stop on + * @action: The action to perform, either JSn_COMMAND_HARD_STOP or + * JSn_COMMAND_SOFT_STOP + * @core_reqs: Core requirements of atom to stop + * @target_katom: Atom to stop + * + * The following locking conditions are made on the caller: + * - it must hold the hwaccess_lock + */ +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + int js, + u32 action, + base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom); + +/** + * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job + * slot belonging to a given context. + * @kbdev: Device pointer + * @kctx: Context pointer. May be NULL + * @katom: Specific atom to stop. May be NULL + * @js: Job slot to hard stop + * @action: The action to perform, either JSn_COMMAND_HARD_STOP or + * JSn_COMMAND_SOFT_STOP + * + * If no context is provided then all jobs on the slot will be soft or hard + * stopped. + * + * If a katom is provided then only that specific atom will be stopped. In this + * case the kctx parameter is ignored. + * + * Jobs that are on the slot but are not yet on the GPU will be unpulled and + * returned to the job scheduler. 
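+ * + * For example (sketch of the call sites in this file): kbase_job_slot_hardstop() + * calls this with a non-NULL @kctx (and possibly a NULL @katom) to hard-stop + * that context's atoms on @js, while kbase_job_slot_softstop_swflags() passes + * a NULL @kctx and a specific @katom (or NULL for any atom) when soft-stopping.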
+ * + * Return: true if an atom was stopped, false otherwise + */ +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action); + +/** + * kbase_job_slot_init - Initialise job slot framework + * @kbdev: Device pointer + * + * Called on driver initialisation + * + * Return: 0 on success + */ +int kbase_job_slot_init(struct kbase_device *kbdev); + +/** + * kbase_job_slot_halt - Halt the job slot framework + * @kbdev: Device pointer + * + * Should prevent any further job slot processing + */ +void kbase_job_slot_halt(struct kbase_device *kbdev); + +/** + * kbase_job_slot_term - Terminate job slot framework + * @kbdev: Device pointer + * + * Called on driver termination + */ +void kbase_job_slot_term(struct kbase_device *kbdev); + +/** + * kbase_gpu_cache_clean - Cause a GPU cache clean & flush + * @kbdev: Device pointer + * + * Caller must not be in IRQ context + */ +void kbase_gpu_cache_clean(struct kbase_device *kbdev); + +#endif /* _KBASE_JM_HWACCESS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.c new file mode 100644 index 000000000000..3212d2257d38 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.c @@ -0,0 +1,1676 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific APIs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) +#include +#endif + +/* Return whether the specified ringbuffer is empty. HW access lock must be + * held */ +#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) +/* Return number of atoms currently in the specified ringbuffer. 
HW access lock + * must be held */ +#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +/** + * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer + * @kbdev: Device pointer + * @katom: Atom to enqueue + * + * Context: Caller must hold the HW access lock + */ +static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; + + WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; + rb->write_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; +} + +/** + * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once + * it has been completed + * @kbdev: Device pointer + * @js: Job slot to remove atom from + * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in + * which case current time will be used. + * + * Context: Caller must hold the HW access lock + * + * Return: Atom removed from ringbuffer + */ +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, + int js, + ktime_t *end_timestamp) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + struct kbase_jd_atom *katom; + + if (SLOT_RB_EMPTY(rb)) { + WARN(1, "GPU ringbuffer unexpectedly empty\n"); + return NULL; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; + + kbase_gpu_release_atom(kbdev, katom, end_timestamp); + + rb->read_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; + + return katom; +} + +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, + int idx) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if ((SLOT_RB_ENTRIES(rb) - 1) < idx) + return NULL; /* idx out of range */ + + return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; +} + +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, + int js) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + if (SLOT_RB_EMPTY(rb)) + return NULL; + + return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; +} + +bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) +{ + int js; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + return true; + } + } + return false; +} + +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED)) + nr++; + } + + return nr; +} + +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + if (kbase_gpu_inspect(kbdev, js, i)) + nr++; + } + + return nr; +} + +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, + enum 
kbase_atom_gpu_rb_state min_rb_state) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state >= min_rb_state)) + nr++; + } + + return nr; +} + +/** + * check_secure_atom - Check if the given atom is in the given secure state and + * has a ringbuffer state of at least + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @katom: Atom pointer + * @secure: Desired secure state + * + * Return: true if atom is in the given state, false otherwise + */ +static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) +{ + if (katom->gpu_rb_state >= + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + ((kbase_jd_katom_is_protected(katom) && secure) || + (!kbase_jd_katom_is_protected(katom) && !secure))) + return true; + + return false; +} + +/** + * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given + * secure state in the ringbuffers of at least + * state + * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE + * @kbdev: Device pointer + * @secure: Desired secure state + * + * Return: true if any atoms are in the given state, false otherwise + */ +static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, + bool secure) +{ + int js, i; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, i); + + if (katom) { + if (check_secure_atom(katom, secure)) + return true; + } + } + } + + return false; +} + +int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +{ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != + KBASE_RESET_GPU_NOT_PENDING) { + /* The GPU is being reset - so prevent submission */ + return 0; + } + + return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); +} + + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + ktime_t *end_timestamp) +{ + struct kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + switch (katom->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to release atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_SUBMITTED: + kbase_kinstr_jm_atom_hw_release(katom); + /* Inform power management at start/finish of atom so it can + * update its GPU utilisation metrics. Mark atom as not + * submitted beforehand. 
*/ + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + kbase_pm_metrics_update(kbdev, end_timestamp); + + if (katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + + case KBASE_ATOM_GPU_RB_READY: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + break; + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK) && + (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { + kbase_pm_protected_override_disable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + } + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) + kbase_pm_protected_entry_override_disable(kbdev); + if (!kbase_jd_katom_is_protected(katom) && + (katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) && + (katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) { + kbase_pm_protected_override_disable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + } + + if (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK || + katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) + kbdev->protected_mode_transition = false; + /* If the atom has suspended hwcnt but has not yet entered + * protected mode, then resume hwcnt now. If the GPU is now in + * protected mode then hwcnt will be resumed by GPU reset so + * don't resume it here. 
+ */ + if (kbase_jd_katom_is_protected(katom) && + ((katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + if (katom->atom_flags & + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + kbase_pm_protected_l2_override(kbdev, false); + katom->atom_flags &= + ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + } + } + + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + break; + } + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; +} + +static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_gpu_release_atom(kbdev, katom, NULL); + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; +} + +/** + * other_slots_busy - Determine if any job slots other than @js are currently + * running atoms + * @kbdev: Device pointer + * @js: Job slot + * + * Return: true if any slots other than @js are busy, false otherwise + */ +static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +{ + int slot; + + for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { + if (slot == js) + continue; + + if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, + KBASE_ATOM_GPU_RB_SUBMITTED)) + return true; + } + + return false; +} + +static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) +{ + return kbdev->protected_mode; +} + +static void kbase_gpu_disable_coherent(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* + * When entering into protected mode, we must ensure that the + * GPU is not operating in coherent mode as well. This is to + * ensure that no protected memory can be leaked. 
+ */ + if (kbdev->system_coherency == COHERENCY_ACE) + kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); +} + +static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) +{ + int err = -EINVAL; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ONCE(!kbdev->protected_ops, + "Cannot enter protected mode: protected callbacks not specified.\n"); + + if (kbdev->protected_ops) { + /* Switch GPU to protected mode */ + err = kbdev->protected_ops->protected_mode_enable( + kbdev->protected_dev); + + if (err) { + dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", + err); + } else { + kbdev->protected_mode = true; + kbase_ipa_protection_mode_switch_event(kbdev); + } + } + + return err; +} + +static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ONCE(!kbdev->protected_ops, + "Cannot exit protected mode: protected callbacks not specified.\n"); + + if (!kbdev->protected_ops) + return -EINVAL; + + /* The protected mode disable callback will be called as part of reset + */ + return kbase_reset_gpu_silent(kbdev); +} + +static int kbase_jm_protected_entry(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + err = kbase_gpu_protected_mode_enter(kbdev); + + /* + * Regardless of result before this call, we are no longer + * transitioning the GPU. + */ + + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + + KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); + if (err) { + /* + * Failed to switch into protected mode, resume + * GPU hwcnt and fail atom. + */ + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* + * Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order. + */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + return -EINVAL; + } + + /* + * Protected mode sanity checks. + */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), + kbase_gpu_in_protected_mode(kbdev)); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + + return err; +} + +static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) +{ + return kbase_jm_enter_protected_mode_exynos(kbdev, katom, idx, js); +} +#else +{ + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + switch (katom[idx]->protected_state.enter) { + case KBASE_ATOM_ENTER_PROTECTED_CHECK: + KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that + * there are no atoms currently on the GPU. 
*/ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + /* If hwcnt is disabled, it means we didn't clean up correctly + * during last exit from protected mode. + */ + WARN_ON(kbdev->protected_mode_hwcnt_disabled); + + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_HWCNT; + + kbdev->protected_mode_transition = true; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_ENTER_PROTECTED_HWCNT: + /* See if we can get away with disabling hwcnt atomically */ + kbdev->protected_mode_hwcnt_desired = false; + if (!kbdev->protected_mode_hwcnt_disabled) { + if (kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)) + kbdev->protected_mode_hwcnt_disabled = true; + } + + /* We couldn't disable atomically, so kick off a worker */ + if (!kbdev->protected_mode_hwcnt_disabled) { +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &kbdev->protected_mode_hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &kbdev->protected_mode_hwcnt_disable_work); +#endif + return -EAGAIN; + } + + /* Once reaching this point GPU must be + * switched to protected mode or hwcnt + * re-enabled. */ + + if (kbase_pm_protected_entry_override_enable(kbdev)) + return -EAGAIN; + + /* + * Not in correct mode, begin protected mode switch. + * Entering protected mode requires us to power down the L2, + * and drop out of fully coherent mode. + */ + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + + kbase_pm_protected_override_enable(kbdev); + /* + * Only if the GPU reset hasn't been initiated, there is a need + * to invoke the state machine to explicitly power down the + * shader cores and L2. + */ + if (!kbdev->pm.backend.protected_entry_transition_override) + kbase_pm_update_cores_state_nolock(kbdev); + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: + /* Avoid unnecessary waiting on non-ACE platforms. */ + if (kbdev->system_coherency == COHERENCY_ACE) { + if (kbdev->pm.backend.l2_always_on) { + /* + * If the GPU reset hasn't completed, then L2 + * could still be powered up. + */ + if (kbase_reset_gpu_is_active(kbdev)) + return -EAGAIN; + } + + if (kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2) || + kbase_is_gpu_lost(kbdev)) { + /* + * The L2 is still powered, wait for all + * the users to finish with it before doing + * the actual reset. + */ + return -EAGAIN; + } + } + + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: + /* + * When entering into protected mode, we must ensure that the + * GPU is not operating in coherent mode as well. This is to + * ensure that no protected memory can be leaked. + */ + kbase_gpu_disable_coherent(kbdev); + + kbase_pm_protected_entry_override_disable(kbdev); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + /* + * Power on L2 caches; this will also result in the + * correct value written to coherency enable register. + */ + kbase_pm_protected_l2_override(kbdev, true); + + /* + * Set the flag on the atom that additional + * L2 references are taken. 
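+ * The reference is dropped again either in the FINISHED state below, + * once the L2 has reached the ON state, or when the atom is released.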
+ */ + katom[idx]->atom_flags |= + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + } + + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_FINISHED; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) + return -EAGAIN; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_ENTER_PROTECTED_FINISHED: + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + /* + * Check that L2 caches are powered and, if so, + * enter protected mode. + */ + if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { + /* + * Remove additional L2 reference and reset + * the atom flag which denotes it. + */ + if (katom[idx]->atom_flags & + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + kbase_pm_protected_l2_override(kbdev, + false); + katom[idx]->atom_flags &= + ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + } + + err = kbase_jm_protected_entry(kbdev, katom, idx, js); + + if (err) + return err; + } else { + /* + * still waiting for L2 caches to power up + */ + return -EAGAIN; + } + } else { + err = kbase_jm_protected_entry(kbdev, katom, idx, js); + + if (err) + return err; + } + } + + return 0; +} +#endif /* CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY */ + +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) +{ + return kbase_jm_exit_protected_mode_exynos(kbdev, katom, idx, js); +} +#else +{ + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + switch (katom[idx]->protected_state.exit) { + case KBASE_ATOM_EXIT_PROTECTED_CHECK: + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev); + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that + * there are no atoms currently on the GPU. */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + + /* + * Exiting protected mode requires a reset, but first the L2 + * needs to be powered down to ensure it's not active when the + * reset is issued. + */ + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; + + kbdev->protected_mode_transition = true; + kbase_pm_protected_override_enable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: + if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { + /* + * The L2 is still powered, wait for all the users to + * finish with it before doing the actual reset. + */ + return -EAGAIN; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_RESET; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_EXIT_PROTECTED_RESET: + /* Issue the reset to the GPU */ + err = kbase_gpu_protected_mode_reset(kbdev); + + if (err == -EAGAIN) + return -EAGAIN; + + if (err) { + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); + + /* Failed to exit protected mode, fail atom */ + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + /* If we're exiting from protected mode, hwcnt must have + * been disabled during entry. 
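+ * Re-enable it here before failing the atom, matching the entry error + * path.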
+ */ + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + + return -EINVAL; + } + + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: + /* A GPU reset is issued when exiting protected mode. Once the + * reset is done all atoms' state will also be reset. For this + * reason, if the atom is still in this state we can safely + * say that the reset has not completed i.e., we have not + * finished exiting protected mode yet. + */ + return -EAGAIN; + } + + return 0; +} +#endif /* CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY */ + +void kbase_backend_slot_update(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_lost(kbdev)) +#else + if (kbase_reset_gpu_is_active(kbdev)) +#endif + return; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + struct kbase_jd_atom *katom[2]; + int idx; + + katom[0] = kbase_gpu_inspect(kbdev, js, 0); + katom[1] = kbase_gpu_inspect(kbdev, js, 1); + WARN_ON(katom[1] && !katom[0]); + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + bool cores_ready; + int ret; + + if (!katom[idx]) + continue; + + switch (katom[idx]->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to update atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + if (kbase_js_atom_blocked_on_x_dep(katom[idx])) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + if (kbase_gpu_check_secure_atoms(kbdev, + !kbase_jd_katom_is_protected( + katom[idx]))) + break; + + if ((idx == 1) && (kbase_jd_katom_is_protected( + katom[0]) != + kbase_jd_katom_is_protected( + katom[1]))) + break; + + if (kbdev->protected_mode_transition) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + + /* + * Exiting protected mode must be done before + * the references on the cores are taken as + * a power down the L2 is required which + * can't happen after the references for this + * atom are taken. + */ + + if (!kbase_gpu_in_protected_mode(kbdev) && + kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition into protected mode. */ + ret = kbase_jm_enter_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } else if (kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition out of protected mode. */ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_CHECK; + + /* Atom needs no protected mode transition. 
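+ * The atom's protected flag already matches the GPU's current mode, so + * it can move straight on to waiting for its cores.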
*/ + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + if (katom[idx]->will_fail_event_code) { + kbase_gpu_mark_atom_for_return(kbdev, + katom[idx]); + /* Set EVENT_DONE so this atom will be + completed, not unpulled. */ + katom[idx]->event_code = + BASE_JD_EVENT_DONE; + /* Only return if head atom or previous + * atom already removed - as atoms must + * be returned in order. */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + break; + } + + cores_ready = kbase_pm_cores_requested(kbdev, + true); + + if (katom[idx]->event_code == + BASE_JD_EVENT_PM_EVENT) { + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_RETURN_TO_JS; + break; + } + + if (!cores_ready) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_READY: + + if (idx == 1) { + /* Only submit if head atom or previous + * atom already submitted */ + if ((katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + + /* If intra-slot serialization in use + * then don't submit atom to NEXT slot + */ + if (kbdev->serialize_jobs & + KBASE_SERIALIZE_INTRA_SLOT) + break; + } + + /* If inter-slot serialization in use then don't + * submit atom if any other slots are in use */ + if ((kbdev->serialize_jobs & + KBASE_SERIALIZE_INTER_SLOT) && + other_slots_busy(kbdev, js)) + break; + +#ifdef CONFIG_MALI_GEM5_BUILD + if (!kbasep_jm_is_js_free(kbdev, js, + katom[idx]->kctx)) + break; +#endif + /* Check if this job needs the cycle counter + * enabled before submission */ + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_request_gpu_cycle_counter_l2_is_on( + kbdev); + + kbase_job_hw_submit(kbdev, katom[idx], js); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_SUBMITTED; + + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. 
*/ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Atom submitted to HW, nothing else to do */ + break; + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, + katom[idx]); + } + break; + } + } + } +} + + +void kbase_backend_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom); + + kbase_gpu_enqueue_atom(kbdev, katom); + kbase_backend_slot_update(kbdev); +} + +#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ + (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) + +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, + u32 completion_code) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_atom *next_katom; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = kbase_gpu_inspect(kbdev, js, 0); + next_katom = kbase_gpu_inspect(kbdev, js, 1); + + if (next_katom && katom->kctx == next_katom->kctx && + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + (HAS_DEP(next_katom) || next_katom->sched_priority == + katom->sched_priority) && + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) + != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) + != 0)) { + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_NOP); + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + + if (completion_code == BASE_JD_EVENT_STOPPED) { + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, + &kbdev->gpu_props.props.raw_props.js_features + [next_katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as + [next_katom->kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, + &kbdev->gpu_props.props.raw_props.js_features + [next_katom->slot_nr]); + } + + if (next_katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + + return true; + } + + return false; +} + +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp) +{ + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + struct kbase_context *kctx = katom->kctx; + + dev_dbg(kbdev->dev, + "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + (void *)katom, completion_code, job_tail, js); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* + * When a hard-stop is followed close after a soft-stop, the completion + * code may be set to STOPPED, even though the job is terminated + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { + if (completion_code == BASE_JD_EVENT_STOPPED && + (katom->atom_flags & + KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { + completion_code = BASE_JD_EVENT_TERMINATED; + } + } + + if ((katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && + completion_code != BASE_JD_EVENT_DONE && + !(completion_code & BASE_JD_SW_EVENT)) { + /* When a job chain fails, on a T60x or when + * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not + * flushed. 
To prevent future evictions causing possible memory + * corruption we need to flush the cache manually before any + * affected memory gets reused. */ + katom->need_cache_flush_cores_retained = true; + } + + katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + + if (completion_code == BASE_JD_EVENT_STOPPED) { + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + /* + * Dequeue next atom from ringbuffers on same slot if required. + * This atom will already have been removed from the NEXT + * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that + * the atoms on this slot are returned in the correct order. + */ + if (next_katom && katom->kctx == next_katom->kctx && + next_katom->sched_priority == + katom->sched_priority) { + WARN_ON(next_katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED); + kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + kbase_jm_return_atom_to_js(kbdev, next_katom); + } + } else if (completion_code != BASE_JD_EVENT_DONE) { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + int i; + + if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) + dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", + js, completion_code, + kbase_gpu_exception_name( + completion_code)); + +#if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 + KBASE_KTRACE_DUMP(kbdev); +#endif + kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); + + /* + * Remove all atoms on the same context from ringbuffers. This + * will not remove atoms that are already on the GPU, as these + * are guaranteed not to have fail dependencies on the failed + * atom. + */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { + struct kbase_jd_atom *katom_idx0 = + kbase_gpu_inspect(kbdev, i, 0); + struct kbase_jd_atom *katom_idx1 = + kbase_gpu_inspect(kbdev, i, 1); + + if (katom_idx0 && katom_idx0->kctx == katom->kctx && + HAS_DEP(katom_idx0) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Dequeue katom_idx0 from ringbuffer */ + kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); + + if (katom_idx1 && + katom_idx1->kctx == katom->kctx + && HAS_DEP(katom_idx1) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Dequeue katom_idx1 from ringbuffer */ + kbase_gpu_dequeue_atom(kbdev, i, + end_timestamp); + + katom_idx1->event_code = + BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, + katom_idx1); + } + katom_idx0->event_code = BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + + } else if (katom_idx1 && + katom_idx1->kctx == katom->kctx && + HAS_DEP(katom_idx1) && + katom_idx1->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Can not dequeue this atom yet - will be + * dequeued when atom at idx0 completes */ + katom_idx1->event_code = BASE_JD_EVENT_STOPPED; + kbase_gpu_mark_atom_for_return(kbdev, + katom_idx1); + } + } + } + + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, js, completion_code); + + if (job_tail != 0 && job_tail != katom->jc) { + /* Some of the job has been executed */ + dev_dbg(kbdev->dev, + "Update job chain address of atom %p to resume from 0x%llx\n", + (void *)katom, job_tail); + + katom->jc = job_tail; + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, + katom, job_tail, js); + } + + /* Only update the event code for jobs that weren't cancelled */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + katom->event_code = (enum base_jd_event_code)completion_code; + + /* Complete the job, and start new ones + * + * Also defer 
remaining work onto the workqueue: + * - Re-queue Soft-stopped jobs + * - For any other jobs, queue the job back into the dependency system + * - Schedule out the parent context if necessary, and schedule a new + * one in. + */ +#ifdef CONFIG_GPU_TRACEPOINTS + { + /* The atom in the HEAD */ + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + if (next_katom && next_katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + char js_string[16]; + + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(*end_timestamp), + (u32)next_katom->kctx->id, 0, + next_katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = + next_katom->kctx; + } else { + char js_string[16]; + + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(ktime_get()), 0, 0, + 0); + kbdev->hwaccess.backend.slot_rb[js].last_context = 0; + } + } +#endif + + if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) + kbase_reset_gpu_silent(kbdev); + + if (completion_code == BASE_JD_EVENT_STOPPED) + katom = kbase_jm_return_atom_to_js(kbdev, katom); + else + katom = kbase_jm_complete(kbdev, katom, end_timestamp); + + if (katom) { + dev_dbg(kbdev->dev, + "Cross-slot dependency %p has become runnable.\n", + (void *)katom); + + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + + kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); + } + + /* For partial shader core off L2 cache flush */ + kbase_pm_update_state(kbdev); + + /* Job completion may have unblocked other atoms. Try to update all job + * slots */ + kbase_backend_slot_update(kbdev); +} + +void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Reset should always take the GPU out of protected mode */ + WARN_ON(kbase_gpu_in_protected_mode(kbdev)); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int atom_idx = 0; + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, atom_idx); + bool keep_in_jm_rb = false; + + if (!katom) + break; + if (katom->protected_state.exit == + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { + /* protected mode sanity checks */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_protected(katom) && js == 0) || + !kbase_jd_katom_is_protected(katom), + "Protected atom on JS%d not supported", js); + } + if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && + !kbase_ctx_flag(katom->kctx, KCTX_DYING)) + keep_in_jm_rb = true; + + kbase_gpu_release_atom(kbdev, katom, NULL); + + /* + * If the atom wasn't on HW when the reset was issued + * then leave it in the RB and next time we're kicked + * it will be processed again from the starting state. + */ + if (keep_in_jm_rb) { + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + /* As the atom was not removed, increment the + * index so that we read the correct atom in the + * next iteration. */ + atom_idx++; + continue; + } + + /* + * The atom was on the HW when the reset was issued + * all we can do is fail the atom. 
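+ * Its progress at the point of the reset is unknown, so it is dequeued + * and completed with BASE_JD_EVENT_JOB_CANCELLED below.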
+ */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, end_timestamp); + } + } + + /* Re-enable GPU hardware counters if we're resetting from protected + * mode. + */ + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, kbdev); + } + + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); +} + +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + u32 hw_action = action & JS_COMMAND_MASK; + + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, + katom->core_req, katom); + katom->kctx->blocked_js[js][katom->sched_priority] = true; +} + +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + u32 action, + bool disjoint) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, + katom); + + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, LSI_KATOM_REMOVED, katom->kctx, katom, 0u, katom->slot_nr, /*info_value*/ 0); + +} + +static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) +{ + if (katom->x_post_dep) { + struct kbase_jd_atom *dep_atom = katom->x_post_dep; + + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && + dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_RETURN_TO_JS) + return dep_atom->slot_nr; + } + return -1; +} + +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + struct kbase_jd_atom *katom_idx0; + struct kbase_jd_atom *katom_idx1; + + bool katom_idx0_valid, katom_idx1_valid; + + bool ret = false; + + int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; + int prio_idx0 = 0, prio_idx1 = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); + katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); + + if (katom_idx0) + prio_idx0 = katom_idx0->sched_priority; + if (katom_idx1) + prio_idx1 = katom_idx1->sched_priority; + + if (katom) { + katom_idx0_valid = (katom_idx0 == katom); + /* If idx0 is to be removed and idx1 is on the same context, + * then idx1 must also be removed otherwise the atoms might be + * returned out of order */ + if (katom_idx1) + katom_idx1_valid = (katom_idx1 == katom) || + (katom_idx0_valid && + (katom_idx0->kctx == + katom_idx1->kctx)); + else + katom_idx1_valid = false; + } else { + katom_idx0_valid = (katom_idx0 && + (!kctx || katom_idx0->kctx == kctx)); + katom_idx1_valid = (katom_idx1 && + (!kctx || katom_idx1->kctx == kctx) && + prio_idx0 == prio_idx1); + } + + if (katom_idx0_valid) + stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); + if (katom_idx1_valid) + stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); + + if (katom_idx0_valid) { + if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Simple case - just dequeue and return */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + if (katom_idx1_valid) { + 
kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom_idx1->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx1); + katom_idx1->kctx->blocked_js[js][prio_idx1] = + true; + } + + katom_idx0->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + katom_idx0->kctx->blocked_js[js][prio_idx0] = true; + } else { + /* katom_idx0 is on GPU */ + if (katom_idx1_valid && katom_idx1->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* katom_idx0 and katom_idx1 are on GPU */ + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT)) == 0) { + /* idx0 has already completed - stop + * idx1 if needed*/ + if (katom_idx1_valid) { + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } else { + /* idx1 is in NEXT registers - attempt + * to remove */ + kbase_reg_write(kbdev, + JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP); + + if (kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO)) + != 0 || + kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI)) + != 0) { + /* idx1 removed successfully, + * will be handled in IRQ */ + kbase_gpu_remove_atom(kbdev, + katom_idx1, + action, true); + stop_x_dep_idx1 = + should_stop_x_dep_slot(katom_idx1); + + /* stop idx0 if still on GPU */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx0, + action); + ret = true; + } else if (katom_idx1_valid) { + /* idx0 has already completed, + * stop idx1 if needed */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + /* idx1 not on GPU but must be dequeued*/ + + /* idx1 will be handled in IRQ */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + /* stop idx0 */ + /* This will be repeated for anything removed + * from the next registers, since their normal + * flow was also interrupted, and this function + * might not enter disjoint state e.g. 
if we + * don't actually do a hard stop on the head + * atom */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } else { + /* no atom in idx1 */ + /* just stop idx0 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Mark for return */ + /* idx1 will be returned once idx0 completes */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + } else { + /* idx1 is on GPU */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT)) == 0) { + /* idx0 has already completed - stop idx1 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx1, + action); + ret = true; + } else { + /* idx1 is in NEXT registers - attempt to + * remove */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP); + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO)) != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI)) != 0) { + /* idx1 removed successfully, will be + * handled in IRQ once idx0 completes */ + kbase_gpu_remove_atom(kbdev, katom_idx1, + action, + false); + } else { + /* idx0 has already completed - stop + * idx1 */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } + } + + + if (stop_x_dep_idx0 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, + NULL, action); + + if (stop_x_dep_idx1 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, + NULL, action); + + return ret; +} + +void kbase_backend_cache_clean(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + if (katom->need_cache_flush_cores_retained) { + kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_wait_cache_clean(kbdev); + + katom->need_cache_flush_cores_retained = false; + } +} + +void kbase_backend_complete_wq(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + /* + * If cache flush required due to HW workaround then perform the flush + * now + */ + kbase_backend_cache_clean(kbdev, katom); +} + +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req) +{ + if (!kbdev->pm.active_count) { + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); + kbase_pm_update_active(kbdev); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); + } +} + +void kbase_gpu_dump_slots(struct kbase_device *kbdev) +{ + unsigned long flags; + int js; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, + idx); + + if (katom) + dev_info(kbdev->dev, + " js%d idx%d : katom=%p gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); + else + dev_info(kbdev->dev, " js%d idx%d : empty\n", + js, idx); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.h new file mode 100644 index 000000000000..c3b9f2d85536 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_jm_rb.h @@ -0,0 +1,83 @@ +/* + * + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific APIs + */ + +#ifndef _KBASE_HWACCESS_GPU_H_ +#define _KBASE_HWACCESS_GPU_H_ + +#include + +/** + * kbase_gpu_irq_evict - Evict an atom from a NEXT slot + * + * @kbdev: Device pointer + * @js: Job slot to evict from + * @completion_code: Event code from job that was run. + * + * Evict the atom in the NEXT slot for the specified job slot. This function is + * called from the job complete IRQ handler when the previous job has failed. + * + * Return: true if job evicted from NEXT registers, false otherwise + */ +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, + u32 completion_code); + +/** + * kbase_gpu_complete_hw - Complete an atom on job slot js + * + * @kbdev: Device pointer + * @js: Job slot that has completed + * @completion_code: Event code from job that has completed + * @job_tail: The tail address from the hardware if the job has partially + * completed + * @end_timestamp: Time of completion + */ +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp); + +/** + * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer + * + * @kbdev: Device pointer + * @js: Job slot to inspect + * @idx: Index into ringbuffer. 0 is the job currently running on + * the slot, 1 is the job waiting, all other values are invalid. + * Return: The atom at that position in the ringbuffer + * or NULL if no atom present + */ +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, + int idx); + +/** + * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers + * + * @kbdev: Device pointer + */ +void kbase_gpu_dump_slots(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_GPU_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_js_backend.c new file mode 100644 index 000000000000..42d77c0b469c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_js_backend.c @@ -0,0 +1,356 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific job scheduler APIs + */ + +#include +#include +#include +#include +#include + +/* MALI_SEC_INTEGRATION */ +#if (LINUX_VERSION_CODE > KERNEL_VERSION(4, 14, 0)) +#include +#endif + +/* + * Hold the runpool_mutex for this + */ +static inline bool timer_callback_should_run(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + int nr_running_ctxs; + + lockdep_assert_held(&kbdev->js_data.runpool_mutex); + + /* Timer must stop if we are suspending */ + if (backend->suspend_timer) + return false; + + /* nr_contexts_pullable is updated with the runpool_mutex. However, the + * locking in the caller gives us a barrier that ensures + * nr_contexts_pullable is up-to-date for reading */ + nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); + +#ifdef CONFIG_MALI_DEBUG + if (kbdev->js_data.softstop_always) { + /* Debug support for allowing soft-stop on a single context */ + return true; + } +#endif /* CONFIG_MALI_DEBUG */ + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { + /* Timeouts would have to be 4x longer (due to micro- + * architectural design) to support OpenCL conformance tests, so + * only run the timer when there's: + * - 2 or more CL contexts + * - 1 or more GLES contexts + * + * NOTE: We will treat a context that has both Compute and Non- + * Compute jobs will be treated as an OpenCL context (hence, we + * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). + */ + { + int nr_compute_ctxs = + kbasep_js_ctx_attr_count_on_runpool(kbdev, + KBASEP_JS_CTX_ATTR_COMPUTE); + int nr_noncompute_ctxs = nr_running_ctxs - + nr_compute_ctxs; + + return (bool) (nr_compute_ctxs >= 2 || + nr_noncompute_ctxs > 0); + } + } else { + /* Run the timer callback whenever you have at least 1 context + */ + return (bool) (nr_running_ctxs > 0); + } +} + +static enum hrtimer_restart timer_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + struct kbase_backend_data *backend; + int s; + bool reset_needed = false; + + KBASE_DEBUG_ASSERT(timer != NULL); + + backend = container_of(timer, struct kbase_backend_data, + scheduling_timer); + kbdev = container_of(backend, struct kbase_device, hwaccess.backend); + js_devdata = &kbdev->js_data; + + /* Loop through the slots */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { + struct kbase_jd_atom *atom = NULL; + + if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { + atom = kbase_gpu_inspect(kbdev, s, 0); + KBASE_DEBUG_ASSERT(atom != NULL); + } + + if (atom != NULL) { + /* The current version of the model doesn't support + * Soft-Stop */ + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { + u32 ticks = atom->ticks++; + +#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) + u32 soft_stop_ticks, hard_stop_ticks, + gpu_reset_ticks; + if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + soft_stop_ticks = + js_devdata->soft_stop_ticks_cl; + hard_stop_ticks = + js_devdata->hard_stop_ticks_cl; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_cl; + } else { + soft_stop_ticks = + js_devdata->soft_stop_ticks; + hard_stop_ticks = + js_devdata->hard_stop_ticks_ss; + gpu_reset_ticks = + 
js_devdata->gpu_reset_ticks_ss; + } + + /* If timeouts have been changed then ensure + * that atom tick count is not greater than the + * new soft_stop timeout. This ensures that + * atoms do not miss any of the timeouts due to + * races between this worker and the thread + * changing the timeouts. */ + if (backend->timeouts_updated && + ticks > soft_stop_ticks) + ticks = atom->ticks = soft_stop_ticks; + + /* Job is Soft-Stoppable */ + if (ticks == soft_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->soft_stop_ticks ticks. + * Soft stop the slot so we can run + * other jobs. + */ +#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS + int disjoint_threshold = + KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; + u32 softstop_flags = 0u; + + dev_dbg(kbdev->dev, "Soft-stop"); + /* nr_user_contexts_running is updated + * with the runpool_mutex, but we can't + * take that here. + * + * However, if it's about to be + * increased then the new context can't + * run any jobs until they take the + * hwaccess_lock, so it's OK to observe + * the older value. + * + * Similarly, if it's about to be + * decreased, the last job from another + * context has already finished, so it's + * not too bad that we observe the older + * value and register a disjoint event + * when we try soft-stopping */ + if (js_devdata->nr_user_contexts_running + >= disjoint_threshold) + softstop_flags |= + JS_COMMAND_SW_CAUSES_DISJOINT; + + kbase_job_slot_softstop_swflags(kbdev, + s, atom, softstop_flags); +#endif + } else if (ticks == hard_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->hard_stop_ticks_ss ticks. + * It should have been soft-stopped by + * now. Hard stop the slot. + */ +#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS + int ms = + js_devdata->scheduling_period_ns + / 1000000u; + dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", + (unsigned long)ticks, + (unsigned long)ms); + kbase_job_slot_hardstop(atom->kctx, s, + atom); +#endif + } else if (ticks == gpu_reset_ticks) { + /* Job has been scheduled for at least + * js_devdata->gpu_reset_ticks_ss ticks. + * It should have left the GPU by now. + * Signal that the GPU needs to be + * reset. + */ + reset_needed = true; + } +#else /* !CONFIG_MALI_JOB_DUMP */ + /* NOTE: During CONFIG_MALI_JOB_DUMP, we use + * the alternate timeouts, which makes the hard- + * stop and GPU reset timeout much longer. We + * also ensure that we don't soft-stop at all. + */ + if (ticks == js_devdata->soft_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->soft_stop_ticks. We do + * not soft-stop during + * CONFIG_MALI_JOB_DUMP, however. + */ + dev_dbg(kbdev->dev, "Soft-stop"); + } else if (ticks == + js_devdata->hard_stop_ticks_dumping) { + /* Job has been scheduled for at least + * js_devdata->hard_stop_ticks_dumping + * ticks. Hard stop the slot. + */ +#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS + int ms = + js_devdata->scheduling_period_ns + / 1000000u; + dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", + (unsigned long)ticks, + (unsigned long)ms); + kbase_job_slot_hardstop(atom->kctx, s, + atom); +#endif + } else if (ticks == + js_devdata->gpu_reset_ticks_dumping) { + /* Job has been scheduled for at least + * js_devdata->gpu_reset_ticks_dumping + * ticks. It should have left the GPU by + * now. Signal that the GPU needs to be + * reset. 
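+ * The reset itself is only issued once, after every slot has been + * examined, at the end of this timer callback.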
+ */ + reset_needed = true; + } +#endif /* !CONFIG_MALI_JOB_DUMP */ + } + } + } + if (reset_needed) { + dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve."); + + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } + /* the timer is re-issued if there is contexts in the run-pool */ + + if (backend->timer_running) + hrtimer_start(&backend->scheduling_timer, + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); + + backend->timeouts_updated = false; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return HRTIMER_NORESTART; +} + +void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + unsigned long flags; + + lockdep_assert_held(&js_devdata->runpool_mutex); + + if (!timer_callback_should_run(kbdev)) { + /* Take spinlock to force synchronisation with timer */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->timer_running = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + /* From now on, return value of timer_callback_should_run() will + * also cause the timer to not requeue itself. Its return value + * cannot change, because it depends on variables updated with + * the runpool_mutex held, which the caller of this must also + * hold */ + hrtimer_cancel(&backend->scheduling_timer); + } + + if (timer_callback_should_run(kbdev) && !backend->timer_running) { + /* Take spinlock to force synchronisation with timer */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->timer_running = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + hrtimer_start(&backend->scheduling_timer, + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); + + KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); + } +} + +int kbase_backend_timer_init(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + backend->scheduling_timer.function = timer_callback; + backend->timer_running = false; + + return 0; +} + +void kbase_backend_timer_term(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_cancel(&backend->scheduling_timer); +} + +void kbase_backend_timer_suspend(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = true; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timer_resume(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = false; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timeouts_changed(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->timeouts_updated = true; +} + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_js_internal.h new file mode 100644 index 000000000000..6576e55d2e39 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_js_internal.h @@ -0,0 +1,74 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific job scheduler APIs + */ + +#ifndef _KBASE_JS_BACKEND_H_ +#define _KBASE_JS_BACKEND_H_ + +/** + * kbase_backend_timer_init() - Initialise the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver initialisation + * + * Return: 0 on success + */ +int kbase_backend_timer_init(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_term() - Terminate the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver termination + */ +void kbase_backend_timer_term(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling + * timer + * @kbdev: Device pointer + * + * This function should be called on suspend, after the active count has reached + * zero. This is required as the timer may have been started on job submission + * to the job scheduler, but before jobs are submitted to the GPU. + * + * Caller must hold runpool_mutex. + */ +void kbase_backend_timer_suspend(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS + * scheduling timer + * @kbdev: Device pointer + * + * This function should be called on resume. Note that it is not guaranteed to + * re-start the timer, only to evaluate whether it should be re-started. + * + * Caller must hold runpool_mutex. + */ +void kbase_backend_timer_resume(struct kbase_device *kbdev); + +#endif /* _KBASE_JS_BACKEND_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_l2_mmu_config.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_l2_mmu_config.c new file mode 100644 index 000000000000..e67d12b1ba3d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_l2_mmu_config.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include "mali_kbase_l2_mmu_config.h" + +/** + * struct l2_mmu_config_limit_region + * + * @value: The default value to load into the L2_MMU_CONFIG register + * @mask: The shifted mask of the field in the L2_MMU_CONFIG register + * @shift: The shift of where the field starts in the L2_MMU_CONFIG register + * This should be the same value as the smaller of the two mask + * values + */ +struct l2_mmu_config_limit_region { + u32 value, mask, shift; +}; + +/** + * struct l2_mmu_config_limit + * + * @product_model: The GPU for which this entry applies + * @read: Values for the read limit field + * @write: Values for the write limit field + */ +struct l2_mmu_config_limit { + u32 product_model; + struct l2_mmu_config_limit_region read; + struct l2_mmu_config_limit_region write; +}; + +/* + * Zero represents no limit + * + * For LBEX TBEX TTRX and TNAX: + * The value represents the number of outstanding reads (6 bits) or writes (5 bits) + * + * For all other GPUS it is a fraction see: mali_kbase_config_defaults.h + */ +static const struct l2_mmu_config_limit limits[] = { + /* GPU read write */ + {GPU_ID2_PRODUCT_LBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} }, + {GPU_ID2_PRODUCT_TBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} }, + {GPU_ID2_PRODUCT_TTRX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} }, + {GPU_ID2_PRODUCT_TNAX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} }, + {GPU_ID2_PRODUCT_TGOX, + {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, + {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, + {GPU_ID2_PRODUCT_TNOX, + {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, + {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, +}; + +int kbase_set_mmu_quirks(struct kbase_device *kbdev) +{ + /* All older GPUs had 2 bits for both fields, this is a default */ + struct l2_mmu_config_limit limit = { + 0, /* Any GPU not in the limits array defined above */ + {KBASE_AID_32, GENMASK(25, 24), 24}, + {KBASE_AID_32, GENMASK(27, 26), 26} + }; + u32 product_model, gpu_id; + u32 mmu_config; + int i; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; + + /* Limit the GPU bus bandwidth if the platform needs this. */ + for (i = 0; i < ARRAY_SIZE(limits); i++) { + if (product_model == limits[i].product_model) { + limit = limits[i]; + break; + } + } + + mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + mmu_config &= ~(limit.read.mask | limit.write.mask); + /* Can't use FIELD_PREP() macro here as the mask isn't constant */ + mmu_config |= (limit.read.value << limit.read.shift) | + (limit.write.value << limit.write.shift); + + kbdev->hw_quirks_mmu = mmu_config; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Allow memory configuration disparity to be ignored, + * we optimize the use of shared memory and thus we + * expect some disparity in the memory configuration. + */ + kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; + } + + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_l2_mmu_config.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_l2_mmu_config.h new file mode 100644 index 000000000000..0c779ac80d27 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_l2_mmu_config.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_L2_MMU_CONFIG_H_ +#define _KBASE_L2_MMU_CONFIG_H_ +/** + * kbase_set_mmu_quirks - Set the hw_quirks_mmu field of kbdev + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Use this function to initialise the hw_quirks_mmu field, for instance to set + * the MAX_READS and MAX_WRITES to sane defaults for each GPU. + * + * Return: Zero for succeess or a Linux error code + */ +int kbase_set_mmu_quirks(struct kbase_device *kbdev); + +#endif /* _KBASE_L2_MMU_CONFIG_H */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_always_on.c new file mode 100644 index 000000000000..e33fe0b8e415 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_always_on.c @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * "Always on" power management policy + */ + +#include +#include + +static bool always_on_shaders_needed(struct kbase_device *kbdev) +{ + return true; +} + +static bool always_on_get_core_active(struct kbase_device *kbdev) +{ + return true; +} + +static void always_on_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void always_on_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* + * The struct kbase_pm_policy structure for the "Always on" power policy. + * + * This is the static structure that defines the "Always on" power policy's + * callback and name. + */ +const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { + "always_on", /* name */ + always_on_init, /* init */ + always_on_term, /* term */ + always_on_shaders_needed, /* shaders_needed */ + always_on_get_core_active, /* get_core_active */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_always_on.h new file mode 100644 index 000000000000..e7927cf82e5a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_always_on.h @@ -0,0 +1,81 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * "Always on" power management policy + */ + +#ifndef MALI_KBASE_PM_ALWAYS_ON_H +#define MALI_KBASE_PM_ALWAYS_ON_H + +/** + * DOC: + * The "Always on" power management policy has the following + * characteristics: + * + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * Shader Cores are powered up, regardless of whether or not they will be + * needed later. + * + * - When KBase indicates that Shader Cores are needed to submit the currently + * queued Job Chains: + * Shader Cores are kept powered, regardless of whether or not they will be + * needed. + * + * - When KBase indicates that the GPU need not be powered: + * The Shader Cores are kept powered, regardless of whether or not they will + * be needed. The GPU itself is also kept powered, even though it is not + * needed. + * + * This policy is automatically overridden during system suspend: the desired + * core state is ignored, and the cores are forced off regardless of what the + * policy requests. After resuming from suspend, new changes to the desired + * core state made by the policy are honored. + * + * Note: + * + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains.
+ * + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_always_on - Private struct for policy instance data + * @dummy: unused dummy variable + * + * This contains data that is private to the particular power policy that is + * active. + */ +struct kbasep_pm_policy_always_on { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; + +#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_backend.c new file mode 100644 index 000000000000..a357f529198d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_backend.c @@ -0,0 +1,763 @@ + /* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * GPU backend implementation of base kernel power management APIs + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); +static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); +static void kbase_pm_gpu_clock_control_worker(struct work_struct *data); + +int kbase_pm_runtime_init(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + if (callbacks) { + kbdev->pm.backend.callback_power_on = + callbacks->power_on_callback; + kbdev->pm.backend.callback_power_off = + callbacks->power_off_callback; + kbdev->pm.backend.callback_power_suspend = + callbacks->power_suspend_callback; + kbdev->pm.backend.callback_power_resume = + callbacks->power_resume_callback; + kbdev->pm.callback_power_runtime_init = + callbacks->power_runtime_init_callback; + kbdev->pm.callback_power_runtime_term = + callbacks->power_runtime_term_callback; + kbdev->pm.backend.callback_power_runtime_on = + callbacks->power_runtime_on_callback; + kbdev->pm.backend.callback_power_runtime_off = + callbacks->power_runtime_off_callback; + kbdev->pm.backend.callback_power_runtime_idle = + callbacks->power_runtime_idle_callback; + kbdev->pm.backend.callback_soft_reset = + callbacks->soft_reset_callback; + /* MALI_SEC_INTEGRATION */ + kbdev->pm.backend.callback_power_dvfs_on = + callbacks->power_dvfs_on_callback; + + if (callbacks->power_runtime_init_callback) + return callbacks->power_runtime_init_callback(kbdev); + else + return 0; + } + + kbdev->pm.backend.callback_power_on = NULL; + kbdev->pm.backend.callback_power_off = NULL; + kbdev->pm.backend.callback_power_suspend = NULL; + 
kbdev->pm.backend.callback_power_resume = NULL; + kbdev->pm.callback_power_runtime_init = NULL; + kbdev->pm.callback_power_runtime_term = NULL; + kbdev->pm.backend.callback_power_runtime_on = NULL; + kbdev->pm.backend.callback_power_runtime_off = NULL; + kbdev->pm.backend.callback_power_runtime_idle = NULL; + kbdev->pm.backend.callback_soft_reset = NULL; + + return 0; +} + +void kbase_pm_runtime_term(struct kbase_device *kbdev) +{ + if (kbdev->pm.callback_power_runtime_term) { + kbdev->pm.callback_power_runtime_term(kbdev); + } +} + +void kbase_pm_register_access_enable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_on_callback(kbdev); + + kbdev->pm.backend.gpu_powered = true; +} + +void kbase_pm_register_access_disable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_off_callback(kbdev); + + kbdev->pm.backend.gpu_powered = false; +} + +int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +{ + int ret = 0; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_init(&kbdev->pm.lock); + + kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!kbdev->pm.backend.gpu_poweroff_wait_wq) + return -ENOMEM; + + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, + kbase_pm_gpu_poweroff_wait_wq); + + kbdev->pm.backend.ca_cores_enabled = ~0ull; + kbdev->pm.backend.gpu_powered = false; + kbdev->pm.suspending = false; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbdev->pm.gpu_lost = false; +#endif + /* MALI_SEC_INTEGRATION */ + init_waitqueue_head(&kbdev->pm.suspending_wait); +#ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = false; +#endif /* CONFIG_MALI_DEBUG */ + init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); + + /* Initialise the metrics subsystem */ + ret = kbasep_pm_metrics_init(kbdev); + if (ret) + return ret; + + init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); + kbdev->pm.backend.reset_done = false; + + init_waitqueue_head(&kbdev->pm.zero_active_count_wait); + kbdev->pm.active_count = 0; + + spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); + + init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); + + if (kbase_pm_ca_init(kbdev) != 0) + goto workq_fail; + + kbase_pm_policy_init(kbdev); + + if (kbase_pm_state_machine_init(kbdev) != 0) + goto pm_state_machine_fail; + + kbdev->pm.backend.hwcnt_desired = false; + kbdev->pm.backend.hwcnt_disabled = true; + INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, + kbase_pm_hwcnt_disable_worker); + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) { + kbdev->pm.backend.l2_always_on = false; + kbdev->pm.backend.gpu_clock_slow_down_wa = false; + + return 0; + } + + /* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */ + if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) { + kbdev->pm.backend.gpu_clock_slow_down_wa = false; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) + kbdev->pm.backend.l2_always_on = true; + else + kbdev->pm.backend.l2_always_on = false; + + return 0; + } + + /* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */ + kbdev->pm.backend.l2_always_on = false; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) { + 
kbdev->pm.backend.gpu_clock_slow_down_wa = true; + kbdev->pm.backend.gpu_clock_suspend_freq = 0; + kbdev->pm.backend.gpu_clock_slow_down_desired = true; + kbdev->pm.backend.gpu_clock_slowed_down = false; + INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work, + kbase_pm_gpu_clock_control_worker); + } else + kbdev->pm.backend.gpu_clock_slow_down_wa = false; + + return 0; + +pm_state_machine_fail: + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); +workq_fail: + kbasep_pm_metrics_term(kbdev); + return -EINVAL; +} + +void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) +{ + lockdep_assert_held(&kbdev->pm.lock); + + /* Turn clocks and interrupts on - no-op if we haven't done a previous + * kbase_pm_clock_off() */ + kbase_pm_clock_on(kbdev, is_resume); + + if (!is_resume) { + unsigned long flags; + + /* Force update of L2 state - if we have abandoned a power off + * then this may be required to power the L2 back on. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* Update core status as required by the policy */ + kbase_pm_update_cores_state(kbdev); + + /* NOTE: We don't wait to reach the desired state, since running atoms + * will wait for that state to be reached anyway */ +} + +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.gpu_poweroff_wait_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + + /* Wait for power transitions to complete. We do this with no locks held + * so that we don't deadlock with any pending workqueues. + */ + kbase_pm_wait_for_desired_state(kbdev); + + kbase_pm_lock(kbdev); + + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, KBASE_DEVICE_PM_WAIT_WQ_RUN, NULL, backend->poweron_required); + + if (!backend->poweron_required) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON(backend->shaders_state != + KBASE_SHADERS_OFF_CORESTACK_OFF || + backend->l2_state != KBASE_L2_OFF); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Disable interrupts and turn the clock off */ + if (!kbase_pm_clock_off(kbdev)) { + /* + * Page/bus faults are pending, must drop locks to + * process. Interrupts are disabled so no more faults + * should be generated at this point. + */ + kbase_pm_unlock(kbdev); + kbase_flush_mmu_wqs(kbdev); + kbase_pm_lock(kbdev); + + /* Turn off clock now that fault have been handled. We + * dropped locks so poweron_required may have changed - + * power back on if this is the case (effectively only + * re-enabling of the interrupts would be done in this + * case, as the clocks to GPU were not withdrawn yet). 
+ */ + if (backend->poweron_required) + kbase_pm_clock_on(kbdev, false); + else + WARN_ON(!kbase_pm_clock_off(kbdev)); + } + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->poweroff_wait_in_progress = false; + if (backend->poweron_required) { + backend->poweron_required = false; + kbdev->pm.backend.l2_desired = true; + kbase_pm_update_state(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + kbase_backend_slot_update(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_pm_unlock(kbdev); + + wake_up(&kbdev->pm.backend.poweroff_wait); +} + +static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) +{ +#if defined(CONFIG_MALI_MIDGARD_DVFS) + struct clk *clk = kbdev->clocks[0]; +#endif + + if (!kbdev->pm.backend.gpu_clock_slow_down_wa) + return; + + /* No suspend clock is specified */ + if (WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_suspend_freq)) + return; + +#if defined(CONFIG_MALI_DEVFREQ) + + /* Suspend devfreq */ + devfreq_suspend_device(kbdev->devfreq); + + /* Keep the current freq to restore it upon resume */ + kbdev->previous_frequency = kbdev->current_nominal_freq; + + /* Slow down GPU clock to the suspend clock*/ + kbase_devfreq_force_freq(kbdev, + kbdev->pm.backend.gpu_clock_suspend_freq); + +#elif defined(CONFIG_MALI_MIDGARD_DVFS) /* CONFIG_MALI_DEVFREQ */ + + if (WARN_ON_ONCE(!clk)) + return; + + /* Stop the metrics gathering framework */ + if (kbase_pm_metrics_is_active(kbdev)) + kbase_pm_metrics_stop(kbdev); + + /* Keep the current freq to restore it upon resume */ + kbdev->previous_frequency = clk_get_rate(clk); + + /* Slow down GPU clock to the suspend clock*/ + if (WARN_ON_ONCE(clk_set_rate(clk, + kbdev->pm.backend.gpu_clock_suspend_freq))) + dev_err(kbdev->dev, "Failed to set suspend freq\n"); + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ +} + +static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev) +{ +#if defined(CONFIG_MALI_MIDGARD_DVFS) + struct clk *clk = kbdev->clocks[0]; +#endif + + if (!kbdev->pm.backend.gpu_clock_slow_down_wa) + return; + +#if defined(CONFIG_MALI_DEVFREQ) + + /* Restore GPU clock to the previous one */ + kbase_devfreq_force_freq(kbdev, kbdev->previous_frequency); + + /* Resume devfreq */ + devfreq_resume_device(kbdev->devfreq); + +#elif defined(CONFIG_MALI_MIDGARD_DVFS) /* CONFIG_MALI_DEVFREQ */ + + if (WARN_ON_ONCE(!clk)) + return; + + /* Restore GPU clock */ + if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency))) + dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", + kbdev->previous_frequency); + + /* Restart the metrics gathering framework */ + kbase_pm_metrics_start(kbdev); + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ +} + +static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.gpu_clock_control_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + bool slow_down = false, normalize = false; + + /* Determine if GPU clock control is required */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (!backend->gpu_clock_slowed_down && + backend->gpu_clock_slow_down_desired) { + slow_down = true; + backend->gpu_clock_slowed_down = true; + } else if (backend->gpu_clock_slowed_down && + !backend->gpu_clock_slow_down_desired) { + normalize = true; + backend->gpu_clock_slowed_down = false; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Control GPU clock according to the request of L2 
state machine. + * The GPU clock needs to be lowered for safe L2 power down + * and restored to previous speed at L2 power up. + */ + if (slow_down) + kbase_pm_l2_clock_slow(kbdev); + else if (normalize) + kbase_pm_l2_clock_normalize(kbdev); + + /* Tell L2 state machine to transit to next state */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.hwcnt_disable_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + + bool do_disable; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!do_disable) + return; + + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + + if (do_disable) { + /* PM state did not change while we were doing the disable, + * so commit the work we just performed and continue the state + * machine. + */ + backend->hwcnt_disabled = true; + kbase_pm_update_state(kbdev); + kbase_backend_slot_update(kbdev); + } else { + /* PM state was updated while we were doing the disable, + * so we need to undo the disable we just performed. + */ + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_pm_do_poweroff(struct kbase_device *kbdev) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) + goto unlock_hwaccess; + + if (kbdev->pm.backend.poweroff_wait_in_progress) + goto unlock_hwaccess; + + /* Force all cores off */ + kbdev->pm.backend.shaders_desired = false; + kbdev->pm.backend.l2_desired = false; + + kbdev->pm.backend.poweroff_wait_in_progress = true; + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true; + + /* l2_desired being false should cause the state machine to + * start powering off the L2. When it actually is powered off, + * the interrupt handler will call kbase_pm_l2_update_state() + * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq. + * Callers of this function will need to wait on poweroff_wait. 
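+ *
+ * Callers that need to block until the power off has actually completed can
+ * use kbase_pm_wait_for_poweroff_complete() below, which sleeps on
+ * poweroff_wait until poweroff_wait_in_progress has been cleared.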
+ */ + kbase_pm_update_state(kbdev); + +unlock_hwaccess: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static bool is_poweroff_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev) +{ + wait_event_killable(kbdev->pm.backend.poweroff_wait, + is_poweroff_in_progress(kbdev)); +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_complete); + +int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, + unsigned int flags) +{ + unsigned long irq_flags; + int ret; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbase_pm_lock(kbdev); + + /* A suspend won't happen during startup/insmod */ + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + /* Power up the GPU, don't enable IRQs as we are not ready to receive + * them. */ + ret = kbase_pm_init_hw(kbdev, flags); + if (ret) { + kbase_pm_unlock(kbdev); + return ret; + } + + kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = + kbdev->pm.debug_core_mask[1] = + kbdev->pm.debug_core_mask[2] = + kbdev->gpu_props.props.raw_props.shader_present; + + /* Pretend the GPU is active to prevent a power policy turning the GPU + * cores off */ + kbdev->pm.active_count = 1; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + /* Ensure cycle counter is off */ + kbdev->pm.backend.gpu_cycle_counter_requests = 0; + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + + /* We are ready to receive IRQ's now as power policy is set up, so + * enable them now. 
*/ +#ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = true; +#endif + kbase_pm_enable_interrupts(kbdev); + + /* Turn on the GPU and any cores needed by the policy */ + kbase_pm_do_poweron(kbdev, false); + kbase_pm_unlock(kbdev); + + return 0; +} + +void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_lock(&kbdev->pm.lock); + kbase_pm_do_poweroff(kbdev); + mutex_unlock(&kbdev->pm.lock); +} + +KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); + +void kbase_hwaccess_pm_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); + + cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work); + + if (kbdev->pm.backend.hwcnt_disabled) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* Free any resources the policy allocated */ + kbase_pm_state_machine_term(kbdev); + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); + + /* Shut down the metrics subsystem */ + kbasep_pm_metrics_term(kbdev); + + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); +} + +void kbase_pm_power_changed(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + + kbase_backend_slot_update(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbase_dummy_job_wa_enabled(kbdev)) { + dev_warn(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); + new_core_mask_js0 = kbdev->pm.debug_core_mask[0]; + } + + kbdev->pm.debug_core_mask[0] = new_core_mask_js0; + kbdev->pm.debug_core_mask[1] = new_core_mask_js1; + kbdev->pm.debug_core_mask[2] = new_core_mask_js2; + kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | + new_core_mask_js2; + + kbase_pm_update_dynamic_cores_onoff(kbdev); +} + +void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} + +void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} + +void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) +{ + /* Force power off the GPU and all cores (regardless of policy), only + * after the PM active count reaches zero (otherwise, we risk turning it + * off prematurely) */ + kbase_pm_lock(kbdev); + + kbase_pm_do_poweroff(kbdev); + + kbase_backend_timer_suspend(kbdev); + + kbase_pm_unlock(kbdev); + + kbase_pm_wait_for_poweroff_complete(kbdev); + + if (kbdev->pm.backend.callback_power_suspend) + kbdev->pm.backend.callback_power_suspend(kbdev); + + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, KBASE_DEVICE_PM_SUSPEND, NULL, 0u); +} + +void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) +{ + kbase_pm_lock(kbdev); + + kbdev->pm.suspending = false; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbdev->pm.gpu_lost = false; +#endif + kbase_pm_do_poweron(kbdev, true); + /* MALI_SEC_INTEGRATION */ + wake_up(&kbdev->pm.suspending_wait); + + kbase_backend_timer_resume(kbdev); + + kbase_pm_unlock(kbdev); + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, 
KBASE_DEVICE_PM_RESUME, NULL, 0u); +} + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) +{ + unsigned long flags; + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + ktime_t end_timestamp = ktime_get(); + + /* Full GPU reset will have been done by hypervisor, so cancel */ + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + + /* GPU is no longer mapped to VM. So no interrupts will be received + * and Mali registers have been replaced by dummy RAM + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock(&kbdev->mmu_mask_change); + kbdev->irq_reset_flush = true; + spin_unlock(&kbdev->mmu_mask_change); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_synchronize_irqs(kbdev); + kbase_flush_mmu_wqs(kbdev); + kbdev->irq_reset_flush = false; + + /* Clear all jobs running on the GPU */ + mutex_lock(&kbdev->pm.lock); + kbdev->pm.gpu_lost = true; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = false; + if (!kbdev->pm.backend.protected_entry_transition_override) + kbase_backend_reset(kbdev, &end_timestamp); + kbase_pm_metrics_update(kbdev, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Cancel any pending HWC dumps */ + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Wait for all threads keeping GPU active to complete */ + mutex_unlock(&kbdev->pm.lock); + wait_event(kbdev->pm.zero_active_count_wait, + kbdev->pm.active_count == 0); + mutex_lock(&kbdev->pm.lock); + + /* Update state to GPU off */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.shaders_desired = false; + kbdev->pm.backend.l2_desired = false; + backend->l2_state = KBASE_L2_OFF; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; + kbdev->pm.backend.gpu_powered = false; + backend->poweroff_wait_in_progress = false; + KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); + kbase_gpu_cache_clean_wait_complete(kbdev); + backend->poweroff_wait_in_progress = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + wake_up(&kbdev->pm.backend.poweroff_wait); + mutex_unlock(&kbdev->pm.lock); +} +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca.c new file mode 100644 index 000000000000..e7eef2618ac1 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca.c @@ -0,0 +1,112 @@ +/* + * + * (C) COPYRIGHT 2013-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel core availability APIs + */ + +#include +#include +#include +#ifdef CONFIG_MALI_NO_MALI +#include +#endif +#include + +int kbase_pm_ca_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DEVFREQ + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; + + if (kbdev->current_core_mask) + pm_backend->ca_cores_enabled = kbdev->current_core_mask; + else + pm_backend->ca_cores_enabled = + kbdev->gpu_props.props.raw_props.shader_present; +#endif + + return 0; +} + +void kbase_pm_ca_term(struct kbase_device *kbdev) +{ +} + +#ifdef CONFIG_MALI_DEVFREQ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) +{ + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!(core_mask & kbdev->pm.debug_core_mask_all)) { + dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", + core_mask, kbdev->pm.debug_core_mask_all); + goto unlock; + } + + if (kbase_dummy_job_wa_enabled(kbdev)) { + dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); + goto unlock; + } + + pm_backend->ca_cores_enabled = core_mask; + + kbase_pm_update_state(kbdev); + +unlock: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", + pm_backend->ca_cores_enabled); +} +#endif + +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DEVFREQ + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; +#endif + + lockdep_assert_held(&kbdev->hwaccess_lock); + +#ifdef CONFIG_MALI_DEVFREQ + return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all; +#else + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask_all; +#endif +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); + +u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + +#ifdef CONFIG_MALI_NO_MALI + return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1); +#else + return kbdev->pm.backend.pm_shaders_core_mask; +#endif +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca.h new file mode 100644 index 000000000000..5423e96725b9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca.h @@ -0,0 +1,89 @@ +/* + * + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel core availability APIs + */ + +#ifndef _KBASE_PM_CA_H_ +#define _KBASE_PM_CA_H_ + +/** + * kbase_pm_ca_init - Initialize core availability framework + * + * Must be called before calling any other core availability function + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the core availability framework was successfully initialized, + * -errno otherwise + */ +int kbase_pm_ca_init(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_term - Terminate core availability framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_ca_term(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_get_core_mask - Get currently available shaders core mask + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Returns a mask of the currently available shader cores. + * Calls into the core availability policy + * + * Return: The bit mask of available cores + */ +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_update_core_status - Update core status + * + * @kbdev: The kbase device structure for the device (must be + * a valid pointer) + * @cores_ready: The bit mask of cores ready for job submission + * @cores_transitioning: The bit mask of cores that are transitioning power + * state + * + * Update core availability policy with current core power status + * + * Calls into the core availability policy + */ +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning); + +/** + * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Returns a mask of the PM state synchronised shader cores for arranging + * HW performance counter dumps + * + * Return: The bit mask of PM state synchronised cores + */ +u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev); + +#endif /* _KBASE_PM_CA_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca_devfreq.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca_devfreq.h new file mode 100644 index 000000000000..f67ec650c981 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_ca_devfreq.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * A core availability policy for use with devfreq, where core masks are + * associated with OPPs. 
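+ *
+ * Illustrative only: a platform's devfreq integration that has looked up the
+ * core mask associated with the newly selected OPP (the variable name
+ * "opp_core_mask" below is just a placeholder) would apply it with:
+ *
+ *   kbase_devfreq_set_core_mask(kbdev, opp_core_mask);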
+ */ + +#ifndef MALI_KBASE_PM_CA_DEVFREQ_H +#define MALI_KBASE_PM_CA_DEVFREQ_H + +/** + * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy + * + * This contains data that is private to the devfreq core availability + * policy. + * + * @cores_desired: Cores that the policy wants to be available + * @cores_enabled: Cores that the policy is currently returning as available + * @cores_used: Cores currently powered or transitioning + */ +struct kbasep_pm_ca_policy_devfreq { + u64 cores_desired; + u64 cores_enabled; + u64 cores_used; +}; + +extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; + +/** + * kbase_devfreq_set_core_mask - Set core mask for policy to use + * @kbdev: Device pointer + * @core_mask: New core mask + * + * The new core mask will have immediate effect if the GPU is powered, or will + * take effect when it is next powered on. + */ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); + +#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ + diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_coarse_demand.c new file mode 100644 index 000000000000..9eef44ad877f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -0,0 +1,66 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * "Coarse Demand" power management policy + */ + +#include +#include + +static bool coarse_demand_shaders_needed(struct kbase_device *kbdev) +{ + return kbase_pm_is_active(kbdev); +} + +static bool coarse_demand_get_core_active(struct kbase_device *kbdev) +{ + return kbase_pm_is_active(kbdev); +} + +static void coarse_demand_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void coarse_demand_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* The struct kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback + * and name. 
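+ *
+ * Both coarse_demand_shaders_needed() and coarse_demand_get_core_active()
+ * above simply return kbase_pm_is_active(kbdev), so this policy requests the
+ * GPU and the shader cores exactly while the PM active count is non-zero.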
+ */ +const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { + "coarse_demand", /* name */ + coarse_demand_init, /* init */ + coarse_demand_term, /* term */ + coarse_demand_shaders_needed, /* shaders_needed */ + coarse_demand_get_core_active, /* get_core_active */ + KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_coarse_demand.h new file mode 100644 index 000000000000..304e5d7fa32d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_coarse_demand.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * "Coarse Demand" power management policy + */ + +#ifndef MALI_KBASE_PM_COARSE_DEMAND_H +#define MALI_KBASE_PM_COARSE_DEMAND_H + +/** + * DOC: + * The "Coarse" demand power management policy has the following + * characteristics: + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * - Shader Cores are powered up, regardless of whether or not they will be + * needed later. + * - When KBase indicates that Shader Cores are needed to submit the currently + * queued Job Chains: + * - Shader Cores are kept powered, regardless of whether or not they will + * be needed + * - When KBase indicates that the GPU need not be powered: + * - The Shader Cores are powered off, and the GPU itself is powered off too. + * + * @note: + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand + * policy + * + * This contains data that is private to the coarse demand power policy. + * + * @dummy: Dummy member - no state needed + */ +struct kbasep_pm_policy_coarse_demand { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops; + +#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_defs.h new file mode 100644 index 000000000000..872578f8943d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_defs.h @@ -0,0 +1,532 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend-specific Power Manager definitions + */ + +#ifndef _KBASE_PM_HWACCESS_DEFS_H_ +#define _KBASE_PM_HWACCESS_DEFS_H_ + +#include "mali_kbase_pm_always_on.h" +#include "mali_kbase_pm_coarse_demand.h" +#if !MALI_CUSTOMER_RELEASE +#include "mali_kbase_pm_always_on_demand.h" +#endif + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; +struct kbase_jd_atom; + +/** + * enum kbase_pm_core_type - The types of core in a GPU. + * + * These enumerated values are used in calls to + * - kbase_pm_get_present_cores() + * - kbase_pm_get_active_cores() + * - kbase_pm_get_trans_cores() + * - kbase_pm_get_ready_cores(). + * + * They specify which type of core should be acted on. These values are set in + * a manner that allows core_type_to_reg() function to be simpler and more + * efficient. + * + * @KBASE_PM_CORE_L2: The L2 cache + * @KBASE_PM_CORE_SHADER: Shader cores + * @KBASE_PM_CORE_TILER: Tiler cores + * @KBASE_PM_CORE_STACK: Core stacks + */ +enum kbase_pm_core_type { + KBASE_PM_CORE_L2 = L2_PRESENT_LO, + KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, + KBASE_PM_CORE_TILER = TILER_PRESENT_LO, + KBASE_PM_CORE_STACK = STACK_PRESENT_LO +}; + +/** + * enum kbase_l2_core_state - The states used for the L2 cache & tiler power + * state machine. + * + * @KBASE_L2_OFF: The L2 cache and tiler are off + * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on + * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. + * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being + * enabled + * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled + * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being + * disabled + * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest + * clock. Conditionally used. + * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off + * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off + * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state + * are unknown + */ +enum kbase_l2_core_state { +#define KBASEP_L2_STATE(n) KBASE_L2_ ## n, +#include "mali_kbase_pm_l2_states.h" +#undef KBASEP_L2_STATE +}; + +/** + * enum kbase_shader_core_state - The states used for the shaders' state machine. + * + * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off + * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have + * been requested to power on and hwcnt + * is being disabled + * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been + * requested to power on. Or after doing + * partial shader on/off, checking whether + * it's the desired state. 
+ * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt + * already enabled. + * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks + * are on, hwcnt disabled, and checks + * to powering down or re-enabling + * hwcnt. + * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to + * power off, but they remain on for the + * duration of the hysteresis timer + * @KBASE_SHADERS_WAIT_GPU_IDLE: The shaders partial poweroff needs to reach + * a state where jobs on the GPU are finished + * including jobs currently running and in the + * GPU queue because of GPU2017-861 + * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired + * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the + * level 2 cache is being flushed. + * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders + * are ready to be powered off. + * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders + * have been requested to power off + * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks + * have been requested to power off + * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are + * off, but the tick timer + * cancellation is still + * pending. + * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power + * states are unknown + */ +enum kbase_shader_core_state { +#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, +#include "mali_kbase_pm_shader_states.h" +#undef KBASEP_SHADER_STATE +}; + +/** + * struct kbasep_pm_metrics - Metrics data collected for use by the power + * management framework. + * + * @time_busy: number of ns the GPU was busy executing jobs since the + * @time_period_start timestamp. + * @time_idle: number of ns since time_period_start the GPU was not executing + * jobs since the @time_period_start timestamp. + * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that + * if two CL jobs were active for 400ns, this value would be updated + * with 800. + * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that + * if two GL jobs were active for 400ns, this value would be updated + * with 800. + */ +struct kbasep_pm_metrics { + u32 time_busy; + u32 time_idle; + u32 busy_cl[2]; + u32 busy_gl; +}; + +/** + * struct kbasep_pm_metrics_state - State required to collect the metrics in + * struct kbasep_pm_metrics + * @time_period_start: time at which busy/idle measurements started + * @gpu_active: true when the GPU is executing jobs. false when + * not. Updated when the job scheduler informs us a job in submitted + * or removed from a GPU slot. + * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. + * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. + * @lock: spinlock protecting the kbasep_pm_metrics_data structure + * @platform_data: pointer to data controlled by platform specific code + * @kbdev: pointer to kbase device for which metrics are collected + * @values: The current values of the power management metrics. The + * kbase_pm_get_dvfs_metrics() function is used to compare these + * current values with the saved values from a previous invocation. + * @timer: timer to regularly make DVFS decisions based on the power + * management metrics. + * @timer_active: boolean indicating @timer is running + * @dvfs_last: values of the PM metrics from the last DVFS tick + * @dvfs_diff: different between the current and previous PM metrics. 
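+ * @work: delayed work item that is part of the MALI_SEC_INTEGRATION
+ * additions to the metrics state (platform-specific use).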
+ */ +struct kbasep_pm_metrics_state { + ktime_t time_period_start; + bool gpu_active; + u32 active_cl_ctx[2]; + u32 active_gl_ctx[3]; + spinlock_t lock; +/* MALI_SEC_INTEGRATION */ +/* #ifdef CONFIG_MALI_MIDGARD_DVFS */ + struct hrtimer timer; + bool timer_active; +/* MALI_SEC_INTEGRATION */ + struct delayed_work work; +/* #endif */ + + void *platform_data; + struct kbase_device *kbdev; + + struct kbasep_pm_metrics values; + +#ifdef CONFIG_MALI_MIDGARD_DVFS + struct hrtimer timer; + bool timer_active; + struct kbasep_pm_metrics dvfs_last; + struct kbasep_pm_metrics dvfs_diff; +#endif + +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_SEC_CL_BOOST + atomic_t time_compute_jobs, time_vertex_jobs, time_fragment_jobs; + bool is_full_compute_util; /* Only compute utilisation is 100% */ +#endif +}; + +/** + * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer + * @wq: Work queue to wait for the timer to stopped + * @work: Work item which cancels the timer + * @timer: Timer for powering off the shader cores + * @configured_interval: Period of GPU poweroff timer + * @configured_ticks: User-configured number of ticks to wait after the shader + * power down request is received before turning off the cores + * @remaining_ticks: Number of remaining timer ticks until shaders are powered off + * @cancel_queued: True if the cancellation work item has been queued. This is + * required to ensure that it is not queued twice, e.g. after + * a reset, which could cause the timer to be incorrectly + * cancelled later by a delayed workitem. + * @needed: Whether the timer should restart itself + */ +struct kbasep_pm_tick_timer_state { + struct workqueue_struct *wq; + struct work_struct work; + struct hrtimer timer; + + ktime_t configured_interval; + unsigned int configured_ticks; + unsigned int remaining_ticks; + + bool cancel_queued; + bool needed; +}; + +union kbase_pm_policy_data { + struct kbasep_pm_policy_always_on always_on; + struct kbasep_pm_policy_coarse_demand coarse_demand; +#if !MALI_CUSTOMER_RELEASE + struct kbasep_pm_policy_always_on_demand always_on_demand; +#endif +}; + +/** + * struct kbase_pm_backend_data - Data stored per device for power management. + * + * This structure contains data for the power management framework. There is one + * instance of this structure per device in the system. + * + * @pm_current_policy: The policy that is currently actively controlling the + * power state. + * @pm_policy_data: Private data for current PM policy + * @reset_done: Flag when a reset is complete + * @reset_done_wait: Wait queue to wait for changes to @reset_done + * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter + * users + * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests + * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired + * state according to the L2 and shader power state + * machines + * @gpu_powered: Set to true when the GPU is powered and register + * accesses are possible, false otherwise. Access to this + * variable should be protected by: both the hwaccess_lock + * spinlock and the pm.lock mutex for writes; or at least + * one of either lock for reads. + * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It + * holds the cores enabled in a hardware counters dump, + * and may differ from @shaders_avail when under different + * states and transitions. 
+ * @cg1_disabled: Set if the policy wants to keep the second core group + * powered off + * @driver_ready_for_irqs: Debug state indicating whether sufficient + * initialization of the driver has occurred to handle + * IRQs + * @metrics: Structure to hold metrics for the GPU + * @shader_tick_timer: Structure to hold the shader poweroff tick timer state + * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. + * hwaccess_lock must be held when accessing + * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state + * machine should invoke the poweroff + * worker after the L2 has turned off. + * @poweron_required: true if a GPU power on is required. Should only be set + * when poweroff_wait_in_progress is true, and therefore the + * GPU can not immediately be powered on. pm.lock must be + * held when accessing + * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off + * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq + * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete + * @callback_power_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_off: Callback when the GPU may be turned off. See + * &struct kbase_pm_callback_conf + * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to + * be turned off. See &struct kbase_pm_callback_conf + * @callback_power_resume: Callback when a resume occurs and the GPU needs to + * be turned on. See &struct kbase_pm_callback_conf + * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_off: Callback when the GPU may be turned off. See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See + * &struct kbase_pm_callback_conf + * @callback_soft_reset: Optional callback to software reset the GPU. See + * &struct kbase_pm_callback_conf + * @ca_cores_enabled: Cores that are currently available + * @l2_state: The current state of the L2 cache state machine. See + * &enum kbase_l2_core_state + * @l2_desired: True if the L2 cache should be powered on by the L2 cache state + * machine + * @l2_always_on: If true, disable powering down of l2 cache. + * @shaders_state: The current state of the shader state machine. + * @shaders_avail: This is updated by the state machine when it is in a state + * where it can write to the SHADER_PWRON or PWROFF registers + * to have the same set of available cores as specified by + * @shaders_desired_mask. So it would eventually have the same + * value as @shaders_desired_mask and would precisely indicate + * the cores that are currently available. This is internal to + * shader state machine and should *not* be modified elsewhere. + * @shaders_desired_mask: This is updated by the state machine when it is in + * a state where it can handle changes to the core + * availability (either by DVFS or sysfs). This is + * internal to the shader state machine and should + * *not* be modified elsewhere. + * @shaders_desired: True if the PM active count or power policy requires the + * shader cores to be on. This is used as an input to the + * shader power state machine. The current state of the + * cores may be different, but there should be transitions in + * progress that will eventually achieve this state (assuming + * that the policy doesn't change its mind in the mean time). 
+ * @in_reset: True if a GPU is resetting and normal power manager operation is + * suspended + * @partial_shaderoff: True if we want to partial power off shader cores, + * it indicates a partial shader core off case, + * do some special operation for such case like flush + * L2 cache because of GPU2017-861 + * @protected_entry_transition_override : True if GPU reset is being used + * before entering the protected mode and so + * the reset handling behaviour is being + * overridden. + * @protected_transition_override : True if a protected mode transition is in + * progress and is overriding power manager + * behaviour. + * @protected_l2_override : Non-zero if the L2 cache is required during a + * protected mode transition. Has no effect if not + * transitioning. + * @hwcnt_desired: True if we want GPU hardware counters to be enabled. + * @hwcnt_disabled: True if GPU hardware counters are not enabled. + * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if + * atomic disable is not possible. + * @gpu_clock_suspend_freq: 'opp-mali-errata-1485982' clock in opp table + * for safe L2 power cycle. + * If no opp-mali-errata-1485982 specified, + * the slowest clock will be taken. + * @gpu_clock_slow_down_wa: If true, slow down GPU clock during L2 power cycle. + * @gpu_clock_slow_down_desired: True if we want lower GPU clock + * for safe L2 power cycle. False if want GPU clock + * to back to normalized one. This is updated only + * in L2 state machine, kbase_pm_l2_update_state. + * @gpu_clock_slowed_down: During L2 power cycle, + * True if gpu clock is set at lower frequency + * for safe L2 power down, False if gpu clock gets + * restored to previous speed. This is updated only in + * work function, kbase_pm_gpu_clock_control_worker. + * @gpu_clock_control_work: work item to set GPU clock during L2 power cycle + * using gpu_clock_control + * + * Note: + * During an IRQ, @pm_current_policy can be NULL when the policy is being + * changed with kbase_pm_set_policy(). The change is protected under + * kbase_device.pm.pcower_change_lock. Direct access to this from IRQ context + * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will + * re-issue the policy functions that would have been done under IRQ. 
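+ *
+ * An IRQ-context reader would therefore follow a pattern along these lines
+ * (illustrative sketch only):
+ *
+ *   const struct kbase_pm_policy *policy =
+ *           kbdev->pm.backend.pm_current_policy;
+ *   bool gpu_should_be_on = policy && policy->get_core_active(kbdev);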
+ */ +struct kbase_pm_backend_data { + const struct kbase_pm_policy *pm_current_policy; + union kbase_pm_policy_data pm_policy_data; + bool reset_done; + wait_queue_head_t reset_done_wait; + int gpu_cycle_counter_requests; + spinlock_t gpu_cycle_counter_requests_lock; + + wait_queue_head_t gpu_in_desired_state_wait; + + bool gpu_powered; + + u64 pm_shaders_core_mask; + + bool cg1_disabled; + +#ifdef CONFIG_MALI_DEBUG + bool driver_ready_for_irqs; +#endif /* CONFIG_MALI_DEBUG */ + + struct kbasep_pm_metrics_state metrics; + + struct kbasep_pm_tick_timer_state shader_tick_timer; + + bool poweroff_wait_in_progress; + bool invoke_poweroff_wait_wq_when_l2_off; + bool poweron_required; + + struct workqueue_struct *gpu_poweroff_wait_wq; + struct work_struct gpu_poweroff_wait_work; + + wait_queue_head_t poweroff_wait; + + int (*callback_power_on)(struct kbase_device *kbdev); + void (*callback_power_off)(struct kbase_device *kbdev); + void (*callback_power_suspend)(struct kbase_device *kbdev); + void (*callback_power_resume)(struct kbase_device *kbdev); + int (*callback_power_runtime_on)(struct kbase_device *kbdev); + void (*callback_power_runtime_off)(struct kbase_device *kbdev); + int (*callback_power_runtime_idle)(struct kbase_device *kbdev); + int (*callback_soft_reset)(struct kbase_device *kbdev); + /* MALI_SEC_INTEGRATION */ + int (*callback_power_dvfs_on)(struct kbase_device *kbdev); + + u64 ca_cores_enabled; + + enum kbase_l2_core_state l2_state; + enum kbase_shader_core_state shaders_state; + u64 shaders_avail; + u64 shaders_desired_mask; + bool l2_desired; + bool l2_always_on; + bool shaders_desired; + + bool in_reset; + + bool partial_shaderoff; + + bool protected_entry_transition_override; + bool protected_transition_override; + int protected_l2_override; + + bool hwcnt_desired; + bool hwcnt_disabled; + struct work_struct hwcnt_disable_work; + + u64 gpu_clock_suspend_freq; + bool gpu_clock_slow_down_wa; + bool gpu_clock_slow_down_desired; + bool gpu_clock_slowed_down; + struct work_struct gpu_clock_control_work; +}; + + +/* List of policy IDs */ +enum kbase_pm_policy_id { + KBASE_PM_POLICY_ID_COARSE_DEMAND, +#if !MALI_CUSTOMER_RELEASE + KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND, +#endif + KBASE_PM_POLICY_ID_ALWAYS_ON +}; + +/** + * struct kbase_pm_policy - Power policy structure. + * + * Each power policy exposes a (static) instance of this structure which + * contains function pointers to the policy's methods. + * + * @name: The name of this policy + * @init: Function called when the policy is selected + * @term: Function called when the policy is unselected + * @shaders_needed: Function called to find out if shader cores are needed + * @get_core_active: Function called to get the current overall GPU power + * state + * @id: Field indicating an ID for this policy. This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. + */ +struct kbase_pm_policy { + char *name; + + /** + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.pm_policy_data structure. It + * should not attempt to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is + * called. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + */ + void (*init)(struct kbase_device *kbdev); + + /** + * Function called when the policy is unselected. 
+ * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + */ + void (*term)(struct kbase_device *kbdev); + + /** + * Function called to find out if shader cores are needed + * + * This needs to at least satisfy kbdev->pm.backend.shaders_desired, + * and so must never return false when shaders_desired is true. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * + * Return: true if shader cores are needed, false otherwise + */ + bool (*shaders_needed)(struct kbase_device *kbdev); + + /** + * Function called to get the current overall GPU power state + * + * This function must meet or exceed the requirements for power + * indicated by kbase_pm_is_active(). + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * + * Return: true if the GPU should be powered, false otherwise + */ + bool (*get_core_active)(struct kbase_device *kbdev); + + enum kbase_pm_policy_id id; +}; + +#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_driver.c new file mode 100644 index 000000000000..0290e1db7025 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_driver.c @@ -0,0 +1,2269 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel Power Management hardware control + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#include +#include + +#include + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) +#include +#endif + +#ifdef CONFIG_MALI_CORESTACK +bool corestack_driver_control = true; +#else +bool corestack_driver_control; /* Default value of 0/false */ +#endif +module_param(corestack_driver_control, bool, 0444); +MODULE_PARM_DESC(corestack_driver_control, + "Let the driver power on/off the GPU core stack independently " + "without involving the Power Domain Controller. This should " + "only be enabled on platforms for which integration of the PDC " + "to the Mali GPU is known to be problematic."); +KBASE_EXPORT_TEST_API(corestack_driver_control); + +/** + * enum kbasep_pm_action - Actions that can be performed on a core. + * + * This enumeration is private to the file. Its values are set to allow + * core_type_to_reg() function, which decodes this enumeration, to be simpler + * and more efficient. 
+ * + * @ACTION_PRESENT: The cores that are present + * @ACTION_READY: The cores that are ready + * @ACTION_PWRON: Power on the cores specified + * @ACTION_PWROFF: Power off the cores specified + * @ACTION_PWRTRANS: The cores that are transitioning + * @ACTION_PWRACTIVE: The cores that are active + */ +enum kbasep_pm_action { + ACTION_PRESENT = 0, + ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), + ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), + ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), + ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), + ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) +}; + +static u64 kbase_pm_get_state( + struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + enum kbasep_pm_action action); + +bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) +{ + if (kbdev->pm.backend.protected_entry_transition_override) + return false; + + if (kbdev->pm.backend.protected_transition_override && + kbdev->pm.backend.protected_l2_override) + return true; + + if (kbdev->pm.backend.protected_transition_override && + !kbdev->pm.backend.shaders_desired) + return false; + + return kbdev->pm.backend.l2_desired; +} + +void kbase_pm_protected_override_enable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->pm.backend.protected_transition_override = true; +} +void kbase_pm_protected_override_disable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->pm.backend.protected_transition_override = false; +} + +int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(!kbdev->protected_mode_transition); + + if (kbdev->pm.backend.l2_always_on && + (kbdev->system_coherency == COHERENCY_ACE)) { + WARN_ON(kbdev->pm.backend.protected_entry_transition_override); + + /* + * If there is already a GPU reset pending then wait for it to + * complete before initiating a special reset for protected + * mode entry. + */ + if (kbase_reset_gpu_silent(kbdev)) + return -EAGAIN; + + kbdev->pm.backend.protected_entry_transition_override = true; + } + + return 0; +} + +void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(!kbdev->protected_mode_transition); + + if (kbdev->pm.backend.l2_always_on && + (kbdev->system_coherency == COHERENCY_ACE)) { + WARN_ON(!kbdev->pm.backend.protected_entry_transition_override); + + kbdev->pm.backend.protected_entry_transition_override = false; + } +} + +void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (override) { + kbdev->pm.backend.protected_l2_override++; + WARN_ON(kbdev->pm.backend.protected_l2_override <= 0); + } else { + kbdev->pm.backend.protected_l2_override--; + WARN_ON(kbdev->pm.backend.protected_l2_override < 0); + } + + kbase_pm_update_state(kbdev); +} + +/** + * core_type_to_reg - Decode a core type and action to a register. + * + * Given a core type (defined by kbase_pm_core_type) and an action (defined + * by kbasep_pm_action) this function will return the register offset that + * will perform the action on the core type. The register returned is the _LO + * register and an offset must be applied to use the _HI register. 
+ * + * @core_type: The type of core + * @action: The type of action + * + * Return: The register offset of the _LO register that performs an action of + * type @action on a core of type @core_type. + */ +static u32 core_type_to_reg(enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) +{ + if (corestack_driver_control) { + if (core_type == KBASE_PM_CORE_STACK) { + switch (action) { + case ACTION_PRESENT: + return STACK_PRESENT_LO; + case ACTION_READY: + return STACK_READY_LO; + case ACTION_PWRON: + return STACK_PWRON_LO; + case ACTION_PWROFF: + return STACK_PWROFF_LO; + case ACTION_PWRTRANS: + return STACK_PWRTRANS_LO; + default: + WARN(1, "Invalid action for core type\n"); + } + } + } + + return (u32)core_type + (u32)action; +} + +#ifdef CONFIG_ARM64 +static void mali_cci_flush_l2(struct kbase_device *kbdev) +{ + const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; + u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + u32 raw; + + /* + * Note that we don't take the cache flush mutex here since + * we expect to be the last user of the L2, all other L2 users + * would have dropped their references, to initiate L2 power + * down, L2 power down being the only valid place for this + * to be called from. + */ + + kbase_reg_write(kbdev, + GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + + /* Wait for cache flush to complete before continuing, exit on + * gpu resets or loop expiry. */ + while (((raw & mask) == 0) && --loops) { + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + } +} +#endif + +/** + * kbase_pm_invoke - Invokes an action on a core set + * + * This function performs the action given by @action on a set of cores of a + * type given by @core_type. 
It is a static function used by + * kbase_pm_transition_core_type() + * + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that the action should be performed on + * @cores: A bit mask of cores to perform the action on (low 32 bits) + * @action: The action to perform on the cores + */ +static void kbase_pm_invoke(struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + u64 cores, + enum kbasep_pm_action action) +{ + u32 reg; + u32 lo = cores & 0xFFFFFFFF; + u32 hi = (cores >> 32) & 0xFFFFFFFF; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + reg = core_type_to_reg(core_type, action); + + KBASE_DEBUG_ASSERT(reg); + + if (cores) { + u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); + + if (action == ACTION_PWRON) + state |= cores; + else if (action == ACTION_PWROFF) + state &= ~cores; + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state); + } + + /* Tracing */ + if (cores) { + if (action == ACTION_PWRON) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_KTRACE_ADD(kbdev, PM_PWRON, NULL, cores); + break; + case KBASE_PM_CORE_TILER: + KBASE_KTRACE_ADD(kbdev, PM_PWRON_TILER, NULL, cores); + break; + case KBASE_PM_CORE_L2: + KBASE_KTRACE_ADD(kbdev, PM_PWRON_L2, NULL, cores); + break; + default: + break; + } + else if (action == ACTION_PWROFF) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_KTRACE_ADD(kbdev, PM_PWROFF, NULL, cores); + break; + case KBASE_PM_CORE_TILER: + KBASE_KTRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, cores); + break; + case KBASE_PM_CORE_L2: + KBASE_KTRACE_ADD(kbdev, PM_PWROFF_L2, NULL, cores); + /* disable snoops before L2 is turned off */ + kbase_pm_cache_snoop_disable(kbdev); + break; + default: + break; + } + } + + if (kbase_dummy_job_wa_enabled(kbdev) && + action == ACTION_PWRON && + core_type == KBASE_PM_CORE_SHADER && + !(kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { + kbase_dummy_job_wa_execute(kbdev, cores); + } else { + if (lo != 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); + if (hi != 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); + } +} + +/** + * kbase_pm_get_state - Get information about a core set + * + * This function gets information (chosen by @action) about a set of cores of + * a type given by @core_type. It is a static function used by + * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and + * kbase_pm_get_ready_cores(). 
+ * + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that the should be queried + * @action: The property of the cores to query + * + * Return: A bit mask specifying the state of the cores + */ +static u64 kbase_pm_get_state(struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) +{ + u32 reg; + u32 lo, hi; + + reg = core_type_to_reg(core_type, action); + + KBASE_DEBUG_ASSERT(reg); + + lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg)); + hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4)); + + return (((u64) hi) << 32) | ((u64) lo); +} + +/** + * kbase_pm_get_present_cores - Get the cores that are present + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of the cores that are present + */ +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + switch (type) { + case KBASE_PM_CORE_L2: + return kbdev->gpu_props.props.raw_props.l2_present; + case KBASE_PM_CORE_SHADER: + return kbdev->gpu_props.props.raw_props.shader_present; + case KBASE_PM_CORE_TILER: + return kbdev->gpu_props.props.raw_props.tiler_present; + case KBASE_PM_CORE_STACK: + return kbdev->gpu_props.props.raw_props.stack_present; + default: + break; + } + KBASE_DEBUG_ASSERT(0); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); + +/** + * kbase_pm_get_active_cores - Get the cores that are "active" + * (busy processing work) + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of cores that are active + */ +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type) +{ + return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); + +/** + * kbase_pm_get_trans_cores - Get the cores that are transitioning between + * power states + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of cores that are transitioning + */ +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type) +{ + return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); + +/** + * kbase_pm_get_ready_cores - Get the cores that are powered on + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of cores that are ready (powered on) + */ +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type) +{ + u64 result; + + result = kbase_pm_get_state(kbdev, type, ACTION_READY); + + switch (type) { + case KBASE_PM_CORE_SHADER: + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED, NULL, result); + break; + case KBASE_PM_CORE_TILER: + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, result); + break; + case KBASE_PM_CORE_L2: + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, result); + break; + default: + break; + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); + +static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* See if we can get away with disabling hwcnt + * atomically, otherwise kick off a worker. 
+ */ + if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) { + backend->hwcnt_disabled = true; + } else { +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &backend->hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &backend->hwcnt_disable_work); +#endif + } +} + +static void kbase_pm_l2_config_override(struct kbase_device *kbdev) +{ + u32 val; + + /* + * Skip if it is not supported + */ + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) + return; + + /* + * Skip if size and hash are not given explicitly, + * which means default values are used. + */ + if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0)) + return; + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + + if (kbdev->l2_size_override) { + val &= ~L2_CONFIG_SIZE_MASK; + val |= (kbdev->l2_size_override << L2_CONFIG_SIZE_SHIFT); + } + + if (kbdev->l2_hash_override) { + val &= ~L2_CONFIG_HASH_MASK; + val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT); + } + + dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val); + + /* Write L2_CONFIG to override */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val); +} + +static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *const backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + queue_work(system_wq, &backend->gpu_clock_control_work); +} + +static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) +{ + const char *const strings[] = { +#define KBASEP_L2_STATE(n) #n, +#include "mali_kbase_pm_l2_states.h" +#undef KBASEP_L2_STATE + }; + if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) + return "Bad level 2 cache state"; + else + return strings[state]; +} + +static int kbase_pm_l2_update_state(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; + u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; + enum kbase_l2_core_state prev_state; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + do { + /* Get current state */ + u64 l2_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2); + u64 l2_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2); + u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_TILER); + + /* + * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores + * are vulnerable to corruption if gpu is lost + */ + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + /* mask off ready from trans in case transitions finished + * between the register reads + */ + l2_trans &= ~l2_ready; + tiler_trans &= ~tiler_ready; + + prev_state = backend->l2_state; + + switch (backend->l2_state) { + case KBASE_L2_OFF: + if (kbase_pm_is_l2_desired(kbdev)) { + /* + * Set the desired config for L2 before powering + * it on + */ + kbase_pm_l2_config_override(kbdev); + + /* L2 is required, power on. Powering on the + * tiler will also power the first L2 cache. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, + tiler_present, ACTION_PWRON); + + /* If we have more than one L2 cache then we + * must power them on explicitly. 
+ */ + if (l2_present != 1) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present & ~1, + ACTION_PWRON); + backend->l2_state = KBASE_L2_PEND_ON; + } + break; + + case KBASE_L2_PEND_ON: + if (!l2_trans && l2_ready == l2_present && !tiler_trans + && tiler_ready == tiler_present) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready); + /* + * Ensure snoops are enabled after L2 is powered + * up. Note that kbase keeps track of the snoop + * state, so safe to repeatedly call. + */ + kbase_pm_cache_snoop_enable(kbdev); + + /* With the L2 enabled, we can now enable + * hardware counters. + */ + if (kbdev->pm.backend.gpu_clock_slow_down_wa) + backend->l2_state = + KBASE_L2_RESTORE_CLOCKS; + else + backend->l2_state = + KBASE_L2_ON_HWCNT_ENABLE; + + /* Now that the L2 is on, the shaders can start + * powering on if they're required. The obvious + * way to do this would be to call + * kbase_pm_shaders_update_state() here. + * However, that would make the two state + * machines mutually recursive, as the opposite + * would be needed for powering down. Instead, + * callers of this function should use the + * kbase_pm_update_state() wrapper, which will + * call the shader state machine immediately + * after the L2 (for power up), or + * automatically re-invoke the L2 state machine + * when the shaders power down. + */ + } + break; + + case KBASE_L2_RESTORE_CLOCKS: + /* We always assume only GPUs being affected by + * BASE_HW_ISSUE_GPU2017_1336 fall into this state + */ + WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); + + /* If L2 not needed, we need to make sure cancellation + * of any previously issued work to restore GPU clock. + * For it, move to KBASE_L2_SLOW_DOWN_CLOCKS state. + */ + if (!kbase_pm_is_l2_desired(kbdev)) { + backend->l2_state = KBASE_L2_SLOW_DOWN_CLOCKS; + break; + } + + backend->gpu_clock_slow_down_desired = false; + if (backend->gpu_clock_slowed_down) + kbase_pm_control_gpu_clock(kbdev); + else + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; + break; + + case KBASE_L2_ON_HWCNT_ENABLE: + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } + backend->l2_state = KBASE_L2_ON; + break; + + case KBASE_L2_ON: + if (!kbase_pm_is_l2_desired(kbdev)) { + /* Do not power off L2 until the shaders and + * core stacks are off. + */ + if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + break; + + /* We need to make sure hardware counters are + * disabled before powering down the L2, to + * prevent loss of data. + * + * We waited until after the cores were powered + * down to prevent ping-ponging between hwcnt + * enabled and disabled, which would have + * happened if userspace submitted more work + * while we were trying to power down. + */ + backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; + } + break; + + case KBASE_L2_ON_HWCNT_DISABLE: + /* If the L2 became desired while we were waiting on the + * worker to do the actual hwcnt disable (which might + * happen if some work was submitted immediately after + * the shaders powered off), then we need to early-out + * of this state and re-enable hwcnt. + * + * If we get lucky, the hwcnt disable might not have + * actually started yet, and the logic in the hwcnt + * enable state will prevent the worker from + * performing the disable entirely, preventing loss of + * any hardware counter data. 
+ * + * If the hwcnt disable has started, then we'll lose + * a tiny amount of hardware counter data between the + * disable and the re-enable occurring. + * + * This loss of data is preferable to the alternative, + * which is to block the shader cores from doing any + * work until we're sure hwcnt has been re-enabled. + */ + if (kbase_pm_is_l2_desired(kbdev)) { + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; + break; + } + + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) { + kbase_pm_trigger_hwcnt_disable(kbdev); + } + + if (backend->hwcnt_disabled) { + if (kbdev->pm.backend.gpu_clock_slow_down_wa) + backend->l2_state = + KBASE_L2_SLOW_DOWN_CLOCKS; + else + backend->l2_state = KBASE_L2_POWER_DOWN; + } + break; + + case KBASE_L2_SLOW_DOWN_CLOCKS: + /* We always assume only GPUs being affected by + * BASE_HW_ISSUE_GPU2017_1336 fall into this state + */ + WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); + + /* L2 needs to be powered up. And we need to make sure + * cancellation of any previously issued work to slow + * down GPU clock. For it, we move to the state, + * KBASE_L2_RESTORE_CLOCKS. + */ + if (kbase_pm_is_l2_desired(kbdev)) { + backend->l2_state = KBASE_L2_RESTORE_CLOCKS; + break; + } + + backend->gpu_clock_slow_down_desired = true; + if (!backend->gpu_clock_slowed_down) + kbase_pm_control_gpu_clock(kbdev); + else + backend->l2_state = KBASE_L2_POWER_DOWN; + + break; + + case KBASE_L2_POWER_DOWN: + if (!backend->l2_always_on) + /* Powering off the L2 will also power off the + * tiler. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present, + ACTION_PWROFF); + else + /* If L2 cache is powered then we must flush it + * before we power off the GPU. Normally this + * would have been handled when the L2 was + * powered off. + */ + kbase_gpu_start_cache_clean_nolock( + kbdev); + + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u); + + backend->l2_state = KBASE_L2_PEND_OFF; + break; + + case KBASE_L2_PEND_OFF: + if (!backend->l2_always_on) { + /* We only need to check the L2 here - if the L2 + * is off then the tiler is definitely also off. 
+ */ + if (!l2_trans && !l2_ready) + /* L2 is now powered off */ + backend->l2_state = KBASE_L2_OFF; + } else { + if (!kbdev->cache_clean_in_progress) + backend->l2_state = KBASE_L2_OFF; + } + break; + + case KBASE_L2_RESET_WAIT: + /* Reset complete */ + if (!backend->in_reset) + backend->l2_state = KBASE_L2_OFF; + break; + + default: + WARN(1, "Invalid state in l2_state: %d", + backend->l2_state); + } + + if (backend->l2_state != prev_state) + dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n", + kbase_l2_core_state_to_string(prev_state), + kbase_l2_core_state_to_string( + backend->l2_state)); + + } while (backend->l2_state != prev_state); + + if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && + backend->l2_state == KBASE_L2_OFF) { + + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, KBASE_DEVICE_PM_WAIT_WQ_QUEUE_WORK, NULL, 0u); + + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; + queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, + &kbdev->pm.backend.gpu_poweroff_wait_work); + } + + return 0; +} + +static void shader_poweroff_timer_stop_callback(struct work_struct *data) +{ + unsigned long flags; + struct kbasep_pm_tick_timer_state *stt = container_of(data, + struct kbasep_pm_tick_timer_state, work); + struct kbase_device *kbdev = container_of(stt, struct kbase_device, + pm.backend.shader_tick_timer); + + hrtimer_cancel(&stt->timer); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt->cancel_queued = false; + if (kbdev->pm.backend.gpu_powered) + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/** + * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer + * @kbdev: pointer to kbase device + * + * Synchronization between the shader state machine and the timer thread is + * difficult. This is because situations may arise where the state machine + * wants to start the timer, but the callback is already running, and has + * already passed the point at which it checks whether it is required, and so + * cancels itself, even though the state machine may have just tried to call + * hrtimer_start. + * + * This cannot be stopped by holding hwaccess_lock in the timer thread, + * because there are still infinitesimally small sections at the start and end + * of the callback where the lock is not held. + * + * Instead, a new state is added to the shader state machine, + * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee + * that when the shaders are switched off, the timer has definitely been + * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the + * timer is started, it is guaranteed that either the timer is already running + * (from an availability change or cancelled timer), or hrtimer_start will + * succeed. It is critical to avoid ending up in + * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could + * hang there forever. 
+ */ +static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) +{ + struct kbasep_pm_tick_timer_state *stt = + &kbdev->pm.backend.shader_tick_timer; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + stt->needed = false; + + if (hrtimer_active(&stt->timer) && !stt->cancel_queued) { + stt->cancel_queued = true; + queue_work(stt->wq, &stt->work); + } +} + +static const char *kbase_shader_core_state_to_string( + enum kbase_shader_core_state state) +{ + const char *const strings[] = { +#define KBASEP_SHADER_STATE(n) #n, +#include "mali_kbase_pm_shader_states.h" +#undef KBASEP_SHADER_STATE + }; + if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) + return "Bad shader core state"; + else + return strings[state]; +} + +static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + struct kbasep_pm_tick_timer_state *stt = + &kbdev->pm.backend.shader_tick_timer; + enum kbase_shader_core_state prev_state; + u64 stacks_avail = 0; + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (corestack_driver_control) + /* Always power on all the corestacks. Disabling certain + * corestacks when their respective shaders are not in the + * available bitmap is not currently supported. + */ + stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK); + + do { + u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 stacks_trans = 0; + u64 stacks_ready = 0; + + if (corestack_driver_control) { + stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK); + stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK); + } + + /* + * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores + * are vulnerable to corruption if gpu is lost + */ + if (kbase_is_gpu_lost(kbdev)) { + err = -EIO; + break; + } + + /* mask off ready from trans in case transitions finished + * between the register reads + */ + shaders_trans &= ~shaders_ready; + stacks_trans &= ~stacks_ready; + + prev_state = backend->shaders_state; + + switch (backend->shaders_state) { + case KBASE_SHADERS_OFF_CORESTACK_OFF: + /* Ignore changes to the shader core availability + * except at certain points where we can handle it, + * i.e. off and SHADERS_ON_CORESTACK_ON. 
+ */ + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); + backend->pm_shaders_core_mask = 0; + + if (backend->shaders_desired && + backend->l2_state == KBASE_L2_ON) { + if (backend->hwcnt_desired && + !backend->hwcnt_disabled) { + /* Trigger a hwcounter dump */ + backend->hwcnt_desired = false; + kbase_pm_trigger_hwcnt_disable(kbdev); + } + + if (backend->hwcnt_disabled) { + if (corestack_driver_control) { + kbase_pm_invoke(kbdev, + KBASE_PM_CORE_STACK, + stacks_avail, + ACTION_PWRON); + } + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_PEND_ON; + } + } + break; + + case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: + if (!stacks_trans && stacks_ready == stacks_avail) { + backend->shaders_avail = + backend->shaders_desired_mask; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail, ACTION_PWRON); + + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_PEND_ON_CORESTACK_ON: + if (!shaders_trans && shaders_ready == backend->shaders_avail) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, shaders_ready); + backend->pm_shaders_core_mask = shaders_ready; + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } + + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_ON_CORESTACK_ON: + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); + + /* If shaders to change state, trigger a counter dump */ + if (!backend->shaders_desired || + (backend->shaders_desired_mask != shaders_ready)) { + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) + kbase_pm_trigger_hwcnt_disable(kbdev); + backend->shaders_state = + KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; + } + break; + + case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); + + if (!backend->hwcnt_disabled) { + /* Wait for being disabled */ + ; + } else if (!backend->shaders_desired) { + if (kbdev->pm.backend.protected_transition_override || +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev) || +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + !stt->configured_ticks || + WARN_ON(stt->cancel_queued)) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } else { + stt->remaining_ticks = stt->configured_ticks; + stt->needed = true; + + /* The shader hysteresis timer is not + * done the obvious way, which would be + * to start an hrtimer when the shader + * power off is requested. Instead, + * use a 'tick' timer, and set the + * remaining number of ticks on a power + * off request. This avoids the + * latency of starting, then + * immediately cancelling an hrtimer + * when the shaders are re-requested + * before the timeout expires. 
+ */ + if (!hrtimer_active(&stt->timer)) + hrtimer_start(&stt->timer, + stt->configured_interval, + HRTIMER_MODE_REL); + + backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; + } + } else if (backend->shaders_desired_mask & ~shaders_ready) { + /* set cores ready but not available to + * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON + * check pass + */ + backend->shaders_avail = + (backend->shaders_desired_mask | shaders_ready); + + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail & ~shaders_ready, + ACTION_PWRON); + backend->shaders_state = + KBASE_SHADERS_PEND_ON_CORESTACK_ON; + } else if (shaders_ready & ~backend->shaders_desired_mask) { + backend->shaders_state = + KBASE_SHADERS_WAIT_GPU_IDLE; + } else { + backend->shaders_state = + KBASE_SHADERS_PEND_ON_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: + if (WARN_ON(!hrtimer_active(&stt->timer))) { + stt->remaining_ticks = 0; + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } + + if (backend->shaders_desired) { + stt->remaining_ticks = 0; + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; + } else if (stt->remaining_ticks == 0) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + } else if (kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev)) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + } + break; + + case KBASE_SHADERS_WAIT_GPU_IDLE: + /* If partial shader core off need to wait the job in + * running and next register finished then flush L2 + * or it might hit GPU2017-861 + */ + if (!kbase_gpu_atoms_submitted_any(kbdev)) { + backend->partial_shaderoff = true; + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: + shader_poweroff_timer_queue_cancel(kbdev); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) { + kbase_gpu_start_cache_clean_nolock(kbdev); + backend->shaders_state = + KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; + } else { + backend->shaders_state = + KBASE_SHADERS_READY_OFF_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: + if (!kbdev->cache_clean_in_progress) + backend->shaders_state = + KBASE_SHADERS_READY_OFF_CORESTACK_ON; + + break; + + case KBASE_SHADERS_READY_OFF_CORESTACK_ON: + if (backend->partial_shaderoff) { + backend->partial_shaderoff = false; + /* remove cores available but not ready to + * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON + * check pass + */ + + /* shaders_desired_mask shall be a subset of + * shaders_ready + */ + WARN_ON(backend->shaders_desired_mask & ~shaders_ready); + WARN_ON(!(backend->shaders_desired_mask & shaders_ready)); + + backend->shaders_avail = + backend->shaders_desired_mask; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready & ~backend->shaders_avail, ACTION_PWROFF); + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, (shaders_ready & ~backend->shaders_avail)); + } else { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready, ACTION_PWROFF); + + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); + + backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: + if (!shaders_trans && !shaders_ready) { + if (corestack_driver_control) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, + stacks_avail, 
ACTION_PWROFF); + + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; + } + break; + + case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: + if (!stacks_trans && !stacks_ready) { + /* On powered off, re-enable the hwcnt */ + backend->pm_shaders_core_mask = 0; + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + } + break; + + case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: + if (!hrtimer_active(&stt->timer) && !stt->cancel_queued) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; + break; + + case KBASE_SHADERS_RESET_WAIT: + /* Reset complete */ + if (!backend->in_reset) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + break; + } + + if (backend->shaders_state != prev_state) + dev_dbg(kbdev->dev, "Shader state transition: %s to %s\n", + kbase_shader_core_state_to_string(prev_state), + kbase_shader_core_state_to_string( + backend->shaders_state)); + + } while (backend->shaders_state != prev_state); + + return err; +} + +static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) +{ + bool in_desired_state = true; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbase_pm_is_l2_desired(kbdev) && + kbdev->pm.backend.l2_state != KBASE_L2_ON) + in_desired_state = false; + else if (!kbase_pm_is_l2_desired(kbdev) && + kbdev->pm.backend.l2_state != KBASE_L2_OFF) + in_desired_state = false; + + if (kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + in_desired_state = false; + else if (!kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + in_desired_state = false; + + return in_desired_state; +} + +static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) +{ + bool in_desired_state; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return in_desired_state; +} + +static bool kbase_pm_is_in_desired_state_with_l2_powered( + struct kbase_device *kbdev) +{ + bool in_desired_state = false; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_pm_is_in_desired_state_nolock(kbdev) && + (kbdev->pm.backend.l2_state == KBASE_L2_ON)) + in_desired_state = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return in_desired_state; +} + +static void kbase_pm_trace_power_state(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, + KBASE_PM_CORE_L2, + kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_L2)); + KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, + KBASE_PM_CORE_SHADER, + kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_SHADER)); + KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, + KBASE_PM_CORE_TILER, + kbase_pm_get_ready_cores( + kbdev, + KBASE_PM_CORE_TILER)); + + if (corestack_driver_control) + KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, + KBASE_PM_CORE_STACK, + kbase_pm_get_ready_cores( + kbdev, + KBASE_PM_CORE_STACK)); +} + +void kbase_pm_update_state(struct kbase_device *kbdev) +{ + enum kbase_shader_core_state prev_shaders_state = + kbdev->pm.backend.shaders_state; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbdev->pm.backend.gpu_powered) + return; /* Do nothing if the GPU is off */ + 
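+	/* Run the L2 state machine first, then the shader state machine. A
+	 * non-zero return from either indicates the GPU has been lost, in
+	 * which case no further state transitions are attempted.
+	 */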
+ if (kbase_pm_l2_update_state(kbdev)) + return; + + if (kbase_pm_shaders_update_state(kbdev)) + return; + + /* If the shaders just turned off, re-invoke the L2 state machine, in + * case it was waiting for the shaders to turn off before powering down + * the L2. + */ + if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && + kbdev->pm.backend.shaders_state == + KBASE_SHADERS_OFF_CORESTACK_OFF) { + if (kbase_pm_l2_update_state(kbdev)) + return; + } + + if (kbase_pm_is_in_desired_state_nolock(kbdev)) { + KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, kbdev->pm.backend.shaders_avail); + + kbase_pm_trace_power_state(kbdev); + + KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); + } +} + +static enum hrtimer_restart +shader_tick_timer_callback(struct hrtimer *timer) +{ + struct kbasep_pm_tick_timer_state *stt = container_of(timer, + struct kbasep_pm_tick_timer_state, timer); + struct kbase_device *kbdev = container_of(stt, struct kbase_device, + pm.backend.shader_tick_timer); + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + enum hrtimer_restart restart = HRTIMER_NORESTART; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (stt->remaining_ticks && + backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { + stt->remaining_ticks--; + + /* If the remaining ticks just changed from 1 to 0, invoke the + * PM state machine to power off the shader cores. + */ + if (!stt->remaining_ticks && !backend->shaders_desired) + kbase_pm_update_state(kbdev); + } + + if (stt->needed) { + hrtimer_forward_now(timer, stt->configured_interval); + restart = HRTIMER_RESTART; + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return restart; +} + +int kbase_pm_state_machine_init(struct kbase_device *kbdev) +{ + struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; + + stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!stt->wq) + return -ENOMEM; + + INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); + + stt->needed = false; + hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stt->timer.function = shader_tick_timer_callback; + stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); + stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + + return 0; +} + +void kbase_pm_state_machine_term(struct kbase_device *kbdev) +{ + hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer); + destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq); +} + +void kbase_pm_reset_start_locked(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + backend->in_reset = true; + backend->l2_state = KBASE_L2_RESET_WAIT; + backend->shaders_state = KBASE_SHADERS_RESET_WAIT; + + /* We're in a reset, so hwcnt will have been synchronously disabled by + * this function's caller as part of the reset process. We therefore + * know that any call to kbase_hwcnt_context_disable_atomic, if + * required to sync the hwcnt refcount with our internal state, is + * guaranteed to succeed. 
+ */ + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) { + WARN_ON(!kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)); + backend->hwcnt_disabled = true; + } + + shader_poweroff_timer_queue_cancel(kbdev); +} + +void kbase_pm_reset_complete(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + + WARN_ON(!kbase_reset_gpu_is_active(kbdev)); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* As GPU has just been reset, that results in implicit flush of L2 + * cache, can safely mark the pending cache flush operation (if there + * was any) as complete and unblock the waiter. + * No work can be submitted whilst GPU reset is ongoing. + */ + kbase_gpu_cache_clean_wait_complete(kbdev); + backend->in_reset = false; + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has + * aborted due to a fatal signal. If the time spent waiting has exceeded this + * threshold then there is most likely a hardware issue. */ +#define PM_TIMEOUT (5*HZ) /* 5s */ + +static void kbase_pm_timed_out(struct kbase_device *kbdev) +{ + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "Desired state :\n"); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0); + dev_err(kbdev->dev, "Current state :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_LO))); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_LO))); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_LO))); + dev_err(kbdev->dev, "Cores transitioning :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_LO))); + + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); +} + +void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long timeout; + int err; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + timeout = jiffies + PM_TIMEOUT; + + /* Wait for cores */ + err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev)); + + if (err < 0 && time_after(jiffies, timeout)) + kbase_pm_timed_out(kbdev); +} + +void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long timeout; + int err; + + /* Let the state machine latch the most recent desired state. 
*/ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + timeout = jiffies + PM_TIMEOUT; + + /* Wait for cores */ + err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev)); + + if (err < 0 && time_after(jiffies, timeout)) + kbase_pm_timed_out(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); + +void kbase_pm_enable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Clear all interrupts, + * and unmask them all. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); +} + +KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); + +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Mask all interrupts, + * and clear them all. + */ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); +} + +void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_disable_interrupts_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + +/* + * pmu layout: + * 0x0000: PMU TAG (RO) (0xCAFECAFE) + * 0x0004: PMU VERSION ID (RO) (0x00000000) + * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) +{ + bool reset_required = is_resume; + unsigned long flags; + + /* MALI_SEC_INTEGRATION */ + struct exynos_context *platform = NULL; + platform = (struct exynos_context *)kbdev->platform_context; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->js_data.runpool_mutex); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->pm.backend.gpu_powered) { + /* Already turned on */ + if (kbdev->poweroff_pending) + kbase_pm_enable_interrupts(kbdev); + kbdev->poweroff_pending = false; + KBASE_DEBUG_ASSERT(!is_resume); + return; + } + + kbdev->poweroff_pending = false; + + KBASE_KTRACE_ADD(kbdev, PM_GPU_ON, NULL, 0u); + + /* MALI_SEC_INTEGRATION */ + GPU_LOG(DVFS_INFO, LSI_RESUME_CHECK, is_resume, kbdev->pm.backend.metrics.timer_active, "resume_check\n"); + + if (is_resume && kbdev->pm.backend.callback_power_resume) { + kbdev->pm.backend.callback_power_resume(kbdev); + return; + } else if (kbdev->pm.backend.callback_power_on) { + reset_required = kbdev->pm.backend.callback_power_on(kbdev); + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (reset_required) { + 
/* GPU state was lost, reset GPU to ensure it is in a + * consistent state */ + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->init_hw) + kbdev->vendor_callbacks->init_hw(kbdev); + + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); + + /* MALI_SEC_INTEGRATION */ + if (kbdev->pm.backend.callback_power_dvfs_on) + kbdev->pm.backend.callback_power_dvfs_on(kbdev); + } + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ctx_sched_restore_all_as(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* MALI_SEC_INTEGRATION */ + if (platform) { + GPU_LOG(DVFS_INFO, LSI_RESUME_FREQ, kbdev->pm.backend.metrics.timer_active, platform->cur_clock, "resume_freq\n"); + } + + if (kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_dummy_job_wa_execute(kbdev, + kbase_pm_get_present_cores(kbdev, + KBASE_PM_CORE_SHADER)); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* Enable the interrupts */ + kbase_pm_enable_interrupts(kbdev); + + /* Turn on the L2 caches */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.l2_desired = true; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_on); + +bool kbase_pm_clock_off(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* ASSERT that the cores should now be unavailable. No lock needed. */ + WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF); + + kbdev->poweroff_pending = true; + + if (!kbdev->pm.backend.gpu_powered) { + /* Already turned off */ + return true; + } + + KBASE_KTRACE_ADD(kbdev, PM_GPU_OFF, NULL, 0u); + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) + kbase_protected_mode_disable_exynos(kbdev); +#endif + + /* Disable interrupts. This also clears any outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure that any IRQ handlers have finished */ + kbase_synchronize_irqs(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (atomic_read(&kbdev->faults_pending)) { + /* Page/bus faults are still being processed. The GPU can not + * be powered off until they have completed */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return false; + } + + kbase_pm_cache_snoop_disable(kbdev); + + /* The GPU power may be turned off from this point */ + kbdev->pm.backend.gpu_powered = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + if (kbdev->pm.backend.callback_power_off) + kbdev->pm.backend.callback_power_off(kbdev); + return true; +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_off); + +struct kbasep_reset_timeout_data { + struct hrtimer timer; + bool timed_out; + struct kbase_device *kbdev; +}; + +void kbase_pm_reset_done(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + kbdev->pm.backend.reset_done = true; + wake_up(&kbdev->pm.backend.reset_done_wait); +} + +/** + * kbase_pm_wait_for_reset - Wait for a reset to happen + * + * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. 
+ * + * @kbdev: Kbase device + */ +static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + wait_event(kbdev->pm.backend.reset_done_wait, + (kbdev->pm.backend.reset_done)); + kbdev->pm.backend.reset_done = false; +} + +KBASE_EXPORT_TEST_API(kbase_pm_reset_done); + +static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) +{ + struct kbasep_reset_timeout_data *rtdata = + container_of(timer, struct kbasep_reset_timeout_data, timer); + + rtdata->timed_out = 1; + + /* Set the wait queue to wake up kbase_pm_init_hw even though the reset + * hasn't completed */ + kbase_pm_reset_done(rtdata->kbdev); + + return HRTIMER_NORESTART; +} + +static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) +{ + u32 hw_quirks_jm = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JM_CONFIG)); + + if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { + /* Only for tMIx */ + u32 coherency_features; + + coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES)); + + /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly + * documented for tMIx so force correct value here. + */ + if (coherency_features == + COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { + hw_quirks_jm |= (COHERENCY_ACE_LITE | + COHERENCY_ACE) << + JM_FORCE_COHERENCY_FEATURES_SHIFT; + } + } + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + kbdev->hw_quirks_jm = hw_quirks_jm; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { + int default_idvs_group_size = 0xF; + u32 tmp; + + if (of_property_read_u32(kbdev->dev->of_node, + "idvs-group-size", &tmp)) + tmp = default_idvs_group_size; + + if (tmp > IDVS_GROUP_MAX_SIZE) { + dev_err(kbdev->dev, + "idvs-group-size of %d is too large. Maximum value is %d", + tmp, IDVS_GROUP_MAX_SIZE); + tmp = default_idvs_group_size; + } + + kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT; + } + +#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) + if (corestack_driver_control) + kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; + + return 0; +} + +static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) +{ + u32 hw_quirks_sc = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_CONFIG)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ + hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; + else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ + hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162)) + hw_quirks_sc |= SC_VAR_ALGORITHM; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) + hw_quirks_sc |= SC_TLS_HASH_ENABLE; + + kbdev->hw_quirks_sc = hw_quirks_sc; + + return 0; +} + +static int kbase_set_tiler_quirks(struct kbase_device *kbdev) +{ + u32 hw_quirks_tiler = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_CONFIG)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + /* Set tiler clock gate override if required */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) + hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; + + kbdev->hw_quirks_tiler = hw_quirks_tiler; + + return 0; +} + +static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) +{ + struct device_node *np = kbdev->dev->of_node; + const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int error = 0; + + kbdev->hw_quirks_jm = 0; + kbdev->hw_quirks_sc = 0; + 
kbdev->hw_quirks_tiler = 0; + kbdev->hw_quirks_mmu = 0; + + if (!of_property_read_u32(np, "quirks_jm", + &kbdev->hw_quirks_jm)) { + dev_info(kbdev->dev, + "Found quirks_jm = [0x%x] in Devicetree\n", + kbdev->hw_quirks_jm); + } else { + error = kbase_set_jm_quirks(kbdev, prod_id); + if (error) + return error; + } + + if (!of_property_read_u32(np, "quirks_sc", + &kbdev->hw_quirks_sc)) { + dev_info(kbdev->dev, + "Found quirks_sc = [0x%x] in Devicetree\n", + kbdev->hw_quirks_sc); + } else { + error = kbase_set_sc_quirks(kbdev, prod_id); + if (error) + return error; + } + + if (!of_property_read_u32(np, "quirks_tiler", + &kbdev->hw_quirks_tiler)) { + dev_info(kbdev->dev, + "Found quirks_tiler = [0x%x] in Devicetree\n", + kbdev->hw_quirks_tiler); + } else { + error = kbase_set_tiler_quirks(kbdev); + if (error) + return error; + } + + if (!of_property_read_u32(np, "quirks_mmu", + &kbdev->hw_quirks_mmu)) { + dev_info(kbdev->dev, + "Found quirks_mmu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_mmu); + } else { + error = kbase_set_mmu_quirks(kbdev); + } + + return error; +} + +static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), + kbdev->hw_quirks_sc); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), + kbdev->hw_quirks_tiler); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), + kbdev->hw_quirks_mmu); + kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), + kbdev->hw_quirks_jm); +} + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) +{ + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && + !kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_enable_smc != 0) + kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); + kbdev->cci_snoop_enabled = true; + } +} + +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) +{ + if (kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_disable_smc != 0) { + mali_cci_flush_l2(kbdev); + kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); + } +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); + kbdev->cci_snoop_enabled = false; + } +} + +static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +} + +static int kbase_pm_do_reset(struct kbase_device *kbdev) +{ + struct kbasep_reset_timeout_data rtdata; + int ret; + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); + + KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); + + if (kbdev->pm.backend.callback_soft_reset) { + ret = kbdev->pm.backend.callback_soft_reset(kbdev); + if (ret < 0) + return ret; + else if (ret > 0) + return 0; + } else { + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SOFT_RESET); + } + + /* Unmask the reset complete interrupt only */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); + + /* Initialize a structure for tracking the status of the reset */ + rtdata.kbdev = kbdev; + rtdata.timed_out = 0; + + /* Create a timer to use as a timeout on the reset */ + hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + 
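+	/* kbasep_reset_timeout() marks the reset attempt as timed out and
+	 * wakes the reset waiter, so kbase_pm_wait_for_reset() below cannot
+	 * block forever if the RESET_COMPLETED interrupt never arrives.
+	 */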
rtdata.timer.function = kbasep_reset_timeout; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } + + /* No interrupt has been received - check if the RAWSTAT register says + * the reset has completed */ + if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & + RESET_COMPLETED) + || kbase_is_gpu_lost(kbdev)) { + /* The interrupt is set in the RAWSTAT; this suggests that the + * interrupts are not getting to the CPU */ + dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + /* If interrupts aren't working we can't continue. */ + destroy_hrtimer_on_stack(&rtdata.timer); + return -EINVAL; + } + + /* The GPU doesn't seem to be responding to the reset so try a hard + * reset */ + dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", + RESET_TIMEOUT); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_HARD_RESET); + + /* Restart the timer to wait for the hard reset to complete */ + rtdata.timed_out = 0; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } + + destroy_hrtimer_on_stack(&rtdata.timer); + + dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", + RESET_TIMEOUT); + + return -EINVAL; +} + +int kbase_pm_protected_mode_enable(struct kbase_device *const kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SET_PROTECTED_MODE); + return 0; +} + +int kbase_pm_protected_mode_disable(struct kbase_device *const kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + return kbase_pm_do_reset(kbdev); +} + +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +{ + unsigned long irq_flags; + int err; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.backend.gpu_powered) { + if (kbdev->pm.backend.callback_power_on) + kbdev->pm.backend.callback_power_on(kbdev); + + kbdev->pm.backend.gpu_powered = true; + } + + /* Ensure interrupts are off to begin with, this also clears any + * outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. 
*/ + kbase_pm_cache_snoop_disable(kbdev); + /* Prepare for the soft-reset */ + kbdev->pm.backend.reset_done = false; + + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) + if (kbdev->protected_mode) + err = kbdev->protected_ops->protected_mode_disable( + kbdev->protected_dev); + else + err = kbase_pm_do_reset(kbdev); +#else + /* Soft reset the GPU */ + err = kbdev->protected_ops->protected_mode_disable( + kbdev->protected_dev); +#endif + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbdev->protected_mode = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + if (err) + goto exit; + + if (flags & PM_HW_ISSUES_DETECT) { + err = kbase_pm_hw_issues_detect(kbdev); + if (err) + goto exit; + } + + kbase_pm_hw_issues_apply(kbdev); + kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + + /* Sanity check protected mode was left after reset */ + WARN_ON(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_PROTECTED_MODE_ACTIVE); + + /* If cycle counter was in use re-enable it, enable_irqs will only be + * false when called from kbase_pm_powerup */ + if (kbdev->pm.backend.gpu_cycle_counter_requests && + (flags & PM_ENABLE_IRQS)) { + kbase_pm_enable_interrupts(kbdev); + + /* Re-enable the counters if we need to */ + spin_lock_irqsave( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + if (kbdev->pm.backend.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + + kbase_pm_disable_interrupts(kbdev); + } + + if (flags & PM_ENABLE_IRQS) + kbase_pm_enable_interrupts(kbdev); + +exit: + if (!kbdev->pm.backend.protected_entry_transition_override) { + /* Re-enable GPU hardware counters if we're resetting from + * protected mode. + */ + reenable_protected_mode_hwcnt(kbdev); + } + + return err; +} + +/** + * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters + * + * Increase the count of cycle counter users and turn the cycle counters on if + * they were previously off + * + * This function is designed to be called by + * kbase_pm_request_gpu_cycle_counter() or + * kbase_pm_request_gpu_cycle_counter_l2_is_on() only + * + * When this function is called the l2 cache must be on - i.e., the GPU must be + * on. 
+ * + * @kbdev: The kbase device structure of the device + */ +static void +kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); + + ++kbdev->pm.backend.gpu_cycle_counter_requests; + + if (1 == kbdev->pm.backend.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); + + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); +} + +void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < + INT_MAX); + + kbase_pm_request_gpu_cycle_counter_do_request(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter); + +void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < + INT_MAX); + + kbase_pm_request_gpu_cycle_counter_do_request(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); + +void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); + + --kbdev->pm.backend.gpu_cycle_counter_requests; + + if (0 == kbdev->pm.backend.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_STOP); + + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); +} + +void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_internal.h new file mode 100644 index 000000000000..95f10e0fbcb8 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_internal.h @@ -0,0 +1,710 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Power management API definitions used internally by GPU backend + */ + +#ifndef _KBASE_BACKEND_PM_INTERNAL_H_ +#define _KBASE_BACKEND_PM_INTERNAL_H_ + +#include + +#include "mali_kbase_pm_ca.h" +#include "mali_kbase_pm_policy.h" + + +/** + * kbase_pm_dev_idle - The GPU is idle. + * + * The OS may choose to turn off idle devices + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_dev_idle(struct kbase_device *kbdev); + +/** + * kbase_pm_dev_activate - The GPU is active. + * + * The OS should avoid opportunistically turning off the GPU while it is active + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_dev_activate(struct kbase_device *kbdev); + +/** + * kbase_pm_get_present_cores - Get details of the cores that are present in + * the device. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) present in the GPU device and also a count of + * the number of cores. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of cores present + */ +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_get_active_cores - Get details of the cores that are currently + * active in the device. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are actively processing work (i.e. + * turned on *and* busy). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of active cores + */ +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_get_trans_cores - Get details of the cores that are currently + * transitioning between power states. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are currently transitioning between + * power states. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of transitioning cores + */ +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_get_ready_cores - Get details of the cores that are currently + * powered and ready for jobs. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are powered and ready for jobs (they may + * or may not be currently executing jobs). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of ready cores + */ +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_clock_on - Turn the clock for the device on, and enable device + * interrupts. + * + * This function can be used by a power policy to turn the clock for the GPU on. 
+ * It should be modified during integration to perform the necessary actions to + * ensure that the GPU is fully powered and clocked. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if clock on due to resume after suspend, false otherwise + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); + +/** + * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the + * device off. + * + * This function can be used by a power policy to turn the clock for the GPU + * off. It should be modified during integration to perform the necessary + * actions to turn the clock off (if this is possible in the integration). + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * + * Return: true if clock was turned off, or + * false if clock can not be turned off due to pending page/bus fault + * workers. Caller must flush MMU workqueues and retry + */ +bool kbase_pm_clock_off(struct kbase_device *kbdev); + +/** + * kbase_pm_enable_interrupts - Enable interrupts on the device. + * + * Interrupts are also enabled after a call to kbase_pm_clock_on(). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_enable_interrupts(struct kbase_device *kbdev); + +/** + * kbase_pm_disable_interrupts - Disable interrupts on the device. + * + * This prevents delivery of Power Management interrupts to the CPU so that + * kbase_pm_update_state() will not be called from the IRQ handler + * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. + * + * Interrupts are also disabled after a call to kbase_pm_clock_off(). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts(struct kbase_device *kbdev); + +/** + * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() + * that does not take the hwaccess_lock + * + * Caller must hold the hwaccess_lock. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); + +/** + * kbase_pm_init_hw - Initialize the hardware. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @flags: Flags specifying the type of PM init + * + * This function checks the GPU ID register to ensure that the GPU is supported + * by the driver and performs a reset on the device so that it is in a known + * state before the device is used. + * + * Return: 0 if the device is supported and successfully reset. + */ +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); + +/** + * kbase_pm_reset_done - The GPU has been reset successfully. + * + * This function must be called by the GPU interrupt handler when the + * RESET_COMPLETED bit is set. It signals to the power management initialization + * code that the GPU has been successfully reset. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_reset_done(struct kbase_device *kbdev); + +/** + * kbase_pm_wait_for_desired_state - Wait for the desired power state to be + * reached + * + * Wait for the L2 and shader power state machines to reach the states + * corresponding to the values of 'l2_desired' and 'shaders_desired'. + * + * The usual use-case for this is to ensure cores are 'READY' after performing + * a GPU Reset. 
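+ *
+ * An illustrative call sequence (a sketch only, assuming the caller follows
+ * the locking rules described in this file) is:
+ *
+ *   spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *   kbase_pm_update_state(kbdev);
+ *   spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ *   kbase_pm_wait_for_desired_state(kbdev);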
+ *
+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
+ * because this function will take that lock itself.
+ *
+ * NOTE: This may not wait until the correct state is reached if there is a
+ * power off in progress. To correctly wait for the desired state the caller
+ * must ensure that this is not the case by, for example, calling
+ * kbase_pm_wait_for_poweroff_complete()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
+ *
+ * Wait for the L2 to be powered on, and for the L2 and shader state machines to
+ * stabilise by reaching the states corresponding to the values of 'l2_desired'
+ * and 'shaders_desired'.
+ *
+ * kbdev->pm.active_count must be non-zero when calling this function.
+ *
+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
+ * because this function will take that lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state
+ * machines after changing shader core
+ * availability
+ *
+ * It can be called in any state, so the L2 and shader core power status must
+ * be checked inside this function, otherwise the shader/L2 state machines
+ * could break.
+ *
+ * Caller must hold hwaccess_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
+ * where the caller must hold
+ * kbase_device.hwaccess_lock
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_state - Update the L2 and shader power state machines
+ * @kbdev: Device pointer
+ */
+void kbase_pm_update_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_state_machine_init - Initialize the state machines, primarily the
+ * shader poweroff timer
+ * @kbdev: Device pointer
+ */
+int kbase_pm_state_machine_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_state_machine_term - Clean up the PM state machines' data
+ * @kbdev: Device pointer
+ */
+void kbase_pm_state_machine_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores_state - Update the desired state of shader cores from
+ * the Power Policy, and begin any power
+ * transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware achieve the desired shader
+ * core state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_pm_metrics_term - Terminate the metrics gathering framework.
+ * + * This must be called when metric gathering is no longer required. It is an + * error to call any metrics gathering function (other than + * kbasep_pm_metrics_init()) after calling this function. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_metrics_term(struct kbase_device *kbdev); + +/** + * kbase_pm_report_vsync - Function to be called by the frame buffer driver to + * update the vsync metric. + * + * This function should be called by the frame buffer driver to update whether + * the system is hitting the vsync target or not. buffer_updated should be true + * if the vsync corresponded with a new frame being displayed, otherwise it + * should be false. This function does not need to be called every vsync, but + * only when the value of @buffer_updated differs from a previous call. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @buffer_updated: True if the buffer has been updated on this VSync, + * false otherwise + */ +void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); + +/** + * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change + * the clock speed of the GPU. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function should be called regularly by the DVFS system to check whether + * the clock speed of the GPU needs updating. + */ +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); + +/** + * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is + * needed + * + * If the caller is the first caller then the GPU cycle counters will be enabled + * along with the l2 cache + * + * The GPU must be powered when calling this function (i.e. + * kbase_pm_context_active() must have been called). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); + +/** + * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is + * needed (l2 cache already on) + * + * This is a version of the above function + * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the + * l2 cache is known to be on and assured to be on until the subsequent call of + * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does + * not sleep and can be called from atomic functions. + * + * The GPU must be powered when calling this function (i.e. + * kbase_pm_context_active() must have been called) and the l2 cache must be + * powered on. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); + +/** + * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no + * longer in use + * + * If the caller is the last caller then the GPU cycle counters will be + * disabled. A request must have been made before a call to this. + * + * Caller must not hold the hwaccess_lock, as it will be taken in this function. + * If the caller is already holding this lock then + * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. 
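+ *
+ * An illustrative request/release pairing (a sketch only; see
+ * kbase_backend_get_gpu_time() for a real user of this pattern) is:
+ *
+ *   kbase_pm_request_gpu_cycle_counter(kbdev);
+ *   ... read CYCLE_COUNT_LO and CYCLE_COUNT_HI ...
+ *   kbase_pm_release_gpu_cycle_counter(kbdev);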
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter()
+ * that does not take hwaccess_lock
+ *
+ * Caller must hold the hwaccess_lock.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to
+ * complete
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device
+ *
+ * Setup the power management callbacks and initialize/enable the runtime-pm
+ * for the Mali GPU platform device, using the callback function. This must be
+ * called before the kbase_pm_register_access_enable() function.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+int kbase_pm_runtime_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_runtime_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_enable - Enable access to GPU registers
+ *
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration being called to turn on power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_register_access_disable - Disable early register access
+ *
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration being called to turn off power and/or clocks to the GPU. See
+ * kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * kbase_pm_metrics_is_active - Check if the power management metrics
+ * collection is active.
+ *
+ * Note that this returns whether the power management metrics collection was
+ * active at the time of calling; it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * Return: true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested.
+ * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if power on due to resume after suspend, + * false otherwise + */ +void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); + +/** + * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been + * requested. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_pm_do_poweroff(struct kbase_device *kbdev); + +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, + struct kbasep_pm_metrics *last, + struct kbasep_pm_metrics *diff); +#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ + +#ifdef CONFIG_MALI_MIDGARD_DVFS + +/** + * kbase_platform_dvfs_event - Report utilisation to DVFS code + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @utilisation: The current calculated utilisation by the metrics system. + * @util_gl_share: The current calculated gl share of utilisation. + * @util_cl_share: The current calculated cl share of utilisation per core + * group. + * Return: Returns 0 on failure and non zero on success. + */ + +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, + u32 util_gl_share, u32 util_cl_share[2]); +#endif + +void kbase_pm_power_changed(struct kbase_device *kbdev); + +/** + * kbase_pm_metrics_update - Inform the metrics system that an atom is either + * about to be run or has just completed. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @now: Pointer to the timestamp of the change, or NULL to use current time + * + * Caller must hold hwaccess_lock + */ +void kbase_pm_metrics_update(struct kbase_device *kbdev, + ktime_t *now); + +/** + * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called after L2 power up. + */ + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called before L2 power off. + */ +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); + +#ifdef CONFIG_MALI_DEVFREQ +/** + * kbase_devfreq_set_core_mask - Set devfreq core mask + * @kbdev: Device pointer + * @core_mask: New core mask + * + * This function is used by devfreq to change the available core mask as + * required by Dynamic Core Scaling. + */ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); +#endif + +/** + * kbase_pm_reset_start_locked - Signal that GPU reset has started + * @kbdev: Device pointer + * + * Normal power management operation will be suspended until the reset has + * completed. + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_reset_start_locked(struct kbase_device *kbdev); + +/** + * kbase_pm_reset_complete - Signal that GPU reset has completed + * @kbdev: Device pointer + * + * Normal power management operation will be resumed. The power manager will + * re-evaluate what cores are needed and power on or off as required. 
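+ *
+ * An illustrative reset flow (a sketch only) pairs this with
+ * kbase_pm_reset_start_locked():
+ *
+ *   spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *   kbase_pm_reset_start_locked(kbdev);
+ *   spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ *   ... perform the GPU reset ...
+ *   kbase_pm_reset_complete(kbdev);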
+ */ +void kbase_pm_reset_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_override_enable - Enable the protected mode override + * @kbdev: Device pointer + * + * When the protected mode override is enabled, all shader cores are requested + * to power down, and the L2 power state can be controlled by + * kbase_pm_protected_l2_override(). + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_protected_override_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_override_disable - Disable the protected mode override + * @kbdev: Device pointer + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_protected_override_disable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_l2_override - Control the protected mode L2 override + * @kbdev: Device pointer + * @override: true to enable the override, false to disable + * + * When the driver is transitioning in or out of protected mode, the L2 cache is + * forced to power off. This can be overridden to force the L2 cache to power + * on. This is required to change coherency settings on some GPUs. + */ +void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override); + +/** + * kbase_pm_protected_entry_override_enable - Enable the protected mode entry + * override + * @kbdev: Device pointer + * + * Initiate a GPU reset and enable the protected mode entry override flag if + * l2_always_on WA is enabled and platform is fully coherent. If the GPU + * reset is already ongoing then protected mode entry override flag will not + * be enabled and function will have to be called again. + * + * When protected mode entry override flag is enabled to power down L2 via GPU + * reset, the GPU reset handling behavior gets changed. For example call to + * kbase_backend_reset() is skipped, Hw counters are not re-enabled and L2 + * isn't powered up again post reset. + * This is needed only as a workaround for a Hw issue where explicit power down + * of L2 causes a glitch. For entering protected mode on fully coherent + * platforms L2 needs to be powered down to switch to IO coherency mode, so to + * avoid the glitch GPU reset is used to power down L2. Hence, this function + * does nothing on systems where the glitch issue isn't present. + * + * Caller must hold hwaccess_lock. Should be only called during the transition + * to enter protected mode. + * + * Return: -EAGAIN if a GPU reset was required for the glitch workaround but + * was already ongoing, otherwise 0. + */ +int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_entry_override_disable - Disable the protected mode entry + * override + * @kbdev: Device pointer + * + * This shall be called once L2 has powered down and switch to IO coherency + * mode has been made. As with kbase_pm_protected_entry_override_enable(), + * this function does nothing on systems where the glitch issue isn't present. + * + * Caller must hold hwaccess_lock. Should be only called during the transition + * to enter protected mode. + */ +void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev); + +/* If true, the driver should explicitly control corestack power management, + * instead of relying on the Power Domain Controller. 
+ */ +extern bool corestack_driver_control; + +/** + * kbase_pm_is_l2_desired - Check whether l2 is desired + * + * @kbdev: Device pointer + * + * This shall be called to check whether l2 is needed to power on + * + * Return: true if l2 need to power on + */ +bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); + +/** + * kbase_pm_lock - Lock all necessary mutexes to perform PM actions + * + * @kbdev: Device pointer + * + * This function locks correct mutexes independent of GPU architecture. + */ +static inline void kbase_pm_lock(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); +} + +/** + * kbase_pm_unlock - Unlock mutexes locked by kbase_pm_lock + * + * @kbdev: Device pointer + */ +static inline void kbase_pm_unlock(struct kbase_device *kbdev) +{ + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); +} + +#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_l2_states.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_l2_states.h new file mode 100644 index 000000000000..12cb051db42a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_l2_states.h @@ -0,0 +1,38 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend-specific Power Manager level 2 cache state definitions. + * The function-like macro KBASEP_L2_STATE() must be defined before including + * this header file. This header file can be included multiple times in the + * same compilation unit with different definitions of KBASEP_L2_STATE(). + */ +KBASEP_L2_STATE(OFF) +KBASEP_L2_STATE(PEND_ON) +KBASEP_L2_STATE(RESTORE_CLOCKS) +KBASEP_L2_STATE(ON_HWCNT_ENABLE) +KBASEP_L2_STATE(ON) +KBASEP_L2_STATE(ON_HWCNT_DISABLE) +KBASEP_L2_STATE(SLOW_DOWN_CLOCKS) +KBASEP_L2_STATE(POWER_DOWN) +KBASEP_L2_STATE(PEND_OFF) +KBASEP_L2_STATE(RESET_WAIT) diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_metrics.c new file mode 100644 index 000000000000..390bda8b0597 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_metrics.c @@ -0,0 +1,331 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Metrics for power management + */ + +#include +#include +#include +#include +#include +#include + +/* When VSync is being hit aim for utilisation between 70-90% */ +#define KBASE_PM_VSYNC_MIN_UTILISATION 70 +#define KBASE_PM_VSYNC_MAX_UTILISATION 90 +/* Otherwise aim for 10-40% */ +#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 +#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 + +/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns + * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly + * under 11s. Exceeding this will cause overflow */ +#define KBASE_PM_TIME_SHIFT 8 + +#ifdef CONFIG_MALI_MIDGARD_DVFS +static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbasep_pm_metrics_state *metrics; + + KBASE_DEBUG_ASSERT(timer != NULL); + + metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); + kbase_pm_get_dvfs_action(metrics->kbdev); + + spin_lock_irqsave(&metrics->lock, flags); + + if (metrics->timer_active) + hrtimer_start(timer, + HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); + + spin_unlock_irqrestore(&metrics->lock, flags); + + return HRTIMER_NORESTART; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +int kbasep_pm_metrics_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.metrics.kbdev = kbdev; + + kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.gpu_active = false; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; + + kbdev->pm.backend.metrics.values.time_busy = 0; + kbdev->pm.backend.metrics.values.time_idle = 0; + kbdev->pm.backend.metrics.values.busy_cl[0] = 0; + kbdev->pm.backend.metrics.values.busy_cl[1] = 0; + kbdev->pm.backend.metrics.values.busy_gl = 0; + + spin_lock_init(&kbdev->pm.backend.metrics.lock); + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->pm_metrics_init) + kbdev->vendor_callbacks->pm_metrics_init(kbdev); + else { +#ifdef CONFIG_MALI_MIDGARD_DVFS + hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbdev->pm.backend.metrics.timer.function = dvfs_callback; + + kbase_pm_metrics_start(kbdev); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + } + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->cl_boost_init) + kbdev->vendor_callbacks->cl_boost_init(kbdev); + + return 0; +} +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); + +void kbasep_pm_metrics_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_MIDGARD_DVFS + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->pm_metrics_term) + kbdev->vendor_callbacks->pm_metrics_term(kbdev); +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); + +/* caller needs to hold 
kbdev->pm.backend.metrics.lock before calling this + * function + */ +static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, + ktime_t now) +{ + ktime_t diff; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + if (ktime_to_ns(diff) < 0) + return; + + if (kbdev->pm.backend.metrics.gpu_active) { + u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + + kbdev->pm.backend.metrics.values.time_busy += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[0]) + kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[1]) + kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[0]) + kbdev->pm.backend.metrics.values.busy_gl += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[1]) + kbdev->pm.backend.metrics.values.busy_gl += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[2]) + kbdev->pm.backend.metrics.values.busy_gl += ns_time; + } else { + kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff) + >> KBASE_PM_TIME_SHIFT); + } + + kbdev->pm.backend.metrics.time_period_start = now; +} + +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, + struct kbasep_pm_metrics *last, + struct kbasep_pm_metrics *diff) +{ + struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values; + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); + + memset(diff, 0, sizeof(*diff)); + diff->time_busy = cur->time_busy - last->time_busy; + diff->time_idle = cur->time_idle - last->time_idle; + diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0]; + diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1]; + diff->busy_gl = cur->busy_gl - last->busy_gl; + + *last = *cur; + + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} +KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); +#endif + +#ifdef CONFIG_MALI_MIDGARD_DVFS +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) +{ + int utilisation, util_gl_share; + int util_cl_share[2]; + int busy; + struct kbasep_pm_metrics *diff; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + diff = &kbdev->pm.backend.metrics.dvfs_diff; + + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff); + + utilisation = (100 * diff->time_busy) / + max(diff->time_busy + diff->time_idle, 1u); + + busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); + util_gl_share = (100 * diff->busy_gl) / busy; + util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; + util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; + + kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share); +} + +bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) +{ + bool isactive; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + isactive = kbdev->pm.backend.metrics.timer_active; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + return isactive; +} +KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + +void kbase_pm_metrics_start(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = true; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + 
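/* Arm the DVFS polling timer after dropping the metrics lock;
+ * dvfs_callback() re-arms it for as long as timer_active remains true.
+ */
+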
hrtimer_start(&kbdev->pm.backend.metrics.timer, + HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); +} + +void kbase_pm_metrics_stop(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); +} + + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +/** + * kbase_pm_metrics_active_calc - Update PM active counts based on currently + * running atoms + * @kbdev: Device pointer + * + * The caller must hold kbdev->pm.backend.metrics.lock + */ +static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.gpu_active = false; + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + + /* Head atom may have just completed, so if it isn't running + * then try the next atom */ + if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) + katom = kbase_gpu_inspect(kbdev, js, 1); + + if (katom && katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + int device_nr = (katom->core_req & + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) + ? katom->device_nr : 0; + if (!WARN_ON(device_nr >= 2)) + kbdev->pm.backend.metrics. + active_cl_ctx[device_nr] = 1; + } else { + kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; + trace_sysgraph(SGR_ACTIVE, 0, js); + } + kbdev->pm.backend.metrics.gpu_active = true; + } else { + trace_sysgraph(SGR_INACTIVE, 0, js); + } + } +} + +/* called when job is submitted to or removed from a GPU slot */ +void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) +{ + unsigned long flags; + ktime_t now; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + + if (!timestamp) { + now = ktime_get(); + timestamp = &now; + } + + /* Track how long CL and/or GL jobs have been busy for */ + kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); + + kbase_pm_metrics_active_calc(kbdev); + + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_policy.c new file mode 100644 index 000000000000..17ed21e2fa35 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_policy.c @@ -0,0 +1,249 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Power policy API implementations
+ */
+
+#include
+#include
+#include
+#include
+
+static const struct kbase_pm_policy *const all_policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+ &kbase_pm_always_on_policy_ops,
+ &kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_always_on_demand_policy_ops,
+#endif
+#else /* CONFIG_MALI_NO_MALI */
+ &kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_always_on_demand_policy_ops,
+#endif
+ &kbase_pm_always_on_policy_ops
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+void kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+ kbdev->pm.backend.pm_current_policy = all_policy_list[0];
+ kbdev->pm.backend.pm_current_policy->init(kbdev);
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+ kbdev->pm.backend.pm_current_policy->term(kbdev);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+ struct kbase_pm_device_data *pm = &kbdev->pm;
+ struct kbase_pm_backend_data *backend = &pm->backend;
+ unsigned long flags;
+ bool active;
+
+ lockdep_assert_held(&pm->lock);
+
+ /* pm_current_policy will never be NULL while pm.lock is held */
+ KBASE_DEBUG_ASSERT(backend->pm_current_policy);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ active = backend->pm_current_policy->get_core_active(kbdev);
+ WARN((kbase_pm_is_active(kbdev) && !active),
+ "GPU is active but policy '%s' is indicating that it can be powered off",
+ kbdev->pm.backend.pm_current_policy->name);
+
+ if (active) {
+ /* Power on the GPU and any cores requested by the policy */
+ if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off &&
+ pm->backend.poweroff_wait_in_progress) {
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ pm->backend.poweron_required = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ } else {
+ /* Cancel the invocation of
+ * kbase_pm_gpu_poweroff_wait_wq() from the L2 state
+ * machine. This is safe - if
+ * invoke_poweroff_wait_wq_when_l2_off is true, then
+ * the poweroff work hasn't even been queued yet,
+ * meaning we can go straight to powering on.
+ */ + pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; + pm->backend.poweroff_wait_in_progress = false; + pm->backend.l2_desired = true; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_do_poweron(kbdev, false); + } + } else { + /* It is an error for the power policy to power off the GPU + * when there are contexts active */ + KBASE_DEBUG_ASSERT(pm->active_count == 0); + + pm->backend.poweron_required = false; + + /* Request power off */ + if (pm->backend.gpu_powered) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Power off the GPU immediately */ + kbase_pm_do_poweroff(kbdev); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } +} + +void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev) +{ + bool shaders_desired; + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->pm.backend.pm_current_policy == NULL) + return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; + /* In protected transition, don't allow outside shader core request + * affect transition, return directly + */ + if (kbdev->pm.backend.protected_transition_override) + return; + + shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); + + if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) { + kbase_pm_update_state(kbdev); + } +} + +void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) +{ + bool shaders_desired; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.pm_current_policy == NULL) + return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; + + if (kbdev->pm.backend.protected_transition_override) + /* We are trying to change in/out of protected mode - force all + * cores off so that the L2 powers down */ + shaders_desired = false; + else + shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); + + if (kbdev->pm.backend.shaders_desired != shaders_desired) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); + + kbdev->pm.backend.shaders_desired = shaders_desired; + kbase_pm_update_state(kbdev); + } +} + +void kbase_pm_update_cores_state(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +int kbase_pm_list_policies(struct kbase_device *kbdev, + const struct kbase_pm_policy * const **list) +{ + if (list) + *list = all_policy_list; + + return ARRAY_SIZE(all_policy_list); +} + +KBASE_EXPORT_TEST_API(kbase_pm_list_policies); + +const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.backend.pm_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_policy); + +void kbase_pm_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_policy *new_policy) +{ + const struct kbase_pm_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + kbase_pm_context_active(kbdev); + + kbase_pm_lock(kbdev); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + 
old_policy = kbdev->pm.backend.pm_current_policy; + kbdev->pm.backend.pm_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, old_policy->id); + if (old_policy->term) + old_policy->term(kbdev); + + KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.pm_current_policy = new_policy; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* If any core power state changes were previously attempted, but + * couldn't be made because the policy was changing (current_policy was + * NULL), then re-try them here. */ + kbase_pm_update_active(kbdev); + kbase_pm_update_cores_state(kbdev); + + kbase_pm_unlock(kbdev); + + /* Now the policy change is finished, we release our fake context active + * reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_set_policy); diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_policy.h new file mode 100644 index 000000000000..f103ef0c01e4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_policy.h @@ -0,0 +1,106 @@ +/* + * + * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Power policy API definitions + */ + +#ifndef _KBASE_PM_POLICY_H_ +#define _KBASE_PM_POLICY_H_ + +/** + * kbase_pm_policy_init - Initialize power policy framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called before calling any other policy function + */ +void kbase_pm_policy_init(struct kbase_device *kbdev); + +/** + * kbase_pm_policy_term - Terminate power policy framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_policy_term(struct kbase_device *kbdev); + +/** + * kbase_pm_update_active - Update the active power state of the GPU + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Calls into the current power policy + */ +void kbase_pm_update_active(struct kbase_device *kbdev); + +/** + * kbase_pm_update_cores - Update the desired core state of the GPU + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Calls into the current power policy + */ +void kbase_pm_update_cores(struct kbase_device *kbdev); + +/** + * kbase_pm_cores_requested - Check that a power request has been locked into + * the HW. + * @kbdev: Kbase device + * @shader_required: true if shaders are required + * + * Called by the scheduler to check if a power on request has been locked into + * the HW. 
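+ *
+ * An illustrative caller-side check (a sketch only; submit_job_to_slot() is a
+ * placeholder, not a real kbase function) might be:
+ *
+ *   lockdep_assert_held(&kbdev->hwaccess_lock);
+ *   if (kbase_pm_cores_requested(kbdev, true))
+ *           submit_job_to_slot(kbdev, js);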
+ * + * Note that there is no guarantee that the cores are actually ready, however + * when the request has been locked into the HW, then it is safe to submit work + * since the HW will wait for the transition to ready. + * + * A reference must first be taken prior to making this call. + * + * Caller must hold the hwaccess_lock. + * + * Return: true if the request to the HW was successfully made else false if the + * request is still pending. + */ +static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, + bool shader_required) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* If the L2 & tiler are not on or pending, then the tiler is not yet + * available, and shaders are definitely not powered. + */ + if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) + return false; + + if (shader_required && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) + return false; + + return true; +} + +#endif /* _KBASE_PM_POLICY_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_shader_states.h b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_shader_states.h new file mode 100644 index 000000000000..2bd9e4798e93 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_pm_shader_states.h @@ -0,0 +1,43 @@ +/* + * + * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend-specific Power Manager shader core state definitions. + * The function-like macro KBASEP_SHADER_STATE() must be defined before + * including this header file. This header file can be included multiple + * times in the same compilation unit with different definitions of + * KBASEP_SHADER_STATE(). 
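+ *
+ * For example (an illustrative sketch; the real definitions live elsewhere in
+ * the PM backend headers), an enum of shader core states could be generated
+ * with:
+ *
+ *   #define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n,
+ *   enum kbase_shader_core_state {
+ *   #include "mali_kbase_pm_shader_states.h"
+ *   };
+ *   #undef KBASEP_SHADER_STATE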
+ */ +KBASEP_SHADER_STATE(OFF_CORESTACK_OFF) +KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON) +KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON) +KBASEP_SHADER_STATE(ON_CORESTACK_ON) +KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK) +KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON) +KBASEP_SHADER_STATE(WAIT_GPU_IDLE) +KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON) +KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON) +KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON) +KBASEP_SHADER_STATE(PEND_OFF_CORESTACK_ON) +KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_OFF) +KBASEP_SHADER_STATE(OFF_CORESTACK_OFF_TIMER_PEND_OFF) +KBASEP_SHADER_STATE(RESET_WAIT) diff --git a/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_time.c new file mode 100644 index 000000000000..a9c33e25ccca --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/backend/gpu/mali_kbase_time.c @@ -0,0 +1,77 @@ +/* + * + * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include + +void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, + u64 *cycle_counter, + u64 *system_time, + struct timespec64 *ts) +{ + u32 hi1, hi2; + + if (cycle_counter) { + /* Read hi, lo, hi to ensure a coherent u64 */ + do { + hi1 = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CYCLE_COUNT_HI)); + *cycle_counter = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CYCLE_COUNT_LO)); + hi2 = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CYCLE_COUNT_HI)); + } while (hi1 != hi2); + *cycle_counter |= (((u64) hi1) << 32); + } + + if (system_time) { + /* Read hi, lo, hi to ensure a coherent u64 */ + do { + hi1 = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TIMESTAMP_HI)); + *system_time = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TIMESTAMP_LO)); + hi2 = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TIMESTAMP_HI)); + } while (hi1 != hi2); + *system_time |= (((u64) hi1) << 32); + } + + /* Record the CPU's idea of current time */ + if (ts != NULL) +#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE) + *ts = ktime_to_timespec64(ktime_get_raw()); +#else + ktime_get_raw_ts64(ts); +#endif +} + +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts) +{ + kbase_pm_request_gpu_cycle_counter(kbdev); + kbase_backend_get_gpu_time_norequest( + kbdev, cycle_counter, system_time, ts); + kbase_pm_release_gpu_cycle_counter(kbdev); +} diff --git a/drivers/gpu/arm/b_r26p0/build.bp b/drivers/gpu/arm/b_r26p0/build.bp new file mode 100644 index 000000000000..51aeecd6b006 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/build.bp @@ -0,0 +1,183 @@ +/* + * + * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. 
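/*
 * [Editorial sketch, not part of the patch] kbase_backend_get_gpu_time_norequest()
 * above reads each 64-bit counter with the classic hi/lo/hi sequence: read the
 * high word, read the low word, re-read the high word, and retry if the high
 * word changed in between. That yields a coherent 64-bit value from two 32-bit
 * registers without taking a lock. A minimal stand-alone sketch of the pattern
 * (read_hi()/read_lo() are hypothetical stand-ins for kbase_reg_read()):
 */
static inline u64 read_split_counter64(u32 (*read_hi)(void), u32 (*read_lo)(void))
{
	u32 hi1, hi2, lo;

	do {
		hi1 = read_hi();
		lo  = read_lo();
		hi2 = read_hi();
	} while (hi1 != hi2);	/* high word changed mid-read: retry */

	return ((u64)hi1 << 32) | lo;
}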
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +/* Kernel-side tests may include mali_kbase's headers. Therefore any config + * options which affect the sizes of any structs (e.g. adding extra members) + * must be included in these defaults, so that the structs are consistent in + * both mali_kbase and the test modules. */ +bob_defaults { + name: "mali_kbase_shared_config_defaults", + no_mali: { + kbuild_options: ["CONFIG_MALI_NO_MALI=y"], + }, + mali_real_hw: { + kbuild_options: ["CONFIG_MALI_REAL_HW=y"], + }, + mali_devfreq: { + kbuild_options: ["CONFIG_MALI_DEVFREQ=y"], + }, + mali_midgard_dvfs: { + kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"], + }, + mali_debug: { + kbuild_options: ["CONFIG_MALI_DEBUG=y"], + }, + buslog: { + kbuild_options: ["CONFIG_MALI_BUSLOG=y"], + }, + cinstr_vector_dump: { + kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"], + }, + cinstr_gwt: { + kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"], + }, + mali_gator_support: { + kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"], + }, + mali_midgard_enable_trace: { + kbuild_options: ["CONFIG_MALI_MIDGARD_ENABLE_TRACE=y"], + }, + mali_system_trace: { + kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"], + }, + mali_pwrsoft_765: { + kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], + }, + mali_memory_fully_backed: { + kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], + }, + mali_dma_buf_map_on_demand: { + kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"], + }, + mali_dma_buf_legacy_compat: { + kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], + }, + mali_arbiter_support: { + kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], + }, + mali_gem5_build: { + kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], + }, + kbuild_options: [ + "MALI_UNIT_TEST={{.unit_test_code}}", + "MALI_CUSTOMER_RELEASE={{.release}}", + "MALI_USE_CSF={{.gpu_has_csf}}", + "MALI_KERNEL_TEST_API={{.debug}}", + ], + defaults: ["kernel_defaults"], +} + +bob_kernel_module { + name: "mali_kbase", + srcs: [ + "*.c", + "*.h", + "Kbuild", + "backend/gpu/*.c", + "backend/gpu/*.h", + "backend/gpu/Kbuild", + "context/*.c", + "context/*.h", + "ipa/*.c", + "ipa/*.h", + "ipa/Kbuild", + "platform/*.h", + "platform/*/*.c", + "platform/*/*.h", + "platform/*/Kbuild", + "thirdparty/*.c", + "debug/*.c", + "debug/*.h", + "device/*.c", + "device/*.h", + "gpu/*.c", + "gpu/*.h", + "tl/*.c", + "tl/*.h", + "mmu/*.c", + "mmu/*.h", + ], + kbuild_options: [ + "CONFIG_MALI_KUTF=n", + "CONFIG_MALI_MIDGARD=m", + "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", + "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", + ], + buslog: { + extra_symbols: [ + "bus_logger", + ], + }, + mali_corestack: { + kbuild_options: ["CONFIG_MALI_CORESTACK=y"], + }, + mali_error_inject: { + kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], + }, + mali_error_inject_random: { + kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], + }, + cinstr_secondary_hwc: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], + }, + cinstr_secondary_hwc_via_debug_fs: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"], + }, + mali_2mb_alloc: { + kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], + }, 
+ mali_hw_errata_1485982_not_affected: { + kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"], + }, + mali_hw_errata_1485982_use_clock_alternative: { + kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], + }, + gpu_has_job_manager: { + srcs: [ + "context/backend/*_jm.c", + "debug/backend/*_jm.c", + "debug/backend/*_jm.h", + "device/backend/*_jm.c", + "gpu/backend/*_jm.c", + "gpu/backend/*_jm.h", + "jm/*.h", + "tl/backend/*_jm.c", + "mmu/backend/*_jm.c", + ], + }, + gpu_has_csf: { + srcs: [ + "context/backend/*_csf.c", + "csf/*.c", + "csf/*.h", + "csf/Kbuild", + "debug/backend/*_csf.c", + "debug/backend/*_csf.h", + "device/backend/*_csf.c", + "gpu/backend/*_csf.c", + "gpu/backend/*_csf.h", + "tl/backend/*_csf.c", + "mmu/backend/*_csf.c", + ], + }, + mali_arbiter_support: { + srcs: [ + "arbiter/*.c", + "arbiter/*.h", + "arbiter/Kbuild", + ], + }, + defaults: ["mali_kbase_shared_config_defaults"], +} diff --git a/drivers/gpu/arm/b_r26p0/context/backend/mali_kbase_context_jm.c b/drivers/gpu/arm/b_r26p0/context/backend/mali_kbase_context_jm.c new file mode 100644 index 000000000000..818ae95b5829 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/context/backend/mali_kbase_context_jm.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel context APIs for Job Manager GPUs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_FS +#include +#include + +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + kbase_debug_mem_view_init(kctx); + kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); + kbase_jit_debugfs_init(kctx); + kbasep_jd_debugfs_ctx_init(kctx); + kbase_debug_job_fault_context_init(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + debugfs_remove_recursive(kctx->kctx_dentry); + kbase_debug_job_fault_context_term(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#else +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#endif /* CONFIG_DEBUG_FS */ + +static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx) +{ + int ret = kbase_kinstr_jm_init(&kctx->kinstr_jm); + + if (!ret) + return ret; + + return 0; +} + +static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx) +{ + kbase_kinstr_jm_term(kctx->kinstr_jm); +} + +static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) +{ + kbase_timer_setup(&kctx->soft_job_timeout, + kbasep_soft_job_timeout_worker); + + return 0; +} + +static int kbase_context_submit_check(struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + unsigned long irq_flags = 0; + + base_context_create_flags const flags = kctx->create_flags; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + + /* Translate the flags */ + if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) + kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return 0; +} + +static const struct kbase_context_init context_init[] = { + {kbase_context_common_init, kbase_context_common_term, NULL}, + {kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, + "Memory pool goup initialization failed"}, + {kbase_mem_evictable_init, kbase_mem_evictable_deinit, + "Memory evictable initialization failed"}, + {kbasep_js_kctx_init, kbasep_js_kctx_term, + "JS kctx initialization failed"}, + {kbase_jd_init, kbase_jd_exit, + "JD initialization failed"}, + {kbase_event_init, kbase_event_cleanup, + "Event initialization failed"}, + {kbase_dma_fence_init, kbase_dma_fence_term, + "DMA fence initialization failed"}, + {kbase_context_mmu_init, kbase_context_mmu_term, + "MMU initialization failed"}, + {kbase_context_mem_alloc_page, kbase_context_mem_pool_free, + "Memory alloc page failed"}, + {kbase_region_tracker_init, kbase_region_tracker_term, + "Region tracker initialization failed"}, + {kbase_sticky_resource_init, kbase_context_sticky_resource_term, + "Sticky resource initialization failed"}, + {kbase_jit_init, kbase_jit_term, + "JIT initialization failed"}, + {kbase_context_kbase_kinstr_jm_init, kbase_context_kbase_kinstr_jm_term, + "JM instrumentation initialization failed"}, + {kbase_context_kbase_timer_setup, NULL, NULL}, + {kbase_context_submit_check, NULL, 
NULL}, +}; + +static void kbase_context_term_partial( + struct kbase_context *kctx, + unsigned int i) +{ + while (i-- > 0) { + if (context_init[i].term) + context_init[i].term(kctx); + } +} + +struct kbase_context *kbase_create_context(struct kbase_device *kbdev, + bool is_compat, + base_context_create_flags const flags, + unsigned long const api_version, + struct file *const filp) +{ + struct kbase_context *kctx; + unsigned int i = 0; + + if (WARN_ON(!kbdev)) + return NULL; + + /* Validate flags */ + if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) + return NULL; + + /* zero-inited as lot of code assume it's zero'ed out on create */ + kctx = vzalloc(sizeof(*kctx)); + if (WARN_ON(!kctx)) + return NULL; + + kctx->kbdev = kbdev; + kctx->api_version = api_version; + kctx->filp = filp; + kctx->create_flags = flags; + + if (is_compat) + kbase_ctx_flag_set(kctx, KCTX_COMPAT); +#if defined(CONFIG_64BIT) + else + kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); +#endif /* !defined(CONFIG_64BIT) */ + + for (i = 0; i < ARRAY_SIZE(context_init); i++) { + int err = context_init[i].init(kctx); + + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + context_init[i].err_mes, err); + kbase_context_term_partial(kctx, i); + return NULL; + } + } + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->create_context) + kbdev->vendor_callbacks->create_context(kctx); + + /* MALI_SEC_INTEGRATION */ + atomic_set(&kctx->mem_profile_showing_state, 0); + init_waitqueue_head(&kctx->mem_profile_wait); + + return kctx; +} +KBASE_EXPORT_SYMBOL(kbase_create_context); + +void kbase_destroy_context(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return; + + kbdev = kctx->kbdev; + if (WARN_ON(!kbdev)) + return; + + /* Ensure the core is powered up for the destroy process + * A suspend won't happen here, because we're in a syscall + * from a userspace thread. + */ + kbase_pm_context_active(kbdev); + + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); + + kbase_jd_zap_context(kctx); + flush_workqueue(kctx->jctx.job_done_wq); + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->destroy_context) + kbdev->vendor_callbacks->destroy_context(kctx); + + kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); + + kbase_pm_context_idle(kbdev); +} +KBASE_EXPORT_SYMBOL(kbase_destroy_context); diff --git a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c new file mode 100644 index 000000000000..10211ba60805 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
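/*
 * [Editorial sketch, not part of the patch] The context_init[] table above
 * pairs every init step with its matching term step. kbase_create_context()
 * walks the table forward and, on the first failure, kbase_context_term_partial()
 * unwinds only the steps that already succeeded, in reverse order. The same
 * pattern in isolation (struct and function names here are hypothetical):
 */
struct init_step {
	int (*init)(void *obj);
	void (*term)(void *obj);
};

static int run_init_table(const struct init_step *steps, size_t n, void *obj)
{
	size_t i;

	for (i = 0; i < n; i++) {
		int err = steps[i].init ? steps[i].init(obj) : 0;

		if (err) {
			/* roll back steps 0..i-1 in reverse order */
			while (i-- > 0)
				if (steps[i].term)
					steps[i].term(obj);
			return err;
		}
	}

	return 0;
}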
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel context APIs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * find_process_node - Used to traverse the process rb_tree to find if + * process exists already in process rb_tree. + * + * @node: Pointer to root node to start search. + * @tgid: Thread group PID to search for. + * + * Return: Pointer to kbase_process if exists otherwise NULL. + */ +static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid) +{ + struct kbase_process *kprcs = NULL; + + /* Check if the kctx creation request is from a existing process.*/ + while (node) { + struct kbase_process *prcs_node = + rb_entry(node, struct kbase_process, kprcs_node); + if (prcs_node->tgid == tgid) { + kprcs = prcs_node; + break; + } + + if (tgid < prcs_node->tgid) + node = node->rb_left; + else + node = node->rb_right; + } + + return kprcs; +} + +/** + * kbase_insert_kctx_to_process - Initialise kbase process context. + * + * @kctx: Pointer to kbase context. + * + * Here we initialise per process rb_tree managed by kbase_device. + * We maintain a rb_tree of each unique process that gets created. + * and Each process maintains a list of kbase context. + * This setup is currently used by kernel trace functionality + * to trace and visualise gpu memory consumption. + * + * Return: 0 on success and error number on failure. + */ +static int kbase_insert_kctx_to_process(struct kbase_context *kctx) +{ + struct rb_root *const prcs_root = &kctx->kbdev->process_root; + const pid_t tgid = kctx->tgid; + struct kbase_process *kprcs = NULL; + + lockdep_assert_held(&kctx->kbdev->kctx_list_lock); + + kprcs = find_process_node(prcs_root->rb_node, tgid); + + /* if the kctx is from new process then create a new kbase_process + * and add it to the &kbase_device->rb_tree + */ + if (!kprcs) { + struct rb_node **new = &prcs_root->rb_node, *parent = NULL; + + kprcs = kzalloc(sizeof(*kprcs), GFP_KERNEL); + if (kprcs == NULL) + return -ENOMEM; + kprcs->tgid = tgid; + INIT_LIST_HEAD(&kprcs->kctx_list); + kprcs->dma_buf_root = RB_ROOT; + kprcs->total_gpu_pages = 0; + + while (*new) { + struct kbase_process *prcs_node; + + parent = *new; + prcs_node = rb_entry(parent, struct kbase_process, + kprcs_node); + if (tgid < prcs_node->tgid) + new = &(*new)->rb_left; + else + new = &(*new)->rb_right; + } + rb_link_node(&kprcs->kprcs_node, parent, new); + rb_insert_color(&kprcs->kprcs_node, prcs_root); + } + + kctx->kprcs = kprcs; + list_add(&kctx->kprcs_link, &kprcs->kctx_list); + + return 0; +} + +int kbase_context_common_init(struct kbase_context *kctx) +{ + const unsigned long cookies_mask = KBASE_COOKIE_MASK; + int err = 0; + + /* creating a context is considered a disjoint event */ + kbase_disjoint_event(kctx->kbdev); + + kctx->as_nr = KBASEP_AS_NR_INVALID; + + atomic_set(&kctx->refcount, 0); + + spin_lock_init(&kctx->mm_update_lock); + kctx->process_mm = NULL; + atomic_set(&kctx->nonmapped_pages, 0); + atomic_set(&kctx->permanent_mapped_pages, 0); + kctx->tgid = current->tgid; + kctx->pid = current->pid; + + atomic_set(&kctx->used_pages, 0); + + mutex_init(&kctx->reg_lock); + + spin_lock_init(&kctx->mem_partials_lock); + INIT_LIST_HEAD(&kctx->mem_partials); + + spin_lock_init(&kctx->waiting_soft_jobs_lock); + INIT_LIST_HEAD(&kctx->waiting_soft_jobs); + + init_waitqueue_head(&kctx->event_queue); + atomic_set(&kctx->event_count, 0); + atomic_set(&kctx->event_closed, false); + + bitmap_copy(kctx->cookies, 
&cookies_mask, BITS_PER_LONG); + +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_set(&kctx->jctx.work_id, 0); +#endif + + kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; + + mutex_init(&kctx->legacy_hwcnt_lock); + + mutex_lock(&kctx->kbdev->kctx_list_lock); + list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); + + err = kbase_insert_kctx_to_process(kctx); + if (err) + dev_err(kctx->kbdev->dev, + "(err:%d) failed to insert kctx to kbase_process\n", err); + + KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id, + kctx->kbdev->gpu_props.props.raw_props.gpu_id); + KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id, + (u32)(kctx->tgid)); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + + return err; +} + +/** + * kbase_remove_kctx_from_process - remove a terminating context from + * the process list. + * + * @kctx: Pointer to kbase context. + * + * Remove the tracking of context from the list of contexts maintained under + * kbase process and if the list if empty then there no outstanding contexts + * we can remove the process node as well. + */ + +static void kbase_remove_kctx_from_process(struct kbase_context *kctx) +{ + struct kbase_process *kprcs = kctx->kprcs; + + lockdep_assert_held(&kctx->kbdev->kctx_list_lock); + list_del(&kctx->kprcs_link); + + /* if there are no outstanding contexts in current process node, + * we can remove it from the process rb_tree. + */ + if (list_empty(&kprcs->kctx_list)) { + rb_erase(&kprcs->kprcs_node, &kctx->kbdev->process_root); + /* Add checks, so that the terminating process Should not + * hold any gpu_memory. + */ + WARN_ON(kprcs->total_gpu_pages); + WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root)); + kfree(kprcs); + } +} + +void kbase_context_common_term(struct kbase_context *kctx) +{ + unsigned long flags; + int pages; + + mutex_lock(&kctx->kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + kbase_ctx_sched_remove_ctx(kctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->mmu_hw_mutex); + + pages = atomic_read(&kctx->used_pages); + if (pages != 0) + dev_warn(kctx->kbdev->dev, + "%s: %d pages in use!\n", __func__, pages); + + WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); + + mutex_lock(&kctx->kbdev->kctx_list_lock); + kbase_remove_kctx_from_process(kctx); + + KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id); + + KBASE_TLSTREAM_TL_DEL_CTX(kctx->kbdev, kctx); + list_del(&kctx->kctx_list_link); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + + KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, kctx->tgid); + + /* Flush the timeline stream, so the user can see the termination + * tracepoints being fired. + * The "if" statement below is for optimization. It is safe to call + * kbase_timeline_streams_flush when timeline is disabled. 
+ */ + if (atomic_read(&kctx->kbdev->timeline_flags) != 0) + kbase_timeline_streams_flush(kctx->kbdev->timeline); + + vfree(kctx); +} + +int kbase_context_mem_pool_group_init(struct kbase_context *kctx) +{ + return kbase_mem_pool_group_init(&kctx->mem_pools, + kctx->kbdev, + &kctx->kbdev->mem_pool_defaults, + &kctx->kbdev->mem_pools); +} + +void kbase_context_mem_pool_group_term(struct kbase_context *kctx) +{ + kbase_mem_pool_group_term(&kctx->mem_pools); +} + +int kbase_context_mmu_init(struct kbase_context *kctx) +{ + kbase_mmu_init(kctx->kbdev, + &kctx->mmu, kctx, + base_context_mmu_group_id_get(kctx->create_flags)); + + return 0; +} + +void kbase_context_mmu_term(struct kbase_context *kctx) +{ + kbase_mmu_term(kctx->kbdev, &kctx->mmu); +} + +int kbase_context_mem_alloc_page(struct kbase_context *kctx) +{ + struct page *p; + + p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]); + if (!p) + return -ENOMEM; + + kctx->aliasing_sink_page = as_tagged(page_to_phys(p)); + + return 0; +} + +void kbase_context_mem_pool_free(struct kbase_context *kctx) +{ + /* drop the aliasing sink page now that it can't be mapped anymore */ + kbase_mem_pool_free( + &kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], + as_page(kctx->aliasing_sink_page), + false); +} + +void kbase_context_sticky_resource_term(struct kbase_context *kctx) +{ + unsigned long pending_regions_to_clean; + + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + + /* free pending region setups */ + pending_regions_to_clean = KBASE_COOKIE_MASK; + bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, + kctx->cookies, BITS_PER_LONG); + while (pending_regions_to_clean) { + unsigned int cookie = find_first_bit(&pending_regions_to_clean, + BITS_PER_LONG); + + if (!WARN_ON(!kctx->pending_regions[cookie])) { + dev_dbg(kctx->kbdev->dev, "Freeing pending unmapped region\n"); + kbase_mem_phy_alloc_put( + kctx->pending_regions[cookie]->cpu_alloc); + kbase_mem_phy_alloc_put( + kctx->pending_regions[cookie]->gpu_alloc); + kfree(kctx->pending_regions[cookie]); + + kctx->pending_regions[cookie] = NULL; + } + + bitmap_clear(&pending_regions_to_clean, cookie, 1); + } + kbase_gpu_vm_unlock(kctx); +} diff --git a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.h b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.h new file mode 100644 index 000000000000..e4ed8944bdd2 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.h @@ -0,0 +1,157 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + */ + +#ifndef _KBASE_CONTEXT_H_ +#define _KBASE_CONTEXT_H_ + +#include + +/** + * kbase_context_debugfs_init - Initialize the kctx platform + * specific debugfs + * + * @kctx: kbase context + * + * This initializes some debugfs interfaces specific to the platform the source + * is compiled for. + */ +void kbase_context_debugfs_init(struct kbase_context *const kctx); + +/** + * kbase_context_debugfs_term - Terminate the kctx platform + * specific debugfs + * + * @kctx: kbase context + * + * This terminates some debugfs interfaces specific to the platform the source + * is compiled for. + */ +void kbase_context_debugfs_term(struct kbase_context *const kctx); + +/** + * kbase_create_context() - Create a kernel base context. + * + * @kbdev: Object representing an instance of GPU platform device, + * allocated from the probe method of the Mali driver. + * @is_compat: Force creation of a 32-bit context + * @flags: Flags to set, which shall be any combination of + * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. + * @api_version: Application program interface version, as encoded in + * a single integer by the KBASE_API_VERSION macro. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * + * Up to one context can be created for each client that opens the device file + * /dev/malixx. Context creation is deferred until a special ioctl() system call + * is made on the device file. Each context has its own GPU address space. + * + * Return: new kbase context or NULL on failure + */ +struct kbase_context * +kbase_create_context(struct kbase_device *kbdev, bool is_compat, + base_context_create_flags const flags, + unsigned long api_version, + struct file *filp); + +/** + * kbase_destroy_context - Destroy a kernel base context. + * @kctx: Context to destroy + * + * Will release all outstanding regions. + */ +void kbase_destroy_context(struct kbase_context *kctx); + +/** + * kbase_ctx_flag - Check if @flag is set on @kctx + * @kctx: Pointer to kbase context to check + * @flag: Flag to check + * + * Return: true if @flag is set on @kctx, false if not. + */ +static inline bool kbase_ctx_flag(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ + return atomic_read(&kctx->flags) & flag; +} + +/** + * kbase_ctx_flag_clear - Clear @flag on @kctx + * @kctx: Pointer to kbase context + * @flag: Flag to clear + * + * Clear the @flag on @kctx. This is done atomically, so other flags being + * cleared or set at the same time will be safe. + * + * Some flags have locking requirements, check the documentation for the + * respective flags. + */ +static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ +#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE + /* + * Earlier kernel versions doesn't have atomic_andnot() or + * atomic_and(). atomic_clear_mask() was only available on some + * architectures and removed on arm in v3.13 on arm and arm64. + * + * Use a compare-exchange loop to clear the flag on pre 4.3 kernels, + * when atomic_andnot() becomes available. 
+ */ + int old, new; + + do { + old = atomic_read(&kctx->flags); + new = old & ~flag; + + } while (atomic_cmpxchg(&kctx->flags, old, new) != old); +#else + atomic_andnot(flag, &kctx->flags); +#endif +} + +/** + * kbase_ctx_flag_set - Set @flag on @kctx + * @kctx: Pointer to kbase context + * @flag: Flag to set + * + * Set the @flag on @kctx. This is done atomically, so other flags being + * cleared or set at the same time will be safe. + * + * Some flags have locking requirements, check the documentation for the + * respective flags. + */ +static inline void kbase_ctx_flag_set(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ + atomic_or(flag, &kctx->flags); +} +#endif /* _KBASE_CONTEXT_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context_internal.h b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context_internal.h new file mode 100644 index 000000000000..818cdbea960d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context_internal.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + */ + +#include + +typedef int kbase_context_init_method(struct kbase_context *kctx); +typedef void kbase_context_term_method(struct kbase_context *kctx); + +/** + * struct kbase_context_init - Device init/term methods. + * @init: Function pointer to a initialise method. + * @term: Function pointer to a terminate method. + * @err_mes: Error message to be printed when init method fails. 
+ */ +struct kbase_context_init { + kbase_context_init_method *init; + kbase_context_term_method *term; + char *err_mes; +}; + +int kbase_context_common_init(struct kbase_context *kctx); +void kbase_context_common_term(struct kbase_context *kctx); + +int kbase_context_mem_pool_group_init(struct kbase_context *kctx); +void kbase_context_mem_pool_group_term(struct kbase_context *kctx); + +int kbase_context_mmu_init(struct kbase_context *kctx); +void kbase_context_mmu_term(struct kbase_context *kctx); + +int kbase_context_mem_alloc_page(struct kbase_context *kctx); +void kbase_context_mem_pool_free(struct kbase_context *kctx); + +void kbase_context_sticky_resource_term(struct kbase_context *kctx); diff --git a/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_codes_jm.h new file mode 100644 index 000000000000..d534f3006c9b --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_codes_jm.h @@ -0,0 +1,170 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** + */ + +/* + * The purpose of this header file is just to contain a list of trace code + * identifiers + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT + * DESCRIBED IN mali_kbase_debug_ktrace_codes.h + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + + /* + * Job Slot management events + */ + /* info_val==irq rawstat at start */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ), + /* info_val==jobs processed */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ_END), + /* In the following: + * + * - ctx is set if a corresponding job found (NULL otherwise, e.g. 
some + * soft-stop cases) + * - uatom==kernel-side mapped uatom address (for correlation with + * user-side) + */ + /* info_val==exit code; gpu_addr==chain gpuaddr */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_JOB_DONE), + /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of + * affinity + */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT), + /* gpu_addr is as follows: + * - If JS_STATUS active after soft-stop, val==gpu addr written to + * JS_HEAD on submit + * - otherwise gpu_addr==0 + */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), + /* gpu_addr==JS_TAIL read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), + /* gpu_addr is as follows: + * - If JS_STATUS active before soft-stop, val==JS_HEAD + * - otherwise gpu_addr==0 + */ + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), + KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), + KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), + /* info_val == is_scheduled */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), + /* info_val == is_scheduled */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_DONE), + /* info_val == nr jobs submitted */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), + /* gpu_addr==JS_HEAD_NEXT last written */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), + KBASE_KTRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), + /* + * Job dispatch events + */ + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==0, info_val==0, uatom==0 */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), + /* + * Scheduler Core events + */ + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_ADD_JOB), + /* gpu_addr==last value written/would be written to JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), + /* info_val == lower 32 bits of rechecked affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), + /* info_val == lower 32 bits of rechecked affinity */ + 
KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), + /* info_val == the ctx attribute now on ctx */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), + /* info_val == the ctx attribute now on runpool */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), + /* info_val == the ctx attribute now off ctx */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), + /* info_val == the ctx attribute now off runpool */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), + /* + * Scheduler Policy events + */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), + /* info_val == whether it was evicted */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), + /* gpu_addr==JS_HEAD to write if the job were run */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), + +#if 0 /* Dummy section to avoid breaking formatting */ +}; +#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_defs_jm.h new file mode 100644 index 000000000000..b9657ca10c51 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_defs_jm.h @@ -0,0 +1,80 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
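/*
 * [Editorial sketch, not part of the patch] mali_kbase_debug_ktrace_codes_jm.h
 * above is an "X-macro" list, which is why it intentionally has no header
 * guards: it is included more than once with different definitions of
 * KBASE_KTRACE_CODE_MAKE_CODE(), so the single list can be stamped out as an
 * enum and again as a table of printable names. Roughly (the header name
 * "example_trace_codes.h" is hypothetical):
 */

/* First expansion: build an enum of trace codes */
#define KBASE_KTRACE_CODE_MAKE_CODE(x) KBASE_KTRACE_CODE_##x,
enum example_trace_code {
#include "example_trace_codes.h"
	EXAMPLE_TRACE_CODE_COUNT
};
#undef KBASE_KTRACE_CODE_MAKE_CODE

/* Second expansion: build a matching table of names for dumping */
#define KBASE_KTRACE_CODE_MAKE_CODE(x) #x,
static const char *const example_trace_code_names[] = {
#include "example_trace_codes.h"
};
#undef KBASE_KTRACE_CODE_MAKE_CODE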
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_DEFS_JM_H_ +#define _KBASE_DEBUG_KTRACE_DEFS_JM_H_ + +/** + * DOC: KTrace version history, JM variant + * 1.0: + * - Original version (implicit, header did not carry version information) + * 2.0: + * - Introduced version information into the header + * - some changes of parameter names in header + * - trace now uses all 64-bits of info_val + * - Non-JM specific parts moved to using info_val instead of refcount/gpu_addr + */ +#define KBASE_KTRACE_VERSION_MAJOR 2 +#define KBASE_KTRACE_VERSION_MINOR 0 + +/* indicates if the trace message has a valid refcount member */ +#define KBASE_KTRACE_FLAG_JM_REFCOUNT (((kbase_ktrace_flag_t)1) << 0) +/* indicates if the trace message has a valid jobslot member */ +#define KBASE_KTRACE_FLAG_JM_JOBSLOT (((kbase_ktrace_flag_t)1) << 1) +/* indicates if the trace message has valid atom related info. */ +#define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) + + +/** + * struct kbase_ktrace_backend - backend specific part of a trace message + * + * @atom_udata: Copy of the user data sent for the atom in base_jd_submit. + * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags + * @gpu_addr: GPU address, usually of the job-chain represented by an atom. + * @atom_number: id of the atom for which trace message was added. Only valid + * if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags + * @code: Identifies the event, refer to enum kbase_ktrace_code. + * @flags: indicates information about the trace message itself. Used + * during dumping of the message. + * @jobslot: job-slot for which trace message was added, valid only for + * job-slot management events. + * @refcount: reference count for the context, valid for certain events + * related to scheduler core and policy. + */ +struct kbase_ktrace_backend { + /* Place 64 and 32-bit members together */ + u64 atom_udata[2]; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ + u64 gpu_addr; + int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ + /* Pack smaller members together */ + /* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_EXYNOS_TRACE + enum kbase_ktrace_code code; +#else + kbase_ktrace_code_t code; +#endif + kbase_ktrace_flag_t flags; + u8 jobslot; + u8 refcount; +}; + +#endif /* _KBASE_DEBUG_KTRACE_DEFS_JM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_jm.c b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_jm.c new file mode 100644 index 000000000000..772795c09b93 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_jm.c @@ -0,0 +1,125 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include +#include "debug/mali_kbase_debug_ktrace_internal.h" +#include "debug/backend/mali_kbase_debug_ktrace_jm.h" + +#if KBASE_KTRACE_TARGET_RBUF + +void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) +{ + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + "katom,gpu_addr,jobslot,refcount"), 0); +} + +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz, s32 *written) +{ + /* katom */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_ATOM) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "atom %d (ud: 0x%llx 0x%llx)", + trace_msg->backend.atom_number, + trace_msg->backend.atom_udata[0], + trace_msg->backend.atom_udata[1]), 0); + + /* gpu_addr */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_BACKEND) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ",%.8llx,", trace_msg->backend.gpu_addr), 0); + else + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ",,"), 0); + + /* jobslot */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%d", trace_msg->backend.jobslot), 0); + + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + ","), 0); + + /* refcount */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%d", trace_msg->backend.refcount), 0); +} + +#ifdef CONFIG_MALI_EXYNOS_TRACE +bool check_trace_code(enum kbase_ktrace_code); +#endif + +void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + enum kbase_ktrace_code code, struct kbase_context *kctx, + struct kbase_jd_atom *katom, u64 gpu_addr, + kbase_ktrace_flag_t flags, int refcount, int jobslot, + u64 info_val) +{ + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + +#ifdef CONFIG_MALI_EXYNOS_TRACE + if (!check_trace_code(code)) + return; + + if (code == KBASE_KTRACE_CODE(JM_SOFTSTOP) || code == KBASE_KTRACE_CODE(JM_HARDSTOP)) + gpu_dump_register_hooks(kbdev); +#endif + + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + + /* Fill the common part of the message (including backend.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + + /* Indicate to the common code that backend-specific parts will be + * valid + */ + trace_msg->backend.flags |= KBASE_KTRACE_FLAG_BACKEND; + + /* Fill the JM-specific parts of the message */ + if (katom) { + trace_msg->backend.flags |= KBASE_KTRACE_FLAG_JM_ATOM; + + trace_msg->backend.atom_number = kbase_jd_atom_id(katom->kctx, katom); + trace_msg->backend.atom_udata[0] = katom->udata.blob[0]; + trace_msg->backend.atom_udata[1] = katom->udata.blob[1]; + } + + trace_msg->backend.gpu_addr = gpu_addr; + trace_msg->backend.jobslot = jobslot; + /* Clamp refcount */ + trace_msg->backend.refcount = MIN((unsigned int)refcount, 0xFF); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + +#endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_jm.h b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_jm.h new file mode 100644 index 000000000000..c1bacf95d481 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_ktrace_jm.h @@ -0,0 +1,362 @@ +/* + * + * (C) COPYRIGHT 
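/*
 * [Editorial sketch, not part of the patch] kbasep_ktrace_backend_format_msg()
 * above builds one CSV line by repeatedly appending with
 * snprintf(buffer + *written, MAX(sz - *written, 0), ...). Clamping the size
 * to >= 0 and the return value to >= 0 means that once the buffer is full the
 * remaining fields are measured but dropped instead of overflowing, and a
 * negative return can never move the offset backwards. The bare pattern,
 * assuming the driver's MAX() helper and kernel s32 type:
 */
static void example_format_fields(char *buf, int sz, int jobslot, int refcount)
{
	s32 written = 0;

	written += MAX(snprintf(buf + written, MAX(sz - written, 0),
			"%d", jobslot), 0);
	written += MAX(snprintf(buf + written, MAX(sz - written, 0),
			",%d", refcount), 0);
}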
2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_JM_H_ +#define _KBASE_DEBUG_KTRACE_JM_H_ + +/* + * KTrace target for internal ringbuffer + */ +#if KBASE_KTRACE_TARGET_RBUF +/** + * kbasep_ktrace_add_jm - internal function to add trace about Job Management + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @flags: flags about the message + * @refcount: reference count information to add to the trace + * @jobslot: jobslot information to add to the trace + * @info_val: generic information about @code to add to the trace + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_JM() instead. + */ +void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + enum kbase_ktrace_code code, struct kbase_context *kctx, + struct kbase_jd_atom *katom, u64 gpu_addr, + kbase_ktrace_flag_t flags, int refcount, int jobslot, + u64 info_val); + +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, 0) + +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ + jobslot, info_val) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, \ + info_val) + +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ + refcount) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, 0) +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, \ + info_val) + +#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, 0, 0, 0, info_val) + +#else /* KBASE_KTRACE_TARGET_RBUF */ +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ + jobslot, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, 
katom, gpu_addr, \ + refcount) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(refcount);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ + info_val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +/* + * KTrace target for Linux's ftrace + */ +#if KBASE_KTRACE_TARGET_FTRACE +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + trace_mali_##code(jobslot, 0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, jobslot, info_val) \ + trace_mali_##code(jobslot, info_val) + +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ + gpu_addr, refcount) \ + trace_mali_##code(refcount, 0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + trace_mali_##code(refcount, info_val) + +#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ + info_val) \ + trace_mali_##code(gpu_addr, info_val) +#else /* KBASE_KTRACE_TARGET_FTRACE */ +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, jobslot, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ + gpu_addr, refcount) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(refcount);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ + info_val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +/* + * Master set of macros to route KTrace to any of the targets + */ + +/** + * KBASE_KTRACE_ADD_JM_SLOT - Add trace values about a job-slot + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @jobslot: jobslot information to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds 
(CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. + */ +#define KBASE_KTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM_SLOT_INFO - Add trace values about a job-slot, with info + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @jobslot: jobslot information to add to the trace + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. + */ +#define KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ + jobslot, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM_REFCOUNT - Add trace values about a kctx refcount + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @refcount: reference count information to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. 
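/*
 * [Editorial sketch, not part of the patch] The "capture values that could
 * come from non-pure function calls" comments in the KBASE_KTRACE_ADD_JM_*
 * macros exist because each argument is forwarded to two sub-macros (the RBUF
 * and FTRACE targets); copying it into a __-prefixed local first ensures an
 * argument expression with side effects is evaluated exactly once.
 * Illustration (trace_target_a/b are hypothetical sinks):
 */
#define TRACE_BOTH_BAD(val) \
	do { \
		trace_target_a(val); /* (val) evaluated here... */ \
		trace_target_b(val); /* ...and evaluated again here */ \
	} while (0)

#define TRACE_BOTH_OK(val) \
	do { \
		u64 __val = (val); /* side effects happen exactly once */ \
		trace_target_a(__val); \
		trace_target_b(__val); \
	} while (0)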
+ */
+#define KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \
+		refcount) \
+	do { \
+		/* capture values that could come from non-pure function calls */ \
+		u64 __gpu_addr = gpu_addr; \
+		int __refcount = refcount; \
+		KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \
+		KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \
+	} while (0)
+
+/**
+ * KBASE_KTRACE_ADD_JM_REFCOUNT_INFO - Add trace values about a kctx refcount,
+ * and info
+ * @kbdev: kbase device
+ * @code: trace code
+ * @kctx: kbase context, or NULL if no context
+ * @katom: kbase atom, or NULL if no atom
+ * @gpu_addr: GPU address, usually related to @katom
+ * @refcount: reference count information to add to the trace
+ * @info_val: generic information about @code to add to the trace
+ *
+ * Note: Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when
+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied
+ * to this macro must:
+ * a) be static or static inline, and
+ * b) just return 0 and have no other statements present in the body.
+ */
+#define KBASE_KTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \
+		gpu_addr, refcount, info_val) \
+	do { \
+		/* capture values that could come from non-pure function calls */ \
+		u64 __gpu_addr = gpu_addr; \
+		int __refcount = refcount; \
+		u64 __info_val = info_val; \
+		KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \
+		KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \
+	} while (0)
+
+/**
+ * KBASE_KTRACE_ADD_JM - Add trace values (no slot or refcount)
+ * @kbdev: kbase device
+ * @code: trace code
+ * @kctx: kbase context, or NULL if no context
+ * @katom: kbase atom, or NULL if no atom
+ * @gpu_addr: GPU address, usually related to @katom
+ * @info_val: generic information about @code to add to the trace
+ *
+ * Note: Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when
+ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied
+ * to this macro must:
+ * a) be static or static inline, and
+ * b) just return 0 and have no other statements present in the body.
+ */
+#define KBASE_KTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \
+	do { \
+		/* capture values that could come from non-pure function calls */ \
+		u64 __gpu_addr = gpu_addr; \
+		u64 __info_val = info_val; \
+		KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \
+		KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \
+	} while (0)
+
+#endif /* _KBASE_DEBUG_KTRACE_JM_H_ */
diff --git a/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_linux_ktrace_jm.h
new file mode 100644
index 000000000000..9cb34b0e886a
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/debug/backend/mali_kbase_debug_linux_ktrace_jm.h
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * NOTE: This must **only** be included through mali_linux_trace.h, + * otherwise it will fail to setup tracepoints correctly + */ + +#if !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _KBASE_DEBUG_LINUX_KTRACE_JM_H_ + +DECLARE_EVENT_CLASS(mali_jm_slot_template, + TP_PROTO(int jobslot, u64 info_val), + TP_ARGS(jobslot, info_val), + TP_STRUCT__entry( + __field(unsigned int, jobslot) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->jobslot = jobslot; + __entry->info_val = info_val; + ), + TP_printk("jobslot=%u info=0x%llx", __entry->jobslot, __entry->info_val) +); + +#define DEFINE_MALI_JM_SLOT_EVENT(name) \ +DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ + TP_PROTO(int jobslot, u64 info_val), \ + TP_ARGS(jobslot, info_val)) +DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT); +DEFINE_MALI_JM_SLOT_EVENT(LSI_KATOM_REMOVED); +DEFINE_MALI_JM_SLOT_EVENT(JM_JOB_DONE); +DEFINE_MALI_JM_SLOT_EVENT(JM_UPDATE_HEAD); +DEFINE_MALI_JM_SLOT_EVENT(JM_CHECK_HEAD); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_0); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_1); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_0); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_1); +DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_EVICT); +DEFINE_MALI_JM_SLOT_EVENT(JM_BEGIN_RESET_WORKER); +DEFINE_MALI_JM_SLOT_EVENT(JM_END_RESET_WORKER); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); +DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_CURRENT); +DEFINE_MALI_JM_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); +DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); +DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); +#undef DEFINE_MALI_JM_SLOT_EVENT + +DECLARE_EVENT_CLASS(mali_jm_refcount_template, + TP_PROTO(int refcount, u64 info_val), + TP_ARGS(refcount, info_val), + TP_STRUCT__entry( + __field(unsigned int, refcount) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->refcount = refcount; + __entry->info_val = info_val; + ), + TP_printk("refcount=%u info=0x%llx", __entry->refcount, __entry->info_val) +); + +#define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ +DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ + TP_PROTO(int refcount, u64 info_val), \ + TP_ARGS(refcount, info_val)) +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_ADD_JOB); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_REMOVE_JOB); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); 
+DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); +#undef DEFINE_MALI_JM_REFCOUNT_EVENT + +DECLARE_EVENT_CLASS(mali_jm_add_template, + TP_PROTO(u64 gpu_addr, u64 info_val), + TP_ARGS(gpu_addr, info_val), + TP_STRUCT__entry( + __field(u64, gpu_addr) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->gpu_addr = gpu_addr; + __entry->info_val = info_val; + ), + TP_printk("gpu_addr=0x%llx info=0x%llx", __entry->gpu_addr, __entry->info_val) +); + +#define DEFINE_MALI_JM_ADD_EVENT(name) \ +DEFINE_EVENT(mali_jm_add_template, mali_##name, \ + TP_PROTO(u64 gpu_addr, u64 info_val), \ + TP_ARGS(gpu_addr, info_val)) +DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER); +DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER_END); +DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL_WORKER); +DEFINE_MALI_JM_ADD_EVENT(JD_DONE); +DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL); +DEFINE_MALI_JM_ADD_EVENT(JD_ZAP_CONTEXT); +DEFINE_MALI_JM_ADD_EVENT(JM_IRQ); +DEFINE_MALI_JM_ADD_EVENT(JM_IRQ_END); +DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS); +DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS_DONE); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_NON_SCHEDULED); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_SCHEDULED); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_DONE); +DEFINE_MALI_JM_ADD_EVENT(JM_SUBMIT_AFTER_RESET); +DEFINE_MALI_JM_ADD_EVENT(JM_JOB_COMPLETE); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_END); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_START); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); +#undef DEFINE_MALI_JM_ADD_EVENT + +#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ)*/ diff --git a/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace.c b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace.c new file mode 100644 index 000000000000..f523948533a3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace.c @@ -0,0 +1,429 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include +#include "debug/mali_kbase_debug_ktrace_internal.h" + +/* MALI_SEC_INTEGRATION */ +#if (LINUX_VERSION_CODE > KERNEL_VERSION(4, 10, 0)) +#include +#endif + +int kbase_ktrace_init(struct kbase_device *kbdev) +{ +/* MALI_SEC_INTEGRATION */ +#ifndef CONFIG_MALI_EXYNOS_TRACE +#if KBASE_KTRACE_TARGET_RBUF + struct kbase_ktrace_msg *rbuf; + + /* See also documentation of enum kbase_ktrace_code */ + compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || + KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), + "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); + + rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); + + if (!rbuf) + return -EINVAL; + + kbdev->ktrace.rbuf = rbuf; + spin_lock_init(&kbdev->ktrace.lock); +#endif /* KBASE_KTRACE_TARGET_RBUF */ +#endif + return 0; +} + +void kbase_ktrace_term(struct kbase_device *kbdev) +{ +/* MALI_SEC_INTEGRATION */ +#ifndef CONFIG_MALI_EXYNOS_TRACE +#if KBASE_KTRACE_TARGET_RBUF + kfree(kbdev->ktrace.rbuf); +#endif /* KBASE_KTRACE_TARGET_RBUF */ +#endif +} + +void kbase_ktrace_hook_wrapper(void *param) +{ + struct kbase_device *kbdev = (struct kbase_device *)param; + + KBASE_KTRACE_DUMP(kbdev); +} + +#if KBASE_KTRACE_TARGET_RBUF + +static const char * const kbasep_ktrace_code_string[] = { + /* + * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE + * THIS MUST BE USED AT THE START OF THE ARRAY + */ +#define KBASE_KTRACE_CODE_MAKE_CODE(X) # X +#include "debug/mali_kbase_debug_ktrace_codes.h" +#undef KBASE_KTRACE_CODE_MAKE_CODE +}; + +static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written) +{ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + "secs,thread_id,cpu,code,kctx,"), 0); + + kbasep_ktrace_backend_format_header(buffer, sz, &written); + + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + ",info_val,ktrace_version=%u.%u", + KBASE_KTRACE_VERSION_MAJOR, + KBASE_KTRACE_VERSION_MINOR), 0); + + buffer[sz - 1] = 0; +} +/* MALI_SEC_INTEGRATION */ +void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz) +{ + s32 written = 0; + + /* Initial part of message: + * + * secs,thread_id,cpu,code,kctx, + */ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + "%d.%.6d,%d,%d,%s,%p,", + (int)trace_msg->timestamp.tv_sec, + (int)(trace_msg->timestamp.tv_nsec / 1000), + trace_msg->thread_id, trace_msg->cpu, + kbasep_ktrace_code_string[trace_msg->backend.code], + trace_msg->kctx), 0); + + /* Backend parts */ + kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, + &written); + + /* Rest of message: + * + * ,info_val + * + * Note that the last column is empty, it's simply to hold the ktrace + * version in the header + */ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + ",0x%.16llx", + (unsigned long long)trace_msg->info_val), 0); + buffer[sz - 1] = 0; +} + +static void kbasep_ktrace_dump_msg(struct kbase_device *kbdev, + struct kbase_ktrace_msg *trace_msg) +{ + char buffer[KTRACE_DUMP_MESSAGE_SIZE]; + + lockdep_assert_held(&kbdev->ktrace.lock); + + kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); + dev_dbg(kbdev->dev, "%s", buffer); +} + +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_EXYNOS_TRACE +bool check_trace_code(enum kbase_ktrace_code code) +{ +#ifdef CONFIG_MALI_EXYNOS_TRACE_ALL + return true; +#else + unsigned int temp = code; + + switch (temp) { + case 
KBASE_KTRACE_CODE(PM_PWROFF_L2): + case KBASE_KTRACE_CODE(PM_PWRON_L2): + case KBASE_KTRACE_CODE(CORE_CTX_DESTROY): + case KBASE_KTRACE_CODE(CORE_GPU_SOFT_RESET): + case KBASE_KTRACE_CODE(CORE_GPU_HARD_RESET): + case KBASE_KTRACE_CODE(JM_SOFTSTOP): + case KBASE_KTRACE_CODE(JM_HARDSTOP): + case KBASE_KTRACE_CODE(LSI_KATOM_REMOVED): + case KBASE_KTRACE_CODE(JM_SUBMIT): + case KBASE_KTRACE_CODE(JM_JOB_DONE): + case KBASE_KTRACE_CODE(LSI_GPU_RPM_RESUME_API): + case KBASE_KTRACE_CODE(LSI_GPU_RPM_SUSPEND_API): + case KBASE_KTRACE_CODE(LSI_SUSPEND_CALLBACK): + case KBASE_KTRACE_CODE(KBASE_DEVICE_SUSPEND): + case KBASE_KTRACE_CODE(KBASE_DEVICE_RESUME): + case KBASE_KTRACE_CODE(KBASE_DEVICE_PM_WAIT_WQ_QUEUE_WORK): + case KBASE_KTRACE_CODE(LSI_JM_IRQ_E): + case KBASE_KTRACE_CODE(LSI_MMU_IRQ_E): + case KBASE_KTRACE_CODE(LSI_GPU_IRQ_E): + case KBASE_KTRACE_CODE(LSI_GPU_ON): + case KBASE_KTRACE_CODE(LSI_GPU_OFF): + case KBASE_KTRACE_CODE(LSI_RESUME_FREQ): + case KBASE_KTRACE_CODE(LSI_CLOCK_VALUE): + case KBASE_KTRACE_CODE(LSI_TMU_VALUE): + case KBASE_KTRACE_CODE(LSI_GPU_MAX_LOCK): + case KBASE_KTRACE_CODE(LSI_GPU_MIN_LOCK): + case KBASE_KTRACE_CODE(LSI_RESET_GPU_EARLY_DUPE): + case KBASE_KTRACE_CODE(LSI_RESET_RACE_DETECTED_EARLY_OUT): + case KBASE_KTRACE_CODE(LSI_PM_SUSPEND): + return true; + default: + return false; + } + return true; +#endif +} +#endif + +struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace) +{ + struct kbase_ktrace_msg *trace_msg; + + lockdep_assert_held(&ktrace->lock); + + trace_msg = &ktrace->rbuf[ktrace->next_in]; + + /* Update the ringbuffer indices */ + ktrace->next_in = (ktrace->next_in + 1) & KBASE_KTRACE_MASK; + if (ktrace->next_in == ktrace->first_out) + ktrace->first_out = (ktrace->first_out + 1) & KBASE_KTRACE_MASK; + + return trace_msg; +} +void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, + struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val) +{ + /* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_EXYNOS_TRACE + u64 time; + unsigned long rem_nsec; +#endif + + lockdep_assert_held(&ktrace->lock); + + trace_msg->thread_id = task_pid_nr(current); + + trace_msg->cpu = task_cpu(current); + + /* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_EXYNOS_TRACE + time = local_clock(); + rem_nsec = do_div(time, 1000000000); + trace_msg->timestamp.tv_sec = time; + trace_msg->timestamp.tv_nsec = rem_nsec; +#else + ktime_get_real_ts64(&trace_msg->timestamp); +#endif + + trace_msg->kctx = kctx; + + trace_msg->info_val = info_val; + trace_msg->backend.code = code; + trace_msg->backend.flags = flags; +} + +void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val) +{ + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + + /* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_EXYNOS_TRACE + if (!check_trace_code(code)) + return; + + if (code == KBASE_KTRACE_CODE(JM_SOFTSTOP) || code == KBASE_KTRACE_CODE(JM_HARDSTOP)) + gpu_dump_register_hooks(kbdev); +#endif + + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + + /* Fill the common part of the message (including backend.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + +static void kbasep_ktrace_clear_locked(struct kbase_device 
*kbdev) +{ + lockdep_assert_held(&kbdev->ktrace.lock); + kbdev->ktrace.first_out = kbdev->ktrace.next_in; +} +void kbasep_ktrace_clear(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + kbasep_ktrace_clear_locked(kbdev); + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); +} + +void kbasep_ktrace_dump(struct kbase_device *kbdev) +{ + unsigned long flags; + u32 start; + u32 end; + char buffer[KTRACE_DUMP_MESSAGE_SIZE] = "Dumping trace:\n"; + + kbasep_ktrace_format_header(buffer, sizeof(buffer), strlen(buffer)); + dev_dbg(kbdev->dev, "%s", buffer); + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + start = kbdev->ktrace.first_out; + end = kbdev->ktrace.next_in; + + while (start != end) { + struct kbase_ktrace_msg *trace_msg = &kbdev->ktrace.rbuf[start]; + + kbasep_ktrace_dump_msg(kbdev, trace_msg); + + start = (start + 1) & KBASE_KTRACE_MASK; + } + dev_dbg(kbdev->dev, "TRACE_END"); + + kbasep_ktrace_clear_locked(kbdev); + + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); +} + +#ifdef CONFIG_DEBUG_FS +struct trace_seq_state { + struct kbase_ktrace_msg trace_buf[KBASE_KTRACE_SIZE]; + u32 start; + u32 end; +}; + +static void *kbasep_ktrace_seq_start(struct seq_file *s, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + int i; + + if (*pos == 0) + /* See Documentation/filesystems/seq_file.txt */ + return SEQ_START_TOKEN; + + if (*pos > KBASE_KTRACE_SIZE) + return NULL; + i = state->start + *pos; + if ((state->end >= state->start && i >= state->end) || + i >= state->end + KBASE_KTRACE_SIZE) + return NULL; + + i &= KBASE_KTRACE_MASK; + + return &state->trace_buf[i]; +} + +static void kbasep_ktrace_seq_stop(struct seq_file *s, void *data) +{ +} + +static void *kbasep_ktrace_seq_next(struct seq_file *s, void *data, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + int i; + + if (data != SEQ_START_TOKEN) + (*pos)++; + + i = (state->start + *pos) & KBASE_KTRACE_MASK; + if (i == state->end) + return NULL; + + return &state->trace_buf[i]; +} + +static int kbasep_ktrace_seq_show(struct seq_file *s, void *data) +{ + struct kbase_ktrace_msg *trace_msg = data; + char buffer[KTRACE_DUMP_MESSAGE_SIZE]; + + /* If this is the start, print a header */ + if (data == SEQ_START_TOKEN) + kbasep_ktrace_format_header(buffer, sizeof(buffer), 0); + else + kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); + + seq_printf(s, "%s\n", buffer); + return 0; +} + +static const struct seq_operations kbasep_ktrace_seq_ops = { + .start = kbasep_ktrace_seq_start, + .next = kbasep_ktrace_seq_next, + .stop = kbasep_ktrace_seq_stop, + .show = kbasep_ktrace_seq_show, +}; + +static int kbasep_ktrace_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *kbdev = inode->i_private; + unsigned long flags; + + struct trace_seq_state *state; + + state = __seq_open_private(file, &kbasep_ktrace_seq_ops, + sizeof(*state)); + if (!state) + return -ENOMEM; + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + state->start = kbdev->ktrace.first_out; + state->end = kbdev->ktrace.next_in; + memcpy(state->trace_buf, kbdev->ktrace.rbuf, sizeof(state->trace_buf)); + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); + + return 0; +} + +/* MALI_SEC_INTEGRATIONS : REMOVE STATIC */ +const struct file_operations kbasep_ktrace_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_ktrace_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +void kbase_ktrace_debugfs_init(struct 
kbase_device *kbdev) +{ +/* MALI_SEC_INTEGRATION */ +#ifndef CONFIG_MALI_EXYNOS_TRACE + debugfs_create_file("mali_trace", 0444, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_ktrace_debugfs_fops); +#endif +} +#endif /* CONFIG_DEBUG_FS */ + +#else /* KBASE_KTRACE_TARGET_RBUF */ + +#ifdef CONFIG_DEBUG_FS +void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace.h b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace.h new file mode 100644 index 000000000000..58104fc65d35 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace.h @@ -0,0 +1,228 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * DOC: Kbase's own trace, 'KTrace' + * + * Low overhead trace specific to kbase, aimed at: + * - common use-cases for tracing kbase specific functionality to do with + * running work on the GPU + * - easy 1-line addition of new types of trace + * + * KTrace can be recorded in one or more of the following targets: + * - KBASE_KTRACE_TARGET_RBUF: low overhead ringbuffer protected by an + * irq-spinlock, output available via dev_dbg() and debugfs file + * - KBASE_KTRACE_TARGET_FTRACE: ftrace based tracepoints under 'mali' events + */ + +#ifndef _KBASE_DEBUG_KTRACE_H_ +#define _KBASE_DEBUG_KTRACE_H_ + +#include "debug/backend/mali_kbase_debug_ktrace_jm.h" + +/** + * kbase_ktrace_init - initialize kbase ktrace. + * @kbdev: kbase device + */ +int kbase_ktrace_init(struct kbase_device *kbdev); + +/** + * kbase_ktrace_term - terminate kbase ktrace. + * @kbdev: kbase device + */ +void kbase_ktrace_term(struct kbase_device *kbdev); + +/** + * kbase_ktrace_hook_wrapper - wrapper so that dumping ktrace can be done via a + * callback. + * @param: kbase device, cast to void pointer + */ +void kbase_ktrace_hook_wrapper(void *param); + +#ifdef CONFIG_DEBUG_FS +/** + * kbase_ktrace_debugfs_init - initialize kbase ktrace for debugfs usage, if + * the selected targets support it. + * @kbdev: kbase device + * + * There is no matching 'term' call, debugfs_remove_recursive() is sufficient. + */ +void kbase_ktrace_debugfs_init(struct kbase_device *kbdev); +#endif /* CONFIG_DEBUG_FS */ + +/* + * KTrace target for internal ringbuffer + */ +#if KBASE_KTRACE_TARGET_RBUF +/** + * kbasep_ktrace_add - internal function to add trace to the ringbuffer. + * @kbdev: kbase device + * @code: ktrace code + * @kctx: kbase context, or NULL if no context + * @flags: flags about the message + * @info_val: generic information about @code to add to the trace + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD() instead. 
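+ *
+ * For reference, trace messages reach this function via the following
+ * routing, implemented by the macros below:
+ *
+ *   KBASE_KTRACE_ADD(kbdev, code, kctx, info_val)
+ *     -> KBASE_KTRACE_RBUF_ADD()   -> kbasep_ktrace_add()   (ringbuffer)
+ *     -> KBASE_KTRACE_FTRACE_ADD() -> trace_mali_<code>()   (ftrace)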
+ */ +void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val); + +/** + * kbasep_ktrace_clear - clear the trace ringbuffer + * @kbdev: kbase device + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_CLEAR() instead. + */ +void kbasep_ktrace_clear(struct kbase_device *kbdev); + +/** + * kbasep_ktrace_dump - dump ktrace ringbuffer to dev_dbg(), then clear it + * @kbdev: kbase device + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_DUMP() instead. + */ +void kbasep_ktrace_dump(struct kbase_device *kbdev); + +#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ + kbasep_ktrace_add(kbdev, KBASE_KTRACE_CODE(code), kctx, 0, \ + info_val) \ + +#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ + kbasep_ktrace_clear(kbdev) + +#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ + kbasep_ktrace_dump(kbdev) + +#else /* KBASE_KTRACE_TARGET_RBUF */ + +#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ + } while (0) + +#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) +#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +/* + * KTrace target for Linux's ftrace + */ +#if KBASE_KTRACE_TARGET_FTRACE +#include "mali_linux_trace.h" + +#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ + trace_mali_##code(info_val) + +#else /* KBASE_KTRACE_TARGET_FTRACE */ +#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ + } while (0) +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +/* No 'clear' implementation for ftrace yet */ +#define KBASE_KTRACE_FTRACE_CLEAR(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) + +/* No 'dump' implementation for ftrace yet */ +#define KBASE_KTRACE_FTRACE_DUMP(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) + +/* + * Master set of macros to route KTrace to any of the targets + */ + +/** + * KBASE_KTRACE_ADD - Add trace values + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. 
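+ *
+ * For illustration only (the surrounding IRQ-handler context is assumed,
+ * not shown in this file), a caller wanting to trace the raw GPU interrupt
+ * status might do:
+ *
+ *   u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS));
+ *   KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, status);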
+ */ +#define KBASE_KTRACE_ADD(kbdev, code, kctx, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, __info_val); \ + KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, __info_val); \ + } while (0) + +/* MALI_SEC_INTEGRATION */ +#define KBASE_KTRACE_ADD_EXYNOS(kbdev, code, kctx, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, __info_val); \ + KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, __info_val); \ + } while (0) + +/** + * KBASE_KTRACE_CLEAR - Clear the trace, if applicable to the target(s) + * @kbdev: kbase device + */ +#define KBASE_KTRACE_CLEAR(kbdev) \ + do { \ + KBASE_KTRACE_RBUF_CLEAR(kbdev); \ + KBASE_KTRACE_FTRACE_CLEAR(kbdev); \ + } while (0) + +/** + * KBASE_KTRACE_DUMP - Dump the trace, if applicable to the target(s) + * @kbdev: kbase device + */ +#define KBASE_KTRACE_DUMP(kbdev) \ + do { \ + KBASE_KTRACE_RBUF_DUMP(kbdev); \ + KBASE_KTRACE_FTRACE_DUMP(kbdev); \ + } while (0) + +#endif /* _KBASE_DEBUG_KTRACE_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_codes.h b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_codes.h new file mode 100644 index 000000000000..c34dd0f9f645 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_codes.h @@ -0,0 +1,160 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** + */ + +/* + * The purpose of this header file is just to contain a list of trace code + * identifiers + * + * Each identifier is wrapped in a macro, so that its string form and enum form + * can be created + * + * Each macro is separated with a comma, to allow insertion into an array + * initializer or enum definition block. + * + * This allows automatic creation of an enum and a corresponding array of + * strings + * + * Before #including, the includer MUST #define KBASE_KTRACE_CODE_MAKE_CODE. + * After #including, the includer MUST #under KBASE_KTRACE_CODE_MAKE_CODE. 
+ * + * e.g.: + * #define KBASE_KTRACE_CODE( X ) KBASE_KTRACE_CODE_ ## X + * typedef enum + * { + * #define KBASE_KTRACE_CODE_MAKE_CODE( X ) KBASE_KTRACE_CODE( X ) + * #include "mali_kbase_debug_ktrace_codes.h" + * #undef KBASE_KTRACE_CODE_MAKE_CODE + * } kbase_ktrace_code; + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE + * + * + * The use of the macro here is: + * - KBASE_KTRACE_CODE_MAKE_CODE( X ) + * + * Which produces: + * - For an enum, KBASE_KTRACE_CODE_X + * - For a string, "X" + * + * + * For example: + * - KBASE_KTRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: + * - KBASE_KTRACE_CODE_JM_JOB_COMPLETE for the enum + * - "JM_JOB_COMPLETE" for the string + * - To use it to trace an event, do: + * - KBASE_KTRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + + /* + * Core events + */ + /* no info_val */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), + /* no info_val */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), + /* info_val == GPU_IRQ_STATUS register */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), + /* info_val == bits cleared */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), + /* info_val == GPU_IRQ_STATUS register */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), + /* info_val == dump address */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), + + /* + * Power Management Events + */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), + KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), + /* info_val == kbdev->pm.active_count*/ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), + /* info_val == kbdev->pm.active_count*/ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_ON), + KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_OFF), + /* info_val == policy number, or -1 for "Already changing" */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_SET_POLICY), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), + /* info_val == policy number */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), + /* info_val == policy 
number */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), + + /* + * Context Scheduler events + */ + /* info_val == kctx->refcount */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK), + /* info_val == kctx->refcount */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX), + +/* MALI_SEC_INTEGRATION */ +#include "platform/exynos/gpu_trace_defs.h" + +#include "debug/backend/mali_kbase_debug_ktrace_codes_jm.h" + /* + * Unused code just to make it easier to not have a comma at the end. + * All other codes MUST come before this + */ + KBASE_KTRACE_CODE_MAKE_CODE(DUMMY) + +#if 0 /* Dummy section to avoid breaking formatting */ +}; +#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_defs.h b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_defs.h new file mode 100644 index 000000000000..e9c84528df9d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_defs.h @@ -0,0 +1,157 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_DEFS_H_ +#define _KBASE_DEBUG_KTRACE_DEFS_H_ + +/* Enable SW tracing when set */ +/* MALI_SEC_INTEGRATION */ +#if defined(CONFIG_MALI_MIDGARD_ENABLE_TRACE) || defined(CONFIG_MALI_SYSTEM_TRACE) || defined(CONFIG_MALI_EXYNOS_TRACE) +#define KBASE_KTRACE_ENABLE 1 +#endif + +#ifndef KBASE_KTRACE_ENABLE +#ifdef CONFIG_MALI_DEBUG +#define KBASE_KTRACE_ENABLE 1 +#else /* CONFIG_MALI_DEBUG */ +#define KBASE_KTRACE_ENABLE 0 +#endif /* CONFIG_MALI_DEBUG */ +#endif /* KBASE_KTRACE_ENABLE */ + +/* Select targets for recording of trace: + * + */ +#if KBASE_KTRACE_ENABLE + +#ifdef CONFIG_MALI_SYSTEM_TRACE +#define KBASE_KTRACE_TARGET_FTRACE 1 +#else /* CONFIG_MALI_SYSTEM_TRACE */ +#define KBASE_KTRACE_TARGET_FTRACE 0 +#endif /* CONFIG_MALI_SYSTEM_TRACE */ + +/* MALI_SEC_INTEGRATION */ +#if defined(CONFIG_MALI_MIDGARD_ENABLE_TRACE) || defined(CONFIG_MALI_EXYNOS_TRACE) +#define KBASE_KTRACE_TARGET_RBUF 1 +#else /* CONFIG_MALI_MIDGARD_ENABLE_TRACE*/ +#define KBASE_KTRACE_TARGET_RBUF 0 +#endif /* CONFIG_MALI_MIDGARD_ENABLE_TRACE */ + +#else /* KBASE_KTRACE_ENABLE */ +#define KBASE_KTRACE_TARGET_FTRACE 0 +#define KBASE_KTRACE_TARGET_RBUF 0 +#endif /* KBASE_KTRACE_ENABLE */ + +/* + * NOTE: KBASE_KTRACE_VERSION_MAJOR, KBASE_KTRACE_VERSION_MINOR are kept in + * the backend, since updates can be made to one backend in a way that doesn't + * affect the other. + * + * However, modifying the common part could require both backend versions to be + * updated. + */ + +#if KBASE_KTRACE_TARGET_RBUF +typedef u8 kbase_ktrace_flag_t; +typedef u8 kbase_ktrace_code_t; + +/* Indicates if the trace message has backend related info. 
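+ * Backends are expected to test this flag (for example,
+ * "flags & KBASE_KTRACE_FLAG_BACKEND") before formatting their part of a
+ * trace message.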
+ * + * If not set, consider the &kbase_ktrace_backend part of a &kbase_ktrace_msg + * as uninitialized, apart from the mandatory parts: + * - code + * - flags + */ +#define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_EXYNOS_TRACE +#define KBASE_KTRACE_SHIFT 11 /* 2048 entries */ +#else +#define KBASE_KTRACE_SHIFT 8 /* 256 entries */ +#endif +#define KBASE_KTRACE_SIZE (1 << KBASE_KTRACE_SHIFT) +#define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT)-1) + +#define KBASE_KTRACE_CODE(X) KBASE_KTRACE_CODE_ ## X + +/* Note: compiletime_assert() about this against kbase_ktrace_code_t is in + * kbase_ktrace_init() + */ +enum kbase_ktrace_code { + /* + * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE + * THIS MUST BE USED AT THE START OF THE ENUM + */ +#define KBASE_KTRACE_CODE_MAKE_CODE(X) KBASE_KTRACE_CODE(X) +#include +#undef KBASE_KTRACE_CODE_MAKE_CODE + /* Comma on its own, to extend the list */ + , + /* Must be the last in the enum */ + KBASE_KTRACE_CODE_COUNT +}; + +/* + * struct kbase_ktrace_backend - backend specific part of a trace message + * + * At the very least, this must contain a kbase_ktrace_code_t 'code' member and + * a kbase_ktrace_flag_t 'flags' member + */ +struct kbase_ktrace_backend; +#include "debug/backend/mali_kbase_debug_ktrace_defs_jm.h" + +/** + * struct kbase_ktrace - object representing a trace message added to trace + * buffer trace_rbuf in &kbase_device + * @timestamp: CPU timestamp at which the trace message was added. + * @thread_id: id of the thread in the context of which trace message was + * added. + * @cpu: indicates which CPU the @thread_id was scheduled on when the + * trace message was added. + * @kctx: Pointer to the kbase context for which the trace message was + * added. Will be NULL for certain trace messages associated with + * the &kbase_device itself, such as power management events. + * Will point to the appropriate context corresponding to + * backend-specific events. + * @info_val: value specific to the type of event being traced. Refer to the + * specific code in enum kbase_ktrace_code + * @backend: backend-specific trace information. All backends must implement + * a minimum common set of members + */ +struct kbase_ktrace_msg { + struct timespec64 timestamp; + u32 thread_id; + u32 cpu; + void *kctx; + u64 info_val; + + struct kbase_ktrace_backend backend; +}; + +struct kbase_ktrace { + spinlock_t lock; + u16 first_out; + u16 next_in; + struct kbase_ktrace_msg *rbuf; +}; + +#endif /* KBASE_KTRACE_TARGET_RBUF */ +#endif /* _KBASE_DEBUG_KTRACE_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_internal.h b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_internal.h new file mode 100644 index 000000000000..e450760e3426 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_ktrace_internal.h @@ -0,0 +1,89 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_INTERNAL_H_ +#define _KBASE_DEBUG_KTRACE_INTERNAL_H_ + +#if KBASE_KTRACE_TARGET_RBUF + +#define KTRACE_DUMP_MESSAGE_SIZE 256 + +/** + * kbasep_ktrace_backend_format_header - format the backend part of the header + * @buffer: buffer to write to + * @sz: size of @buffer in bytes + * @written: pointer to storage for updating bytes written so far to @buffer + * + * The backend must format only the non-common backend specific parts of the + * header. It must format them as though they were standalone. The caller will + * handle adding any delimiters around this. + */ +void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written); + +/** + * kbasep_ktrace_backend_format_msg - format the backend part of the message + * @trace_msg: ktrace message + * @buffer: buffer to write to + * @sz: size of @buffer in bytes + * @written: pointer to storage for updating bytes written so far to @buffer + * + * The backend must format only the non-common backend specific parts of the + * message. It must format them as though they were standalone. The caller will + * handle adding any delimiters around this. + * + * A caller may have the flags member of @trace_msg with + * %KBASE_KTRACE_FLAG_BACKEND clear. The backend must handle that setting + * appropriately. + */ +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz, s32 *written); + + +/** + * kbasep_ktrace_reserve - internal function to reserve space for a ktrace + * message + * @ktrace: kbase device's ktrace + * + * This may also empty the oldest entry in the ringbuffer to make space. + */ +struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace); + +/** + * kbasep_ktrace_msg_init - internal function to initialize just the common + * part of a ktrace message + * @ktrace: kbase device's ktrace + * @trace_msg: ktrace message to initialize + * @code: ktrace code + * @kctx: kbase context, or NULL if no context + * @flags: flags about the message + * @info_val: generic information about @code to add to the trace + * + * The common part includes the mandatory parts of the backend part + */ +void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, + struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val); + +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +#endif /* _KBASE_DEBUG_KTRACE_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_linux_ktrace.h b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_linux_ktrace.h new file mode 100644 index 000000000000..18e4f7c4f069 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/debug/mali_kbase_debug_linux_ktrace.h @@ -0,0 +1,99 @@ +/* + * + * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * NOTE: This must **only** be included through mali_linux_trace.h, + * otherwise it will fail to setup tracepoints correctly + */ + +#if !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _KBASE_DEBUG_LINUX_KTRACE_H_ + +#if KBASE_KTRACE_TARGET_FTRACE + +DECLARE_EVENT_CLASS(mali_add_template, + TP_PROTO(u64 info_val), + TP_ARGS(info_val), + TP_STRUCT__entry( + __field(u64, info_val) + ), + TP_fast_assign( + __entry->info_val = info_val; + ), + TP_printk("info=0x%llx", __entry->info_val) +); + +#define DEFINE_MALI_ADD_EVENT(name) \ +DEFINE_EVENT(mali_add_template, mali_##name, \ + TP_PROTO(u64 info_val), \ + TP_ARGS(info_val)) +DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); +DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); +DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); +DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); +DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); +DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); +DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); +DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); +DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); +DEFINE_MALI_ADD_EVENT(PM_PWRON); +DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); +DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); +DEFINE_MALI_ADD_EVENT(PM_PWROFF); +DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); +DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); +DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); +DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); +DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); +DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_GPU_ON); +DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); +DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); +DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); +DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); +DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); +DEFINE_MALI_ADD_EVENT(PM_CONTEXT_ACTIVE); +DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE); +DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); +DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); +DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); + +#undef DEFINE_MALI_ADD_EVENT + +#include "mali_kbase_debug_linux_ktrace_jm.h" + +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) */ diff --git a/drivers/gpu/arm/b_r26p0/device/backend/mali_kbase_device_jm.c b/drivers/gpu/arm/b_r26p0/device/backend/mali_kbase_device_jm.c new file mode 100644 index 000000000000..2a45a33c52c9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/device/backend/mali_kbase_device_jm.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "../mali_kbase_device_internal.h" +#include "../mali_kbase_device.h" + +#include +#include +#include +#include + +#ifdef CONFIG_MALI_NO_MALI +#include +#endif + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +/** + * kbase_backend_late_init - Perform any backend-specific initialization. + * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. + */ +static int kbase_backend_late_init(struct kbase_device *kbdev) +{ + int err; + + err = kbase_hwaccess_pm_init(kbdev); + if (err) + return err; + + err = kbase_reset_gpu_init(kbdev); + if (err) + goto fail_reset_gpu_init; + + err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + if (err) + goto fail_pm_powerup; + + err = kbase_backend_timer_init(kbdev); + if (err) + goto fail_timer; + +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI + if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { + dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); + err = -EINVAL; + goto fail_interrupt_test; + } +#endif /* !CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + + err = kbase_job_slot_init(kbdev); + if (err) + goto fail_job_slot; + + /* Do the initialisation of devfreq. + * Devfreq needs backend_timer_init() for completion of its + * initialisation and it also needs to catch the first callback + * occurrence of the runtime_suspend event for maintaining state + * coherence with the backend power management, hence needs to be + * placed before the kbase_pm_context_idle(). + */ + err = kbase_backend_devfreq_init(kbdev); + if (err) + goto fail_devfreq_init; + + /* Idle the GPU and/or cores, if the policy wants it to */ + kbase_pm_context_idle(kbdev); + + /* Update gpuprops with L2_FEATURES if applicable */ + err = kbase_gpuprops_update_l2_features(kbdev); + if (err) + goto fail_update_l2_features; + + init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); + + return 0; + +fail_update_l2_features: +fail_devfreq_init: + kbase_job_slot_term(kbdev); +fail_job_slot: + +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI +fail_interrupt_test: +#endif /* !CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + + kbase_backend_timer_term(kbdev); +fail_timer: + kbase_hwaccess_pm_halt(kbdev); +fail_pm_powerup: + kbase_reset_gpu_term(kbdev); +fail_reset_gpu_init: + kbase_hwaccess_pm_term(kbdev); + + return err; +} + +/** + * kbase_backend_late_term - Perform any backend-specific termination. 
+ * @kbdev: Device pointer + */ +static void kbase_backend_late_term(struct kbase_device *kbdev) +{ + kbase_backend_devfreq_term(kbdev); + kbase_job_slot_halt(kbdev); + kbase_job_slot_term(kbdev); + kbase_backend_timer_term(kbdev); + kbase_hwaccess_pm_halt(kbdev); + kbase_reset_gpu_term(kbdev); + kbase_hwaccess_pm_term(kbdev); +} + +static const struct kbase_device_init dev_init[] = { +#ifdef CONFIG_MALI_NO_MALI + {kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed"}, +#else + {assign_irqs, NULL, + "IRQ search failed"}, + {registers_map, registers_unmap, + "Register map failed"}, +#endif + {kbase_device_io_history_init, kbase_device_io_history_term, + "Register access history initialization failed"}, + {kbase_device_pm_init, kbase_device_pm_term, + "Power management initialization failed"}, + {kbase_device_early_init, kbase_device_early_term, + "Early device initialization failed"}, + {kbase_device_populate_max_freq, NULL, + "Populating max frequency failed"}, + {kbase_device_misc_init, kbase_device_misc_term, + "Miscellaneous device initialization failed"}, + {kbase_ctx_sched_init, kbase_ctx_sched_term, + "Context scheduler initialization failed"}, + {kbase_mem_init, kbase_mem_term, + "Memory subsystem initialization failed"}, + {kbase_device_coherency_init, NULL, + "Device coherency init failed"}, + {kbase_protected_mode_init, kbase_protected_mode_term, + "Protected mode subsystem initialization failed"}, + {kbase_device_list_init, kbase_device_list_term, + "Device list setup failed"}, + {kbasep_js_devdata_init, kbasep_js_devdata_term, + "Job JS devdata initialization failed"}, + {kbase_device_timeline_init, kbase_device_timeline_term, + "Timeline stream initialization failed"}, + {kbase_clk_rate_trace_manager_init, + kbase_clk_rate_trace_manager_term, + "Clock rate trace manager initialization failed"}, + {kbase_device_hwcnt_backend_jm_init, + kbase_device_hwcnt_backend_jm_term, + "GPU hwcnt backend creation failed"}, + {kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, + "GPU hwcnt context initialization failed"}, + {kbase_device_hwcnt_virtualizer_init, + kbase_device_hwcnt_virtualizer_term, + "GPU hwcnt virtualizer initialization failed"}, + {kbase_device_vinstr_init, kbase_device_vinstr_term, + "Virtual instrumentation initialization failed"}, + {kbase_backend_late_init, kbase_backend_late_term, + "Late backend initialization failed"}, +#ifdef MALI_KBASE_BUILD + {kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, + "Job fault debug initialization failed"}, + {kbase_device_debugfs_init, kbase_device_debugfs_term, + "DebugFS initialization failed"}, + /* Sysfs init needs to happen before registering the device with + * misc_register(), otherwise it causes a race condition between + * registering the device and a uevent event being generated for + * userspace, causing udev rules to run which might expect certain + * sysfs attributes present. As a result of the race condition + * we avoid, some Mali sysfs entries may have appeared to udev + * to not exist. + * For more information, see + * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the + * paragraph that starts with "Word of warning", currently the + * second-last paragraph. 
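+ *
+ * As for every entry in this table, the sysfs init/term pair is unwound by
+ * kbase_device_term_partial() walking dev_init[] in reverse if any later
+ * stage fails to initialize.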
+ */ + {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"}, + {kbase_device_misc_register, kbase_device_misc_deregister, + "Misc device registration failed"}, +#ifdef CONFIG_MALI_BUSLOG + {buslog_init, buslog_term, "Bus log client registration failed"}, +#endif + {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, + "GPU property population failed"}, +#endif + {kbase_dummy_job_wa_load, kbase_dummy_job_wa_cleanup, + "Dummy job workaround load failed"}, +}; + +static void kbase_device_term_partial(struct kbase_device *kbdev, + unsigned int i) +{ + while (i-- > 0) { + if (dev_init[i].term) + dev_init[i].term(kbdev); + } +} + +void kbase_device_term(struct kbase_device *kbdev) +{ + kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); + kbasep_js_devdata_halt(kbdev); + kbase_mem_halt(kbdev); +} + +int kbase_device_init(struct kbase_device *kbdev) +{ + int err = 0; + unsigned int i = 0; + + dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); + + kbase_device_id_init(kbdev); + kbase_disjoint_init(kbdev); + + for (i = 0; i < ARRAY_SIZE(dev_init); i++) { + err = dev_init[i].init(kbdev); + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + dev_init[i].err_mes, err); + kbase_device_term_partial(kbdev, i); + break; + } + } + + return err; +} diff --git a/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.c b/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.c new file mode 100644 index 000000000000..7b3c7eaa15bc --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Base kernel device APIs + */ + +#include +#include +#include +#include +#include +#include +#include +/* MALI_SEC_INTEGRATION */ +#include +/* MALI_SEC_INTEGRATION */ +#if (LINUX_VERSION_CODE > KERNEL_VERSION(4, 10, 0)) +#include +#endif +#include +#include +#include +#include +#include + +#include +#include "mali_kbase_vinstr.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_virtualizer.h" + +#include "mali_kbase_device.h" +#include "mali_kbase_device_internal.h" +#include "backend/gpu/mali_kbase_pm_internal.h" +#include "backend/gpu/mali_kbase_irq_internal.h" + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include "arbiter/mali_kbase_arbiter_pm.h" +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +/* NOTE: Magic - 0x45435254 (TRCE in ASCII). + * Supports tracing feature provided in the base module. + * Please keep it in sync with the value of base module. + */ +#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 + +/* Number of register accesses for the buffer that we allocate during + * initialization time. The buffer size can be changed later via debugfs. 
+ */ +#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) + +static DEFINE_MUTEX(kbase_dev_list_lock); +static LIST_HEAD(kbase_dev_list); +static int kbase_dev_nr; + +struct kbase_device *kbase_device_alloc(void) +{ + return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); +} + +static int kbase_device_as_init(struct kbase_device *kbdev, int i) +{ + kbdev->as[i].number = i; + kbdev->as[i].bf_data.addr = 0ULL; + kbdev->as[i].pf_data.addr = 0ULL; + + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + if (!kbdev->as[i].pf_wq) + return -EINVAL; + + INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); + INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); + + return 0; +} + +static void kbase_device_as_term(struct kbase_device *kbdev, int i) +{ + destroy_workqueue(kbdev->as[i].pf_wq); +} + +static int kbase_device_all_as_init(struct kbase_device *kbdev) +{ + int i, err; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + err = kbase_device_as_init(kbdev, i); + if (err) + goto free_workqs; + } + + return 0; + +free_workqs: + for (; i > 0; i--) + kbase_device_as_term(kbdev, i); + + return err; +} + +static void kbase_device_all_as_term(struct kbase_device *kbdev) +{ + int i; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) + kbase_device_as_term(kbdev, i); +} + +int kbase_device_misc_init(struct kbase_device * const kbdev) +{ + int err; +#ifdef CONFIG_ARM64 + struct device_node *np = NULL; +#endif /* CONFIG_ARM64 */ + + spin_lock_init(&kbdev->mmu_mask_change); + mutex_init(&kbdev->mmu_hw_mutex); +#ifdef CONFIG_ARM64 + kbdev->cci_snoop_enabled = false; + np = kbdev->dev->of_node; + if (np != NULL) { + if (of_property_read_u32(np, "snoop_enable_smc", + &kbdev->snoop_enable_smc)) + kbdev->snoop_enable_smc = 0; + if (of_property_read_u32(np, "snoop_disable_smc", + &kbdev->snoop_disable_smc)) + kbdev->snoop_disable_smc = 0; + /* Either both or none of the calls should be provided. 
*/ + if (!((kbdev->snoop_disable_smc == 0 + && kbdev->snoop_enable_smc == 0) + || (kbdev->snoop_disable_smc != 0 + && kbdev->snoop_enable_smc != 0))) { + WARN_ON(1); + err = -EINVAL; + goto fail; + } + } +#endif /* CONFIG_ARM64 */ + /* Get the list of workarounds for issues on the current HW + * (identified by the GPU_ID register) + */ + err = kbase_hw_set_issues_mask(kbdev); + if (err) + goto fail; + + /* Set the list of features available on the current HW + * (identified by the GPU_ID register) + */ + kbase_hw_set_features_mask(kbdev); + + err = kbase_gpuprops_set_features(kbdev); + if (err) + goto fail; + + /* On Linux 4.0+, dma coherency is determined from device tree */ +#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) + set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); +#endif + + /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our + * device structure was created by device-tree + */ + if (!kbdev->dev->dma_mask) + kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; + + err = dma_set_mask(kbdev->dev, + DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); + if (err) + goto dma_set_mask_failed; + + err = dma_set_coherent_mask(kbdev->dev, + DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); + if (err) + goto dma_set_mask_failed; + + kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; + + err = kbase_device_all_as_init(kbdev); + if (err) + goto as_init_failed; + + spin_lock_init(&kbdev->hwcnt.lock); + + err = kbase_ktrace_init(kbdev); + if (err) + goto term_as; + + init_waitqueue_head(&kbdev->cache_clean_wait); + + kbase_debug_assert_register_hook(&kbase_ktrace_hook_wrapper, kbdev); + + atomic_set(&kbdev->ctx_num, 0); + + err = kbase_instr_backend_init(kbdev); + if (err) + goto term_trace; + + kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; + + kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) + kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); + else + kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); + + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); + + spin_lock_init(&kbdev->hwaccess_lock); + + return 0; +term_trace: + kbase_ktrace_term(kbdev); +term_as: + kbase_device_all_as_term(kbdev); +as_init_failed: +dma_set_mask_failed: +fail: + return err; +} + +void kbase_device_misc_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + + WARN_ON(!list_empty(&kbdev->kctx_list)); + +#if KBASE_KTRACE_ENABLE + kbase_debug_assert_register_hook(NULL, NULL); +#endif + + kbase_instr_backend_term(kbdev); + + kbase_ktrace_term(kbdev); + + kbase_device_all_as_term(kbdev); +} + +void kbase_device_free(struct kbase_device *kbdev) +{ + kfree(kbdev); +} + +void kbase_device_id_init(struct kbase_device *kbdev) +{ + scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, + kbase_dev_nr); + kbdev->id = kbase_dev_nr; +} + +void kbase_increment_device_id(void) +{ + kbase_dev_nr++; +} + +int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface); +} + +void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface); +} + +int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, + &kbdev->hwcnt_gpu_ctx); +} + +void kbase_device_hwcnt_context_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx); +} + +int 
kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_virtualizer_init(kbdev->hwcnt_gpu_ctx, + KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, + &kbdev->hwcnt_gpu_virt); +} + +void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); +} + +int kbase_device_timeline_init(struct kbase_device *kbdev) +{ + atomic_set(&kbdev->timeline_flags, 0); + return kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_flags); +} + +void kbase_device_timeline_term(struct kbase_device *kbdev) +{ + kbase_timeline_term(kbdev->timeline); +} + +int kbase_device_vinstr_init(struct kbase_device *kbdev) +{ + return kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); +} + +void kbase_device_vinstr_term(struct kbase_device *kbdev) +{ + kbase_vinstr_term(kbdev->vinstr_ctx); +} + +int kbase_device_io_history_init(struct kbase_device *kbdev) +{ + return kbase_io_history_init(&kbdev->io_history, + KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); +} + +void kbase_device_io_history_term(struct kbase_device *kbdev) +{ + kbase_io_history_term(&kbdev->io_history); +} + +int kbase_device_misc_register(struct kbase_device *kbdev) +{ + return misc_register(&kbdev->mdev); +} + +void kbase_device_misc_deregister(struct kbase_device *kbdev) +{ + misc_deregister(&kbdev->mdev); +} + +int kbase_device_list_init(struct kbase_device *kbdev) +{ + const struct list_head *dev_list; + + dev_list = kbase_device_get_list(); + list_add(&kbdev->entry, &kbase_dev_list); + kbase_device_put_list(dev_list); + + return 0; +} + +void kbase_device_list_term(struct kbase_device *kbdev) +{ + const struct list_head *dev_list; + + dev_list = kbase_device_get_list(); + list_del(&kbdev->entry); + kbase_device_put_list(dev_list); +} + +const struct list_head *kbase_device_get_list(void) +{ + mutex_lock(&kbase_dev_list_lock); + return &kbase_dev_list; +} +KBASE_EXPORT_TEST_API(kbase_device_get_list); + +void kbase_device_put_list(const struct list_head *dev_list) +{ + mutex_unlock(&kbase_dev_list_lock); +} +KBASE_EXPORT_TEST_API(kbase_device_put_list); + +int kbase_device_early_init(struct kbase_device *kbdev) +{ + int err; + + err = kbasep_platform_device_init(kbdev); + if (err) + return err; + + err = kbase_pm_runtime_init(kbdev); + if (err) + goto fail_runtime_pm; + + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + /* Find out GPU properties based on the GPU feature registers */ + kbase_gpuprops_set(kbdev); + + /* We're done accessing the GPU registers for now. */ + kbase_pm_register_access_disable(kbdev); + + err = kbase_install_interrupts(kbdev); + if (err) + goto fail_interrupts; + + return 0; + +fail_interrupts: + kbase_pm_runtime_term(kbdev); +fail_runtime_pm: + kbasep_platform_device_term(kbdev); + + return err; +} + +void kbase_device_early_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + kbase_arbiter_pm_release_interrupts(kbdev); + else + kbase_release_interrupts(kbdev); +#else + kbase_release_interrupts(kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + kbase_pm_runtime_term(kbdev); + kbasep_platform_device_term(kbdev); +} diff --git a/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.h b/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.h new file mode 100644 index 000000000000..16f1d7098688 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/device/mali_kbase_device.h @@ -0,0 +1,71 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +/** + * kbase_device_get_list - get device list. + * + * Get access to device list. + * + * Return: Pointer to the linked list head. + */ +const struct list_head *kbase_device_get_list(void); + +/** + * kbase_device_put_list - put device list. + * + * @dev_list: head of linked list containing device list. + * + * Put access to the device list. + */ +void kbase_device_put_list(const struct list_head *dev_list); + +/** + * Kbase_increment_device_id - increment device id. + * + * Used to increment device id on successful initialization of the device. + */ +void kbase_increment_device_id(void); + +/** + * kbase_device_init - Device initialisation. + * + * This is called from device probe to initialise various other + * components needed. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 on success and non-zero value on failure. + */ +int kbase_device_init(struct kbase_device *kbdev); + +/** + * kbase_device_term - Device termination. + * + * This is called from device remove to terminate various components that + * were initialised during kbase_device_init. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + */ +void kbase_device_term(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/b_r26p0/device/mali_kbase_device_internal.h b/drivers/gpu/arm/b_r26p0/device/mali_kbase_device_internal.h new file mode 100644 index 000000000000..54644582eac5 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/device/mali_kbase_device_internal.h @@ -0,0 +1,78 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +typedef int kbase_device_init_method(struct kbase_device *kbdev); +typedef void kbase_device_term_method(struct kbase_device *kbdev); + +/** + * struct kbase_device_init - Device init/term methods. + * @init: Function pointer to a initialise method. + * @term: Function pointer to a terminate method. + * @err_mes: Error message to be printed when init method fails. 
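Looking back at the kbase_device_get_list()/kbase_device_put_list() pair declared earlier in this header: "get" takes the global kbase_dev_list_lock and returns the list head, while "put" only drops the lock (the returned pointer is not reference counted), so the list must only be walked between the two calls. A usage sketch, with example_log_devices a made-up name:

#include <linux/list.h>
#include <linux/printk.h>

static void example_log_devices(void)
{
	const struct list_head *dev_list = kbase_device_get_list();
	struct kbase_device *kbdev;

	/* kbdev->entry is the node linked in by kbase_device_list_init();
	 * devname is filled in by kbase_device_id_init(). */
	list_for_each_entry(kbdev, dev_list, entry)
		pr_info("kbase: found %s\n", kbdev->devname);

	kbase_device_put_list(dev_list);	/* releases kbase_dev_list_lock */
}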
+ */ +struct kbase_device_init { + kbase_device_init_method *init; + kbase_device_term_method *term; + char *err_mes; +}; + +int kbase_device_vinstr_init(struct kbase_device *kbdev); +void kbase_device_vinstr_term(struct kbase_device *kbdev); + +int kbase_device_timeline_init(struct kbase_device *kbdev); +void kbase_device_timeline_term(struct kbase_device *kbdev); + +int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev); +void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev); + +int kbase_device_hwcnt_context_init(struct kbase_device *kbdev); +void kbase_device_hwcnt_context_term(struct kbase_device *kbdev); + +int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev); +void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev); + +int kbase_device_list_init(struct kbase_device *kbdev); +void kbase_device_list_term(struct kbase_device *kbdev); + +int kbase_device_io_history_init(struct kbase_device *kbdev); +void kbase_device_io_history_term(struct kbase_device *kbdev); + +int kbase_device_misc_register(struct kbase_device *kbdev); +void kbase_device_misc_deregister(struct kbase_device *kbdev); + +void kbase_device_id_init(struct kbase_device *kbdev); + +/** + * kbase_device_early_init - Perform any device-specific initialization. + * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. + */ +int kbase_device_early_init(struct kbase_device *kbdev); + +/** + * kbase_device_early_term - Perform any device-specific termination. + * @kbdev: Device pointer + */ +void kbase_device_early_term(struct kbase_device *kbdev); diff --git a/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_fault_jm.c b/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_fault_jm.c new file mode 100644 index 000000000000..63132dc80fa5 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_fault_jm.c @@ -0,0 +1,181 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +#include "../mali_kbase_gpu_fault.h" + +const char *kbase_gpu_exception_name(u32 const exception_code) +{ + const char *e; + + switch (exception_code) { + /* Non-Fault Status code */ + case 0x00: + e = "NOT_STARTED/IDLE/OK"; + break; + case 0x01: + e = "DONE"; + break; + case 0x02: + e = "INTERRUPTED"; + break; + case 0x03: + e = "STOPPED"; + break; + case 0x04: + e = "TERMINATED"; + break; + case 0x08: + e = "ACTIVE"; + break; + /* Job exceptions */ + case 0x40: + e = "JOB_CONFIG_FAULT"; + break; + case 0x41: + e = "JOB_POWER_FAULT"; + break; + case 0x42: + e = "JOB_READ_FAULT"; + break; + case 0x43: + e = "JOB_WRITE_FAULT"; + break; + case 0x44: + e = "JOB_AFFINITY_FAULT"; + break; + case 0x48: + e = "JOB_BUS_FAULT"; + break; + case 0x50: + e = "INSTR_INVALID_PC"; + break; + case 0x51: + e = "INSTR_INVALID_ENC"; + break; + case 0x52: + e = "INSTR_TYPE_MISMATCH"; + break; + case 0x53: + e = "INSTR_OPERAND_FAULT"; + break; + case 0x54: + e = "INSTR_TLS_FAULT"; + break; + case 0x55: + e = "INSTR_BARRIER_FAULT"; + break; + case 0x56: + e = "INSTR_ALIGN_FAULT"; + break; + case 0x58: + e = "DATA_INVALID_FAULT"; + break; + case 0x59: + e = "TILE_RANGE_FAULT"; + break; + case 0x5A: + e = "ADDR_RANGE_FAULT"; + break; + case 0x60: + e = "OUT_OF_MEMORY"; + break; + /* GPU exceptions */ + case 0x80: + e = "DELAYED_BUS_FAULT"; + break; + case 0x88: + e = "SHAREABILITY_FAULT"; + break; + /* MMU exceptions */ + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + e = "TRANSLATION_FAULT"; + break; + case 0xC8: + e = "PERMISSION_FAULT"; + break; + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + e = "PERMISSION_FAULT"; + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + e = "TRANSTAB_BUS_FAULT"; + break; + case 0xD8: + e = "ACCESS_FLAG"; + break; + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + e = "ACCESS_FLAG"; + break; + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + e = "ADDRESS_SIZE_FAULT"; + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + e = "MEMORY_ATTRIBUTES_FAULT"; + break; + default: + e = "UNKNOWN"; + break; + }; + + return e; +} diff --git a/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_regmap_jm.h b/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_regmap_jm.h new file mode 100644 index 000000000000..258ff33348fe --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -0,0 +1,262 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_REGMAP_JM_H_ +#define _KBASE_GPU_REGMAP_JM_H_ + + +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull +/* Set to inner non-cacheable, outer-non-cacheable + * Setting defined by the alloc bits is ignored, but set to a valid encoding: + * - no-alloc on read + * - no alloc on write + */ +#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull + +/* Symbols for default MEMATTR to use + * Default is - HW implementation defined caching + */ +#define AS_MEMATTR_INDEX_DEFAULT 0 +#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 + +/* HW implementation defined caching */ +#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 +/* Force cache on */ +#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 +/* Write-alloc */ +#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 +/* Outer coherent, inner implementation defined policy */ +#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 +/* Outer coherent, write alloc inner */ +#define AS_MEMATTR_INDEX_OUTER_WA 4 +/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ +#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 + +/* GPU control registers */ + +#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define JS_PRESENT 0x01C /* (RO) Job slots present */ +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest + * clean-and-invalidate operation + */ + +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory + * region base address, low word + */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory + * region base address, high word + */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter + * configuration + */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable + * flags for Job Manager + */ +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable + * flags for shader cores + */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable + * flags for tiler + */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable + * flags for MMU/L2 cache + */ + +#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ +#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ +#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ +#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ +#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ +#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ +#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ +#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ +#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ +#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ +#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ +#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ +#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ +#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ +#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ +#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ + +#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) + +#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ +#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt 
externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */ + +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ +#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ +#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ +#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ +#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ +#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ +#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ +#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ +#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ +#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ +#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ +#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ +#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ +#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ +#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) + +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job + slot n */ + +#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ + +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ + +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for + job slot n */ + +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ + +/* No JM-specific MMU control registers */ +/* No JM-specific MMU address space control registers */ + +/* JS_COMMAND register commands */ +#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ +#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
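Per-slot registers are reached through JOB_SLOT_REG(n, r): each slot owns a 0x80-byte window starting at JOB_SLOT0, so JOB_SLOT_REG(1, JS_COMMAND_NEXT) works out to JOB_CONTROL_BASE + JOB_SLOT1 + JS_COMMAND_NEXT = 0x1000 + 0x880 + 0x60 = 0x18E0. The sketch below shows the bare minimum needed to kick a chain on a slot using these offsets; it assumes the driver's kbase_reg_write() accessor, the function name is made up, and the real submission path in backend/gpu/mali_kbase_jm_hw.c programs affinity and tracks far more state.

/* Illustrative only: jc is the GPU address of the chain head,
 * cfg a JS_CONFIG_* bitmask for JS_CONFIG_NEXT. */
static void example_kick_slot(struct kbase_device *kbdev, int js,
			      u64 jc, u32 cfg)
{
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), jc & 0xFFFFFFFF);
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), jc >> 32);
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);

	/* e.g. JOB_SLOT_REG(1, JS_COMMAND_NEXT) resolves to 0x18E0 */
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_START);
}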
Writing this value is ignored */ +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ + +/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ +#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) +#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) +#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) +#define JS_CONFIG_START_MMU (1u << 10) +#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) +#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION +#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) +#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) +#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + +/* JS_XAFFINITY register values */ +#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) +#define JS_XAFFINITY_TILER_ENABLE (1u << 8) +#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + +/* JS_STATUS register values */ + +/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h. + * The values are separated to avoid dependency of userspace and kernel code. + */ + +/* Group of values representing the job status instead of a particular fault */ +#define JS_STATUS_NO_EXCEPTION_BASE 0x00 +#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ +#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ +#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ + +/* General fault values */ +#define JS_STATUS_FAULT_BASE 0x40 +#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ +#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ +#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ +#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ +#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ +#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ + +/* Instruction or data faults */ +#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 +#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ +#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ +#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ +#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ +#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ +#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ +#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ +/* NOTE: No fault with 0x57 code defined 
in spec. */ +#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ +#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ +#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ + +/* Other faults */ +#define JS_STATUS_MEMORY_FAULT_BASE 0x60 +#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ +#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ + +/* JS_FEATURES register */ +#define JS_FEATURE_NULL_JOB (1u << 1) +#define JS_FEATURE_SET_VALUE_JOB (1u << 2) +#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) +#define JS_FEATURE_COMPUTE_JOB (1u << 4) +#define JS_FEATURE_VERTEX_JOB (1u << 5) +#define JS_FEATURE_GEOMETRY_JOB (1u << 6) +#define JS_FEATURE_TILER_JOB (1u << 7) +#define JS_FEATURE_FUSED_JOB (1u << 8) +#define JS_FEATURE_FRAGMENT_JOB (1u << 9) + +/* JM_CONFIG register */ +#define JM_TIMESTAMP_OVERRIDE (1ul << 0) +#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) +#define JM_JOB_THROTTLE_ENABLE (1ul << 2) +#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) +#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) +#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) + +/* GPU_COMMAND values */ +#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ +#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ +#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ +#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */ +#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ +#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ +#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ +#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ + +#endif /* _KBASE_GPU_REGMAP_JM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.c new file mode 100644 index 000000000000..3128db4cabfc --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.c @@ -0,0 +1,41 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +const char *kbase_gpu_access_type_name(u32 fault_status) +{ + switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status)) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: + return "ATOMIC"; + case AS_FAULTSTATUS_ACCESS_TYPE_READ: + return "READ"; + case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: + return "WRITE"; + case AS_FAULTSTATUS_ACCESS_TYPE_EX: + return "EXECUTE"; + default: + WARN_ON(1); + return NULL; + } +} diff --git a/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.h b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.h new file mode 100644 index 000000000000..9516e56eda01 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu.h @@ -0,0 +1,31 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_H_ +#define _KBASE_GPU_H_ + +#include "mali_kbase_gpu_regmap.h" +#include "mali_kbase_gpu_fault.h" +#include "mali_kbase_gpu_coherency.h" +#include "mali_kbase_gpu_id.h" + +#endif /* _KBASE_GPU_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_coherency.h b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_coherency.h new file mode 100644 index 000000000000..bb2b1613aa47 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_coherency.h @@ -0,0 +1,31 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_COHERENCY_H_ +#define _KBASE_GPU_COHERENCY_H_ + +#define COHERENCY_ACE_LITE 0 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 +#define COHERENCY_FEATURE_BIT(x) (1 << (x)) + +#endif /* _KBASE_GPU_COHERENCY_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_fault.h b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_fault.h new file mode 100644 index 000000000000..b59b9d15f945 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_fault.h @@ -0,0 +1,59 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_FAULT_H_ +#define _KBASE_GPU_FAULT_H_ + +/** Returns the name associated with a Mali exception code + * + * @exception_code: exception code + * + * This function is called from the interrupt handler when a GPU fault occurs. + * + * Return: name associated with the exception code + */ +const char *kbase_gpu_exception_name(u32 exception_code); + +/** Returns the name associated with a Mali fatal exception code + * + * @fatal_exception_code: fatal exception code + * + * This function is called from the interrupt handler when a GPU fatal + * exception occurs. + * + * Return: name associated with the fatal exception code + */ +const char *kbase_gpu_fatal_exception_name(u32 const fatal_exception_code); + +/** + * kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE + * into string. + * @fault_status: value of FAULTSTATUS register. + * + * After MMU fault, this function can be used to get readable information about + * access_type of the MMU fault. + * + * Return: String of the access type. + */ +const char *kbase_gpu_access_type_name(u32 fault_status); + +#endif /* _KBASE_GPU_FAULT_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_id.h b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_id.h new file mode 100644 index 000000000000..9f3d6b1d5b51 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_id.h @@ -0,0 +1,118 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
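The helpers declared in mali_kbase_gpu_fault.h above are normally used together when reporting a fault: the raw AS_FAULTSTATUS value is decoded with the AS_FAULTSTATUS_*_GET() field macros (defined later in mali_kbase_gpu_regmap.h) and each field is turned into a readable string. A hedged sketch of such a report follows; example_report_fault is a made-up name, and the real reporting lives in the MMU fault handlers.

static void example_report_fault(struct kbase_device *kbdev, int as_no,
				 u32 fault_status)
{
	dev_warn(kbdev->dev,
		 "AS%d fault: %s, %s access, source id 0x%x\n",
		 as_no,
		 kbase_gpu_exception_name(
			AS_FAULTSTATUS_EXCEPTION_TYPE_GET(fault_status)),
		 kbase_gpu_access_type_name(fault_status),
		 AS_FAULTSTATUS_SOURCE_ID_GET(fault_status));
}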
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_ID_H_ +#define _KBASE_GPU_ID_H_ + +/* GPU_ID register */ +#define GPU_ID_VERSION_STATUS_SHIFT 0 +#define GPU_ID_VERSION_MINOR_SHIFT 4 +#define GPU_ID_VERSION_MAJOR_SHIFT 12 +#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 +#define GPU_ID_VERSION_STATUS (0xFu << GPU_ID_VERSION_STATUS_SHIFT) +#define GPU_ID_VERSION_MINOR (0xFFu << GPU_ID_VERSION_MINOR_SHIFT) +#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT) +#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT) + +#define GPU_ID2_VERSION_STATUS_SHIFT 0 +#define GPU_ID2_VERSION_MINOR_SHIFT 4 +#define GPU_ID2_VERSION_MAJOR_SHIFT 12 +#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 +#define GPU_ID2_ARCH_REV_SHIFT 20 +#define GPU_ID2_ARCH_MINOR_SHIFT 24 +#define GPU_ID2_ARCH_MAJOR_SHIFT 28 +#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT) +#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT) +#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT) +#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT) +#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT) +#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) +#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ + GPU_ID2_VERSION_MINOR | \ + GPU_ID2_VERSION_STATUS) + +/* Helper macro to create a partial GPU_ID (new format) that defines + a product ignoring its version. */ +#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ + ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ + (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ + (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Helper macro to create a partial GPU_ID (new format) that specifies the + revision (major, minor, status) of a product */ +#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ + ((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ + (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ + (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + +/* Helper macro to create a complete GPU_ID (new format) */ +#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ + version_major, version_minor, version_status) \ + (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ + product_major) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, \ + version_status)) + +/* Helper macro to create a partial GPU_ID (new format) that identifies + a particular GPU model by its arch_major and product_major. */ +#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ + ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Strip off the non-relevant bits from a product_id value and make it suitable + for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU + model. 
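To make the layout concrete: GPU_ID2_MODEL_MAKE(9, 0) evaluates to (9u << 28) | (0u << 16) = 0x90000000, which is the TTRX model constant defined below, and masking a full GPU_ID value with GPU_ID2_PRODUCT_MODEL reduces it to just the arch_major and product_major fields so it can be compared against such constants. GPU_ID2_MODEL_MATCH_VALUE(), defined next, performs the same normalisation starting from a product_id that has already been shifted down by GPU_ID_VERSION_PRODUCT_ID_SHIFT. A small sketch of the comparison, with example_is_product a made-up name:

#include <linux/types.h>	/* u32, bool */

static inline bool example_is_product(u32 gpu_id, u32 product_model)
{
	/* version_status/minor/major, arch_minor and arch_rev are ignored */
	return (gpu_id & GPU_ID2_PRODUCT_MODEL) == product_model;
}

/* e.g. example_is_product(gpu_id, GPU_ID2_PRODUCT_TTRX) matches any
 * arch-major 9, product-major 0 part, whatever its version fields. */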
*/ +#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ + ((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ + GPU_ID2_PRODUCT_MODEL) + +#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) +#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) +#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0) +#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3) +#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) +#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) +#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) +#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) +#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) +#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) +#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) +#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) +#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) +#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) +#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) +#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2) +#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3) +#define GPU_ID2_PRODUCT_TE2X GPU_ID2_MODEL_MAKE(11, 1) + +/* Helper macro to create a GPU_ID assuming valid values for id, major, + minor, status */ +#define GPU_ID_MAKE(id, major, minor, status) \ + ((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + (((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + (((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ + (((u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) + +#endif /* _KBASE_GPU_ID_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h new file mode 100644 index 000000000000..759f30d21b8d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/gpu/mali_kbase_gpu_regmap.h @@ -0,0 +1,437 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_REGMAP_H_ +#define _KBASE_GPU_REGMAP_H_ + +#include "mali_kbase_gpu_coherency.h" +#include "mali_kbase_gpu_id.h" +#include "backend/mali_kbase_gpu_regmap_jm.h" + +/* Begin Register Offsets */ +/* GPU control registers */ + +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define MMU_FEATURES 0x014 /* (RO) MMU features */ +#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_MASK 0x028 /* (RW) */ +#define GPU_IRQ_STATUS 0x02C /* (RO) */ + +#define GPU_COMMAND 0x030 /* (WO) */ +#define GPU_STATUS 0x034 /* (RO) */ + +#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ + +#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + +#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ + +#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core + * supergroup are l2 coherent + */ + +#define PWR_KEY 0x050 /* (WO) Power manager key register */ +#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ + +#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ + +#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ + +#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ +#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ + +#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ +#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) 
Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ +#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ +#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ +#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ +#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ + +#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ +#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ + +/* Job control registers */ + +#define 
JOB_CONTROL_BASE 0x1000 + +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ + +/* MMU control registers */ + +#define MEMORY_MANAGEMENT_BASE 0x2000 +#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ +#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ +#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ +#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + +/* MMU address space control registers */ + +#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ +#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ + +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 +/* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_LO 0x38 +/* (RO) Secondary fault address for address space n, high word */ +#define AS_FAULTEXTRA_HI 0x3C + +/* End Register Offsets */ + +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ + +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ + +/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. + */ +#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ + | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) + +#ifdef CONFIG_MALI_DEBUG +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) +#else /* CONFIG_MALI_DEBUG */ +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) +#endif /* CONFIG_MALI_DEBUG */ + +/* + * MMU_IRQ_RAWSTAT register values. Values are valid also for + * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
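Each address space above occupies a 0x40-byte window, so MMU_AS_REG(n, r) resolves to MEMORY_MANAGEMENT_BASE + MMU_AS0 + (n << 6) + r; for instance MMU_AS_REG(3, AS_FAULTADDRESS_LO) is 0x2000 + 0x400 + 0xC0 + 0x20 = 0x24E0. The _LO/_HI pairs used throughout this map are combined in software, for example as below (assuming the driver's kbase_reg_read() accessor; example_read_fault_addr is a made-up name):

static u64 example_read_fault_addr(struct kbase_device *kbdev, int as_nr)
{
	/* each read returns a 32-bit half of the faulting GPU VA */
	u64 lo = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_FAULTADDRESS_LO));
	u64 hi = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_FAULTADDRESS_HI));

	return (hi << 32) | lo;
}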
+ */ + +#define MMU_PAGE_FAULT_FLAGS 16 + +/* Macros returning a bitmask to retrieve page fault or bus error flags from + * MMU registers */ +#define MMU_PAGE_FAULT(n) (1UL << (n)) +#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + +/* + * Begin LPAE MMU TRANSTAB register values + */ +#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 +#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) +#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) +#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) +#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) +#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) + +#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 + +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define MMU_HW_OUTA_BITS 40 +#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) + +/* + * Begin MMU STATUS register values + */ +#define AS_STATUS_AS_ACTIVE 0x01 + +#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) + +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) + +#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 +#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) +#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) + +/* + * Begin MMU TRANSCFG register values + */ +#define AS_TRANSCFG_ADRMODE_LEGACY 0 +#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + +#define AS_TRANSCFG_ADRMODE_MASK 0xF + +/* + * Begin TRANSCFG register values + */ +#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) +#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) +#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) + +/* + * Begin Command Values + */ + +/* AS_COMMAND register commands */ +#define AS_COMMAND_NOP 0x00 /* NOP Operation */ +#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ +#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs + (deprecated - only for use with 
T60x) */ +#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then + flush all L2 caches then issue a flush region command to all MMUs */ + +/* GPU_STATUS values */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ + +/* PRFCNT_CONFIG register values */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + +/* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_OFF 0 +/* The performance counters are enabled, but are only written out when a + * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. + */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 +/* The performance counters are enabled, and are written out each time a tile + * finishes rendering. + */ +#define PRFCNT_CONFIG_MODE_TILE 2 + +/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull +/* There is no LPAE support for non-cacheable, since the memory type is always + * write-back. 
+ * Marking this setting as reserved for LPAE + */ +#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) + +/* End L2_MMU_CONFIG register */ + +/* THREAD_* registers */ + +/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ +#define IMPLEMENTATION_UNSPECIFIED 0 +#define IMPLEMENTATION_SILICON 1 +#define IMPLEMENTATION_FPGA 2 +#define IMPLEMENTATION_MODEL 3 + +/* Default values when registers are not supported by the implemented hardware */ +#define THREAD_MT_DEFAULT 256 +#define THREAD_MWS_DEFAULT 256 +#define THREAD_MBS_DEFAULT 256 +#define THREAD_MR_DEFAULT 1024 +#define THREAD_MTQ_DEFAULT 4 +#define THREAD_MTGS_DEFAULT 10 + +/* End THREAD_* registers */ + +/* SHADER_CONFIG register */ +#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) +#define SC_TLS_HASH_ENABLE (1ul << 17) +#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) +#define SC_VAR_ALGORITHM (1ul << 29) +/* End SHADER_CONFIG register */ + +/* TILER_CONFIG register */ +#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) +/* End TILER_CONFIG register */ + +/* L2_CONFIG register */ +#define L2_CONFIG_SIZE_SHIFT 16 +#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) +#define L2_CONFIG_HASH_SHIFT 24 +#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) +/* End L2_CONFIG register */ + +/* IDVS_GROUP register */ +#define IDVS_GROUP_SIZE_SHIFT (16) +#define IDVS_GROUP_MAX_SIZE (0x3F) + +#endif /* _KBASE_GPU_REGMAP_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/ipa/Kbuild b/drivers/gpu/arm/b_r26p0/ipa/Kbuild new file mode 100644 index 000000000000..3d9cf8006b80 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/Kbuild @@ -0,0 +1,28 @@ +# +# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + ipa/mali_kbase_ipa_simple.o \ + ipa/mali_kbase_ipa.o \ + ipa/mali_kbase_ipa_vinstr_g7x.o \ + ipa/mali_kbase_ipa_vinstr_common.o + +mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o \ No newline at end of file diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.c new file mode 100644 index 000000000000..2851ae0ff3c6 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.c @@ -0,0 +1,673 @@ +/* + * + * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include <linux/thermal.h> +#include <linux/devfreq_cooling.h> +#include <linux/of.h> +#include "mali_kbase.h" +#include "mali_kbase_ipa.h" +#include "mali_kbase_ipa_debugfs.h" +#include "mali_kbase_ipa_simple.h" +#include "backend/gpu/mali_kbase_pm_internal.h" + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#include <linux/pm_opp.h> +#else +#include <linux/opp.h> +#define dev_pm_opp_find_freq_exact opp_find_freq_exact +#define dev_pm_opp_get_voltage opp_get_voltage +#define dev_pm_opp opp +#endif + +#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" + +static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { + &kbase_simple_ipa_model_ops, + &kbase_g71_ipa_model_ops, + &kbase_g72_ipa_model_ops, + &kbase_g76_ipa_model_ops, + &kbase_g52_ipa_model_ops, + &kbase_g52_r1_ipa_model_ops, + &kbase_g51_ipa_model_ops, + &kbase_g77_ipa_model_ops, + &kbase_tnax_ipa_model_ops, + &kbase_tbex_ipa_model_ops +}; + +int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) +{ + int err = 0; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + if (model->ops->recalculate) { + err = model->ops->recalculate(model); + if (err) { + dev_err(model->kbdev->dev, + "recalculation of power model %s returned error %d\n", + model->ops->name, err); + } + } + + return err; +} + +const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, + const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { + const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; + + if (!strcmp(ops->name, name)) + return ops; + } + + dev_err(kbdev->dev, "power model \'%s\' not found\n", name); + + return NULL; +} +KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); + +const char *kbase_ipa_model_name_from_id(u32 gpu_id) +{ + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { + case GPU_ID2_PRODUCT_TMIX: + return "mali-g71-power-model"; + case GPU_ID2_PRODUCT_THEX: + return "mali-g72-power-model"; + case GPU_ID2_PRODUCT_TNOX: + return "mali-g76-power-model"; + case GPU_ID2_PRODUCT_TSIX: + return "mali-g51-power-model"; + case GPU_ID2_PRODUCT_TGOX: + if ((gpu_id & GPU_ID2_VERSION_MAJOR) == + (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) + /* g52 aliased to g76 power-model's ops */ + return "mali-g52-power-model"; + else + return "mali-g52_r1-power-model"; + case GPU_ID2_PRODUCT_TNAX: + return "mali-tnax-power-model"; + case GPU_ID2_PRODUCT_TTRX: + return "mali-g77-power-model"; + case GPU_ID2_PRODUCT_TBEX: + return "mali-tbex-power-model"; + default: + return KBASE_IPA_FALLBACK_MODEL_NAME; + } +} +KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); + +static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, + bool dt_required) +{ + struct device_node *model_dt_node; + char compat_string[64]; + + snprintf(compat_string, sizeof(compat_string), "arm,%s", + model->ops->name); + + /* of_find_compatible_node() will call of_node_put() on the root node, + * so take a reference on it first.
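 *
 * For reference, the node matched here is the power model node that platform
 * integrators add to the device tree alongside the Mali GPU node; matching is
 * purely by compatible string ("arm," followed by the model name built
 * below), not by node name. A hypothetical fragment for the simple model,
 * with made-up values rather than recommended defaults:
 *
 *   power_model {
 *           compatible = "arm,mali-simple-power-model";
 *           static-coefficient = <2400000>;
 *           dynamic-coefficient = <4700>;
 *           ts = <600000 10000 0 0>;
 *           thermal-zone = "gpu";
 *   };
 *
 * A counter-based model would instead use, e.g.,
 * compatible = "arm,mali-g77-power-model" with its own parameters.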
+ */ + of_node_get(model->kbdev->dev->of_node); + model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, + NULL, compat_string); + if (!model_dt_node && !model->missing_dt_node_warning) { + if (dt_required) + dev_warn(model->kbdev->dev, + "Couldn't find power_model DT node matching \'%s\'\n", + compat_string); + model->missing_dt_node_warning = true; + } + + return model_dt_node; +} + +int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, + const char *name, s32 *addr, + size_t num_elems, bool dt_required) +{ + int err, i; + struct device_node *model_dt_node = get_model_dt_node(model, + dt_required); + char *origin; + + err = of_property_read_u32_array(model_dt_node, name, addr, num_elems); + /* We're done with model_dt_node now, so drop the reference taken in + * get_model_dt_node()/of_find_compatible_node(). + */ + of_node_put(model_dt_node); + + if (err && dt_required) { + memset(addr, 0, sizeof(s32) * num_elems); + dev_warn(model->kbdev->dev, + "Error %d, no DT entry: %s.%s = %zu*[0]\n", + err, model->ops->name, name, num_elems); + origin = "zero"; + } else if (err && !dt_required) { + origin = "default"; + } else /* !err */ { + origin = "DT"; + } + + /* Create a unique debugfs entry for each element */ + for (i = 0; i < num_elems; ++i) { + char elem_name[32]; + + if (num_elems == 1) + snprintf(elem_name, sizeof(elem_name), "%s", name); + else + snprintf(elem_name, sizeof(elem_name), "%s.%d", + name, i); + + dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", + model->ops->name, elem_name, addr[i], origin); + + err = kbase_ipa_model_param_add(model, elem_name, + &addr[i], sizeof(s32), + PARAM_TYPE_S32); + if (err) + goto exit; + } +exit: + return err; +} + +int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, + const char *name, char *addr, + size_t size, bool dt_required) +{ + int err; + struct device_node *model_dt_node = get_model_dt_node(model, + dt_required); + const char *string_prop_value; + char *origin; + + err = of_property_read_string(model_dt_node, name, + &string_prop_value); + + /* We're done with model_dt_node now, so drop the reference taken in + * get_model_dt_node()/of_find_compatible_node(). 
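 *
 * Side note on where these parameters end up: each value registered here and
 * in kbase_ipa_model_add_param_s32() becomes a debugfs file named "<param>"
 * (or "<param>.<index>" for array elements), grouped in a directory named
 * after the model. On a typical system that looks like, for example,
 *
 *   /sys/kernel/debug/mali0/mali-simple-power-model/static-coefficient
 *   /sys/kernel/debug/mali0/mali-simple-power-model/ts.0 ... ts.3
 *
 * ("mali0" is the usual device name and may differ per platform). Writing a
 * value re-runs kbase_ipa_model_recalculate() through the handlers in
 * mali_kbase_ipa_debugfs.c.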
+ */ + of_node_put(model_dt_node); + + if (err && dt_required) { + strncpy(addr, "", size - 1); + dev_warn(model->kbdev->dev, + "Error %d, no DT entry: %s.%s = \'%s\'\n", + err, model->ops->name, name, addr); + err = 0; + origin = "zero"; + } else if (err && !dt_required) { + origin = "default"; + } else /* !err */ { + strncpy(addr, string_prop_value, size - 1); + origin = "DT"; + } + + addr[size - 1] = '\0'; + + dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", + model->ops->name, name, string_prop_value, origin); + + err = kbase_ipa_model_param_add(model, name, addr, size, + PARAM_TYPE_STRING); + return err; +} + +void kbase_ipa_term_model(struct kbase_ipa_model *model) +{ + if (!model) + return; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + if (model->ops->term) + model->ops->term(model); + + kbase_ipa_model_param_free_all(model); + + kfree(model); +} +KBASE_EXPORT_TEST_API(kbase_ipa_term_model); + +struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, + const struct kbase_ipa_model_ops *ops) +{ + struct kbase_ipa_model *model; + int err; + + lockdep_assert_held(&kbdev->ipa.lock); + + if (!ops || !ops->name) + return NULL; + + model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL); + if (!model) + return NULL; + + model->kbdev = kbdev; + model->ops = ops; + INIT_LIST_HEAD(&model->params); + + err = model->ops->init(model); + if (err) { + dev_err(kbdev->dev, + "init of power model \'%s\' returned error %d\n", + ops->name, err); + kfree(model); + return NULL; + } + + err = kbase_ipa_model_recalculate(model); + if (err) { + kbase_ipa_term_model(model); + return NULL; + } + + return model; +} +KBASE_EXPORT_TEST_API(kbase_ipa_init_model); + +static void kbase_ipa_term_locked(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->ipa.lock); + + /* Clean up the models */ + if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) + kbase_ipa_term_model(kbdev->ipa.configured_model); + kbase_ipa_term_model(kbdev->ipa.fallback_model); + + kbdev->ipa.configured_model = NULL; + kbdev->ipa.fallback_model = NULL; +} + +int kbase_ipa_init(struct kbase_device *kbdev) +{ + + const char *model_name; + const struct kbase_ipa_model_ops *ops; + struct kbase_ipa_model *default_model = NULL; + int err; + + mutex_init(&kbdev->ipa.lock); + /* + * Lock during init to avoid warnings from lockdep_assert_held (there + * shouldn't be any concurrent access yet). 
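 *
 * Outline of the selection logic below: the simple model is always created
 * as the fallback; then, if the GPU's device-tree node has an "ipa-model"
 * property, that names the configured model, otherwise the name is inferred
 * from the GPU ID. A hypothetical fragment selecting a counter-based model
 * (node name, unit address and compatible are illustrative only):
 *
 *   gpu: gpu@40000000 {
 *           compatible = "arm,mali-bifrost";
 *           ...
 *           ipa-model = "mali-g77-power-model";
 *   };
 *
 * Only the "ipa-model" property on the GPU node matters to the code below.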
+ */ + mutex_lock(&kbdev->ipa.lock); + + /* The simple IPA model must *always* be present.*/ + ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME); + + default_model = kbase_ipa_init_model(kbdev, ops); + if (!default_model) { + err = -EINVAL; + goto end; + } + + kbdev->ipa.fallback_model = default_model; + err = of_property_read_string(kbdev->dev->of_node, + "ipa-model", + &model_name); + if (err) { + /* Attempt to load a match from GPU-ID */ + u32 gpu_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + model_name = kbase_ipa_model_name_from_id(gpu_id); + dev_dbg(kbdev->dev, + "Inferring model from GPU ID 0x%x: \'%s\'\n", + gpu_id, model_name); + err = 0; + } else { + dev_dbg(kbdev->dev, + "Using ipa-model parameter from DT: \'%s\'\n", + model_name); + } + + if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) { + ops = kbase_ipa_model_ops_find(kbdev, model_name); + kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops); + if (!kbdev->ipa.configured_model) { + dev_warn(kbdev->dev, + "Failed to initialize ipa-model: \'%s\'\n" + "Falling back on default model\n", + model_name); + kbdev->ipa.configured_model = default_model; + } + } else { + kbdev->ipa.configured_model = default_model; + } + +end: + if (err) + kbase_ipa_term_locked(kbdev); + else + dev_info(kbdev->dev, + "Using configured power model %s, and fallback %s\n", + kbdev->ipa.configured_model->ops->name, + kbdev->ipa.fallback_model->ops->name); + + mutex_unlock(&kbdev->ipa.lock); + return err; +} +KBASE_EXPORT_TEST_API(kbase_ipa_init); + +void kbase_ipa_term(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->ipa.lock); + kbase_ipa_term_locked(kbdev); + mutex_unlock(&kbdev->ipa.lock); + + mutex_destroy(&kbdev->ipa.lock); +} +KBASE_EXPORT_TEST_API(kbase_ipa_term); + +/** + * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP + * @c: Dynamic model coefficient, in pW/(Hz V^2). Should be in range + * 0 < c < 2^26 to prevent overflow. + * @freq: Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz) + * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) + * + * Keep a record of the approximate range of each value at every stage of the + * calculation, to ensure we don't overflow. This makes heavy use of the + * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual + * calculations in decimal for increased accuracy. + * + * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) + */ +static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, + const u32 voltage) +{ + /* Range: 2^8 < v2 < 2^16 m(V^2) */ + const u32 v2 = (voltage * voltage) / 1000; + + /* Range: 2^3 < f_MHz < 2^10 MHz */ + const u32 f_MHz = freq / 1000000; + + /* Range: 2^11 < v2f_big < 2^26 kHz V^2 */ + const u32 v2f_big = v2 * f_MHz; + + /* Range: 2^1 < v2f < 2^16 MHz V^2 */ + const u32 v2f = v2f_big / 1000; + + /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. + * Must be < 2^42 to avoid overflowing the return value. */ + const u64 v2fc = (u64) c * (u64) v2f; + + /* Range: 0 < v2fc / 1000 < 2^13 mW */ + return div_u64(v2fc, 1000); +} + +/** + * kbase_scale_static_power() - Scale a static power coefficient to an OPP + * @c: Static model coefficient, in uW/V^3. Should be in range + * 0 < c < 2^32 to prevent overflow. + * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) + * + * Return: Power consumption, in mW. 
Range: 0 < p < 2^13 (0W to ~8W) + */ +u32 kbase_scale_static_power(const u32 c, const u32 voltage) +{ + /* Range: 2^8 < v2 < 2^16 m(V^2) */ + const u32 v2 = (voltage * voltage) / 1000; + + /* Range: 2^17 < v3_big < 2^29 m(V^2) mV */ + const u32 v3_big = v2 * voltage; + + /* Range: 2^7 < v3 < 2^19 m(V^3) */ + const u32 v3 = v3_big / 1000; + + /* + * Range (working backwards from next line): 0 < v3c_big < 2^33 nW. + * The result should be < 2^52 to avoid overflowing the return value. + */ + const u64 v3c_big = (u64) c * (u64) v3; + + /* Range: 0 < v3c_big / 1000000 < 2^13 mW */ + return div_u64(v3c_big, 1000000); +} + +void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Record the event of GPU entering protected mode. */ + kbdev->ipa_protection_mode_switched = true; +} + +static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) +{ + struct kbase_ipa_model *model; + unsigned long flags; + + lockdep_assert_held(&kbdev->ipa.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->ipa_protection_mode_switched || + kbdev->ipa.force_fallback_model) + model = kbdev->ipa.fallback_model; + else + model = kbdev->ipa.configured_model; + + /* + * Having taken cognizance of the fact that whether GPU earlier + * protected mode or not, the event can be now reset (if GPU is not + * currently in protected mode) so that configured model is used + * for the next sample. + */ + if (!kbdev->protected_mode) + kbdev->ipa_protection_mode_switched = false; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return model; +} + +static u32 get_static_power_locked(struct kbase_device *kbdev, + struct kbase_ipa_model *model, + unsigned long voltage) +{ + u32 power = 0; + int err; + u32 power_coeff; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + if (!model->ops->get_static_coeff) + model = kbdev->ipa.fallback_model; + + if (model->ops->get_static_coeff) { + err = model->ops->get_static_coeff(model, &power_coeff); + if (!err) + power = kbase_scale_static_power(power_coeff, + (u32) voltage); + } + + return power; +} + +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +static unsigned long kbase_get_static_power(struct devfreq *df, + unsigned long voltage) +#else +static unsigned long kbase_get_static_power(unsigned long voltage) +#endif +{ + struct kbase_ipa_model *model; + u32 power = 0; +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + struct kbase_device *kbdev = dev_get_drvdata(&df->dev); +#else + struct kbase_device *kbdev = kbase_find_device(-1); +#endif + + if (!kbdev) + return 0ul; + + mutex_lock(&kbdev->ipa.lock); + + model = get_current_model(kbdev); + power = get_static_power_locked(kbdev, model, voltage); + + mutex_unlock(&kbdev->ipa.lock); + +#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) + kbase_release_device(kbdev); +#endif + + return power; +} + +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +static unsigned long kbase_get_dynamic_power(struct devfreq *df, + unsigned long freq, + unsigned long voltage) +#else +static unsigned long kbase_get_dynamic_power(unsigned long freq, + unsigned long voltage) +#endif +{ + struct kbase_ipa_model *model; + u32 power_coeff = 0, power = 0; + int err = 0; +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + struct kbase_device 
*kbdev = dev_get_drvdata(&df->dev); +#else + struct kbase_device *kbdev = kbase_find_device(-1); +#endif + + if (!kbdev) + return 0ul; + + mutex_lock(&kbdev->ipa.lock); + + model = kbdev->ipa.fallback_model; + + err = model->ops->get_dynamic_coeff(model, &power_coeff); + + if (!err) + power = kbase_scale_dynamic_power(power_coeff, freq, voltage); + else + dev_err_ratelimited(kbdev->dev, + "Model %s returned error code %d\n", + model->ops->name, err); + + mutex_unlock(&kbdev->ipa.lock); + +#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) + kbase_release_device(kbdev); +#endif + + return power; +} + +int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, + unsigned long freq, + unsigned long voltage) +{ + struct kbase_ipa_model *model; + u32 power_coeff = 0; + int err = 0; + struct kbasep_pm_metrics diff; + u64 total_time; + + lockdep_assert_held(&kbdev->ipa.lock); + + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff); + + model = get_current_model(kbdev); + + err = model->ops->get_dynamic_coeff(model, &power_coeff); + + /* If the counter model returns an error (e.g. switching back to + * protected mode and failing to read counters, or a counter sample + * with too few cycles), revert to the fallback model. + */ + if (err && model != kbdev->ipa.fallback_model) { + model = kbdev->ipa.fallback_model; + err = model->ops->get_dynamic_coeff(model, &power_coeff); + } + + if (err) + return err; + + *power = kbase_scale_dynamic_power(power_coeff, freq, voltage); + + /* time_busy / total_time cannot be >1, so assigning the 64-bit + * result of div_u64 to *power cannot overflow. + */ + total_time = diff.time_busy + (u64) diff.time_idle; + *power = div_u64(*power * (u64) diff.time_busy, + max(total_time, 1ull)); + + *power += get_static_power_locked(kbdev, model, voltage); + + return err; +} +KBASE_EXPORT_TEST_API(kbase_get_real_power_locked); + +int kbase_get_real_power(struct devfreq *df, u32 *power, + unsigned long freq, + unsigned long voltage) +{ + int ret; + struct kbase_device *kbdev = dev_get_drvdata(&df->dev); + + if (!kbdev) + return -ENODEV; + + mutex_lock(&kbdev->ipa.lock); + ret = kbase_get_real_power_locked(kbdev, power, freq, voltage); + mutex_unlock(&kbdev->ipa.lock); + + return ret; +} +KBASE_EXPORT_TEST_API(kbase_get_real_power); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +struct devfreq_cooling_ops kbase_ipa_power_model_ops = { +#else +struct devfreq_cooling_power kbase_ipa_power_model_ops = { +#endif + .get_static_power = &kbase_get_static_power, + .get_dynamic_power = &kbase_get_dynamic_power, +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + .get_real_power = &kbase_get_real_power, +#endif +}; +KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.h new file mode 100644 index 000000000000..92aace911e6b --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa.h @@ -0,0 +1,253 @@ +/* + * + * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_IPA_H_ +#define _KBASE_IPA_H_ + +#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + +struct devfreq; + +/** + * struct kbase_ipa_model - Object describing a particular IPA model. + * @kbdev: pointer to kbase device + * @model_data: opaque pointer to model specific data, accessed + * only by model specific methods. + * @ops: pointer to object containing model specific methods. + * @params: head of the list of debugfs params added for model + * @missing_dt_node_warning: flag to limit the matching power model DT not found + * warning to once. + */ +struct kbase_ipa_model { + struct kbase_device *kbdev; + void *model_data; + const struct kbase_ipa_model_ops *ops; + struct list_head params; + bool missing_dt_node_warning; +}; + +/** + * kbase_ipa_model_add_param_s32 - Add an integer model parameter + * @model: pointer to IPA model + * @name: name of corresponding debugfs entry + * @addr: address where the value is stored + * @num_elems: number of elements (1 if not an array) + * @dt_required: if false, a corresponding devicetree entry is not required, + * and the current value will be used. If true, a warning is + * output and the data is zeroed + * + * Return: 0 on success, or an error code + */ +int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, + const char *name, s32 *addr, + size_t num_elems, bool dt_required); + +/** + * kbase_ipa_model_add_param_string - Add a string model parameter + * @model: pointer to IPA model + * @name: name of corresponding debugfs entry + * @addr: address where the value is stored + * @size: size, in bytes, of the value storage (so the maximum string + * length is size - 1) + * @dt_required: if false, a corresponding devicetree entry is not required, + * and the current value will be used. If true, a warning is + * output and the data is zeroed + * + * Return: 0 on success, or an error code + */ +int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, + const char *name, char *addr, + size_t size, bool dt_required); + +struct kbase_ipa_model_ops { + char *name; + /* The init, recalculate and term ops on the default model are always + * called. However, all the other models are only invoked if the model + * is selected in the device tree. Otherwise they are never + * initialized. Additional resources can be acquired by models in + * init(), however they must be terminated in the term(). + */ + int (*init)(struct kbase_ipa_model *model); + /* Called immediately after init(), or when a parameter is changed, so + * that any coefficients derived from model parameters can be + * recalculated. */ + int (*recalculate)(struct kbase_ipa_model *model); + void (*term)(struct kbase_ipa_model *model); + /* + * get_dynamic_coeff() - calculate dynamic power coefficient + * @model: pointer to model + * @coeffp: pointer to return value location + * + * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which + * is then scaled by the IPA framework according to the current OPP's + * frequency and voltage. 
+ * + * Return: 0 on success, or an error code. + */ + int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp); + /* + * get_static_coeff() - calculate static power coefficient + * @model: pointer to model + * @coeffp: pointer to return value location + * + * Calculate a static power coefficient, with units uW/(V^3), which is + * scaled by the IPA framework according to the current OPP's voltage. + * + * Return: 0 on success, or an error code. + */ + int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp); +}; + +/** + * kbase_ipa_init - Initialize the IPA feature + * @kbdev: pointer to kbase device + * + * The simple IPA power model is initialized as a fallback model; if its + * initialization fails then IPA is not used. + * The device tree is read for the name of the IPA model to be used, via the + * property string "ipa-model". If that model is supported it is initialized, + * but if its initialization fails then the simple power model is used. + * + * Return: 0 on success, negative -errno on error + */ +int kbase_ipa_init(struct kbase_device *kbdev); + +/** + * kbase_ipa_term - Terminate the IPA feature + * @kbdev: pointer to kbase device + * + * Both the simple IPA power model and the model selected via the device tree + * are terminated. + */ +void kbase_ipa_term(struct kbase_device *kbdev); + +/** + * kbase_ipa_model_recalculate - Recalculate the model coefficients + * @model: pointer to the IPA model object, already initialized + * + * It shall be called immediately after the model has been initialized + * or when a model parameter has changed, so that any coefficients + * derived from parameters can be recalculated. + * It is a wrapper for the model-specific recalculate() method. + * + * Return: 0 on success, negative -errno on error + */ +int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); + +/** + * kbase_ipa_model_ops_find - Lookup an IPA model using its name + * @kbdev: pointer to kbase device + * @name: name of model to lookup + * + * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed. + */ +const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, + const char *name); + +/** + * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID + * @gpu_id: GPU ID of GPU the model will be used for + * + * Return: The name of the appropriate counter-based model, or the name of the + * fallback model if no counter model exists. + */ +const char *kbase_ipa_model_name_from_id(u32 gpu_id); + +/** + * kbase_ipa_init_model - Initialize the particular IPA model + * @kbdev: pointer to kbase device + * @ops: pointer to object containing model specific methods. + * + * Initialize the model corresponding to the @ops pointer passed. + * The init() method specified in @ops is called. + * + * Return: pointer to kbase_ipa_model on success, NULL on error + */ +struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, + const struct kbase_ipa_model_ops *ops); +/** + * kbase_ipa_term_model - Terminate the particular IPA model + * @model: pointer to the IPA model object, already initialized + * + * Terminate the model, using the term() method. + * Model-specific parameters are freed. + */ +void kbase_ipa_term_model(struct kbase_ipa_model *model); + +/** + * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into + * protected mode + * @kbdev: pointer to kbase device + * + * Makes IPA aware of the GPU switching to protected mode.
+ */ +void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); + +extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_tnax_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_tbex_ipa_model_ops; + +/** + * kbase_get_real_power() - get the real power consumption of the GPU + * @df: dynamic voltage and frequency scaling information for the GPU. + * @power: where to store the power consumption, in mW. + * @freq: a frequency, in HZ. + * @voltage: a voltage, in mV. + * + * The returned value incorporates both static and dynamic power consumption. + * + * Return: 0 on success, or an error code. + */ +int kbase_get_real_power(struct devfreq *df, u32 *power, + unsigned long freq, + unsigned long voltage); + +#if MALI_UNIT_TEST +/* Called by kbase_get_real_power() to invoke the power models. + * Must be called with kbdev->ipa.lock held. + * This function is only exposed for use by unit tests. + */ +int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, + unsigned long freq, + unsigned long voltage); +#endif /* MALI_UNIT_TEST */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +extern struct devfreq_cooling_ops kbase_ipa_power_model_ops; +#else +extern struct devfreq_cooling_power kbase_ipa_power_model_ops; +#endif + +#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + +static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) +{ } + +#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + +#endif diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.c new file mode 100644 index 000000000000..30a3b7d1b3be --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.c @@ -0,0 +1,322 @@ +/* + * + * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include "mali_kbase.h" +#include "mali_kbase_ipa.h" +#include "mali_kbase_ipa_debugfs.h" + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)) +#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE +#endif + +struct kbase_ipa_model_param { + char *name; + union { + void *voidp; + s32 *s32p; + char *str; + } addr; + size_t size; + enum kbase_ipa_model_param_type type; + struct kbase_ipa_model *model; + struct list_head link; +}; + +static int param_int_get(void *data, u64 *val) +{ + struct kbase_ipa_model_param *param = data; + + mutex_lock(¶m->model->kbdev->ipa.lock); + *(s64 *) val = *param->addr.s32p; + mutex_unlock(¶m->model->kbdev->ipa.lock); + + return 0; +} + +static int param_int_set(void *data, u64 val) +{ + struct kbase_ipa_model_param *param = data; + struct kbase_ipa_model *model = param->model; + s64 sval = (s64) val; + s32 old_val; + int err = 0; + + if (sval < S32_MIN || sval > S32_MAX) + return -ERANGE; + + mutex_lock(¶m->model->kbdev->ipa.lock); + old_val = *param->addr.s32p; + *param->addr.s32p = val; + err = kbase_ipa_model_recalculate(model); + if (err < 0) + *param->addr.s32p = old_val; + mutex_unlock(¶m->model->kbdev->ipa.lock); + + return err; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n"); + +static ssize_t param_string_get(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct kbase_ipa_model_param *param = file->private_data; + ssize_t ret; + size_t len; + + mutex_lock(¶m->model->kbdev->ipa.lock); + len = strnlen(param->addr.str, param->size - 1) + 1; + ret = simple_read_from_buffer(user_buf, count, ppos, + param->addr.str, len); + mutex_unlock(¶m->model->kbdev->ipa.lock); + + return ret; +} + +static ssize_t param_string_set(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct kbase_ipa_model_param *param = file->private_data; + struct kbase_ipa_model *model = param->model; + char *old_str = NULL; + ssize_t ret = count; + size_t buf_size; + int err; + + mutex_lock(&model->kbdev->ipa.lock); + + if (count > param->size) { + ret = -EINVAL; + goto end; + } + + old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL); + if (!old_str) { + ret = -ENOMEM; + goto end; + } + + buf_size = min(param->size - 1, count); + if (copy_from_user(param->addr.str, user_buf, buf_size)) { + ret = -EFAULT; + goto end; + } + + param->addr.str[buf_size] = '\0'; + + err = kbase_ipa_model_recalculate(model); + if (err < 0) { + ret = err; + strlcpy(param->addr.str, old_str, param->size); + } + +end: + kfree(old_str); + mutex_unlock(&model->kbdev->ipa.lock); + + return ret; +} + +static const struct file_operations fops_string = { + .owner = THIS_MODULE, + .read = param_string_get, + .write = param_string_set, + .open = simple_open, + .llseek = default_llseek, +}; + +int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, + void *addr, size_t size, + enum kbase_ipa_model_param_type type) +{ + struct kbase_ipa_model_param *param; + + param = kzalloc(sizeof(*param), GFP_KERNEL); + + if (!param) + return -ENOMEM; + + /* 'name' is stack-allocated for array elements, so copy it into + * heap-allocated storage */ + param->name = kstrdup(name, GFP_KERNEL); + + if (!param->name) { + kfree(param); + return -ENOMEM; + } + + param->addr.voidp = addr; + param->size = size; + param->type = type; + param->model = model; + + list_add(¶m->link, &model->params); + + return 0; +} + +void 
kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) +{ + struct kbase_ipa_model_param *param_p, *param_n; + + list_for_each_entry_safe(param_p, param_n, &model->params, link) { + list_del(¶m_p->link); + kfree(param_p->name); + kfree(param_p); + } +} + +static int force_fallback_model_get(void *data, u64 *val) +{ + struct kbase_device *kbdev = data; + + mutex_lock(&kbdev->ipa.lock); + *val = kbdev->ipa.force_fallback_model; + mutex_unlock(&kbdev->ipa.lock); + + return 0; +} + +static int force_fallback_model_set(void *data, u64 val) +{ + struct kbase_device *kbdev = data; + + mutex_lock(&kbdev->ipa.lock); + kbdev->ipa.force_fallback_model = (val ? true : false); + mutex_unlock(&kbdev->ipa.lock); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model, + force_fallback_model_get, + force_fallback_model_set, + "%llu\n"); + +static int current_power_get(void *data, u64 *val) +{ + struct kbase_device *kbdev = data; + struct devfreq *df = kbdev->devfreq; + u32 power; + + kbase_pm_context_active(kbdev); + /* The current model assumes that there's no more than one voltage + * regulator currently available in the system. + */ + kbase_get_real_power(df, &power, + kbdev->current_nominal_freq, + (kbdev->current_voltages[0] / 1000)); + kbase_pm_context_idle(kbdev); + + *val = power; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n"); + +static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model) +{ + struct list_head *it; + struct dentry *dir; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + dir = debugfs_create_dir(model->ops->name, + model->kbdev->mali_debugfs_directory); + + if (!dir) { + dev_err(model->kbdev->dev, + "Couldn't create mali debugfs %s directory", + model->ops->name); + return; + } + + list_for_each(it, &model->params) { + struct kbase_ipa_model_param *param = + list_entry(it, + struct kbase_ipa_model_param, + link); + const struct file_operations *fops = NULL; + + switch (param->type) { + case PARAM_TYPE_S32: + fops = &fops_s32; + break; + case PARAM_TYPE_STRING: + fops = &fops_string; + break; + } + + if (unlikely(!fops)) { + dev_err(model->kbdev->dev, + "Type not set for %s parameter %s\n", + model->ops->name, param->name); + } else { + debugfs_create_file(param->name, S_IRUGO | S_IWUSR, + dir, param, fops); + } + } +} + +void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, + const char *name, s32 val) +{ + struct kbase_ipa_model_param *param; + + mutex_lock(&model->kbdev->ipa.lock); + + list_for_each_entry(param, &model->params, link) { + if (!strcmp(param->name, name)) { + if (param->type == PARAM_TYPE_S32) { + *param->addr.s32p = val; + } else { + dev_err(model->kbdev->dev, + "Wrong type for %s parameter %s\n", + model->ops->name, param->name); + } + break; + } + } + + mutex_unlock(&model->kbdev->ipa.lock); +} +KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32); + +void kbase_ipa_debugfs_init(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->ipa.lock); + + if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) + kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model); + kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model); + + debugfs_create_file("ipa_current_power", 0444, + kbdev->mali_debugfs_directory, kbdev, ¤t_power); + debugfs_create_file("ipa_force_fallback_model", 0644, + kbdev->mali_debugfs_directory, kbdev, &force_fallback_model); + + mutex_unlock(&kbdev->ipa.lock); +} diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.h 
b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.h new file mode 100644 index 000000000000..a983d9c14216 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_debugfs.h @@ -0,0 +1,68 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_IPA_DEBUGFS_H_ +#define _KBASE_IPA_DEBUGFS_H_ + +enum kbase_ipa_model_param_type { + PARAM_TYPE_S32 = 1, + PARAM_TYPE_STRING, +}; + +#ifdef CONFIG_DEBUG_FS + +void kbase_ipa_debugfs_init(struct kbase_device *kbdev); +int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, + void *addr, size_t size, + enum kbase_ipa_model_param_type type); +void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); + +/** + * kbase_ipa_model_param_set_s32 - Set an integer model parameter + * + * @model: pointer to IPA model + * @name: name of corresponding debugfs entry + * @val: new value of the parameter + * + * This function is only exposed for use by unit tests running in + * kernel space. Normally it is expected that parameter values will + * instead be set via debugfs. + */ +void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, + const char *name, s32 val); + +#else /* CONFIG_DEBUG_FS */ + +static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, + const char *name, void *addr, + size_t size, + enum kbase_ipa_model_param_type type) +{ + return 0; +} + +static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) +{ } + +#endif /* CONFIG_DEBUG_FS */ + +#endif /* _KBASE_IPA_DEBUGFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_simple.c new file mode 100644 index 000000000000..852559e54c70 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_simple.c @@ -0,0 +1,351 @@ +/* + * + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#endif +#include +#include +#include + +#include "mali_kbase.h" +#include "mali_kbase_defs.h" +#include "mali_kbase_ipa_simple.h" +#include "mali_kbase_ipa_debugfs.h" + +#if MALI_UNIT_TEST + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +static unsigned long dummy_temp; + +static int kbase_simple_power_model_get_dummy_temp( + struct thermal_zone_device *tz, + unsigned long *temp) +{ + *temp = READ_ONCE(dummy_temp); + return 0; +} + +#else +static int dummy_temp; + +static int kbase_simple_power_model_get_dummy_temp( + struct thermal_zone_device *tz, + int *temp) +{ + *temp = READ_ONCE(dummy_temp); + return 0; +} +#endif + +/* Intercept calls to the kernel function using a macro */ +#ifdef thermal_zone_get_temp +#undef thermal_zone_get_temp +#endif +#define thermal_zone_get_temp(tz, temp) \ + kbase_simple_power_model_get_dummy_temp(tz, temp) + +void kbase_simple_power_model_set_dummy_temp(int temp) +{ + WRITE_ONCE(dummy_temp, temp); +} +KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp); + +#endif /* MALI_UNIT_TEST */ + +/* + * This model is primarily designed for the Juno platform. It may not be + * suitable for other platforms. The additional resources in this model + * should preferably be minimal, as this model is rarely used when a dynamic + * model is available. + */ + +/** + * struct kbase_ipa_model_simple_data - IPA context per device + * @dynamic_coefficient: dynamic coefficient of the model + * @static_coefficient: static coefficient of the model + * @ts: Thermal scaling coefficients of the model + * @tz_name: Thermal zone name + * @gpu_tz: thermal zone device + * @poll_temperature_thread: Handle for temperature polling thread + * @current_temperature: Most recent value of polled temperature + * @temperature_poll_interval_ms: How often temperature should be checked, in ms + */ + +struct kbase_ipa_model_simple_data { + u32 dynamic_coefficient; + u32 static_coefficient; + s32 ts[4]; + char tz_name[THERMAL_NAME_LENGTH]; + struct thermal_zone_device *gpu_tz; + struct task_struct *poll_temperature_thread; + int current_temperature; + int temperature_poll_interval_ms; +}; +#define FALLBACK_STATIC_TEMPERATURE 55000 + +/** + * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient + * @ts: Signed coefficients, in order t^0 to t^3, with units Deg^-N + * @t: Temperature, in mDeg C. Range: -2^17 < t < 2^17 + * + * Scale the temperature according to a cubic polynomial whose coefficients are + * provided in the device tree. The result is used to scale the static power + * coefficient, where 1000000 means no change. + * + * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000. + */ +static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) +{ + /* Range: -2^24 < t2 < 2^24 m(Deg^2) */ + const s64 t2 = div_s64((t * t), 1000); + + /* Range: -2^31 < t3 < 2^31 m(Deg^3) */ + const s64 t3 = div_s64((t * t2), 1000); + + /* + * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in + * Deg^-N, so we need to multiply the last coefficient by 1000. 
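 *
 * Worked example with made-up coefficients (not defaults): take
 * ts[] = {600000, 10000, 0, 0} and a reading of t = 40000 mDeg C (40 degC).
 * Then t2 = 1600000, t3 = 64000000, and
 *
 *   res_big = 0 * t3 + 0 * t2 + 10000 * 40000 + 600000 * 1000 = 1000000000
 *
 * so after the division by 1000 the factor is 1000000, i.e. static power is
 * left unchanged at 40 degC. At 80 degC the same coefficients give 1400000,
 * i.e. a 1.4x scale.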
+ * Range: -2^63 < res_big < 2^63 + */ + const s64 res_big = ts[3] * t3 /* +/- 2^62 */ + + ts[2] * t2 /* +/- 2^55 */ + + ts[1] * t /* +/- 2^48 */ + + ts[0] * (s64)1000; /* +/- 2^41 */ + + /* Range: -2^60 < res_unclamped < 2^60 */ + s64 res_unclamped = div_s64(res_big, 1000); + + /* Clamp to range of 0x to 10x the static power */ + return clamp(res_unclamped, (s64) 0, (s64) 10000000); +} + +/* We can't call thermal_zone_get_temp() directly in model_static_coeff(), + * because we don't know if tz->lock is held in the same thread. So poll it in + * a separate thread to get around this. */ +static int poll_temperature(void *data) +{ + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *) data; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) + unsigned long temp; +#else + int temp; +#endif + + while (!kthread_should_stop()) { + struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz); + + if (tz) { + int ret; + + ret = thermal_zone_get_temp(tz, &temp); + if (ret) { + pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", + ret); + temp = FALLBACK_STATIC_TEMPERATURE; + } + } else { + temp = FALLBACK_STATIC_TEMPERATURE; + } + + WRITE_ONCE(model_data->current_temperature, temp); + + msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms)); + } + + return 0; +} + +static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) +{ + u32 temp_scaling_factor; + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *) model->model_data; + u64 coeff_big; + int temp; + + temp = READ_ONCE(model_data->current_temperature); + + /* Range: 0 <= temp_scaling_factor < 2^24 */ + temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, + temp); + + /* + * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This + * means static_coefficient must be in range + * 0 <= static_coefficient < 2^28. 
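 *
 * Continuing the made-up example from calculate_temp_scaling_factor(): with
 * static_coefficient = 2400000 uW/V^3 and temp_scaling_factor = 1400000
 * (1.4x), coeff_big = 3.36e12 and *coeffp = 3360000. The IPA framework then
 * applies the voltage scaling in kbase_scale_static_power(); at 750 mV, where
 * v3 works out to 421, that is roughly 3360000 * 421 / 1000000 ~= 1414 mW of
 * static power.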
+ */ + coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor; + *coeffp = div_u64(coeff_big, 1000000); + + return 0; +} + +static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) +{ + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *) model->model_data; + + *coeffp = model_data->dynamic_coefficient; + + return 0; +} + +static int add_params(struct kbase_ipa_model *model) +{ + int err = 0; + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *)model->model_data; + + err = kbase_ipa_model_add_param_s32(model, "static-coefficient", + &model_data->static_coefficient, + 1, true); + if (err) + goto end; + + err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", + &model_data->dynamic_coefficient, + 1, true); + if (err) + goto end; + + err = kbase_ipa_model_add_param_s32(model, "ts", + model_data->ts, 4, true); + if (err) + goto end; + + err = kbase_ipa_model_add_param_string(model, "thermal-zone", + model_data->tz_name, + sizeof(model_data->tz_name), true); + if (err) + goto end; + + model_data->temperature_poll_interval_ms = 200; + err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms", + &model_data->temperature_poll_interval_ms, + 1, false); + +end: + return err; +} + +static int kbase_simple_power_model_init(struct kbase_ipa_model *model) +{ + int err; + struct kbase_ipa_model_simple_data *model_data; + + model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), + GFP_KERNEL); + if (!model_data) + return -ENOMEM; + + model->model_data = (void *) model_data; + + model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE; + model_data->poll_temperature_thread = kthread_run(poll_temperature, + (void *) model_data, + "mali-simple-power-model-temp-poll"); + if (IS_ERR(model_data->poll_temperature_thread)) { + err = PTR_ERR(model_data->poll_temperature_thread); + kfree(model_data); + return err; + } + + err = add_params(model); + if (err) { + kbase_ipa_model_param_free_all(model); + kthread_stop(model_data->poll_temperature_thread); + kfree(model_data); + } + + return err; +} + +static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) +{ + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *)model->model_data; + struct thermal_zone_device *tz; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) { + model_data->gpu_tz = NULL; + } else { + char tz_name[THERMAL_NAME_LENGTH]; + + strlcpy(tz_name, model_data->tz_name, sizeof(tz_name)); + + /* Release ipa.lock so that thermal_list_lock is not acquired + * with ipa.lock held, thereby avoid lock ordering violation + * lockdep warning. The warning comes as a chain of locks + * ipa.lock --> thermal_list_lock --> tz->lock gets formed + * on registering devfreq cooling device when probe method + * of mali platform driver is invoked. + */ + mutex_unlock(&model->kbdev->ipa.lock); + tz = thermal_zone_get_zone_by_name(tz_name); + mutex_lock(&model->kbdev->ipa.lock); + + if (IS_ERR_OR_NULL(tz)) { + pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n", + PTR_ERR(tz), tz_name); + return -EPROBE_DEFER; + } + + /* Check if another thread raced against us & updated the + * thermal zone name string. 
Update the gpu_tz pointer only if + * the name string did not change whilst we retrieved the new + * thermal_zone_device pointer, otherwise model_data->tz_name & + * model_data->gpu_tz would become inconsistent with each other. + * The below check will succeed only for the thread which last + * updated the name string. + */ + if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0) + model_data->gpu_tz = tz; + } + + return 0; +} + +static void kbase_simple_power_model_term(struct kbase_ipa_model *model) +{ + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *)model->model_data; + + kthread_stop(model_data->poll_temperature_thread); + + kfree(model_data); +} + +struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = { + .name = "mali-simple-power-model", + .init = &kbase_simple_power_model_init, + .recalculate = &kbase_simple_power_model_recalculate, + .term = &kbase_simple_power_model_term, + .get_dynamic_coeff = &model_dynamic_coeff, + .get_static_coeff = &model_static_coeff, +}; +KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops); diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_simple.h b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_simple.h new file mode 100644 index 000000000000..fed67d527c7c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_simple.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_IPA_SIMPLE_H_ +#define _KBASE_IPA_SIMPLE_H_ + +#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + +extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops; + +#if MALI_UNIT_TEST +/** + * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value + * @temp: Temperature of the thermal zone, in millidegrees celsius. + * + * This is only intended for use in unit tests, to ensure that the temperature + * values used by the simple power model are predictable. Deterministic + * behavior is necessary to allow validation of the static power values + * computed by this model. + */ +void kbase_simple_power_model_set_dummy_temp(int temp); +#endif /* MALI_UNIT_TEST */ + +#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + +#endif /* _KBASE_IPA_SIMPLE_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_common.c new file mode 100644 index 000000000000..9fae8f1d0522 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_common.c @@ -0,0 +1,346 @@ +/* + * + * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_ipa_vinstr_common.h" +#include "mali_kbase_ipa_debugfs.h" + +#define DEFAULT_SCALING_FACTOR 5 + +/* If the value of GPU_ACTIVE is below this, use the simple model + * instead, to avoid extrapolating small amounts of counter data across + * large sample periods. + */ +#define DEFAULT_MIN_SAMPLE_CYCLES 10000 + +/** + * read_hwcnt() - read a counter value + * @model_data: pointer to model data + * @offset: offset, in bytes, into vinstr buffer + * + * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be + * incrementing every cycle over a ~100ms sample period at a high frequency, + * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27. + */ +static inline u32 kbase_ipa_read_hwcnt( + struct kbase_ipa_model_vinstr_data *model_data, + u32 offset) +{ + u8 *p = (u8 *)model_data->dump_buf.dump_buf; + + return *(u32 *)&p[offset]; +} + +static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) +{ + s64 rtn; + + if (a > 0 && (S64_MAX - a) < b) + rtn = S64_MAX; + else if (a < 0 && (S64_MIN - a) > b) + rtn = S64_MIN; + else + rtn = a + b; + + return rtn; +} + +s64 kbase_ipa_sum_all_shader_cores( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter) +{ + struct kbase_device *kbdev = model_data->kbdev; + u64 core_mask; + u32 base = 0; + s64 ret = 0; + + core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + while (core_mask != 0ull) { + if ((core_mask & 1ull) != 0ull) { + /* 0 < counter_value < 2^27 */ + u32 counter_value = kbase_ipa_read_hwcnt(model_data, + base + counter); + + /* 0 < ret < 2^27 * max_num_cores = 2^32 */ + ret = kbase_ipa_add_saturate(ret, counter_value); + } + base += KBASE_IPA_NR_BYTES_PER_BLOCK; + core_mask >>= 1; + } + + /* Range: -2^54 < ret * coeff < 2^54 */ + return ret * coeff; +} + +s64 kbase_ipa_sum_all_memsys_blocks( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter) +{ + struct kbase_device *kbdev = model_data->kbdev; + const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices; + u32 base = 0; + s64 ret = 0; + u32 i; + + for (i = 0; i < num_blocks; i++) { + /* 0 < counter_value < 2^27 */ + u32 counter_value = kbase_ipa_read_hwcnt(model_data, + base + counter); + + /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */ + ret = kbase_ipa_add_saturate(ret, counter_value); + base += KBASE_IPA_NR_BYTES_PER_BLOCK; + } + + /* Range: -2^51 < ret * coeff < 2^51 */ + return ret * coeff; +} + +s64 kbase_ipa_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter) +{ + /* Range: 0 < counter_value < 2^27 */ + const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter); + + /* Range: -2^49 < ret < 2^49 */ + return counter_value * (s64) coeff; +} + +int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data 
*model_data) +{ + int errcode; + struct kbase_device *kbdev = model_data->kbdev; + struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt; + struct kbase_hwcnt_enable_map enable_map; + const struct kbase_hwcnt_metadata *metadata = + kbase_hwcnt_virtualizer_metadata(hvirt); + + if (!metadata) + return -1; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map); + if (errcode) { + dev_err(kbdev->dev, "Failed to allocate IPA enable map"); + return errcode; + } + + kbase_hwcnt_enable_map_enable_all(&enable_map); + + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &enable_map, &model_data->hvirt_cli); + kbase_hwcnt_enable_map_free(&enable_map); + if (errcode) { + dev_err(kbdev->dev, "Failed to register IPA with virtualizer"); + model_data->hvirt_cli = NULL; + return errcode; + } + + errcode = kbase_hwcnt_dump_buffer_alloc( + metadata, &model_data->dump_buf); + if (errcode) { + dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); + kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); + model_data->hvirt_cli = NULL; + return errcode; + } + + return 0; +} + +void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) +{ + if (model_data->hvirt_cli) { + kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); + kbase_hwcnt_dump_buffer_free(&model_data->dump_buf); + model_data->hvirt_cli = NULL; + } +} + +int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) +{ + struct kbase_ipa_model_vinstr_data *model_data = + (struct kbase_ipa_model_vinstr_data *)model->model_data; + s64 energy = 0; + size_t i; + u64 coeff = 0, coeff_mul = 0; + u64 start_ts_ns, end_ts_ns; + u32 active_cycles; + int err = 0; + + err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, + &start_ts_ns, &end_ts_ns, &model_data->dump_buf); + if (err) + goto err0; + + /* Range: 0 (GPU not used at all), to the max sampling interval, say + * 1s, * max GPU frequency (GPU 100% utilized). + * 0 <= active_cycles <= 1 * ~2GHz + * 0 <= active_cycles < 2^31 + */ + active_cycles = model_data->get_active_cycles(model_data); + + if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) { + err = -ENODATA; + goto err0; + } + + /* Range: 1 <= active_cycles < 2^31 */ + active_cycles = max(1u, active_cycles); + + /* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around + * -2^57 < energy < 2^57 + */ + for (i = 0; i < model_data->groups_def_num; i++) { + const struct kbase_ipa_group *group = &model_data->groups_def[i]; + s32 coeff = model_data->group_values[i]; + s64 group_energy = group->op(model_data, coeff, + group->counter_block_offset); + + energy = kbase_ipa_add_saturate(energy, group_energy); + } + + /* Range: 0 <= coeff < 2^57 */ + if (energy > 0) + coeff = energy; + + /* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this + * can be constrained further: Counter values can only be increased by + * a theoretical maximum of about 64k per clock cycle. Beyond this, + * we'd have to sample every 1ms to avoid them overflowing at the + * lowest clock frequency (say 100MHz). 
Therefore, we can write the + * range of 'coeff' in terms of active_cycles: + * + * coeff = SUM(coeffN * counterN * num_cores_for_counterN) + * coeff <= SUM(coeffN * counterN) * max_num_cores + * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores + * (substitute max_counter = 2^16 * active_cycles) + * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores + * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5 + * coeff <= 2^46 * active_cycles + * + * So after the division: 0 <= coeff <= 2^46 + */ + coeff = div_u64(coeff, active_cycles); + + /* Not all models were derived at the same reference voltage. Voltage + * scaling is done by multiplying by V^2, so we need to *divide* by + * Vref^2 here. + * Range: 0 <= coeff <= 2^49 + */ + coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); + /* Range: 0 <= coeff <= 2^52 */ + coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); + + /* Scale by user-specified integer factor. + * Range: 0 <= coeff_mul < 2^57 + */ + coeff_mul = coeff * model_data->scaling_factor; + + /* The power models have results with units + * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this + * becomes fW/(Hz V^2), which are the units of coeff_mul. However, + * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide + * by 1000. + * Range: 0 <= coeff_mul < 2^47 + */ + coeff_mul = div_u64(coeff_mul, 1000u); + +err0: + /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ + *coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16); + return err; +} + +int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_group *ipa_groups_def, + size_t ipa_group_size, + kbase_ipa_get_active_cycles_callback get_active_cycles, + s32 reference_voltage) +{ + int err = 0; + size_t i; + struct kbase_ipa_model_vinstr_data *model_data; + + if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles) + return -EINVAL; + + model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); + if (!model_data) + return -ENOMEM; + + model_data->kbdev = model->kbdev; + model_data->groups_def = ipa_groups_def; + model_data->groups_def_num = ipa_group_size; + model_data->get_active_cycles = get_active_cycles; + + model->model_data = (void *) model_data; + + for (i = 0; i < model_data->groups_def_num; ++i) { + const struct kbase_ipa_group *group = &model_data->groups_def[i]; + + model_data->group_values[i] = group->default_value; + err = kbase_ipa_model_add_param_s32(model, group->name, + &model_data->group_values[i], + 1, false); + if (err) + goto exit; + } + + model_data->scaling_factor = DEFAULT_SCALING_FACTOR; + err = kbase_ipa_model_add_param_s32(model, "scale", + &model_data->scaling_factor, + 1, false); + if (err) + goto exit; + + model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; + err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", + &model_data->min_sample_cycles, + 1, false); + if (err) + goto exit; + + model_data->reference_voltage = reference_voltage; + err = kbase_ipa_model_add_param_s32(model, "reference_voltage", + &model_data->reference_voltage, + 1, false); + if (err) + goto exit; + + err = kbase_ipa_attach_vinstr(model_data); + +exit: + if (err) { + kbase_ipa_model_param_free_all(model); + kfree(model_data); + } + return err; +} + +void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model) +{ + struct kbase_ipa_model_vinstr_data *model_data = + (struct kbase_ipa_model_vinstr_data *)model->model_data; + + 
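+	/* (Illustrative sanity check, not from the original sources: the
+	 * clamp in kbase_ipa_vinstr_dynamic_coeff() above caps the result at
+	 * 2^16 pW/(Hz V^2). At 400 MHz and 0.75 V that is roughly
+	 * 65536e-12 * 400e6 * 0.75 * 0.75 ~= 14.7 W, which matches the
+	 * "about 14W at 400MHz/750mV" note next to the clamp.)
+	 */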
kbase_ipa_detach_vinstr(model_data); + kfree(model_data); +} diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_common.h new file mode 100644 index 000000000000..46e3cd4bc6e1 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_common.h @@ -0,0 +1,217 @@ +/* + * + * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_IPA_VINSTR_COMMON_H_ +#define _KBASE_IPA_VINSTR_COMMON_H_ + +#include "mali_kbase.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" + +/* Maximum number of IPA groups for an IPA model. */ +#define KBASE_IPA_MAX_GROUP_DEF_NUM 16 + +/* Number of bytes per hardware counter in a vinstr_buffer. */ +#define KBASE_IPA_NR_BYTES_PER_CNT 4 + +/* Number of hardware counters per block in a vinstr_buffer. */ +#define KBASE_IPA_NR_CNT_PER_BLOCK 64 + +/* Number of bytes per block in a vinstr_buffer. */ +#define KBASE_IPA_NR_BYTES_PER_BLOCK \ + (KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT) + +struct kbase_ipa_model_vinstr_data; + +typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *); + +/** + * struct kbase_ipa_model_vinstr_data - IPA context per device + * @kbdev: pointer to kbase device + * @groups_def: Array of IPA groups. + * @groups_def_num: Number of elements in the array of IPA groups. + * @get_active_cycles: Callback to return number of active cycles during + * counter sample period + * @hvirt_cli: hardware counter virtualizer client handle + * @dump_buf: buffer to dump hardware counters onto + * @reference_voltage: voltage, in mV, of the operating point used when + * deriving the power model coefficients. Range approx + * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 + * @scaling_factor: User-specified power scaling factor. This is an + * integer, which is multiplied by the power coefficient + * just before OPP scaling. + * Range approx 0-32: 0 < scaling_factor < 2^5 + * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of + * cycles the GPU was working) is less than + * min_sample_cycles, the counter model will return an + * error, causing the IPA framework to approximate using + * the cached simple model results instead. This may be + * more accurate than extrapolating using a very small + * counter dump. 
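+ *
+ * (Note added for clarity: @group_values is a parallel array to
+ * @groups_def. kbase_ipa_vinstr_common_model_init() registers each entry
+ * as a tunable parameter named after its group, e.g.
+ *	kbase_ipa_model_add_param_s32(model, group->name,
+ *				      &model_data->group_values[i], 1, false);
+ * so the per-counter coefficients can be overridden through the IPA
+ * parameter interface.)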
+ */ +struct kbase_ipa_model_vinstr_data { + struct kbase_device *kbdev; + s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM]; + const struct kbase_ipa_group *groups_def; + size_t groups_def_num; + kbase_ipa_get_active_cycles_callback get_active_cycles; + struct kbase_hwcnt_virtualizer_client *hvirt_cli; + struct kbase_hwcnt_dump_buffer dump_buf; + s32 reference_voltage; + s32 scaling_factor; + s32 min_sample_cycles; +}; + +/** + * struct ipa_group - represents a single IPA group + * @name: name of the IPA group + * @default_value: default value of coefficient for IPA group. + * Coefficients are interpreted as fractions where the + * denominator is 1000000. + * @op: which operation to be performed on the counter values + * @counter_block_offset: block offset in bytes of the counter used to calculate energy for IPA group + */ +struct kbase_ipa_group { + const char *name; + s32 default_value; + s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32); + u32 counter_block_offset; +}; + +/** + * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter offset in bytes of the counter used to calculate energy + * for IPA group + * + * Calculate energy estimation based on hardware counter `counter' + * across all shader cores. + * + * Return: Sum of counter values. Range: -2^54 < ret < 2^54 + */ +s64 kbase_ipa_sum_all_shader_cores( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter); + +/** + * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter: offset in bytes of the counter used to calculate energy + * for IPA group + * + * Calculate energy estimation based on hardware counter `counter' across all + * memory system blocks. + * + * Return: Sum of counter values. Range: -2^51 < ret < 2^51 + */ +s64 kbase_ipa_sum_all_memsys_blocks( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter); + +/** + * kbase_ipa_single_counter() - sum a single counter + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter: offset in bytes of the counter used to calculate energy + * for IPA group + * + * Calculate energy estimation based on hardware counter `counter'. + * + * Return: Counter value. Range: -2^49 < ret < 2^49 + */ +s64 kbase_ipa_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter); + +/** + * attach_vinstr() - attach a vinstr_buffer to an IPA model. + * @model_data pointer to model data + * + * Attach a vinstr_buffer to an IPA model. The vinstr_buffer + * allows access to the hardware counters used to calculate + * energy consumption. + * + * Return: 0 on success, or an error code. + */ +int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); + +/** + * detach_vinstr() - detach a vinstr_buffer from an IPA model. + * @model_data pointer to model data + * + * Detach a vinstr_buffer from an IPA model. 
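+ *
+ * Typically paired with kbase_ipa_attach_vinstr(), as done by the common
+ * model init/term code in mali_kbase_ipa_vinstr_common.c. A minimal
+ * illustrative sketch (error handling shortened, the middle line is a
+ * placeholder):
+ *
+ *	err = kbase_ipa_attach_vinstr(model_data);
+ *	if (err)
+ *		return err;
+ *	... read counters, e.g. via kbase_ipa_vinstr_dynamic_coeff() ...
+ *	kbase_ipa_detach_vinstr(model_data);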
+ */ +void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); + +/** + * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters + * @model: pointer to instantiated model + * @coeffp: pointer to location where calculated power, in + * pW/(Hz V^2), is stored. + * + * This is a GPU-agnostic implementation of the get_dynamic_coeff() + * function of an IPA model. It relies on the model being populated + * with GPU-specific attributes at initialization time. + * + * Return: 0 on success, or an error code. + */ +int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); + +/** + * kbase_ipa_vinstr_common_model_init() - initialize ipa power model + * @model: ipa power model to initialize + * @ipa_groups_def: array of ipa groups which sets coefficients for + * the corresponding counters used in the ipa model + * @ipa_group_size: number of elements in the array @ipa_groups_def + * @get_active_cycles: callback to return the number of cycles the GPU was + * active during the counter sample period. + * @reference_voltage: voltage, in mV, of the operating point used when + * deriving the power model coefficients. + * + * This initialization function performs initialization steps common + * for ipa models based on counter values. In each call, the model + * passes its specific coefficient values per ipa counter group via + * @ipa_groups_def array. + * + * Return: 0 on success, error code otherwise + */ +int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_group *ipa_groups_def, + size_t ipa_group_size, + kbase_ipa_get_active_cycles_callback get_active_cycles, + s32 reference_voltage); + +/** + * kbase_ipa_vinstr_common_model_term() - terminate ipa power model + * @model: ipa power model to terminate + * + * This function performs all necessary steps to terminate ipa power model + * including clean up of resources allocated to hold model data. + */ +void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model); + +#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_g7x.c new file mode 100644 index 000000000000..270b75e07b0d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/ipa/mali_kbase_ipa_vinstr_g7x.c @@ -0,0 +1,456 @@ +/* + * + * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include + +#include "mali_kbase_ipa_vinstr_common.h" +#include "mali_kbase.h" + + +/* Performance counter blocks base offsets */ +#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) + +/* JM counter block offsets */ +#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) + +/* Tiler counter block offsets */ +#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45) + +/* MEMSYS counter block offsets */ +#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) + +/* SC counter block offsets */ +#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4) +#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26) +#define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27) +#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) +#define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30) +#define SC_TEX_FILT_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 39) +#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) +#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42) +#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) +#define SC_VARY_SLOT_32 (KBASE_IPA_NR_BYTES_PER_CNT * 50) +#define SC_VARY_SLOT_16 (KBASE_IPA_NR_BYTES_PER_CNT * 51) +#define SC_BEATS_RD_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 56) +#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61) +#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) + +/** + * get_jm_counter() - get performance counter offset inside the Job Manager block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the Job Manager block. + * + * Return: Block offset in bytes of the required performance counter. + */ +static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + return JM_BASE + counter_block_offset; +} + +/** + * get_memsys_counter() - get performance counter offset inside the Memory System block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the (first) Memory System block. + * + * Return: Block offset in bytes of the required performance counter. + */ +static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + /* The base address of Memory System performance counters is always the same, although their number + * may vary based on the number of cores. For the moment it's ok to return a constant. + */ + return MEMSYS_BASE + counter_block_offset; +} + +/** + * get_sc_counter() - get performance counter offset inside the Shader Cores block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the (first) Shader Cores block. + * + * Return: Block offset in bytes of the required performance counter. + */ +static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + const u32 sc_base = MEMSYS_BASE + + (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * + KBASE_IPA_NR_BYTES_PER_BLOCK); + + return sc_base + counter_block_offset; +} + +/** + * memsys_single_counter() - calculate energy for a single Memory System performance counter. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. 
+ * @offset: offset in bytes of the counter inside the block it belongs to. + * + * Return: Energy estimation for a single Memory System performance counter. + */ +static s64 kbase_g7x_sum_all_memsys_blocks( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset); + return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter); +} + +/** + * sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. + * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. + * + * Return: Energy estimation for a Shader Cores performance counter for all cores. + */ +static s64 kbase_g7x_sum_all_shader_cores( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 counter_block_offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_sc_counter(model_data, + counter_block_offset); + return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter); +} + +/** + * jm_single_counter() - calculate energy for a single Job Manager performance counter. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. + * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. + * + * Return: Energy estimation for a single Job Manager performance counter. + */ +static s64 kbase_g7x_jm_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 counter_block_offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_jm_counter(model_data, + counter_block_offset); + return kbase_ipa_single_counter(model_data, coeff, counter); +} + +/** + * get_active_cycles() - return the GPU_ACTIVE counter + * @model_data: pointer to GPU model data. + * + * Return: the number of cycles the GPU was active during the counter sampling + * period. + */ +static u32 kbase_g7x_get_active_cycles( + struct kbase_ipa_model_vinstr_data *model_data) +{ + u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE); + + /* Counters are only 32-bit, so we can safely multiply by 1 then cast + * the 64-bit result back to a u32. + */ + return kbase_ipa_single_counter(model_data, 1, counter); +} + +/** Table of IPA group definitions. + * + * For each IPA group, this table defines a function to access the given performance block counter (or counters, + * if the operation needs to be iterated on multiple blocks) and calculate energy estimation. 
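+ *
+ * As a reminder of the dump-buffer layout implied by the offsets above
+ * (illustrative, assuming a single L2 slice as used by
+ * kbase_g7x_power_model_get_sc_counter()): each block is
+ * KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT = 64 * 4 = 256
+ * bytes, so the JM counters start at byte 0, the tiler block at byte 256,
+ * the MEMSYS block at byte 512 and the first shader core block at
+ * MEMSYS_BASE + 1 * KBASE_IPA_NR_BYTES_PER_BLOCK = 768.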
+ */ + +static const struct kbase_ipa_group ipa_groups_def_g71[] = { + { + .name = "l2_access", + .default_value = 526300, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_count", + .default_value = 301100, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "tex_issue", + .default_value = 197400, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_COORD_ISSUE, + }, + { + .name = "tile_wb", + .default_value = -156400, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_BEATS_WR_TIB, + }, + { + .name = "gpu_active", + .default_value = 115800, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g72[] = { + { + .name = "l2_access", + .default_value = 393000, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_count", + .default_value = 227000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "tex_issue", + .default_value = 181900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_COORD_ISSUE, + }, + { + .name = "tile_wb", + .default_value = -120200, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_BEATS_WR_TIB, + }, + { + .name = "gpu_active", + .default_value = 133100, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g76[] = { + { + .name = "gpu_active", + .default_value = 122000, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 488900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 212100, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 288000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 378100, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = { + { + .name = "gpu_active", + .default_value = 224200, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 384700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 271900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 477700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 551400, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g51[] = { + { + .name = "gpu_active", + .default_value = 201400, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 392700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = 
SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 274000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 528000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 506400, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g77[] = { + { + .name = "l2_access", + .default_value = 710800, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_msg", + .default_value = 2375300, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_MSG, + }, + { + .name = "exec_instr_fma", + .default_value = 656100, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_FMA, + }, + { + .name = "tex_filt_num_operations", + .default_value = 318800, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, + }, + { + .name = "gpu_active", + .default_value = 172800, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_tbex[] = { + { + .name = "l2_access", + .default_value = 599800, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_msg", + .default_value = 1830200, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_MSG, + }, + { + .name = "exec_instr_fma", + .default_value = 407300, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_FMA, + }, + { + .name = "tex_filt_num_operations", + .default_value = 224500, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, + }, + { + .name = "gpu_active", + .default_value = 153800, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + + +#define IPA_POWER_MODEL_OPS(gpu, init_token) \ + const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_ ## init_token ## _power_model_init, \ + .term = kbase_ipa_vinstr_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ + }; \ + KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + +#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ + static int kbase_ ## gpu ## _power_model_init(\ + struct kbase_ipa_model *model) \ + { \ + BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \ + KBASE_IPA_MAX_GROUP_DEF_NUM); \ + return kbase_ipa_vinstr_common_model_init(model, \ + ipa_groups_def_ ## gpu, \ + ARRAY_SIZE(ipa_groups_def_ ## gpu), \ + kbase_g7x_get_active_cycles, \ + (reference_voltage)); \ + } \ + IPA_POWER_MODEL_OPS(gpu, gpu) + +#define ALIAS_POWER_MODEL(gpu, as_gpu) \ + IPA_POWER_MODEL_OPS(gpu, as_gpu) + +STANDARD_POWER_MODEL(g71, 800); +STANDARD_POWER_MODEL(g72, 800); +STANDARD_POWER_MODEL(g76, 800); +STANDARD_POWER_MODEL(g52_r1, 1000); +STANDARD_POWER_MODEL(g51, 1000); +STANDARD_POWER_MODEL(g77, 1000); +STANDARD_POWER_MODEL(tbex, 1000); + +/* g52 is an alias of g76 (TNOX) for IPA */ +ALIAS_POWER_MODEL(g52, g76); +/* tnax is an alias of g77 (TTRX) for IPA */ +ALIAS_POWER_MODEL(tnax, g77); diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_base_jm_kernel.h 
b/drivers/gpu/arm/b_r26p0/jm/mali_base_jm_kernel.h new file mode 100644 index 000000000000..ce36020fcfa5 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/jm/mali_base_jm_kernel.h @@ -0,0 +1,1076 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#ifndef _BASE_JM_KERNEL_H_ +#define _BASE_JM_KERNEL_H_ + +/* Memory allocation, access/hint flags. + * + * See base_mem_alloc_flags. + */ + +/* IN */ +/* Read access CPU side + */ +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + +/* Write access CPU side + */ +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + +/* Read access GPU side + */ +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + +/* Write access GPU side + */ +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + +/* Execute allowed on the GPU side + */ +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + +/* Will be permanently mapped in kernel space. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space. + * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. + */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + +/* Userspace is not allowed to free this memory. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) + +#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) + +/* Grow backing store on GPU Page Fault + */ +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + +/* Page coherence Outer shareable, if available + */ +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + +/* Page coherence Inner shareable + */ +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +/* Should be cached on the CPU + */ +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +/* IN/OUT */ +/* Must have same VA on both the GPU and the CPU + */ +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + +/* OUT */ +/* Must call mmap to acquire a GPU address for the allocation + */ +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + +/* IN */ +/* Page coherence Outer shareable, required. 
+ */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/** + * Bit 19 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) + +/** + * Memory starting from the end of the initial commit is aligned to 'extent' + * pages, where 'extent' must be a power of 2 and no more than + * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES + */ +#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). + * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK \ + ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. Currently applicable to dma-buf type only. + */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* Use the GPU VA chosen by the kernel client */ +#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) + +/* Bit 28 reserved for Kernel side cache sync ops flag */ + +/* Force trimming of JIT allocations when creating a new allocation */ +#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 30 + +/* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. + */ +#define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ + BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM) + +/* A mask for all output bits, excluding IN/OUT bits. + */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. 
+ */ +#define BASE_MEM_FLAGS_INPUT_MASK \ + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + +/* A mask of all currently reserved flags + */ +#define BASE_MEM_FLAGS_RESERVED \ + (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +/* reserved handles ..-47< for future special handles */ +#define BASE_MEM_COOKIE_BASE (64ul << 12) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ + BASE_MEM_COOKIE_BASE) + +/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the + * initial commit is aligned to 'extent' pages, where 'extent' must be a power + * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES + */ +#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) + +/** + * If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. + */ +#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) + +/** + * Valid set of just-in-time memory allocation flags + * + * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr + * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set + * and heap_info_gpu_addr being 0 will be rejected). + */ +#define BASE_JIT_ALLOC_VALID_FLAGS \ + (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + +/** + * typedef base_context_create_flags - Flags to pass to ::base_context_init. + * + * Flags can be ORed together to enable multiple things. + * + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them. + */ +typedef u32 base_context_create_flags; + +/* No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) + +/* Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/* Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. + */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ + ((base_context_create_flags)1 << 1) + +/* Bit-shift used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) + +/* Bitmask used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + +/* Bitpattern describing the base_context_create_flags that can be + * passed to the kernel + */ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ + BASEP_CONTEXT_MMU_GROUP_ID_MASK) + +/* Bitpattern describing the ::base_context_create_flags that can be + * passed to base_context_init() + */ +#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ + (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS) + +/* + * Private flags used on the base context + * + * These start at bit 31, and run down to zero. + * + * They share the same space as base_context_create_flags, and so must + * not collide with them. 
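+ *
+ * (Illustrative example of the create-flag encoding defined above, not
+ * taken from the original sources: a context placed in memory group 2
+ * with job submission disabled would pass
+ *	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED |
+ *	((base_context_create_flags)2 << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+ * which stays within BASEP_CONTEXT_CREATE_KERNEL_FLAGS.)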
+ */ + +/* Private flag tracking whether job descriptor dumping is disabled */ +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ + ((base_context_create_flags)(1 << 31)) + +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) + */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +/* Indicate that job dumping is enabled. This could affect certain timers + * to account for the performance impact. + */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ + BASE_TLSTREAM_JOB_DUMPING_ENABLED) +/* + * Dependency stuff, keep it private for now. May want to expose it if + * we decide to make the number of semaphores a configurable + * option. + */ +#define BASE_JD_ATOM_COUNT 256 + +/* Maximum number of concurrent render passes. + */ +#define BASE_JD_RP_COUNT (256) + +/* Set/reset values for a software event */ +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + +/** + * struct base_jd_udata - Per-job data + * + * This structure is used to store per-job data, and is completely unused + * by the Base driver. It can be used to store things such as callback + * function pointer, data to handle job completion. It is guaranteed to be + * untouched by the Base driver. + * + * @blob: per-job data array + */ +struct base_jd_udata { + u64 blob[2]; +}; + +/** + * typedef base_jd_dep_type - Job dependency type. + * + * A flags field will be inserted into the atom structure to specify whether a + * dependency is a data or ordering dependency (by putting it before/after + * 'core_req' in the structure it should be possible to add without changing + * the structure size). + * When the flag is set for a particular dependency to signal that it is an + * ordering only dependency then errors will not be propagated. + */ +typedef u8 base_jd_dep_type; + +#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ +#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ +#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ + +/** + * typedef base_jd_core_req - Job chain hardware requirements. + * + * A job chain must specify what GPU features it needs to allow the + * driver to schedule the job correctly. By not specifying the + * correct settings can/will cause an early job termination. Multiple + * values can be ORed together to specify multiple requirements. + * Special case is ::BASE_JD_REQ_DEP, which is used to express complex + * dependencies, and that doesn't execute anything on the hardware. + */ +typedef u32 base_jd_core_req; + +/* Requirements that come from the HW */ + +/* No requirement, dependency only + */ +#define BASE_JD_REQ_DEP ((base_jd_core_req)0) + +/* Requires fragment shaders + */ +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) + +/* Requires compute shaders + * + * This covers any of the following GPU job types: + * - Vertex Shader Job + * - Geometry Shader Job + * - An actual Compute Shader Job + * + * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the + * job is specifically just the "Compute Shader" job type, and not the "Vertex + * Shader" nor the "Geometry Shader" job type. 
+ */ +#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) + +/* Requires tiling */ +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) + +/* Requires cache flushes */ +#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) + +/* Requires value writeback */ +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) + +/* SW-only requirements - the HW does not expose these as part of the job slot + * capabilities + */ + +/* Requires fragment job with AFBC encoding */ +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) + +/* SW-only requirement: coalesce completion events. + * If this bit is set then completion of this atom will not cause an event to + * be sent to userspace, whether successful or not; completion events will be + * deferred until an atom completes which does not have this bit set. + * + * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. + */ +#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) + +/* SW Only requirement: the job chain requires a coherent core group. We don't + * mind which coherent core group is used. + */ +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) + +/* SW Only requirement: The performance counters should be enabled only when + * they are needed, to reduce power consumption. + */ +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) + +/* SW Only requirement: External resources are referenced by this atom. + * + * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and + * BASE_JD_REQ_SOFT_EVENT_WAIT. + */ +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) + +/* SW Only requirement: Software defined job. Jobs with this bit set will not be + * submitted to the hardware but will cause some action to happen within the + * driver + */ +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) + +#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) +#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) +#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) + +/* 0x4 RESERVED for now */ + +/* SW only requirement: event wait/trigger job. + * + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the + * other waiting jobs. It completes immediately. + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it + * possible for other jobs to wait upon. It completes immediately. + */ +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/* SW only requirement: Just In Time allocation + * + * This job requests a single or multiple just-in-time allocations through a + * list of base_jit_alloc_info structure which is passed via the jc element of + * the atom. The number of base_jit_alloc_info structures present in the + * list is passed via the nr_extres element of the atom + * + * It should be noted that the id entry in base_jit_alloc_info must not + * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE. + * + * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE + * soft job to free the JIT allocation is still made. + * + * The job will complete immediately. 
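+ *
+ * (Illustrative note: because every soft-job subtype is encoded as
+ * BASE_JD_REQ_SOFT_JOB plus a small subtype number, a requirement word can
+ * be classified with the BASE_JD_REQ_SOFT_JOB_TYPE mask defined further
+ * down in this header, e.g.
+ *	(core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC
+ * where core_req is a base_jd_core_req value.)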
+ */ +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) + +/* SW only requirement: Just In Time free + * + * This job requests a single or multiple just-in-time allocations created by + * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time + * allocations is passed via the jc element of the atom. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) + +/* SW only requirement: Map external resource + * + * This job requests external resource(s) are mapped once the dependencies + * of the job have been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) + +/* SW only requirement: Unmap external resource + * + * This job requests external resource(s) are unmapped once the dependencies + * of the job has been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) + +/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) + * + * This indicates that the Job Chain contains GPU jobs of the 'Compute + * Shaders' type. + * + * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job + * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. + */ +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) + +/* HW Requirement: Use the base_jd_atom::device_nr field to specify a + * particular core group + * + * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag + * takes priority + * + * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. + * + * If the core availability policy is keeping the required core group turned + * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code. + */ +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) + +/* SW Flag: If this bit is set then the successful completion of this atom + * will not cause an event to be sent to userspace + */ +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) + +/* SW Flag: If this bit is set then completion of this atom will not cause an + * event to be sent to userspace, whether successful or not. + */ +#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) + +/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job starts which does not have this bit set or a job completes + * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use + * if the CPU may have written to memory addressed by the job since the last job + * without this bit set was submitted. + */ +#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) + +/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job completes which does not have this bit set or a job starts + * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use + * if the CPU may read from or partially overwrite memory addressed by the job + * before the next job without this bit set completes. 
+ */ +#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) + +/* Request the atom be executed on a specific job slot. + * + * When this flag is specified, it takes precedence over any existing job slot + * selection logic. + */ +#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) + +/* SW-only requirement: The atom is the start of a renderpass. + * + * If this bit is set then the job chain will be soft-stopped if it causes the + * GPU to write beyond the end of the physical pages backing the tiler heap, and + * committing more memory to the heap would exceed an internal threshold. It may + * be resumed after running one of the job chains attached to an atom with + * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be + * resumed multiple times until it completes without memory usage exceeding the + * threshold. + * + * Usually used with BASE_JD_REQ_T. + */ +#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18) + +/* SW-only requirement: The atom is the end of a renderpass. + * + * If this bit is set then the atom incorporates the CPU address of a + * base_jd_fragment object instead of the GPU address of a job chain. + * + * Which job chain is run depends upon whether the atom with the same renderpass + * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or + * was soft-stopped when it exceeded an upper threshold for tiler heap memory + * usage. + * + * It also depends upon whether one of the job chains attached to the atom has + * already been run as part of the same renderpass (in which case it would have + * written unresolved multisampled and otherwise-discarded output to temporary + * buffers that need to be read back). The job chain for doing a forced read and + * forced write (from/to temporary buffers) is run as many times as necessary. + * + * Usually used with BASE_JD_REQ_FS. + */ +#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) + +/* These requirement bits are currently unused in base_jd_core_req + */ +#define BASEP_JD_REQ_RESERVED \ + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ + BASE_JD_REQ_EVENT_COALESCE | \ + BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ + BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ + BASE_JD_REQ_END_RENDERPASS)) + +/* Mask of all bits in base_jd_core_req that control the type of the atom. + * + * This allows dependency only atoms to have flags set + */ +#define BASE_JD_REQ_ATOM_TYPE \ + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ + BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) + +/** + * Mask of all bits in base_jd_core_req that control the type of a soft job. + */ +#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) + +/* Returns non-zero value if core requirements passed define a soft job or + * a dependency only job. + */ +#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ + (((core_req) & BASE_JD_REQ_SOFT_JOB) || \ + ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) + +/** + * enum kbase_jd_atom_state + * + * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used. + * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD. + * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running). 
+ * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet + * handed back to job dispatcher for + * dependency resolution. + * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed + * back to userspace. + */ +enum kbase_jd_atom_state { + KBASE_JD_ATOM_STATE_UNUSED, + KBASE_JD_ATOM_STATE_QUEUED, + KBASE_JD_ATOM_STATE_IN_JS, + KBASE_JD_ATOM_STATE_HW_COMPLETED, + KBASE_JD_ATOM_STATE_COMPLETED +}; + +/** + * typedef base_atom_id - Type big enough to store an atom number in. + */ +typedef u8 base_atom_id; + +/** + * struct base_dependency - + * + * @atom_id: An atom number + * @dependency_type: Dependency type + */ +struct base_dependency { + base_atom_id atom_id; + base_jd_dep_type dependency_type; +}; + +/** + * struct base_jd_fragment - Set of GPU fragment job chains used for rendering. + * + * @norm_read_norm_write: Job chain for full rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not exceed + * its memory usage threshold and no fragment job chain + * was previously run for the same renderpass. + * It is used no more than once per renderpass. + * @norm_read_forced_write: Job chain for starting incremental + * rendering. + * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain exceeded + * its memory usage threshold for the first time and + * no fragment job chain was previously run for the + * same renderpass. + * Writes unresolved multisampled and normally- + * discarded output to temporary buffers that must be + * read back by a subsequent forced_read job chain + * before the renderpass is complete. + * It is used no more than once per renderpass. + * @forced_read_forced_write: Job chain for continuing incremental + * rendering. + * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain + * exceeded its memory usage threshold again + * and a fragment job chain was previously run for + * the same renderpass. + * Reads unresolved multisampled and + * normally-discarded output from temporary buffers + * written by a previous forced_write job chain and + * writes the same to temporary buffers again. + * It is used as many times as required until + * rendering completes. + * @forced_read_norm_write: Job chain for ending incremental rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not + * exceed its memory usage threshold this time and a + * fragment job chain was previously run for the same + * renderpass. + * Reads unresolved multisampled and normally-discarded + * output from temporary buffers written by a previous + * forced_write job chain in order to complete a + * renderpass. + * It is used no more than once per renderpass. + * + * This structure is referenced by the main atom structure if + * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. + */ +struct base_jd_fragment { + u64 norm_read_norm_write; + u64 norm_read_forced_write; + u64 forced_read_forced_write; + u64 forced_read_norm_write; +}; + +/** + * typedef base_jd_prio - Base Atom priority. + * + * Only certain priority levels are actually implemented, as specified by the + * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority + * level that is not one of those defined below. + * + * Priority levels only affect scheduling after the atoms have had dependencies + * resolved. 
For example, a low priority atom that has had its dependencies + * resolved might run before a higher priority atom that has not had its + * dependencies resolved. + * + * In general, fragment atoms do not affect non-fragment atoms with + * lower priorities, and vice versa. One exception is that there is only one + * priority value for each context. So a high-priority (e.g.) fragment atom + * could increase its context priority, causing its non-fragment atoms to also + * be scheduled sooner. + * + * The atoms are scheduled as follows with respect to their priorities: + * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies + * resolved, and atom 'X' has a higher priority than atom 'Y' + * * If atom 'Y' is currently running on the HW, then it is interrupted to + * allow atom 'X' to run soon after + * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing + * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' + * * Any two atoms that have the same priority could run in any order with + * respect to each other. That is, there is no ordering constraint between + * atoms of the same priority. + * + * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are + * scheduled between contexts. The default value, 0, will cause higher-priority + * atoms to be scheduled first, regardless of their context. The value 1 will + * use a round-robin algorithm when deciding which context's atoms to schedule + * next, so higher-priority atoms can only preempt lower priority atoms within + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. + */ +typedef u8 base_jd_prio; + +/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ +#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) +/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and + * BASE_JD_PRIO_LOW + */ +#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) +/* Low atom priority. */ +#define BASE_JD_PRIO_LOW ((base_jd_prio)2) + +/* Count of the number of priority levels. This itself is not a valid + * base_jd_prio setting + */ +#define BASE_JD_NR_PRIO_LEVELS 3 + +/** + * struct base_jd_atom_v2 - Node of a dependency graph used to submit a + * GPU job chain or soft-job to the kernel driver. + * + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS + * is set in the base_jd_core_req) the CPU address of a + * base_jd_fragment object. + * @udata: User data. + * @extres_list: List of external resources. + * @nr_extres: Number of external resources or JIT allocations. + * @jit_id: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @pre_dep: Pre-dependencies. One need to use SETTER function to assign + * this field; this is done in order to reduce possibility of + * improper assignment of a dependency field. + * @atom_number: Unique number to identify the atom. + * @prio: Atom priority. Refer to base_jd_prio for more details. + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP + * specified. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. + * @core_req: Core requirements. 
+ * @renderpass_id: Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @padding: Unused. Must be zero. + * + * This structure has changed since UK 10.2 for which base_jd_core_req was a + * u16 value. + * + * In UK 10.3 a core_req field of a u32 type was added to the end of the + * structure, and the place in the structure previously occupied by u16 + * core_req was kept but renamed to compat_core_req. + * + * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2]. + * Compatibility with UK 10.x from UK 11.y is not handled because + * the major version increase prevents this. + * + * For UK 11.20 jit_id[2] must be initialized to zero. + */ +struct base_jd_atom_v2 { + u64 jc; + struct base_jd_udata udata; + u64 extres_list; + u16 nr_extres; + u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; + u8 device_nr; + u8 jobslot; + base_jd_core_req core_req; + u8 renderpass_id; + u8 padding[7]; +}; + +/** + * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr + * at the beginning. + * + * @seq_nr: Sequence number of logical grouping of atoms. + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS + * is set in the base_jd_core_req) the CPU address of a + * base_jd_fragment object. + * @udata: User data. + * @extres_list: List of external resources. + * @nr_extres: Number of external resources or JIT allocations. + * @jit_id: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @pre_dep: Pre-dependencies. One need to use SETTER function to assign + * this field; this is done in order to reduce possibility of + * improper assignment of a dependency field. + * @atom_number: Unique number to identify the atom. + * @prio: Atom priority. Refer to base_jd_prio for more details. + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP + * specified. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. + * @core_req: Core requirements. + * @renderpass_id: Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @padding: Unused. Must be zero. 
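+ *
+ * For illustration only, a minimal sketch of filling in one such atom before
+ * passing it to KBASE_IOCTL_JOB_SUBMIT. BASE_JD_DEP_TYPE_ORDER and
+ * BASE_JD_REQ_DEP are assumed to be defined earlier in this header, and
+ * job_chain_gpu_va is a placeholder:
+ *
+ *   struct base_jd_atom atom = { 0 };
+ *
+ *   atom.seq_nr      = 1;
+ *   atom.jc          = job_chain_gpu_va;
+ *   atom.atom_number = 2;
+ *   atom.prio        = BASE_JD_PRIO_MEDIUM;
+ *   atom.core_req    = BASE_JD_REQ_DEP;       // dependency-only atom
+ *   // The documentation above recommends assigning @pre_dep through its
+ *   // setter helper; the fields are shown directly here only to
+ *   // illustrate the layout.
+ *   atom.pre_dep[0].atom_id         = 1;
+ *   atom.pre_dep[0].dependency_type = BASE_JD_DEP_TYPE_ORDER;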
+ */ +typedef struct base_jd_atom { + u64 seq_nr; + u64 jc; + struct base_jd_udata udata; + u64 extres_list; + u16 nr_extres; + u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; + u8 device_nr; + u8 jobslot; + base_jd_core_req core_req; + u8 renderpass_id; + u8 padding[7]; +} base_jd_atom; + +/* Job chain event code bits + * Defines the bits used to create ::base_jd_event_code + */ +enum { + BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */ + BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */ + /* Event indicates success (SW events only) */ + BASE_JD_SW_EVENT_SUCCESS = (1u << 13), + BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */ + BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */ + BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */ + BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */ + /* Mask to extract the type from an event code */ + BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) +}; + +/** + * enum base_jd_event_code - Job chain event codes + * + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status + * codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, because the + * job was hard-stopped. + * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as + * 'previous job done'. + * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes + * TERMINATED, DONE or JOB_CANCELLED. + * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job + * was hard stopped. + * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on + * complete/fail/cancel. + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, + * because the job was hard-stopped. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status + * codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes. + * Such codes are never returned to + * user-space. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes. + * + * HW and low-level SW events are represented by event codes. + * The status of jobs which succeeded are also represented by + * an event code (see @BASE_JD_EVENT_DONE). + * Events are usually reported as part of a &struct base_jd_event. + * + * The event codes are encoded in the following way: + * * 10:0 - subtype + * * 12:11 - type + * * 13 - SW success (only valid if the SW bit is set) + * * 14 - SW event (HW event if not set) + * * 15 - Kernel event (should never be seen in userspace) + * + * Events are split up into ranges as follows: + * * BASE_JD_EVENT_RANGE__START + * * BASE_JD_EVENT_RANGE__END + * + * code is in 's range when: + * BASE_JD_EVENT_RANGE__START <= code < + * BASE_JD_EVENT_RANGE__END + * + * Ranges can be asserted for adjacency by testing that the END of the previous + * is equal to the START of the next. This is useful for optimizing some tests + * for range. + * + * A limitation is that the last member of this enum must explicitly be handled + * (with an assert-unreachable statement) in switch statements that use + * variables of this type. 
Otherwise, the compiler warns that we have not + * handled that enum value. + */ +enum base_jd_event_code { + /* HW defined exceptions */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + + /* non-fatal exceptions */ + BASE_JD_EVENT_NOT_STARTED = 0x00, + BASE_JD_EVENT_DONE = 0x01, + BASE_JD_EVENT_STOPPED = 0x03, + BASE_JD_EVENT_TERMINATED = 0x04, + BASE_JD_EVENT_ACTIVE = 0x08, + + BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, + + /* job exceptions */ + BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, + BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, + BASE_JD_EVENT_JOB_READ_FAULT = 0x42, + BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, + BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, + BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, + BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, + BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, + BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, + BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, + BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, + BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, + BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, + BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, + BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, + BASE_JD_EVENT_STATE_FAULT = 0x5A, + BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, + BASE_JD_EVENT_UNKNOWN = 0x7F, + + /* GPU exceptions */ + BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, + BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, + + /* MMU exceptions */ + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, + BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, + BASE_JD_EVENT_ACCESS_FLAG = 0xD8, + + /* SW defined exceptions */ + BASE_JD_EVENT_MEM_GROWTH_FAILED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_TIMED_OUT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, + BASE_JD_EVENT_JOB_CANCELLED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, + BASE_JD_EVENT_JOB_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, + BASE_JD_EVENT_PM_EVENT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, + + BASE_JD_EVENT_BAG_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, + + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | 0x000, + + BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | + BASE_JD_SW_EVENT_BAG | 0x000, + BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | 0x000, + BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF +}; + +/** + * struct base_jd_event_v2 - Event reporting structure + * + * @event_code: 
event code. + * @atom_number: the atom number that has completed. + * @udata: user data. + * + * This structure is used by the kernel driver to report information + * about GPU events. They can either be HW-specific events or low-level + * SW events, such as job-chain completion. + * + * The event code contains an event type field which can be extracted + * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK. + */ +struct base_jd_event_v2 { + enum base_jd_event_code event_code; + base_atom_id atom_number; + struct base_jd_udata udata; +}; + +/** + * struct base_dump_cpu_gpu_counters - Structure for + * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS + * jobs. + * + * This structure is stored into the memory pointed to by the @jc field + * of &struct base_jd_atom. + * + * It must not occupy the same CPU cache line(s) as any neighboring data. + * This is to avoid cases where access to pages containing the structure + * is shared between cached and un-cached memory regions, which would + * cause memory corruption. + */ + +struct base_dump_cpu_gpu_counters { + u64 system_time; + u64 cycle_counter; + u64 sec; + u32 usec; + u8 padding[36]; +}; + +#endif /* _BASE_JM_KERNEL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h new file mode 100644 index 000000000000..49ab3827868e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_defs.h @@ -0,0 +1,826 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Definitions (types, defines, etcs) specific to Job Manager Kbase. + * They are placed here to allow the hierarchy of header files to work. + */ + +#ifndef _KBASE_JM_DEFS_H_ +#define _KBASE_JM_DEFS_H_ + +#include "mali_kbase_js_defs.h" + +/* Dump Job slot trace on error (only active if KBASE_KTRACE_ENABLE != 0) */ +#define KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR 1 + +/* + * Number of milliseconds before resetting the GPU when a job cannot be "zapped" + * from the hardware. Note that the time is actually + * ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and + * the GPU actually being reset to give other contexts time for their jobs + * to be soft-stopped and removed from the hardware before resetting. + */ +#define ZAP_TIMEOUT 1000 + +/* + * Prevent soft-stops from occurring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more + * predictable. + * + * Therefore, soft stop may still be disabled due to HW issues. + * + * Soft stop will still be used for non-scheduling purposes e.g. when + * terminating a context. + * + * if not in use, define this value to 0 instead of being undefined. 
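+ *
+ * A minimal sketch of how such a guard is typically consumed; the call site
+ * shown (kbase_job_slot_softstop() with kbdev, js and target_katom) is
+ * illustrative only, not code from this patch:
+ *
+ *   #if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+ *           kbase_job_slot_softstop(kbdev, js, target_katom);
+ *   #endif
+ *
+ * Defining the value to 0, rather than leaving it undefined, keeps such
+ * #if tests well defined (evaluating an undefined identifier in #if would
+ * trigger -Wundef warnings).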
+ */ +#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 + +/* + * Prevent hard-stops from occurring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more + * predictable. + * + * Hard stop will still be used for non-scheduling purposes e.g. when + * terminating a context. + * + * if not in use, define this value to 0 instead of being undefined. + */ +#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 + +/* Atom has been previously soft-stopped */ +#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1<<1) +/* Atom has been previously retried to execute */ +#define KBASE_KATOM_FLAGS_RERUN (1<<2) +/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps + * to disambiguate short-running job chains during soft/hard stopping of jobs + */ +#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) +/* Atom has been previously hard-stopped. */ +#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) +/* Atom has caused us to enter disjoint state */ +#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) +/* Atom blocked on cross-slot dependency */ +#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) +/* Atom has fail dependency on cross-slot dependency */ +#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) +/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ +#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) +/* Atom is currently holding a context reference */ +#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) +/* Atom requires GPU to be in protected mode */ +#define KBASE_KATOM_FLAG_PROTECTED (1<<11) +/* Atom has been stored in runnable_tree */ +#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) +/* Atom is waiting for L2 caches to power up in order to enter protected mode */ +#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) + +/* SW related flags about types of JS_COMMAND action + * NOTE: These must be masked off by JS_COMMAND_MASK + */ + +/* This command causes a disjoint event */ +#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 + +/* Bitmask of all SW related flags */ +#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) + +#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) +#error "JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK." \ + "Must update JS_COMMAND_SW_<..> bitmasks" +#endif + +/* Soft-stop command that causes a Disjoint event. This of course isn't + * entirely masked off by JS_COMMAND_MASK + */ +#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ + (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) + +#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT + +/* Serialize atoms within a slot (ie only one atom per job slot) */ +#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) +/* Serialize atoms between slots (ie only one job slot running at any time) */ +#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) +/* Reset the GPU after each atom completion */ +#define KBASE_SERIALIZE_RESET (1 << 2) + +#ifdef CONFIG_DEBUG_FS +/** + * struct base_job_fault_event - keeps track of the atom which faulted or which + * completed after the faulty atom but before the + * debug data for faulty atom was dumped. + * + * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for + * the atom which faulted. + * @katom: pointer to the atom for which job fault occurred or which + * completed after the faulty atom. + * @job_fault_work: work item, queued only for the faulty atom, which waits for + * the dumping to get completed and then does the bottom half + * of job done for the atoms which followed the faulty atom. 
+ * @head: List head used to store the atom in the global list of + * faulty atoms or context specific list of atoms which got + * completed during the dump. + * @reg_offset: offset of the register to be dumped next, only applicable + * for the faulty atom. + */ +struct base_job_fault_event { + + u32 event_code; + struct kbase_jd_atom *katom; + struct work_struct job_fault_work; + struct list_head head; + int reg_offset; +}; +#endif + +/** + * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. + * @atom: pointer to the dependee atom. + * @dep_type: type of dependency on the dependee @atom, i.e. order or data + * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. + */ +struct kbase_jd_atom_dependency { + struct kbase_jd_atom *atom; + u8 dep_type; +}; + +/** + * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the + * dependee atom. + * @dep: pointer to the dependency info structure. + * + * Return: readonly reference to dependee atom. + */ +static inline const struct kbase_jd_atom * +kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return (const struct kbase_jd_atom *)(dep->atom); +} + +/** + * kbase_jd_katom_dep_type - Retrieves the dependency type info + * + * @dep: pointer to the dependency info structure. + * + * Return: the type of dependency there is on the dependee atom. + */ +static inline u8 kbase_jd_katom_dep_type( + const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return dep->dep_type; +} + +/** + * kbase_jd_katom_dep_set - sets up the dependency info structure + * as per the values passed. + * @const_dep: pointer to the dependency info structure to be setup. + * @a: pointer to the dependee atom. + * @type: type of dependency there is on the dependee atom. + */ +static inline void kbase_jd_katom_dep_set( + const struct kbase_jd_atom_dependency *const_dep, + struct kbase_jd_atom *a, u8 type) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = a; + dep->dep_type = type; +} + +/** + * kbase_jd_katom_dep_clear - resets the dependency info structure + * + * @const_dep: pointer to the dependency info structure to be setup. + */ +static inline void kbase_jd_katom_dep_clear( + const struct kbase_jd_atom_dependency *const_dep) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = NULL; + dep->dep_type = BASE_JD_DEP_TYPE_INVALID; +} + +/** + * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it + * becomes runnable, with respect to job slot + * ringbuffer/fifo. + * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, + * which implies that either atom has not become + * runnable due to dependency or has completed + * the execution on GPU. + * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is + * blocked due to cross slot dependency, + * can't be submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot + * fifo but is waiting for the completion of + * previously added atoms in current & other + * slots, as their protected mode requirements + * do not match with the current atom. 
+ * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo + * and is waiting for completion of protected + * mode transition, needed before the atom is + * submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is + * waiting for the cores, which are needed to + * execute the job chain represented by the atom, + * to become available + * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to + * GPU. + * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted + * to GPU. + * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some + * failure, but only after the previously added + * atoms in fifo have completed or have also + * been returned to JS. + */ +enum kbase_atom_gpu_rb_state { + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, + KBASE_ATOM_GPU_RB_WAITING_BLOCKED, + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, + KBASE_ATOM_GPU_RB_READY, + KBASE_ATOM_GPU_RB_SUBMITTED, + KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 +}; + +/** + * enum kbase_atom_enter_protected_state - The state of an atom with respect to + * the preparation for GPU's entry into protected mode, + * becomes pertinent only after atom's state with respect + * to slot ringbuffer is + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any + * atoms currently submitted to GPU and protected mode + * transition is not already in progress. + * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to + * become disabled before entry into protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in + * preparation for the coherency change. L2 shall be + * powered down and GPU shall come out of fully + * coherent mode before entering protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; + * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on + * so that coherency register contains correct value when + * GPU enters protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for + * BASE_HW_ISSUE_TGOX_R1_1234 check + * that L2 is powered up and switch GPU to protected mode. + */ +enum kbase_atom_enter_protected_state { + /* + * NOTE: The integer value of this must match + * KBASE_ATOM_EXIT_PROTECTED_CHECK. + */ + KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, + KBASE_ATOM_ENTER_PROTECTED_HWCNT, + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, + KBASE_ATOM_ENTER_PROTECTED_FINISHED, +}; + +/** + * enum kbase_atom_exit_protected_state - The state of an atom with respect to + * the preparation for GPU's exit from protected mode, + * becomes pertinent only after atom's state with respect + * to slot ngbuffer is + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any + * atoms currently submitted to GPU and protected mode + * transition is not already in progress. + * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in + * preparation for the reset, as exiting protected mode + * requires a reset. 
+ * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from + * protected mode + * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state, Wait for the reset to + * complete + */ +enum kbase_atom_exit_protected_state { + /* + * NOTE: The integer value of this must match + * KBASE_ATOM_ENTER_PROTECTED_CHECK. + */ + KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, + KBASE_ATOM_EXIT_PROTECTED_RESET, + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, +}; + +/** + * struct kbase_ext_res - Contains the info for external resources referred + * by an atom, which have been mapped on GPU side. + * @gpu_address: Start address of the memory region allocated for + * the resource from GPU virtual address space. + * @alloc: pointer to physical pages tracking object, set on + * mapping the external resource on GPU side. + */ +struct kbase_ext_res { + u64 gpu_address; + struct kbase_mem_phy_alloc *alloc; +}; + +/** + * struct kbase_jd_atom - object representing the atom, containing the complete + * state and attributes of an atom. + * @work: work item for the bottom half processing of the atom, + * by JD or JS, after it got executed on GPU or the + * input fence got signaled + * @start_timestamp: time at which the atom was submitted to the GPU, by + * updating the JS_HEAD_NEXTn register. + * @udata: copy of the user data sent for the atom in + * base_jd_submit. + * @kctx: Pointer to the base context with which the atom is + * associated. + * @dep_head: Array of 2 list heads, pointing to the two list of + * atoms + * which are blocked due to dependency on this atom. + * @dep_item: Array of 2 list heads, used to store the atom in the + * list of other atoms depending on the same dependee + * atom. + * @dep: Array containing the dependency info for the 2 atoms + * on which the atom depends upon. + * @jd_item: List head used during job dispatch job_done + * processing - as dependencies may not be entirely + * resolved at this point, + * we need to use a separate list head. + * @in_jd_list: flag set to true if atom's @jd_item is currently on + * a list, prevents atom being processed twice. + * @jit_ids: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @nr_extres: number of external resources referenced by the atom. + * @extres: pointer to the location containing info about + * @nr_extres external resources referenced by the atom. + * @device_nr: indicates the coregroup with which the atom is + * associated, when + * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified. + * @jc: GPU address of the job-chain. + * @softjob_data: Copy of data read from the user space buffer that @jc + * points to. + * @fence: Stores either an input or output sync fence, + * depending on soft-job type + * @sync_waiter: Pointer to the sync fence waiter structure passed to + * the callback function on signaling of the input + * fence. + * @dma_fence: object containing pointers to both input & output + * fences and other related members used for explicit + * sync through soft jobs and for the implicit + * synchronization required on access to external + * resources. + * @event_code: Event code for the job chain represented by the atom, + * both HW and low-level SW events are represented by + * event codes. 
+ * @core_req: bitmask of BASE_JD_REQ_* flags specifying either
+ * HW or SW requirements for the job chain represented
+ * by the atom.
+ * @ticks: Number of scheduling ticks for which the atom has been
+ * running on the GPU.
+ * @sched_priority: Priority of the atom for Job scheduling, as per the
+ * KBASE_JS_ATOM_SCHED_PRIO_*.
+ * @completed: Wait queue to wait upon for the completion of the atom.
+ * @status: Indicates at a high level what stage the atom is in,
+ * as per KBASE_JD_ATOM_STATE_*, i.e. whether it is not
+ * in use, queued in JD, given to JS, submitted to HW or
+ * has completed execution on HW.
+ * @work_id: used for GPU tracepoints, it is a snapshot of the
+ * 'work_id' counter in kbase_jd_context which is
+ * incremented on every call to base_jd_submit.
+ * @slot_nr: Job slot chosen for the atom.
+ * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the
+ * exact low-level state of the atom.
+ * @gpu_rb_state: state of the atom as per KBASE_ATOM_GPU_RB_*, precisely
+ * tracking the atom's state after it has entered the
+ * Job scheduler on becoming runnable. The atom
+ * could be blocked due to a cross-slot dependency,
+ * waiting for the shader cores to become available,
+ * or waiting for protected mode transitions to
+ * complete.
+ * @need_cache_flush_cores_retained: flag indicating that a manual flush of the
+ * GPU cache is needed for the atom and the shader cores
+ * used for the atom have been kept on.
+ * @blocked: flag indicating that the atom's resubmission to the GPU is
+ * blocked until the work item is scheduled to return the
+ * atom to JS.
+ * @pre_dep: Pointer to the atom that this atom has a same-slot
+ * dependency on.
+ * @post_dep: Pointer to the atom that has a same-slot dependency on
+ * this atom.
+ * @x_pre_dep: Pointer to the atom that this atom has a cross-slot
+ * dependency on.
+ * @x_post_dep: Pointer to the atom that has a cross-slot dependency on
+ * this atom.
+ * @flush_id: The GPU's flush count recorded at the time of
+ * submission, used for the cache flush optimization.
+ * @fault_event: Info for dumping the debug data on Job fault.
+ * @queue: List head used for 4 different purposes:
+ * adds the atom to the list of dma-buf fence waiting atoms;
+ * to the list of atoms blocked due to cross-slot dependency;
+ * to the list of softjob atoms for which JIT allocation has
+ * been deferred; and to the list of softjob atoms waiting for
+ * the signaling of a fence.
+ * @jit_node: Used to keep track of all JIT free/alloc jobs in
+ * submission order.
+ * @jit_blocked: Flag indicating that a JIT allocation requested through a
+ * softjob atom will be reattempted after the impending
+ * free of other active JIT allocations.
+ * @will_fail_event_code: If non-zero, this indicates that the atom will fail
+ * with the set event_code when the atom is processed.
+ * Used for special handling of atoms, which have a data
+ * dependency on the failed atoms.
+ * @protected_state: State of the atom, as per
+ * KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*,
+ * when transitioning into or out of protected mode.
+ * The atom will be either entering or exiting
+ * protected mode.
+ * @runnable_tree_node: The node added to the context's job slot specific
+ * rb tree when the atom becomes runnable.
+ * @age: Age of the atom relative to other atoms in the context,
+ * a snapshot of the age_count counter in the kbase
+ * context.
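+ *
+ * For illustration, a minimal sketch of walking the two dependency slots in
+ * @dep with the accessors defined earlier in this file; handle_dependency()
+ * and the katom variable are placeholders:
+ *
+ *   int i;
+ *
+ *   for (i = 0; i < 2; i++) {
+ *           if (kbase_jd_katom_dep_type(&katom->dep[i]) ==
+ *               BASE_JD_DEP_TYPE_INVALID)
+ *                   continue;   // this slot carries no dependency
+ *           handle_dependency(kbase_jd_katom_dep_atom(&katom->dep[i]));
+ *   }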
+ */ +struct kbase_jd_atom { + struct work_struct work; + ktime_t start_timestamp; + + struct base_jd_udata udata; + struct kbase_context *kctx; + + struct list_head dep_head[2]; + struct list_head dep_item[2]; + const struct kbase_jd_atom_dependency dep[2]; + struct list_head jd_item; + bool in_jd_list; + +#if MALI_JIT_PRESSURE_LIMIT_BASE + u8 jit_ids[2]; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + u16 nr_extres; + struct kbase_ext_res *extres; + + u32 device_nr; + u64 jc; + void *softjob_data; + /* MALI_SEC_INTEGRATION */ + spinlock_t fence_lock; +#if defined(CONFIG_SYNC) + struct sync_fence *fence; + struct sync_fence_waiter sync_waiter; +#endif /* CONFIG_SYNC */ + /* MALI_SEC_INTEGRATION */ + struct mutex fence_mt; + struct timer_list fence_timer; +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + struct { + /* Use the functions/API defined in mali_kbase_fence.h to + * when working with this sub struct + */ +#if defined(CONFIG_SYNC_FILE) + /* Input fence */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence_in; +#else + struct dma_fence *fence_in; +#endif +#endif + /* This points to the dma-buf output fence for this atom. If + * this is NULL then there is no fence for this atom and the + * following fields related to dma_fence may have invalid data. + * + * The context and seqno fields contain the details for this + * fence. + * + * This fence is signaled when the katom is completed, + * regardless of the event_code of the katom (signal also on + * failure). + */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + /* The dma-buf fence context number for this atom. A unique + * context number is allocated to each katom in the context on + * context creation. + */ + unsigned int context; + /* The dma-buf fence sequence number for this atom. This is + * increased every time this katom uses dma-buf fence. + */ + atomic_t seqno; + /* This contains a list of all callbacks set up to wait on + * other fences. This atom must be held back from JS until all + * these callbacks have been called and dep_count have reached + * 0. The initial value of dep_count must be equal to the + * number of callbacks on this list. + * + * This list is protected by jctx.lock. Callbacks are added to + * this list when the atom is built and the wait are set up. + * All the callbacks then stay on the list until all callbacks + * have been called and the atom is queued, or cancelled, and + * then all callbacks are taken off the list and freed. + */ + struct list_head callbacks; + /* Atomic counter of number of outstandind dma-buf fence + * dependencies for this atom. When dep_count reaches 0 the + * atom may be queued. + * + * The special value "-1" may only be set after the count + * reaches 0, while holding jctx.lock. This indicates that the + * atom has been handled, either queued in JS or cancelled. + * + * If anyone but the dma-fence worker sets this to -1 they must + * ensure that any potentially queued worker must have + * completed before allowing the atom to be marked as unused. + * This can be done by flushing the fence work queue: + * kctx->dma_fence.wq. 
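+ *
+ * A sketch of the documented lifecycle; the callback and worker shown are
+ * illustrative placeholders, not code from this patch:
+ *
+ *   // in each dma-fence callback, as a dependency is signalled:
+ *   if (atomic_dec_and_test(&katom->dma_fence.dep_count))
+ *           queue_work(kctx->dma_fence.wq, &katom->work);
+ *
+ *   // later, in the worker, once the atom has been queued in JS or
+ *   // cancelled (while holding jctx.lock):
+ *   atomic_set(&katom->dma_fence.dep_count, -1);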
+ */ + atomic_t dep_count; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ + + /* Note: refer to kbasep_js_atom_retained_state, which will take a copy + * of some of the following members + */ + enum base_jd_event_code event_code; + base_jd_core_req core_req; + u8 jobslot; + u8 renderpass_id; + struct base_jd_fragment jc_fragment; + + u32 ticks; + int sched_priority; + + wait_queue_head_t completed; + enum kbase_jd_atom_state status; +#ifdef CONFIG_GPU_TRACEPOINTS + int work_id; +#endif + int slot_nr; + + u32 atom_flags; + + int retry_count; + + enum kbase_atom_gpu_rb_state gpu_rb_state; + + bool need_cache_flush_cores_retained; + + atomic_t blocked; + + /* user-space sequence number, to order atoms in some temporal order */ + u64 seq_nr; + + struct kbase_jd_atom *pre_dep; + struct kbase_jd_atom *post_dep; + + struct kbase_jd_atom *x_pre_dep; + struct kbase_jd_atom *x_post_dep; + + u32 flush_id; + +#ifdef CONFIG_DEBUG_FS + struct base_job_fault_event fault_event; +#endif + struct list_head queue; + + struct list_head jit_node; + bool jit_blocked; + + enum base_jd_event_code will_fail_event_code; + + union { + enum kbase_atom_enter_protected_state enter; + enum kbase_atom_exit_protected_state exit; + } protected_state; + + struct rb_node runnable_tree_node; + + u32 age; +}; + +static inline bool kbase_jd_katom_is_protected( + const struct kbase_jd_atom *katom) +{ + return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); +} + +/* + * Theory of operations: + * + * Atom objects are statically allocated within the context structure. + * + * Each atom is the head of two lists, one for the "left" set of dependencies, + * one for the "right" set. + */ + +#define KBASE_JD_DEP_QUEUE_SIZE 256 + +/** + * enum kbase_jd_renderpass_state - State of a renderpass + * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to + * START. + * @KBASE_JD_RP_START: Renderpass making a first attempt at tiling. + * Can transition to PEND_OOM or COMPLETE. + * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much + * memory and has a soft-stop pending. Can transition to + * OOM or COMPLETE. + * @KBASE_JD_RP_OOM: Renderpass whose first attempt at tiling used too much + * memory and therefore switched to incremental + * rendering. The fragment job chain is forced to run. + * Can only transition to RETRY. + * @KBASE_JD_RP_RETRY: Renderpass making a second or subsequent attempt at + * tiling. Can transition to RETRY_PEND_OOM or COMPLETE. + * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at + * tiling used too much memory again and has a + * soft-stop pending. Can transition to RETRY_OOM + * or COMPLETE. + * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at + * tiling used too much memory again. The fragment job + * chain is forced to run. Can only transition to RETRY. + * + * A state machine is used to control incremental rendering. + */ +enum kbase_jd_renderpass_state { + KBASE_JD_RP_COMPLETE, /* COMPLETE => START */ + KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */ + KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */ + KBASE_JD_RP_OOM, /* OOM => RETRY */ + KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or + * COMPLETE + */ + KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or + * COMPLETE + */ + KBASE_JD_RP_RETRY_OOM, /* RETRY_OOM => RETRY */ +}; + +/** + * struct kbase_jd_renderpass - Data for a renderpass + * @state: Current state of the renderpass. 
If KBASE_JD_RP_COMPLETE then + * all other members are invalid. + * Both the job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @start_katom: Address of the atom that is the start of a renderpass. + * Both the job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @end_katom: Address of the atom that is the end of a renderpass, or NULL + * if that atom hasn't been added to the job scheduler yet. + * The job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @oom_reg_list: A list of region structures which triggered out-of-memory. + * The hwaccess_lock must be locked to access this. + * + * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS + * are associated with an object of this type, which is created and maintained + * by kbase to keep track of each renderpass. + */ +struct kbase_jd_renderpass { + enum kbase_jd_renderpass_state state; + struct kbase_jd_atom *start_katom; + struct kbase_jd_atom *end_katom; + struct list_head oom_reg_list; +}; + +/** + * struct kbase_jd_context - per context object encapsulating all the + * Job dispatcher related state. + * @lock: lock to serialize the updates made to the + * Job dispatcher state and kbase_jd_atom objects. + * @sched_info: Structure encapsulating all the Job scheduling + * info. + * @atoms: Array of the objects representing atoms, + * containing the complete state and attributes + * of an atom. + * @renderpasses: Array of renderpass state for incremental + * rendering, indexed by user-specified renderpass + * ID. + * @job_nr: Tracks the number of atoms being processed by the + * kbase. This includes atoms that are not tracked by + * scheduler: 'not ready to run' & 'dependency-only' + * jobs. + * @zero_jobs_wait: Waitq that reflects whether there are no jobs + * (including SW-only dependency jobs). This is set + * when no jobs are present on the ctx, and clear + * when there are jobs. + * This must be updated atomically with @job_nr. + * note: Job Dispatcher knows about more jobs than + * the Job Scheduler as it is unaware of jobs that + * are blocked on dependencies and SW-only dependency + * jobs. This waitq can be waited upon to find out + * when the context jobs are all done/cancelled + * (including those that might've been blocked + * on dependencies) - and so, whether it can be + * terminated. However, it should only be terminated + * once it is not present in the run-pool. + * Since the waitq is only set under @lock, + * the waiter should also briefly obtain and drop + * @lock to guarantee that the setter has completed + * its work on the kbase_context + * @job_done_wq: Workqueue to which the per atom work item is + * queued for bottom half processing when the + * atom completes + * execution on GPU or the input fence get signaled. + * @tb_lock: Lock to serialize the write access made to @tb to + * to store the register access trace messages. + * @tb: Pointer to the Userspace accessible buffer storing + * the trace messages for register read/write + * accesses made by the Kbase. The buffer is filled + * in circular fashion. + * @tb_wrap_offset: Offset to the end location in the trace buffer, + * the write pointer is moved to the beginning on + * reaching this offset. 
+ * @work_id: atomic variable used for GPU tracepoints, + * incremented on every call to base_jd_submit. + * @jit_atoms_head: A list of the just-in-time memory soft-jobs, both + * allocate & free, in submission order, protected + * by kbase_jd_context.lock. + * @jit_pending_alloc: A list of just-in-time memory allocation + * soft-jobs which will be reattempted after the + * impending free of other active allocations. + */ +struct kbase_jd_context { + struct mutex lock; + struct kbasep_js_kctx_info sched_info; + struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; + struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT]; + struct workqueue_struct *job_done_wq; + + wait_queue_head_t zero_jobs_wait; + spinlock_t tb_lock; + u32 *tb; + u32 job_nr; + size_t tb_wrap_offset; + +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_t work_id; +#endif + + struct list_head jit_atoms_head; + struct list_head jit_pending_alloc; +}; + +/** + * struct jsctx_queue - JS context atom queue + * @runnable_tree: Root of RB-tree containing currently runnable atoms on this + * job slot. + * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot + * dependencies. Atoms on this list will be moved to the + * runnable_tree when the blocking atom completes. + * + * hwaccess_lock must be held when accessing this structure. + */ +struct jsctx_queue { + struct rb_root runnable_tree; + struct list_head x_dep_head; +}; + +#endif /* _KBASE_JM_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h new file mode 100644 index 000000000000..6dc57d04426d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h @@ -0,0 +1,194 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_JM_IOCTL_H_ +#define _KBASE_JM_IOCTL_H_ + +#include +#include + +/* + * 11.1: + * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags + * 11.2: + * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED, + * which some user-side clients prior to 11.2 might fault if they received + * them + * 11.3: + * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and + * KBASE_IOCTL_STICKY_RESOURCE_UNMAP + * 11.4: + * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET + * 11.5: + * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) + * 11.6: + * - Added flags field to base_jit_alloc_info structure, which can be used to + * specify pseudo chunked tiler alignment for JIT allocations. 
+ * 11.7: + * - Removed UMP support + * 11.8: + * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags + * 11.9: + * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY + * under base_mem_alloc_flags + * 11.10: + * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for + * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations + * with one softjob. + * 11.11: + * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags + * 11.12: + * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS + * 11.13: + * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT + * 11.14: + * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set + * under base_mem_alloc_flags + * 11.15: + * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags. + * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be + * passed to mmap(). + * 11.16: + * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf. + * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for + * dma-buf. Now, buffers are mapped on GPU when first imported, no longer + * requiring external resource or sticky resource tracking. UNLESS, + * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled. + * 11.17: + * - Added BASE_JD_REQ_JOB_SLOT. + * - Reused padding field in base_jd_atom_v2 to pass job slot number. + * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO + * 11.18: + * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags + * 11.19: + * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified. + * 11.20: + * - Added new phys_pages member to kbase_ioctl_mem_jit_init for + * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2 + * (replacing '_OLD') and _11_5 suffixes + * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in + * base_jd_atom_v2. It must currently be initialized to zero. + * - Added heap_info_gpu_addr to base_jit_alloc_info, and + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's + * flags member. Previous variants of this structure are kept and given _10_2 + * and _11_5 suffixes. + * - The above changes are checked for safe values in usual builds + * 11.21: + * - v2.0 of mali_trace debugfs file, which now versions the file separately + * 11.22: + * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2. + * KBASE_IOCTL_JOB_SUBMIT supports both in parallel. + * 11.23: + * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify + * the physical memory backing of JIT allocations. This was not supposed + * to be a valid use case, but it was allowed by the previous implementation. + * 11.24: + * - Added a sysfs file 'serialize_jobs' inside a new sub-directory + * 'scheduling'. + * 11.25: + * - Enabled JIT pressure limit in base/kbase by default + * 11.26 + * - Added kinstr_jm API + * 11.27 + * - Backwards compatible extension to HWC ioctl. 
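+ *
+ * As an illustrative user-space sketch only: the 11.22 addition means both
+ * atom layouts can be submitted through the ioctl declared below, selected
+ * purely by the stride field. kbase_fd is a placeholder for the opened
+ * device file descriptor, and the atoms array is assumed to be populated
+ * as described in mali_base_jm_kernel.h:
+ *
+ *   struct base_jd_atom atoms[2];
+ *   struct kbase_ioctl_job_submit submit = {
+ *           .addr     = (__u64)(uintptr_t)atoms,
+ *           .nr_atoms = 2,
+ *           .stride   = sizeof(struct base_jd_atom),
+ *   };
+ *
+ *   // use sizeof(struct base_jd_atom_v2) as the stride for the pre-11.22
+ *   // layout instead
+ *   int ret = ioctl(kbase_fd, KBASE_IOCTL_JOB_SUBMIT, &submit);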
+ */ +#define BASE_UK_VERSION_MAJOR 11 +#define BASE_UK_VERSION_MINOR 27 + +/** + * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel + * + * @addr: Memory address of an array of struct base_jd_atom_v2 or v3 + * @nr_atoms: Number of entries in the array + * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom) + */ +struct kbase_ioctl_job_submit { + __u64 addr; + __u32 nr_atoms; + __u32 stride; +}; + +#define KBASE_IOCTL_JOB_SUBMIT \ + _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) + +#define KBASE_IOCTL_POST_TERM \ + _IO(KBASE_IOCTL_TYPE, 4) + +/** + * struct kbase_ioctl_soft_event_update - Update the status of a soft-event + * @event: GPU address of the event which has been updated + * @new_status: The new status to set + * @flags: Flags for future expansion + */ +struct kbase_ioctl_soft_event_update { + __u64 event; + __u32 new_status; + __u32 flags; +}; + +#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ + _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) + +/** + * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for + * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the + * kernel + * + * @size: The size of the `struct kbase_kinstr_jm_atom_state_change` + * @version: Represents a breaking change in the + * `struct kbase_kinstr_jm_atom_state_change` + * @padding: Explicit padding to get the structure up to 64bits. See + * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst + * + * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the + * end of the structure that older user space might not understand. If the + * `version` is the same, the structure is still compatible with newer kernels. + * The `size` can be used to cast the opaque memory returned from the kernel. + */ +struct kbase_kinstr_jm_fd_out { + __u16 size; + __u8 version; + __u8 padding[5]; +}; + +/** + * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor + * + * @count: Number of atom states that can be stored in the kernel circular + * buffer. Must be a power of two + * @padding: Explicit padding to get the structure up to 64bits. See + * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst + */ +struct kbase_kinstr_jm_fd_in { + __u16 count; + __u8 padding[6]; +}; + +union kbase_kinstr_jm_fd { + struct kbase_kinstr_jm_fd_in in; + struct kbase_kinstr_jm_fd_out out; +}; + +#define KBASE_IOCTL_KINSTR_JM_FD \ + _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd) + +#endif /* _KBASE_JM_IOCTL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_js.h b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_js.h new file mode 100644 index 000000000000..6c222ceae8ee --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_js.h @@ -0,0 +1,892 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Job Scheduler Interface.
+ * These interfaces are Internal to KBase.
+ */
+
+#ifndef _KBASE_JM_JS_H_
+#define _KBASE_JM_JS_H_
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * kbasep_js_devdata_init - Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * kbasep_js_devdata_halt - Halt the Job Scheduler.
+ *
+ * It is safe to call this on kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev
+ * must be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a programming error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_devdata_term - Terminate the Job Scheduler
+ *
+ * It is safe to call this on kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of kbdev
+ * must be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a programming error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_kctx_init - Initialize the Scheduling Component of a
+ * struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with
+ * the scheduler. Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero initialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context *const kctx);
+
+/**
+ * kbasep_js_kctx_term - Terminate the Scheduling Component of a
+ * struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler.
+ *
+ * It is safe to call this on a struct kbase_context that has never had or
+ * failed initialization of its jctx.sched_info member, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbase_context must be zero initialized before
+ * passing to the kbase_js_init() function.
+ *
+ * It is a programming error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * kbasep_js_add_job - Add a job chain to the Job Scheduler,
+ * and take necessary actions to
+ * schedule the context/run the job.
+ * + * This atomically does the following: + * * Update the numbers of jobs information + * * Add the job to the run pool if necessary (part of init_job) + * + * Once this is done, then an appropriate action is taken: + * * If the ctx is scheduled, it attempts to start the next job (which might be + * this added job) + * * Otherwise, and if this is the first job on the context, it enqueues it on + * the Policy Queue + * + * The Policy's Queue can be updated by this in the following ways: + * * In the above case that this is the first job on the context + * * If the context is high priority and the context is not scheduled, then it + * could cause the Policy to schedule out a low-priority context, allowing + * this context to be scheduled in. + * + * If the context is already scheduled on the RunPool, then adding a job to it + * is guaranteed not to update the Policy Queue. And so, the caller is + * guaranteed to not need to try scheduling a context from the Run Pool - it + * can safely assert that the result is false. + * + * It is a programming error to have more than U32_MAX jobs in flight at a time. + * + * The following locking conditions are made on the caller: + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold hwaccess_lock (as this will be obtained internally) + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used + * internally). + * + * Return: true indicates that the Policy Queue was updated, and so the + * caller will need to try scheduling a context onto the Run Pool, + * false indicates that no updates were made to the Policy Queue, + * so no further action is required from the caller. This is always returned + * when the context is currently scheduled. + */ +bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * kbasep_js_remove_job - Remove a job chain from the Job Scheduler, + * except for its 'retained state'. + * + * Completely removing a job requires several calls: + * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of + * the atom + * * kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler + * * kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the + * remaining state held as part of the job having been run. + * + * In the common case of atoms completing normally, this set of actions is more + * optimal for spinlock purposes than having kbasep_js_remove_job() handle all + * of the actions. + * + * In the case of canceling atoms, it is easier to call + * kbasep_js_remove_cancelled_job(), which handles all the necessary actions. + * + * It is a programming error to call this when: + * * a atom is not a job belonging to kctx. + * * a atom has already been removed from the Job Scheduler. + * * a atom is still in the runpool + * + * Do not use this for removing jobs being killed by kbase_jd_cancel() - use + * kbasep_js_remove_cancelled_job() instead. + * + * The following locking conditions are made on the caller: + * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * + */ +void kbasep_js_remove_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * kbasep_js_remove_cancelled_job - Completely remove a job chain from the + * Job Scheduler, in the case + * where the job chain was cancelled. 
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * * an atom is not a job belonging to kctx.
+ * * an atom has already been removed from the Job Scheduler.
+ * * an atom is still in the runpool:
+ * * it is not being killed with kbase_jd_cancel()
+ *
+ * The following locking conditions are made on the caller:
+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold the hwaccess_lock (as this will be obtained
+ * internally)
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * Return: true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs and
+ * false otherwise.
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a
+ * context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * * If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * * Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true.
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy,
+ * allowing it to be scheduled out.
+ *
+ * When the refcount reaches zero, the context might be scheduled out
+ * (depending on whether the Scheduling Policy has deemed it so, or if it has
+ * run out of jobs).
+ *
+ * If the context does get scheduled out, then the following actions will be
+ * taken as part of descheduling a context:
+ * For the context being descheduled:
+ * * If the context is in the process of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ * * If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ * * Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ * * In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released
+ * (kbase_pm_context_idle()).
+ * + * Whilst the context is being descheduled, this also handles actions that + * cause more atoms to be run: + * * Attempt submitting atoms when the Context Attributes on the Runpool have + * changed. This is because the context being scheduled out could mean that + * there are more opportunities to run atoms. + * * Attempt submitting to a slot that was previously blocked due to affinity + * restrictions. This is usually only necessary when releasing a context + * happens as part of completing a previous job, but is harmless nonetheless. + * * Attempt scheduling in a new context (if one is available), and if + * necessary, running a job from that new context. + * + * Unlike retaining a context in the runpool, this function cannot be called + * from IRQ context. + * + * It is a programming error to call this on a kctx that is not currently + * scheduled, or that already has a zero refcount. + * + * The following locking conditions are made on the caller: + * * it must not hold the hwaccess_lock, because it will be used internally. + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be + * obtained internally) + * + */ +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of + * kbasep_js_runpool_release_ctx() that handles additional + * actions from completing an atom. + * + * This is usually called as part of completing an atom and releasing the + * refcount on the context held by the atom. + * + * Therefore, the extra actions carried out are part of handling actions queued + * on a completed atom, namely: + * * Releasing the atom's context attributes + * * Retrying the submission on a particular slot, because we couldn't submit + * on that slot from an IRQ handler. + * + * The locking conditions of this function are the same as those for + * kbasep_js_runpool_release_ctx() + */ +void kbasep_js_runpool_release_ctx_and_katom_retained_state( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state); + +/** + * kbasep_js_runpool_release_ctx_nolock - Variant of + * kbase_js_runpool_release_ctx() that assumes that + * kbasep_js_device_data::runpool_mutex and + * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not + * attempt to schedule new contexts. + */ +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context + * + * This schedules a context in regardless of the context priority. + * If the runpool is full, a context will be forced out of the runpool and the + * function will wait for the new context to be scheduled in. + * The context will be kept scheduled in (and the corresponding address space + * reserved) until kbasep_js_release_privileged_ctx is called). + * + * The following locking conditions are made on the caller: + * * it must not hold the hwaccess_lock, because it will be used internally. 
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used
+ * internally).
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbasep_js_release_privileged_ctx - Release a privileged context,
+ * allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * * it must not hold the hwaccess_lock, because it will be used internally.
+ * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * * it must not hold the kbase_device::mmu_hw_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_js_try_run_jobs - Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * * kbasep_js_device_data::runpool_mutex
+ * * hwaccess_lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_suspend - Suspend the job scheduler during a Power Management
+ * Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Management active refcounts to
+ * become zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduler's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time.
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbasep_js_resume - Resume the Job Scheduler after a Power Management
+ * Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * * Schedules contexts back into the runpool
+ * * Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler.
+ *
+ * @kctx: Context pointer
+ * @katom: Pointer to the atom to submit
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * Return: true if the context needs to be enqueued, otherwise false.
+ */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+/**
+ * jsctx_ll_flush_to_rb() - Push atoms from the linked list to the ringbuffer.
+ * @kctx: Context pointer
+ * @prio: Priority (specifies the queue together with js).
+ * @js: Job slot (specifies the queue together with prio).
+ * + * Pushes all possible atoms from the linked list to the ringbuffer. + * Number of atoms are limited to free space in the ringbuffer and + * number of available atoms in the linked list. + * + */ +void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); + +/** + * kbase_js_pull - Pull an atom from a context in the job scheduler for + * execution. + * + * @kctx: Context to pull from + * @js: Job slot to pull from + * + * The atom will not be removed from the ringbuffer at this stage. + * + * The HW access lock must be held when calling this function. + * + * Return: a pointer to an atom, or NULL if there are no atoms for this + * slot that can be currently run. + */ +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); + +/** + * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. + * + * @kctx: Context pointer + * @atom: Pointer to the atom to unpull + * + * An atom is 'unpulled' if execution is stopped but intended to be returned to + * later. The most common reason for this is that the atom has been + * soft-stopped. Another reason is if an end-of-renderpass atom completed + * but will need to be run again as part of the same renderpass. + * + * Note that if multiple atoms are to be 'unpulled', they must be returned in + * the reverse order to which they were originally pulled. It is a programming + * error to return atoms in any other order. + * + * The HW access lock must be held when calling this function. + * + */ +void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); + +/** + * kbase_js_complete_atom_wq - Complete an atom from jd_done_worker(), + * removing it from the job + * scheduler ringbuffer. + * @kctx: Context pointer + * @katom: Pointer to the atom to complete + * + * If the atom failed then all dependee atoms marked for failure propagation + * will also fail. + * + * Return: true if the context is now idle (no jobs pulled) false otherwise. + */ +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * kbase_js_complete_atom - Complete an atom. + * + * @katom: Pointer to the atom to complete + * @end_timestamp: The time that the atom completed (may be NULL) + * + * Most of the work required to complete an atom will be performed by + * jd_done_worker(). + * + * The HW access lock must be held when calling this function. + * + * Return: a atom that has now been unblocked and can now be run, or NULL + * if none + */ +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +/** + * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot + * dependency + * @katom: Pointer to an atom in the slot ringbuffer + * + * A cross-slot dependency is ignored if necessary to unblock incremental + * rendering. If the atom at the start of a renderpass used too much memory + * and was soft-stopped then the atom at the end of a renderpass is submitted + * to hardware regardless of its dependency on the start-of-renderpass atom. + * This can happen multiple times for the same pair of atoms. + * + * Return: true to block the atom or false to allow it to be submitted to + * hardware. + */ +bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); + +/** + * kbase_js_sched - Submit atoms from all available contexts. + * + * @kbdev: Device pointer + * @js_mask: Mask of job slots to submit to + * + * This will attempt to submit as many jobs as possible to the provided job + * slots. 
It will exit when either all job slots are full, or all contexts have + * been used. + * + */ +void kbase_js_sched(struct kbase_device *kbdev, int js_mask); + +/** + * kbase_jd_zap_context - Attempt to deschedule a context that is being + * destroyed + * @kctx: Context pointer + * + * This will attempt to remove a context from any internal job scheduler queues + * and perform any other actions to ensure a context will not be submitted + * from. + * + * If the context is currently scheduled, then the caller must wait for all + * pending jobs to complete before taking any further action. + */ +void kbase_js_zap_context(struct kbase_context *kctx); + +/** + * kbase_js_is_atom_valid - Validate an atom + * + * @kbdev: Device pointer + * @katom: Atom to validate + * + * This will determine whether the atom can be scheduled onto the GPU. Atoms + * with invalid combinations of core requirements will be rejected. + * + * Return: true if atom is valid false otherwise. + */ +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_js_set_timeouts - update all JS timeouts with user specified data + * + * @kbdev: Device pointer + * + * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is + * set to a positive number then that becomes the new value used, if a timeout + * is negative then the default is set. + */ +void kbase_js_set_timeouts(struct kbase_device *kbdev); + +/** + * kbase_js_set_ctx_priority - set the context priority + * + * @kctx: Context pointer + * @new_priority: New priority value for the Context + * + * The context priority is set to a new value and it is moved to the + * pullable/unpullable list as per the new priority. + */ +void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); + + +/** + * kbase_js_update_ctx_priority - update the context priority + * + * @kctx: Context pointer + * + * The context priority gets updated as per the priority of atoms currently in + * use for that context, but only if system priority mode for context scheduling + * is being used. + */ +void kbase_js_update_ctx_priority(struct kbase_context *kctx); + +/* + * Helpers follow + */ + +/** + * kbasep_js_is_submit_allowed - Check that a context is allowed to submit + * jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. + * + * As with any bool, never test the return value with true. + * + * The caller must hold hwaccess_lock. + */ +static inline bool kbasep_js_is_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 test_bit; + bool is_allowed; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + test_bit = (u16) (1u << kctx->as_nr); + + is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); + dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)", + is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); + return is_allowed; +} + +/** + * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. + * + * The caller must hold hwaccess_lock. 
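+ *
+ * As a minimal sketch (assuming the caller does not already hold the lock;
+ * hwaccess_lock here is the kbase_device::hwaccess_lock spinlock):
+ *
+ *   unsigned long flags;
+ *
+ *   spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+ *   kbasep_js_set_submit_allowed(&kctx->kbdev->js_data, kctx);
+ *   spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);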
+ */ +static inline void kbasep_js_set_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 set_bit; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + set_bit = (u16) (1u << kctx->as_nr); + + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed |= set_bit; +} + +/** + * kbasep_js_clear_submit_allowed - Prevent a context from submitting more + * jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. + * + * The caller must hold hwaccess_lock. + */ +static inline void kbasep_js_clear_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 clear_bit; + u16 clear_mask; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + clear_bit = (u16) (1u << kctx->as_nr); + clear_mask = ~clear_bit; + + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed &= clear_mask; +} + +/** + * Create an initial 'invalid' atom retained state, that requires no + * atom-related work to be done on releasing with + * kbasep_js_runpool_release_ctx_and_katom_retained_state() + */ +static inline void kbasep_js_atom_retained_state_init_invalid( + struct kbasep_js_atom_retained_state *retained_state) +{ + retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; + retained_state->core_req = + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; +} + +/** + * Copy atom state that can be made available after jd_done_nolock() is called + * on that atom. + */ +static inline void kbasep_js_atom_retained_state_copy( + struct kbasep_js_atom_retained_state *retained_state, + const struct kbase_jd_atom *katom) +{ + retained_state->event_code = katom->event_code; + retained_state->core_req = katom->core_req; + retained_state->sched_priority = katom->sched_priority; + retained_state->device_nr = katom->device_nr; +} + +/** + * kbasep_js_has_atom_finished - Determine whether an atom has finished + * (given its retained state), + * and so should be given back to + * userspace/removed from the system. + * + * @katom_retained_state: the retained state of the atom to check + * + * Reasons for an atom not finishing include: + * * Being soft-stopped (and so, the atom should be resubmitted sometime later) + * * It is an end of renderpass atom that was run to consume the output of a + * start-of-renderpass atom that was soft-stopped because it used too much + * memory. In this case, it will have to be run again later. + * + * Return: false if the atom has not finished, true otherwise. 
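+ *
+ * For illustration only: the retained-state copy must be taken before
+ * jd_done_nolock() can free the original atom, and handle_finished_atom()
+ * below is a stand-in for whatever handling the caller performs:
+ *
+ *   struct kbasep_js_atom_retained_state retained;
+ *
+ *   kbasep_js_atom_retained_state_copy(&retained, katom);
+ *   if (kbasep_js_has_atom_finished(&retained))
+ *           handle_finished_atom(&retained);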
+ */ +static inline bool kbasep_js_has_atom_finished( + const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->event_code != + BASE_JD_EVENT_STOPPED && + katom_retained_state->event_code != + BASE_JD_EVENT_REMOVED_FROM_NEXT && + katom_retained_state->event_code != + BASE_JD_EVENT_END_RP_DONE); +} + +/** + * kbasep_js_atom_retained_state_is_valid - Determine whether a struct + * kbasep_js_atom_retained_state + * is valid + * @katom_retained_state the atom's retained state to check + * + * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates + * that the code should just ignore it. + * + * Return: false if the retained state is invalid, true otherwise. + */ +static inline bool kbasep_js_atom_retained_state_is_valid( + const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->core_req != + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); +} + +/** + * kbase_js_runpool_inc_context_count - Increment number of running contexts. + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * * The caller must hold the kbasep_js_device_data::runpool_mutex + */ +static inline void kbase_js_runpool_inc_context_count( + struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Track total contexts */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); + ++(js_devdata->nr_all_contexts_running); + + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + /* Track contexts that can submit jobs */ + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < + S8_MAX); + ++(js_devdata->nr_user_contexts_running); + } +} + +/** + * kbase_js_runpool_dec_context_count - decrement number of running contexts. + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * * The caller must hold the kbasep_js_device_data::runpool_mutex + */ +static inline void kbase_js_runpool_dec_context_count( + struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Track total contexts */ + --(js_devdata->nr_all_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); + + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + /* Track contexts that can submit jobs */ + --(js_devdata->nr_user_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); + } +} + +/** + * kbase_js_sched_all - Submit atoms from all available contexts to all + * job slots. + * + * @kbdev: Device pointer + * + * This will attempt to submit as many jobs as possible. It will exit when + * either all job slots are full, or all contexts have been used. 
+ */ +static inline void kbase_js_sched_all(struct kbase_device *kbdev) +{ + kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + +extern const int +kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; + +extern const base_jd_prio +kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + +/** + * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) + * to relative ordering + * @atom_prio: Priority ID to translate. + * + * Atom priority values for @ref base_jd_prio cannot be compared directly to + * find out which are higher or lower. + * + * This function will convert base_jd_prio values for successively lower + * priorities into a monotonically increasing sequence. That is, the lower the + * base_jd_prio priority, the higher the value produced by this function. This + * is in accordance with how the rest of the kernel treats priority. + * + * The mapping is 1:1 and the size of the valid input range is the same as the + * size of the valid output range, i.e. + * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS + * + * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions + * + * Return: On success: a value in the inclusive range + * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: + * KBASE_JS_ATOM_SCHED_PRIO_INVALID + */ +static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) +{ + if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) + return KBASE_JS_ATOM_SCHED_PRIO_INVALID; + + return kbasep_js_atom_priority_to_relative[atom_prio]; +} + +static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) +{ + unsigned int prio_idx; + + KBASE_DEBUG_ASSERT(sched_prio >= 0 && + sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); + + prio_idx = (unsigned int)sched_prio; + + return kbasep_js_relative_priority_to_atom[prio_idx]; +} + +#endif /* _KBASE_JM_JS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_js_defs.h b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_js_defs.h new file mode 100644 index 000000000000..0b48615d2ca3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_js_defs.h @@ -0,0 +1,409 @@ +/* + * + * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * @file mali_kbase_js.h + * Job Scheduler Type Definitions + */ + +#ifndef _KBASE_JS_DEFS_H_ +#define _KBASE_JS_DEFS_H_ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js + * @{ + */ +/* Forward decls */ +struct kbase_device; +struct kbase_jd_atom; + + +typedef u32 kbase_context_flags; + +/** Callback function run on all of a context's jobs registered with the Job + * Scheduler */ +typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); + +/** + * @brief Maximum number of jobs that can be submitted to a job slot whilst + * inside the IRQ handler. + * + * This is important because GPU NULL jobs can complete whilst the IRQ handler + * is running. Otherwise, it potentially allows an unlimited number of GPU NULL + * jobs to be submitted inside the IRQ handler, which increases IRQ latency. + */ +#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 + +/** + * @brief Context attributes + * + * Each context attribute can be thought of as a boolean value that caches some + * state information about either the runpool, or the context: + * - In the case of the runpool, it is a cache of "Do any contexts owned by + * the runpool have attribute X?" + * - In the case of a context, it is a cache of "Do any atoms owned by the + * context have attribute X?" + * + * The boolean value of the context attributes often affect scheduling + * decisions, such as affinities to use and job slots to use. + * + * To accomodate changes of state in the context, each attribute is refcounted + * in the context, and in the runpool for all running contexts. Specifically: + * - The runpool holds a refcount of how many contexts in the runpool have this + * attribute. + * - The context holds a refcount of how many atoms have this attribute. + */ +enum kbasep_js_ctx_attr { + /** Attribute indicating a context that contains Compute jobs. That is, + * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE + * + * @note A context can be both 'Compute' and 'Non Compute' if it contains + * both types of jobs. + */ + KBASEP_JS_CTX_ATTR_COMPUTE, + + /** Attribute indicating a context that contains Non-Compute jobs. That is, + * the context has some jobs that are \b not of type @ref + * BASE_JD_REQ_ONLY_COMPUTE. + * + * @note A context can be both 'Compute' and 'Non Compute' if it contains + * both types of jobs. + */ + KBASEP_JS_CTX_ATTR_NON_COMPUTE, + + /** Attribute indicating that a context contains compute-job atoms that + * aren't restricted to a coherent group, and can run on all cores. + * + * Specifically, this is when the atom's \a core_req satisfy: + * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 + * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups + * + * Such atoms could be blocked from running if one of the coherent groups + * is being used by another job slot, so tracking this context attribute + * allows us to prevent such situations. + * + * @note This doesn't take into account the 1-coregroup case, where all + * compute atoms would effectively be able to run on 'all cores', but + * contexts will still not always get marked with this attribute. Instead, + * it is the caller's responsibility to take into account the number of + * coregroups when interpreting this attribute. 
+ * + * @note Whilst Tiler atoms are normally combined with + * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without + * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy + * enough to handle anyway. + */ + KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, + + /** Must be the last in the enum */ + KBASEP_JS_CTX_ATTR_COUNT +}; + +enum { + /** Bit indicating that new atom should be started because this atom completed */ + KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), + /** Bit indicating that the atom was evicted from the JS_NEXT registers */ + KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) +}; + +/** Combination of KBASE_JS_ATOM_DONE_<...> bits */ +typedef u32 kbasep_js_atom_done_code; + +/* + * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode + */ +enum { + /* + * In this mode, higher priority atoms will be scheduled first, + * regardless of the context they belong to. Newly-runnable higher + * priority atoms can preempt lower priority atoms currently running on + * the GPU, even if they belong to a different context. + */ + KBASE_JS_SYSTEM_PRIORITY_MODE = 0, + + /* + * In this mode, the highest-priority atom will be chosen from each + * context in turn using a round-robin algorithm, so priority only has + * an effect within the context an atom belongs to. Newly-runnable + * higher priority atoms can preempt the lower priority atoms currently + * running on the GPU, but only if they belong to the same context. + */ + KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, + + /* Must be the last in the enum */ + KBASE_JS_PRIORITY_MODE_COUNT, +}; + +/* + * Internal atom priority defines for kbase_jd_atom::sched_prio + */ +enum { + KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, + KBASE_JS_ATOM_SCHED_PRIO_MED, + KBASE_JS_ATOM_SCHED_PRIO_LOW, + KBASE_JS_ATOM_SCHED_PRIO_COUNT, +}; + +/* Invalid priority for kbase_jd_atom::sched_prio */ +#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 + +/* Default priority in the case of contexts with no atoms, or being lenient + * about invalid priorities from userspace. + */ +#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED + +/** + * @brief KBase Device Data Job Scheduler sub-structure + * + * This encapsulates the current context of the Job Scheduler on a particular + * device. This context is global to the device, and is not tied to any + * particular struct kbase_context running on the device. + * + * nr_contexts_running and as_free are optimized for packing together (by making + * them smaller types than u32). The operations on them should rarely involve + * masking. The use of signed types for arithmetic indicates to the compiler that + * the value will not rollover (which would be undefined behavior), and so under + * the Total License model, it is free to make optimizations based on that (i.e. + * to remove masking). + */ +struct kbasep_js_device_data { + /* Sub-structure to collect together Job Scheduling data used in IRQ + * context. The hwaccess_lock must be held when accessing. */ + struct runpool_irq { + /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. + * When bit 'N' is set in this, it indicates whether the context bound to address space + * 'N' is allowed to submit jobs. + */ + u16 submit_allowed; + + /** Context Attributes: + * Each is large enough to hold a refcount of the number of contexts + * that can fit into the runpool. This is currently BASE_MAX_NR_AS + * + * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store + * the refcount. 
Hence, it's not worthwhile reducing this to + * bit-manipulation on u32s to save space (where in contrast, 4 bit + * sub-fields would be easy to do and would save space). + * + * Whilst this must not become negative, the sign bit is used for: + * - error detection in debug builds + * - Optimization: it is undefined for a signed int to overflow, and so + * the compiler can optimize for that never happening (thus, no masking + * is required on updating the variable) */ + s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + + /* + * Affinity management and tracking + */ + /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates + * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ + u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; + /** Refcount for each core owned by each slot. Used to generate the + * slot_affinities array of bitvectors + * + * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, + * because it is refcounted only when a job is definitely about to be + * submitted to a slot, and is de-refcounted immediately after a job + * finishes */ + s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; + } runpool_irq; + + /** + * Scheduling semaphore. This must be held when calling + * kbase_jm_kick() + */ + struct semaphore schedule_sem; + + /** + * List of contexts that can currently be pulled from + */ + struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + /** + * List of contexts that can not currently be pulled from, but have + * jobs currently running. + */ + struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + + /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ + s8 nr_user_contexts_running; + /** Number of currently scheduled contexts (including ones that are not submitting jobs) */ + s8 nr_all_contexts_running; + + /** Core Requirements to match up with base_js_atom's core_req memeber + * @note This is a write-once member, and so no locking is required to read */ + base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; + + u32 scheduling_period_ns; /*< Value for JS_SCHEDULING_PERIOD_NS */ + u32 soft_stop_ticks; /*< Value for JS_SOFT_STOP_TICKS */ + u32 soft_stop_ticks_cl; /*< Value for JS_SOFT_STOP_TICKS_CL */ + u32 hard_stop_ticks_ss; /*< Value for JS_HARD_STOP_TICKS_SS */ + u32 hard_stop_ticks_cl; /*< Value for JS_HARD_STOP_TICKS_CL */ + u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */ + u32 gpu_reset_ticks_ss; /*< Value for JS_RESET_TICKS_SS */ + u32 gpu_reset_ticks_cl; /*< Value for JS_RESET_TICKS_CL */ + u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */ + u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ + + /** List of suspended soft jobs */ + struct list_head suspended_soft_jobs_list; + +#ifdef CONFIG_MALI_DEBUG + /* Support soft-stop on a single context */ + bool softstop_always; +#endif /* CONFIG_MALI_DEBUG */ + + /** The initalized-flag is placed at the end, to avoid cache-pollution (we should + * only be using this during init/term paths). 
+ * @note This is a write-once member, and so no locking is required to read */ + int init_status; + + /* Number of contexts that can currently be pulled from */ + u32 nr_contexts_pullable; + + /* Number of contexts that can either be pulled from or are currently + * running */ + atomic_t nr_contexts_runnable; + + /** Value for JS_SOFT_JOB_TIMEOUT */ + atomic_t soft_job_timeout_ms; + + /** + * Queue Lock, used to access the Policy's queue of contexts + * independently of the Run Pool. + * + * Of course, you don't need the Run Pool lock to access this. + */ + struct mutex queue_mutex; + + /** + * Run Pool mutex, for managing contexts within the runpool. + * Unless otherwise specified, you must hold this lock whilst accessing + * any members that follow + * + * In addition, this is used to access: + * * the kbasep_js_kctx_info::runpool substructure + */ + struct mutex runpool_mutex; +}; + +/** + * @brief KBase Context Job Scheduling information structure + * + * This is a substructure in the struct kbase_context that encapsulates all the + * scheduling information. + */ +struct kbasep_js_kctx_info { + + /** + * Job Scheduler Context information sub-structure. These members are + * accessed regardless of whether the context is: + * - In the Policy's Run Pool + * - In the Policy's Queue + * - Not queued nor in the Run Pool. + * + * You must obtain the jsctx_mutex before accessing any other members of + * this substructure. + * + * You may not access any of these members from IRQ context. + */ + struct kbase_jsctx { + struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ + + /** Number of jobs ready to run - does \em not include the jobs waiting in + * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr + * for such jobs*/ + u32 nr_jobs; + + /** Context Attributes: + * Each is large enough to hold a refcount of the number of atoms on + * the context. **/ + u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + + /** + * Wait queue to wait for KCTX_SHEDULED flag state changes. + * */ + wait_queue_head_t is_scheduled_wait; + + /** Link implementing JS queues. Context can be present on one + * list per job slot + */ + struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; + } ctx; + + /* The initalized-flag is placed at the end, to avoid cache-pollution (we should + * only be using this during init/term paths) */ + int init_status; +}; + +/** Subset of atom state that can be available after jd_done_nolock() is called + * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), + * because the original atom could disappear. */ +struct kbasep_js_atom_retained_state { + /** Event code - to determine whether the atom has finished */ + enum base_jd_event_code event_code; + /** core requirements */ + base_jd_core_req core_req; + /* priority */ + int sched_priority; + /* Core group atom was executed on */ + u32 device_nr; + +}; + +/** + * Value signifying 'no retry on a slot required' for: + * - kbase_js_atom_retained_state::retry_submit_on_slot + * - kbase_jd_atom::retry_submit_on_slot + */ +#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) + +/** + * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state. + * + * @see kbase_atom_retained_state_is_valid() + */ +#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP + +/** + * @brief The JS timer resolution, in microseconds + * + * Any non-zero difference in time will be at least this size. 
+ */ +#define KBASEP_JS_TICK_RESOLUTION_US 1 + + + /** @} *//* end group kbase_js */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_JS_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_base_hwconfig_features.h b/drivers/gpu/arm/b_r26p0/mali_base_hwconfig_features.h new file mode 100644 index 000000000000..6885f8d58066 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_base_hwconfig_features.h @@ -0,0 +1,486 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, + * please update base/tools/hwconfig_generator/hwc_{issues,features}.py + * For more information see base/tools/hwconfig_generator/README + */ + +#ifndef _BASE_HWCONFIG_FEATURES_H_ +#define _BASE_HWCONFIG_FEATURES_H_ + +enum base_hw_feature { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_generic[] = { + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tMIx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END +}; + 
+static const enum base_hw_feature base_hw_features_tHEx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tSIx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tDVx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tNOx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + 
BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tGOx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tTRx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tNAx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tBEx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + 
BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tDUx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tODx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tGRx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tVAx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + 
BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tTUx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tE2x[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +#endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_base_hwconfig_issues.h b/drivers/gpu/arm/b_r26p0/mali_base_hwconfig_issues.h new file mode 100644 index 000000000000..3966069178c1 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_base_hwconfig_issues.h @@ -0,0 +1,635 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, + * please update base/tools/hwconfig_generator/hwc_{issues,features}.py + * For more information see base/tools/hwconfig_generator/README + */ + +#ifndef _BASE_HWCONFIG_ISSUES_H_ +#define _BASE_HWCONFIG_ISSUES_H_ + +enum base_hw_issue { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TNOX_1194, + BASE_HW_ISSUE_TGOX_R1_1234, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TSIX_1792, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_generic[] = { + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tMIx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tHEx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + 
BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tHEx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TSIX_1792, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TSIX_1792, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tSIx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tDVx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TNOX_1194, + 
BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tNOx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TNOX_1194, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TGOX_R1_1234, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tGOx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tTRx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + 
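The hardware-issue tables above carry no length field; each list is terminated by the BASE_HW_ISSUE_END sentinel. As a minimal illustrative sketch (not part of the patch — the helper name hw_issue_is_listed is hypothetical, and the in-tree driver may consume these tables differently, for example by folding them into a per-device mask at probe time), a caller could test whether a given table lists a particular workaround with a linear scan up to the sentinel:

/* Assumes mali_base_hwconfig_issues.h (defining enum base_hw_issue and the
 * base_hw_issues_* tables) is in scope. <stdbool.h>/<stddef.h> stand in for
 * the kernel's <linux/types.h> in this standalone sketch.
 */
#include <stdbool.h>
#include <stddef.h>

static bool hw_issue_is_listed(const enum base_hw_issue *issues,
			       enum base_hw_issue issue)
{
	size_t i;

	/* Walk the table until the BASE_HW_ISSUE_END sentinel. */
	for (i = 0; issues[i] != BASE_HW_ISSUE_END; i++) {
		if (issues[i] == issue)
			return true;
	}
	return false;
}

/* Example use (hypothetical caller):
 *	if (hw_issue_is_listed(base_hw_issues_tTRx_r0p0, BASE_HW_ISSUE_TTRX_3083))
 *		; /* apply the corresponding workaround *\/
 */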
+static const enum base_hw_issue base_hw_issues_model_tNAx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tBEx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tDUx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tODx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue 
base_hw_issues_tGRx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tGRx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tVAx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tTUx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tE2x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +#endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_base_kernel.h b/drivers/gpu/arm/b_r26p0/mali_base_kernel.h new file mode 100644 index 000000000000..d45092fcf6f4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_base_kernel.h @@ -0,0 +1,801 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Base structures shared with the kernel. + */ + +#ifndef _BASE_KERNEL_H_ +#define _BASE_KERNEL_H_ + +struct base_mem_handle { + struct { + u64 handle; + } basep; +}; + +#include "mali_base_mem_priv.h" +#include "gpu/mali_kbase_gpu_coherency.h" +#include "gpu/mali_kbase_gpu_id.h" + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 + +#define BASE_MAX_COHERENT_GROUPS 16 + +#if defined CDBG_ASSERT +#define LOCAL_ASSERT CDBG_ASSERT +#elif defined KBASE_DEBUG_ASSERT +#define LOCAL_ASSERT KBASE_DEBUG_ASSERT +#else +#error assert macro not defined! 
+#endif + +#if defined(PAGE_MASK) && defined(PAGE_SHIFT) +#define LOCAL_PAGE_SHIFT PAGE_SHIFT +#define LOCAL_PAGE_LSB ~PAGE_MASK +#else +#include + +#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2 +#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2 +#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) +#else +#error Failed to find page size +#endif +#endif + +/* Physical memory group ID for normal usage. + */ +#define BASE_MEM_GROUP_DEFAULT (0) + +/* Number of physical memory groups. + */ +#define BASE_MEM_GROUP_COUNT (16) + +/** + * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. + * + * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator + * in order to determine the best cache policy. Some combinations are + * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), + * which defines a write-only region on the CPU side, which is + * heavily read by the CPU... + * Other flags are only meaningful to a particular allocator. + * More flags can be added to this list, as long as they don't clash + * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). + */ +typedef u32 base_mem_alloc_flags; + +/* A mask for all the flags which are modifiable via the base_mem_set_flags + * interface. + */ +#define BASE_MEM_FLAGS_MODIFIABLE \ + (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ + BASE_MEM_COHERENT_LOCAL) + +/* A mask of all the flags that can be returned via the base_mem_get_flags() + * interface. + */ +#define BASE_MEM_FLAGS_QUERYABLE \ + (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \ + BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \ + BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \ + BASEP_MEM_FLAGS_KERNEL_ONLY)) + +/** + * enum base_mem_import_type - Memory types supported by @a base_mem_import + * + * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type + * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) + * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a + * base_mem_import_user_buffer + * + * Each type defines what the supported handle type is. + * + * If any new type is added here ARM must be contacted + * to allocate a numeric value for it. + * Do not just add a new type without synchronizing with ARM + * as future releases from ARM might include other new types + * which could clash with your custom types. + */ +enum base_mem_import_type { + BASE_MEM_IMPORT_TYPE_INVALID = 0, + /** + * Import type with value 1 is deprecated. + */ + BASE_MEM_IMPORT_TYPE_UMM = 2, + BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 +}; + +/** + * struct base_mem_import_user_buffer - Handle of an imported user buffer + * + * @ptr: address of imported user buffer + * @length: length of imported user buffer in bytes + * + * This structure is used to represent a handle of an imported user buffer. 
+ */ + +struct base_mem_import_user_buffer { + u64 ptr; + u64 length; +}; + +/* Mask to detect 4GB boundary alignment */ +#define BASE_MEM_MASK_4GB 0xfffff000UL +/* Mask to detect 4GB boundary (in page units) alignment */ +#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) + +/* Limit on the 'extent' parameter for an allocation with the + * BASE_MEM_TILER_ALIGN_TOP flag set + * + * This is the same as the maximum limit for a Buffer Descriptor's chunk size + */ +#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \ + (21u - (LOCAL_PAGE_SHIFT)) +#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \ + (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2)) + +/* Bit mask of cookies used for for memory allocation setup */ +#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ + +/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ +#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ + +/** + * struct base_fence - Cross-device synchronisation fence. + * + * A fence is used to signal when the GPU has finished accessing a resource that + * may be shared with other devices, and also to delay work done asynchronously + * by the GPU until other devices have finished accessing a shared resource. + */ +struct base_fence { + struct { + int fd; + int stream_fd; + } basep; +}; + +/** + * struct base_mem_aliasing_info - Memory aliasing info + * + * Describes a memory handle to be aliased. + * A subset of the handle can be chosen for aliasing, given an offset and a + * length. + * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a + * region where a special page is mapped with a write-alloc cache setup, + * typically used when the write result of the GPU isn't needed, but the GPU + * must write anyway. + * + * Offset and length are specified in pages. + * Offset must be within the size of the handle. + * Offset+length must not overrun the size of the handle. + * + * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * @offset: Offset within the handle to start aliasing from, in pages. + * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. + * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * specifies the number of times the special page is needed. + */ +struct base_mem_aliasing_info { + struct base_mem_handle handle; + u64 offset; + u64 length; +}; + +/* Maximum percentage of just-in-time memory allocation trimming to perform + * on free. + */ +#define BASE_JIT_MAX_TRIM_LEVEL (100) + +/* Maximum number of concurrent just-in-time memory allocations. + */ +#define BASE_JIT_ALLOC_COUNT (255) + +/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5 + * + * jit_version is 1 + * + * Due to the lack of padding specified, user clients between 32 and 64-bit + * may have assumed a different size of the struct + * + * An array of structures was not supported + */ +struct base_jit_alloc_info_10_2 { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; +}; + +/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up + * to 11.19 + * + * This structure had a number of modifications during and after kernel driver + * version 11.5, but remains size-compatible throughout its version history, and + * with earlier variants compatible with future variants by requiring + * zero-initialization to the unused space in the structure. 
+ * + * jit_version is 2 + * + * Kernel driver version history: + * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes + * must be zero. Kbase minor version was not incremented, so some + * versions of 11.5 do not have this change. + * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase + * minor version not incremented) + * 11.6: Added 'flags', replacing 1 padding byte + * 11.10: Arrays of this structure are supported + */ +struct base_jit_alloc_info_11_5 { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; + u8 bin_id; + u8 max_allocations; + u8 flags; + u8 padding[2]; + u16 usage_id; +}; + +/** + * struct base_jit_alloc_info - Structure which describes a JIT allocation + * request. + * @gpu_alloc_addr: The GPU virtual address to write the JIT + * allocated GPU virtual address to. + * @va_pages: The minimum number of virtual pages required. + * @commit_pages: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * Zero is not a valid value. + * @bin_id: The JIT allocation bin, used in conjunction with + * @max_allocations to limit the number of each + * type of JIT allocation. + * @max_allocations: The maximum number of allocations allowed within + * the bin specified by @bin_id. Should be the same + * for all allocations within the same bin. + * @flags: flags specifying the special requirements for + * the JIT allocation, see + * %BASE_JIT_ALLOC_VALID_FLAGS + * @padding: Expansion space - should be initialised to zero + * @usage_id: A hint about which allocation should be reused. + * The kernel should attempt to use a previous + * allocation with the same usage_id + * @heap_info_gpu_addr: Pointer to an object in GPU memory describing + * the actual usage of the region. + * + * jit_version is 3. + * + * When modifications are made to this structure, it is still compatible with + * jit_version 3 when: a) the size is unchanged, and b) new members only + * replace the padding bytes. + * + * Previous jit_version history: + * jit_version == 1, refer to &base_jit_alloc_info_10_2 + * jit_version == 2, refer to &base_jit_alloc_info_11_5 + * + * Kbase version history: + * 11.20: added @heap_info_gpu_addr + */ +struct base_jit_alloc_info { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; + u8 bin_id; + u8 max_allocations; + u8 flags; + u8 padding[2]; + u16 usage_id; + u64 heap_info_gpu_addr; +}; + +enum base_external_resource_access { + BASE_EXT_RES_ACCESS_SHARED, + BASE_EXT_RES_ACCESS_EXCLUSIVE +}; + +struct base_external_resource { + u64 ext_resource; +}; + + +/** + * The maximum number of external resources which can be mapped/unmapped + * in a single request. + */ +#define BASE_EXT_RES_COUNT_MAX 10 + +/** + * struct base_external_resource_list - Structure which describes a list of + * external resources. + * @count: The number of resources. + * @ext_res: Array of external resources which is + * sized at allocation time. 
+ */ +struct base_external_resource_list { + u64 count; + struct base_external_resource ext_res[1]; +}; + +struct base_jd_debug_copy_buffer { + u64 address; + u64 size; + struct base_external_resource extres; +}; + +#define GPU_MAX_JOB_SLOTS 16 + +/** + * User-side Base GPU Property Queries + * + * The User-side Base GPU Property Query interface encapsulates two + * sub-modules: + * + * - "Dynamic GPU Properties" + * - "Base Platform Config GPU Properties" + * + * Base only deals with properties that vary between different GPU + * implementations - the Dynamic GPU properties and the Platform Config + * properties. + * + * For properties that are constant for the GPU Architecture, refer to the + * GPU module. However, we will discuss their relevance here just to + * provide background information. + * + * About the GPU Properties in Base and GPU modules + * + * The compile-time properties (Platform Config, GPU Compile-time + * properties) are exposed as pre-processor macros. + * + * Complementing the compile-time properties are the Dynamic GPU + * Properties, which act as a conduit for the GPU Configuration + * Discovery. + * + * In general, the dynamic properties are present to verify that the platform + * has been configured correctly with the right set of Platform Config + * Compile-time Properties. + * + * As a consistent guide across the entire DDK, the choice for dynamic or + * compile-time should consider the following, in order: + * 1. Can the code be written so that it doesn't need to know the + * implementation limits at all? + * 2. If you need the limits, get the information from the Dynamic Property + * lookup. This should be done once as you fetch the context, and then cached + * as part of the context data structure, so it's cheap to access. + * 3. If there's a clear and arguable inefficiency in using Dynamic Properties, + * then use a Compile-Time Property (Platform Config, or GPU Compile-time + * property). Examples of where this might be sensible follow: + * - Part of a critical inner-loop + * - Frequent re-use throughout the driver, causing significant extra load + * instructions or control flow that would be worthwhile optimizing out. + * + * We cannot provide an exhaustive set of examples, neither can we provide a + * rule for every possible situation. Use common sense, and think about: what + * the rest of the driver will be doing; how the compiler might represent the + * value if it is a compile-time constant; whether an OEM shipping multiple + * devices would benefit much more from a single DDK binary, instead of + * insignificant micro-optimizations. + * + * Dynamic GPU Properties + * + * Dynamic GPU properties are presented in two sets: + * 1. the commonly used properties in @ref base_gpu_props, which have been + * unpacked from GPU register bitfields. + * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props + * (also a member of base_gpu_props). All of these are presented in + * the packed form, as presented by the GPU registers themselves. + * + * The raw properties in gpu_raw_gpu_props are necessary to + * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device + * behaving differently?". In this case, all information about the + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali + * Tools software on the host PC. + * + * The properties returned extend the GPU Configuration Discovery + * registers. 
For example, GPU clock speed is not specified in the GPU + * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. + * + * The GPU properties are obtained by a call to + * base_get_gpu_props(). This simply returns a pointer to a const + * base_gpu_props structure. It is constant for the life of a base + * context. Multiple calls to base_get_gpu_props() to a base context + * return the same pointer to a constant structure. This avoids cache pollution + * of the common data. + * + * This pointer must not be freed, because it does not point to the start of a + * region allocated by the memory allocator; instead, just close the @ref + * base_context. + * + * + * Kernel Operation + * + * During Base Context Create time, user-side makes a single kernel call: + * - A call to fill user memory with GPU information structures + * + * The kernel-side will fill the provided the entire processed base_gpu_props + * structure, because this information is required in both + * user and kernel side; it does not make sense to decode it twice. + * + * Coherency groups must be derived from the bitmasks, but this can be done + * kernel side, and just once at kernel startup: Coherency groups must already + * be known kernel-side, to support chains that specify a 'Only Coherent Group' + * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. + * + * Coherency Group calculation + * + * Creation of the coherent group data is done at device-driver startup, and so + * is one-time. This will most likely involve a loop with CLZ, shifting, and + * bit clearing on the L2_PRESENT mask, depending on whether the + * system is L2 Coherent. The number of shader cores is done by a + * population count, since faulty cores may be disabled during production, + * producing a non-contiguous mask. + * + * The memory requirements for this algorithm can be determined either by a u64 + * population count on the L2_PRESENT mask (a LUT helper already is + * required for the above), or simple assumption that there can be no more than + * 16 coherent groups, since core groups are typically 4 cores. + */ + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 + +#define BASE_MAX_COHERENT_GROUPS 16 + +struct mali_base_gpu_core_props { + /** + * Product specific value. + */ + u32 product_id; + + /** + * Status of the GPU release. + * No defined values, but starts at 0 and increases by one for each + * release status (alpha, beta, EAC, etc.). + * 4 bit values (0-15). + */ + u16 version_status; + + /** + * Minor release number of the GPU. "P" part of an "RnPn" release number. + * 8 bit values (0-255). + */ + u16 minor_revision; + + /** + * Major release number of the GPU. "R" part of an "RnPn" release number. + * 4 bit values (0-15). + */ + u16 major_revision; + + u16 padding; + + /* The maximum GPU frequency. Reported to applications by + * clGetDeviceInfo() + */ + u32 gpu_freq_khz_max; + + /** + * Size of the shader program counter, in bits. + */ + u32 log2_program_counter_size; + + /** + * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a + * bitpattern where a set bit indicates that the format is supported. + * + * Before using a texture format, it is recommended that the corresponding + * bit be checked. + */ + u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + + /** + * Theoretical maximum memory available to the GPU. 
It is unlikely that a + * client will be able to allocate all of this memory for their own + * purposes, but this at least provides an upper bound on the memory + * available to the GPU. + * + * This is required for OpenCL's clGetDeviceInfo() call when + * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The + * client will not be expecting to allocate anywhere near this value. + */ + u64 gpu_available_memory_size; + + /** + * The number of execution engines. + */ + u8 num_exec_engines; +}; + +/** + * + * More information is possible - but associativity and bus width are not + * required by upper-level apis. + */ +struct mali_base_gpu_l2_cache_props { + u8 log2_line_size; + u8 log2_cache_size; + u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ + u8 padding[5]; +}; + +struct mali_base_gpu_tiler_props { + u32 bin_size_bytes; /* Max is 4*2^15 */ + u32 max_active_levels; /* Max is 2^15 */ +}; + +/** + * GPU threading system details. + */ +struct mali_base_gpu_thread_props { + u32 max_threads; /* Max. number of threads per core */ + u32 max_workgroup_size; /* Max. number of threads per workgroup */ + u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */ + u16 max_registers; /* Total size [1..65535] of the register file available per core. */ + u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */ + u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */ + u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */ + u8 padding[3]; + u32 tls_alloc; /* Number of threads per core that TLS must + * be allocated for + */ +}; + +/** + * struct mali_base_gpu_coherent_group - descriptor for a coherent group + * + * \c core_mask exposes all cores in that coherent group, and \c num_cores + * provides a cached population-count for that mask. + * + * @note Whilst all cores are exposed in the mask, not all may be available to + * the application, depending on the Kernel Power policy. + * + * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage. + */ +struct mali_base_gpu_coherent_group { + u64 core_mask; /**< Core restriction mask required for the group */ + u16 num_cores; /**< Number of cores in the group */ + u16 padding[3]; +}; + +/** + * struct mali_base_gpu_coherent_group_info - Coherency group information + * + * Note that the sizes of the members could be reduced. However, the \c group + * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte + * aligned, thus leading to wastage if the other members sizes were reduced. + * + * The groups are sorted by core mask. The core masks are non-repeating and do + * not intersect. + */ +struct mali_base_gpu_coherent_group_info { + u32 num_groups; + + /** + * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches. + * + * The GPU Counter dumping writes 2048 bytes per core group, regardless of + * whether the core groups are coherent or not. Hence this member is needed + * to calculate how much memory is required for dumping. + * + * @note Do not use it to work out how many valid elements are in the + * group[] member. Use num_groups instead. 
+ */ + u32 num_core_groups; + + /** + * Coherency features of the memory, accessed by gpu_mem_features + * methods + */ + u32 coherency; + + u32 padding; + + /** + * Descriptors of coherent groups + */ + struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; +}; + +/** + * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware + * Configuration Discovery registers. + * + * The information is presented inefficiently for access. For frequent access, + * the values should be better expressed in an unpacked form in the + * base_gpu_props structure. + * + * The raw properties in gpu_raw_gpu_props are necessary to + * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device + * behaving differently?". In this case, all information about the + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali + * Tools software on the host PC. + * + */ +struct gpu_raw_gpu_props { + u64 shader_present; + u64 tiler_present; + u64 l2_present; + u64 stack_present; + + u32 l2_features; + u32 core_features; + u32 mem_features; + u32 mmu_features; + + u32 as_present; + + u32 js_present; + u32 js_features[GPU_MAX_JOB_SLOTS]; + u32 tiler_features; + u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + + u32 gpu_id; + + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; + + /* + * Note: This is the _selected_ coherency mode rather than the + * available modes as exposed in the coherency_features register. + */ + u32 coherency_mode; + + u32 thread_tls_alloc; +}; + +/** + * struct base_gpu_props - Return structure for base_get_gpu_props(). + * + * NOTE: the raw_props member in this data structure contains the register + * values from which the value of the other members are derived. The derived + * members exist to allow for efficient access and/or shielding the details + * of the layout of the registers. + * + * @unused_1: Keep for backwards compatibility. + * @raw_props: This member is large, likely to be 128 bytes. + * @coherency_info: This must be last member of the structure. + */ +struct base_gpu_props { + struct mali_base_gpu_core_props core_props; + struct mali_base_gpu_l2_cache_props l2_props; + u64 unused_1; + struct mali_base_gpu_tiler_props tiler_props; + struct mali_base_gpu_thread_props thread_props; + struct gpu_raw_gpu_props raw_props; + struct mali_base_gpu_coherent_group_info coherency_info; +}; + +#include "jm/mali_base_jm_kernel.h" + +/** + * base_mem_group_id_get() - Get group ID from flags + * @flags: Flags to pass to base_mem_alloc + * + * This inline function extracts the encoded group ID from flags + * and converts it into numeric value (0~15). + * + * Return: group ID(0~15) extracted from the parameter + */ +static inline int base_mem_group_id_get(base_mem_alloc_flags flags) +{ + LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); + return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> + BASEP_MEM_GROUP_ID_SHIFT); +} + +/** + * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags + * @id: group ID(0~15) you want to encode + * + * This inline function encodes specific group ID into base_mem_alloc_flags. + * Parameter 'id' should lie in-between 0 to 15. + * + * Return: base_mem_alloc_flags with the group ID (id) encoded + * + * The return value can be combined with other flags against base_mem_alloc + * to identify a specific memory group. 
+ */ +static inline base_mem_alloc_flags base_mem_group_id_set(int id) +{ + LOCAL_ASSERT(id >= 0); + LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT); + + return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & + BASE_MEM_GROUP_ID_MASK; +} + +/** + * base_context_mmu_group_id_set - Encode a memory group ID in + * base_context_create_flags + * + * Memory allocated for GPU page tables will come from the specified group. + * + * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1). + * + * Return: Bitmask of flags to pass to base_context_init. + */ +static inline base_context_create_flags base_context_mmu_group_id_set( + int const group_id) +{ + LOCAL_ASSERT(group_id >= 0); + LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT); + return BASEP_CONTEXT_MMU_GROUP_ID_MASK & + ((base_context_create_flags)group_id << + BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); +} + +/** + * base_context_mmu_group_id_get - Decode a memory group ID from + * base_context_create_flags + * + * Memory allocated for GPU page tables will come from the returned group. + * + * @flags: Bitmask of flags to pass to base_context_init. + * + * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). + */ +static inline int base_context_mmu_group_id_get( + base_context_create_flags const flags) +{ + LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); + return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> + BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); +} + +/* + * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These + * flags are also used, where applicable, for specifying which fields + * are valid following the request operation. + */ + +/* For monotonic (counter) timefield */ +#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0) +/* For system wide timestamp */ +#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1) +/* For GPU cycle counter */ +#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2) +/* Specify kernel GPU register timestamp */ +#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30) +/* Specify userspace cntvct_el0 timestamp source */ +#define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31) + +#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\ + BASE_TIMEINFO_MONOTONIC_FLAG | \ + BASE_TIMEINFO_TIMESTAMP_FLAG | \ + BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \ + BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ + BASE_TIMEINFO_USER_SOURCE_FLAG) + +#endif /* _BASE_KERNEL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_base_mem_priv.h b/drivers/gpu/arm/b_r26p0/mali_base_mem_priv.h new file mode 100644 index 000000000000..844a025b715d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_base_mem_priv.h @@ -0,0 +1,57 @@ +/* + * + * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _BASE_MEM_PRIV_H_ +#define _BASE_MEM_PRIV_H_ + +#define BASE_SYNCSET_OP_MSYNC (1U << 0) +#define BASE_SYNCSET_OP_CSYNC (1U << 1) + +/* + * This structure describe a basic memory coherency operation. + * It can either be: + * @li a sync from CPU to Memory: + * - type = ::BASE_SYNCSET_OP_MSYNC + * - mem_handle = a handle to the memory object on which the operation + * is taking place + * - user_addr = the address of the range to be synced + * - size = the amount of data to be synced, in bytes + * - offset is ignored. + * @li a sync from Memory to CPU: + * - type = ::BASE_SYNCSET_OP_CSYNC + * - mem_handle = a handle to the memory object on which the operation + * is taking place + * - user_addr = the address of the range to be synced + * - size = the amount of data to be synced, in bytes. + * - offset is ignored. + */ +struct basep_syncset { + struct base_mem_handle mem_handle; + u64 user_addr; + u64 size; + u8 type; + u8 padding[7]; +}; + +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_gpu_mem_trace.h b/drivers/gpu/arm/b_r26p0/mali_gpu_mem_trace.h new file mode 100644 index 000000000000..183e6c4d8576 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_gpu_mem_trace.h @@ -0,0 +1,73 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu_mem +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_gpu_mem_trace + +#if !defined(_TRACE_MALI_GPU_MEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MALI_GPU_MEM_H + +#include + +/* + * trace_gpu_mem_total + * + * The gpu_memory_total event indicates that there's an update to either the + * global or process total gpu memory counters. + * + * This event should be emitted whenever the kernel device driver allocates, + * frees, imports, unimports memory in the GPU addressable space. + * + * @gpu_id: Kbase device id. + * @pid: This is either the thread group ID of the process for which there was + * an update in the GPU memory usage or 0 so as to indicate an update in + * the device wide GPU memory usage. + * @size: GPU memory usage in bytes. 
+ */ +TRACE_EVENT(gpu_mem_total, + TP_PROTO(uint32_t gpu_id, uint32_t pid, uint64_t size), + + TP_ARGS(gpu_id, pid, size), + + TP_STRUCT__entry( + __field(uint32_t, gpu_id) + __field(uint32_t, pid) + __field(uint64_t, size) + ), + + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->pid = pid; + __entry->size = size; + ), + + TP_printk("gpu_id=%u pid=%u size=%llu", + __entry->gpu_id, + __entry->pid, + __entry->size) +); +#endif /* _TRACE_MALI_GPU_MEM_H */ + +/* This part must be outside protection */ +#include diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase.h b/drivers/gpu/arm/b_r26p0/mali_kbase.h new file mode 100644 index 000000000000..34bc91cdfcad --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase.h @@ -0,0 +1,634 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_H_ +#define _KBASE_H_ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)) +#include +#endif +#include +#include +#include +#include +#include + +#include "mali_base_kernel.h" +#include + +/* + * Include mali_kbase_defs.h first as this provides types needed by other local + * header files. + */ +#include "mali_kbase_defs.h" + +#include "debug/mali_kbase_debug_ktrace.h" +#include "context/mali_kbase_context.h" +#include "mali_kbase_strings.h" +#include "mali_kbase_mem_lowlevel.h" +#include "mali_kbase_utility.h" +#include "mali_kbase_mem.h" +#include "mmu/mali_kbase_mmu.h" +#include "mali_kbase_gpu_memory_debugfs.h" +#include "mali_kbase_mem_profile_debugfs.h" +#include "mali_kbase_gpuprops.h" +#include "mali_kbase_ioctl.h" +#include "mali_kbase_debug_job_fault.h" +#include "mali_kbase_jd_debugfs.h" +#include "mali_kbase_jm.h" +#include "mali_kbase_js.h" + +#include "ipa/mali_kbase_ipa.h" + +#ifdef CONFIG_GPU_TRACEPOINTS +#include +#endif + +#include "mali_linux_trace.h" + +/* MALI_SEC_INTEGRATION */ +#include + + +#ifndef u64_to_user_ptr +/* Introduced in Linux v4.6 */ +#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x) +#endif + + +/* Physical memory group ID for a special page which can alias several regions. 
+ */ +#define KBASE_MEM_GROUP_SINK BASE_MEM_GROUP_DEFAULT + +/* + * Kernel-side Base (KBase) APIs + */ + +struct kbase_device *kbase_device_alloc(void); +/* +* note: configuration attributes member of kbdev needs to have +* been setup before calling kbase_device_init +*/ + +int kbase_device_misc_init(struct kbase_device *kbdev); +void kbase_device_misc_term(struct kbase_device *kbdev); +void kbase_device_free(struct kbase_device *kbdev); +int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); + +/* Needed for gator integration and for reporting vsync information */ +struct kbase_device *kbase_find_device(int minor); +void kbase_release_device(struct kbase_device *kbdev); + +/** + * kbase_context_get_unmapped_area() - get an address range which is currently + * unmapped. + * @kctx: A kernel base context (which has its own GPU address space). + * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed + * as Mali GPU driver decides about the mapping). + * @len: Length of the address range. + * @pgoff: Page offset within the GPU address space of the kbase context. + * @flags: Flags for the allocation. + * + * Finds the unmapped address range which satisfies requirements specific to + * GPU and those provided by the call parameters. + * + * 1) Requirement for allocations greater than 2MB: + * - alignment offset is set to 2MB and the alignment mask to 2MB decremented + * by 1. + * + * 2) Requirements imposed for the shader memory alignment: + * - alignment is decided by the number of GPU pc bits which can be read from + * GPU properties of the device associated with this kbase context; alignment + * offset is set to this value in bytes and the alignment mask to the offset + * decremented by 1. + * - allocations must not to be at 4GB boundaries. Such cases are indicated + * by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase + * region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB. + * + * 3) Requirements imposed for tiler memory alignment, cases indicated by + * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase + * region): + * - alignment offset is set to the difference between the kbase region + * extent (converted from the original value in pages to bytes) and the kbase + * region initial_commit (also converted from the original value in pages to + * bytes); alignment mask is set to the kbase region extent in bytes and + * decremented by 1. + * + * Return: if successful, address of the unmapped area aligned as required; + * error code (negative) in case of failure; + */ +unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx, + const unsigned long addr, const unsigned long len, + const unsigned long pgoff, const unsigned long flags); + + +int assign_irqs(struct kbase_device *kbdev); + +int kbase_sysfs_init(struct kbase_device *kbdev); +void kbase_sysfs_term(struct kbase_device *kbdev); + + +int kbase_protected_mode_init(struct kbase_device *kbdev); +void kbase_protected_mode_term(struct kbase_device *kbdev); + +/** + * kbase_device_pm_init() - Performs power management initialization and + * Verifies device tree configurations. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if successful, otherwise a standard Linux error code + */ +int kbase_device_pm_init(struct kbase_device *kbdev); + +/** + * kbase_device_pm_term() - Performs power management deinitialization and + * Free resources. 
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Clean up all the resources + */ +void kbase_device_pm_term(struct kbase_device *kbdev); + + +int power_control_init(struct kbase_device *kbdev); +void power_control_term(struct kbase_device *kbdev); + +#ifdef CONFIG_DEBUG_FS +void kbase_device_debugfs_term(struct kbase_device *kbdev); +int kbase_device_debugfs_init(struct kbase_device *kbdev); +#else /* CONFIG_DEBUG_FS */ +static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + return 0; +} + +static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } +#endif /* CONFIG_DEBUG_FS */ + +int registers_map(struct kbase_device *kbdev); +void registers_unmap(struct kbase_device *kbdev); + +int kbase_device_coherency_init(struct kbase_device *kbdev); + +#ifdef CONFIG_MALI_BUSLOG +int buslog_init(struct kbase_device *kbdev); +void buslog_term(struct kbase_device *kbdev); +#endif + +int kbase_jd_init(struct kbase_context *kctx); +void kbase_jd_exit(struct kbase_context *kctx); + +/** + * kbase_jd_submit - Submit atoms to the job dispatcher + * + * @kctx: The kbase context to submit to + * @user_addr: The address in user space of the struct base_jd_atom array + * @nr_atoms: The number of atoms in the array + * @stride: sizeof(struct base_jd_atom) + * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6) + * + * Return: 0 on success or error code + */ +int kbase_jd_submit(struct kbase_context *kctx, + void __user *user_addr, u32 nr_atoms, u32 stride, + bool uk6_atom); + +/** + * kbase_jd_done_worker - Handle a job completion + * @data: a &struct work_struct + * + * This function requeues the job from the runpool (if it was soft-stopped or + * removed from NEXT registers). + * + * Removes it from the system if it finished/failed/was cancelled. + * + * Resolves dependencies to add dependent jobs to the context, potentially + * starting them if necessary (which may add more references to the context) + * + * Releases the reference to the context from the no-longer-running job. + * + * Handles retrying submission outside of IRQ context if it failed from within + * IRQ context. + */ +void kbase_jd_done_worker(struct work_struct *data); + +void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, + kbasep_js_atom_done_code done_code); +void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +void kbase_jd_zap_context(struct kbase_context *kctx); +bool jd_done_nolock(struct kbase_jd_atom *katom, + struct list_head *completed_jobs_ctx); +void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); +void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); + +/** + * kbase_job_done - Process completed jobs from job interrupt + * @kbdev: Pointer to the kbase device. + * @done: Bitmask of done or failed jobs, from JOB_IRQ_STAT register + * + * This function processes the completed, or failed, jobs from the GPU job + * slots, for the bits set in the @done bitmask. + * + * The hwaccess_lock must be held when calling this function. + */ +void kbase_job_done(struct kbase_device *kbdev, u32 done); + +/** + * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms + * and soft stop them + * @kctx: Pointer to context to check. + * @katom: Pointer to priority atom. 
+ * + * Atoms from @kctx on the same job slot as @katom, which have lower priority + * than @katom will be soft stopped and put back in the queue, so that atoms + * with higher priority can run. + * + * The hwaccess_lock must be held when calling this function. + */ +void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start + * of a renderpass. + * @kctx: Pointer to a kernel base context. + * @reg: Reference of a growable GPU memory region in the same context. + * Takes ownership of the reference if successful. + * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region and the atom currently + * executing on a job slot is the tiler job chain at the start of a renderpass. + * + * Return 0 if successful, otherwise a negative error code. + */ +int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, + struct kbase_va_region *reg); + +void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom); +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom, u32 sw_flags); +void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom); +void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom); + + +void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); +int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); +int kbase_event_pending(struct kbase_context *ctx); +int kbase_event_init(struct kbase_context *kctx); +void kbase_event_close(struct kbase_context *kctx); +void kbase_event_cleanup(struct kbase_context *kctx); +void kbase_event_wakeup(struct kbase_context *kctx); + +/** + * kbasep_jit_alloc_validate() - Validate the JIT allocation info. + * + * @kctx: Pointer to the kbase context within which the JIT + * allocation is to be validated. + * @info: Pointer to struct @base_jit_alloc_info + * which is to be validated. + * @return: 0 if jit allocation is valid; negative error code otherwise + */ +int kbasep_jit_alloc_validate(struct kbase_context *kctx, + struct base_jit_alloc_info *info); + +/** + * kbase_jit_retry_pending_alloc() - Retry blocked just-in-time memory + * allocations. + * + * @kctx: Pointer to the kbase context within which the just-in-time + * memory allocations are to be retried. + */ +void kbase_jit_retry_pending_alloc(struct kbase_context *kctx); + +/** + * kbase_free_user_buffer() - Free memory allocated for struct + * @kbase_debug_copy_buffer. + * + * @buffer: Pointer to the memory location allocated for the object + * of the type struct @kbase_debug_copy_buffer. + */ +static inline void kbase_free_user_buffer( + struct kbase_debug_copy_buffer *buffer) +{ + struct page **pages = buffer->extres_pages; + int nr_pages = buffer->nr_extres_pages; + + if (pages) { + int i; + + for (i = 0; i < nr_pages; i++) { + struct page *pg = pages[i]; + + if (pg) + put_page(pg); + } + kfree(pages); + } +} + +/** + * kbase_mem_copy_from_extres() - Copy from external resources. + * + * @kctx: kbase context within which the copying is to take place. 
+ * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy. + */ +int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data); +int kbase_process_soft_job(struct kbase_jd_atom *katom); +int kbase_prepare_soft_job(struct kbase_jd_atom *katom); +void kbase_finish_soft_job(struct kbase_jd_atom *katom); +void kbase_cancel_soft_job(struct kbase_jd_atom *katom); +void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); +void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); +#endif +int kbase_soft_event_update(struct kbase_context *kctx, + u64 event, + unsigned char new_status); + +void kbasep_soft_job_timeout_worker(struct timer_list *timer); +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); + +/* MALI_SEC_INTEGRATION */ +/* api to be ported per OS, only need to do the raw register access */ +void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value); +u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset); + +void kbasep_as_do_poke(struct work_struct *work); + +/** + * Check whether a system suspend is in progress, or has already been suspended + * + * The caller should ensure that either kbdev->pm.active_count_lock is held, or + * a dmb was executed recently (to ensure the value is most + * up-to-date). However, without a lock the value could change afterwards. + * + * @return false if a suspend is not in progress + * @return !=false otherwise + */ +static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) +{ + return kbdev->pm.suspending; +} + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +/* + * Check whether a gpu lost is in progress + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Indicates whether a gpu lost has been received and jobs are no longer + * being scheduled + * + * Return: false if gpu is lost + * Return: != false otherwise + */ +static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) +{ + return kbdev->pm.gpu_lost; +} +#endif + +/** + * kbase_pm_is_active - Determine whether the GPU is active + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This takes into account whether there is an active context reference. + * + * Return: true if the GPU is active, false otherwise + */ +static inline bool kbase_pm_is_active(struct kbase_device *kbdev) +{ + return kbdev->pm.active_count > 0; +} + +/** + * kbase_pm_metrics_start - Start the utilization metrics timer + * @kbdev: Pointer to the kbase device for which to start the utilization + * metrics calculation thread. + * + * Start the timer that drives the metrics calculation, runs the custom DVFS. + */ +void kbase_pm_metrics_start(struct kbase_device *kbdev); + +/** + * kbase_pm_metrics_stop - Stop the utilization metrics timer + * @kbdev: Pointer to the kbase device for which to stop the utilization + * metrics calculation thread. + * + * Stop the timer that drives the metrics calculation, runs the custom DVFS. 
+ */ +void kbase_pm_metrics_stop(struct kbase_device *kbdev); + +/** + * Return the atom's ID, as was originally supplied by userspace in + * base_jd_atom::atom_number + */ +static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int result; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->kctx == kctx); + + result = katom - &kctx->jctx.atoms[0]; + KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT); + return result; +} + +/** + * kbase_jd_atom_from_id - Return the atom structure for the given atom ID + * @kctx: Context pointer + * @id: ID of atom to retrieve + * + * Return: Pointer to struct kbase_jd_atom associated with the supplied ID + */ +static inline struct kbase_jd_atom *kbase_jd_atom_from_id( + struct kbase_context *kctx, int id) +{ + return &kctx->jctx.atoms[id]; +} + +/** + * Initialize the disjoint state + * + * The disjoint event count and state are both set to zero. + * + * Disjoint functions usage: + * + * The disjoint event count should be incremented whenever a disjoint event occurs. + * + * There are several cases which are regarded as disjoint behavior. Rather than just increment + * the counter during disjoint events we also increment the counter when jobs may be affected + * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state. + * + * Disjoint state is entered during GPU reset. Increasing the disjoint state also increases + * the count of disjoint events. + * + * The disjoint state is then used to increase the count of disjoint events during job submission + * and job completion. Any atom submitted or completed while the disjoint state is greater than + * zero is regarded as a disjoint event. + * + * The disjoint event counter is also incremented immediately whenever a job is soft stopped + * and during context creation. + * + * @param kbdev The kbase device + * + * Return: 0 on success and non-zero value on failure. + */ +void kbase_disjoint_init(struct kbase_device *kbdev); + +/** + * Increase the count of disjoint events + * called when a disjoint event has happened + * + * @param kbdev The kbase device + */ +void kbase_disjoint_event(struct kbase_device *kbdev); + +/** + * Increase the count of disjoint events only if the GPU is in a disjoint state + * + * This should be called when something happens which could be disjoint if the GPU + * is in a disjoint state. The state refcount keeps track of this. + * + * @param kbdev The kbase device + */ +void kbase_disjoint_event_potential(struct kbase_device *kbdev); + +/** + * Returns the count of disjoint events + * + * @param kbdev The kbase device + * @return the count of disjoint events + */ +u32 kbase_disjoint_event_get(struct kbase_device *kbdev); + +/** + * Increment the refcount state indicating that the GPU is in a disjoint state. 
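+ *
+ * As a hedged sketch only (the real call sites are the GPU reset paths,
+ * which are not shown in this header), the expected bracketing is:
+ *
+ *   kbase_disjoint_state_up(kbdev);
+ *   ... reset the GPU ...
+ *   kbase_disjoint_state_down(kbdev);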
+ * + * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) + * eventually after the disjoint state has completed @ref kbase_disjoint_state_down + * should be called + * + * @param kbdev The kbase device + */ +void kbase_disjoint_state_up(struct kbase_device *kbdev); + +/** + * Decrement the refcount state + * + * Also Increment the disjoint event count (calls @ref kbase_disjoint_event) + * + * Called after @ref kbase_disjoint_state_up once the disjoint state is over + * + * @param kbdev The kbase device + */ +void kbase_disjoint_state_down(struct kbase_device *kbdev); + +/** + * If a job is soft stopped and the number of contexts is >= this value + * it is reported as a disjoint event + */ +#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 + +#if !defined(UINT64_MAX) + #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#endif + +/* MALI_SEC_INTEGRATION */ +void gpu_dump_register_hooks(struct kbase_device *kbdev); +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +/* kbase_io_history_init - initialize data struct for register access history + * + * @kbdev The register history to initialize + * @n The number of register accesses that the buffer could hold + * + * @return 0 if successfully initialized, failure otherwise + */ +int kbase_io_history_init(struct kbase_io_history *h, u16 n); + +/* kbase_io_history_term - uninit all resources for the register access history + * + * @h The register history to terminate + */ +void kbase_io_history_term(struct kbase_io_history *h); + +/* kbase_io_history_dump - print the register history to the kernel ring buffer + * + * @kbdev Pointer to kbase_device containing the register history to dump + */ +void kbase_io_history_dump(struct kbase_device *kbdev); + +/** + * kbase_io_history_resize - resize the register access history buffer. + * + * @h: Pointer to a valid register history to resize + * @new_size: Number of accesses the buffer could hold + * + * A successful resize will clear all recent register accesses. + * If resizing fails for any reason (e.g., could not allocate memory, invalid + * buffer size) then the original buffer will be kept intact. + * + * @return 0 if the buffer was resized, failure otherwise + */ +int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); + +#else /* CONFIG_DEBUG_FS */ + +#define kbase_io_history_init(...) ((int)0) + +#define kbase_io_history_term CSTD_NOP + +#define kbase_io_history_dump CSTD_NOP + +#define kbase_io_history_resize CSTD_NOP + +#endif /* CONFIG_DEBUG_FS */ + +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_as_fault_debugfs.c new file mode 100644 index 000000000000..2e2e3945c829 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_as_fault_debugfs.c @@ -0,0 +1,113 @@ +/* + * + * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +#include +#include +#include + +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + +static int kbase_as_fault_read(struct seq_file *sfile, void *data) +{ + uintptr_t as_no = (uintptr_t) sfile->private; + + struct list_head *entry; + const struct list_head *kbdev_list; + struct kbase_device *kbdev = NULL; + + kbdev_list = kbase_device_get_list(); + + list_for_each(entry, kbdev_list) { + kbdev = list_entry(entry, struct kbase_device, entry); + + if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { + + /* don't show this one again until another fault occors */ + kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); + + /* output the last page fault addr */ + seq_printf(sfile, "%llu\n", + (u64) kbdev->as[as_no].pf_data.addr); + } + + } + + kbase_device_put_list(kbdev_list); + + return 0; +} + +static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbase_as_fault_read, in->i_private); +} + +static const struct file_operations as_fault_fops = { + .owner = THIS_MODULE, + .open = kbase_as_fault_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_MALI_DEBUG */ +#endif /* CONFIG_DEBUG_FS */ + +/* + * Initialize debugfs entry for each address space + */ +void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + uint i; + char as_name[64]; + struct dentry *debugfs_directory; + + kbdev->debugfs_as_read_bitmap = 0ULL; + + KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); + KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64)); + + debugfs_directory = debugfs_create_dir("address_spaces", + kbdev->mali_debugfs_directory); + + if (debugfs_directory) { + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); + debugfs_create_file(as_name, S_IRUGO, + debugfs_directory, + (void *)(uintptr_t)i, + &as_fault_fops); + } + } else { + dev_warn(kbdev->dev, + "unable to create address_spaces debugfs directory"); + } + +#endif /* CONFIG_MALI_DEBUG */ +#endif /* CONFIG_DEBUG_FS */ + return; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_as_fault_debugfs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_as_fault_debugfs.h new file mode 100644 index 000000000000..496d8b17f240 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_as_fault_debugfs.h @@ -0,0 +1,50 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_AS_FAULT_DEBUG_FS_H +#define _KBASE_AS_FAULT_DEBUG_FS_H + +/** + * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults + * + * @kbdev: Pointer to kbase_device + */ +void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); + +/** + * kbase_as_fault_debugfs_new() - make the last fault available on debugfs + * + * @kbdev: Pointer to kbase_device + * @as_no: The address space the fault occurred on + */ +static inline void +kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) +{ +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); +#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_MALI_DEBUG */ + return; +} + +#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_bits.h b/drivers/gpu/arm/b_r26p0/mali_kbase_bits.h new file mode 100644 index 000000000000..2c110937a792 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_bits.h @@ -0,0 +1,41 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + */ + +#ifndef _KBASE_BITS_H_ +#define _KBASE_BITS_H_ + +#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) +#include +#else +#include +#endif + +#endif /* _KBASE_BITS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_cache_policy.c b/drivers/gpu/arm/b_r26p0/mali_kbase_cache_policy.c new file mode 100644 index 000000000000..27a03cf02138 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_cache_policy.c @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Cache Policy API. 
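+ *
+ * Illustrative example of the flag mapping implemented by
+ * kbase_cache_enabled() below: a region created with BASE_MEM_CACHED_CPU set
+ * and BASE_MEM_UNCACHED_GPU clear yields
+ * KBASE_REG_CPU_CACHED | KBASE_REG_GPU_CACHED, i.e. both caches enabled.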
+ */ + +#include "mali_kbase_cache_policy.h" + +/* + * The output flags should be a combination of the following values: + * KBASE_REG_CPU_CACHED: CPU cache should be enabled + * KBASE_REG_GPU_CACHED: GPU cache should be enabled + * + * NOTE: Some components within the GPU might only be able to access memory + * that is KBASE_REG_GPU_CACHED. Refer to the specific GPU implementation for + * more details. + */ +u32 kbase_cache_enabled(u32 flags, u32 nr_pages) +{ + u32 cache_flags = 0; + + CSTD_UNUSED(nr_pages); + + if (!(flags & BASE_MEM_UNCACHED_GPU)) + cache_flags |= KBASE_REG_GPU_CACHED; + + if (flags & BASE_MEM_CACHED_CPU) + cache_flags |= KBASE_REG_CPU_CACHED; + + return cache_flags; +} + + +void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + dma_sync_single_for_device(kbdev->dev, handle, size, dir); +} + + +void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_cache_policy.h b/drivers/gpu/arm/b_r26p0/mali_kbase_cache_policy.h new file mode 100644 index 000000000000..8a1e5291bf5f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_cache_policy.h @@ -0,0 +1,50 @@ +/* + * + * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Cache Policy API. + */ + +#ifndef _KBASE_CACHE_POLICY_H_ +#define _KBASE_CACHE_POLICY_H_ + +#include "mali_kbase.h" +#include "mali_base_kernel.h" + +/** + * kbase_cache_enabled - Choose the cache policy for a specific region + * @flags: flags describing attributes of the region + * @nr_pages: total number of pages (backed or not) for the region + * + * Tells whether the CPU and GPU caches should be enabled or not for a specific + * region. + * This function can be modified to customize the cache policy depending on the + * flags and size of the region. + * + * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED + * depending on the cache policy + */ +u32 kbase_cache_enabled(u32 flags, u32 nr_pages); + +#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_caps.h b/drivers/gpu/arm/b_r26p0/mali_kbase_caps.h new file mode 100644 index 000000000000..b201a60fa6e3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_caps.h @@ -0,0 +1,65 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * @file mali_kbase_caps.h + * + * Driver Capability Queries. + */ + +#ifndef _KBASE_CAPS_H_ +#define _KBASE_CAPS_H_ + +#include + +typedef enum mali_kbase_cap { + MALI_KBASE_CAP_SYSTEM_MONITOR = 0, + MALI_KBASE_CAP_JIT_PRESSURE_LIMIT, + MALI_KBASE_CAP_MEM_GROW_ON_GPF, + MALI_KBASE_CAP_MEM_PROTECTED, + MALI_KBASE_NUM_CAPS +} mali_kbase_cap; + +extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap); + +static inline bool mali_kbase_supports_system_monitor(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_SYSTEM_MONITOR); +} + +static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT); +} + +static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF); +} + +static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); +} + +#endif /* __KBASE_CAPS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.c b/drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.c new file mode 100644 index 000000000000..87d5aaa6bb5d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.c @@ -0,0 +1,105 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_ccswe.h" +#include "mali_kbase_linux.h" + +#include +#include + +static u64 kbasep_ccswe_cycle_at_no_lock( + struct kbase_ccswe *self, u64 timestamp_ns) +{ + s64 diff_s, diff_ns; + u32 gpu_freq; + + lockdep_assert_held(&self->access); + + diff_ns = timestamp_ns - self->timestamp_ns; + gpu_freq = diff_ns > 0 ? 
self->gpu_freq : self->prev_gpu_freq; + + diff_s = div_s64(diff_ns, NSEC_PER_SEC); + diff_ns -= diff_s * NSEC_PER_SEC; + + return self->cycles_elapsed + diff_s * gpu_freq + + div_s64(diff_ns * gpu_freq, NSEC_PER_SEC); +} + +void kbase_ccswe_init(struct kbase_ccswe *self) +{ + memset(self, 0, sizeof(*self)); + + spin_lock_init(&self->access); +} +KBASE_EXPORT_TEST_API(kbase_ccswe_init); + +u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns) +{ + unsigned long flags; + u64 result; + + spin_lock_irqsave(&self->access, flags); + result = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns); + spin_unlock_irqrestore(&self->access, flags); + + return result; +} +KBASE_EXPORT_TEST_API(kbase_ccswe_cycle_at); + +void kbase_ccswe_freq_change( + struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq) +{ + unsigned long flags; + + spin_lock_irqsave(&self->access, flags); + + /* The time must go only forward. */ + if (WARN_ON(timestamp_ns < self->timestamp_ns)) + goto exit; + + /* If this is the first frequency change, cycles_elapsed is zero. */ + if (self->timestamp_ns) + self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock( + self, timestamp_ns); + + self->timestamp_ns = timestamp_ns; + self->prev_gpu_freq = self->gpu_freq; + self->gpu_freq = gpu_freq; +exit: + spin_unlock_irqrestore(&self->access, flags); +} +KBASE_EXPORT_TEST_API(kbase_ccswe_freq_change); + +void kbase_ccswe_reset(struct kbase_ccswe *self) +{ + unsigned long flags; + + spin_lock_irqsave(&self->access, flags); + + self->timestamp_ns = 0; + self->cycles_elapsed = 0; + self->gpu_freq = 0; + self->prev_gpu_freq = 0; + + spin_unlock_irqrestore(&self->access, flags); +} + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.h b/drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.h new file mode 100644 index 000000000000..3a7cf73d9eac --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_ccswe.h @@ -0,0 +1,97 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_CCSWE_H_ +#define _KBASE_CCSWE_H_ + +#include + +/** + * struct kbase_ccswe - Cycle count software estimator. + * + * @access: Spinlock protecting this structure access. + * @timestamp_ns: Timestamp(ns) when the last frequency change + * occurred. + * @cycles_elapsed: Number of cycles elapsed before the last frequency + * change + * @gpu_freq: Current GPU frequency(Hz) value. + * @prev_gpu_freq: Previous GPU frequency(Hz) before the last frequency + * change. + */ +struct kbase_ccswe { + spinlock_t access; + u64 timestamp_ns; + u64 cycles_elapsed; + u32 gpu_freq; + u32 prev_gpu_freq; +}; + +/** + * kbase_ccswe_init() - initialize the cycle count estimator. + * + * @self: Cycles count software estimator instance. 
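+ *
+ * Expected lifecycle, shown only as an illustrative sketch (the caller and
+ * the 600 MHz figure are hypothetical):
+ *
+ *   struct kbase_ccswe ccswe;
+ *
+ *   kbase_ccswe_init(&ccswe);
+ *   kbase_ccswe_freq_change(&ccswe, ktime_get_raw_ns(), 600000000);
+ *   ...
+ *   u64 cycles = kbase_ccswe_cycle_at(&ccswe, ktime_get_raw_ns());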
+ */ +void kbase_ccswe_init(struct kbase_ccswe *self); + + +/** + * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp. + * + * @self: Cycles count software estimator instance. + * @timestamp_ns: The timestamp(ns) for cycle count estimation. + * + * The timestamp must be bigger than the timestamp of the penultimate + * frequency change. If only one frequency change occurred, the + * timestamp must be bigger than the timestamp of the frequency change. + * This is to allow the following code to be executed w/o synchronization. + * If lines below executed atomically, it is safe to assume that only + * one frequency change may happen in between. + * + * u64 ts = ktime_get_raw_ns(); + * u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts) + * + * Returns: estimated value of cycle count at a given time. + */ +u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns); + +/** + * kbase_ccswe_freq_change() - update GPU frequency. + * + * @self: Cycles count software estimator instance. + * @timestamp_ns: Timestamp(ns) when frequency change occurred. + * @gpu_freq: New GPU frequency value. + * + * The timestamp must be bigger than the timestamp of the previous + * frequency change. The function is to be called at the frequency + * change moment (not later). + */ +void kbase_ccswe_freq_change( + struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq); + +/** + * kbase_ccswe_reset() - reset estimator state + * + * @self: Cycles count software estimator instance. + */ +void kbase_ccswe_reset(struct kbase_ccswe *self); + +#endif /* _KBASE_CCSWE_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_config.c b/drivers/gpu/arm/b_r26p0/mali_kbase_config.c new file mode 100644 index 000000000000..ce7070d1d634 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_config.c @@ -0,0 +1,48 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#include +#include + +int kbasep_platform_device_init(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_init_func) + return platform_funcs_p->platform_init_func(kbdev); + + return 0; +} + +void kbasep_platform_device_term(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_term_func) + platform_funcs_p->platform_term_func(kbdev); +} + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_config.h b/drivers/gpu/arm/b_r26p0/mali_kbase_config.h new file mode 100644 index 000000000000..fec1e402c180 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_config.h @@ -0,0 +1,398 @@ +/* + * + * (C) COPYRIGHT 2010-2017, 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_config.h + * Configuration API and Attributes for KBase + */ + +#ifndef _KBASE_CONFIG_H_ +#define _KBASE_CONFIG_H_ + +#include +#include +#include +#include + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_config Configuration API and Attributes + * @{ + */ + +/* Forward declaration of struct kbase_device */ +struct kbase_device; + +/** + * kbase_platform_funcs_conf - Specifies platform init/term function pointers + * + * Specifies the functions pointers for platform specific initialization and + * termination. By default no functions are required. No additional platform + * specific control is necessary. + */ +struct kbase_platform_funcs_conf { + /** + * platform_init_func - platform specific init function pointer + * @kbdev - kbase_device pointer + * + * Returns 0 on success, negative error code otherwise. + * + * Function pointer for platform specific initialization or NULL if no + * initialization function is required. At the point this the GPU is + * not active and its power and clocks are in unknown (platform specific + * state) as kbase doesn't yet have control of power and clocks. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed (and possibly initialized) in here. + */ + int (*platform_init_func)(struct kbase_device *kbdev); + /** + * platform_term_func - platform specific termination function pointer + * @kbdev - kbase_device pointer + * + * Function pointer for platform specific termination or NULL if no + * termination function is required. At the point this the GPU will be + * idle but still powered and clocked. 
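+ *
+ * For illustration only (the function and variable names are hypothetical),
+ * a platform might provide:
+ *
+ *   static int my_platform_init(struct kbase_device *kbdev) { return 0; }
+ *   static void my_platform_term(struct kbase_device *kbdev) { }
+ *
+ *   struct kbase_platform_funcs_conf platform_funcs = {
+ *           .platform_init_func = my_platform_init,
+ *           .platform_term_func = my_platform_term,
+ *   };
+ *
+ * with PLATFORM_FUNCS resolving to the address of such an object (see
+ * kbasep_platform_device_init() in mali_kbase_config.c).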
+ * + * The platform specific private pointer kbase_device::platform_context + * can be accessed (and possibly terminated) in here. + */ + void (*platform_term_func)(struct kbase_device *kbdev); +}; + +/* + * @brief Specifies the callbacks for power management + * + * By default no callbacks will be made and the GPU must not be powered off. + */ +struct kbase_pm_callback_conf { + /** Callback for when the GPU is idle and the power to it can be switched off. + * + * The system integrator can decide whether to either do nothing, just switch off + * the clocks to the GPU, or to completely power down the GPU. + * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + */ + void (*power_off_callback)(struct kbase_device *kbdev); + + /** Callback for when the GPU is about to become active and power must be supplied. + * + * This function must not return until the GPU is powered and clocked sufficiently for register access to + * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. + * If the GPU state has been lost then this function must return 1, otherwise it should return 0. + * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * + * The return value of the first call to this function is ignored. + * + * @return 1 if the GPU state may have been lost, 0 otherwise. + */ + int (*power_on_callback)(struct kbase_device *kbdev); + + /** Callback for when the system is requesting a suspend and GPU power + * must be switched off. + * + * Note that if this callback is present, then this may be called + * without a preceding call to power_off_callback. Therefore this + * callback must be able to take any action that might otherwise happen + * in power_off_callback. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed and modified in here. It is the platform \em + * callbacks responsibility to initialize and terminate this pointer if + * used (see @ref kbase_platform_funcs_conf). + */ + void (*power_suspend_callback)(struct kbase_device *kbdev); + + /** Callback for when the system is resuming from a suspend and GPU + * power must be switched on. + * + * Note that if this callback is present, then this may be called + * without a following call to power_on_callback. Therefore this + * callback must be able to take any action that might otherwise happen + * in power_on_callback. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed and modified in here. It is the platform \em + * callbacks responsibility to initialize and terminate this pointer if + * used (see @ref kbase_platform_funcs_conf). + */ + void (*power_resume_callback)(struct kbase_device *kbdev); + + /** Callback for handling runtime power management initialization. + * + * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback + * will become active from calls made to the OS from within this function. + * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. 
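+ *
+ * A minimal illustrative implementation (platform specific and purely a
+ * sketch, not part of this header) could simply hand the device over to
+ * runtime PM:
+ *
+ *   static int my_runtime_pm_init(struct kbase_device *kbdev)
+ *   {
+ *           pm_runtime_enable(kbdev->dev);
+ *           return 0;
+ *   }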
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + * + * @return 0 on success, else int error code. + */ + int (*power_runtime_init_callback)(struct kbase_device *kbdev); + + /** Callback for handling runtime power management termination. + * + * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback + * should no longer be called by the OS on completion of this function. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + */ + void (*power_runtime_term_callback)(struct kbase_device *kbdev); + + /** Callback for runtime power-off power management callback + * + * For linux this callback will be called by the kernel runtime_suspend callback. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + * + * @return 0 on success, else OS error code. + */ + void (*power_runtime_off_callback)(struct kbase_device *kbdev); + + /** Callback for runtime power-on power management callback + * + * For linux this callback will be called by the kernel runtime_resume callback. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + */ + int (*power_runtime_on_callback)(struct kbase_device *kbdev); + + /* + * Optional callback for checking if GPU can be suspended when idle + * + * This callback will be called by the runtime power management core + * when the reference count goes to 0 to provide notification that the + * GPU now seems idle. + * + * If this callback finds that the GPU can't be powered off, or handles + * suspend by powering off directly or queueing up a power off, a + * non-zero value must be returned to prevent the runtime PM core from + * also triggering a suspend. + * + * Returning 0 will cause the runtime PM core to conduct a regular + * autosuspend. + * + * This callback is optional and if not provided regular autosuspend + * will be triggered. + * + * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use + * this feature. + * + * Return 0 if GPU can be suspended, positive value if it can not be + * suspeneded by runtime PM, else OS error code + */ + int (*power_runtime_idle_callback)(struct kbase_device *kbdev); + + /* + * Optional callback for software reset + * + * This callback will be called by the power management core to trigger + * a GPU soft reset. + * + * Return 0 if the soft reset was successful and the RESET_COMPLETED + * interrupt will be raised, or a positive value if the interrupt won't + * be raised. On error, return the corresponding OS error code. + */ + int (*soft_reset_callback)(struct kbase_device *kbdev); + + /* MALI_SEC_INTEGRATION */ + /** Callback for GPU DVFS handler start/stop + **/ + int (*power_dvfs_on_callback)(struct kbase_device *kbdev); +}; + +/* struct kbase_gpu_clk_notifier_data - Data for clock rate change notifier. + * + * Pointer to this structure is supposed to be passed to the gpu clock rate + * change notifier function. This structure is deliberately aligned with the + * common clock framework notification structure 'struct clk_notifier_data' + * and such alignment should be maintained. + * + * @gpu_clk_handle: Handle of the GPU clock for which notifier was registered. + * @old_rate: Previous rate of this GPU clock. + * @new_rate: New rate of this GPU clock. 
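+ *
+ * Hedged sketch of a notifier callback consuming this structure (the
+ * callback name is illustrative only):
+ *
+ *   static int my_gpu_clk_change_cb(struct notifier_block *nb,
+ *                                   unsigned long event, void *data)
+ *   {
+ *           struct kbase_gpu_clk_notifier_data *ndata = data;
+ *
+ *           pr_debug("GPU clk %lu Hz -> %lu Hz\n",
+ *                    ndata->old_rate, ndata->new_rate);
+ *           return NOTIFY_DONE;
+ *   }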
+ */ +struct kbase_gpu_clk_notifier_data { + void *gpu_clk_handle; + unsigned long old_rate; + unsigned long new_rate; +}; + +/** + * kbase_clk_rate_trace_op_conf - Specifies GPU clock rate trace operations. + * + * Specifies the functions pointers for platform specific GPU clock rate trace + * operations. By default no functions are required. + */ +struct kbase_clk_rate_trace_op_conf { + /** + * enumerate_gpu_clk - Enumerate a GPU clock on the given index + * @kbdev - kbase_device pointer + * @index - GPU clock index + * + * Returns a handle unique to the given GPU clock, or NULL if the clock + * array has been exhausted at the given index value. + * + * Kbase will use this function pointer to enumerate the existence of a + * GPU clock on the given index. + */ + void *(*enumerate_gpu_clk)(struct kbase_device *kbdev, + unsigned int index); + + /** + * get_gpu_clk_rate - Get the current rate for an enumerated clock. + * @kbdev - kbase_device pointer + * @gpu_clk_handle - Handle unique to the enumerated GPU clock + * + * Returns current rate of the GPU clock in unit of Hz. + */ + unsigned long (*get_gpu_clk_rate)(struct kbase_device *kbdev, + void *gpu_clk_handle); + + /** + * gpu_clk_notifier_register - Register a clock rate change notifier. + * @kbdev - kbase_device pointer + * @gpu_clk_handle - Handle unique to the enumerated GPU clock + * @nb - notifier block containing the callback function + * pointer + * + * Returns 0 on success, negative error code otherwise. + * + * This function pointer is used to register a callback function that + * is supposed to be invoked whenever the rate of clock corresponding + * to @gpu_clk_handle changes. + * @nb contains the pointer to callback function. + * The callback function expects the pointer of type + * 'struct kbase_gpu_clk_notifier_data' as the third argument. + */ + int (*gpu_clk_notifier_register)(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb); + + /** + * gpu_clk_notifier_unregister - Unregister clock rate change notifier + * @kbdev - kbase_device pointer + * @gpu_clk_handle - Handle unique to the enumerated GPU clock + * @nb - notifier block containing the callback function + * pointer + * + * This function pointer is used to unregister a callback function that + * was previously registered to get notified of the change in rate + * of clock corresponding to @gpu_clk_handle. + */ + void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb); +}; + +#ifdef CONFIG_OF +struct kbase_platform_config { +}; +#else + +/* + * @brief Specifies start and end of I/O memory region. + */ +struct kbase_io_memory_region { + u64 start; + u64 end; +}; + +/* + * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. + */ +struct kbase_io_resources { + u32 job_irq_number; + u32 mmu_irq_number; + u32 gpu_irq_number; + struct kbase_io_memory_region io_memory_region; +}; + +struct kbase_platform_config { + const struct kbase_io_resources *io_resources; +}; + +#endif /* CONFIG_OF */ + +/** + * @brief Gets the pointer to platform config. + * + * @return Pointer to the platform config + */ +struct kbase_platform_config *kbase_get_platform_config(void); + +/** + * kbasep_platform_device_init: - Platform specific call to initialize hardware + * @kbdev: kbase device pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. 
The routine can initialize any hardware and context state that + * is required for the GPU block to function. + * + * Return: 0 if no errors have been found in the config. + * Negative error code otherwise. + */ +int kbasep_platform_device_init(struct kbase_device *kbdev); + +/** + * kbasep_platform_device_term - Platform specific call to terminate hardware + * @kbdev: Kbase device pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. The routine can destroy any platform specific context state and + * shut down any hardware functionality that are outside of the Power Management + * callbacks. + * + */ +void kbasep_platform_device_term(struct kbase_device *kbdev); + +#ifndef CONFIG_OF +/** + * kbase_platform_register - Register a platform device for the GPU + * + * This can be used to register a platform device on systems where device tree + * is not enabled and the platform initialisation code in the kernel doesn't + * create the GPU device. Where possible device tree should be used instead. + * + * Return: 0 for success, any other fail causes module initialisation to fail + */ +int kbase_platform_register(void); + +/** + * kbase_platform_unregister - Unregister a fake platform device + * + * Unregister the platform device created with kbase_platform_register() + */ +void kbase_platform_unregister(void); +#endif + + /** @} *//* end group kbase_config */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_CONFIG_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_config_defaults.h b/drivers/gpu/arm/b_r26p0/mali_kbase_config_defaults.h new file mode 100644 index 000000000000..e079281127ab --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_config_defaults.h @@ -0,0 +1,213 @@ +/* + * + * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_config_defaults.h + * + * Default values for configuration settings + * + */ + +#ifndef _KBASE_CONFIG_DEFAULTS_H_ +#define _KBASE_CONFIG_DEFAULTS_H_ + +/* Include mandatory definitions per platform */ +#include + +enum { + /** + * Use unrestricted Address ID width on the AXI bus. + */ + KBASE_AID_32 = 0x0, + + /** + * Restrict GPU to a half of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_16 = 0x3, + + /** + * Restrict GPU to a quarter of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_8 = 0x2, + + /** + * Restrict GPU to an eighth of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_4 = 0x1 +}; + +enum { + /** + * Use unrestricted Address ID width on the AXI bus. 
+ * Restricting ID width will reduce performance & bus load due to GPU. + */ + KBASE_3BIT_AID_32 = 0x0, + + /* Restrict GPU to 7/8 of maximum Address ID count. */ + KBASE_3BIT_AID_28 = 0x1, + + /* Restrict GPU to 3/4 of maximum Address ID count. */ + KBASE_3BIT_AID_24 = 0x2, + + /* Restrict GPU to 5/8 of maximum Address ID count. */ + KBASE_3BIT_AID_20 = 0x3, + + /* Restrict GPU to 1/2 of maximum Address ID count. */ + KBASE_3BIT_AID_16 = 0x4, + + /* Restrict GPU to 3/8 of maximum Address ID count. */ + KBASE_3BIT_AID_12 = 0x5, + + /* Restrict GPU to 1/4 of maximum Address ID count. */ + KBASE_3BIT_AID_8 = 0x6, + + /* Restrict GPU to 1/8 of maximum Address ID count. */ + KBASE_3BIT_AID_4 = 0x7 +}; + +/** + * Default period for DVFS sampling + */ +#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ + +/** + * Power Management poweroff tick granuality. This is in nanoseconds to + * allow HR timer support. + * + * On each scheduling tick, the power manager core may decide to: + * -# Power off one or more shader cores + * -# Power off the entire GPU + */ +#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ + +/** + * Power Manager number of ticks before shader cores are powered off + */ +#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ + +/** + * Default scheduling tick granuality + */ +#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ + +/** + * Default minimum number of scheduling ticks before jobs are soft-stopped. + * + * This defines the time-slice for a job (which may be different from that of a + * context) + */ +#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ + +/** + * Default minimum number of scheduling ticks before CL jobs are soft-stopped. + */ +#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ + +/** + * Default minimum number of scheduling ticks before jobs are hard-stopped + */ +#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ + +/** + * Default minimum number of scheduling ticks before CL jobs are hard-stopped. + */ +#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ + +/** + * Default minimum number of scheduling ticks before jobs are hard-stopped + * during dumping + */ +#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ + +/** + * Default timeout for some software jobs, after which the software event wait + * jobs will be cancelled. + */ +#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */ + +/** + * Default minimum number of scheduling ticks before the GPU is reset to clear a + * "stuck" job + */ +#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ + +/** + * Default minimum number of scheduling ticks before the GPU is reset to clear a + * "stuck" CL job. + */ +#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ + +/** + * Default minimum number of scheduling ticks before the GPU is reset to clear a + * "stuck" job during dumping. + */ +#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ + +/** + * Default number of milliseconds given for other jobs on the GPU to be + * soft-stopped when the GPU needs to be reset. + */ +#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ + +/** + * Default timeslice that a context is scheduled in for, in nanoseconds. + * + * When a context has used up this amount of time across its jobs, it is + * scheduled out to let another run. + * + * @note the resolution is nanoseconds (ns) here, because that's the format + * often used by the OS. + */ +#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ + +/** + * Maximum frequency (in kHz) that the GPU can be clocked. 
For some platforms + * this isn't available, so we simply define a dummy value here. If devfreq + * is enabled the value will be read from there, otherwise this should be + * overridden by defining GPU_FREQ_KHZ_MAX in the platform file. + */ +#define DEFAULT_GPU_FREQ_KHZ_MAX (5000) + +/** + * Default timeout for task execution on an endpoint + * + * Number of GPU clock cycles before the driver terminates a task that is + * making no forward progress on an endpoint (e.g. shader core). + * Value chosen is equivalent to the time after which a job is hard stopped + * which is 5 seconds (assuming the GPU is usually clocked at ~500 MHZ). + */ +#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024) + +/** + * Default threshold at which to switch to incremental rendering + * + * Fraction of the maximum size of an allocation that grows on GPU page fault + * that can be used up before the driver switches to incremental rendering, + * in 256ths. 0 means disable incremental rendering. + */ +#define DEFAULT_IR_THRESHOLD (192) + +#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_core_linux.c b/drivers/gpu/arm/b_r26p0/mali_kbase_core_linux.c new file mode 100644 index 000000000000..49837bcfaa02 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_core_linux.c @@ -0,0 +1,4763 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_MALI_DEVFREQ +#include +#include +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* CONFIG_MALI_DEVFREQ */ +#ifdef CONFIG_MALI_NO_MALI +#include "mali_kbase_model_linux.h" +#include +#endif /* CONFIG_MALI_NO_MALI */ +#include "mali_kbase_mem_profile_debugfs_buf_size.h" +#include "mali_kbase_debug_mem_view.h" +#include "mali_kbase_mem.h" +#include "mali_kbase_mem_pool_debugfs.h" +#include "mali_kbase_debugfs_helper.h" +#if !MALI_CUSTOMER_RELEASE +#include "mali_kbase_regs_dump_debugfs.h" +#endif /* !MALI_CUSTOMER_RELEASE */ +#include "mali_kbase_regs_history_debugfs.h" +#include +#include +#include +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +#include +#endif +#include +#include +#include +#include "mali_kbase_ioctl.h" +#include "mali_kbase_kinstr_jm.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_legacy.h" +#include "mali_kbase_vinstr.h" +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include "arbiter/mali_kbase_arbiter_pm.h" +#endif + +#include "mali_kbase_cs_experimental.h" + +/* MALI_SEC_INTEGRATION */ +#include + +#ifdef CONFIG_MALI_CINSTR_GWT +#include "mali_kbase_gwt.h" +#endif +#include "mali_kbase_pm_internal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* is_compat_task/in_compat_syscall */ +#include +#include +#include +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#include +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#include +#include +#include +#include + +#include + + +#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE) +#include +#else +#include +#endif + +#include + +#include + +#include +#include +#include + +/* MALI_SEC_INTEGRATION */ +#include + +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT +#include "./platform/exynos/gpu_control.h" +#endif + +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_DUMMY_JOB_WA +#include +#endif + +#include + +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" + +/** + * Kernel min/maj <=> API Version + */ +#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ + (((minor) & 0xFFF) << 8) | \ + ((0 & 0xFF) << 0)) + +#define KBASE_API_MIN(api_version) ((api_version >> 8) & 0xFFF) +#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF) + +/** + * mali_kbase_api_version_to_maj_min - convert an api_version to a min/maj pair + * + * @api_version: API version to convert + * @major: Major version number (must not exceed 12 bits) + * @minor: Major version number (must not exceed 12 bits) + */ +void mali_kbase_api_version_to_maj_min(unsigned long api_version, u16 *maj, u16 *min) +{ + if (WARN_ON(!maj)) + return; + + if (WARN_ON(!min)) + return; + + *maj = KBASE_API_MAJ(api_version); + *min = KBASE_API_MIN(api_version); +} + +/** + * kbase capabilities table + */ +typedef struct mali_kbase_capability_def { + u16 required_major; + u16 required_minor; +} mali_kbase_capability_def; + +/** + * This must be kept in-sync with mali_kbase_cap + * + * TODO: The alternative approach would be to embed the cap enum values + * in the table. Less efficient but potentially safer. 
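+ *
+ * Worked example (for illustration only): KBASE_API_VERSION(11, 15) encodes
+ * to 0x00B00F00, so a client that negotiated version 11.20 satisfies the
+ * SYSTEM_MONITOR requirement (11.15) but not JIT_PRESSURE_LIMIT (11.25),
+ * because mali_kbase_supports_cap() compares the encoded values with a
+ * plain >=.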
+ */ +static mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = { + { 11, 15 }, /* SYSTEM_MONITOR */ + { 11, 25 }, /* JIT_PRESSURE_LIMIT */ + { 11, 2 }, /* MEM_GROW_ON_GPF */ + { 11, 2 } /* MEM_PROTECTED */ +}; + +/** + * mali_kbase_supports_cap - Query whether a kbase capability is supported + * + * @api_version: API version to convert + * @cap: Capability to query for - see mali_kbase_caps.h + */ +bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap) +{ + bool supported = false; + unsigned long required_ver; + + mali_kbase_capability_def const *cap_def; + + if (WARN_ON(cap < 0)) + return false; + + if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) + return false; + + cap_def = &kbase_caps_table[(int)cap]; + required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); + supported = (api_version >= required_ver); + + return supported; +} + +/** + * kbase_file_new - Create an object representing a device file + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * + * In its initial state, the device file has no context (i.e. no GPU + * address space) and no API version number. Both must be assigned before + * kbase_file_get_kctx_if_setup_complete() can be used successfully. + * + * @return Address of an object representing a simulated device file, or NULL + * on failure. + */ +static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, + struct file *const filp) +{ + struct kbase_file *const kfile = kmalloc(sizeof(*kfile), GFP_KERNEL); + + if (kfile) { + kfile->kbdev = kbdev; + kfile->filp = filp; + kfile->kctx = NULL; + kfile->api_version = 0; + atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); + } + return kfile; +} + +/** + * kbase_file_set_api_version - Set the application programmer interface version + * + * @kfile: A device file created by kbase_file_new() + * @major: Major version number (must not exceed 12 bits) + * @minor: Major version number (must not exceed 12 bits) + * + * An application programmer interface (API) version must be specified + * before calling kbase_file_create_kctx(), otherwise an error is returned. + * + * If a version number was already set for the given @kfile (or is in the + * process of being set by another thread) then an error is returned. + * + * Return: 0 if successful, otherwise a negative error code. + */ +static int kbase_file_set_api_version(struct kbase_file *const kfile, + u16 const major, u16 const minor) +{ + if (WARN_ON(!kfile)) + return -EINVAL; + + /* setup pending, try to signal that we'll do the setup, + * if setup was already in progress, err this call + */ + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN, + KBASE_FILE_VSN_IN_PROGRESS) != KBASE_FILE_NEED_VSN) + return -EPERM; + + /* save the proposed version number for later use */ + kfile->api_version = KBASE_API_VERSION(major, minor); + + atomic_set(&kfile->setup_state, KBASE_FILE_NEED_CTX); + return 0; +} + +/** + * kbase_file_get_api_version - Get the application programmer interface version + * + * @kfile: A device file created by kbase_file_new() + * + * Return: The version number (encoded with KBASE_API_VERSION) or 0 if none has + * been set. 
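+ *
+ * For orientation only, the setup_state values used in this file progress
+ * roughly as follows (intermediate in-progress states elided):
+ *
+ *   KBASE_FILE_NEED_VSN -> KBASE_FILE_VSN_IN_PROGRESS ->
+ *   KBASE_FILE_NEED_CTX -> ... -> KBASE_FILE_COMPLETE
+ *
+ * so any state below KBASE_FILE_NEED_CTX means that no valid API version
+ * has been recorded yet.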
+ */ +static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile) +{ + if (WARN_ON(!kfile)) + return 0; + + if (atomic_read(&kfile->setup_state) < KBASE_FILE_NEED_CTX) + return 0; + + return kfile->api_version; +} + +/** + * kbase_file_create_kctx - Create a kernel base context + * + * @kfile: A device file created by kbase_file_new() + * @flags: Flags to set, which can be any combination of + * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. + * + * This creates a new context for the GPU platform device instance that was + * specified when kbase_file_new() was called. Each context has its own GPU + * address space. If a context was already created for the given @kfile (or is + * in the process of being created for it by another thread) then an error is + * returned. + * + * An API version number must have been set by kbase_file_set_api_version() + * before calling this function, otherwise an error is returned. + * + * Return: 0 if a new context was created, otherwise a negative error code. + */ +static int kbase_file_create_kctx(struct kbase_file *kfile, + base_context_create_flags flags); + +/** + * kbase_file_get_kctx_if_setup_complete - Get a kernel base context + * pointer from a device file + * + * @kfile: A device file created by kbase_file_new() + * + * This function returns an error code (encoded with ERR_PTR) if no context + * has been created for the given @kfile. This makes it safe to use in + * circumstances where the order of initialization cannot be enforced, but + * only if the caller checks the return value. + * + * Return: Address of the kernel base context associated with the @kfile, or + * NULL if no context exists. + */ +static struct kbase_context *kbase_file_get_kctx_if_setup_complete( + struct kbase_file *const kfile) +{ + if (WARN_ON(!kfile) || + atomic_read(&kfile->setup_state) != KBASE_FILE_COMPLETE || + WARN_ON(!kfile->kctx)) + return NULL; + + return kfile->kctx; +} + +/** + * kbase_file_delete - Destroy an object representing a device file + * + * @kfile: A device file created by kbase_file_new() + * + * If any context was created for the @kfile then it is destroyed. 
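+ *
+ * Illustrative lifecycle (the pairing with the file release path is an
+ * assumption, not spelled out here): kbase_file_new() creates the object
+ * from the driver's open method, kbase_api_handshake() and
+ * kbase_file_create_kctx() complete the setup, and kbase_file_delete()
+ * tears it all down again when the file is released.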
+ */ +static void kbase_file_delete(struct kbase_file *const kfile) +{ + struct kbase_device *kbdev = NULL; + + if (WARN_ON(!kfile)) + return; + + kfile->filp->private_data = NULL; + kbdev = kfile->kbdev; + + if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { + struct kbase_context *kctx = kfile->kctx; + /* MALI_SEC_INTEGRATION */ + struct kbase_context *lookup, *tmp; + +#ifdef CONFIG_DEBUG_FS + kbasep_mem_profile_debugfs_remove(kctx); +#endif + + /* MALI_SEC_INTEGRATION + * look up context list to set destroying_context + */ + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry_safe(lookup, tmp, &kbdev->kctx_list, kctx_list_link) { + if (kctx == lookup) { + kctx->destroying_context = true; + } + } + mutex_unlock(&kbdev->kctx_list_lock); + + mutex_lock(&kctx->legacy_hwcnt_lock); + /* If this client was performing hardware counter dumping and + * did not explicitly detach itself, destroy it now + */ + kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli); + kctx->legacy_hwcnt_cli = NULL; + mutex_unlock(&kctx->legacy_hwcnt_lock); + +#if IS_ENABLED(CONFIG_DEBUG_FS) + kbase_context_debugfs_term(kctx); +#endif + + kbase_destroy_context(kctx); + + dev_dbg(kbdev->dev, "deleted base context\n"); + } + + kbase_release_device(kbdev); + + kfree(kfile); +} + +static int kbase_api_handshake(struct kbase_file *kfile, + struct kbase_ioctl_version_check *version) +{ + int err = 0; + + switch (version->major) { + case BASE_UK_VERSION_MAJOR: + /* set minor to be the lowest common */ + version->minor = min_t(int, BASE_UK_VERSION_MINOR, + (int)version->minor); + break; + default: + /* We return our actual version regardless if it + * matches the version returned by userspace - + * userspace can bail if it can't handle this + * version + */ + version->major = BASE_UK_VERSION_MAJOR; + version->minor = BASE_UK_VERSION_MINOR; + break; + } + + /* save the proposed version number for later use */ + err = kbase_file_set_api_version(kfile, version->major, version->minor); + if (unlikely(err)) + return err; + + /* For backward compatibility, we may need to create the context before + * the flags have been set. Originally it was created on file open + * (with job submission disabled) but we don't support that usage. + */ + if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile))) + err = kbase_file_create_kctx(kfile, + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); + + return err; +} + +/** + * enum mali_error - Mali error codes shared with userspace + * + * This is subset of those common Mali errors that can be returned to userspace. + * Values of matching user and kernel space enumerators MUST be the same. + * MALI_ERROR_NONE is guaranteed to be 0. 
+ * + * @MALI_ERROR_NONE: Success + * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver + * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure + * @MALI_ERROR_FUNCTION_FAILED: Generic error code + */ +enum mali_error { + MALI_ERROR_NONE = 0, + MALI_ERROR_OUT_OF_GPU_MEMORY, + MALI_ERROR_OUT_OF_MEMORY, + MALI_ERROR_FUNCTION_FAILED, +}; + +static struct kbase_device *to_kbase_device(struct device *dev) +{ + return dev_get_drvdata(dev); +} + +int assign_irqs(struct kbase_device *kbdev) +{ + struct platform_device *pdev; + int i; + + if (!kbdev) + return -ENODEV; + + pdev = to_platform_device(kbdev->dev); + /* 3 IRQ resources */ + for (i = 0; i < 3; i++) { + struct resource *irq_res; + int irqtag; + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) { + dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); + return -ENOENT; + } + +#ifdef CONFIG_OF + if (!strncasecmp(irq_res->name, "JOB", 4)) { + irqtag = JOB_IRQ_TAG; + } else if (!strncasecmp(irq_res->name, "MMU", 4)) { + irqtag = MMU_IRQ_TAG; + } else if (!strncasecmp(irq_res->name, "GPU", 4)) { + irqtag = GPU_IRQ_TAG; + } else { + dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", + irq_res->name); + return -EINVAL; + } +#else + irqtag = i; +#endif /* CONFIG_OF */ + kbdev->irqs[irqtag].irq = irq_res->start; + kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK; + } + + return 0; +} + +/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ +struct kbase_device *kbase_find_device(int minor) +{ + struct kbase_device *kbdev = NULL; + struct list_head *entry; + const struct list_head *dev_list = kbase_device_get_list(); + + list_for_each(entry, dev_list) { + struct kbase_device *tmp; + + tmp = list_entry(entry, struct kbase_device, entry); + if (tmp->mdev.minor == minor || minor == -1) { + kbdev = tmp; + get_device(kbdev->dev); + break; + } + } + kbase_device_put_list(dev_list); + + return kbdev; +} +EXPORT_SYMBOL(kbase_find_device); + +void kbase_release_device(struct kbase_device *kbdev) +{ + put_device(kbdev->dev); +} +EXPORT_SYMBOL(kbase_release_device); + +#ifdef CONFIG_DEBUG_FS +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \ + !(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)) +/* + * Older versions, before v4.6, of the kernel doesn't have + * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28 + */ +static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + char buf[4]; + + count = min(count, sizeof(buf) - 1); + + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + + return strtobool(buf, res); +} +#endif + +static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + int err; + bool value; + + err = kstrtobool_from_user(ubuf, size, &value); + if (err) + return err; + + if (value) + kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + else + kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); + + return size; +} + +static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + char buf[32]; + int count; + bool value; + + value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); + + count = scnprintf(buf, sizeof(buf), "%s\n", value ? 
"Y" : "N"); + + return simple_read_from_buffer(ubuf, size, off, buf, count); +} + +static const struct file_operations kbase_infinite_cache_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = write_ctx_infinite_cache, + .read = read_ctx_infinite_cache, +}; + +static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf, + size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + int err; + bool value; + + err = kstrtobool_from_user(ubuf, size, &value); + if (err) + return err; + + if (value) { +#if defined(CONFIG_64BIT) + /* 32-bit clients cannot force SAME_VA */ + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + return -EINVAL; + kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); +#else /* defined(CONFIG_64BIT) */ + /* 32-bit clients cannot force SAME_VA */ + return -EINVAL; +#endif /* defined(CONFIG_64BIT) */ + } else { + kbase_ctx_flag_clear(kctx, KCTX_FORCE_SAME_VA); + } + + return size; +} + +static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf, + size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + char buf[32]; + int count; + bool value; + + value = kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA); + + count = scnprintf(buf, sizeof(buf), "%s\n", value ? "Y" : "N"); + + return simple_read_from_buffer(ubuf, size, off, buf, count); +} + +static const struct file_operations kbase_force_same_va_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = write_ctx_force_same_va, + .read = read_ctx_force_same_va, +}; +#endif /* CONFIG_DEBUG_FS */ + +static int kbase_file_create_kctx(struct kbase_file *const kfile, + base_context_create_flags const flags) +{ + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx = NULL; +#ifdef CONFIG_DEBUG_FS + char kctx_name[64]; +#endif + + if (WARN_ON(!kfile)) + return -EINVAL; + + /* setup pending, try to signal that we'll do the setup, + * if setup was already in progress, err this call + */ + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_CTX, + KBASE_FILE_CTX_IN_PROGRESS) != KBASE_FILE_NEED_CTX) + return -EPERM; + + kbdev = kfile->kbdev; + +#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) + kctx = kbase_create_context(kbdev, in_compat_syscall(), + flags, kfile->api_version, kfile->filp); +#else + kctx = kbase_create_context(kbdev, is_compat_task(), + flags, kfile->api_version, kfile->filp); +#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */ + + /* if bad flags, will stay stuck in setup mode */ + if (!kctx) + return -ENOMEM; + + if (kbdev->infinite_cache_active_default) + kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + +#ifdef CONFIG_DEBUG_FS + snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + + kctx->kctx_dentry = debugfs_create_dir(kctx_name, + kbdev->debugfs_ctx_directory); + + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { + /* we don't treat this as a fail - just warn about it */ + dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n"); + } else { +#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) + /* prevent unprivileged use of debug file system + * in old kernel version + */ + debugfs_create_file("infinite_cache", 0600, kctx->kctx_dentry, + kctx, &kbase_infinite_cache_fops); +#else + debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry, + kctx, &kbase_infinite_cache_fops); +#endif + debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, + kctx, &kbase_force_same_va_fops); + + mutex_init(&kctx->mem_profile_lock); + + kbase_context_debugfs_init(kctx); + } +#endif /* CONFIG_DEBUG_FS */ + + 
dev_dbg(kbdev->dev, "created base context\n"); + + kfile->kctx = kctx; + atomic_set(&kfile->setup_state, KBASE_FILE_COMPLETE); + + return 0; +} + +static int kbase_open(struct inode *inode, struct file *filp) +{ + struct kbase_device *kbdev = NULL; + struct kbase_file *kfile; + int ret = 0; + + kbdev = kbase_find_device(iminor(inode)); + + if (!kbdev) + return -ENODEV; + + kfile = kbase_file_new(kbdev, filp); + if (!kfile) { + ret = -ENOMEM; + goto out; + } + + filp->private_data = kfile; + filp->f_mode |= FMODE_UNSIGNED_OFFSET; + + return 0; + + out: + kbase_release_device(kbdev); + return ret; +} + +static int kbase_release(struct inode *inode, struct file *filp) +{ + struct kbase_file *const kfile = filp->private_data; + + kbase_file_delete(kfile); + return 0; +} + +static int kbase_api_set_flags(struct kbase_file *kfile, + struct kbase_ioctl_set_flags *flags) +{ + int err = 0; + unsigned long const api_version = kbase_file_get_api_version(kfile); + struct kbase_context *kctx = NULL; + + /* Validate flags */ + if (flags->create_flags != + (flags->create_flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)) + return -EINVAL; + + /* For backward compatibility, the context may have been created before + * the flags were set. + */ + if (mali_kbase_supports_system_monitor(api_version)) { + err = kbase_file_create_kctx(kfile, flags->create_flags); + } else { + struct kbasep_js_kctx_info *js_kctx_info = NULL; + unsigned long irq_flags = 0; + + /* If setup is incomplete (e.g. because the API version + * wasn't set) then we have to give up. + */ + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) + return -EPERM; + + js_kctx_info = &kctx->jctx.sched_info; + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + /* Translate the flags */ + if ((flags->create_flags & + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) + kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + } + + return err; +} + +static int kbase_api_job_submit(struct kbase_context *kctx, + struct kbase_ioctl_job_submit *submit) +{ + return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr), + submit->nr_atoms, + submit->stride, false); +} + +static int kbase_api_get_gpuprops(struct kbase_context *kctx, + struct kbase_ioctl_get_gpuprops *get_props) +{ + struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props; + int err; + + if (get_props->flags != 0) { + dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops"); + return -EINVAL; + } + + if (get_props->size == 0) + return kprops->prop_buffer_size; + if (get_props->size < kprops->prop_buffer_size) + return -EINVAL; + + err = copy_to_user(u64_to_user_ptr(get_props->buffer), + kprops->prop_buffer, + kprops->prop_buffer_size); + if (err) + return -EFAULT; + return kprops->prop_buffer_size; +} + +static int kbase_api_post_term(struct kbase_context *kctx) +{ + kbase_event_close(kctx); + return 0; +} + +static int kbase_api_mem_alloc(struct kbase_context *kctx, + union kbase_ioctl_mem_alloc *alloc) +{ + struct kbase_va_region *reg; + u64 flags = alloc->in.flags; + u64 gpu_va; + + rcu_read_lock(); + /* Don't allow memory allocation until user space has set up the + * tracking page (which sets kctx->process_mm). Also catches when we've + * forked. 
+ */ + if (rcu_dereference(kctx->process_mm) != current->mm) { + rcu_read_unlock(); + return -EINVAL; + } + rcu_read_unlock(); + + if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) + return -ENOMEM; + + /* Force SAME_VA if a 64-bit client. + * The only exception is GPU-executable memory if an EXEC_VA zone + * has been initialized. In that case, GPU-executable memory may + * or may not be SAME_VA. + */ + if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && + kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) { + if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx)) + flags |= BASE_MEM_SAME_VA; + } + + + reg = kbase_mem_alloc(kctx, alloc->in.va_pages, + alloc->in.commit_pages, + alloc->in.extent, + &flags, &gpu_va); + + if (!reg) + return -ENOMEM; + + alloc->out.flags = flags; + alloc->out.gpu_va = gpu_va; + + return 0; +} + +static int kbase_api_mem_query(struct kbase_context *kctx, + union kbase_ioctl_mem_query *query) +{ + return kbase_mem_query(kctx, query->in.gpu_addr, + query->in.query, &query->out.value); +} + +static int kbase_api_mem_free(struct kbase_context *kctx, + struct kbase_ioctl_mem_free *free) +{ + return kbase_mem_free(kctx, free->gpu_addr); +} + +static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, + union kbase_kinstr_jm_fd *arg) +{ + return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg); +} + +static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, + struct kbase_ioctl_hwcnt_reader_setup *setup) +{ + return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup); +} + +static int kbase_api_hwcnt_enable(struct kbase_context *kctx, + struct kbase_ioctl_hwcnt_enable *enable) +{ + int ret; + + mutex_lock(&kctx->legacy_hwcnt_lock); + if (enable->dump_buffer != 0) { + /* Non-zero dump buffer, so user wants to create the client */ + if (kctx->legacy_hwcnt_cli == NULL) { + ret = kbase_hwcnt_legacy_client_create( + kctx->kbdev->hwcnt_gpu_virt, + enable, + &kctx->legacy_hwcnt_cli); + } else { + /* This context already has a client */ + ret = -EBUSY; + } + } else { + /* Zero dump buffer, so user wants to destroy the client */ + if (kctx->legacy_hwcnt_cli != NULL) { + kbase_hwcnt_legacy_client_destroy( + kctx->legacy_hwcnt_cli); + kctx->legacy_hwcnt_cli = NULL; + ret = 0; + } else { + /* This context has no client to destroy */ + ret = -EINVAL; + } + } + mutex_unlock(&kctx->legacy_hwcnt_lock); + + return ret; +} + +static int kbase_api_hwcnt_dump(struct kbase_context *kctx) +{ + int ret; + + mutex_lock(&kctx->legacy_hwcnt_lock); + ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli); + mutex_unlock(&kctx->legacy_hwcnt_lock); + + return ret; +} + +static int kbase_api_hwcnt_clear(struct kbase_context *kctx) +{ + int ret; + + mutex_lock(&kctx->legacy_hwcnt_lock); + ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli); + mutex_unlock(&kctx->legacy_hwcnt_lock); + + return ret; +} + +static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, + union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo) +{ + u32 flags = timeinfo->in.request_flags; + struct timespec64 ts; + u64 timestamp; + u64 cycle_cnt; + + kbase_pm_context_active(kctx->kbdev); + + kbase_backend_get_gpu_time(kctx->kbdev, + (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) ? &cycle_cnt : NULL, + (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) ? &timestamp : NULL, + (flags & BASE_TIMEINFO_MONOTONIC_FLAG) ?
&ts : NULL); + + if (flags & BASE_TIMEINFO_TIMESTAMP_FLAG) + timeinfo->out.timestamp = timestamp; + + if (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) + timeinfo->out.cycle_counter = cycle_cnt; + + if (flags & BASE_TIMEINFO_MONOTONIC_FLAG) { + timeinfo->out.sec = ts.tv_sec; + timeinfo->out.nsec = ts.tv_nsec; + } + + kbase_pm_context_idle(kctx->kbdev); + + return 0; +} + +#ifdef CONFIG_MALI_NO_MALI +static int kbase_api_hwcnt_set(struct kbase_context *kctx, + struct kbase_ioctl_hwcnt_values *values) +{ + gpu_model_set_dummy_prfcnt_sample( + (u32 __user *)(uintptr_t)values->data, + values->size); + + return 0; +} +#endif + +static int kbase_api_disjoint_query(struct kbase_context *kctx, + struct kbase_ioctl_disjoint_query *query) +{ + query->counter = kbase_disjoint_event_get(kctx->kbdev); + + return 0; +} + +static int kbase_api_get_ddk_version(struct kbase_context *kctx, + struct kbase_ioctl_get_ddk_version *version) +{ + int ret; + int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); + + if (version->version_buffer == 0) + return len; + + if (version->size < len) + return -EOVERFLOW; + + ret = copy_to_user(u64_to_user_ptr(version->version_buffer), + KERNEL_SIDE_DDK_VERSION_STRING, + sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); + + if (ret) + return -EFAULT; + + return len; +} + +/* Defaults for legacy just-in-time memory allocator initialization + * kernel calls + */ +#define DEFAULT_MAX_JIT_ALLOCATIONS 255 +#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */ + +static int kbase_api_mem_jit_init_10_2(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init_10_2 *jit_init) +{ + kctx->jit_version = 1; + + /* since no phys_pages parameter, use the maximum: va_pages */ + return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, + DEFAULT_MAX_JIT_ALLOCATIONS, + JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT, + jit_init->va_pages); +} + +static int kbase_api_mem_jit_init_11_5(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init_11_5 *jit_init) +{ + int i; + + kctx->jit_version = 2; + + for (i = 0; i < sizeof(jit_init->padding); i++) { + /* Ensure all padding bytes are 0 for potential future + * extension + */ + if (jit_init->padding[i]) + return -EINVAL; + } + + /* since no phys_pages parameter, use the maximum: va_pages */ + return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, + jit_init->max_allocations, jit_init->trim_level, + jit_init->group_id, jit_init->va_pages); +} + +static int kbase_api_mem_jit_init(struct kbase_context *kctx, + struct kbase_ioctl_mem_jit_init *jit_init) +{ + int i; + + kctx->jit_version = 3; + + for (i = 0; i < sizeof(jit_init->padding); i++) { + /* Ensure all padding bytes are 0 for potential future + * extension + */ + if (jit_init->padding[i]) + return -EINVAL; + } + + return kbase_region_tracker_init_jit(kctx, jit_init->va_pages, + jit_init->max_allocations, jit_init->trim_level, + jit_init->group_id, jit_init->phys_pages); +} + +static int kbase_api_mem_exec_init(struct kbase_context *kctx, + struct kbase_ioctl_mem_exec_init *exec_init) +{ + return kbase_region_tracker_init_exec(kctx, exec_init->va_pages); +} + +static int kbase_api_mem_sync(struct kbase_context *kctx, + struct kbase_ioctl_mem_sync *sync) +{ + struct basep_syncset sset = { + .mem_handle.basep.handle = sync->handle, + .user_addr = sync->user_addr, + .size = sync->size, + .type = sync->type + }; + + return kbase_sync_now(kctx, &sset); +} + +static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx, + union kbase_ioctl_mem_find_cpu_offset *find) +{ + return 
kbasep_find_enclosing_cpu_mapping_offset( + kctx, + find->in.cpu_addr, + find->in.size, + &find->out.offset); +} + +static int kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx, + union kbase_ioctl_mem_find_gpu_start_and_offset *find) +{ + return kbasep_find_enclosing_gpu_mapping_start_and_offset( + kctx, + find->in.gpu_addr, + find->in.size, + &find->out.start, + &find->out.offset); +} + +static int kbase_api_get_context_id(struct kbase_context *kctx, + struct kbase_ioctl_get_context_id *info) +{ + info->id = kctx->id; + + return 0; +} + +static int kbase_api_tlstream_acquire(struct kbase_context *kctx, + struct kbase_ioctl_tlstream_acquire *acquire) +{ + return kbase_timeline_io_acquire(kctx->kbdev, acquire->flags); +} + +static int kbase_api_tlstream_flush(struct kbase_context *kctx) +{ + kbase_timeline_streams_flush(kctx->kbdev->timeline); + + return 0; +} + +static int kbase_api_mem_commit(struct kbase_context *kctx, + struct kbase_ioctl_mem_commit *commit) +{ + return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages); +} + +static int kbase_api_mem_alias(struct kbase_context *kctx, + union kbase_ioctl_mem_alias *alias) +{ + struct base_mem_aliasing_info *ai; + u64 flags; + int err; + + if (alias->in.nents == 0 || alias->in.nents > 2048) + return -EINVAL; + + if (alias->in.stride > (U64_MAX / 2048)) + return -EINVAL; + + ai = vmalloc(sizeof(*ai) * alias->in.nents); + if (!ai) + return -ENOMEM; + + err = copy_from_user(ai, + u64_to_user_ptr(alias->in.aliasing_info), + sizeof(*ai) * alias->in.nents); + if (err) { + vfree(ai); + return -EFAULT; + } + + flags = alias->in.flags; + if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) { + vfree(ai); + return -EINVAL; + } + + alias->out.gpu_va = kbase_mem_alias(kctx, &flags, + alias->in.stride, alias->in.nents, + ai, &alias->out.va_pages); + + alias->out.flags = flags; + + vfree(ai); + + if (alias->out.gpu_va == 0) + return -ENOMEM; + + return 0; +} + +static int kbase_api_mem_import(struct kbase_context *kctx, + union kbase_ioctl_mem_import *import) +{ + int ret; + u64 flags = import->in.flags; + + if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) + return -ENOMEM; + + ret = kbase_mem_import(kctx, + import->in.type, + u64_to_user_ptr(import->in.phandle), + import->in.padding, + &import->out.gpu_va, + &import->out.va_pages, + &flags); + + import->out.flags = flags; + + return ret; +} + +static int kbase_api_mem_flags_change(struct kbase_context *kctx, + struct kbase_ioctl_mem_flags_change *change) +{ + if (change->flags & BASEP_MEM_FLAGS_KERNEL_ONLY) + return -ENOMEM; + + return kbase_mem_flags_change(kctx, change->gpu_va, + change->flags, change->mask); +} + +static int kbase_api_stream_create(struct kbase_context *kctx, + struct kbase_ioctl_stream_create *stream) +{ +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + int fd, ret; + + /* Name must be NULL-terminated and padded with NULLs, so check last + * character is NULL + */ + if (stream->name[sizeof(stream->name)-1] != 0) + return -EINVAL; + + ret = kbase_sync_fence_stream_create(stream->name, &fd); + + if (ret) + return ret; + return fd; +#else + return -ENOENT; +#endif +} + +static int kbase_api_fence_validate(struct kbase_context *kctx, + struct kbase_ioctl_fence_validate *validate) +{ +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + return kbase_sync_fence_validate(validate->fd); +#else + return -ENOENT; +#endif +} + +static int kbase_api_mem_profile_add(struct kbase_context *kctx, + struct kbase_ioctl_mem_profile_add *data) +{ + char *buf; + int err; + + if 
(data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { + dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n"); + return -EINVAL; + } + + buf = kmalloc(data->len, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(buf)) + return -ENOMEM; + + err = copy_from_user(buf, u64_to_user_ptr(data->buffer), + data->len); + if (err) { + kfree(buf); + return -EFAULT; + } + + return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len); +} + +static int kbase_api_soft_event_update(struct kbase_context *kctx, + struct kbase_ioctl_soft_event_update *update) +{ + if (update->flags != 0) + return -EINVAL; + + return kbase_soft_event_update(kctx, update->event, update->new_status); +} + +/* MALI_SEC_INTEGRATION */ +static int kbase_api_combination_boost(struct kbase_context *kctx, + struct kbase_ioctl_slsi_combination_boost_flags *flags) +{ + if (flags->flags == 0) + return -EINVAL; + + return gpu_vendor_dispatch(kctx, flags->flags); +} + +/* MALI_SEC_INTEGRATION */ +static int kbase_api_vk_boost(struct kbase_context *kctx, + struct kbase_ioctl_slsi_vk_boost_flags *flags) +{ + if (flags->flags == 0) + return -EINVAL; + + return gpu_vendor_dispatch(kctx, flags->flags); +} + +/* MALI_SEC_INTEGRATION */ +static int kbase_api_negative_boost(struct kbase_context *kctx, + struct kbase_ioctl_slsi_negative_boost_flags *flags) +{ + if (flags->flags == 0) + return -EINVAL; + + return gpu_vendor_dispatch(kctx, flags->flags); +} + + +static int kbase_api_sticky_resource_map(struct kbase_context *kctx, + struct kbase_ioctl_sticky_resource_map *map) +{ + int ret; + u64 i; + u64 gpu_addr[BASE_EXT_RES_COUNT_MAX]; + + if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX) + return -EOVERFLOW; + + ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address), + sizeof(u64) * map->count); + + if (ret != 0) + return -EFAULT; + + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < map->count; i++) { + if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) { + /* Invalid resource */ + ret = -EINVAL; + break; + } + } + + if (ret != 0) { + while (i > 0) { + i--; + kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i]); + } + } + + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, + struct kbase_ioctl_sticky_resource_unmap *unmap) +{ + int ret; + u64 i; + u64 gpu_addr[BASE_EXT_RES_COUNT_MAX]; + + if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX) + return -EOVERFLOW; + + ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address), + sizeof(u64) * unmap->count); + + if (ret != 0) + return -EFAULT; + + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < unmap->count; i++) { + if (!kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i])) { + /* Invalid resource, but we keep going anyway */ + ret = -EINVAL; + } + } + + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +#if MALI_UNIT_TEST +static int kbase_api_tlstream_test(struct kbase_context *kctx, + struct kbase_ioctl_tlstream_test *test) +{ + kbase_timeline_test( + kctx->kbdev, + test->tpw_count, + test->msg_delay, + test->msg_count, + test->aux_msg); + + return 0; +} + +static int kbase_api_tlstream_stats(struct kbase_context *kctx, + struct kbase_ioctl_tlstream_stats *stats) +{ + kbase_timeline_stats(kctx->kbdev->timeline, + &stats->bytes_collected, + &stats->bytes_generated); + + return 0; +} +#endif /* MALI_UNIT_TEST */ + + +#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ + do { \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ + return function(arg); \ + } while (0) + +#define KBASE_HANDLE_IOCTL_IN(cmd, function, 
type, arg) \ + do { \ + type param; \ + int err; \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + err = copy_from_user(&param, uarg, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + return function(arg, &param); \ + } while (0) + +#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + memset(&param, 0, sizeof(param)); \ + ret = function(arg, &param); \ + err = copy_to_user(uarg, &param, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + return ret; \ + } while (0) + +#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + err = copy_from_user(&param, uarg, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + ret = function(arg, &param); \ + err = copy_to_user(uarg, &param, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + return ret; \ + } while (0) + +static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct kbase_file *const kfile = filp->private_data; + struct kbase_context *kctx = NULL; + struct kbase_device *kbdev = kfile->kbdev; + void __user *uarg = (void __user *)arg; + + /* Only these ioctls are available until setup is complete */ + switch (cmd) { + case KBASE_IOCTL_VERSION_CHECK: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK, + kbase_api_handshake, + struct kbase_ioctl_version_check, + kfile); + break; + + case KBASE_IOCTL_SET_FLAGS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, + kbase_api_set_flags, + struct kbase_ioctl_set_flags, + kfile); + break; + } + + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) + return -EPERM; + + /* Normal ioctls */ + switch (cmd) { + case KBASE_IOCTL_JOB_SUBMIT: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, + kbase_api_job_submit, + struct kbase_ioctl_job_submit, + kctx); + break; + case KBASE_IOCTL_GET_GPUPROPS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, + kbase_api_get_gpuprops, + struct kbase_ioctl_get_gpuprops, + kctx); + break; + case KBASE_IOCTL_POST_TERM: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, + kbase_api_post_term, + kctx); + break; + case KBASE_IOCTL_MEM_ALLOC: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, + kbase_api_mem_alloc, + union kbase_ioctl_mem_alloc, + kctx); + break; + case KBASE_IOCTL_MEM_QUERY: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY, + kbase_api_mem_query, + union kbase_ioctl_mem_query, + kctx); + break; + case KBASE_IOCTL_MEM_FREE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE, + kbase_api_mem_free, + struct kbase_ioctl_mem_free, + kctx); + break; + case KBASE_IOCTL_DISJOINT_QUERY: + KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY, + kbase_api_disjoint_query, + struct kbase_ioctl_disjoint_query, + kctx); + break; + case KBASE_IOCTL_GET_DDK_VERSION: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION, + kbase_api_get_ddk_version, + struct kbase_ioctl_get_ddk_version, + kctx); + break; + case KBASE_IOCTL_MEM_JIT_INIT_10_2: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_10_2, + kbase_api_mem_jit_init_10_2, + struct kbase_ioctl_mem_jit_init_10_2, + kctx); + break; + case KBASE_IOCTL_MEM_JIT_INIT_11_5: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_11_5, + kbase_api_mem_jit_init_11_5, + struct kbase_ioctl_mem_jit_init_11_5, + kctx); + break; + case KBASE_IOCTL_MEM_JIT_INIT: +
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT, + kbase_api_mem_jit_init, + struct kbase_ioctl_mem_jit_init, + kctx); + break; + case KBASE_IOCTL_MEM_EXEC_INIT: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT, + kbase_api_mem_exec_init, + struct kbase_ioctl_mem_exec_init, + kctx); + break; + case KBASE_IOCTL_MEM_SYNC: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC, + kbase_api_mem_sync, + struct kbase_ioctl_mem_sync, + kctx); + break; + case KBASE_IOCTL_MEM_FIND_CPU_OFFSET: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET, + kbase_api_mem_find_cpu_offset, + union kbase_ioctl_mem_find_cpu_offset, + kctx); + break; + case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET, + kbase_api_mem_find_gpu_start_and_offset, + union kbase_ioctl_mem_find_gpu_start_and_offset, + kctx); + break; + case KBASE_IOCTL_GET_CONTEXT_ID: + KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID, + kbase_api_get_context_id, + struct kbase_ioctl_get_context_id, + kctx); + break; + case KBASE_IOCTL_TLSTREAM_ACQUIRE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE, + kbase_api_tlstream_acquire, + struct kbase_ioctl_tlstream_acquire, + kctx); + break; + case KBASE_IOCTL_TLSTREAM_FLUSH: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH, + kbase_api_tlstream_flush, + kctx); + break; + case KBASE_IOCTL_MEM_COMMIT: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT, + kbase_api_mem_commit, + struct kbase_ioctl_mem_commit, + kctx); + break; + case KBASE_IOCTL_MEM_ALIAS: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS, + kbase_api_mem_alias, + union kbase_ioctl_mem_alias, + kctx); + break; + case KBASE_IOCTL_MEM_IMPORT: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT, + kbase_api_mem_import, + union kbase_ioctl_mem_import, + kctx); + break; + case KBASE_IOCTL_MEM_FLAGS_CHANGE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE, + kbase_api_mem_flags_change, + struct kbase_ioctl_mem_flags_change, + kctx); + break; + case KBASE_IOCTL_STREAM_CREATE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE, + kbase_api_stream_create, + struct kbase_ioctl_stream_create, + kctx); + break; + case KBASE_IOCTL_FENCE_VALIDATE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE, + kbase_api_fence_validate, + struct kbase_ioctl_fence_validate, + kctx); + break; + case KBASE_IOCTL_MEM_PROFILE_ADD: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD, + kbase_api_mem_profile_add, + struct kbase_ioctl_mem_profile_add, + kctx); + break; + + case KBASE_IOCTL_SOFT_EVENT_UPDATE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, + kbase_api_soft_event_update, + struct kbase_ioctl_soft_event_update, + kctx); + break; + + case KBASE_IOCTL_STICKY_RESOURCE_MAP: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, + kbase_api_sticky_resource_map, + struct kbase_ioctl_sticky_resource_map, + kctx); + break; + case KBASE_IOCTL_STICKY_RESOURCE_UNMAP: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP, + kbase_api_sticky_resource_unmap, + struct kbase_ioctl_sticky_resource_unmap, + kctx); + break; + + /* Instrumentation. 
*/ + case KBASE_IOCTL_KINSTR_JM_FD: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, + kbase_api_kinstr_jm_fd, + union kbase_kinstr_jm_fd, + kctx); + break; + case KBASE_IOCTL_HWCNT_READER_SETUP: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, + kbase_api_hwcnt_reader_setup, + struct kbase_ioctl_hwcnt_reader_setup, + kctx); + break; + case KBASE_IOCTL_HWCNT_ENABLE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE, + kbase_api_hwcnt_enable, + struct kbase_ioctl_hwcnt_enable, + kctx); + break; + case KBASE_IOCTL_HWCNT_DUMP: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP, + kbase_api_hwcnt_dump, + kctx); + break; + case KBASE_IOCTL_HWCNT_CLEAR: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR, + kbase_api_hwcnt_clear, + kctx); + break; + case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO, + kbase_api_get_cpu_gpu_timeinfo, + union kbase_ioctl_get_cpu_gpu_timeinfo, + kctx); + break; +#ifdef CONFIG_MALI_NO_MALI + case KBASE_IOCTL_HWCNT_SET: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, + kbase_api_hwcnt_set, + struct kbase_ioctl_hwcnt_values, + kctx); + break; +#endif +#ifdef CONFIG_MALI_CINSTR_GWT + case KBASE_IOCTL_CINSTR_GWT_START: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, + kbase_gpu_gwt_start, + kctx); + break; + case KBASE_IOCTL_CINSTR_GWT_STOP: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP, + kbase_gpu_gwt_stop, + kctx); + break; + case KBASE_IOCTL_CINSTR_GWT_DUMP: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP, + kbase_gpu_gwt_dump, + union kbase_ioctl_cinstr_gwt_dump, + kctx); + break; +#endif +#if MALI_UNIT_TEST + case KBASE_IOCTL_TLSTREAM_TEST: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, + kbase_api_tlstream_test, + struct kbase_ioctl_tlstream_test, + kctx); + break; + case KBASE_IOCTL_TLSTREAM_STATS: + KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, + kbase_api_tlstream_stats, + struct kbase_ioctl_tlstream_stats, + kctx); + break; +#endif /* MALI_UNIT_TEST */ + + /* MALI_SEC_INTEGRATION */ + case KBASE_IOCTL_SLSI_COMBINATION_BOOST_FLAGS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SLSI_COMBINATION_BOOST_FLAGS, + kbase_api_combination_boost, + struct kbase_ioctl_slsi_combination_boost_flags, + kctx); + break; + + /* MALI_SEC_INTEGRATION */ + case KBASE_IOCTL_SLSI_VK_BOOST_FLAGS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SLSI_VK_BOOST_FLAGS, + kbase_api_vk_boost, + struct kbase_ioctl_slsi_vk_boost_flags, + kctx); + break; + /* MALI_SEC_INTEGRATION */ + case KBASE_IOCTL_SLSI_NEGATIVE_BOOST_FLAGS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SLSI_NEGATIVE_BOOST_FLAGS, + kbase_api_negative_boost, + struct kbase_ioctl_slsi_negative_boost_flags, + kctx); + break; + } + + dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); + + return -ENOIOCTLCMD; +} + +static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) +{ + struct kbase_file *const kfile = filp->private_data; + struct kbase_context *const kctx = + kbase_file_get_kctx_if_setup_complete(kfile); + struct base_jd_event_v2 uevent; + int out_count = 0; + + if (unlikely(!kctx)) + return -EPERM; + + if (count < sizeof(uevent)) + return -ENOBUFS; + + do { + while (kbase_event_dequeue(kctx, &uevent)) { + if (out_count > 0) + goto out; + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(kctx->event_queue, + kbase_event_pending(kctx)) != 0) + return -ERESTARTSYS; + } + if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { + if (out_count == 0) + return -EPIPE; + goto out; + } + + if 
(copy_to_user(buf, &uevent, sizeof(uevent)) != 0) + return -EFAULT; + + buf += sizeof(uevent); + out_count++; + count -= sizeof(uevent); + } while (count >= sizeof(uevent)); + + out: + return out_count * sizeof(uevent); +} + +static unsigned int kbase_poll(struct file *filp, poll_table *wait) +{ + struct kbase_file *const kfile = filp->private_data; + struct kbase_context *const kctx = + kbase_file_get_kctx_if_setup_complete(kfile); + + if (unlikely(!kctx)) + return POLLERR; + + poll_wait(filp, &kctx->event_queue, wait); + if (kbase_event_pending(kctx)) + return POLLIN | POLLRDNORM; + + return 0; +} + +void kbase_event_wakeup(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + wake_up_interruptible(&kctx->event_queue); +} + +KBASE_EXPORT_TEST_API(kbase_event_wakeup); + +int kbase_event_pending(struct kbase_context *ctx) +{ + KBASE_DEBUG_ASSERT(ctx); + + return (atomic_read(&ctx->event_count) != 0) || + (atomic_read(&ctx->event_closed) != 0); +} + +KBASE_EXPORT_TEST_API(kbase_event_pending); + +static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) +{ + struct kbase_file *const kfile = filp->private_data; + struct kbase_context *const kctx = + kbase_file_get_kctx_if_setup_complete(kfile); + + if (unlikely(!kctx)) + return -EPERM; + + return kbase_context_mmap(kctx, vma); +} + +static int kbase_check_flags(int flags) +{ + /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always + * closes the file descriptor in a child process. + */ + if (0 == (flags & O_CLOEXEC)) + return -EINVAL; + + return 0; +} + +static unsigned long kbase_get_unmapped_area(struct file *const filp, + const unsigned long addr, const unsigned long len, + const unsigned long pgoff, const unsigned long flags) +{ + struct kbase_file *const kfile = filp->private_data; + struct kbase_context *const kctx = + kbase_file_get_kctx_if_setup_complete(kfile); + + if (unlikely(!kctx)) + return -EPERM; + + return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); +} + +static const struct file_operations kbase_fops = { + .owner = THIS_MODULE, + .open = kbase_open, + .release = kbase_release, + .read = kbase_read, + .poll = kbase_poll, + .unlocked_ioctl = kbase_ioctl, + .compat_ioctl = kbase_ioctl, + .mmap = kbase_mmap, + .check_flags = kbase_check_flags, + .get_unmapped_area = kbase_get_unmapped_area, +}; + +/** + * show_policy - Show callback for the power_policy sysfs file. + * + * This function is called to get the contents of the power_policy sysfs + * file. This is a list of the available policies with the currently active one + * surrounded by square brackets. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) +{ + struct kbase_device *kbdev; + const struct kbase_pm_policy *current_policy; + const struct kbase_pm_policy *const *policy_list; + int policy_count; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + current_policy = kbase_pm_get_policy(kbdev); + + policy_count = kbase_pm_list_policies(kbdev, &policy_list); + + for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { + if (policy_list[i] == current_policy) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** + * set_policy - Store callback for the power_policy sysfs file. + * + * This function is called when the power_policy sysfs file is written to. + * It matches the requested policy against the available policies and if a + * matching policy is found calls kbase_pm_set_policy() to change the + * policy. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + const struct kbase_pm_policy *new_policy = NULL; + const struct kbase_pm_policy *const *policy_list; + int policy_count; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + policy_count = kbase_pm_list_policies(kbdev, &policy_list); + + for (i = 0; i < policy_count; i++) { + if (sysfs_streq(policy_list[i]->name, buf)) { + new_policy = policy_list[i]; + break; + } + } + + if (!new_policy) { + dev_err(dev, "power_policy: policy not found\n"); + return -EINVAL; + } + + kbase_pm_set_policy(kbdev, new_policy); + + return count; +} + +/* + * The sysfs file power_policy. + * + * This is used for obtaining information about the available policies, + * determining which policy is currently active, and changing the active + * policy. + */ +static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); + +/* + * show_core_mask - Show callback for the core_mask sysfs file. + * + * This function is called to get the contents of the core_mask sysfs file. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS0) : 0x%llX\n", + kbdev->pm.debug_core_mask[0]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS1) : 0x%llX\n", + kbdev->pm.debug_core_mask[1]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS2) : 0x%llX\n", + kbdev->pm.debug_core_mask[2]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Available core mask : 0x%llX\n", + kbdev->gpu_props.props.raw_props.shader_present); + + return ret; +} + +/** + * set_core_mask - Store callback for the core_mask sysfs file. + * + * This function is called when the core_mask sysfs file is written to. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + u64 new_core_mask[3]; + int items, i; + ssize_t err = count; + unsigned long flags; + u64 shader_present, group0_core_mask; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + items = sscanf(buf, "%llx %llx %llx", + &new_core_mask[0], &new_core_mask[1], + &new_core_mask[2]); + + if (items != 1 && items != 3) { + dev_err(kbdev->dev, "Couldn't process core mask write operation.\n" + "Use format <core_mask>\n" + "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n"); + err = -EINVAL; + goto end; + } + + if (items == 1) + new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; + + mutex_lock(&kbdev->pm.lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + shader_present = kbdev->gpu_props.props.raw_props.shader_present; + group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + + for (i = 0; i < 3; ++i) { + if ((new_core_mask[i] & shader_present) != new_core_mask[i]) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)", + new_core_mask[i], i, shader_present); + err = -EINVAL; + goto unlock; + + } else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)\n", + new_core_mask[i], i, + kbdev->gpu_props.props.raw_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); + err = -EINVAL; + goto unlock; + + } else if (!(new_core_mask[i] & group0_core_mask)) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n", + new_core_mask[i], i, group0_core_mask); + err = -EINVAL; + goto unlock; + } + } + + if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || + kbdev->pm.debug_core_mask[1] != + new_core_mask[1] || + kbdev->pm.debug_core_mask[2] != + new_core_mask[2]) { + + kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], + new_core_mask[1], new_core_mask[2]); + } + +unlock: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->pm.lock); +end: + return err; +} + +/* + * The sysfs file core_mask. + * + * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available. + * Writing to it will set the current core mask. + */ +static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); + +/** + * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs + * file. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes to write to the sysfs file. + * + * This allows setting the timeout for software jobs. Waiting soft event wait + * jobs will be cancelled after this period expires, while soft fence wait jobs + * will print debug information if the fence debug feature is enabled. + * + * This is expressed in milliseconds. + * + * Return: count if the function succeeded. An error code on failure. + */ +static ssize_t set_soft_job_timeout(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int soft_job_timeout_ms; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || + (soft_job_timeout_ms <= 0)) + return -EINVAL; + + atomic_set(&kbdev->js_data.soft_job_timeout_ms, + soft_job_timeout_ms); + + return count; +} + +/** + * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs + * file. + * + * This will return the timeout for the software jobs. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents. + * + * Return: The number of bytes output to buf. + */ +static ssize_t show_soft_job_timeout(struct device *dev, + struct device_attribute *attr, + char * const buf) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + return scnprintf(buf, PAGE_SIZE, "%i\n", + atomic_read(&kbdev->js_data.soft_job_timeout_ms)); +} + +static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR, + show_soft_job_timeout, set_soft_job_timeout); + +static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, + int default_ticks, u32 old_ticks) +{ + if (timeout_ms > 0) { + u64 ticks = timeout_ms * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + if (!ticks) + return 1; + return ticks; + } else if (timeout_ms < 0) { + return default_ticks; + } else { + return old_ticks; + } +} + +/** + * set_js_timeouts - Store callback for the js_timeouts sysfs file. + * + * This function is called when the js_timeouts sysfs file is written to. + * The file contains eight values separated by whitespace. The values + * are basically the same as %JS_SOFT_STOP_TICKS, %JS_SOFT_STOP_TICKS_CL, + * %JS_HARD_STOP_TICKS_SS, %JS_HARD_STOP_TICKS_CL, %JS_HARD_STOP_TICKS_DUMPING, + * %JS_RESET_TICKS_SS, %JS_RESET_TICKS_CL, %JS_RESET_TICKS_DUMPING + * configuration values (in that order), with the difference that the js_timeouts + * values are expressed in MILLISECONDS. + * + * The js_timeouts sysfs file allows the values currently in + * use by the job scheduler to be overridden. Note that a value needs to + * be other than 0 for it to override the current job scheduler value. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure.
+ */ +static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int items; + long js_soft_stop_ms; + long js_soft_stop_ms_cl; + long js_hard_stop_ms_ss; + long js_hard_stop_ms_cl; + long js_hard_stop_ms_dumping; + long js_reset_ms_ss; + long js_reset_ms_cl; + long js_reset_ms_dumping; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", + &js_soft_stop_ms, &js_soft_stop_ms_cl, + &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, + &js_hard_stop_ms_dumping, &js_reset_ms_ss, + &js_reset_ms_cl, &js_reset_ms_dumping); + + if (items == 8) { + struct kbasep_js_device_data *js_data = &kbdev->js_data; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + +#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ + js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ + default, js_data->ticks_name); \ + dev_dbg(kbdev->dev, "Overriding " #ticks_name \ + " with %lu ticks (%lu ms)\n", \ + (unsigned long)js_data->ticks_name, \ + ms_name); \ + } while (0) + + UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, + DEFAULT_JS_SOFT_STOP_TICKS); + UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, + DEFAULT_JS_SOFT_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, + DEFAULT_JS_HARD_STOP_TICKS_SS); + UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, + DEFAULT_JS_HARD_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_dumping, + js_hard_stop_ms_dumping, + DEFAULT_JS_HARD_STOP_TICKS_DUMPING); + UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, + DEFAULT_JS_RESET_TICKS_SS); + UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, + DEFAULT_JS_RESET_TICKS_CL); + UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, + DEFAULT_JS_RESET_TICKS_DUMPING); + + kbase_js_set_timeouts(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return count; + } + + dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" + "Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n" + "Write 0 for no change, -1 to restore default timeout\n"); + return -EINVAL; +} + +static unsigned long get_js_timeout_in_ms( + u32 scheduling_period_ns, + u32 ticks) +{ + u64 ms = (u64)ticks * scheduling_period_ns; + + do_div(ms, 1000000UL); + return ms; +} + +/** + * show_js_timeouts - Show callback for the js_timeouts sysfs file. + * + * This function is called to get the contents of the js_timeouts sysfs + * file. It returns the last values written to the js_timeouts sysfs file. + * If the file hasn't been written to yet, the values will be the current + * settings in use. + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf.
+ */ +static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + unsigned long js_soft_stop_ms; + unsigned long js_soft_stop_ms_cl; + unsigned long js_hard_stop_ms_ss; + unsigned long js_hard_stop_ms_cl; + unsigned long js_hard_stop_ms_dumping; + unsigned long js_reset_ms_ss; + unsigned long js_reset_ms_cl; + unsigned long js_reset_ms_dumping; + u32 scheduling_period_ns; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + scheduling_period_ns = kbdev->js_data.scheduling_period_ns; + +#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ + scheduling_period_ns, \ + kbdev->js_data.name) + + js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); + js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); + js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); + js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); + js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); + js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); + js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); + js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping); + +#undef GET_TIMEOUT + + ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", + js_soft_stop_ms, js_soft_stop_ms_cl, + js_hard_stop_ms_ss, js_hard_stop_ms_cl, + js_hard_stop_ms_dumping, js_reset_ms_ss, + js_reset_ms_cl, js_reset_ms_dumping); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/* + * The sysfs file js_timeouts. + * + * This is used to override the current job scheduler values for + * JS_SOFT_STOP_TICKS_SS + * JS_SOFT_STOP_TICKS_CL + * JS_HARD_STOP_TICKS_SS + * JS_HARD_STOP_TICKS_CL + * JS_HARD_STOP_TICKS_DUMPING + * JS_RESET_TICKS_SS + * JS_RESET_TICKS_CL + * JS_RESET_TICKS_DUMPING. + */ +static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); + +static u32 get_new_js_timeout( + u32 old_period, + u32 old_ticks, + u32 new_scheduling_period_ns) +{ + u64 ticks = (u64)old_period * (u64)old_ticks; + do_div(ticks, new_scheduling_period_ns); + return ticks ? ticks : 1; +} + +/** + * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs + * file + * @dev: The device the sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * This function is called when the js_scheduling_period sysfs file is written + * to. It checks the data written, and if valid updates the js_scheduling_period + * value. + * + * Return: @count if the function succeeded. An error code on failure.
+ */ +static ssize_t set_js_scheduling_period(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + unsigned int js_scheduling_period; + u32 new_scheduling_period_ns; + u32 old_period; + struct kbasep_js_device_data *js_data; + unsigned long flags; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + js_data = &kbdev->js_data; + + ret = kstrtouint(buf, 0, &js_scheduling_period); + if (ret || !js_scheduling_period) { + dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" + "Use format <js_scheduling_period_ms>\n"); + return -EINVAL; + } + + new_scheduling_period_ns = js_scheduling_period * 1000000; + + /* Update scheduling timeouts */ + mutex_lock(&js_data->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* If no contexts have been scheduled since js_timeouts was last written + * to, the new timeouts might not have been latched yet. So check if an + * update is pending and use the new values if necessary. */ + + /* Use previous 'new' scheduling period as a base if present. */ + old_period = js_data->scheduling_period_ns; + +#define SET_TIMEOUT(name) \ + (js_data->name = get_new_js_timeout(\ + old_period, \ + kbdev->js_data.name, \ + new_scheduling_period_ns)) + + SET_TIMEOUT(soft_stop_ticks); + SET_TIMEOUT(soft_stop_ticks_cl); + SET_TIMEOUT(hard_stop_ticks_ss); + SET_TIMEOUT(hard_stop_ticks_cl); + SET_TIMEOUT(hard_stop_ticks_dumping); + SET_TIMEOUT(gpu_reset_ticks_ss); + SET_TIMEOUT(gpu_reset_ticks_cl); + SET_TIMEOUT(gpu_reset_ticks_dumping); + +#undef SET_TIMEOUT + + js_data->scheduling_period_ns = new_scheduling_period_ns; + + kbase_js_set_timeouts(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_data->runpool_mutex); + + dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", + js_scheduling_period); + + return count; +} + +/** + * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current period used for the JS scheduling + * period. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_js_scheduling_period(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + u32 period; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + period = kbdev->js_data.scheduling_period_ns; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", + period / 1000000); + + return ret; +} + +static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR, + show_js_scheduling_period, set_js_scheduling_period); + + +#ifdef CONFIG_MALI_DEBUG +static ssize_t set_js_softstop_always(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int softstop_always; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &softstop_always); + if (ret || ((softstop_always != 0) && (softstop_always != 1))) { + dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" + "Use format <soft_stop_always>\n"); + return -EINVAL; + } + + kbdev->js_data.softstop_always = (bool) softstop_always; + dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", + (kbdev->js_data.softstop_always) ?
+ "Enabled" : "Disabled"); + return count; +} + +static ssize_t show_js_softstop_always(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/* + * By default, soft-stops are disabled when only a single context is present. + * The ability to enable soft-stop when only a single context is present can be + * used for debug and unit-testing purposes. + * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) + */ +static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); +#endif /* CONFIG_MALI_DEBUG */ + +#ifdef CONFIG_MALI_DEBUG +typedef void (kbasep_debug_command_func) (struct kbase_device *); + +enum kbasep_debug_command_code { + KBASEP_DEBUG_COMMAND_DUMPTRACE, + + /* This must be the last enum */ + KBASEP_DEBUG_COMMAND_COUNT +}; + +struct kbasep_debug_command { + char *str; + kbasep_debug_command_func *func; +}; + +void kbasep_ktrace_dump_wrapper(struct kbase_device *kbdev) +{ + KBASE_KTRACE_DUMP(kbdev); +} + +/* Debug commands supported by the driver */ +static const struct kbasep_debug_command debug_commands[] = { + { + .str = "dumptrace", + .func = &kbasep_ktrace_dump_wrapper, + } +}; + +/** + * show_debug - Show callback for the debug_command sysfs file. + * + * This function is called to get the contents of the debug_command sysfs + * file. This is a list of the available debug commands, separated by newlines. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** + * issue_debug - Store callback for the debug_command sysfs file. + * + * This function is called when the debug_command sysfs file is written to. + * It matches the requested command against the available commands, and if + * a matching command is found calls the associated function from + * @debug_commands to issue the command. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { + if (sysfs_streq(debug_commands[i].str, buf)) { + debug_commands[i].func(kbdev); + return count; + } + } + + /* Debug Command not found */ + dev_err(dev, "debug_command: command not known\n"); + return -EINVAL; +} + +/* The sysfs file debug_command. + * + * This is used to issue general debug commands to the device driver. + * Reading it will produce a list of debug commands, separated by newlines. + * Writing to it with one of those commands will issue said command. + */ +static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug); +#endif /* CONFIG_MALI_DEBUG */ + +/** + * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get a description of the present Mali + * GPU via the gpuinfo sysfs entry. This includes the GPU family, the + * number of cores, the hardware version and the raw product id. For + * example + * + * Mali-T60x MP4 r0p0 0x6956 + * + * Return: The number of bytes output to @buf. + */ +static ssize_t kbase_show_gpuinfo(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const struct gpu_product_id_name { + unsigned id; + char *name; + } gpu_product_id_names[] = { + { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G71" }, + { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G72" }, + { .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G51" }, + { .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G76" }, + { .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G31" }, + { .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G52" }, + { .id = GPU_ID2_PRODUCT_TTRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G77" }, + { .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G78" }, + { .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G68" }, + { .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G57" }, + { .id = GPU_ID2_PRODUCT_TODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TODX" }, + { .id = GPU_ID2_PRODUCT_TGRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TGRX" }, + { .id = GPU_ID2_PRODUCT_TVAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TVAX" }, + { .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-LODX" }, + { .id = GPU_ID2_PRODUCT_TTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TTUX" }, + { .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-LTUX" }, + { .id = GPU_ID2_PRODUCT_TE2X >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TE2X" }, + }; + const char *product_name = "(Unknown Mali GPU)"; + struct kbase_device *kbdev; + u32 gpu_id; + unsigned product_id, product_id_mask; + unsigned i; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + product_id_mask = GPU_ID2_PRODUCT_MODEL >> 
GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { + const struct gpu_product_id_name *p = &gpu_product_id_names[i]; + + if ((p->id & product_id_mask) == + (product_id & product_id_mask)) { + product_name = p->name; + break; + } + } + + return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", + product_name, kbdev->gpu_props.num_cores, + (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, + (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, + product_id); +} +static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL); + +/** + * set_dvfs_period - Store callback for the dvfs_period sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the dvfs_period sysfs file is written to. It + * checks the data written, and if valid updates the DVFS period variable, + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_dvfs_period(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int dvfs_period; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &dvfs_period); + if (ret || dvfs_period <= 0) { + dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" + "Use format \n"); + return -EINVAL; + } + + kbdev->pm.dvfs_period = dvfs_period; + dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period); + + return count; +} + +/** + * show_dvfs_period - Show callback for the dvfs_period sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current period used for the DVFS sample + * timer. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_dvfs_period(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period); + + return ret; +} + +static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period, + set_dvfs_period); + +/** + * set_pm_poweroff - Store callback for the pm_poweroff sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the pm_poweroff sysfs file is written to. + * + * This file contains three values separated by whitespace. The values + * are gpu_poweroff_time (the period of the poweroff timer, in ns), + * poweroff_shader_ticks (the number of poweroff timer ticks before an idle + * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer + * ticks before the GPU is powered off), in that order. + * + * Return: @count if the function succeeded. An error code on failure. 
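+ *
+ * Example (hypothetical values): writing "100000 2 0" configures a 100 us
+ * shader tick timer interval with shaders powered off after two idle ticks.
+ * A non-zero third value only produces a warning, as a separate GPU
+ * poweroff delay is no longer supported.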
+ */ +static ssize_t set_pm_poweroff(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + struct kbasep_pm_tick_timer_state *stt; + int items; + u64 gpu_poweroff_time; + unsigned int poweroff_shader_ticks, poweroff_gpu_ticks; + unsigned long flags; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, + &poweroff_shader_ticks, + &poweroff_gpu_ticks); + if (items != 3) { + dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n" + "Use format \n"); + return -EINVAL; + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt = &kbdev->pm.backend.shader_tick_timer; + stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); + stt->configured_ticks = poweroff_shader_ticks; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (poweroff_gpu_ticks != 0) + dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n"); + + return count; +} + +/** + * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current period used for the DVFS sample + * timer. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_pm_poweroff(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + struct kbasep_pm_tick_timer_state *stt; + ssize_t ret; + unsigned long flags; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt = &kbdev->pm.backend.shader_tick_timer; + ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", + ktime_to_ns(stt->configured_interval), + stt->configured_ticks); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff, + set_pm_poweroff); + +/** + * set_reset_timeout - Store callback for the reset_timeout sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the reset_timeout sysfs file is written to. It + * checks the data written, and if valid updates the reset timeout. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_reset_timeout(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int reset_timeout; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &reset_timeout); + if (ret || reset_timeout <= 0) { + dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" + "Use format \n"); + return -EINVAL; + } + + kbdev->reset_timeout_ms = reset_timeout; + dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); + + return count; +} + +/** + * show_reset_timeout - Show callback for the reset_timeout sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current reset timeout. + * + * Return: The number of bytes output to @buf. 
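+ *
+ * The value is reported in milliseconds, the same unit accepted by the
+ * corresponding store callback.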
+ */ +static ssize_t show_reset_timeout(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms); + + return ret; +} + +static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout, + set_reset_timeout); + +#if IS_ENABLED(CONFIG_DEBUG_FS) +static ssize_t show_mem_pool_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); +} + +static ssize_t set_mem_pool_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); + + return err ? err : count; +} + +static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size, + set_mem_pool_size); + +static ssize_t show_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); +} + +static ssize_t set_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + + return err ? err : count; +} + +static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, + set_mem_pool_max_size); + +/** + * show_lp_mem_pool_size - Show size of the large memory pages pool. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the pool size. + * + * This function is called to get the number of large memory pages which currently populate the kbdev pool. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_lp_mem_pool_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); +} + +/** + * set_lp_mem_pool_size - Set size of the large memory pages pool. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This function is called to set the number of large memory pages which should populate the kbdev pool. + * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool. + * + * Return: @count if the function succeeded. An error code on failure. 
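+ *
+ * The written string is parsed by kbase_debugfs_helper_set_attr_from_string()
+ * on a per-memory-group basis (up to MEMORY_GROUP_MANAGER_NR_GROUPS groups)
+ * and the resulting sizes are applied through kbase_mem_pool_debugfs_trim().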
+ */ +static ssize_t set_lp_mem_pool_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); + + return err ? err : count; +} + +static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size, + set_lp_mem_pool_size); + +/** + * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the pool size. + * + * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_lp_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); +} + +/** + * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_lp_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + + return err ? err : count; +} + +static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size, + set_lp_mem_pool_max_size); + +#endif + +/** + * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the context scheduling mode information. + * + * This function is called to get the context scheduling mode being used by JS. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_js_ctx_scheduling_mode(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode); +} + +/** + * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This function is called when the js_ctx_scheduling_mode sysfs file is written + * to. It checks the data written, and if valid updates the ctx scheduling mode + * being by JS. 
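+ * Values greater than or equal to KBASE_JS_PRIORITY_MODE_COUNT are rejected
+ * with -EINVAL; writing the mode that is already active returns @count
+ * without touching any context, while a new mode triggers
+ * kbase_js_update_ctx_priority() for every context on the device.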
+ * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_js_ctx_scheduling_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_context *kctx; + u32 new_js_ctx_scheduling_mode; + struct kbase_device *kbdev; + unsigned long flags; + int ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); + if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { + dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" + " write operation.\n" + "Use format \n"); + return -EINVAL; + } + + if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode) + return count; + + mutex_lock(&kbdev->kctx_list_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Update the context priority mode */ + kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode; + + /* Adjust priority of all the contexts as per the new mode */ + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) + kbase_js_update_ctx_priority(kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->kctx_list_lock); + + dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode); + + return count; +} + +static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, + show_js_ctx_scheduling_mode, + set_js_ctx_scheduling_mode); + +#ifdef MALI_KBASE_BUILD + +/* Number of entries in serialize_jobs_settings[] */ +#define NR_SERIALIZE_JOBS_SETTINGS 5 +/* Maximum string length in serialize_jobs_settings[].name */ +#define MAX_SERIALIZE_JOBS_NAME_LEN 16 + +static struct +{ + char *name; + u8 setting; +} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { + {"none", 0}, + {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, + {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, + {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, + {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | + KBASE_SERIALIZE_RESET} +}; + +/** + * update_serialize_jobs_setting - Update the serialization setting for the + * submission of GPU jobs. + * + * This function is called when the serialize_jobs sysfs/debugfs file is + * written to. It matches the requested setting against the available settings + * and if a matching setting is found updates kbdev->serialize_jobs. + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * @buf: Buffer containing the value written to the sysfs/debugfs file. + * @count: The number of bytes to write to the sysfs/debugfs file. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, + const char *buf, size_t count) +{ + int i; + bool valid = false; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { + kbdev->serialize_jobs = + serialize_jobs_settings[i].setting; + valid = true; + break; + } + } + + if (!valid) { + dev_err(kbdev->dev, "serialize_jobs: invalid setting"); + return -EINVAL; + } + + return count; +} + +#ifdef CONFIG_DEBUG_FS +/** + * kbasep_serialize_jobs_seq_debugfs_show - Show callback for the serialize_jobs + * debugfs file + * @sfile: seq_file pointer + * @data: Private callback data + * + * This function is called to get the contents of the serialize_jobs debugfs + * file. 
This is a list of the available settings with the currently active one + * surrounded by square brackets. + * + * Return: 0 on success, or an error code on error + */ +static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, + void *data) +{ + struct kbase_device *kbdev = sfile->private; + int i; + + CSTD_UNUSED(data); + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) + seq_printf(sfile, "[%s] ", + serialize_jobs_settings[i].name); + else + seq_printf(sfile, "%s ", + serialize_jobs_settings[i].name); + } + + seq_puts(sfile, "\n"); + + return 0; +} + +/** + * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs + * debugfs file. + * @file: File pointer + * @ubuf: User buffer containing data to store + * @count: Number of bytes in user buffer + * @ppos: File position + * + * This function is called when the serialize_jobs debugfs file is written to. + * It matches the requested setting against the available settings and if a + * matching setting is found updates kbdev->serialize_jobs. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct kbase_device *kbdev = s->private; + char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; + + CSTD_UNUSED(ppos); + + count = min_t(size_t, sizeof(buf) - 1, count); + if (copy_from_user(buf, ubuf, count)) + return -EFAULT; + + buf[count] = 0; + + return update_serialize_jobs_setting(kbdev, buf, count); +} + +/** + * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs + * debugfs file + * @in: inode pointer + * @file: file pointer + * + * Return: Zero on success, error code on failure + */ +static int kbasep_serialize_jobs_debugfs_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, + in->i_private); +} + +static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_serialize_jobs_debugfs_open, + .read = seq_read, + .write = kbasep_serialize_jobs_debugfs_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_DEBUG_FS */ + +/** + * show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file. + * + * This function is called to get the contents of the serialize_jobs sysfs + * file. This is a list of the available settings with the currently active + * one surrounded by square brackets. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. 
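+ *
+ * With the "none" setting active the output takes the form:
+ *   [none]intra-slot inter-slot full full-reset
+ * (unlike the debugfs show callback, no space is printed after the
+ * bracketed active entry).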
+ */ +static ssize_t show_serialize_jobs_sysfs(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + ssize_t ret = 0; + int i; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (kbdev->serialize_jobs == + serialize_jobs_settings[i].setting) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]", + serialize_jobs_settings[i].name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", + serialize_jobs_settings[i].name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** + * store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file. + * + * This function is called when the serialize_jobs sysfs file is written to. + * It matches the requested setting against the available settings and if a + * matching setting is found updates kbdev->serialize_jobs. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t store_serialize_jobs_sysfs(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return update_serialize_jobs_setting(to_kbase_device(dev), buf, count); +} + +static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, + store_serialize_jobs_sysfs); +#endif /* MALI_KBASE_BUILD */ + +static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + protected_mode_hwcnt_disable_work); + unsigned long flags; + + bool do_disable; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!do_disable) + return; + + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + + if (do_disable) { + /* Protected mode state did not change while we were doing the + * disable, so commit the work we just performed and continue + * the state machine. + */ + kbdev->protected_mode_hwcnt_disabled = true; + kbase_backend_slot_update(kbdev); + } else { + /* Protected mode state was updated while we were doing the + * disable, so we need to undo the disable we just performed. 
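+ * The flags are re-checked here because the hwaccess_lock is dropped
+ * around kbase_hwcnt_context_disable(), so the protected mode state may
+ * legitimately have changed in between.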
+ */ + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; + + return kbase_pm_protected_mode_enable(kbdev); +} + +static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; + + return kbase_pm_protected_mode_disable(kbdev); +} + +static const struct protected_mode_ops kbasep_native_protected_ops = { + .protected_mode_enable = kbasep_protected_mode_enable, + .protected_mode_disable = kbasep_protected_mode_disable +}; + +int kbase_protected_mode_init(struct kbase_device *kbdev) +{ + /* Use native protected ops */ + kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), + GFP_KERNEL); + if (!kbdev->protected_dev) + return -ENOMEM; + kbdev->protected_dev->data = kbdev; + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_UNSUPPORTED) + kbdev->protected_ops = &kbasep_native_protected_ops; +#else + kbdev->protected_ops = PROTECTED_CALLBACKS; +#endif + + INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, + kbasep_protected_mode_hwcnt_disable_worker); + kbdev->protected_mode_hwcnt_desired = true; + kbdev->protected_mode_hwcnt_disabled = false; + return 0; +} + +void kbase_protected_mode_term(struct kbase_device *kbdev) +{ + cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); + kfree(kbdev->protected_dev); +} + +#ifdef CONFIG_MALI_NO_MALI +static int kbase_common_reg_map(struct kbase_device *kbdev) +{ + return 0; +} +static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +{ +} +#else /* CONFIG_MALI_NO_MALI */ +static int kbase_common_reg_map(struct kbase_device *kbdev) +{ + int err = 0; + + if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) { + dev_err(kbdev->dev, "Register window unavailable\n"); + err = -EIO; + goto out_region; + } + + kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); + if (!kbdev->reg) { + dev_err(kbdev->dev, "Can't remap register window\n"); + err = -EINVAL; + goto out_ioremap; + } + + return err; + +out_ioremap: + release_mem_region(kbdev->reg_start, kbdev->reg_size); +out_region: + return err; +} + +static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +{ + if (kbdev->reg) { + iounmap(kbdev->reg); + release_mem_region(kbdev->reg_start, kbdev->reg_size); + kbdev->reg = NULL; + kbdev->reg_start = 0; + kbdev->reg_size = 0; + } +} +#endif /* CONFIG_MALI_NO_MALI */ + +int registers_map(struct kbase_device * const kbdev) +{ + /* the first memory resource is the physical address of the GPU + * registers. 
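+ * It is fetched with platform_get_resource(pdev, IORESOURCE_MEM, 0),
+ * which on a device tree platform corresponds to the first "reg" entry
+ * of the GPU node.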
+ */ + struct platform_device *pdev = to_platform_device(kbdev->dev); + struct resource *reg_res; + int err; + + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + return -ENOENT; + } + + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); + + + err = kbase_common_reg_map(kbdev); + if (err) { + dev_err(kbdev->dev, "Failed to map registers\n"); + return err; + } + + return 0; +} + +void registers_unmap(struct kbase_device *kbdev) +{ + kbase_common_reg_unmap(kbdev); +} + +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + +static bool kbase_is_pm_enabled(const struct device_node *gpu_node) +{ + const struct device_node *power_model_node; + const void *cooling_cells_node; + const void *operating_point_node; + bool is_pm_enable = false; + + power_model_node = of_get_child_by_name(gpu_node, + "power_model"); + if (power_model_node) + is_pm_enable = true; + + cooling_cells_node = of_get_property(gpu_node, + "#cooling-cells", NULL); + if (cooling_cells_node) + is_pm_enable = true; + + operating_point_node = of_get_property(gpu_node, + "operating-points", NULL); + if (operating_point_node) + is_pm_enable = true; + + return is_pm_enable; +} + +static bool kbase_is_pv_enabled(const struct device_node *gpu_node) +{ + const void *arbiter_if_node; + + arbiter_if_node = of_get_property(gpu_node, + "arbiter_if", NULL); + + return arbiter_if_node ? true : false; +} + +static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) +{ + const void *coherency_dts; + u32 coherency; + + coherency_dts = of_get_property(gpu_node, + "system-coherency", + NULL); + if (coherency_dts) { + coherency = be32_to_cpup(coherency_dts); + if (coherency == COHERENCY_ACE) + return true; + } + return false; +} + +#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ + +int kbase_device_pm_init(struct kbase_device *kbdev) +{ + int err = 0; + +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + + u32 gpu_id; + u32 product_id; + u32 gpu_model_id; + + if (kbase_is_pv_enabled(kbdev->dev->of_node)) { + if (kbase_is_pm_enabled(kbdev->dev->of_node)) { + /* Arbitration AND power management invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); + return -EPERM; + } + if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { + /* Arbitration AND full coherency invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND full coherency\n"); + return -EPERM; + } + err = kbase_arbiter_pm_early_init(kbdev); + if (err == 0) { + /* Check if Arbitration is running on + * supported GPU platform + */ + kbase_pm_register_access_enable(kbdev); + gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + kbase_pm_register_access_disable(kbdev); + product_id = KBASE_UBFX32(gpu_id, + GPU_ID_VERSION_PRODUCT_ID_SHIFT, 16); + gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); + + if (gpu_model_id != GPU_ID2_PRODUCT_TGOX + && gpu_model_id != GPU_ID2_PRODUCT_TNOX) { + kbase_arbiter_pm_early_term(kbdev); + dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); + return -EPERM; + } + } + } else { + err = power_control_init(kbdev); + } +#else + err = power_control_init(kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ + return err; +} + +void kbase_device_pm_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#ifdef CONFIG_OF + if (kbase_is_pv_enabled(kbdev->dev->of_node)) + 
kbase_arbiter_pm_early_term(kbdev); + else + power_control_term(kbdev); +#endif /* CONFIG_OF */ +#else + power_control_term(kbdev); +#endif +} + +int power_control_init(struct kbase_device *kbdev) +{ +#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) + /* Power control initialization requires at least the capability to get + * regulators and clocks from the device tree, as well as parsing + * arrays of unsigned integer values. + * + * The whole initialization process shall simply be skipped if the + * minimum capability is not available. + */ + return 0; +#else + struct platform_device *pdev; + int err = 0; + unsigned int i; +#if defined(CONFIG_REGULATOR) + static const char *regulator_names[] = { + "mali", "shadercores" + }; + BUILD_BUG_ON(ARRAY_SIZE(regulator_names) < BASE_MAX_NR_CLOCKS_REGULATORS); +#endif /* CONFIG_REGULATOR */ + + if (!kbdev) + return -ENODEV; + + pdev = to_platform_device(kbdev->dev); + +#if defined(CONFIG_REGULATOR) + /* Since the error code EPROBE_DEFER causes the entire probing + * procedure to be restarted from scratch at a later time, + * all regulators will be released before returning. + * + * Any other error is ignored and the driver will continue + * operating with a partial initialization of regulators. + */ + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + kbdev->regulators[i] = regulator_get_optional(kbdev->dev, + regulator_names[i]); + if (IS_ERR_OR_NULL(kbdev->regulators[i])) { + err = PTR_ERR(kbdev->regulators[i]); + kbdev->regulators[i] = NULL; + break; + } + } + if (err == -EPROBE_DEFER) { + while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) + regulator_put(kbdev->regulators[--i]); + return err; + } + + kbdev->nr_regulators = i; + dev_dbg(&pdev->dev, "Regulators probed: %u\n", kbdev->nr_regulators); +#endif + + /* Having more clocks than regulators is acceptable, while the + * opposite shall not happen. + * + * Since the error code EPROBE_DEFER causes the entire probing + * procedure to be restarted from scratch at a later time, + * all clocks and regulators will be released before returning. + * + * Any other error is ignored and the driver will continue + * operating with a partial initialization of clocks. + */ + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + kbdev->clocks[i] = of_clk_get(kbdev->dev->of_node, i); + if (IS_ERR_OR_NULL(kbdev->clocks[i])) { + err = PTR_ERR(kbdev->clocks[i]); + kbdev->clocks[i] = NULL; + break; + } + + err = clk_prepare_enable(kbdev->clocks[i]); + if (err) { + dev_err(kbdev->dev, + "Failed to prepare and enable clock (%d)\n", + err); + clk_put(kbdev->clocks[i]); + break; + } + } + if (err == -EPROBE_DEFER) { + while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) { + clk_disable_unprepare(kbdev->clocks[--i]); + clk_put(kbdev->clocks[i]); + } + goto clocks_probe_defer; + } + + kbdev->nr_clocks = i; + dev_dbg(&pdev->dev, "Clocks probed: %u\n", kbdev->nr_clocks); + + /* Any error in parsing the OPP table from the device file + * shall be ignored. The fact that the table may be absent or wrong + * on the device tree of the platform shouldn't prevent the driver + * from completing its initialization. 
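+ * In practice an error here usually just means that no usable
+ * operating-points table is present, which is why the return value is
+ * discarded with CSTD_UNUSED() below.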
+ */ +#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \ + !defined(LSK_OPPV2_BACKPORT)) + err = of_init_opp_table(kbdev->dev); + CSTD_UNUSED(err); +#else + +#if defined(CONFIG_PM_OPP) +#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ + defined(CONFIG_REGULATOR)) + if (kbdev->nr_regulators > 0) { + kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, + regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS); + } +#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ + err = dev_pm_opp_of_add_table(kbdev->dev); + CSTD_UNUSED(err); +#endif /* CONFIG_PM_OPP */ + +#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */ + return 0; + +clocks_probe_defer: +#if defined(CONFIG_REGULATOR) + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) + regulator_put(kbdev->regulators[i]); +#endif + return err; +#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */ +} + +void power_control_term(struct kbase_device *kbdev) +{ + unsigned int i; + +#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \ + !defined(LSK_OPPV2_BACKPORT)) +#if KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE + of_free_opp_table(kbdev->dev); +#endif +#else + +#if defined(CONFIG_PM_OPP) + dev_pm_opp_of_remove_table(kbdev->dev); +#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ + defined(CONFIG_REGULATOR)) + if (!IS_ERR_OR_NULL(kbdev->opp_table)) + dev_pm_opp_put_regulators(kbdev->opp_table); +#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* CONFIG_PM_OPP */ + +#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */ + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->clocks[i]) { + if (__clk_is_enabled(kbdev->clocks[i])) + clk_disable_unprepare(kbdev->clocks[i]); + clk_put(kbdev->clocks[i]); + kbdev->clocks[i] = NULL; + } else + break; + } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->regulators[i]) { + regulator_put(kbdev->regulators[i]); + kbdev->regulators[i] = NULL; + } + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +} + +#ifdef MALI_KBASE_BUILD +#ifdef CONFIG_DEBUG_FS + +static void trigger_reset(struct kbase_device *kbdev) +{ + kbase_pm_context_active(kbdev); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + kbase_pm_context_idle(kbdev); +} + +#define MAKE_QUIRK_ACCESSORS(type) \ +static int type##_quirks_set(void *data, u64 val) \ +{ \ + struct kbase_device *kbdev; \ + kbdev = (struct kbase_device *)data; \ + kbdev->hw_quirks_##type = (u32)val; \ + trigger_reset(kbdev); \ + return 0;\ +} \ +\ +static int type##_quirks_get(void *data, u64 *val) \ +{ \ + struct kbase_device *kbdev;\ + kbdev = (struct kbase_device *)data;\ + *val = kbdev->hw_quirks_##type;\ + return 0;\ +} \ +DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ + type##_quirks_set, "%llu\n") + +MAKE_QUIRK_ACCESSORS(sc); +MAKE_QUIRK_ACCESSORS(tiler); +MAKE_QUIRK_ACCESSORS(mmu); +MAKE_QUIRK_ACCESSORS(jm); + +static ssize_t kbase_device_debugfs_reset_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + struct kbase_device *kbdev = file->private_data; + CSTD_UNUSED(ubuf); + CSTD_UNUSED(count); + CSTD_UNUSED(ppos); + + trigger_reset(kbdev); + + return count; +} + +static const struct file_operations fops_trigger_reset = { + .owner = THIS_MODULE, + .open = simple_open, + .write = kbase_device_debugfs_reset_write, + .llseek = default_llseek, +}; + +#ifndef MALI_SEC_INTEGRATION +extern const struct 
file_operations kbasep_ktrace_debugfs_fops; +#endif /* MALI_SEC_INTEGRATION */ + +/** + * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read + * @file: File object to read is for + * @buf: User buffer to populate with data + * @len: Length of user buffer + * @ppos: Offset within file object + * + * Retrieves the current status of protected debug mode + * (0 = disabled, 1 = enabled) + * + * Return: Number of bytes added to user buffer + */ +static ssize_t debugfs_protected_debug_mode_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + struct kbase_device *kbdev = (struct kbase_device *)file->private_data; + u32 gpu_status; + ssize_t ret_val; + + kbase_pm_context_active(kbdev); + gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)); + kbase_pm_context_idle(kbdev); + + if (gpu_status & GPU_DBGEN) + ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); + else + ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); + + return ret_val; +} + +/* + * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops + * + * Contains the file operations for the "protected_debug_mode" debugfs file + */ +static const struct file_operations fops_protected_debug_mode = { + .owner = THIS_MODULE, + .open = simple_open, + .read = debugfs_protected_debug_mode_read, + .llseek = default_llseek, +}; + +static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile, + void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_config_debugfs_max_size); +} + +static ssize_t kbase_device_debugfs_mem_pool_max_size_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err = 0; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_config_debugfs_set_max_size); + + return err ? 
err : count; +} + +static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbase_device_debugfs_mem_pool_max_size_show, + in->i_private); +} + +static const struct file_operations + kbase_device_debugfs_mem_pool_max_size_fops = { + .owner = THIS_MODULE, + .open = kbase_device_debugfs_mem_pool_max_size_open, + .read = seq_read, + .write = kbase_device_debugfs_mem_pool_max_size_write, + .llseek = seq_lseek, + .release = single_release, +}; + +int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + struct dentry *debugfs_ctx_defaults_directory; + int err; + /* prevent unprivileged use of debug file system + * in old kernel version + */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0644; +#else + const mode_t mode = 0600; +#endif + +#ifdef MALI_SEC_INTEGRATION + kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, + NULL); +#else + kbdev->mali_debugfs_directory = debugfs_create_dir("mali", + NULL); +#endif /* MALI_SEC_INTEGRATION */ + if (!kbdev->mali_debugfs_directory) { + dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); + err = -ENOMEM; + goto out; + } + +#ifdef MALI_SEC_INTEGRATION + kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx", + kbdev->mali_debugfs_directory); +#else + kbdev->trace_dentry = debugfs_create_file("mali_trace", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_ktrace_debugfs_fops); + + kbdev->debugfs_ctx_directory = debugfs_create_dir("mem", + kbdev->mali_debugfs_directory); +#endif /* MALI_SEC_INTEGRATION */ + + if (!kbdev->debugfs_ctx_directory) { + dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); + err = -ENOMEM; + goto out; + } + + debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", + kbdev->debugfs_ctx_directory); + if (!debugfs_ctx_defaults_directory) { + dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); + err = -ENOMEM; + goto out; + } + +#if !MALI_CUSTOMER_RELEASE + kbasep_regs_dump_debugfs_init(kbdev); +#endif /* !MALI_CUSTOMER_RELEASE */ + kbasep_regs_history_debugfs_init(kbdev); + + kbase_debug_job_fault_debugfs_init(kbdev); + + kbasep_gpu_memory_debugfs_init(kbdev); + kbase_as_fault_debugfs_init(kbdev); +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + kbase_instr_backend_debugfs_init(kbdev); +#endif + /* fops_* variables created by invocations of macro + * MAKE_QUIRK_ACCESSORS() above. 
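+ * For example, MAKE_QUIRK_ACCESSORS(sc) generates sc_quirks_get(),
+ * sc_quirks_set() and the fops_sc_quirks attribute backing the "quirks_sc"
+ * file created below; a write stores the value in kbdev->hw_quirks_sc and
+ * triggers a GPU reset so the new quirk bits are picked up.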
*/ + debugfs_create_file("quirks_sc", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_sc_quirks); + debugfs_create_file("quirks_tiler", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_tiler_quirks); + debugfs_create_file("quirks_mmu", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_mmu_quirks); + debugfs_create_file("quirks_jm", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_jm_quirks); + + debugfs_create_bool("infinite_cache", mode, + debugfs_ctx_defaults_directory, + &kbdev->infinite_cache_active_default); + + debugfs_create_file("mem_pool_max_size", mode, + debugfs_ctx_defaults_directory, + &kbdev->mem_pool_defaults.small, + &kbase_device_debugfs_mem_pool_max_size_fops); + + debugfs_create_file("lp_mem_pool_max_size", mode, + debugfs_ctx_defaults_directory, + &kbdev->mem_pool_defaults.large, + &kbase_device_debugfs_mem_pool_max_size_fops); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + debugfs_create_file("protected_debug_mode", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &fops_protected_debug_mode); + } + + debugfs_create_file("reset", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_trigger_reset); + + kbase_ktrace_debugfs_init(kbdev); + +#ifdef CONFIG_MALI_DEVFREQ +#ifdef CONFIG_DEVFREQ_THERMAL + if (kbdev->devfreq) + kbase_ipa_debugfs_init(kbdev); +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* CONFIG_MALI_DEVFREQ */ + + debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_serialize_jobs_debugfs_fops); + + return 0; + +out: + debugfs_remove_recursive(kbdev->mali_debugfs_directory); + return err; +} + +void kbase_device_debugfs_term(struct kbase_device *kbdev) +{ + debugfs_remove_recursive(kbdev->mali_debugfs_directory); +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* MALI_KBASE_BUILD */ + +int kbase_device_coherency_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_OF + u32 supported_coherency_bitmap = + kbdev->gpu_props.props.raw_props.coherency_mode; + const void *coherency_override_dts; + u32 override_coherency, gpu_id; + unsigned int prod_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= GPU_ID_VERSION_PRODUCT_ID; + prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + /* Only for tMIx : + * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly + * documented for tMIx so force correct value here. 
+ */ + if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == + GPU_ID2_PRODUCT_TMIX) + if (supported_coherency_bitmap == + COHERENCY_FEATURE_BIT(COHERENCY_ACE)) + supported_coherency_bitmap |= + COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + +#endif /* CONFIG_OF */ + + kbdev->system_coherency = COHERENCY_NONE; + + /* device tree may override the coherency */ +#ifdef CONFIG_OF + coherency_override_dts = of_get_property(kbdev->dev->of_node, + "system-coherency", + NULL); + if (coherency_override_dts) { + + override_coherency = be32_to_cpup(coherency_override_dts); + + if ((override_coherency <= COHERENCY_NONE) && + (supported_coherency_bitmap & + COHERENCY_FEATURE_BIT(override_coherency))) { + + kbdev->system_coherency = override_coherency; + + dev_info(kbdev->dev, + "Using coherency mode %u set from dtb", + override_coherency); + } else + dev_warn(kbdev->dev, + "Ignoring unsupported coherency mode %u set from dtb", + override_coherency); + } + +#endif /* CONFIG_OF */ + + kbdev->gpu_props.props.raw_props.coherency_mode = + kbdev->system_coherency; + + return 0; +} + +#ifdef CONFIG_MALI_BUSLOG + +/* Callback used by the kbase bus logger client, to initiate a GPU reset + * when the bus log is restarted. GPU reset is used as reference point + * in HW bus log analyses. + */ +static void kbase_logging_started_cb(void *data) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); +} + +int buslog_init(struct kbase_device *kbdev) +{ + int err = 0; + + err = bl_core_client_register(kbdev->devname, + kbase_logging_started_cb, + kbdev, &kbdev->buslogger, + THIS_MODULE, NULL); + if (err == 0) + bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); + + return err; +} + +void buslog_term(struct kbase_device *kbdev) +{ + bl_core_client_unregister(kbdev->buslogger); +} +#endif + +static struct attribute *kbase_scheduling_attrs[] = { + &dev_attr_serialize_jobs.attr, + NULL +}; + +static struct attribute *kbase_attrs[] = { +#ifdef CONFIG_MALI_DEBUG + &dev_attr_debug_command.attr, + &dev_attr_js_softstop_always.attr, +#endif + &dev_attr_js_timeouts.attr, + &dev_attr_soft_job_timeout.attr, + &dev_attr_gpuinfo.attr, + &dev_attr_dvfs_period.attr, + &dev_attr_pm_poweroff.attr, + &dev_attr_reset_timeout.attr, + &dev_attr_js_scheduling_period.attr, + &dev_attr_power_policy.attr, + &dev_attr_core_mask.attr, +#if IS_ENABLED(CONFIG_DEBUG_FS) + &dev_attr_mem_pool_size.attr, + &dev_attr_mem_pool_max_size.attr, + &dev_attr_lp_mem_pool_size.attr, + &dev_attr_lp_mem_pool_max_size.attr, +#endif + &dev_attr_js_ctx_scheduling_mode.attr, + NULL +}; + +#define SYSFS_SCHEDULING_GROUP "scheduling" +static const struct attribute_group kbase_scheduling_attr_group = { + .name = SYSFS_SCHEDULING_GROUP, + .attrs = kbase_scheduling_attrs, +}; + +static const struct attribute_group kbase_attr_group = { + .attrs = kbase_attrs, +}; + +int kbase_sysfs_init(struct kbase_device *kbdev) +{ + int err = 0; + + kbdev->mdev.minor = MISC_DYNAMIC_MINOR; + kbdev->mdev.name = kbdev->devname; + kbdev->mdev.fops = &kbase_fops; + kbdev->mdev.parent = get_device(kbdev->dev); + kbdev->mdev.mode = 0666; + + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + if (!err) { + err = sysfs_create_group(&kbdev->dev->kobj, + &kbase_scheduling_attr_group); + if (err) { + dev_err(kbdev->dev, "Creation of %s sysfs group failed", + SYSFS_SCHEDULING_GROUP); + sysfs_remove_group(&kbdev->dev->kobj, + &kbase_attr_group); + } + } + + 
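+	/* At this point either both attribute groups are registered, or the
+	 * first group has been removed again, so a failure leaves no partial
+	 * sysfs state behind.
+	 */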
return err; +} + +void kbase_sysfs_term(struct kbase_device *kbdev) +{ + sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + put_device(kbdev->dev); +} + +static int kbase_platform_device_remove(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + + if (!kbdev) + return -ENODEV; + + kbase_device_term(kbdev); + dev_set_drvdata(kbdev->dev, NULL); + kbase_device_free(kbdev); + + return 0; +} + +void kbase_backend_devfreq_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DEVFREQ + if (kbdev->devfreq) + kbase_devfreq_term(kbdev); +#endif +} + +int kbase_backend_devfreq_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DEVFREQ + /* Devfreq uses hardware counters, so must be initialized after it. */ + int err = kbase_devfreq_init(kbdev); + + if (err) + dev_err(kbdev->dev, "Continuing without devfreq\n"); +#endif /* CONFIG_MALI_DEVFREQ */ + return 0; +} + +static int kbase_platform_device_probe(struct platform_device *pdev) +{ + struct kbase_device *kbdev; + int err = 0; + + mali_kbase_print_cs_experimental(); + + kbdev = kbase_device_alloc(); + if (!kbdev) { + dev_err(&pdev->dev, "Allocate device failed\n"); + return -ENOMEM; + } + + kbdev->dev = &pdev->dev; + dev_set_drvdata(kbdev->dev, kbdev); + + err = kbase_device_init(kbdev); + + if (err) { + if (err == -EPROBE_DEFER) + dev_err(kbdev->dev, "Device initialization Deferred\n"); + else + dev_err(kbdev->dev, "Device initialization failed\n"); + + dev_set_drvdata(kbdev->dev, NULL); + kbase_device_free(kbdev); + } else { +#ifdef MALI_KBASE_BUILD + dev_info(kbdev->dev, + "Probed as %s\n", dev_name(kbdev->mdev.this_device)); +#endif /* MALI_KBASE_BUILD */ + kbase_increment_device_id(); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + mutex_lock(&kbdev->pm.lock); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); + mutex_unlock(&kbdev->pm.lock); +#endif + } + + /* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT + gpu_asv_calibration_start(); +#endif + return err; +} + +#undef KBASEP_DEFAULT_REGISTER_HISTORY_SIZE + +/** + * kbase_device_suspend - Suspend callback from the OS. + * + * This is called by Linux when the device should suspend. + * + * @dev: The device to suspend + * + * Return: A standard Linux error code + */ +static int kbase_device_suspend(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + /* MALI_SEC_INTEGRATION */ + struct exynos_context *platform = NULL; + + if (!kbdev) + return -ENODEV; + + /* MALI_SEC_INTEGRATION */ + platform = (struct exynos_context *)kbdev->platform_context; + if (!platform) + return -ENODEV; + + kbase_pm_suspend(kbdev); + +#if defined(CONFIG_MALI_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + dev_dbg(dev, "Callback %s\n", __func__); + if (kbdev->devfreq) { + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); + flush_workqueue(kbdev->devfreq_queue.workq); + } +#endif + + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, KBASE_DEVICE_SUSPEND, NULL, platform->power_runtime_suspend_ret); + + return 0; +} + +/** + * kbase_device_resume - Resume callback from the OS. + * + * This is called by Linux when the device should resume from suspension. 
+ * + * @dev: The device to resume + * + * Return: A standard Linux error code + */ +static int kbase_device_resume(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + /* MALI_SEC_INTEGRATION */ + struct exynos_context *platform = NULL; + + if (!kbdev) + return -ENODEV; + + /* MALI_SEC_INTEGRATION */ + platform = (struct exynos_context *)kbdev->platform_context; + if (!platform) + return -ENODEV; + + kbase_pm_resume(kbdev); + +#if defined(CONFIG_MALI_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + dev_dbg(dev, "Callback %s\n", __func__); + if (kbdev->devfreq) { + mutex_lock(&kbdev->pm.lock); + if (kbdev->pm.active_count > 0) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); + mutex_unlock(&kbdev->pm.lock); + flush_workqueue(kbdev->devfreq_queue.workq); + } +#endif + + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, KBASE_DEVICE_RESUME, NULL, platform->power_runtime_resume_ret); + + return 0; +} + +/** + * kbase_device_runtime_suspend - Runtime suspend callback from the OS. + * + * This is called by Linux when the device should prepare for a condition in + * which it will not be able to communicate with the CPU(s) and RAM due to + * power management. + * + * @dev: The device to suspend + * + * Return: A standard Linux error code + */ +#ifdef KBASE_PM_RUNTIME +static int kbase_device_runtime_suspend(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + dev_dbg(dev, "Callback %s\n", __func__); +#if defined(CONFIG_MALI_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + if (kbdev->devfreq) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); +#endif + + /* MALI_SEC_INTEGRATION */ + if (kbdev->pm.active_count > 0) + return -EBUSY; + + if (kbdev->pm.backend.callback_power_runtime_off) { + kbdev->pm.backend.callback_power_runtime_off(kbdev); + dev_dbg(dev, "runtime suspend\n"); + } + return 0; +} +#endif /* KBASE_PM_RUNTIME */ + +/** + * kbase_device_runtime_resume - Runtime resume callback from the OS. + * + * This is called by Linux when the device should go into a fully active state. + * + * @dev: The device to suspend + * + * Return: A standard Linux error code + */ + +#ifdef KBASE_PM_RUNTIME +static int kbase_device_runtime_resume(struct device *dev) +{ + int ret = 0; + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + dev_dbg(dev, "Callback %s\n", __func__); + if (kbdev->pm.backend.callback_power_runtime_on) { + ret = kbdev->pm.backend.callback_power_runtime_on(kbdev); + dev_dbg(dev, "runtime resume\n"); + } + +#if defined(CONFIG_MALI_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + if (kbdev->devfreq) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); +#endif + + return ret; +} +#endif /* KBASE_PM_RUNTIME */ + + +#ifdef KBASE_PM_RUNTIME +/** + * kbase_device_runtime_idle - Runtime idle callback from the OS. + * @dev: The device to suspend + * + * This is called by Linux when the device appears to be inactive and it might + * be placed into a low power state. + * + * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, + * otherwise a standard Linux error code + */ +static int kbase_device_runtime_idle(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + dev_dbg(dev, "Callback %s\n", __func__); + /* Use platform specific implementation if it exists. 
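+ * Returning a non-zero value from the runtime_idle callback tells the
+ * runtime PM core not to suspend the device at this point, which is why
+ * the fallback below returns 1.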
*/ + if (kbdev->pm.backend.callback_power_runtime_idle) + return kbdev->pm.backend.callback_power_runtime_idle(kbdev); + + /* MALI_SEC_INTEGRATION */ + /* Runtime IDLE must be return 1 for turn on next time by RuntimePM API!! */ + return 1; + +} +#endif /* KBASE_PM_RUNTIME */ + +/* The power management operations for the platform driver. + */ +static const struct dev_pm_ops kbase_pm_ops = { + /* MALI_SEC_INTEGRATION */ + .suspend = kbase_device_suspend, + .resume = kbase_device_resume, +#ifdef KBASE_PM_RUNTIME + .runtime_suspend = kbase_device_runtime_suspend, + .runtime_resume = kbase_device_runtime_resume, + .runtime_idle = kbase_device_runtime_idle, +#endif /* KBASE_PM_RUNTIME */ +}; + +#ifdef CONFIG_OF +static const struct of_device_id kbase_dt_ids[] = { + /* MALI_SEC_INTEGRATION */ + { .compatible = "arm,mali", }, + { .compatible = "arm,malit6xx" }, + { .compatible = "arm,mali-midgard" }, + { .compatible = "arm,mali-bifrost" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, kbase_dt_ids); +#endif + +static struct platform_driver kbase_platform_driver = { + .probe = kbase_platform_device_probe, + .remove = kbase_platform_device_remove, + .driver = { + .name = kbase_drv_name, + .owner = THIS_MODULE, + .pm = &kbase_pm_ops, + .of_match_table = of_match_ptr(kbase_dt_ids), + }, +}; + +/* + * The driver will not provide a shortcut to create the Mali platform device + * anymore when using Device Tree. + */ +#ifdef CONFIG_OF +module_platform_driver(kbase_platform_driver); +#else + +static int __init kbase_driver_init(void) +{ + int ret; + + ret = kbase_platform_register(); + if (ret) + return ret; + + ret = platform_driver_register(&kbase_platform_driver); + + if (ret) + kbase_platform_unregister(); + + return ret; +} + +static void __exit kbase_driver_exit(void) +{ + platform_driver_unregister(&kbase_platform_driver); + kbase_platform_unregister(); +} + +module_init(kbase_driver_init); +module_exit(kbase_driver_exit); + +#endif /* CONFIG_OF */ + +MODULE_LICENSE("GPL"); +MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ + __stringify(BASE_UK_VERSION_MAJOR) "." \ + __stringify(BASE_UK_VERSION_MINOR) ")"); + +#define CREATE_TRACE_POINTS +/* Create the trace points (otherwise we just get code to call a tracepoint) */ +#include "mali_linux_trace.h" + +#ifdef CONFIG_MALI_GATOR_SUPPORT +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); + +void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value) +{ + trace_mali_pm_status(dev_id, event, value); +} + +void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id) +{ + trace_mali_job_slots_event(dev_id, event, + (kctx != NULL ? kctx->tgid : 0), + (kctx != NULL ? kctx->pid : 0), + atom_id); +} + +void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value) +{ + trace_mali_page_fault_insert_pages(dev_id, event, value); +} + +void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event) +{ + trace_mali_total_alloc_pages_change(dev_id, event); +} +#endif /* CONFIG_MALI_GATOR_SUPPORT */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_cs_experimental.h b/drivers/gpu/arm/b_r26p0/mali_kbase_cs_experimental.h new file mode 100644 index 000000000000..caba2cd7a0e3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_cs_experimental.h @@ -0,0 +1,51 @@ +/* + * + * (C) COPYRIGHT ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + */ + +#ifndef _KBASE_CS_EXPERIMENTAL_H_ +#define _KBASE_CS_EXPERIMENTAL_H_ + +#include + +/** + * mali_kbase_print_cs_experimental() - Print a string for every Core Services + * experimental feature that is enabled + */ +static inline void mali_kbase_print_cs_experimental(void) +{ +#if MALI_INCREMENTAL_RENDERING + pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled"); +#endif /* MALI_INCREMENTAL_RENDERING */ +} + +#endif /* _KBASE_CS_EXPERIMENTAL_H_ */ + + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.c b/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.c new file mode 100644 index 000000000000..cea91bcaf02e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.c @@ -0,0 +1,344 @@ +/* + * + * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +#include +#include "mali_kbase_ctx_sched.h" +#include "tl/mali_kbase_tracepoints.h" + +/* Helper for ktrace */ +#if KBASE_KTRACE_ENABLE +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) +{ + return atomic_read(&kctx->refcount); +} +#else /* KBASE_KTRACE_ENABLE */ +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) +{ + CSTD_UNUSED(kctx); + return 0; +} +#endif /* KBASE_KTRACE_ENABLE */ + +int kbase_ctx_sched_init(struct kbase_device *kbdev) +{ + int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; + + /* These two must be recalculated if nr_hw_address_spaces changes + * (e.g. 
for HW workarounds) */ + kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; + kbdev->as_free = as_present; /* All ASs initially free */ + + memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx)); + + return 0; +} + +void kbase_ctx_sched_term(struct kbase_device *kbdev) +{ + s8 i; + + /* Sanity checks */ + for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { + WARN_ON(kbdev->as_to_kctx[i] != NULL); + WARN_ON(!(kbdev->as_free & (1u << i))); + } +} + +/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space + * + * @kbdev: The context for which to find a free address space + * + * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID + * + * This function returns an address space available for use. It would prefer + * returning an AS that has been previously assigned to the context to + * avoid having to reprogram the MMU. + */ +static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + int free_as; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* First check if the previously assigned AS is available */ + if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && + (kbdev->as_free & (1u << kctx->as_nr))) + return kctx->as_nr; + + /* The previously assigned AS was taken, we'll be returning any free + * AS at this point. + */ + free_as = ffs(kbdev->as_free) - 1; + if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces) + return free_as; + + return KBASEP_AS_NR_INVALID; +} + +int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(!kbdev->pm.backend.gpu_powered); + + if (atomic_inc_return(&kctx->refcount) == 1) { + int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); + + if (free_as != KBASEP_AS_NR_INVALID) { + kbdev->as_free &= ~(1u << free_as); + /* Only program the MMU if the context has not been + * assigned the same address space before. + */ + if (free_as != kctx->as_nr) { + struct kbase_context *const prev_kctx = + kbdev->as_to_kctx[free_as]; + + if (prev_kctx) { + WARN_ON(atomic_read(&prev_kctx->refcount) != 0); + kbase_mmu_disable(prev_kctx); + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, prev_kctx->id); + prev_kctx->as_nr = KBASEP_AS_NR_INVALID; + } + + kctx->as_nr = free_as; + kbdev->as_to_kctx[free_as] = kctx; + KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( + kbdev, kctx->id, free_as); + kbase_mmu_update(kbdev, &kctx->mmu, + kctx->as_nr); + } + } else { + atomic_dec(&kctx->refcount); + + /* Failed to find an available address space, we must + * be returning an error at this point. 
+ */ + WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID); + } + } + + return kctx->as_nr; +} + +void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON(atomic_read(&kctx->refcount) == 0); + WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); + WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + + atomic_inc(&kctx->refcount); +} + +void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + int new_ref_count; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + new_ref_count = atomic_dec_return(&kctx->refcount); + if (new_ref_count == 0) { + kbdev->as_free |= (1u << kctx->as_nr); + if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); + } + } + + KBASE_KTRACE_ADD(kbdev, SCHED_RELEASE_CTX, kctx, new_ref_count); +} + +void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(atomic_read(&kctx->refcount) != 0); + + if (kctx->as_nr != KBASEP_AS_NR_INVALID) { + if (kbdev->pm.backend.gpu_powered) + kbase_mmu_disable(kctx); + + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + } +} + +void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) +{ + s8 i; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(!kbdev->pm.backend.gpu_powered); + + for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { + struct kbase_context *kctx; + + kctx = kbdev->as_to_kctx[i]; + if (kctx) { + if (atomic_read(&kctx->refcount)) { + WARN_ON(kctx->as_nr != i); + + kbase_mmu_update(kbdev, &kctx->mmu, + kctx->as_nr); + kbase_ctx_flag_clear(kctx, + KCTX_AS_DISABLED_ON_FAULT); + } else { + /* This context might have been assigned an + * AS before, clear it. 
+ */ + if (kctx->as_nr != KBASEP_AS_NR_INVALID) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + } + } + } else { + kbase_mmu_disable_as(kbdev, i); + } + } +} + +struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + struct kbase_device *kbdev, size_t as_nr) +{ + unsigned long flags; + struct kbase_context *found_kctx = NULL; + + if (WARN_ON(kbdev == NULL)) + return NULL; + + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return NULL; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + found_kctx = kbdev->as_to_kctx[as_nr]; + + if (found_kctx != NULL) + kbase_ctx_sched_retain_ctx_refcount(found_kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return found_kctx; +} + +struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + size_t as_nr) +{ + struct kbase_context *found_kctx; + + if (WARN_ON(kbdev == NULL)) + return NULL; + + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return NULL; + + found_kctx = kbdev->as_to_kctx[as_nr]; + + if (WARN_ON(!found_kctx)) + return NULL; + + if (WARN_ON(atomic_read(&found_kctx->refcount) <= 0)) + return NULL; + + return found_kctx; +} + +bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx) +{ + bool result = false; + int as_nr; + + if (WARN_ON(kctx == NULL)) + return result; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + as_nr = kctx->as_nr; + if (atomic_read(&kctx->refcount) > 0) { + KBASE_DEBUG_ASSERT(as_nr >= 0); + + kbase_ctx_sched_retain_ctx_refcount(kctx); + KBASE_KTRACE_ADD(kctx->kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, + kbase_ktrace_get_ctx_refcnt(kctx)); + result = true; + } + + return result; +} + +bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx) +{ + unsigned long flags; + bool result = false; + + if (WARN_ON(kctx == NULL)) + return result; + + if (WARN_ON(kctx->kbdev == NULL)) + return result; + + mutex_lock(&kctx->kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + result = kbase_ctx_sched_inc_refcount_nolock(kctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->mmu_hw_mutex); + + return result; +} + +void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) +{ + unsigned long flags; + + if (WARN_ON(!kctx)) + return; + + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + + if (!WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID) && + !WARN_ON(atomic_read(&kctx->refcount) <= 0)) + kbase_ctx_sched_release_ctx(kctx); + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.h b/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.h new file mode 100644 index 000000000000..1affa719e6dc --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_ctx_sched.h @@ -0,0 +1,209 @@ +/* + * + * (C) COPYRIGHT 2017-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_CTX_SCHED_H_ +#define _KBASE_CTX_SCHED_H_ + +#include + +/** + * The Context Scheduler manages address space assignment and reference + * counting to kbase_context. The interface has been designed to minimise + * interactions between the Job Scheduler and Power Management/MMU to support + * the existing Job Scheduler interface. + * + * The initial implementation of the Context Scheduler does not schedule + * contexts. Instead it relies on the Job Scheduler to make decisions of + * when to schedule/evict contexts if address spaces are starved. In the + * future, once an interface between the CS and JS has been devised to + * provide enough information about how each context is consuming GPU resources, + * those decisions can be made in the CS itself, thereby reducing duplicated + * code. + */ + +/** + * kbase_ctx_sched_init - Initialise the context scheduler + * @kbdev: The device for which the context scheduler needs to be initialised + * + * This must be called during device initialisation. The number of hardware + * address spaces must already be established before calling this function. + * + * Return: 0 for success, otherwise failure + */ +int kbase_ctx_sched_init(struct kbase_device *kbdev); + +/** + * kbase_ctx_sched_term - Terminate the context scheduler + * @kbdev: The device for which the context scheduler needs to be terminated + * + * This must be called during device termination after all contexts have been + * destroyed. + */ +void kbase_ctx_sched_term(struct kbase_device *kbdev); + +/** + * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context + * @kctx: The context to which to retain a reference + * + * This function should be called whenever an address space should be assigned + * to a context and programmed onto the MMU. It should typically be called + * when jobs are ready to be submitted to the GPU. + * + * It can be called as many times as necessary. The address space will be + * assigned to the context for as long as there is a reference to said context. + * + * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be + * held whilst calling this function. + * + * Return: The address space that the context has been assigned to or + * KBASEP_AS_NR_INVALID if no address space was available. + */ +int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_retain_ctx_refcount + * @kctx: The context to which to retain a reference + * + * This function only retains a reference to the context. It must be called + * only when the context already has a reference. + * + * This is typically called inside an atomic session where we know the context + * is already scheduled in but want to take an extra reference to ensure that + * it doesn't get descheduled. + * + * The kbase_device::hwaccess_lock must be held whilst calling this function + */ +void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context + * @kctx: The context from which to release a reference + * + * This function should be called whenever an address space could be unassigned + * from a context. 
When there are no more references to said context, the + * address space previously assigned to this context shall be reassigned to + * other contexts as needed. + * + * The kbase_device::hwaccess_lock must be held whilst calling this function + */ +void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space + * @kctx: The context to be removed + * + * This function should be called when a context is being destroyed. The + * context must no longer have any reference. If it has been assigned an + * address space before then the AS will be unprogrammed. + * + * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be + * held whilst calling this function. + */ +void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_restore_all_as - Reprogram all address spaces + * @kbdev: The device for which address spaces to be reprogrammed + * + * This function shall reprogram all address spaces previously assigned to + * contexts. It can be used after the GPU is reset. + * + * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be + * held whilst calling this function. + */ +void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); + +/** + * kbase_ctx_sched_as_to_ctx_refcount - Lookup a context based on its current + * address space and ensure that is stays scheduled in + * @kbdev: The device for which the returned context must belong + * @as_nr: address space assigned to the context of interest + * + * The context is refcounted as being busy to prevent it from scheduling + * out. It must be released with kbase_ctx_sched_release_ctx() when it is no + * longer required to stay scheduled in. + * + * This function can safely be called from IRQ context. + * + * The following locking conditions are made on the caller: + * * it must not hold the kbase_device::hwaccess_lock, because it will be used + * internally. + * + * Return: a valid struct kbase_context on success, which has been refcounted + * as being busy or return NULL on failure, indicating that no context was found + * in as_nr. + */ +struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + struct kbase_device *kbdev, size_t as_nr); + +/** + * kbase_ctx_sched_as_to_ctx - Lookup a context based on its current address + * space + * @kbdev: The device for which the returned context must belong + * @as_nr: address space assigned to the context of interest + * + * Return: a valid struct kbase_context on success or NULL on failure, + * indicating that no context was found in as_nr. + */ +struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + size_t as_nr); + +/** + * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy, + * preventing it from being scheduled out. + * @kctx: Context to be refcounted + * + * The following locks must be held by the caller: + * * kbase_device::mmu_hw_mutex + * * kbase_device::hwaccess_lock + * + * Return: true if refcount succeeded, and the context will not be scheduled + * out, false if the refcount failed (because the context is being/has been + * scheduled out). + */ +bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_inc_refcount - Refcount a context as being busy, preventing + * it from being scheduled out. 
+ * @kctx: Context to be refcounted + * + * The following locking conditions are made on the caller: + * * it must not hold kbase_device::mmu_hw_mutex and + * kbase_device::hwaccess_lock, because they will be used internally. + * + * Return: true if refcount succeeded, and the context will not be scheduled + * out, false if the refcount failed (because the context is being/has been + * scheduled out). + */ +bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_release_ctx_lock - Release a reference count of a context + * @kctx: Context for which refcount should be decreased + * + * Effectivelly, this is a wrapper for kbase_ctx_sched_release_ctx, but + * kbase_device::hwaccess_lock is required NOT to be locked. + */ +void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); + +#endif /* _KBASE_CTX_SCHED_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_debug.c b/drivers/gpu/arm/b_r26p0/mali_kbase_debug.c new file mode 100644 index 000000000000..118f787fb74c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_debug.c @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include + +static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { + NULL, + NULL +}; + +void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) +{ + kbasep_debug_assert_registered_cb.func = func; + kbasep_debug_assert_registered_cb.param = param; +} + +void kbasep_debug_assert_call_hook(void) +{ + if (kbasep_debug_assert_registered_cb.func != NULL) + kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); +} +KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_debug.h b/drivers/gpu/arm/b_r26p0/mali_kbase_debug.h new file mode 100644 index 000000000000..2fdb72d943e4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_debug.h @@ -0,0 +1,169 @@ +/* + * + * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_DEBUG_H +#define _KBASE_DEBUG_H + +#include + +/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */ +#define KBASE_DEBUG_SKIP_TRACE 0 + +/** @brief If different from 0, the trace will only contain the file and line. */ +#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 + +/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */ +#ifndef KBASE_DEBUG_DISABLE_ASSERTS +#ifdef CONFIG_MALI_DEBUG +#define KBASE_DEBUG_DISABLE_ASSERTS 0 +#else +#define KBASE_DEBUG_DISABLE_ASSERTS 1 +#endif +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ +typedef void (kbase_debug_assert_hook) (void *); + +struct kbasep_debug_assert_cb { + kbase_debug_assert_hook *func; + void *param; +}; + +/** + * @def KBASEP_DEBUG_PRINT_TRACE + * @brief Private macro containing the format of the trace to display before every message + * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME + */ +#if !KBASE_DEBUG_SKIP_TRACE +#define KBASEP_DEBUG_PRINT_TRACE \ + "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) +#if !KBASE_DEBUG_SKIP_FUNCTION_NAME +#define KBASEP_DEBUG_PRINT_FUNCTION __func__ +#else +#define KBASEP_DEBUG_PRINT_FUNCTION "" +#endif +#else +#define KBASEP_DEBUG_PRINT_TRACE "" +#endif + +/** + * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) + * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. + * @param trace location in the code from where the message is printed + * @param function function from where the message is printed + * @param ... Format string followed by format arguments. + * @note function parameter cannot be concatenated with other strings + */ +/* Select the correct system output function*/ +#ifdef CONFIG_MALI_DEBUG +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ + do { \ + pr_err("Mali: %s function:%s ", trace, function);\ + pr_err(__VA_ARGS__);\ + pr_err("\n");\ + } while (false) +#else +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() +#endif + +#ifdef CONFIG_MALI_DEBUG +#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook() +#else +#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP() +#endif + +/** + * @def KBASE_DEBUG_ASSERT(expr) + * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false + * + * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * + * @param expr Boolean expression + */ +#define KBASE_DEBUG_ASSERT(expr) \ + KBASE_DEBUG_ASSERT_MSG(expr, #expr) + +#if KBASE_DEBUG_DISABLE_ASSERTS +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() +#else + /** + * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) + * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false + * + * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * + * @param expr Boolean expression + * @param ... Message to display when @a expr is false, as a format string followed by format arguments. + */ +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) 
\ + do { \ + if (!(expr)) { \ + KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ + KBASE_CALL_ASSERT_HOOK();\ + BUG();\ + } \ + } while (false) +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** + * @def KBASE_DEBUG_CODE( X ) + * @brief Executes the code inside the macro only in debug mode + * + * @param X Code to compile only in debug mode. + */ +#ifdef CONFIG_MALI_DEBUG +#define KBASE_DEBUG_CODE(X) X +#else +#define KBASE_DEBUG_CODE(X) CSTD_NOP() +#endif /* CONFIG_MALI_DEBUG */ + +/** @} */ + +/** + * @brief Register a function to call on ASSERT + * + * Such functions will \b only be called during Debug mode, and for debugging + * features \b only. Do not rely on them to be called in general use. + * + * To disable the hook, supply NULL to \a func. + * + * @note This function is not thread-safe, and should only be used to + * register/deregister once in the module's lifetime. + * + * @param[in] func the function to call when an assert is triggered. + * @param[in] param the parameter to pass to \a func when calling it + */ +void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); + +/** + * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() + * + * @note This function is not thread-safe with respect to multiple threads + * registering functions and parameters with + * kbase_debug_assert_register_hook(). Otherwise, thread safety is the + * responsibility of the registered hook. + */ +void kbasep_debug_assert_call_hook(void); + +#endif /* _KBASE_DEBUG_H */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/b_r26p0/mali_kbase_debug_job_fault.c new file mode 100644 index 000000000000..dbc774d56ab4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_debug_job_fault.c @@ -0,0 +1,566 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#ifdef CONFIG_DEBUG_FS + +static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) +{ + struct list_head *event_list = &kbdev->job_fault_event_list; + unsigned long flags; + bool ret; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + ret = !list_empty(event_list); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + return ret; +} + +static void kbase_ctx_remove_pending_event(struct kbase_context *kctx) +{ + struct list_head *event_list = &kctx->kbdev->job_fault_event_list; + struct base_job_fault_event *event; + unsigned long flags; + + spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags); + list_for_each_entry(event, event_list, head) { + if (event->katom->kctx == kctx) { + list_del(&event->head); + spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); + + wake_up(&kctx->kbdev->job_fault_resume_wq); + flush_work(&event->job_fault_work); + + /* job_fault_event_list can only have a single atom for + * each context. + */ + return; + } + } + spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); +} + +static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct list_head *event_list = &kctx->kbdev->job_fault_event_list; + struct base_job_fault_event *event; + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + if (list_empty(event_list)) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + return true; + } + list_for_each_entry(event, event_list, head) { + if (event->katom->kctx == kctx) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, + flags); + return false; + } + } + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + return true; +} + +static int wait_for_job_fault(struct kbase_device *kbdev) +{ +#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE + int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq, + kbase_is_job_fault_event_pending(kbdev), + msecs_to_jiffies(2000)); + if (ret == 0) + return -EAGAIN; + else if (ret > 0) + return 0; + else + return ret; +#else + return wait_event_interruptible(kbdev->job_fault_wq, + kbase_is_job_fault_event_pending(kbdev)); +#endif +} + +/* wait until the fault happen and copy the event */ +static int kbase_job_fault_event_wait(struct kbase_device *kbdev, + struct base_job_fault_event *event) +{ + struct list_head *event_list = &kbdev->job_fault_event_list; + struct base_job_fault_event *event_in; + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + while (list_empty(event_list)) { + int err; + + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + err = wait_for_job_fault(kbdev); + if (err) + return err; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + } + + event_in = list_entry(event_list->next, + struct base_job_fault_event, head); + event->event_code = event_in->event_code; + event->katom = event_in->katom; + + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + return 0; + +} + +/* remove the event from the queue */ +static struct base_job_fault_event *kbase_job_fault_event_dequeue( + struct kbase_device *kbdev, struct list_head *event_list) +{ + struct base_job_fault_event *event; + + event = list_entry(event_list->next, + struct base_job_fault_event, head); + list_del(event_list->next); + + return event; + +} + +/* Remove all the 
following atoms queued after the failed atom in the same context, + * then call the postponed bottom half of job done so that + * this context can be rescheduled. + */ +static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) +{ + struct list_head *event_list = &kctx->job_fault_resume_event_list; + + while (!list_empty(event_list)) { + struct base_job_fault_event *event; + + event = kbase_job_fault_event_dequeue(kctx->kbdev, + &kctx->job_fault_resume_event_list); + kbase_jd_done_worker(&event->katom->work); + } + +} + +static void kbase_job_fault_resume_worker(struct work_struct *data) +{ + struct base_job_fault_event *event = container_of(data, + struct base_job_fault_event, job_fault_work); + struct kbase_context *kctx; + struct kbase_jd_atom *katom; + + katom = event->katom; + kctx = katom->kctx; + + dev_info(kctx->kbdev->dev, "Job dumping wait\n"); + + /* Once woken up, check whether the queue is empty or whether the + * failed atom belongs to a different context; either case means the + * failed job has been dumped, so stop waiting. Note that the + * job_fault_event_list should never hold two atoms belonging to + * the same context. + */ + wait_event(kctx->kbdev->job_fault_resume_wq, + kbase_ctx_has_no_event_pending(kctx)); + + atomic_set(&kctx->job_fault_count, 0); + kbase_jd_done_worker(&katom->work); + + /* For atoms that were scheduled while the failed job was being dumped, + * the job_done_worker was held back. Rerun it now that the dump + * has finished. + */ + kbase_job_fault_resume_event_cleanup(kctx); + + dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); +} + +static struct base_job_fault_event *kbase_job_fault_event_queue( + struct list_head *event_list, + struct kbase_jd_atom *atom, + u32 completion_code) +{ + struct base_job_fault_event *event; + + event = &atom->fault_event; + + event->katom = atom; + event->event_code = completion_code; + + list_add_tail(&event->head, event_list); + + return event; + +} + +static void kbase_job_fault_event_post(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, u32 completion_code) +{ + struct base_job_fault_event *event; + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, + katom, completion_code); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + wake_up_interruptible(&kbdev->job_fault_wq); + + INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); + queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); + + dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", + katom->kctx->tgid, katom->kctx->id); + +} + +/* + * This function processes the job fault: + * get a copy of the registers, + * send the failed job dump event, + * and wait on a queue until the job dump has finished. + */ + +bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, + u32 completion_code) +{ + struct kbase_context *kctx = katom->kctx; + + /* Check if a dump is already in progress. + * Only one atom per context can be dumped at a time; + * an atom belonging to a different context can still be dumped. + */ + if (atomic_read(&kctx->job_fault_count) > 0) { + kbase_job_fault_event_queue( + &kctx->job_fault_resume_event_list, + katom, completion_code); + dev_info(kctx->kbdev->dev, "queue:%d\n", + kbase_jd_atom_id(kctx, katom)); + return true; + } + + if (kbase_ctx_flag(kctx, KCTX_DYING)) + return false; + + if (atomic_read(&kctx->kbdev->job_fault_debug) > 0)
{ + + if (completion_code != BASE_JD_EVENT_DONE) { + + if (kbase_job_fault_get_reg_snapshot(kctx) == false) { + dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); + return false; + } + + kbase_job_fault_event_post(kctx->kbdev, katom, + completion_code); + atomic_inc(&kctx->job_fault_count); + dev_info(kctx->kbdev->dev, "post:%d\n", + kbase_jd_atom_id(kctx, katom)); + return true; + + } + } + return false; + +} + +static int debug_job_fault_show(struct seq_file *m, void *v) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event = (struct base_job_fault_event *)v; + struct kbase_context *kctx = event->katom->kctx; + int i; + + dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", + kctx->tgid, kctx->id, event->reg_offset); + + if (kctx->reg_dump == NULL) { + dev_warn(kbdev->dev, "reg dump is NULL"); + return -1; + } + + if (kctx->reg_dump[event->reg_offset] == + REGISTER_DUMP_TERMINATION_FLAG) { + /* Return an error here to stop the read; the + * following next() will then not be called, and stop() can + * take the real event resource and release it. + */ + return -1; + } + + if (event->reg_offset == 0) + seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); + + for (i = 0; i < 50; i++) { + if (kctx->reg_dump[event->reg_offset] == + REGISTER_DUMP_TERMINATION_FLAG) { + break; + } + seq_printf(m, "%08x: %08x\n", + kctx->reg_dump[event->reg_offset], + kctx->reg_dump[1+event->reg_offset]); + event->reg_offset += 2; + + } + + + return 0; +} +static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event = (struct base_job_fault_event *)v; + + dev_info(kbdev->dev, "debug job fault seq next:%d, %d", + event->reg_offset, (int)*pos); + + return event; +} + +static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event; + + dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos); + + /* The condition here is tricky: it must ensure that either no + * fault has happened and dumping has not yet started, + * or that dumping has already finished. + */ + if (*pos == 0) { + event = kmalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return NULL; + event->reg_offset = 0; + if (kbase_job_fault_event_wait(kbdev, event)) { + kfree(event); + return NULL; + } + + /* The cache flush workaround is normally called in the bottom + * half of job done, but we delayed it. Clean the cache earlier + * here so that the GPU memory dump is correct. + */ + kbase_backend_cache_clean(kbdev, event->katom); + } else + return NULL; + + return event; +} + +static void debug_job_fault_stop(struct seq_file *m, void *v) +{ + struct kbase_device *kbdev = m->private; + + /* Wake up the kbase_jd_done_worker after stop. The debug daemon needs + * to take the memory dump before the register dump, + * otherwise the memory dump may be incorrect.
+ */ + + if (v != NULL) { + kfree(v); + dev_info(kbdev->dev, "debug job fault seq stop stage 1"); + + } else { + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + if (!list_empty(&kbdev->job_fault_event_list)) { + kbase_job_fault_event_dequeue(kbdev, + &kbdev->job_fault_event_list); + wake_up(&kbdev->job_fault_resume_wq); + } + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + dev_info(kbdev->dev, "debug job fault seq stop stage 2"); + } + +} + +static const struct seq_operations ops = { + .start = debug_job_fault_start, + .next = debug_job_fault_next, + .stop = debug_job_fault_stop, + .show = debug_job_fault_show, +}; + +static int debug_job_fault_open(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + + if (atomic_cmpxchg(&kbdev->job_fault_debug, 0, 1) == 1) { + dev_warn(kbdev->dev, "debug job fault is busy, only a single client is allowed"); + return -EBUSY; + } + + seq_open(file, &ops); + + ((struct seq_file *)file->private_data)->private = kbdev; + dev_info(kbdev->dev, "debug job fault seq open"); + + + return 0; + +} + +static int debug_job_fault_release(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + struct list_head *event_list = &kbdev->job_fault_event_list; + unsigned long flags; + + seq_release(in, file); + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + + /* Disable job fault dumping. This will let kbase run jobs as normal, + * without blocking waiting for a job_fault client to read failed jobs. + * + * After this a new client may open the file, and may re-enable job + * fault dumping, but the job_fault_event_lock we hold here will block + * that from interfering until after we've completed the cleanup. + */ + atomic_dec(&kbdev->job_fault_debug); + + /* Clean up the unprocessed job faults so that all the suspended + * contexts can be rescheduled: remove all the failed atoms that + * belong to different contexts and resume all the contexts that + * were suspended due to a failed job.
+ */ + while (!list_empty(event_list)) { + kbase_job_fault_event_dequeue(kbdev, event_list); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + wake_up(&kbdev->job_fault_resume_wq); + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + } + + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + dev_info(kbdev->dev, "debug job fault seq close"); + + return 0; +} + +static const struct file_operations kbasep_debug_job_fault_fops = { + .owner = THIS_MODULE, + .open = debug_job_fault_open, + .read = seq_read, + .llseek = seq_lseek, + .release = debug_job_fault_release, +}; + +/* + * Initialize debugfs entry for job fault dump + */ +void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("job_fault", 0400, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_debug_job_fault_fops); +} + + +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +{ + + INIT_LIST_HEAD(&kbdev->job_fault_event_list); + + init_waitqueue_head(&(kbdev->job_fault_wq)); + init_waitqueue_head(&(kbdev->job_fault_resume_wq)); + spin_lock_init(&kbdev->job_fault_event_lock); + + kbdev->job_fault_resume_workq = alloc_workqueue( + "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); + if (!kbdev->job_fault_resume_workq) + return -ENOMEM; + + atomic_set(&kbdev->job_fault_debug, 0); + + return 0; +} + +/* + * Release the relevant resources per device + */ +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->job_fault_resume_workq); +} + + +/* + * Initialize the relevant data structures per context + */ +void kbase_debug_job_fault_context_init(struct kbase_context *kctx) +{ + + /* Allocate twice the register range size, because this memory + * holds both the register addresses and their values. + */ + kctx->reg_dump = vmalloc(0x4000 * 2); + if (kctx->reg_dump == NULL) + return; + + if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { + vfree(kctx->reg_dump); + kctx->reg_dump = NULL; + } + INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); + atomic_set(&kctx->job_fault_count, 0); + +} + +/* + * Release the relevant resources per context + */ +void kbase_debug_job_fault_context_term(struct kbase_context *kctx) +{ + vfree(kctx->reg_dump); +} + +void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) +{ + WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); + + kbase_ctx_remove_pending_event(kctx); +} + +#else /* CONFIG_DEBUG_FS */ + +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +{ + return 0; +} + +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/b_r26p0/mali_kbase_debug_job_fault.h new file mode 100644 index 000000000000..ef69627cdce8 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_debug_job_fault.h @@ -0,0 +1,116 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_JOB_FAULT_H +#define _KBASE_DEBUG_JOB_FAULT_H + +#include +#include + +#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF + +/** + * kbase_debug_job_fault_dev_init - Create the fault event wait queue + * per device and initialize the required lists. + * @kbdev: Device pointer + * + * Return: Zero on success or a negative error code. + */ +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs + * @kbdev: Device pointer + */ +void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_dev_term - Clean up resources created in + * kbase_debug_job_fault_dev_init. + * @kbdev: Device pointer + */ +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_context_init - Initialize the relevant + * data structure per context + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_context_init(struct kbase_context *kctx); + +/** + * kbase_debug_job_fault_context_term - Release the relevant + * resource per context + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_context_term(struct kbase_context *kctx); + +/** + * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault + * dumping on context termination. + * + * This function is called during context termination to unblock the atom for + * which the job fault occurred and also the atoms following it. This is needed + * otherwise the wait for zero jobs could timeout (leading to an assertion + * failure, kernel panic in debug builds) in the pathological case where + * although the thread/daemon capturing the job fault events is running, + * but for some reasons has stopped consuming the events. + * + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx); + +/** + * kbase_debug_job_fault_process - Process the failed job. 
+ * It will send a event and wake up the job fault waiting queue + * Then create a work queue to wait for job dump finish + * This function should be called in the interrupt handler and before + * jd_done that make sure the jd_done_worker will be delayed until the + * job dump finish + * @katom: The failed atom pointer + * @completion_code: the job status + * @return true if dump is going on + */ +bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, + u32 completion_code); + + +/** + * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers + * address during the job fault process, the relevant registers will + * be saved when a job fault happen + * @kctx: KBase context pointer + * @reg_range: Maximum register address space + * @return true if initializing successfully + */ +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, + int reg_range); + +/** + * kbase_job_fault_get_reg_snapshot - Read the interested registers for + * failed job dump + * @kctx: KBase context pointer + * @return true if getting registers successfully + */ +bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); + +#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/b_r26p0/mali_kbase_debug_mem_view.c new file mode 100644 index 000000000000..478813705a41 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_debug_mem_view.c @@ -0,0 +1,313 @@ +/* + * + * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Debugfs interface to dump the memory visible to the GPU + */ + +#include "mali_kbase_debug_mem_view.h" +#include "mali_kbase.h" + +#include +#include + +#ifdef CONFIG_DEBUG_FS + +#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE) +#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) +#endif + +struct debug_mem_mapping { + struct list_head node; + + struct kbase_mem_phy_alloc *alloc; + unsigned long flags; + + u64 start_pfn; + size_t nr_pages; +}; + +struct debug_mem_data { + struct list_head mapping_list; + struct kbase_context *kctx; +}; + +struct debug_mem_seq_off { + struct list_head *lh; + size_t offset; +}; + +static void *debug_mem_start(struct seq_file *m, loff_t *_pos) +{ + struct debug_mem_data *mem_data = m->private; + struct debug_mem_seq_off *data; + struct debug_mem_mapping *map; + loff_t pos = *_pos; + + list_for_each_entry(map, &mem_data->mapping_list, node) { + if (pos >= map->nr_pages) { + pos -= map->nr_pages; + } else { + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->lh = &map->node; + data->offset = pos; + return data; + } + } + + /* Beyond the end */ + return NULL; +} + +static void debug_mem_stop(struct seq_file *m, void *v) +{ + kfree(v); +} + +static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct debug_mem_data *mem_data = m->private; + struct debug_mem_seq_off *data = v; + struct debug_mem_mapping *map; + + map = list_entry(data->lh, struct debug_mem_mapping, node); + + if (data->offset < map->nr_pages - 1) { + data->offset++; + ++*pos; + return data; + } + + if (list_is_last(data->lh, &mem_data->mapping_list)) { + kfree(data); + return NULL; + } + + data->lh = data->lh->next; + data->offset = 0; + ++*pos; + + return data; +} + +static int debug_mem_show(struct seq_file *m, void *v) +{ + struct debug_mem_data *mem_data = m->private; + struct debug_mem_seq_off *data = v; + struct debug_mem_mapping *map; + int i, j; + struct page *page; + uint32_t *mapping; + pgprot_t prot = PAGE_KERNEL; + + map = list_entry(data->lh, struct debug_mem_mapping, node); + + kbase_gpu_vm_lock(mem_data->kctx); + + if (data->offset >= map->alloc->nents) { + seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn + + data->offset) << PAGE_SHIFT); + goto out; + } + + if (!(map->flags & KBASE_REG_CPU_CACHED)) + prot = pgprot_writecombine(prot); + + page = as_page(map->alloc->pages[data->offset]); + mapping = vmap(&page, 1, VM_MAP, prot); + if (!mapping) + goto out; + + for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) { + seq_printf(m, "%016llx:", i + ((map->start_pfn + + data->offset) << PAGE_SHIFT)); + + for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping)) + seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]); + seq_putc(m, '\n'); + } + + vunmap(mapping); + + seq_putc(m, '\n'); + +out: + kbase_gpu_vm_unlock(mem_data->kctx); + return 0; +} + +static const struct seq_operations ops = { + .start = debug_mem_start, + .next = debug_mem_next, + .stop = debug_mem_stop, + .show = debug_mem_show, +}; + +static int debug_mem_zone_open(struct rb_root *rbtree, + struct debug_mem_data *mem_data) +{ + int ret = 0; + struct rb_node *p; + struct kbase_va_region *reg; + struct debug_mem_mapping *mapping; + + for (p = rb_first(rbtree); p; p = rb_next(p)) { + reg = rb_entry(p, struct kbase_va_region, rblink); + + if (reg->gpu_alloc == NULL) + /* Empty region - ignore */ + continue; + + mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) { + ret = 
-ENOMEM; + goto out; + } + + mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + mapping->start_pfn = reg->start_pfn; + mapping->nr_pages = reg->nr_pages; + mapping->flags = reg->flags; + list_add_tail(&mapping->node, &mem_data->mapping_list); + } + +out: + return ret; +} + +static int debug_mem_open(struct inode *i, struct file *file) +{ + struct kbase_context *const kctx = i->i_private; + struct debug_mem_data *mem_data; + int ret; + + if (get_file_rcu(kctx->filp) == 0) + return -ENOENT; + + ret = seq_open(file, &ops); + if (ret) + goto open_fail; + + mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); + if (!mem_data) { + ret = -ENOMEM; + goto out; + } + + mem_data->kctx = kctx; + + INIT_LIST_HEAD(&mem_data->mapping_list); + + kbase_gpu_vm_lock(kctx); + + ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); + if (0 != ret) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + + ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); + if (0 != ret) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + + ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); + if (0 != ret) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + + kbase_gpu_vm_unlock(kctx); + + ((struct seq_file *)file->private_data)->private = mem_data; + + return 0; + +out: + if (mem_data) { + while (!list_empty(&mem_data->mapping_list)) { + struct debug_mem_mapping *mapping; + + mapping = list_first_entry(&mem_data->mapping_list, + struct debug_mem_mapping, node); + kbase_mem_phy_alloc_put(mapping->alloc); + list_del(&mapping->node); + kfree(mapping); + } + kfree(mem_data); + } + seq_release(i, file); +open_fail: + fput(kctx->filp); + + return ret; +} + +static int debug_mem_release(struct inode *inode, struct file *file) +{ + struct kbase_context *const kctx = inode->i_private; + struct seq_file *sfile = file->private_data; + struct debug_mem_data *mem_data = sfile->private; + struct debug_mem_mapping *mapping; + + seq_release(inode, file); + + while (!list_empty(&mem_data->mapping_list)) { + mapping = list_first_entry(&mem_data->mapping_list, + struct debug_mem_mapping, node); + kbase_mem_phy_alloc_put(mapping->alloc); + list_del(&mapping->node); + kfree(mapping); + } + + kfree(mem_data); + + fput(kctx->filp); + + return 0; +} + +static const struct file_operations kbase_debug_mem_view_fops = { + .owner = THIS_MODULE, + .open = debug_mem_open, + .release = debug_mem_release, + .read = seq_read, + .llseek = seq_lseek +}; + +void kbase_debug_mem_view_init(struct kbase_context *const kctx) +{ + /* Caller already ensures this, but we keep the pattern for + * maintenance safety. + */ + if (WARN_ON(!kctx) || + WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx, + &kbase_debug_mem_view_fops); +} + +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/b_r26p0/mali_kbase_debug_mem_view.h new file mode 100644 index 000000000000..b948b7cd9dd4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_debug_mem_view.h @@ -0,0 +1,40 @@ +/* + * + * (C) COPYRIGHT 2013-2015, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_MEM_VIEW_H +#define _KBASE_DEBUG_MEM_VIEW_H + +#include + +/** + * kbase_debug_mem_view_init - Initialize the mem_view sysfs file + * @kctx: Pointer to kernel base context + * + * This function creates a "mem_view" file which can be used to get a view of + * the context's memory as the GPU sees it (i.e. using the GPU's page tables). + * + * The file is cleaned up by a call to debugfs_remove_recursive() deleting the + * parent directory. + */ +void kbase_debug_mem_view_init(struct kbase_context *kctx); + +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_debugfs_helper.c b/drivers/gpu/arm/b_r26p0/mali_kbase_debugfs_helper.c new file mode 100644 index 000000000000..37e507b164c5 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_debugfs_helper.c @@ -0,0 +1,183 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include + +#include "mali_kbase_debugfs_helper.h" + +/* Arbitrary maximum size to prevent user space allocating too much kernel + * memory + */ +#define DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE (256u) + +/** + * set_attr_from_string - Parse a string to set elements of an array + * + * This is the core of the implementation of + * kbase_debugfs_helper_set_attr_from_string. The only difference between the + * two functions is that this one requires the input string to be writable. + * + * @buf: Input string to parse. Must be nul-terminated! + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +static int set_attr_from_string( + char *const buf, + void *const array, size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + size_t index, err = 0; + char *ptr = buf; + + for (index = 0; index < nelems && *ptr; ++index) { + unsigned long new_size; + size_t len; + char sep; + + /* Drop leading spaces */ + while (*ptr == ' ') + ptr++; + + len = strcspn(ptr, "\n "); + if (len == 0) { + /* No more values (allow this) */ + break; + } + + /* Substitute a nul terminator for a space character + * to make the substring valid for kstrtoul. 
+ */ + sep = ptr[len]; + if (sep == ' ') + ptr[len++] = '\0'; + + err = kstrtoul(ptr, 0, &new_size); + if (err) + break; + + /* Skip the substring (including any premature nul terminator) + */ + ptr += len; + + set_attr_fn(array, index, new_size); + } + + return err; +} + +int kbase_debugfs_helper_set_attr_from_string( + const char *const buf, void *const array, size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + char *const wbuf = kstrdup(buf, GFP_KERNEL); + int err = 0; + + if (!wbuf) + return -ENOMEM; + + err = set_attr_from_string(wbuf, array, nelems, + set_attr_fn); + + kfree(wbuf); + return err; +} + +ssize_t kbase_debugfs_helper_get_attr_to_string( + char *const buf, size_t const size, + void *const array, size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn) +{ + ssize_t total = 0; + size_t index; + + for (index = 0; index < nelems; ++index) { + const char *postfix = " "; + + if (index == (nelems-1)) + postfix = "\n"; + + total += scnprintf(buf + total, size - total, "%zu%s", + get_attr_fn(array, index), postfix); + } + + return total; +} + +int kbase_debugfs_helper_seq_write(struct file *const file, + const char __user *const ubuf, size_t const count, + size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + const struct seq_file *const sfile = file->private_data; + void *const array = sfile->private; + int err = 0; + char *buf; + + if (WARN_ON(!array)) + return -EINVAL; + + if (WARN_ON(count > DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE)) + return -EINVAL; + + buf = kmalloc(count + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, ubuf, count)) { + kfree(buf); + return -EFAULT; + } + + buf[count] = '\0'; + err = set_attr_from_string(buf, + array, nelems, set_attr_fn); + kfree(buf); + + return err; +} + +int kbase_debugfs_helper_seq_read(struct seq_file *const sfile, + size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn) +{ + void *const array = sfile->private; + size_t index; + + if (WARN_ON(!array)) + return -EINVAL; + + for (index = 0; index < nelems; ++index) { + const char *postfix = " "; + + if (index == (nelems-1)) + postfix = "\n"; + + seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix); + } + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_debugfs_helper.h b/drivers/gpu/arm/b_r26p0/mali_kbase_debugfs_helper.h new file mode 100644 index 000000000000..c3c9efa14e65 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_debugfs_helper.h @@ -0,0 +1,141 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
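As a usage sketch of the helpers above (all names below are hypothetical; within this patch the real callers are the memory-pool debugfs files): the array is handed to debugfs as the inode's private data, single_open() then exposes it as the seq_file's private pointer, a write such as "64 0x40 1000" updates elements 0..2 with automatic number-base detection, and a read prints the current values separated by spaces.

static size_t example_limits[4];

static void example_set(void *array, size_t index, size_t value)
{
	((size_t *)array)[index] = value;
}

static size_t example_get(void *array, size_t index)
{
	return ((size_t *)array)[index];
}

static int example_show(struct seq_file *sfile, void *data)
{
	return kbase_debugfs_helper_seq_read(sfile,
		ARRAY_SIZE(example_limits), example_get);
}

static int example_open(struct inode *in, struct file *file)
{
	return single_open(file, example_show, in->i_private);
}

static ssize_t example_write(struct file *file, const char __user *ubuf,
		size_t count, loff_t *ppos)
{
	int err = kbase_debugfs_helper_seq_write(file, ubuf, count,
		ARRAY_SIZE(example_limits), example_set);

	return err ? err : count;
}

static const struct file_operations example_fops = {
	.owner = THIS_MODULE,
	.open = example_open,
	.release = single_release,
	.read = seq_read,
	.write = example_write,
	.llseek = seq_lseek,
};

/* debugfs_create_file("example", 0644, parent_dentry, example_limits,
 *		       &example_fops);
 */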
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUGFS_HELPER_H_ +#define _KBASE_DEBUGFS_HELPER_H_ + +/** + * typedef kbase_debugfs_helper_set_attr_fn - Type of function to set an + * attribute value from an array + * + * @array: Address of an object that can be accessed like an array. + * @index: An element index. The valid range depends on the use-case. + * @value: Attribute value to be set. + */ +typedef void (*kbase_debugfs_helper_set_attr_fn)( + void *array, size_t index, size_t value); + +/** + * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an + * array + * + * The given function is called once for each attribute value found in the + * input string. It is not an error if the string specifies fewer attribute + * values than the specified number of array elements. + * + * The number base of each attribute value is detected automatically + * according to the standard rules (e.g. prefix "0x" for hexadecimal). + * Attribute values are separated by one or more space characters. + * Additional leading and trailing spaces are ignored. + * + * @buf: Input string to parse. Must be nul-terminated! + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_set_attr_from_string( + const char *buf, void *array, size_t nelems, + kbase_debugfs_helper_set_attr_fn set_attr_fn); + +/** + * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an + * attribute value from an array + * + * @array: Address of an object that can be accessed like an array. + * @index: An element index. The valid range depends on the use-case. + * + * Return: Value of attribute. + */ +typedef size_t (*kbase_debugfs_helper_get_attr_fn)( + void *array, size_t index); + +/** + * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string + * from elements in an array + * + * The given function is called once for each array element to get the + * value of the attribute to be inspected. The attribute values are + * written to the buffer as a formatted string of decimal numbers + * separated by spaces and terminated by a linefeed. + * + * @buf: Buffer in which to store the formatted output string. + * @size: The size of the buffer, in bytes. + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @get_attr_fn: Function to be called back for each array element. + * + * Return: Number of characters written excluding the nul terminator. + */ +ssize_t kbase_debugfs_helper_get_attr_to_string( + char *buf, size_t size, void *array, size_t nelems, + kbase_debugfs_helper_get_attr_fn get_attr_fn); + +/** + * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an + * array + * + * The virtual file must have been opened by calling single_open and passing + * the address of an object that can be accessed like an array. + * + * The given function is called once for each array element to get the + * value of the attribute to be inspected. The attribute values are + * written to the buffer as a formatted string of decimal numbers + * separated by spaces and terminated by a linefeed. + * + * @sfile: A virtual file previously opened by calling single_open. + * @nelems: Number of elements in the array. + * @get_attr_fn: Function to be called back for each array element. 
+ * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_seq_read( + struct seq_file *const sfile, size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn); + +/** + * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an + * array + * + * The virtual file must have been opened by calling single_open and passing + * the address of an object that can be accessed like an array. + * + * The given function is called once for each attribute value found in the + * data written to the virtual file. For further details, refer to the + * description of set_attr_from_string. + * + * @file: A virtual file previously opened by calling single_open. + * @ubuf: Source address in user space. + * @count: Number of bytes written to the virtual file. + * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_seq_write(struct file *const file, + const char __user *const ubuf, size_t const count, + size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn); + +#endif /*_KBASE_DEBUGFS_HELPER_H_ */ + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h new file mode 100644 index 000000000000..3edeb9069778 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h @@ -0,0 +1,1844 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_defs.h + * + * Defintions (types, defines, etcs) common to Kbase. They are placed here to + * allow the hierarchy of header files to work. + */ + +#ifndef _KBASE_DEFS_H_ +#define _KBASE_DEFS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_MALI_BUSLOG +#include +#endif + +#if defined(CONFIG_SYNC) +#include +#else +#include "mali_kbase_fence_defs.h" +#endif + +#ifdef CONFIG_DEBUG_FS +#include +#endif /* CONFIG_DEBUG_FS */ + +#ifdef CONFIG_MALI_DEVFREQ +#include +#endif /* CONFIG_MALI_DEVFREQ */ + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#include +#include +#include + +/* MALI_SEC_INTEGRATION */ +#include + +#if defined(CONFIG_PM_RUNTIME) || \ + (defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) +#define KBASE_PM_RUNTIME 1 +#endif + +#include "debug/mali_kbase_debug_ktrace_defs.h" + +/** Number of milliseconds before we time out on a GPU soft/hard reset */ +#define RESET_TIMEOUT 500 + +/** + * The maximum number of Job Slots to support in the Hardware. 
+ * + * You can optimize this down if your target devices will only ever support a + * small number of job slots. + */ +#define BASE_JM_MAX_NR_SLOTS 3 + +/** + * The maximum number of Address Spaces to support in the Hardware. + * + * You can optimize this down if your target devices will only ever support a + * small number of Address Spaces + */ +#define BASE_MAX_NR_AS 16 + +/* mmu */ +#define MIDGARD_MMU_LEVEL(x) (x) + +#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) + +#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3) + +#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) + +/** setting in kbase_context::as_nr that indicates it's invalid */ +#define KBASEP_AS_NR_INVALID (-1) + +/** + * Maximum size in bytes of a MMU lock region, as a logarithm + */ +#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (64) + +/** + * Minimum size in bytes of a MMU lock region, as a logarithm + */ +#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) + +#include "mali_kbase_hwaccess_defs.h" + +/* Maximum number of pages of memory that require a permanent mapping, per + * kbase_context + */ +#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \ + PAGE_SHIFT) +/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer + * clients, to reduce undesired system load. + * If a virtualizer client requests a dump within this threshold period after + * some other client has performed a dump, a new dump won't be performed and + * the accumulated counter values for that client will be returned instead. + */ +#define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC) + +/* Maximum number of clock/regulator pairs that may be referenced by + * the device node. + * This is dependent on support for of_property_read_u64_array() in the + * kernel. + */ +#if (KERNEL_VERSION(4, 0, 0) <= LINUX_VERSION_CODE) || \ + defined(LSK_OPPV2_BACKPORT) +#define BASE_MAX_NR_CLOCKS_REGULATORS (2) +#else +#define BASE_MAX_NR_CLOCKS_REGULATORS (1) +#endif + +/* Forward declarations */ +struct kbase_context; +struct kbase_device; +struct kbase_as; +struct kbase_mmu_setup; +struct kbase_ipa_model_vinstr_data; +struct kbase_kinstr_jm; + +/** + * struct kbase_io_access - holds information about 1 register access + * + * @addr: first bit indicates r/w (r=0, w=1) + * @value: value written or read + */ +struct kbase_io_access { + uintptr_t addr; + u32 value; +}; + +/** + * struct kbase_io_history - keeps track of all recent register accesses + * + * @enabled: true if register accesses are recorded, false otherwise + * @lock: spinlock protecting kbase_io_access array + * @count: number of registers read/written + * @size: number of elements in kbase_io_access array + * @buf: array of kbase_io_access + */ +struct kbase_io_history { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool enabled; +#else + u32 enabled; +#endif + + spinlock_t lock; + size_t count; + u16 size; + struct kbase_io_access *buf; +}; + +/** + * struct kbase_debug_copy_buffer - information about the buffer to be copied. + * + * @size: size of the buffer in bytes + * @pages: pointer to an array of pointers to the pages which contain + * the buffer + * @is_vmalloc: true if @pages was allocated with vzalloc. 
false if @pages was + * allocated with kcalloc + * @nr_pages: number of pages + * @offset: offset into the pages + * @gpu_alloc: pointer to physical memory allocated by the GPU + * @extres_pages: array of pointers to the pages containing external resources + * for this buffer + * @nr_extres_pages: number of pages in @extres_pages + */ +struct kbase_debug_copy_buffer { + size_t size; + struct page **pages; + bool is_vmalloc; + int nr_pages; + size_t offset; + struct kbase_mem_phy_alloc *gpu_alloc; + + struct page **extres_pages; + int nr_extres_pages; +}; + +struct kbase_device_info { + u32 features; +}; + +struct kbase_mmu_setup { + u64 transtab; + u64 memattr; + u64 transcfg; +}; + +/** + * struct kbase_fault - object containing data relating to a page or bus fault. + * @addr: Records the faulting address. + * @extra_addr: Records the secondary fault address. + * @status: Records the fault status as reported by Hw. + * @protected_mode: Flag indicating whether the fault occurred in protected mode + * or not. + */ +struct kbase_fault { + u64 addr; + u64 extra_addr; + u32 status; + bool protected_mode; +}; + +/** + * struct kbase_as - object representing an address space of GPU. + * @number: Index at which this address space structure is present + * in an array of address space structures embedded inside the + * struct kbase_device. + * @pf_wq: Workqueue for processing work items related to Bus fault + * and Page fault handling. + * @work_pagefault: Work item for the Page fault handling. + * @work_busfault: Work item for the Bus fault handling. + * @pf_data: Data relating to page fault. + * @bf_data: Data relating to bus fault. + * @current_setup: Stores the MMU configuration for this address space. + */ +struct kbase_as { + int number; + struct workqueue_struct *pf_wq; + struct work_struct work_pagefault; + struct work_struct work_busfault; + struct kbase_fault pf_data; + struct kbase_fault bf_data; + struct kbase_mmu_setup current_setup; +}; + +/** + * struct kbase_mmu_table - object representing a set of GPU page tables + * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries + * of top & intermediate level page tables to avoid + * repeated calls to kmap_atomic during the MMU teardown. + * @mmu_lock: Lock to serialize the accesses made to multi level GPU + * page tables + * @pgd: Physical address of the page allocated for the top + * level page table of the context, this is used for + * MMU HW programming as the address translation will + * start from the top level page table. + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @kctx: If this set of MMU tables belongs to a context then + * this is a back-reference to the context, otherwise + * it is NULL + */ +struct kbase_mmu_table { + u64 *mmu_teardown_pages; + struct mutex mmu_lock; + phys_addr_t pgd; + u8 group_id; + struct kbase_context *kctx; +}; + +#include "jm/mali_kbase_jm_defs.h" + +static inline int kbase_as_has_bus_fault(struct kbase_as *as, + struct kbase_fault *fault) +{ + return (fault == &as->bf_data); +} + +static inline int kbase_as_has_page_fault(struct kbase_as *as, + struct kbase_fault *fault) +{ + return (fault == &as->pf_data); +} + +/** + * struct kbasep_mem_device - Data stored per device for memory allocation + * + * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is + * allocated/freed. 
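Because both fault work items of an address space point back into the same kbase_as, a handler can tell bus faults and page faults apart purely by address, which is all the two inline helpers above do. A trivial, purely illustrative sketch (the function name is hypothetical):

static void handle_fault(struct kbase_as *as, struct kbase_fault *fault)
{
	if (kbase_as_has_bus_fault(as, fault)) {
		/* fault == &as->bf_data: bus fault bottom half */
	} else if (kbase_as_has_page_fault(as, fault)) {
		/* fault == &as->pf_data: page fault bottom half */
	}
}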
+ * @ir_threshold: Fraction of the maximum size of an allocation that grows + * on GPU page fault that can be used before the driver + * switches to incremental rendering, in 1/256ths. + * 0 means disabled. + */ +struct kbasep_mem_device { + atomic_t used_pages; + atomic_t ir_threshold; +}; + +struct kbase_clk_rate_listener; + +/** + * kbase_clk_rate_listener_on_change_t() - Frequency change callback + * + * @listener: Clock frequency change listener. + * @clk_index: Index of the clock for which the change has occurred. + * @clk_rate_hz: Clock frequency(Hz). + * + * A callback to call when clock rate changes. The function must not + * sleep. No clock rate manager functions must be called from here, as + * its lock is taken. + */ +typedef void (*kbase_clk_rate_listener_on_change_t)( + struct kbase_clk_rate_listener *listener, + u32 clk_index, + u32 clk_rate_hz); + +/** + * struct kbase_clk_rate_listener - Clock frequency listener + * + * @node: List node. + * @notify: Callback to be called when GPU frequency changes. + */ +struct kbase_clk_rate_listener { + struct list_head node; + kbase_clk_rate_listener_on_change_t notify; +}; + +/** + * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock + * rate trace manager. + * + * @gpu_idle: Tracks the idle state of GPU. + * @clks: Array of pointer to structures storing data for every + * enumerated GPU clock. + * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace + * operations. + * @gpu_clk_rate_trace_write: Pointer to the function that would emit the + * tracepoint for the clock rate change. + * @listeners: List of listener attached. + * @lock: Lock to serialize the actions of GPU clock rate trace + * manager. + */ +struct kbase_clk_rate_trace_manager { + bool gpu_idle; + struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS]; + struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops; + struct list_head listeners; + spinlock_t lock; +}; + +/** + * Data stored per device for power management. + * + * This structure contains data for the power management framework. There is one + * instance of this structure per device in the system. + */ +struct kbase_pm_device_data { + /** + * The lock protecting Power Management structures accessed outside of + * IRQ. + * + * This lock must also be held whenever the GPU is being powered on or + * off. + */ + struct mutex lock; + + /** + * The reference count of active contexts on this device. Note that + * some code paths keep shaders/the tiler powered whilst this is 0. Use + * kbase_pm_is_active() instead to check for such cases. + */ + int active_count; + /** Flag indicating suspending/suspended */ + bool suspending; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Flag indicating gpu lost */ + bool gpu_lost; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + /* MALI_SEC_INTEGRATION */ + wait_queue_head_t suspending_wait; + /* Wait queue set when active_count == 0 */ + wait_queue_head_t zero_active_count_wait; + + /** + * Bit masks identifying the available shader cores that are specified + * via sysfs. One mask per job slot. + */ + u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; + u64 debug_core_mask_all; +#ifdef CONFIG_MALI_GPU_CORE_MASK_SELECTION + /* MALI_SEC_INTEGRATION */ + u64 debug_core_mask_info; +#endif + + /** + * Callback for initializing the runtime power management. 
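For illustration, a clock rate listener is just an embedded list node plus a non-blocking callback; subscription is assumed to go through the helper declared in backend/gpu/mali_kbase_clk_rate_trace_mgr.h elsewhere in this patch. A minimal sketch, not part of the patch itself:

static void my_clk_rate_notify(struct kbase_clk_rate_listener *listener,
		u32 clk_index, u32 clk_rate_hz)
{
	/* Called with the manager's lock held: must not sleep and must not
	 * call back into the clock rate trace manager.
	 */
	pr_debug("GPU clock %u changed to %u Hz\n", clk_index, clk_rate_hz);
}

static struct kbase_clk_rate_listener my_clk_rate_listener = {
	.notify = my_clk_rate_notify,
};

/* kbase_clk_rate_trace_manager_subscribe(&kbdev->pm.clk_rtm,
 *					  &my_clk_rate_listener);
 */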
+ * + * @param kbdev The kbase device + * + * @return 0 on success, else error code + */ + int (*callback_power_runtime_init)(struct kbase_device *kbdev); + + /** + * Callback for terminating the runtime power management. + * + * @param kbdev The kbase device + */ + void (*callback_power_runtime_term)(struct kbase_device *kbdev); + + /* Time in milliseconds between each dvfs sample */ + u32 dvfs_period; + + struct kbase_pm_backend_data backend; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /** + * The state of the arbiter VM machine + */ + struct kbase_arbiter_vm_state *arb_vm_state; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + /** + * The state of the GPU clock rate trace manager + */ + struct kbase_clk_rate_trace_manager clk_rtm; +}; + +/** + * struct kbase_mem_pool - Page based memory pool for kctx/kbdev + * @kbdev: Kbase device where memory is used + * @cur_size: Number of free pages currently in the pool (may exceed + * @max_size in some corner cases) + * @max_size: Maximum number of free pages in the pool + * @order: order = 0 refers to a pool of 4 KB pages + * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. Immutable. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @pool_lock: Lock protecting the pool - must be held when modifying + * @cur_size and @page_list + * @page_list: List of free pages in the pool + * @reclaim: Shrinker for kernel reclaim of free pages + * @next_pool: Pointer to next pool where pages can be allocated when this + * pool is empty. Pages will spill over to the next pool when + * this pool is full. Can be NULL if there is no next pool. + * @dying: true if the pool is being terminated, and any ongoing + * operations should be abandoned + * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from + * this pool, eg during a grow operation + */ +struct kbase_mem_pool { + struct kbase_device *kbdev; + size_t cur_size; + size_t max_size; + u8 order; + u8 group_id; + spinlock_t pool_lock; + struct list_head page_list; + struct shrinker reclaim; + + struct kbase_mem_pool *next_pool; + + bool dying; + bool dont_reclaim; +}; + +/** + * struct kbase_mem_pool_group - a complete set of physical memory pools. + * + * Memory pools are used to allow efficient reallocation of previously-freed + * physical pages. A pair of memory pools is initialized for each physical + * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays + * should be indexed by physical memory group ID, the meaning of which is + * defined by the systems integrator. + * + * @small: Array of objects containing the state for pools of 4 KiB size + * physical pages. + * @large: Array of objects containing the state for pools of 2 MiB size + * physical pages. + */ +struct kbase_mem_pool_group { + struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS]; + struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS]; +}; + +/** + * struct kbase_mem_pool_config - Initial configuration for a physical memory + * pool + * + * @max_size: Maximum number of free pages that the pool can hold. + */ +struct kbase_mem_pool_config { + size_t max_size; +}; + +/** + * struct kbase_mem_pool_group_config - Initial configuration for a complete + * set of physical memory pools + * + * This array should be indexed by physical memory group ID, the meaning + * of which is defined by the systems integrator. 
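A pool group is therefore just a pair of pool arrays indexed by memory group ID, so picking the pool for an allocation reduces to something like the following illustrative helper (not part of the patch; kbdev->mem_pools is the kbase_mem_pool_group member of struct kbase_device shown further below):

static struct kbase_mem_pool *select_mem_pool(struct kbase_device *kbdev,
		u8 group_id, bool use_2mb_pages)
{
	struct kbase_mem_pool_group *grp = &kbdev->mem_pools;

	return use_2mb_pages ? &grp->large[group_id] :
			       &grp->small[group_id];
}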
+ * + * @small: Array of initial configuration for pools of 4 KiB pages. + * @large: Array of initial configuration for pools of 2 MiB pages. + */ +struct kbase_mem_pool_group_config { + struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS]; + struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS]; +}; + +/** + * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP + * frequency, real frequencies and core mask + * @real_freqs: Real GPU frequencies. + * @opp_volts: OPP voltages. + * @opp_freq: Nominal OPP frequency + * @core_mask: Shader core mask + */ +struct kbase_devfreq_opp { + u64 opp_freq; + u64 core_mask; + u64 real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; +}; + +/* MMU mode flags */ +#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */ + +/** + * struct kbase_mmu_mode - object containing pointer to methods invoked for + * programming the MMU, as per the MMU mode supported + * by Hw. + * @update: enable & setup/configure one of the GPU address space. + * @get_as_setup: retrieve the configuration of one of the GPU address space. + * @disable_as: disable one of the GPU address space. + * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry. + * @ate_is_valid: check if the pte is a valid address translation entry + * encoding the physical address of the actual mapped page. + * @pte_is_valid: check if the pte is a valid entry encoding the physical + * address of the next lower level page table. + * @entry_set_ate: program the pte to be a valid address translation entry to + * encode the physical address of the actual page being mapped. + * @entry_set_pte: program the pte to be a valid entry to encode the physical + * address of the next lower level page table. + * @entry_invalidate: clear out or invalidate the pte. + * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. + */ +struct kbase_mmu_mode { + void (*update)(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + int as_nr); + void (*get_as_setup)(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup); + void (*disable_as)(struct kbase_device *kbdev, int as_nr); + phys_addr_t (*pte_to_phy_addr)(u64 entry); + int (*ate_is_valid)(u64 ate, int level); + int (*pte_is_valid)(u64 pte, int level); + void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, + unsigned long flags, int level); + void (*entry_set_pte)(u64 *entry, phys_addr_t phy); + void (*entry_invalidate)(u64 *entry); + unsigned long flags; +}; + +struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); +struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); + +#define DEVNAME_SIZE 16 + +/** + * enum kbase_devfreq_work_type - The type of work to perform in the devfreq + * suspend/resume worker. + * @DEVFREQ_WORK_NONE: Initilisation state. + * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device(). + * @DEVFREQ_WORK_RESUME: Call devfreq_resume_device(). + */ +enum kbase_devfreq_work_type { + DEVFREQ_WORK_NONE, + DEVFREQ_WORK_SUSPEND, + DEVFREQ_WORK_RESUME +}; + +/** + * struct kbase_devfreq_queue_info - Object representing an instance for managing + * the queued devfreq suspend/resume works. + * @workq: Workqueue for devfreq suspend/resume requests + * @work: Work item for devfreq suspend & resume + * @req_type: Requested work type to be performed by the devfreq + * suspend/resume worker + * @acted_type: Work type has been acted on by the worker, i.e. 
the + * internal recorded state of the suspend/resume + */ +struct kbase_devfreq_queue_info { + struct workqueue_struct *workq; + struct work_struct work; + enum kbase_devfreq_work_type req_type; + enum kbase_devfreq_work_type acted_type; +}; + +/** + * struct kbase_process - Representing an object of a kbase process instantiated + * when the first kbase context is created under it. + * @tgid: Thread group ID. + * @total_gpu_pages: Total gpu pages allocated across all the contexts + * of this process, it accounts for both native allocations + * and dma_buf imported allocations. + * @kctx_list: List of kbase contexts created for the process. + * @kprcs_node: Node to a rb_tree, kbase_device will maintain a rb_tree + * based on key tgid, kprcs_node is the node link to + * &struct_kbase_device.process_root. + * @dma_buf_root: RB tree of the dma-buf imported allocations, imported + * across all the contexts created for this process. + * Used to ensure that pages of allocation are accounted + * only once for the process, even if the allocation gets + * imported multiple times for the process. + */ +struct kbase_process { + pid_t tgid; + size_t total_gpu_pages; + struct list_head kctx_list; + + struct rb_node kprcs_node; + struct rb_root dma_buf_root; +}; + +/** + * struct kbase_device - Object representing an instance of GPU platform device, + * allocated from the probe method of mali driver. + * @hw_quirks_sc: Configuration to be used for the shader cores as per + * the HW issues present in the GPU. + * @hw_quirks_tiler: Configuration to be used for the Tiler as per the HW + * issues present in the GPU. + * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW + * issues present in the GPU. + * @hw_quirks_jm: Configuration to be used for the Job Manager as per + * the HW issues present in the GPU. + * @entry: Links the device instance to the global list of GPU + * devices. The list would have as many entries as there + * are GPU device instances. + * @dev: Pointer to the kernel's generic/base representation + * of the GPU platform device. + * @mdev: Pointer to the miscellaneous device registered to + * provide Userspace access to kernel driver through the + * device file /dev/malixx. + * @reg_start: Base address of the region in physical address space + * where GPU registers have been mapped. + * @reg_size: Size of the region containing GPU registers + * @reg: Kernel virtual address of the region containing GPU + * registers, using which Driver will access the registers. + * @irqs: Array containing IRQ resource info for 3 types of + * interrupts : Job scheduling, MMU & GPU events (like + * power management, cache etc.) + * @clocks: Pointer to the input clock resources referenced by + * the GPU device node. + * @nr_clocks: Number of clocks set in the clocks array. + * @regulators: Pointer to the structs corresponding to the + * regulators referenced by the GPU device node. + * @nr_regulators: Number of regulators set in the regulators array. + * @opp_table: Pointer to the device OPP structure maintaining the + * link to OPPs attached to a device. This is obtained + * after setting regulator names for the device. + * @devname: string containing the name used for GPU device instance, + * miscellaneous device is registered using the same name. + * @id: Unique identifier for the device, indicates the number of + * devices which have been created so far. 
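The per-process bookkeeping lives in a standard rb-tree keyed on tgid, rooted at kbase_device.process_root (documented further below). A lookup is the usual binary walk; the helper name here is hypothetical and the caller is assumed to hold a suitable device-level lock:

static struct kbase_process *find_process(struct kbase_device *kbdev,
		pid_t tgid)
{
	struct rb_node *node = kbdev->process_root.rb_node;

	while (node) {
		struct kbase_process *kprcs =
			rb_entry(node, struct kbase_process, kprcs_node);

		if (tgid < kprcs->tgid)
			node = node->rb_left;
		else if (tgid > kprcs->tgid)
			node = node->rb_right;
		else
			return kprcs;
	}

	return NULL;
}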
+ * @model: Pointer, valid only when Driver is compiled to not access + * the real GPU Hw, to the dummy model which tries to mimic + * to some extent the state & behavior of GPU Hw in response + * to the register accesses made by the Driver. + * @irq_slab: slab cache for allocating the work items queued when + * model mimics raising of IRQ to cause an interrupt on CPU. + * @irq_workq: workqueue for processing the irq work items. + * @serving_job_irq: function to execute work items queued when model mimics + * the raising of JS irq, mimics the interrupt handler + * processing JS interrupts. + * @serving_gpu_irq: function to execute work items queued when model mimics + * the raising of GPU irq, mimics the interrupt handler + * processing GPU interrupts. + * @serving_mmu_irq: function to execute work items queued when model mimics + * the raising of MMU irq, mimics the interrupt handler + * processing MMU interrupts. + * @reg_op_lock: lock used by model to serialize the handling of register + * accesses made by the driver. + * @pm: Per device object for storing data for power management + * framework. + * @js_data: Per device object encapsulating the current context of + * Job Scheduler, which is global to the device and is not + * tied to any particular struct kbase_context running on + * the device + * @mem_pools: Global pools of free physical memory pages which can + * be used by all the contexts. + * @memdev: keeps track of the in use physical pages allocated by + * the Driver. + * @mmu_mode: Pointer to the object containing methods for programming + * the MMU, depending on the type of MMU supported by Hw. + * @mgm_dev: Pointer to the memory group manager device attached + * to the GPU device. This points to an internal memory + * group manager if no platform-specific memory group + * manager was retrieved through device tree. + * @as: Array of objects representing address spaces of GPU. + * @as_free: Bitpattern of free/available GPU address spaces. + * @as_to_kctx: Array of pointers to struct kbase_context, having + * GPU adrress spaces assigned to them. + * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask + * register used in the handling of Bus & Page faults. + * @gpu_props: Object containing complete information about the + * configuration/properties of GPU HW device in use. + * @hw_issues_mask: List of SW workarounds for HW issues + * @hw_features_mask: List of available HW features. + * @disjoint_event: struct for keeping track of the disjoint information, + * that whether the GPU is in a disjoint state and the + * number of disjoint events that have occurred on GPU. + * @nr_hw_address_spaces: Number of address spaces actually available in the + * GPU, remains constant after driver initialisation. + * @nr_user_address_spaces: Number of address spaces available to user contexts + * @hwcnt: Structure used for instrumentation and HW counters + * dumping + * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. + * @hwcnt_gpu_ctx: Context for GPU hardware counter access. + * @hwaccess_lock must be held when calling + * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. + * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. + * @vinstr_ctx: vinstr context created per device. + * @timeline_flags: Bitmask defining which sets of timeline tracepoints + * are enabled. If zero, there is no timeline client and + * therefore timeline is disabled. + * @timeline: Timeline context created per device. 
+ * @trace_lock: Lock to serialize the access to trace buffer. + * @trace_first_out: Index/offset in the trace buffer at which the first + * unread message is present. + * @trace_next_in: Index/offset in the trace buffer at which the new + * message will be written. + * @trace_rbuf: Pointer to the buffer storing debug messages/prints + * tracing the various events in Driver. + * The buffer is filled in circular fashion. + * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to + * complete for the GPU jobs before proceeding with the + * GPU reset. + * @cache_clean_in_progress: Set when a cache clean has been started, and + * cleared when it has finished. This prevents multiple + * cache cleans being done simultaneously. + * @cache_clean_queued: Set if a cache clean is invoked while another is in + * progress. If this happens, another cache clean needs + * to be triggered immediately after completion of the + * current one. + * @cache_clean_wait: Signalled when a cache clean has finished. + * @platform_context: Platform specific private data to be accessed by + * platform specific config files only. + * @kctx_list: List of kbase_contexts created for the device, + * including any contexts that might be created for + * hardware counters. + * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. + * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed + * to devfreq_add_device() to add devfreq feature to Mali + * GPU device. + * @devfreq: Pointer to devfreq structure for Mali GPU device, + * returned on the call to devfreq_add_device(). + * @current_freqs: The real frequencies, corresponding to + * @current_nominal_freq, at which the Mali GPU device + * is currently operating, as retrieved from + * @devfreq_table in the target callback of + * @devfreq_profile. + * @current_nominal_freq: The nominal frequency currently used for the Mali GPU + * device as retrieved through devfreq_recommended_opp() + * using the freq value passed as an argument to target + * callback of @devfreq_profile + * @current_voltages: The voltages corresponding to @current_nominal_freq, + * as retrieved from @devfreq_table in the target + * callback of @devfreq_profile. + * @current_core_mask: bitmask of shader cores that are currently desired & + * enabled, corresponding to @current_nominal_freq as + * retrieved from @devfreq_table in the target callback + * of @devfreq_profile. + * @devfreq_table: Pointer to the lookup table for converting between + * nominal OPP (operating performance point) frequency, + * and real frequency and core mask. This table is + * constructed according to operating-points-v2-mali + * table in devicetree. + * @num_opps: Number of operating performance points available for the Mali + * GPU device. + * @devfreq_queue: Per device object for storing data that manages devfreq + * suspend & resume request queue and the related items. + * @devfreq_cooling: Pointer returned on registering devfreq cooling device + * corresponding to @devfreq. + * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected + * mode. It is a sticky flag which is cleared by IPA + * once it has made use of information that GPU had + * previously entered protected mode. + * @ipa: Top level structure for IPA, containing pointers to both + * configured & fallback models. + * @previous_frequency: Previous frequency of GPU clock used for + * BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is + * restored when L2 is powered on. 
+ * @job_fault_debug: Flag to control the dumping of debug data for job faults, + * set when the 'job_fault' debugfs file is opened. + * @mali_debugfs_directory: Root directory for the debugfs files created by the driver + * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing + * a sub-directory for every context. + * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault + * has occurred. + * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the + * occurrence of a job fault. + * @job_fault_resume_wq: Waitqueue on which every context with a faulty job wait + * for the job fault dumping to complete before they can + * do bottom half of job done for the atoms which followed + * the faulty atom. + * @job_fault_resume_workq: workqueue to process the work items queued for the faulty + * atoms, whereby the work item function waits for the dumping + * to get completed. + * @job_fault_event_list: List of atoms, each belonging to a different context, which + * generated a job fault. + * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list + * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs + * file "read_register". + * @ctx_num: Total number of contexts created for the device. + * @io_history: Pointer to an object keeping a track of all recent + * register accesses. The history of register accesses + * can be read through "regs_history" debugfs file. + * @hwaccess: Contains a pointer to active kbase context and GPU + * backend specific data for HW access layer. + * @faults_pending: Count of page/bus faults waiting for bottom half processing + * via workqueues. + * @poweroff_pending: Set when power off operation for GPU is started, reset when + * power on for GPU is started. + * @infinite_cache_active_default: Set to enable using infinite cache for all the + * allocations of a new context. + * @mem_pool_defaults: Default configuration for the group of memory pools + * created for a new context. + * @current_gpu_coherency_mode: coherency mode in use, which can be different + * from @system_coherency, when using protected mode. + * @system_coherency: coherency mode as retrieved from the device tree. + * @cci_snoop_enabled: Flag to track when CCI snoops have been enabled. + * @snoop_enable_smc: SMC function ID to call into Trusted firmware to + * enable cache snooping. Value of 0 indicates that it + * is not used. + * @snoop_disable_smc: SMC function ID to call disable cache snooping. + * @protected_ops: Pointer to the methods for switching in or out of the + * protected mode, as per the @protected_dev being used. + * @protected_dev: Pointer to the protected mode switcher device attached + * to the GPU device retrieved through device tree if + * GPU do not support protected mode switching natively. + * @protected_mode: set to TRUE when GPU is put into protected mode + * @protected_mode_transition: set to TRUE when GPU is transitioning into or + * out of protected mode. + * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be + * enabled. Counters must be disabled before transition + * into protected mode. + * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not + * enabled. + * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware + * counters, used if atomic disable is not possible. + * @protected_mode_support: set to true if protected mode is supported. 
+ * @buslogger: Pointer to the structure required for interfacing + * with the bus logger module to set the size of buffer + * used by the module for capturing bus logs. + * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of + * IRQ + bottom half is being done, to prevent the writes + * to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers. + * @inited_subsys: Bitmap of inited sub systems at the time of device probe. + * Used during device remove or for handling error in probe. + * @hwaccess_lock: Lock, which can be taken from IRQ context, to serialize + * the updates made to Job dispatcher + scheduler states. + * @mmu_hw_mutex: Protects access to MMU operations and address space + * related state. + * @serialize_jobs: Currently used mode for serialization of jobs, both + * intra & inter slots serialization is supported. + * @backup_serialize_jobs: Copy of the original value of @serialize_jobs taken + * when GWT is enabled. Used to restore the original value + * on disabling of GWT. + * @js_ctx_scheduling_mode: Context scheduling mode currently being used by + * Job Scheduler + * @l2_size_override: Used to set L2 cache size via device tree blob + * @l2_hash_override: Used to set L2 cache hash via device tree blob + * @process_root: rb_tree root node for maintaining a rb_tree of + * kbase_process based on key tgid(thread group ID). + * @dma_buf_root: rb_tree root node for maintaining a rb_tree of + * &struct kbase_dma_buf based on key dma_buf. + * We maintain a rb_tree of dma_buf mappings under + * kbase_device and kbase_process, one indicates a + * mapping and gpu memory usage at device level and + * other one at process level. + * @total_gpu_pages: Total GPU pages used for the complete GPU device. + * @dma_buf_lock: This mutex should be held while accounting for + * @total_gpu_pages from imported dma buffers. + * @gpu_mem_usage_lock: This spinlock should be held while accounting + * @total_gpu_pages for both native and dma-buf imported + * allocations. 
+ */ +struct kbase_device { + u32 hw_quirks_sc; + u32 hw_quirks_tiler; + u32 hw_quirks_mmu; + u32 hw_quirks_jm; + + struct list_head entry; + struct device *dev; + struct miscdevice mdev; + u64 reg_start; + size_t reg_size; + void __iomem *reg; + + struct { + int irq; + int flags; + } irqs[3]; + + struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nr_clocks; +#ifdef CONFIG_REGULATOR + struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nr_regulators; +#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) + struct opp_table *opp_table; +#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* CONFIG_REGULATOR */ + char devname[DEVNAME_SIZE]; + u32 id; + +#ifdef CONFIG_MALI_NO_MALI + void *model; + struct kmem_cache *irq_slab; + struct workqueue_struct *irq_workq; + atomic_t serving_job_irq; + atomic_t serving_gpu_irq; + atomic_t serving_mmu_irq; + spinlock_t reg_op_lock; +#endif /* CONFIG_MALI_NO_MALI */ + + struct kbase_pm_device_data pm; + + struct kbase_mem_pool_group mem_pools; + struct kbasep_mem_device memdev; + struct kbase_mmu_mode const *mmu_mode; + + struct memory_group_manager_device *mgm_dev; + + struct kbase_as as[BASE_MAX_NR_AS]; + u16 as_free; /* Bitpattern of free Address Spaces */ + struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; + + spinlock_t mmu_mask_change; + + struct kbase_gpu_props gpu_props; + + unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + + struct { + atomic_t count; + atomic_t state; + } disjoint_event; + + s8 nr_hw_address_spaces; + s8 nr_user_address_spaces; + + struct kbase_hwcnt { + /* The lock should be used when accessing any of the following members */ + spinlock_t lock; + + struct kbase_context *kctx; + u64 addr; + u64 addr_bytes; + + struct kbase_instr_backend backend; + } hwcnt; + + struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; + struct kbase_hwcnt_context *hwcnt_gpu_ctx; + struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; + struct kbase_vinstr_context *vinstr_ctx; + + atomic_t timeline_flags; + struct kbase_timeline *timeline; + +#if KBASE_KTRACE_TARGET_RBUF + struct kbase_ktrace ktrace; +#endif + u32 reset_timeout_ms; + + bool cache_clean_in_progress; + bool cache_clean_queued; + wait_queue_head_t cache_clean_wait; + + void *platform_context; + + struct list_head kctx_list; + struct mutex kctx_list_lock; + +#ifdef CONFIG_MALI_DEVFREQ + struct devfreq_dev_profile devfreq_profile; + struct devfreq *devfreq; + unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned long current_nominal_freq; + unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS]; + u64 current_core_mask; + struct kbase_devfreq_opp *devfreq_table; + int num_opps; + struct kbasep_pm_metrics last_devfreq_metrics; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + struct kbase_devfreq_queue_info devfreq_queue; +#endif + +#ifdef CONFIG_DEVFREQ_THERMAL +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + struct devfreq_cooling_device *devfreq_cooling; +#else + struct thermal_cooling_device *devfreq_cooling; +#endif + bool ipa_protection_mode_switched; + struct { + /* Access to this struct must be with ipa.lock held */ + struct mutex lock; + struct kbase_ipa_model *configured_model; + struct kbase_ipa_model *fallback_model; + + /* Values of the PM utilization metrics from last time the + * power model was invoked. 
The utilization is calculated as + * the difference between last_metrics and the current values. + */ + struct kbasep_pm_metrics last_metrics; + /* Model data to pass to ipa_gpu_active/idle() */ + struct kbase_ipa_model_vinstr_data *model_data; + + /* true if use of fallback model has been forced by the User */ + bool force_fallback_model; + } ipa; +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* CONFIG_MALI_DEVFREQ */ + unsigned long previous_frequency; + + atomic_t job_fault_debug; + +#ifdef CONFIG_DEBUG_FS + struct dentry *mali_debugfs_directory; + struct dentry *debugfs_ctx_directory; + + /* MALI_SEC_INTEGRATION */ + /* debugfs entry for trace */ + struct dentry *trace_dentry; + +#ifdef CONFIG_MALI_DEBUG + u64 debugfs_as_read_bitmap; +#endif /* CONFIG_MALI_DEBUG */ + + wait_queue_head_t job_fault_wq; + wait_queue_head_t job_fault_resume_wq; + struct workqueue_struct *job_fault_resume_workq; + struct list_head job_fault_event_list; + spinlock_t job_fault_event_lock; + +#if !MALI_CUSTOMER_RELEASE + struct { + u16 reg_offset; + } regs_dump_debugfs_data; +#endif /* !MALI_CUSTOMER_RELEASE */ +#endif /* CONFIG_DEBUG_FS */ + + atomic_t ctx_num; + +#ifdef CONFIG_DEBUG_FS + struct kbase_io_history io_history; +#endif /* CONFIG_DEBUG_FS */ + + struct kbase_hwaccess_data hwaccess; + + atomic_t faults_pending; + + bool poweroff_pending; + + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active_default; +#else + u32 infinite_cache_active_default; +#endif + struct kbase_mem_pool_group_config mem_pool_defaults; + + u32 current_gpu_coherency_mode; + u32 system_coherency; + + bool cci_snoop_enabled; + + u32 snoop_enable_smc; + u32 snoop_disable_smc; + + const struct protected_mode_ops *protected_ops; + + struct protected_mode_device *protected_dev; + + bool protected_mode; + + bool protected_mode_transition; + + bool protected_mode_hwcnt_desired; + + bool protected_mode_hwcnt_disabled; + + struct work_struct protected_mode_hwcnt_disable_work; + +#ifdef CONFIG_MALI_BUSLOG + struct bus_logger_client *buslogger; +#endif + + bool irq_reset_flush; + + u32 inited_subsys; + + spinlock_t hwaccess_lock; + + struct mutex mmu_hw_mutex; + + /* MALI_SEC_INTEGRATION */ + struct kbase_vendor_callbacks *vendor_callbacks; + + u8 l2_size_override; + u8 l2_hash_override; + + struct kbasep_js_device_data js_data; + + /* See KBASE_JS_*_PRIORITY_MODE for details. */ + u32 js_ctx_scheduling_mode; + + /* See KBASE_SERIALIZE_* for details */ + u8 serialize_jobs; + +#ifdef CONFIG_MALI_CINSTR_GWT + u8 backup_serialize_jobs; +#endif /* CONFIG_MALI_CINSTR_GWT */ + + + struct rb_root process_root; + struct rb_root dma_buf_root; + + size_t total_gpu_pages; + struct mutex dma_buf_lock; + spinlock_t gpu_mem_usage_lock; + + struct { + struct kbase_context *ctx; + u64 jc; + int slot; + u64 flags; + } dummy_job_wa; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Pointer to the arbiter device */ + struct kbase_arbiter_device arb; +#endif +}; + +/** + * enum kbase_file_state - Initialization state of a file opened by @kbase_open + * + * @KBASE_FILE_NEED_VSN: Initial state, awaiting API version. + * @KBASE_FILE_VSN_IN_PROGRESS: Indicates if setting an API version is in + * progress and other setup calls shall be + * rejected. + * @KBASE_FILE_NEED_CTX: Indicates if the API version handshake has + * completed, awaiting context creation flags. + * @KBASE_FILE_CTX_IN_PROGRESS: Indicates if the context's setup is in progress + * and other setup calls shall be rejected. 
+ * @KBASE_FILE_COMPLETE: Indicates if the setup for context has + * completed, i.e. flags have been set for the + * context. + * + * The driver allows only limited interaction with user-space until setup + * is complete. + */ +enum kbase_file_state { + KBASE_FILE_NEED_VSN, + KBASE_FILE_VSN_IN_PROGRESS, + KBASE_FILE_NEED_CTX, + KBASE_FILE_CTX_IN_PROGRESS, + KBASE_FILE_COMPLETE +}; + +/** + * struct kbase_file - Object representing a file opened by @kbase_open + * + * @kbdev: Object representing an instance of GPU platform device, + * allocated from the probe method of the Mali driver. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * @kctx: Object representing an entity, among which GPU is + * scheduled and which gets its own GPU address space. + * Invalid until @setup_state is KBASE_FILE_COMPLETE. + * @api_version: Contains the version number for User/kernel interface, + * used for compatibility check. Invalid until + * @setup_state is KBASE_FILE_NEED_CTX. + * @setup_state: Initialization state of the file. Values come from + * the kbase_file_state enumeration. + */ +struct kbase_file { + struct kbase_device *kbdev; + struct file *filp; + struct kbase_context *kctx; + unsigned long api_version; + atomic_t setup_state; +}; + +/** + * enum kbase_context_flags - Flags for kbase contexts + * + * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit + * process on a 64-bit kernel. + * + * @KCTX_RUNNABLE_REF: Set when context is counted in + * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. + * + * @KCTX_ACTIVE: Set when the context is active. + * + * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this + * context. + * + * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been + * initialized. + * + * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new + * allocations. Existing allocations will not change. + * + * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. + * + * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept + * scheduled in. + * + * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. + * This is only ever updated whilst the jsctx_mutex is held. + * + * @KCTX_DYING: Set when the context process is in the process of being evicted. + * + * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this + * context, to disable use of implicit dma-buf fences. This is used to avoid + * potential synchronization deadlocks. + * + * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory + * allocations. For 64-bit clients it is enabled by default, and disabled by + * default on 32-bit clients. Being able to clear this flag is only used for + * testing purposes of the custom zone allocation on 64-bit user-space builds, + * where we also require more control than is available through e.g. the JIT + * allocation mechanism. However, the 64-bit user-space client must still + * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT + * + * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled + * from it for job slot 0. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled + * from it for job slot 1. This is reset when the context first goes active or + * is re-activated on that slot. 
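From user space this setup state machine is driven by two ioctls on a freshly opened device file: the version handshake followed by the context creation flags. A hedged sketch, assuming the UAPI structures and ioctl numbers from the driver's mali_kbase_ioctl.h / jm/mali_kbase_jm_ioctl.h headers and the usual /dev/mali0 node name:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "mali_kbase_ioctl.h"	/* UAPI header shipped with the driver */

static int kbase_open_and_setup(void)
{
	struct kbase_ioctl_version_check vc = { 0 };
	struct kbase_ioctl_set_flags sf = { .create_flags = 0 };
	int fd = open("/dev/mali0", O_RDWR | O_CLOEXEC);

	if (fd < 0)
		return -1;
	/* KBASE_FILE_NEED_VSN -> KBASE_FILE_NEED_CTX */
	if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &vc) < 0)
		goto fail;
	/* KBASE_FILE_NEED_CTX -> KBASE_FILE_COMPLETE (context now created) */
	if (ioctl(fd, KBASE_IOCTL_SET_FLAGS, &sf) < 0)
		goto fail;
	return fd;

fail:
	close(fd);
	return -1;
}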
+ * + * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled + * from it for job slot 2. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for + * the context due to unhandled page(or bus) fault. It is cleared when the + * refcount for the context drops to 0 or on when the address spaces are + * re-enabled on GPU reset or power cycle. + * + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together. + */ +enum kbase_context_flags { + KCTX_COMPAT = 1U << 0, + KCTX_RUNNABLE_REF = 1U << 1, + KCTX_ACTIVE = 1U << 2, + KCTX_PULLED = 1U << 3, + KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, + KCTX_INFINITE_CACHE = 1U << 5, + KCTX_SUBMIT_DISABLED = 1U << 6, + KCTX_PRIVILEGED = 1U << 7, + KCTX_SCHEDULED = 1U << 8, + KCTX_DYING = 1U << 9, + KCTX_NO_IMPLICIT_SYNC = 1U << 10, + KCTX_FORCE_SAME_VA = 1U << 11, + KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, + KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, + KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, + KCTX_AS_DISABLED_ON_FAULT = 1U << 15, +#if MALI_JIT_PRESSURE_LIMIT_BASE + /* + * Set when JIT physical page limit is less than JIT virtual address + * page limit, so we must take care to not exceed the physical limit + */ + KCTX_JPL_ENABLED = 1U << 16, +#endif /* !MALI_JIT_PRESSURE_LIMIT_BASE */ +}; + +struct kbase_sub_alloc { + struct list_head link; + struct page *page; + DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); +}; + +/** + * struct kbase_context - Kernel base context + * + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * @kbdev: Pointer to the Kbase device for which the context is created. + * @kctx_list_link: Node into Kbase device list of contexts. + * @mmu: Structure holding details of the MMU tables for this + * context + * @id: Unique identifier for the context, indicates the number of + * contexts which have been created for the device so far. + * @api_version: contains the version number for User/kernel interface, + * used for compatibility check. + * @event_list: list of posted events about completed atoms, to be sent to + * event handling thread of Userpsace. + * @event_coalesce_list: list containing events corresponding to successive atoms + * which have requested deferred delivery of the completion + * events to Userspace. + * @event_mutex: Lock to protect the concurrent access to @event_list & + * @event_mutex. + * @event_closed: Flag set through POST_TERM ioctl, indicates that Driver + * should stop posting events and also inform event handling + * thread that context termination is in progress. + * @event_workq: Workqueue for processing work items corresponding to atoms + * that do not return an event to userspace. + * @event_count: Count of the posted events to be consumed by Userspace. + * @event_coalesce_count: Count of the events present in @event_coalesce_list. + * @flags: bitmap of enums from kbase_context_flags, indicating the + * state & attributes for the context. + * @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations, + * which can alias number of memory regions. The page is + * represent a region where it is mapped with a write-alloc + * cache setup, typically used when the write result of the + * GPU isn't needed, but the GPU must write anyway. 
+ * @mem_partials_lock: Lock for protecting the operations done on the elements + * added to @mem_partials list. + * @mem_partials: List head for the list of large pages, 2MB in size, which + * which have been split into 4 KB pages and are used + * partially for the allocations >= 2 MB in size. + * @reg_lock: Lock used for GPU virtual address space management operations, + * like adding/freeing a memory region in the address space. + * Can be converted to a rwlock ?. + * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA + * zone of the GPU virtual address space. Used for allocations + * having the same value for GPU & CPU virtual address. + * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA + * zone of the GPU virtual address space. + * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA + * zone of the GPU virtual address space. Used for GPU-executable + * allocations which don't need the SAME_VA property. + * @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for + * SAME_VA allocations to defer the reservation of memory region + * (from the GPU virtual address space) from base_mem_alloc + * ioctl to mmap system call. This helps returning unique + * handles, disguised as GPU VA, to Userspace from base_mem_alloc + * and later retrieving the pointer to memory region structure + * in the mmap handler. + * @pending_regions: Array containing pointers to memory region structures, + * used in conjunction with @cookies bitmask mainly for + * providing a mechansim to have the same value for CPU & + * GPU virtual address. + * @event_queue: Wait queue used for blocking the thread, which consumes + * the base_jd_event corresponding to an atom, when there + * are no more posted events. + * @tgid: Thread group ID of the process whose thread created + * the context (by calling KBASE_IOCTL_VERSION_CHECK or + * KBASE_IOCTL_SET_FLAGS, depending on the @api_version). + * This is usually, but not necessarily, the same as the + * process whose thread opened the device file + * /dev/malixx instance. + * @pid: ID of the thread, corresponding to process @tgid, + * which actually created the context. This is usually, + * but not necessarily, the same as the thread which + * opened the device file /dev/malixx instance. + * @jctx: object encapsulating all the Job dispatcher related state, + * including the array of atoms. + * @used_pages: Keeps a track of the number of 4KB physical pages in use + * for the context. + * @nonmapped_pages: Updated in the same way as @used_pages, except for the case + * when special tracking page is freed by userspace where it + * is reset to 0. + * @permanent_mapped_pages: Usage count of permanently mapped memory + * @mem_pools: Context-specific pools of free physical memory pages. + * @reclaim: Shrinker object registered with the kernel containing + * the pointer to callback function which is invoked under + * low memory conditions. In the callback function Driver + * frees up the memory for allocations marked as + * evictable/reclaimable. + * @evict_list: List head for the list containing the allocations which + * can be evicted or freed up in the shrinker callback. + * @waiting_soft_jobs: List head for the list containing softjob atoms, which + * are either waiting for the event set operation, or waiting + * for the signaling of input fence or waiting for the GPU + * device to powered on so as to dump the CPU/GPU timestamps. 
+ * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent + * accesses. + * @dma_fence: Object containing list head for the list of dma-buf fence + * waiting atoms and the waitqueue to process the work item + * queued for the atoms blocked on the signaling of dma-buf + * fences. + * @as_nr: id of the address space being used for the scheduled in + * context. This is effectively part of the Run Pool, because + * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst + * the context is scheduled in. The hwaccess_lock must be held + * whilst accessing this. + * If the context relating to this value of as_nr is required, + * then the context must be retained to ensure that it doesn't + * disappear whilst it is being used. Alternatively, hwaccess_lock + * can be held to ensure the context doesn't disappear (but this + * has restrictions on what other locks can be taken simutaneously). + * @refcount: Keeps track of the number of users of this context. A user + * can be a job that is available for execution, instrumentation + * needing to 'pin' a context for counter collection, etc. + * If the refcount reaches 0 then this context is considered + * inactive and the previously programmed AS might be cleared + * at any point. + * Generally the reference count is incremented when the context + * is scheduled in and an atom is pulled from the context's per + * slot runnable tree. + * @mm_update_lock: lock used for handling of special tracking page. + * @process_mm: Pointer to the memory descriptor of the process which + * created the context. Used for accounting the physical + * pages used for GPU allocations, done for the context, + * to the memory consumed by the process. + * @same_va_end: End address of the SAME_VA zone (in 4KB page units) + * @exec_va_start: Start address of the EXEC_VA zone (in 4KB page units) + * or U64_MAX if the EXEC_VA zone is uninitialized. + * @gpu_va_end: End address of the GPU va space (in 4KB page units) + * @jit_va: Indicates if a JIT_VA zone has been created. + * @mem_profile_data: Buffer containing the profiling information provided by + * Userspace, can be read through the mem_profile debugfs file. + * @mem_profile_size: Size of the @mem_profile_data. + * @mem_profile_lock: Lock to serialize the operations related to mem_profile + * debugfs file. + * @kctx_dentry: Pointer to the debugfs directory created for every context, + * inside kbase_device::debugfs_ctx_directory, containing + * context specific files. + * @reg_dump: Buffer containing a register offset & value pair, used + * for dumping job fault debug info. + * @job_fault_count: Indicates that a job fault occurred for the context and + * dumping of its debug info is in progress. + * @job_fault_resume_event_list: List containing atoms completed after the faulty + * atom but before the debug data for faulty atom was dumped. + * @jsctx_queue: Per slot & priority arrays of object containing the root + * of RB-tree holding currently runnable atoms on the job slot + * and the head item of the linked list of atoms blocked on + * cross-slot dependencies. + * @atoms_pulled: Total number of atoms currently pulled from the context. + * @atoms_pulled_slot: Per slot count of the number of atoms currently pulled + * from the context. + * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently + * pulled from the context. hwaccess_lock shall be held when + * accessing it. 
+ * @blocked_js: Indicates if the context is blocked from submitting atoms + * on a slot at a given priority. This is set to true, when + * the atom corresponding to context is soft/hard stopped or + * removed from the HEAD_NEXT register in response to + * soft/hard stop. + * @slots_pullable: Bitmask of slots, indicating the slots for which the + * context has pullable atoms in the runnable tree. + * @work: Work structure used for deferred ASID assignment. + * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters + * client, there can be only such client per kbase + * context. + * @legacy_hwcnt_lock: Lock used to prevent concurrent access to + * @legacy_hwcnt_cli. + * @completed_jobs: List containing completed atoms for which base_jd_event is + * to be posted. + * @work_count: Number of work items, corresponding to atoms, currently + * pending on job_done workqueue of @jctx. + * @soft_job_timeout: Timer object used for failing/cancelling the waiting + * soft-jobs which have been blocked for more than the + * timeout value used for the soft-jobs + * @jit_alloc: Array of 256 pointers to GPU memory regions, used for + * just-in-time memory allocations. + * @jit_max_allocations: Maximum allowed number of in-flight + * just-in-time memory allocations. + * @jit_current_allocations: Current number of in-flight just-in-time + * memory allocations. + * @jit_current_allocations_per_bin: Current number of in-flight just-in-time + * memory allocations per bin. + * @jit_version: Version number indicating whether userspace is using + * old or new version of interface for just-in-time + * memory allocations. + * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2 + * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5 + * 3 -> client used KBASE_IOCTL_MEM_JIT_INIT + * @jit_group_id: A memory group ID to be passed to a platform-specific + * memory group manager. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @jit_phys_pages_limit: Limit of physical pages to apply across all + * just-in-time memory allocations, applied to + * @jit_current_phys_pressure. + * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is + * the sum of the worst case estimate of pages that + * could be used (i.e. the + * &struct_kbase_va_region.nr_pages for all in-use + * just-in-time memory regions that have not yet had + * a usage report) and the actual number of pages + * that were used (i.e. the + * &struct_kbase_va_region.used_pages for regions + * that have had a usage report). + * @jit_phys_pages_to_be_allocated: Count of the physical pages that are being + * now allocated for just-in-time memory + * allocations of a context (across all the + * threads). This is supposed to be updated + * with @reg_lock held before allocating + * the backing pages. This helps ensure that + * total physical memory usage for just in + * time memory allocation remains within the + * @jit_phys_pages_limit in multi-threaded + * scenarios. + * @jit_active_head: List containing the just-in-time memory allocations + * which are in use. + * @jit_pool_head: List containing the just-in-time memory allocations + * which have been freed up by userspace and so not being + * used by them. + * Driver caches them to quickly fulfill requests for new + * JIT allocations. They are released in case of memory + * pressure as they are put on the @evict_list when they + * are freed up by userspace. 
+ * @jit_destroy_head: List containing the just-in-time memory allocations + * which were moved to it from @jit_pool_head, in the + * shrinker callback, after freeing their backing + * physical pages. + * @jit_evict_lock: Lock used for operations done on just-in-time memory + * allocations and also for accessing @evict_list. + * @jit_work: Work item queued to defer the freeing of a memory + * region when a just-in-time memory allocation is moved + * to @jit_destroy_head. + * @ext_res_meta_head: A list of sticky external resources which were requested to + * be mapped on GPU side, through a softjob atom of type + * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl. + * @age_count: Counter incremented on every call to jd_submit_atom, + * atom is assigned the snapshot of this counter, which + * is used to determine the atom's age when it is added to + * the runnable RB-tree. + * @trim_level: Level of JIT allocation trimming to perform on free (0-100%) + * @kprcs: Reference to @struct kbase_process that the current + * kbase_context belongs to. + * @kprcs_link: List link for the list of kbase context maintained + * under kbase_process. + * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by + * kbase_context.reg_lock. + * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled. + * @gwt_current_list: A list of addresses for which GPU has generated write faults, + * after the last snapshot of it was sent to userspace. + * @gwt_snapshot_list: Snapshot of the @gwt_current_list for sending to user space. + * @priority: Indicates the context priority. Used along with @atoms_count + * for context scheduling, protected by hwaccess_lock. + * @atoms_count: Number of GPU atoms currently in use, per priority + * @create_flags: Flags used in context creation. + * @kinstr_jm: Kernel job manager instrumentation context handle + * + * A kernel base context is an entity among which the GPU is scheduled. + * Each context has its own GPU address space. + * Up to one context can be created for each client that opens the device file + * /dev/malixx. Context creation is deferred until a special ioctl() system call + * is made on the device file. 
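+ * That ioctl() is KBASE_IOCTL_VERSION_CHECK or KBASE_IOCTL_SET_FLAGS,
+ * depending on the negotiated @api_version (see @tgid above and the
+ * kbase_file state machine earlier in this header).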
+ */ +struct kbase_context { + struct file *filp; + struct kbase_device *kbdev; + struct list_head kctx_list_link; + struct kbase_mmu_table mmu; + + u32 id; + unsigned long api_version; + struct list_head event_list; + struct list_head event_coalesce_list; + struct mutex event_mutex; + atomic_t event_closed; + struct workqueue_struct *event_workq; + atomic_t event_count; + int event_coalesce_count; + + atomic_t flags; + + struct tagged_addr aliasing_sink_page; + + spinlock_t mem_partials_lock; + struct list_head mem_partials; + + struct mutex reg_lock; + + struct rb_root reg_rbtree_same; + struct rb_root reg_rbtree_custom; + struct rb_root reg_rbtree_exec; + + struct kbase_jd_context jctx; + struct jsctx_queue jsctx_queue + [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; + + struct list_head completed_jobs; + atomic_t work_count; + struct timer_list soft_job_timeout; + + /* MALI_SEC_INTEGRATION */ + int ctx_status; + char name[CTX_NAME_SIZE]; + /* MALI_SEC_INTEGRATION */ + bool destroying_context; + atomic_t mem_profile_showing_state; + wait_queue_head_t mem_profile_wait; + + /* MALI_SEC_INTEGRATION */ + bool need_to_force_schedule_out; + + atomic_t atoms_pulled; + atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; + int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ + KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + int priority; + bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + u32 slots_pullable; + u32 age_count; + + DECLARE_BITMAP(cookies, BITS_PER_LONG); + struct kbase_va_region *pending_regions[BITS_PER_LONG]; + + wait_queue_head_t event_queue; + pid_t tgid; + pid_t pid; + atomic_t used_pages; + atomic_t nonmapped_pages; + atomic_t permanent_mapped_pages; + + struct kbase_mem_pool_group mem_pools; + + struct shrinker reclaim; + struct list_head evict_list; + + struct list_head waiting_soft_jobs; + spinlock_t waiting_soft_jobs_lock; +#ifdef CONFIG_MALI_DMA_FENCE + struct { + struct list_head waiting_resource; + struct workqueue_struct *wq; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE */ + + int as_nr; + + atomic_t refcount; + + spinlock_t mm_update_lock; + struct mm_struct __rcu *process_mm; + u64 same_va_end; + u64 exec_va_start; + u64 gpu_va_end; + bool jit_va; + +#ifdef CONFIG_DEBUG_FS + char *mem_profile_data; + size_t mem_profile_size; + struct mutex mem_profile_lock; + struct dentry *kctx_dentry; + + unsigned int *reg_dump; + atomic_t job_fault_count; + struct list_head job_fault_resume_event_list; + +#endif /* CONFIG_DEBUG_FS */ + + struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; + struct mutex legacy_hwcnt_lock; + + struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT]; + u8 jit_max_allocations; + u8 jit_current_allocations; + u8 jit_current_allocations_per_bin[256]; + u8 jit_version; + u8 jit_group_id; +#if MALI_JIT_PRESSURE_LIMIT_BASE + u64 jit_phys_pages_limit; + u64 jit_current_phys_pressure; + u64 jit_phys_pages_to_be_allocated; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + struct list_head jit_active_head; + struct list_head jit_pool_head; + struct list_head jit_destroy_head; + struct mutex jit_evict_lock; + struct work_struct jit_work; + + struct list_head ext_res_meta_head; + + u8 trim_level; + + struct kbase_process *kprcs; + struct list_head kprcs_link; + +#ifdef CONFIG_MALI_CINSTR_GWT + bool gwt_enabled; + bool gwt_was_enabled; + struct list_head gwt_current_list; + struct list_head gwt_snapshot_list; +#endif + + base_context_create_flags create_flags; + + /* MALI_SEC_INTEGRATION */ 
+#ifdef CONFIG_MALI_SEC_VK_BOOST + bool ctx_vk_need_qos; +#endif + struct kbase_kinstr_jm *kinstr_jm; +}; + +#ifdef CONFIG_MALI_CINSTR_GWT +/** + * struct kbasep_gwt_list_element - Structure used to collect GPU + * write faults. + * @link: List head for adding write faults. + * @region: Details of the region where we have the + * faulting page address. + * @page_addr: Page address where GPU write fault occurred. + * @num_pages: The number of pages modified. + * + * Using this structure all GPU write faults are stored in a list. + */ +struct kbasep_gwt_list_element { + struct list_head link; + struct kbase_va_region *region; + u64 page_addr; + u64 num_pages; +}; + +#endif + +/** + * struct kbase_ctx_ext_res_meta - Structure which binds an external resource + * to a @kbase_context. + * @ext_res_node: List head for adding the metadata to a + * @kbase_context. + * @alloc: The physical memory allocation structure + * which is mapped. + * @gpu_addr: The GPU virtual address the resource is + * mapped to. + * @ref: Reference count. + * + * External resources can be mapped into multiple contexts as well as the same + * context multiple times. + * As kbase_va_region itself isn't refcounted we can't attach our extra + * information to it as it could be removed under our feet leaving external + * resources pinned. + * This metadata structure binds a single external resource to a single + * context, ensuring that per context mapping is tracked separately so it can + * be overridden when needed and abuses by the application (freeing the resource + * multiple times) don't effect the refcount of the physical allocation. + */ +struct kbase_ctx_ext_res_meta { + struct list_head ext_res_node; + struct kbase_mem_phy_alloc *alloc; + u64 gpu_addr; + u32 ref; +}; + +enum kbase_reg_access_type { + REG_READ, + REG_WRITE +}; + +enum kbase_share_attr_bits { + /* (1ULL << 8) bit is reserved */ + SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ + SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ +}; + +/** + * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent. + * @kbdev: kbase device + * + * Return: true if the device access are coherent, false if not. + */ +static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || + (kbdev->system_coherency == COHERENCY_ACE)) + return true; + + return false; +} + +/* Conversion helpers for setting up high resolution timers */ +#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) +#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) + +/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ +#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 +/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ +#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 + +/* JobDescriptorHeader - taken from the architecture specifications, the layout + * is currently identical for all GPU archs. 
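+ *
+ * The u8 bit-fields below are assumed to pack, in declaration order, into the
+ * two bytes between fault_pointer and job_index. C leaves bit-field layout
+ * implementation-defined, so this is only expected to match the hardware
+ * encoding with the little-endian Arm ABIs this driver is normally built for.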
*/ +struct job_descriptor_header { + u32 exception_status; + u32 first_incomplete_task; + u64 fault_pointer; + u8 job_descriptor_size : 1; + u8 job_type : 7; + u8 job_barrier : 1; + u8 _reserved_01 : 1; + u8 _reserved_1 : 1; + u8 _reserved_02 : 1; + u8 _reserved_03 : 1; + u8 _reserved_2 : 1; + u8 _reserved_04 : 1; + u8 _reserved_05 : 1; + u16 job_index; + u16 job_dependency_index_1; + u16 job_dependency_index_2; + union { + u64 _64; + u32 _32; + } next_job; +}; + +#endif /* _KBASE_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_disjoint_events.c b/drivers/gpu/arm/b_r26p0/mali_kbase_disjoint_events.c new file mode 100644 index 000000000000..b5ac414b1223 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_disjoint_events.c @@ -0,0 +1,81 @@ +/* + * + * (C) COPYRIGHT 2014, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel disjoint events helper functions + */ + +#include + +void kbase_disjoint_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_set(&kbdev->disjoint_event.count, 0); + atomic_set(&kbdev->disjoint_event.state, 0); +} + +/* increment the disjoint event count */ +void kbase_disjoint_event(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_inc(&kbdev->disjoint_event.count); +} + +/* increment the state and the event counter */ +void kbase_disjoint_state_up(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_inc(&kbdev->disjoint_event.state); + + kbase_disjoint_event(kbdev); +} + +/* decrement the state */ +void kbase_disjoint_state_down(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0); + + kbase_disjoint_event(kbdev); + + atomic_dec(&kbdev->disjoint_event.state); +} + +/* increments the count only if the state is > 0 */ +void kbase_disjoint_event_potential(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + if (atomic_read(&kbdev->disjoint_event.state)) + kbase_disjoint_event(kbdev); +} + +u32 kbase_disjoint_event_get(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return atomic_read(&kbdev->disjoint_event.count); +} +KBASE_EXPORT_TEST_API(kbase_disjoint_event_get); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_dma_fence.c b/drivers/gpu/arm/b_r26p0/mali_kbase_dma_fence.c new file mode 100644 index 000000000000..25acbcb3f03d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_dma_fence.c @@ -0,0 +1,456 @@ +/* + * + * (C) COPYRIGHT 2011-2017,2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as + * it will be set there. + */ +#include "mali_kbase_dma_fence.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static void +kbase_dma_fence_work(struct work_struct *pwork); + +static void +kbase_dma_fence_waiters_add(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + list_add_tail(&katom->queue, &kctx->dma_fence.waiting_resource); +} + +static void +kbase_dma_fence_waiters_remove(struct kbase_jd_atom *katom) +{ + list_del(&katom->queue); +} + +static int +kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, + struct ww_acquire_ctx *ctx) +{ + struct reservation_object *content_res = NULL; + unsigned int content_res_idx = 0; + unsigned int r; + int err = 0; + + ww_acquire_init(ctx, &reservation_ww_class); + +retry: + for (r = 0; r < info->dma_fence_resv_count; r++) { + if (info->resv_objs[r] == content_res) { + content_res = NULL; + continue; + } + + err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx); + if (err) + goto error; + } + + ww_acquire_done(ctx); + return err; + +error: + content_res_idx = r; + + /* Unlock the locked one ones */ + while (r--) + ww_mutex_unlock(&info->resv_objs[r]->lock); + + if (content_res) + ww_mutex_unlock(&content_res->lock); + + /* If we deadlock try with lock_slow and retry */ + if (err == -EDEADLK) { + content_res = info->resv_objs[content_res_idx]; + ww_mutex_lock_slow(&content_res->lock, ctx); + goto retry; + } + + /* If we are here the function failed */ + ww_acquire_fini(ctx); + return err; +} + +static void +kbase_dma_fence_unlock_reservations(struct kbase_dma_fence_resv_info *info, + struct ww_acquire_ctx *ctx) +{ + unsigned int r; + + for (r = 0; r < info->dma_fence_resv_count; r++) + ww_mutex_unlock(&info->resv_objs[r]->lock); + ww_acquire_fini(ctx); +} + + + +/** + * kbase_dma_fence_queue_work() - Queue work to handle @katom + * @katom: Pointer to atom for which to queue work + * + * Queue kbase_dma_fence_work() for @katom to clean up the fence callbacks and + * submit the atom. + */ +static void +kbase_dma_fence_queue_work(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + bool ret; + + INIT_WORK(&katom->work, kbase_dma_fence_work); + ret = queue_work(kctx->dma_fence.wq, &katom->work); + /* Warn if work was already queued, that should not happen. */ + WARN_ON(!ret); +} + +/** + * kbase_dma_fence_cancel_atom() - Cancels waiting on an atom + * @katom: Katom to cancel + * + * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. 
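+ *
+ * Any installed fence callbacks are freed and the dependency count is forced
+ * to -1 so that an already-queued worker treats the atom as handled. If the
+ * atom is still QUEUED, its event code is set to BASE_JD_EVENT_JOB_CANCELLED
+ * and jd_done_nolock() completes it immediately.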
+ */ +static void +kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&katom->kctx->jctx.lock); + + /* Cancel callbacks and clean up. */ + kbase_fence_free_callbacks(katom); + + /* Mark the atom as handled in case all fences signaled just before + * canceling the callbacks and the worker was queued. + */ + kbase_fence_dep_count_set(katom, -1); + + /* Prevent job_done_nolock from being called twice on an atom when + * there is a race between job completion and cancellation. + */ + + if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { + /* Wait was cancelled - zap the atom */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); + } +} + +/** + * kbase_dma_fence_work() - Worker thread called when a fence is signaled + * @pwork: work_struct containing a pointer to a katom + * + * This function will clean and mark all dependencies as satisfied + */ +static void +kbase_dma_fence_work(struct work_struct *pwork) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_context *ctx; + + katom = container_of(pwork, struct kbase_jd_atom, work); + ctx = &katom->kctx->jctx; + + mutex_lock(&ctx->lock); + if (kbase_fence_dep_count_read(katom) != 0) + goto out; + + kbase_fence_dep_count_set(katom, -1); + + /* Remove atom from list of dma-fence waiting atoms. */ + kbase_dma_fence_waiters_remove(katom); + /* Cleanup callbacks. */ + kbase_fence_free_callbacks(katom); + /* + * Queue atom on GPU, unless it has already completed due to a failing + * dependency. Run jd_done_nolock() on the katom if it is completed. + */ + if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) + jd_done_nolock(katom, NULL); + else + kbase_jd_dep_clear_locked(katom); + +out: + mutex_unlock(&ctx->lock); +} + +static void +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) +#else +kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +#endif +{ + struct kbase_fence_cb *kcb = container_of(cb, + struct kbase_fence_cb, + fence_cb); + struct kbase_jd_atom *katom = kcb->katom; + + /* If the atom is zapped dep_count will be forced to a negative number + * preventing this callback from ever scheduling work. Which in turn + * would reschedule the atom. + */ + + if (kbase_fence_dep_count_dec_and_test(katom)) + kbase_dma_fence_queue_work(katom); +} + +static int +kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, + struct reservation_object *resv, + bool exclusive) +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *excl_fence = NULL; + struct fence **shared_fences = NULL; +#else + struct dma_fence *excl_fence = NULL; + struct dma_fence **shared_fences = NULL; +#endif + unsigned int shared_count = 0; + int err, i; + + err = reservation_object_get_fences_rcu(resv, + &excl_fence, + &shared_count, + &shared_fences); + if (err) + return err; + + if (excl_fence) { + err = kbase_fence_add_callback(katom, + excl_fence, + kbase_dma_fence_cb); + + /* Release our reference, taken by reservation_object_get_fences_rcu(), + * to the fence. We have set up our callback (if that was possible), + * and it's the fence's owner is responsible for singling the fence + * before allowing it to disappear. 
+ */ + dma_fence_put(excl_fence); + + if (err) + goto out; + } + + if (exclusive) { + for (i = 0; i < shared_count; i++) { + err = kbase_fence_add_callback(katom, + shared_fences[i], + kbase_dma_fence_cb); + if (err) + goto out; + } + } + + /* Release all our references to the shared fences, taken by + * reservation_object_get_fences_rcu(). We have set up our callback (if + * that was possible), and it's the fence's owner is responsible for + * signaling the fence before allowing it to disappear. + */ +out: + for (i = 0; i < shared_count; i++) + dma_fence_put(shared_fences[i]); + kfree(shared_fences); + + if (err) { + /* + * On error, cancel and clean up all callbacks that was set up + * before the error. + */ + kbase_fence_free_callbacks(katom); + } + + return err; +} + +void kbase_dma_fence_add_reservation(struct reservation_object *resv, + struct kbase_dma_fence_resv_info *info, + bool exclusive) +{ + unsigned int i; + + for (i = 0; i < info->dma_fence_resv_count; i++) { + /* Duplicate resource, ignore */ + if (info->resv_objs[i] == resv) + return; + } + + info->resv_objs[info->dma_fence_resv_count] = resv; + if (exclusive) + set_bit(info->dma_fence_resv_count, + info->dma_fence_excl_bitmap); + (info->dma_fence_resv_count)++; +} + +int kbase_dma_fence_wait(struct kbase_jd_atom *katom, + struct kbase_dma_fence_resv_info *info) +{ + int err, i; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct ww_acquire_ctx ww_ctx; + + lockdep_assert_held(&katom->kctx->jctx.lock); + + fence = kbase_fence_out_new(katom); + if (!fence) { + err = -ENOMEM; + dev_err(katom->kctx->kbdev->dev, + "Error %d creating fence.\n", err); + return err; + } + + kbase_fence_dep_count_set(katom, 1); + + err = kbase_dma_fence_lock_reservations(info, &ww_ctx); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d locking reservations.\n", err); + kbase_fence_dep_count_set(katom, -1); + kbase_fence_out_remove(katom); + return err; + } + + for (i = 0; i < info->dma_fence_resv_count; i++) { + struct reservation_object *obj = info->resv_objs[i]; + + if (!test_bit(i, info->dma_fence_excl_bitmap)) { + err = reservation_object_reserve_shared(obj); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d reserving space for shared fence.\n", err); + goto end; + } + + err = kbase_dma_fence_add_reservation_callback(katom, obj, false); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d adding reservation to callback.\n", err); + goto end; + } + + reservation_object_add_shared_fence(obj, fence); + } else { + err = kbase_dma_fence_add_reservation_callback(katom, obj, true); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d adding reservation to callback.\n", err); + goto end; + } + + reservation_object_add_excl_fence(obj, fence); + } + } + +end: + kbase_dma_fence_unlock_reservations(info, &ww_ctx); + + if (likely(!err)) { + /* Test if the callbacks are already triggered */ + if (kbase_fence_dep_count_dec_and_test(katom)) { + kbase_fence_dep_count_set(katom, -1); + kbase_fence_free_callbacks(katom); + } else { + /* Add katom to the list of dma-buf fence waiting atoms + * only if it is still waiting. + */ + kbase_dma_fence_waiters_add(katom); + } + } else { + /* There was an error, cancel callbacks, set dep_count to -1 to + * indicate that the atom has been handled (the caller will + * kill it for us), signal the fence, free callbacks and the + * fence. 
+ */ + kbase_fence_free_callbacks(katom); + kbase_fence_dep_count_set(katom, -1); + kbase_dma_fence_signal(katom); + } + + return err; +} + +void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) +{ + struct list_head *list = &kctx->dma_fence.waiting_resource; + + while (!list_empty(list)) { + struct kbase_jd_atom *katom; + + katom = list_first_entry(list, struct kbase_jd_atom, queue); + kbase_dma_fence_waiters_remove(katom); + kbase_dma_fence_cancel_atom(katom); + } +} + +void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) +{ + /* Cancel callbacks and clean up. */ + if (kbase_fence_free_callbacks(katom)) + kbase_dma_fence_queue_work(katom); +} + +void kbase_dma_fence_signal(struct kbase_jd_atom *katom) +{ + if (!katom->dma_fence.fence) + return; + + /* Signal the atom's fence. */ + dma_fence_signal(katom->dma_fence.fence); + + kbase_fence_out_remove(katom); + + kbase_fence_free_callbacks(katom); +} + +void kbase_dma_fence_term(struct kbase_context *kctx) +{ + destroy_workqueue(kctx->dma_fence.wq); + kctx->dma_fence.wq = NULL; +} + +int kbase_dma_fence_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource); + + kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d", + WQ_UNBOUND, 1, kctx->pid); + if (!kctx->dma_fence.wq) + return -ENOMEM; + + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_dma_fence.h b/drivers/gpu/arm/b_r26p0/mali_kbase_dma_fence.h new file mode 100644 index 000000000000..2a4d6fcfaaaf --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_dma_fence.h @@ -0,0 +1,136 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DMA_FENCE_H_ +#define _KBASE_DMA_FENCE_H_ + +#ifdef CONFIG_MALI_DMA_FENCE + +#include +#include +#include + + +/* Forward declaration from mali_kbase_defs.h */ +struct kbase_jd_atom; +struct kbase_context; + +/** + * struct kbase_dma_fence_resv_info - Structure with list of reservation objects + * @resv_objs: Array of reservation objects to attach the + * new fence to. + * @dma_fence_resv_count: Number of reservation objects in the array. + * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive. + * + * This is used by some functions to pass around a collection of data about + * reservation objects. + */ +struct kbase_dma_fence_resv_info { + struct reservation_object **resv_objs; + unsigned int dma_fence_resv_count; + unsigned long *dma_fence_excl_bitmap; +}; + +/** + * kbase_dma_fence_add_reservation() - Adds a resv to the array of resv_objs + * @resv: Reservation object to add to the array. 
+ * @info: Pointer to struct with current reservation info + * @exclusive: Boolean indicating if exclusive access is needed + * + * The function adds a new reservation_object to an existing array of + * reservation_objects. At the same time keeps track of which objects require + * exclusive access in dma_fence_excl_bitmap. + */ +void kbase_dma_fence_add_reservation(struct reservation_object *resv, + struct kbase_dma_fence_resv_info *info, + bool exclusive); + +/** + * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs + * @katom: Katom with the external dependency. + * @info: Pointer to struct with current reservation info + * + * Return: An error code or 0 if succeeds + */ +int kbase_dma_fence_wait(struct kbase_jd_atom *katom, + struct kbase_dma_fence_resv_info *info); + +/** + * kbase_dma_fence_cancel_ctx() - Cancel all dma-fences blocked atoms on kctx + * @kctx: Pointer to kbase context + * + * This function will cancel and clean up all katoms on @kctx that is waiting + * on dma-buf fences. + * + * Locking: jctx.lock needs to be held when calling this function. + */ +void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx); + +/** + * kbase_dma_fence_cancel_callbacks() - Cancel only callbacks on katom + * @katom: Pointer to katom whose callbacks are to be canceled + * + * This function cancels all dma-buf fence callbacks on @katom, but does not + * cancel the katom itself. + * + * The caller is responsible for ensuring that jd_done_nolock is called on + * @katom. + * + * Locking: jctx.lock must be held when calling this function. + */ +void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom); + +/** + * kbase_dma_fence_signal() - Signal katom's fence and clean up after wait + * @katom: Pointer to katom to signal and clean up + * + * This function will signal the @katom's fence, if it has one, and clean up + * the callback data from the katom's wait on earlier fences. + * + * Locking: jctx.lock must be held while calling this function. + */ +void kbase_dma_fence_signal(struct kbase_jd_atom *katom); + +/** + * kbase_dma_fence_term() - Terminate Mali dma-fence context + * @kctx: kbase context to terminate + */ +void kbase_dma_fence_term(struct kbase_context *kctx); + +/** + * kbase_dma_fence_init() - Initialize Mali dma-fence context + * @kctx: kbase context to initialize + */ +int kbase_dma_fence_init(struct kbase_context *kctx); + + +#else /* CONFIG_MALI_DMA_FENCE */ +/* Dummy functions for when dma-buf fence isn't enabled. */ + +static inline int kbase_dma_fence_init(struct kbase_context *kctx) +{ + return 0; +} + +static inline void kbase_dma_fence_term(struct kbase_context *kctx) {} +#endif /* CONFIG_MALI_DMA_FENCE */ +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c b/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c new file mode 100644 index 000000000000..188e53bf1abe --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.c @@ -0,0 +1,442 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Implementation of the dummy job execution workaround for the GPU hang issue. + */ + +#include +#include +#include + +#include +#include + +#define DUMMY_JOB_WA_BINARY_NAME "valhall-1691526.wa" + +struct wa_header { + u16 signature; + u16 version; + u32 info_offset; +} __packed; + +struct wa_v2_info { + u64 jc; + u32 js; + u32 blob_offset; + u64 flags; +} __packed; + +struct wa_blob { + u64 base; + u32 size; + u32 map_flags; + u32 payload_offset; + u32 blob_offset; +} __packed; + +static bool in_range(const u8 *base, const u8 *end, off_t off, size_t sz) +{ + return !(end - base - off < sz); +} + +static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits) +{ + int loop; + const int timeout = 100; + u32 val; + + for (loop = 0; loop < timeout; loop++) { + val = kbase_reg_read(kbdev, offset); + if (val & bits) + break; + udelay(10); + } + + if (loop == timeout) { + dev_err(kbdev->dev, + "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", + (unsigned long)offset, (unsigned long)bits, + (unsigned long)val); + } + + return (val & bits); +} + +static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set) +{ + int loop; + const int timeout = 100; + u32 val; + u32 target = 0; + + if (set) + target = bits; + + for (loop = 0; loop < timeout; loop++) { + val = kbase_reg_read(kbdev, (offset)); + if ((val & bits) == target) + break; + + udelay(10); + } + + if (loop == timeout) { + dev_err(kbdev->dev, + "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n", + (unsigned long)offset, (unsigned long)bits, + (unsigned long)val); + return -ETIMEDOUT; + } + + return 0; +} + +static inline int run_job(struct kbase_device *kbdev, int as, int slot, + u64 cores, u64 jc) +{ + u32 done; + + /* setup job */ + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO), + jc & U32_MAX); + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI), + jc >> 32); + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO), + cores & U32_MAX); + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI), + cores >> 32); + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT), + JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as); + + /* go */ + kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT), + JS_COMMAND_START); + + /* wait for the slot to finish (done, error) */ + done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), + (1ul << (16+slot)) | (1ul << slot)); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done); + + if (done != (1ul << slot)) { + dev_err(kbdev->dev, + "Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n", + slot, (unsigned long long)cores, + (unsigned long)done); + dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n", + kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS))); + + return -EFAULT; + } else { + return 0; + } +} + +/* To be called after power up & MMU init, but before everything else */ +int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) +{ + int as; + int slot; + u64 jc; + int failed = 0; + int runs = 0; + u32 old_gpu_mask; + u32 old_job_mask; + + if (!kbdev) + return -EFAULT; + + if (!kbdev->dummy_job_wa.ctx) + return -EFAULT; + + as = kbdev->dummy_job_wa.ctx->as_nr; + slot = 
kbdev->dummy_job_wa.slot; + jc = kbdev->dummy_job_wa.jc; + + /* mask off all but MMU IRQs */ + old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); + + /* power up requested cores */ + kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX)); + kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32)); + + if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) { + /* wait for power-ups */ + wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true); + if (cores >> 32) + wait(kbdev, SHADER_READY_HI, (cores >> 32), true); + } + + if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { + int i; + + /* do for each requested core */ + for (i = 0; i < sizeof(cores) * 8; i++) { + u64 affinity; + + affinity = 1ull << i; + + if (!(cores & affinity)) + continue; + + if (run_job(kbdev, as, slot, affinity, jc)) + failed++; + runs++; + } + + } else { + if (run_job(kbdev, as, slot, cores, jc)) + failed++; + runs++; + } + + if (kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + /* power off shader cores (to reduce any dynamic leakage) */ + kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX)); + kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32)); + + /* wait for power off complete */ + wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false); + wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false); + if (cores >> 32) { + wait(kbdev, SHADER_READY_HI, (cores >> 32), false); + wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false); + } + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); + } + + /* restore IRQ masks */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask); + + if (failed) + dev_err(kbdev->dev, + "WA complete with %d failures out of %d runs\n", failed, + runs); + + return failed ? -EFAULT : 0; +} + +static ssize_t show_dummy_job_wa_info(struct device * const dev, + struct device_attribute * const attr, char * const buf) +{ + struct kbase_device *const kbdev = dev_get_drvdata(dev); + int err; + + if (!kbdev || !kbdev->dummy_job_wa.ctx) + return -ENODEV; + + err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n", + kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags); + + return err; +} + +static DEVICE_ATTR(dummy_job_wa_info, 0444, show_dummy_job_wa_info, NULL); + +static bool wa_blob_load_needed(struct kbase_device *kbdev) +{ + if (of_machine_is_compatible("arm,juno")) + return false; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485)) + return true; + + return false; +} + +int kbase_dummy_job_wa_load(struct kbase_device *kbdev) +{ + const struct firmware *firmware; + static const char wa_name[] = DUMMY_JOB_WA_BINARY_NAME; + const u32 signature = 0x4157; + const u32 version = 2; + const u8 *fw_end; + const u8 *fw; + const struct wa_header *header; + const struct wa_v2_info *v2_info; + u32 blob_offset; + int err; + struct kbase_context *kctx; + + if (!wa_blob_load_needed(kbdev)) + return 0; + + /* load the wa */ + err = request_firmware(&firmware, wa_name, kbdev->dev); + + if (err) { + dev_err(kbdev->dev, "WA blob missing. 
Please refer to the Arm Mali DDK Valhall Release Notes, " + "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed"); + return -ENODEV; + } + + kctx = kbase_create_context(kbdev, true, + BASE_CONTEXT_CREATE_FLAG_NONE, 0, + NULL); + + if (!kctx) { + dev_err(kbdev->dev, "Failed to create WA context\n"); + goto no_ctx; + } + + fw = firmware->data; + fw_end = fw + firmware->size; + + dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", + firmware->size); + + if (!in_range(fw, fw_end, 0, sizeof(*header))) { + dev_err(kbdev->dev, "WA too small\n"); + goto bad_fw; + } + + header = (const struct wa_header *)(fw + 0); + + if (header->signature != signature) { + dev_err(kbdev->dev, "WA signature failure: 0x%lx\n", + (unsigned long)header->signature); + goto bad_fw; + } + + if (header->version != version) { + dev_err(kbdev->dev, "WA version 0x%lx not supported\n", + (unsigned long)header->version); + goto bad_fw; + } + + if (!in_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) { + dev_err(kbdev->dev, "WA info offset out of bounds\n"); + goto bad_fw; + } + + v2_info = (const struct wa_v2_info *)(fw + header->info_offset); + + if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) { + dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n", + (unsigned long long)v2_info->flags); + goto bad_fw; + } + + kbdev->dummy_job_wa.slot = v2_info->js; + kbdev->dummy_job_wa.jc = v2_info->jc; + kbdev->dummy_job_wa.flags = v2_info->flags; + + blob_offset = v2_info->blob_offset; + + while (blob_offset) { + const struct wa_blob *blob; + size_t nr_pages; + u64 flags; + u64 gpu_va; + struct kbase_va_region *va_region; + + if (!in_range(fw, fw_end, blob_offset, sizeof(*blob))) { + dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n", + (unsigned long)blob_offset); + goto bad_fw; + } + + blob = (const struct wa_blob *)(fw + blob_offset); + if (!in_range(fw, fw_end, blob->payload_offset, blob->size)) { + dev_err(kbdev->dev, "Payload out-of-bounds\n"); + goto bad_fw; + } + + gpu_va = blob->base; + if (PAGE_ALIGN(gpu_va) != gpu_va) { + dev_err(kbdev->dev, "blob not page aligned\n"); + goto bad_fw; + } + nr_pages = PFN_UP(blob->size); + flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; + + va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, + 0, &flags, &gpu_va); + + if (!va_region) { + dev_err(kbdev->dev, "Failed to allocate for blob\n"); + } else { + struct kbase_vmap_struct vmap = { 0 }; + const u8 *payload; + void *dst; + + /* copy the payload, */ + payload = fw + blob->payload_offset; + + dst = kbase_vmap(kctx, + va_region->start_pfn << PAGE_SHIFT, + nr_pages << PAGE_SHIFT, &vmap); + + if (dst) { + memcpy(dst, payload, blob->size); + kbase_vunmap(kctx, &vmap); + } else { + dev_err(kbdev->dev, + "Failed to copy payload\n"); + } + + } + blob_offset = blob->blob_offset; /* follow chain */ + } + + release_firmware(firmware); + + kbasep_js_schedule_privileged_ctx(kbdev, kctx); + + kbdev->dummy_job_wa.ctx = kctx; + + err = sysfs_create_file(&kbdev->dev->kobj, + &dev_attr_dummy_job_wa_info.attr); + if (err) + dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n"); + + return 0; + +bad_fw: + kbase_destroy_context(kctx); +no_ctx: + release_firmware(firmware); + return -EFAULT; +} + +void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) +{ + struct kbase_context *wa_ctx; + + /* Can be safely called even if the file wasn't created on probe */ + sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); + + wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx); + 
WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL); + /* make this write visible before we tear down the ctx */ + smp_mb(); + + if (wa_ctx) { + kbasep_js_release_privileged_ctx(kbdev, wa_ctx); + kbase_destroy_context(wa_ctx); + } +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.h b/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.h new file mode 100644 index 000000000000..5bbe37df7ed6 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_dummy_job_wa.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DUMMY_JOB_WORKAROUND_ +#define _KBASE_DUMMY_JOB_WORKAROUND_ + +#define KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE (1ull << 0) +#define KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP (1ull << 1) +#define KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER (1ull << 2) + +#define KBASE_DUMMY_JOB_WA_FLAGS (KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE | \ + KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) + + +int kbase_dummy_job_wa_load(struct kbase_device *kbdev); +void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev); +int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores); + +static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) +{ + return (kbdev->dummy_job_wa.ctx != NULL); +} + + +#endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_event.c b/drivers/gpu/arm/b_r26p0/mali_kbase_event.c new file mode 100644 index 000000000000..8cfc35853ab9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_event.c @@ -0,0 +1,322 @@ +/* + * + * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#include +#include +#include + +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#include +#endif + +static bool kbase_event_check_error(struct kbase_context *kctx, struct kbase_jd_atom *katom, struct base_jd_udata *data) +{ + pgd_t *pgd; + struct mm_struct *mm; + + memset(data->blob, 0, sizeof(data->blob)); + + if (!kctx || !katom) { + printk("kctx: 0x%p, katom: 0x%p\n", kctx, katom); + return false; + } + + if (katom->status != KBASE_JD_ATOM_STATE_COMPLETED) { + printk("Abnormal situation\n"); + printk("kctx: 0x%p, katom: 0x%p, katom->status: 0x%x\n", kctx, katom, katom->status); + return false; + } + + mm = katom->kctx->process_mm; + if (mm == NULL) { + printk("Abnormal katom\n"); + printk("katom->kctx: 0x%p, katom->kctx->tgid: %d, katom->kctx->process_mm: 0x%p\n", katom->kctx, katom->kctx->tgid, katom->kctx->process_mm); + return false; + } + pgd = pgd_offset(mm, (unsigned long)&katom->completed); + if (pgd_none(*pgd) || pgd_bad(*pgd)) { + printk("Abnormal katom\n"); + printk("katom->kctx: 0x%p, katom->kctx->tgid: %d, katom->kctx->process_mm: 0x%p, pgd: 0x%px\n", katom->kctx, katom->kctx->tgid, katom->kctx->process_mm, pgd); + return false; + } + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (katom->completed.lock.dep_map.key) { + pgd = pgd_offset(mm, (unsigned long)&katom->completed.lock.dep_map.key); + if (pgd_none(*pgd) || pgd_bad(*pgd)) { + printk("Abnormal katom 2\n"); + printk("katom->kctx: 0x%p, katom->kctx->tgid: %d, katom->kctx->process_mm: 0x%p, pgd: 0x%px\n", katom->kctx, katom->kctx->tgid, katom->kctx->process_mm, pgd); + return false; + } + } +#endif + + return true; +} /* MALI_SEC_INTEGRATION */ + +static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + struct base_jd_udata data; + struct kbase_device *kbdev; + + lockdep_assert_held(&kctx->jctx.lock); + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + + /* MALI_SEC_INTEGRATION */ + if (kbase_event_check_error(kctx, katom, &data) == false) + return data; + + kbdev = kctx->kbdev; + data = katom->udata; + + KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, katom, kctx); + KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); + + katom->status = KBASE_JD_ATOM_STATE_UNUSED; + dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom); + wake_up(&katom->completed); + + return data; +} + +int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) +{ + struct kbase_jd_atom *atom; + + KBASE_DEBUG_ASSERT(ctx); + + mutex_lock(&ctx->event_mutex); + + if (list_empty(&ctx->event_list)) { + if (!atomic_read(&ctx->event_closed)) { + mutex_unlock(&ctx->event_mutex); + return -1; + } + + /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ + mutex_unlock(&ctx->event_mutex); + uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; + memset(&uevent->udata, 0, sizeof(uevent->udata)); + dev_dbg(ctx->kbdev->dev, + "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", + BASE_JD_EVENT_DRV_TERMINATED); + return 0; + } + + /* normal event processing */ + atomic_dec(&ctx->event_count); + atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); + + /* MALI_SEC_INTEGRATION */ + /* Do not delete from list if item was removed already */ + if (!(ctx->event_list.next->prev == LIST_POISON2 || ctx->event_list.next->next == LIST_POISON1)) + list_del(ctx->event_list.next); + + 
mutex_unlock(&ctx->event_mutex); + + dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); + uevent->event_code = atom->event_code; + + uevent->atom_number = (atom - ctx->jctx.atoms); + + if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_free_external_resources(atom); + + mutex_lock(&ctx->jctx.lock); + uevent->udata = kbase_event_process(ctx, atom); + mutex_unlock(&ctx->jctx.lock); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_event_dequeue); + +/** + * kbase_event_process_noreport_worker - Worker for processing atoms that do not + * return an event but do have external + * resources + * @data: Work structure + */ +static void kbase_event_process_noreport_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_free_external_resources(katom); + + mutex_lock(&kctx->jctx.lock); + kbase_event_process(kctx, katom); + mutex_unlock(&kctx->jctx.lock); +} + +/** + * kbase_event_process_noreport - Process atoms that do not return an event + * @kctx: Context pointer + * @katom: Atom to be processed + * + * Atoms that do not have external resources will be processed immediately. + * Atoms that do have external resources will be processed on a workqueue, in + * order to avoid locking issues. + */ +static void kbase_event_process_noreport(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + INIT_WORK(&katom->work, kbase_event_process_noreport_worker); + queue_work(kctx->event_workq, &katom->work); + } else { + kbase_event_process(kctx, katom); + } +} + +/** + * kbase_event_coalesce - Move pending events to the main event list + * @kctx: Context pointer + * + * kctx->event_list and kctx->event_coalesce_count must be protected + * by a lock unless this is the last thread using them + * (and we're about to terminate the lock). 
+ * + * Return: The number of pending events moved to the main event list + */ +static int kbase_event_coalesce(struct kbase_context *kctx) +{ + const int event_count = kctx->event_coalesce_count; + + /* Join the list of pending events onto the tail of the main list + and reset it */ + list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); + kctx->event_coalesce_count = 0; + + /* Return the number of events moved */ + return event_count; +} + +void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) +{ + struct kbase_device *kbdev = ctx->kbdev; + + dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom); + + if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { + if (atom->event_code == BASE_JD_EVENT_DONE) { + dev_dbg(kbdev->dev, "Suppressing event (atom done)\n"); + kbase_event_process_noreport(ctx, atom); + return; + } + } + + if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { + dev_dbg(kbdev->dev, "Suppressing event (never)\n"); + kbase_event_process_noreport(ctx, atom); + return; + } + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED); + if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { + /* Don't report the event until other event(s) have completed */ + dev_dbg(kbdev->dev, "Deferring event (coalesced)\n"); + mutex_lock(&ctx->event_mutex); + list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); + ++ctx->event_coalesce_count; + mutex_unlock(&ctx->event_mutex); + } else { + /* Report the event and any pending events now */ + int event_count = 1; + + mutex_lock(&ctx->event_mutex); + event_count += kbase_event_coalesce(ctx); + list_add_tail(&atom->dep_item[0], &ctx->event_list); + atomic_add(event_count, &ctx->event_count); + mutex_unlock(&ctx->event_mutex); + dev_dbg(kbdev->dev, "Reporting %d events\n", event_count); + + kbase_event_wakeup(ctx); + + /* Post-completion latency */ + trace_sysgraph(SGR_POST, ctx->id, + kbase_jd_atom_id(ctx, atom)); + } +} +KBASE_EXPORT_TEST_API(kbase_event_post); + +void kbase_event_close(struct kbase_context *kctx) +{ + mutex_lock(&kctx->event_mutex); + atomic_set(&kctx->event_closed, true); + mutex_unlock(&kctx->event_mutex); + kbase_event_wakeup(kctx); +} + +int kbase_event_init(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + INIT_LIST_HEAD(&kctx->event_list); + INIT_LIST_HEAD(&kctx->event_coalesce_list); + mutex_init(&kctx->event_mutex); + kctx->event_coalesce_count = 0; + kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); + + if (NULL == kctx->event_workq) + return -EINVAL; + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_event_init); + +void kbase_event_cleanup(struct kbase_context *kctx) +{ + int event_count; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(kctx->event_workq); + + flush_workqueue(kctx->event_workq); + destroy_workqueue(kctx->event_workq); + + /* We use kbase_event_dequeue to remove the remaining events as that + * deals with all the cleanup needed for the atoms. 
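The bookkeeping in kbase_event_post() and kbase_event_coalesce() above keeps ctx->event_count equal to the number of entries on ctx->event_list: coalesced atoms are parked on event_coalesce_list without touching the counter, and the next reported atom splices them across and bumps the counter by one plus the number moved, all under event_mutex. The following standalone sketch, using a stand-in context type rather than struct kbase_context, shows just that splice-and-count step:

#include <linux/list.h>
#include <linux/atomic.h>

struct demo_event_ctx {			/* stand-in for the kbase_context fields */
	struct list_head event_list;
	struct list_head event_coalesce_list;
	int event_coalesce_count;
	atomic_t event_count;
};

/* Move every parked event plus one new entry, keeping the counter in step. */
static void demo_report_event(struct demo_event_ctx *ctx,
			      struct list_head *new_item)
{
	int moved = ctx->event_coalesce_count;

	list_splice_tail_init(&ctx->event_coalesce_list, &ctx->event_list);
	ctx->event_coalesce_count = 0;

	list_add_tail(new_item, &ctx->event_list);
	atomic_add(moved + 1, &ctx->event_count);
}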
+ * + * Note: use of kctx->event_list without a lock is safe because this must be the last + * thread using it (because we're about to terminate the lock) + */ + event_count = kbase_event_coalesce(kctx); + atomic_add(event_count, &kctx->event_count); + + while (!list_empty(&kctx->event_list)) { + struct base_jd_event_v2 event; + + kbase_event_dequeue(kctx, &event); + } +} + +KBASE_EXPORT_TEST_API(kbase_event_cleanup); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_fence.c b/drivers/gpu/arm/b_r26p0/mali_kbase_fence.c new file mode 100644 index 000000000000..7a715b3354be --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_fence.c @@ -0,0 +1,214 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#include + +/* Spin lock protecting all Mali fences as fence->lock. */ +static DEFINE_SPINLOCK(kbase_fence_lock); + +static const char * +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_fence_get_driver_name(struct fence *fence) +#else +kbase_fence_get_driver_name(struct dma_fence *fence) +#endif +{ + return kbase_drv_name; +} + +static const char * +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_fence_get_timeline_name(struct fence *fence) +#else +kbase_fence_get_timeline_name(struct dma_fence *fence) +#endif +{ + return kbase_timeline_name; +} + +static bool +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_fence_enable_signaling(struct fence *fence) +#else +kbase_fence_enable_signaling(struct dma_fence *fence) +#endif +{ + return true; +} + +static void +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_fence_fence_value_str(struct fence *fence, char *str, int size) +#else +kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) +#endif +{ +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) + snprintf(str, size, "%u", fence->seqno); +#else + snprintf(str, size, "%llu", fence->seqno); +#endif +} + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +const struct fence_ops kbase_fence_ops = { + .wait = fence_default_wait, +#else +const struct dma_fence_ops kbase_fence_ops = { + .wait = dma_fence_default_wait, +#endif + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, + .fence_value_str = kbase_fence_fence_value_str +}; + + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +struct fence * +kbase_fence_out_new(struct kbase_jd_atom *katom) +#else +struct dma_fence * +kbase_fence_out_new(struct kbase_jd_atom *katom) +#endif +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + + WARN_ON(katom->dma_fence.fence); + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + 
if (!fence) + return NULL; + + dma_fence_init(fence, + &kbase_fence_ops, + &kbase_fence_lock, + katom->dma_fence.context, + atomic_inc_return(&katom->dma_fence.seqno)); + + katom->dma_fence.fence = fence; + + return fence; +} + +bool +kbase_fence_free_callbacks(struct kbase_jd_atom *katom) +{ + struct kbase_fence_cb *cb, *tmp; + bool res = false; + + lockdep_assert_held(&katom->kctx->jctx.lock); + + /* Clean up and free callbacks. */ + list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) { + bool ret; + + /* Cancel callbacks that hasn't been called yet. */ + ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb); + if (ret) { + int ret; + + /* Fence had not signaled, clean up after + * canceling. + */ + ret = atomic_dec_return(&katom->dma_fence.dep_count); + + if (unlikely(ret == 0)) + res = true; + } + + /* + * Release the reference taken in + * kbase_fence_add_callback(). + */ + dma_fence_put(cb->fence); + list_del(&cb->node); + kfree(cb); + } + + return res; +} + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +int +kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct fence *fence, + fence_func_t callback) +#else +int +kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct dma_fence *fence, + dma_fence_func_t callback) +#endif +{ + int err = 0; + struct kbase_fence_cb *kbase_fence_cb; + + if (!fence) + return -EINVAL; + + kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL); + if (!kbase_fence_cb) + return -ENOMEM; + + kbase_fence_cb->fence = fence; + kbase_fence_cb->katom = katom; + INIT_LIST_HEAD(&kbase_fence_cb->node); + atomic_inc(&katom->dma_fence.dep_count); + + err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb, + callback); + if (err == -ENOENT) { + /* Fence signaled, get the completion result */ + err = dma_fence_get_status(fence); + + /* remap success completion to err code */ + if (err == 1) + err = 0; + + kfree(kbase_fence_cb); + atomic_dec(&katom->dma_fence.dep_count); + } else if (err) { + kfree(kbase_fence_cb); + atomic_dec(&katom->dma_fence.dep_count); + } else { + /* + * Get reference to fence that will be kept until callback gets + * cleaned up in kbase_fence_free_callbacks(). + */ + dma_fence_get(fence); + /* Add callback to katom's list of callbacks */ + list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); + } + + return err; +} + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_fence.h b/drivers/gpu/arm/b_r26p0/mali_kbase_fence.h new file mode 100644 index 000000000000..8e7024eefa00 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_fence.h @@ -0,0 +1,282 @@ +/* + * + * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
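kbase_fence_add_callback() above leaves katom->dma_fence.dep_count incremented only when a callback was actually installed; if the fence had already signalled, or installation failed, the increment is undone before returning. A hedged sketch of the usual waiter pattern is shown below, built on the dep_count helpers declared in mali_kbase_fence.h further down; the callback body and the "setup reference" convention are illustrative, not the driver's actual dma-fence worker.

#include <linux/dma-fence.h>
#include <linux/printk.h>
#include "mali_kbase_fence.h"

/* Illustrative callback (kernel >= 4.10 names). */
static void demo_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct kbase_fence_cb *kcb =
		container_of(cb, struct kbase_fence_cb, fence_cb);

	/* Last outstanding dependency: a real driver would queue the
	 * atom's completion work here. */
	if (kbase_fence_dep_count_dec_and_test(kcb->katom))
		pr_debug("atom %p unblocked\n", (void *)kcb->katom);
}

static int demo_wait_on_fence(struct kbase_jd_atom *katom,
			      struct dma_fence *fence)
{
	int err;

	kbase_fence_dep_count_set(katom, 1);	/* our own "setup" reference */

	err = kbase_fence_add_callback(katom, fence, demo_fence_cb);
	if (err < 0)
		return err;	/* nothing installed; a real caller would
				 * also reset dep_count before returning */

	/* Drop the setup reference; if it was the last one (the fence had
	 * already signalled), completion happens here instead of in the cb. */
	if (kbase_fence_dep_count_dec_and_test(katom))
		pr_debug("atom %p already unblocked\n", (void *)katom);

	return 0;
}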
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_FENCE_H_ +#define _KBASE_FENCE_H_ + +/* + * mali_kbase_fence.[hc] has common fence code used by both + * - CONFIG_MALI_DMA_FENCE - implicit DMA fences + * - CONFIG_SYNC_FILE - explicit fences beginning with 4.9 kernel + */ + +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + +#include +#include "mali_kbase_fence_defs.h" +#include "mali_kbase.h" + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +extern const struct fence_ops kbase_fence_ops; +#else +extern const struct dma_fence_ops kbase_fence_ops; +#endif + +/** +* struct kbase_fence_cb - Mali dma-fence callback data struct +* @fence_cb: Callback function +* @katom: Pointer to katom that is waiting on this callback +* @fence: Pointer to the fence object on which this callback is waiting +* @node: List head for linking this callback to the katom +*/ +struct kbase_fence_cb { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence_cb fence_cb; + struct fence *fence; +#else + struct dma_fence_cb fence_cb; + struct dma_fence *fence; +#endif + struct kbase_jd_atom *katom; + struct list_head node; +}; + +/** + * kbase_fence_out_new() - Creates a new output fence and puts it on the atom + * @katom: Atom to create an output fence for + * + * return: A new fence object on success, NULL on failure. + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); +#else +struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); +#endif + +#if defined(CONFIG_SYNC_FILE) +/** + * kbase_fence_fence_in_set() - Assign input fence to atom + * @katom: Atom to assign input fence to + * @fence: Input fence to assign to atom + * + * This function will take ownership of one fence reference! 
+ */ +#define kbase_fence_fence_in_set(katom, fence) \ + do { \ + WARN_ON((katom)->dma_fence.fence_in); \ + (katom)->dma_fence.fence_in = fence; \ + } while (0) +#endif + + +/** + * kbase_fence_out_remove() - Removes the output fence from atom + * @katom: Atom to remove output fence for + * + * This will also release the reference to this fence which the atom keeps + */ +static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) +{ + if (katom->dma_fence.fence) { + dma_fence_put(katom->dma_fence.fence); + katom->dma_fence.fence = NULL; + } +} + +#if defined(CONFIG_SYNC_FILE) +/** + * kbase_fence_out_remove() - Removes the input fence from atom + * @katom: Atom to remove input fence for + * + * This will also release the reference to this fence which the atom keeps + */ +static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom) +{ + if (katom->dma_fence.fence_in) { + dma_fence_put(katom->dma_fence.fence_in); + katom->dma_fence.fence_in = NULL; + } +} +#endif + +/** + * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us + * @katom: Atom to check output fence for + * + * Return: true if fence exists and is valid, otherwise false + */ +static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) +{ + return katom->dma_fence.fence && + katom->dma_fence.fence->ops == &kbase_fence_ops; +} + +/** + * kbase_fence_out_signal() - Signal output fence of atom + * @katom: Atom to signal output fence for + * @status: Status to signal with (0 for success, < 0 for error) + * + * Return: 0 on success, < 0 on error + */ +static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, + int status) +{ + if (status) { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) + fence_set_error(katom->dma_fence.fence, status); +#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) + dma_fence_set_error(katom->dma_fence.fence, status); +#else + katom->dma_fence.fence->status = status; +#endif + } + return dma_fence_signal(katom->dma_fence.fence); +} + +/** + * kbase_fence_add_callback() - Add callback on @fence to block @katom + * @katom: Pointer to katom that will be blocked by @fence + * @fence: Pointer to fence on which to set up the callback + * @callback: Pointer to function to be called when fence is signaled + * + * Caller needs to hold a reference to @fence when calling this function, and + * the caller is responsible for releasing that reference. An additional + * reference to @fence will be taken when the callback was successfully set up + * and @fence needs to be kept valid until the callback has been called and + * cleanup have been done. + * + * Return: 0 on success: fence was either already signaled, or callback was + * set up. Negative error code is returned on error. + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +int kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct fence *fence, + fence_func_t callback); +#else +int kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct dma_fence *fence, + dma_fence_func_t callback); +#endif + +/** + * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value + * @katom: Atom to set dep_count for + * @val: value to set dep_count to + * + * The dep_count is available to the users of this module so that they can + * synchronize completion of the wait with cancellation and adding of more + * callbacks. 
For instance, a user could do the following: + * + * dep_count set to 1 + * callback #1 added, dep_count is increased to 2 + * callback #1 happens, dep_count decremented to 1 + * since dep_count > 0, no completion is done + * callback #2 is added, dep_count is increased to 2 + * dep_count decremented to 1 + * callback #2 happens, dep_count decremented to 0 + * since dep_count now is zero, completion executes + * + * The dep_count can also be used to make sure that the completion only + * executes once. This is typically done by setting dep_count to -1 for the + * thread that takes on this responsibility. + */ +static inline void +kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val) +{ + atomic_set(&katom->dma_fence.dep_count, val); +} + +/** + * kbase_fence_dep_count_dec_and_test() - Decrements dep_count + * @katom: Atom to decrement dep_count for + * + * See @kbase_fence_dep_count_set for general description about dep_count + * + * Return: true if value was decremented to zero, otherwise false + */ +static inline bool +kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom) +{ + return atomic_dec_and_test(&katom->dma_fence.dep_count); +} + +/** + * kbase_fence_dep_count_read() - Returns the current dep_count value + * @katom: Pointer to katom + * + * See @kbase_fence_dep_count_set for general description about dep_count + * + * Return: The current dep_count value + */ +static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom) +{ + return atomic_read(&katom->dma_fence.dep_count); +} + +/** + * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom + * @katom: Pointer to katom + * + * This function will free all fence callbacks on the katom's list of + * callbacks. Callbacks that have not yet been called, because their fence + * hasn't yet signaled, will first be removed from the fence. + * + * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. + * + * Return: true if dep_count reached 0, otherwise false. + */ +bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); + +#if defined(CONFIG_SYNC_FILE) +/** + * kbase_fence_in_get() - Retrieve input fence for atom. + * @katom: Atom to get input fence from + * + * A ref will be taken for the fence, so use @kbase_fence_put() to release it + * + * Return: The fence, or NULL if there is no input fence for atom + */ +#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in) +#endif + +/** + * kbase_fence_out_get() - Retrieve output fence for atom. + * @katom: Atom to get output fence from + * + * A ref will be taken for the fence, so use @kbase_fence_put() to release it + * + * Return: The fence, or NULL if there is no output fence for atom + */ +#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) + + +/** + * kbase_fence_put() - Releases a reference to a fence + * @fence: Fence to release reference for. + */ +#define kbase_fence_put(fence) dma_fence_put(fence) + + +#endif /* CONFIG_MALI_DMA_FENCE || defined(CONFIG_SYNC_FILE */ + +#endif /* _KBASE_FENCE_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_fence_defs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_fence_defs.h new file mode 100644 index 000000000000..7a75e12df2a1 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_fence_defs.h @@ -0,0 +1,71 @@ +/* + * + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. 
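Putting the helpers in mali_kbase_fence.h together, an atom's output fence typically goes through: create with kbase_fence_out_new(), take an extra reference for whoever the fence is exported to, then signal it once with the completion status and drop the atom's own reference. The sketch below compresses that lifecycle (kernel >= 4.10 names, error handling trimmed); it illustrates the API above and is not the driver's actual completion path.

#include "mali_kbase_fence.h"

static struct dma_fence *demo_make_out_fence(struct kbase_jd_atom *katom)
{
	struct dma_fence *fence = kbase_fence_out_new(katom);

	if (fence)
		kbase_fence_out_get(katom);	/* ref for the consumer it is exported to */

	return fence;
}

static void demo_complete_atom(struct kbase_jd_atom *katom, int status)
{
	/* Only signal fences this driver created itself. */
	if (!kbase_fence_out_is_ours(katom))
		return;

	/* status: 0 for success, negative errno for failure. */
	kbase_fence_out_signal(katom, status);

	/* Drop the reference the atom has held since kbase_fence_out_new(). */
	kbase_fence_out_remove(katom);
}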
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_FENCE_DEFS_H_ +#define _KBASE_FENCE_DEFS_H_ + +/* + * There was a big rename in the 4.10 kernel (fence* -> dma_fence*) + * This file hides the compatibility issues with this for the rest the driver + */ + +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + +#include + +#define dma_fence_context_alloc(a) fence_context_alloc(a) +#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) +#define dma_fence_get(a) fence_get(a) +#define dma_fence_put(a) fence_put(a) +#define dma_fence_signal(a) fence_signal(a) +#define dma_fence_is_signaled(a) fence_is_signaled(a) +#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) +#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) + +/* MALI_SEC_INTEGRATION */ +/* [HACK] Should check status in LT(4.9) otherwise fence timeout occur frequently */ +//#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) +#else +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#endif + +#else + +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) +#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \ + (a)->status ?: 1 \ + : 0) +#endif + +#endif /* < 4.10.0 */ + +#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ + +#endif /* _KBASE_FENCE_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_gator.h b/drivers/gpu/arm/b_r26p0/mali_kbase_gator.h new file mode 100644 index 000000000000..6428f08071e3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_gator.h @@ -0,0 +1,53 @@ +/* + * + * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* NB taken from gator */ +/* + * List of possible actions to be controlled by DS-5 Streamline. + * The following numbers are used by gator to control the frame buffer dumping + * and s/w counter reporting. 
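The net effect of mali_kbase_fence_defs.h above is that the rest of the driver can call the post-4.10 dma_fence_* function names unconditionally; on older kernels the macros forward them to the fence_* equivalents. Only the struct name still needs the #if treatment seen earlier in this patch. A version-agnostic status check then reduces to the sketch below (struct spelling shown for a >= 4.10 kernel):

#include "mali_kbase_fence_defs.h"

/* Returns 0 while pending, 1 when signalled OK, negative errno on error. */
static int demo_fence_status(struct dma_fence *fence)
{
	if (!dma_fence_is_signaled(fence))
		return 0;

	return dma_fence_get_status(fence);
}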
We cannot use the enums in mali_uk_types.h because + * they are unknown inside gator. + */ + +#ifndef _KBASE_GATOR_H_ +#define _KBASE_GATOR_H_ + +#include + +#define GATOR_JOB_SLOT_START 1 +#define GATOR_JOB_SLOT_STOP 2 +#define GATOR_JOB_SLOT_SOFT_STOPPED 3 + +#ifdef CONFIG_MALI_GATOR_SUPPORT + +#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) + +struct kbase_context; + +void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id); +void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value); +void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value); +void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event); + +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + +#endif /* _KBASE_GATOR_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_gpu_memory_debugfs.c new file mode 100644 index 000000000000..3044e6773b82 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_gpu_memory_debugfs.c @@ -0,0 +1,110 @@ +/* + * + * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +/* MALI_SEC_INTEGRATION */ +#include + +#ifdef CONFIG_DEBUG_FS +/** Show callback for the @c gpu_memory debugfs file. + * + * This function is called to get the contents of the @c gpu_memory debugfs + * file. This is a report of current gpu memory usage. 
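GATOR_MAKE_EVENT above packs the event type into bits 31:24 and the counter or job-slot number into bits 23:16 of the value handed to Streamline. For example, using the constants defined in this header (the slot number 2 is arbitrary):

/* Job slot 2 starting: type GATOR_JOB_SLOT_START (1), number 2. */
static u32 demo_job_slot_start_event(void)
{
	return GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, 2);
	/* == (1 << 24) | (2 << 16) == 0x01020000 */
}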
+ * + * @param sfile The debugfs entry + * @param data Data associated with the entry + * + * @return 0 if successfully prints data in debugfs entry file + * -1 if it encountered an error + */ + +static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) +{ + struct list_head *entry; + const struct list_head *kbdev_list; + + kbdev_list = kbase_device_get_list(); + list_for_each(entry, kbdev_list) { + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + + kbdev = list_entry(entry, struct kbase_device, entry); + /* output the total memory usage and cap for this device */ + seq_printf(sfile, "%-16s %10u\n", + kbdev->devname, + atomic_read(&(kbdev->memdev.used_pages))); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* output the memory usage and cap for each kctx + * opened on this device */ + seq_printf(sfile, " %s-0x%p %10u | tgid=%10d | pid=%10d | name=%20s\n", + "kctx", + kctx, + atomic_read(&(kctx->used_pages)), + kctx->tgid, + kctx->pid, + kctx->name); + } + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_device_put_list(kbdev_list); + return 0; +} + +/* + * File operations related to debugfs entry for gpu_memory + */ +static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_gpu_memory_seq_show, NULL); +} + +static const struct file_operations kbasep_gpu_memory_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_gpu_memory_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * Initialize debugfs entry for gpu_memory + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("gpu_memory", S_IRUGO, + kbdev->mali_debugfs_directory, NULL, + &kbasep_gpu_memory_debugfs_fops); + return; +} + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ + return; +} +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_gpu_memory_debugfs.h new file mode 100644 index 000000000000..28a871a0da4f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_gpu_memory_debugfs.h @@ -0,0 +1,42 @@ +/* + * + * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
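Reading the resulting gpu_memory entry (typically under the driver's debugfs directory, e.g. /sys/kernel/debug/mali0/gpu_memory; the exact path and device name depend on the platform) yields one line per device followed by one line per open context, matching the two seq_printf() formats above. Illustrative output with made-up values:

mali0                  24576
 kctx-0x000000001a2b3c4d      12288 | tgid=      1234 | pid=      1234 | name=      surfaceflinger
 kctx-0x000000005e6f7a8b       4096 | tgid=      2345 | pid=      2345 | name=     com.example.app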
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_gpu_memory_debugfs.h + * Header file for gpu_memory entry in debugfs + * + */ + +#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H +#define _KBASE_GPU_MEMORY_DEBUGFS_H + +#include +#include + +/** + * @brief Initialize gpu_memory debugfs entry + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); + +#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.c b/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.c new file mode 100644 index 000000000000..ae2458f497da --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.c @@ -0,0 +1,636 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Base kernel property query APIs + */ + +#include +#include +#include +#include +#include +#include "mali_kbase_ioctl.h" +#include +#include +#include +#include + + +static void kbase_gpuprops_construct_coherent_groups( + struct base_gpu_props * const props) +{ + struct mali_base_gpu_coherent_group *current_group; + u64 group_present; + u64 group_mask; + u64 first_set, first_set_prev; + u32 num_groups = 0; + + KBASE_DEBUG_ASSERT(NULL != props); + + props->coherency_info.coherency = props->raw_props.mem_features; + props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); + + if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { + /* Group is l2 coherent */ + group_present = props->raw_props.l2_present; + } else { + /* Group is l1 coherent */ + group_present = props->raw_props.shader_present; + } + + /* + * The coherent group mask can be computed from the l2 present + * register. + * + * For the coherent group n: + * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) + * where first_set is group_present with only its nth set-bit kept + * (i.e. the position from where a new group starts). + * + * For instance if the groups are l2 coherent and l2_present=0x0..01111: + * The first mask is: + * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) + * = (0x0..010 - 1) & ~(0x0..01 - 1) + * = 0x0..00f + * The second mask is: + * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) + * = (0x0..100 - 1) & ~(0x0..010 - 1) + * = 0x0..0f0 + * And so on until all the bits from group_present have been cleared + * (i.e. there is no group left). 
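The derivation above relies on the identity x & ~(x - 1), which isolates the lowest set bit of x (equivalently x & -x), and on (next - 1) & ~(prev - 1), which builds a mask covering the bit positions from prev up to, but not including, next. A quick standalone restatement of both steps used by the loop that follows:

#include <linux/types.h>

/* Lowest set bit: 0b101100 -> 0b000100. */
static inline u64 demo_lowest_set_bit(u64 x)
{
	return x & ~(x - 1);
}

/* Mask between two consecutive group start bits: with prev = 0b0100 and
 * next = 0b1000, the result is 0b0111 & ~0b0011 = 0b0100. */
static inline u64 demo_group_mask(u64 prev, u64 next)
{
	return (next - 1) & ~(prev - 1);
}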
+ */ + + current_group = props->coherency_info.group; + first_set = group_present & ~(group_present - 1); + + while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { + group_present -= first_set; /* Clear the current group bit */ + first_set_prev = first_set; + + first_set = group_present & ~(group_present - 1); + group_mask = (first_set - 1) & ~(first_set_prev - 1); + + /* Populate the coherent_group structure for each group */ + current_group->core_mask = group_mask & props->raw_props.shader_present; + current_group->num_cores = hweight64(current_group->core_mask); + + num_groups++; + current_group++; + } + + if (group_present != 0) + pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); + + props->coherency_info.num_groups = num_groups; +} + +/** + * kbase_gpuprops_get_props - Get the GPU configuration + * @gpu_props: The &struct base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Fill the &struct base_gpu_props structure with values from the GPU + * configuration registers. Only the raw properties are filled in this function. + * + * Return: Zero on success, Linux error code on failure + */ +static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, + struct kbase_device *kbdev) +{ + struct kbase_gpuprops_regdump regdump; + int i; + int err; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != gpu_props); + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get(kbdev, ®dump); + if (err) + return err; + + gpu_props->raw_props.gpu_id = regdump.gpu_id; + gpu_props->raw_props.tiler_features = regdump.tiler_features; + gpu_props->raw_props.mem_features = regdump.mem_features; + gpu_props->raw_props.mmu_features = regdump.mmu_features; + gpu_props->raw_props.l2_features = regdump.l2_features; + gpu_props->raw_props.core_features = regdump.core_features; + + gpu_props->raw_props.as_present = regdump.as_present; + gpu_props->raw_props.js_present = regdump.js_present; + gpu_props->raw_props.shader_present = + ((u64) regdump.shader_present_hi << 32) + + regdump.shader_present_lo; + gpu_props->raw_props.tiler_present = + ((u64) regdump.tiler_present_hi << 32) + + regdump.tiler_present_lo; + gpu_props->raw_props.l2_present = + ((u64) regdump.l2_present_hi << 32) + + regdump.l2_present_lo; + gpu_props->raw_props.stack_present = + ((u64) regdump.stack_present_hi << 32) + + regdump.stack_present_lo; + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + gpu_props->raw_props.js_features[i] = regdump.js_features[i]; + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; + + gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; + gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; + gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; + gpu_props->raw_props.thread_features = regdump.thread_features; + gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; + + return 0; +} + +void kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props) +{ + gpu_props->core_props.version_status = + KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); + gpu_props->core_props.minor_revision = + KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); + gpu_props->core_props.major_revision = + KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); + gpu_props->core_props.product_id = + 
KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); +} + +/** + * kbase_gpuprops_calculate_props - Calculate the derived properties + * @gpu_props: The &struct base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Fill the &struct base_gpu_props structure with values derived from the GPU + * configuration registers + */ +static void kbase_gpuprops_calculate_props( + struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) +{ + int i; + u32 gpu_id; + u32 product_id; + + /* Populate the base_gpu_props structure */ + kbase_gpuprops_update_core_props_gpu_id(gpu_props); + gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; +#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE + gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; +#else + gpu_props->core_props.gpu_available_memory_size = + totalram_pages() << PAGE_SHIFT; +#endif + + gpu_props->core_props.num_exec_engines = + KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; + + gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); + gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); + + /* Field with number of l2 slices is added to MEM_FEATURES register + * since t76x. Below code assumes that for older GPU reserved bits will + * be read as zero. */ + gpu_props->l2_props.num_l2_slices = + KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; + + gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); + gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); + + if (gpu_props->raw_props.thread_max_threads == 0) + gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; + else + gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; + + if (gpu_props->raw_props.thread_max_workgroup_size == 0) + gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; + else + gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; + + if (gpu_props->raw_props.thread_max_barrier_size == 0) + gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; + else + gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; + + if (gpu_props->raw_props.thread_tls_alloc == 0) + gpu_props->thread_props.tls_alloc = + gpu_props->thread_props.max_threads; + else + gpu_props->thread_props.tls_alloc = + gpu_props->raw_props.thread_tls_alloc; + + /* MIDHARC-2364 was intended for tULx. + * Workaround for the incorrectly applied THREAD_FEATURES to tDUx. 
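As a concrete illustration of the bitfield split performed by kbase_gpuprops_update_core_props_gpu_id() above (the gpu_id value itself is made up for the example):

/* Decode of an illustrative raw gpu_id value. */
static void demo_decode_gpu_id(void)
{
	const u32 gpu_id = 0x72120010;

	u32 version_status = KBASE_UBFX32(gpu_id, 0U, 4);	/* 0x0    */
	u32 minor_revision = KBASE_UBFX32(gpu_id, 4U, 8);	/* 0x01   */
	u32 major_revision = KBASE_UBFX32(gpu_id, 12U, 4);	/* 0x0    */
	u32 product_id     = KBASE_UBFX32(gpu_id, 16U, 16);	/* 0x7212 */

	(void)version_status;
	(void)minor_revision;
	(void)major_revision;
	(void)product_id;
}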
+ */ + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) { + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 0U, 22); + gpu_props->thread_props.impl_tech = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 22U, 2); + gpu_props->thread_props.max_task_queue = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 24U, 8); + gpu_props->thread_props.max_thread_group_split = 0; + } else { + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 0U, 16); + gpu_props->thread_props.max_task_queue = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 16U, 8); + gpu_props->thread_props.max_thread_group_split = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 24U, 6); + gpu_props->thread_props.impl_tech = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 30U, 2); + } + + /* If values are not specified, then use defaults */ + if (gpu_props->thread_props.max_registers == 0) { + gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; + gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; + gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; + } + /* Initialize the coherent_group structure for each group */ + kbase_gpuprops_construct_coherent_groups(gpu_props); +} + +void kbase_gpuprops_set(struct kbase_device *kbdev) +{ + struct kbase_gpu_props *gpu_props; + struct gpu_raw_gpu_props *raw; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + gpu_props = &kbdev->gpu_props; + raw = &gpu_props->props.raw_props; + + /* Initialize the base_gpu_props structure from the hardware */ + kbase_gpuprops_get_props(&gpu_props->props, kbdev); + + /* Populate the derived properties */ + kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); + + /* Populate kbase-only fields */ + gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); + gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); + + gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); + + gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); + gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); + + gpu_props->num_cores = hweight64(raw->shader_present); + gpu_props->num_core_groups = hweight64(raw->l2_present); + gpu_props->num_address_spaces = hweight32(raw->as_present); + gpu_props->num_job_slots = hweight32(raw->js_present); +} + +int kbase_gpuprops_set_features(struct kbase_device *kbdev) +{ + struct base_gpu_props *gpu_props; + struct kbase_gpuprops_regdump regdump; + int err; + + gpu_props = &kbdev->gpu_props.props; + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get_features(kbdev, ®dump); + if (err) + return err; + + /* + * Copy the raw value from the register, later this will get turned + * into the selected coherency mode. + * Additionally, add non-coherent mode, as this is always supported. + */ + gpu_props->raw_props.coherency_mode = regdump.coherency_features | + COHERENCY_FEATURE_BIT(COHERENCY_NONE); + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) + gpu_props->thread_props.max_thread_group_split = 0; + + return err; +} + +/* + * Module parameters to allow the L2 size and hash configuration to be + * overridden. + * + * These parameters must be set on insmod to take effect, and are not visible + * in sysfs. 
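The precedence implemented by kbase_read_l2_config_from_dt() further down is: module parameter first, then the device-tree property, then no override. Because the parameters are not exposed in sysfs they have to be given at load time, for example insmod mali_kbase.ko override_l2_size=7 (module name and value shown for illustration only). The priority rule itself boils down to:

/* Sketch of the override precedence; not part of the patch. */
static u8 demo_pick_l2_override(u8 modparam_val, bool dt_present, u8 dt_val)
{
	if (modparam_val)
		return modparam_val;	/* insmod parameter wins */

	if (dt_present)
		return dt_val;		/* then the device tree */

	return 0;			/* 0 means "no override" */
}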
+ */ +static u8 override_l2_size; +module_param(override_l2_size, byte, 0); +MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing"); + +static u8 override_l2_hash; +module_param(override_l2_hash, byte, 0); +MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); + +/** + * kbase_read_l2_config_from_dt - Read L2 configuration + * @kbdev: The kbase device for which to get the L2 configuration. + * + * Check for L2 configuration overrides in module parameters and device tree. + * Override values in module parameters take priority over override values in + * device tree. + * + * Return: true if either size or hash was overridden, false if no overrides + * were found. + */ +static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev) +{ + struct device_node *np = kbdev->dev->of_node; + + if (!np) + return false; + + if (override_l2_size) + kbdev->l2_size_override = override_l2_size; + else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) + kbdev->l2_size_override = 0; + + if (override_l2_hash) + kbdev->l2_hash_override = override_l2_hash; + else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override)) + kbdev->l2_hash_override = 0; + + if (kbdev->l2_size_override || kbdev->l2_hash_override) + return true; + + return false; +} + +int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) +{ + int err = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { + struct kbase_gpuprops_regdump regdump; + struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; + + /* Check for L2 cache size & hash overrides */ + if (!kbase_read_l2_config_from_dt(kbdev)) + return 0; + + /* Need L2 to get powered to reflect to L2_FEATURES */ + kbase_pm_context_active(kbdev); + + /* Wait for the completion of L2 power transition */ + kbase_pm_wait_for_l2_powered(kbdev); + + /* Dump L2_FEATURES register */ + err = kbase_backend_gpuprops_get_l2_features(kbdev, ®dump); + if (err) + goto idle_gpu; + + dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", + regdump.l2_features); + + /* Update gpuprops with reflected L2_FEATURES */ + gpu_props->raw_props.l2_features = regdump.l2_features; + gpu_props->l2_props.log2_cache_size = + KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); + +idle_gpu: + /* Let GPU idle */ + kbase_pm_context_idle(kbdev); + } + + return err; +} + +static struct { + u32 type; + size_t offset; + int size; +} gpu_property_mapping[] = { +#define PROP(name, member) \ + {KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \ + sizeof(((struct base_gpu_props *)0)->member)} + PROP(PRODUCT_ID, core_props.product_id), + PROP(VERSION_STATUS, core_props.version_status), + PROP(MINOR_REVISION, core_props.minor_revision), + PROP(MAJOR_REVISION, core_props.major_revision), + PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), + PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), + PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), + PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), + PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), + PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), + PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), + PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), + + PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), + PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), + PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), + + PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), + 
PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), + + PROP(MAX_THREADS, thread_props.max_threads), + PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), + PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), + PROP(MAX_REGISTERS, thread_props.max_registers), + PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), + PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), + PROP(IMPL_TECH, thread_props.impl_tech), + PROP(TLS_ALLOC, thread_props.tls_alloc), + + PROP(RAW_SHADER_PRESENT, raw_props.shader_present), + PROP(RAW_TILER_PRESENT, raw_props.tiler_present), + PROP(RAW_L2_PRESENT, raw_props.l2_present), + PROP(RAW_STACK_PRESENT, raw_props.stack_present), + PROP(RAW_L2_FEATURES, raw_props.l2_features), + PROP(RAW_CORE_FEATURES, raw_props.core_features), + PROP(RAW_MEM_FEATURES, raw_props.mem_features), + PROP(RAW_MMU_FEATURES, raw_props.mmu_features), + PROP(RAW_AS_PRESENT, raw_props.as_present), + PROP(RAW_JS_PRESENT, raw_props.js_present), + PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), + PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), + PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), + PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), + PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), + PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), + PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), + PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), + PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), + PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), + PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), + PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), + PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), + PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), + PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), + PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), + PROP(RAW_TILER_FEATURES, raw_props.tiler_features), + PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), + PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), + PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), + PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]), + PROP(RAW_GPU_ID, raw_props.gpu_id), + PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads), + PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, + raw_props.thread_max_workgroup_size), + PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), + PROP(RAW_THREAD_FEATURES, raw_props.thread_features), + PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), + PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), + + PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), + PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), + PROP(COHERENCY_COHERENCY, coherency_info.coherency), + PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), + PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), + PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), + PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask), + PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), + PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), + PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), + PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), + PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), + PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), + PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), + PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), + PROP(COHERENCY_GROUP_12, 
coherency_info.group[12].core_mask), + PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), + PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), + PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), + +#undef PROP +}; + +int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) +{ + struct kbase_gpu_props *kprops = &kbdev->gpu_props; + struct base_gpu_props *props = &kprops->props; + u32 count = ARRAY_SIZE(gpu_property_mapping); + u32 i; + u32 size = 0; + u8 *p; + + for (i = 0; i < count; i++) { + /* 4 bytes for the ID, and the size of the property */ + size += 4 + gpu_property_mapping[i].size; + } + + kprops->prop_buffer_size = size; + kprops->prop_buffer = kmalloc(size, GFP_KERNEL); + + if (!kprops->prop_buffer) { + kprops->prop_buffer_size = 0; + return -ENOMEM; + } + + p = kprops->prop_buffer; + +#define WRITE_U8(v) (*p++ = (v) & 0xFF) +#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0) +#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0) +#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0) + + for (i = 0; i < count; i++) { + u32 type = gpu_property_mapping[i].type; + u8 type_size; + void *field = ((u8 *)props) + gpu_property_mapping[i].offset; + + switch (gpu_property_mapping[i].size) { + case 1: + type_size = KBASE_GPUPROP_VALUE_SIZE_U8; + break; + case 2: + type_size = KBASE_GPUPROP_VALUE_SIZE_U16; + break; + case 4: + type_size = KBASE_GPUPROP_VALUE_SIZE_U32; + break; + case 8: + type_size = KBASE_GPUPROP_VALUE_SIZE_U64; + break; + default: + dev_err(kbdev->dev, + "Invalid gpu_property_mapping type=%d size=%d", + type, gpu_property_mapping[i].size); + return -EINVAL; + } + + WRITE_U32((type<<2) | type_size); + + switch (type_size) { + case KBASE_GPUPROP_VALUE_SIZE_U8: + WRITE_U8(*((u8 *)field)); + break; + case KBASE_GPUPROP_VALUE_SIZE_U16: + WRITE_U16(*((u16 *)field)); + break; + case KBASE_GPUPROP_VALUE_SIZE_U32: + WRITE_U32(*((u32 *)field)); + break; + case KBASE_GPUPROP_VALUE_SIZE_U64: + WRITE_U64(*((u64 *)field)); + break; + default: /* Cannot be reached */ + WARN_ON(1); + return -EINVAL; + } + } + + return 0; +} + +void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev) +{ + kfree(kbdev->gpu_props.prop_buffer); +} + +int kbase_device_populate_max_freq(struct kbase_device *kbdev) +{ + struct mali_base_gpu_core_props *core_props; + + /* obtain max configured gpu frequency, if devfreq is enabled then + * this will be overridden by the highest operating point found + */ + core_props = &(kbdev->gpu_props.props.core_props); +#ifdef GPU_FREQ_KHZ_MAX + core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; +#else + core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; +#endif + + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.h b/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.h new file mode 100644 index 000000000000..5eee7948381a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops.h @@ -0,0 +1,135 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
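The buffer built by kbase_gpuprops_populate_user_buffer() above is a flat sequence of records: a 4-byte little-endian header holding (property type << 2) | size code, followed by the value itself in little-endian form of 1, 2, 4 or 8 bytes. The sketch below shows how such a record could be walked; it assumes the KBASE_GPUPROP_VALUE_SIZE_* codes are 0 to 3 and map to those four widths, which matches the WRITE_U* macros but is an assumption here, not something this file states.

#include <linux/types.h>

/* Decode one record; returns the number of bytes consumed. */
static size_t demo_read_one_prop(const u8 *p, u32 *type, u64 *value)
{
	u32 header = p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) |
		     ((u32)p[3] << 24);
	u32 size_code = header & 0x3;		/* assumed 0..3 */
	size_t nbytes = (size_t)1 << size_code;	/* 1, 2, 4 or 8 bytes */
	size_t i;

	*type = header >> 2;
	*value = 0;
	for (i = 0; i < nbytes; i++)
		*value |= (u64)p[4 + i] << (8 * i);

	return 4 + nbytes;
}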
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2011-2015, 2017, 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_gpuprops.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_GPUPROPS_H_ +#define _KBASE_GPUPROPS_H_ + +#include "mali_kbase_gpuprops_types.h" + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; + +/** + * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. + * @value: The value from which to extract bits. + * @offset: The first bit to extract (0 being the LSB). + * @size: The number of bits to extract. + * + * Context: @offset + @size <= 32. + * + * Return: Bits [@offset, @offset + @size) from @value. + */ +/* from mali_cdsb.h */ +#define KBASE_UBFX32(value, offset, size) \ + (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) + +/** + * @brief Set up Kbase GPU properties. + * + * Set up Kbase GPU properties with information from the GPU registers + * + * @param kbdev The struct kbase_device structure for the device + */ +void kbase_gpuprops_set(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_set_features - Set up Kbase GPU properties + * @kbdev: Device pointer + * + * This function sets up GPU properties that are dependent on the hardware + * features bitmask. This function must be preceeded by a call to + * kbase_hw_set_features_mask(). + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_set_features(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_update_l2_features - Update GPU property of L2_FEATURES + * @kbdev: Device pointer + * + * This function updates l2_features and the log2 cache size. + * + * Return: Zero on success, Linux error code for failure + */ +int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer + * @kbdev: The kbase device + * + * Fills prop_buffer with the GPU properties for user space to read. + */ +int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_free_user_buffer - Free the GPU properties buffer. + * @kbdev: kbase device pointer + * + * Free the GPU properties buffer allocated from + * kbase_gpuprops_populate_user_buffer. + */ +void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev); + +/** + * kbase_device_populate_max_freq - Populate max gpu frequency. + * @kbdev: kbase device pointer + * + * Populate the maximum gpu frequency to be used when devfreq is disabled. + * + * Return: 0 on success and non-zero value on failure. 
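One deliberate detail in KBASE_UBFX32 above: the mask is built from 1ULL rather than 1U, so a @size of 32 produces a full 0xFFFFFFFF mask instead of an undefined 32-bit shift. For instance:

static void demo_ubfx32(void)
{
	u32 whole = KBASE_UBFX32(0xABCD1234, 0U, 32);	/* 0xABCD1234 */
	u32 mid   = KBASE_UBFX32(0xABCD1234, 8U, 12);	/* 0xD12 */

	(void)whole;
	(void)mid;
}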
+ */ +int kbase_device_populate_max_freq(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value + * @gpu_props: the &base_gpu_props structure + * + * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into + * separate fields (version_status, minor_revision, major_revision, product_id) + * stored in base_gpu_props::core_props. + */ +void kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props); + +#endif /* _KBASE_GPUPROPS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops_types.h new file mode 100644 index 000000000000..ec6f1c39ccb0 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_gpuprops_types.h @@ -0,0 +1,98 @@ +/* + * + * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_gpuprops_types.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_GPUPROPS_TYPES_H_ +#define _KBASE_GPUPROPS_TYPES_H_ + +#include "mali_base_kernel.h" + +#define KBASE_GPU_SPEED_MHZ 123 +#define KBASE_GPU_PC_SIZE_LOG2 24U + +struct kbase_gpuprops_regdump { + u32 gpu_id; + u32 l2_features; + u32 core_features; + u32 tiler_features; + u32 mem_features; + u32 mmu_features; + u32 as_present; + u32 js_present; + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; + u32 thread_tls_alloc; + u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + u32 js_features[GPU_MAX_JOB_SLOTS]; + u32 shader_present_lo; + u32 shader_present_hi; + u32 tiler_present_lo; + u32 tiler_present_hi; + u32 l2_present_lo; + u32 l2_present_hi; + u32 stack_present_lo; + u32 stack_present_hi; + u32 coherency_features; +}; + +struct kbase_gpu_cache_props { + u8 associativity; + u8 external_bus_width; +}; + +struct kbase_gpu_mem_props { + u8 core_group; +}; + +struct kbase_gpu_mmu_props { + u8 va_bits; + u8 pa_bits; +}; + +struct kbase_gpu_props { + /* kernel-only properties */ + u8 num_cores; + u8 num_core_groups; + u8 num_address_spaces; + u8 num_job_slots; + + struct kbase_gpu_cache_props l2_props; + + struct kbase_gpu_mem_props mem; + struct kbase_gpu_mmu_props mmu; + + /* Properties shared with userspace */ + struct base_gpu_props props; + + u32 prop_buffer_size; + void *prop_buffer; +}; + +#endif /* _KBASE_GPUPROPS_TYPES_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_gwt.c b/drivers/gpu/arm/b_r26p0/mali_kbase_gwt.c new file mode 100644 index 000000000000..6a47c9dd3610 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_gwt.c @@ -0,0 +1,269 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_gwt.h"
+#include <linux/list_sort.h>
+
+static inline void kbase_gpu_gwt_setup_page_permission(
+ struct kbase_context *kctx,
+ unsigned long flag,
+ struct rb_node *node)
+{
+ struct rb_node *rbnode = node;
+
+ while (rbnode) {
+ struct kbase_va_region *reg;
+ int err = 0;
+
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ if (reg->nr_pages && !kbase_is_region_invalid_or_free(reg) &&
+ (reg->flags & KBASE_REG_GPU_WR)) {
+ err = kbase_mmu_update_pages(kctx, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ reg->gpu_alloc->nents,
+ reg->flags & flag,
+ reg->gpu_alloc->group_id);
+ if (err)
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n");
+ }
+
+ rbnode = rb_next(rbnode);
+ }
+}
+
+static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx,
+ unsigned long flag)
+{
+ kbase_gpu_gwt_setup_page_permission(kctx, flag,
+ rb_first(&(kctx->reg_rbtree_same)));
+ kbase_gpu_gwt_setup_page_permission(kctx, flag,
+ rb_first(&(kctx->reg_rbtree_custom)));
+}
+
+
+int kbase_gpu_gwt_start(struct kbase_context *kctx)
+{
+ kbase_gpu_vm_lock(kctx);
+ if (kctx->gwt_enabled) {
+ kbase_gpu_vm_unlock(kctx);
+ return -EBUSY;
+ }
+
+ INIT_LIST_HEAD(&kctx->gwt_current_list);
+ INIT_LIST_HEAD(&kctx->gwt_snapshot_list);
+
+ /* If GWT is enabled using new vector dumping format
+ * from user space, back up status of the job serialization flag and
+ * use full serialisation of jobs for dumping.
+ * Status will be restored on end of dumping in gwt_stop.
+ */
+ kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs;
+ kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT |
+ KBASE_SERIALIZE_INTER_SLOT;
+
+ /* Mark gwt enabled before making pages read only in case a
+ write page fault is triggered while we're still in this loop.
+ (kbase_gpu_vm_lock() doesn't prevent this!)
+ */ + kctx->gwt_enabled = true; + kctx->gwt_was_enabled = true; + + kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); + + kbase_gpu_vm_unlock(kctx); + return 0; +} + +int kbase_gpu_gwt_stop(struct kbase_context *kctx) +{ + struct kbasep_gwt_list_element *pos, *n; + + kbase_gpu_vm_lock(kctx); + if (!kctx->gwt_enabled) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + + list_for_each_entry_safe(pos, n, &kctx->gwt_current_list, link) { + list_del(&pos->link); + kfree(pos); + } + + list_for_each_entry_safe(pos, n, &kctx->gwt_snapshot_list, link) { + list_del(&pos->link); + kfree(pos); + } + + kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs; + + kbase_gpu_gwt_setup_pages(kctx, ~0UL); + + kctx->gwt_enabled = false; + kbase_gpu_vm_unlock(kctx); + return 0; +} + + +static int list_cmp_function(void *priv, struct list_head *a, + struct list_head *b) +{ + struct kbasep_gwt_list_element *elementA = container_of(a, + struct kbasep_gwt_list_element, link); + struct kbasep_gwt_list_element *elementB = container_of(b, + struct kbasep_gwt_list_element, link); + + CSTD_UNUSED(priv); + + if (elementA->page_addr > elementB->page_addr) + return 1; + return -1; +} + +static void kbase_gpu_gwt_collate(struct kbase_context *kctx, + struct list_head *snapshot_list) +{ + struct kbasep_gwt_list_element *pos, *n; + struct kbasep_gwt_list_element *collated = NULL; + + /* Sort the list */ + list_sort(NULL, snapshot_list, list_cmp_function); + + /* Combine contiguous areas. */ + list_for_each_entry_safe(pos, n, snapshot_list, link) { + if (collated == NULL || collated->region != + pos->region || + (collated->page_addr + + (collated->num_pages * PAGE_SIZE)) != + pos->page_addr) { + /* This is the first time through, a new region or + * is not contiguous - start collating to this element + */ + collated = pos; + } else { + /* contiguous so merge */ + collated->num_pages += pos->num_pages; + /* remove element from list */ + list_del(&pos->link); + kfree(pos); + } + } +} + +int kbase_gpu_gwt_dump(struct kbase_context *kctx, + union kbase_ioctl_cinstr_gwt_dump *gwt_dump) +{ + const u32 ubuf_size = gwt_dump->in.len; + u32 ubuf_count = 0; + __user void *user_addr = (__user void *) + (uintptr_t)gwt_dump->in.addr_buffer; + __user void *user_sizes = (__user void *) + (uintptr_t)gwt_dump->in.size_buffer; + + kbase_gpu_vm_lock(kctx); + + if (!kctx->gwt_enabled) { + kbase_gpu_vm_unlock(kctx); + /* gwt_dump shouldn't be called when gwt is disabled */ + return -EPERM; + } + + if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer + || !gwt_dump->in.size_buffer) { + kbase_gpu_vm_unlock(kctx); + /* We don't have any valid user space buffer to copy the + * write modified addresses. + */ + return -EINVAL; + } + + if (list_empty(&kctx->gwt_snapshot_list) && + !list_empty(&kctx->gwt_current_list)) { + + list_replace_init(&kctx->gwt_current_list, + &kctx->gwt_snapshot_list); + + /* We have collected all write faults so far + * and they will be passed on to user space. + * Reset the page flags state to allow collection of + * further write faults. 
+ */ + kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); + + /* Sort and combine consecutive pages in the dump list*/ + kbase_gpu_gwt_collate(kctx, &kctx->gwt_snapshot_list); + } + + while ((!list_empty(&kctx->gwt_snapshot_list))) { + u64 addr_buffer[32]; + u64 num_page_buffer[32]; + u32 count = 0; + int err; + struct kbasep_gwt_list_element *dump_info, *n; + + list_for_each_entry_safe(dump_info, n, + &kctx->gwt_snapshot_list, link) { + addr_buffer[count] = dump_info->page_addr; + num_page_buffer[count] = dump_info->num_pages; + count++; + list_del(&dump_info->link); + kfree(dump_info); + if (ARRAY_SIZE(addr_buffer) == count || + ubuf_size == (ubuf_count + count)) + break; + } + + if (count) { + err = copy_to_user((user_addr + + (ubuf_count * sizeof(u64))), + (void *)addr_buffer, + count * sizeof(u64)); + if (err) { + dev_err(kctx->kbdev->dev, "Copy to user failure\n"); + kbase_gpu_vm_unlock(kctx); + return err; + } + err = copy_to_user((user_sizes + + (ubuf_count * sizeof(u64))), + (void *)num_page_buffer, + count * sizeof(u64)); + if (err) { + dev_err(kctx->kbdev->dev, "Copy to user failure\n"); + kbase_gpu_vm_unlock(kctx); + return err; + } + + ubuf_count += count; + } + + if (ubuf_count == ubuf_size) + break; + } + + if (!list_empty(&kctx->gwt_snapshot_list)) + gwt_dump->out.more_data_available = 1; + else + gwt_dump->out.more_data_available = 0; + + gwt_dump->out.no_of_addr_collected = ubuf_count; + kbase_gpu_vm_unlock(kctx); + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_gwt.h b/drivers/gpu/arm/b_r26p0/mali_kbase_gwt.h new file mode 100644 index 000000000000..7e7746e64915 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_gwt.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_GWT_H) +#define _KBASE_GWT_H + +#include +#include + +/** + * kbase_gpu_gwt_start - Start the GPU write tracking + * @kctx: Pointer to kernel context + * + * @return 0 on success, error on failure. + */ +int kbase_gpu_gwt_start(struct kbase_context *kctx); + +/** + * kbase_gpu_gwt_stop - Stop the GPU write tracking + * @kctx: Pointer to kernel context + * + * @return 0 on success, error on failure. + */ +int kbase_gpu_gwt_stop(struct kbase_context *kctx); + +/** + * kbase_gpu_gwt_dump - Pass page address of faulting addresses to user space. + * @kctx: Pointer to kernel context + * @gwt_dump: User space data to be passed. + * + * @return 0 on success, error on failure. 
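+ *
+ * Illustrative call sequence (a sketch only, not taken from the driver
+ * sources; user_addr_buf, user_size_buf and buf_len stand for a user-space
+ * u64 address array, a user-space u64 size array and their length in entries):
+ *
+ *   union kbase_ioctl_cinstr_gwt_dump dump = { 0 };
+ *   int err;
+ *
+ *   kbase_gpu_gwt_start(kctx);
+ *   ... GPU writes to tracked regions fault and are collected ...
+ *   dump.in.addr_buffer = user_addr_buf;
+ *   dump.in.size_buffer = user_size_buf;
+ *   dump.in.len = buf_len;
+ *   do {
+ *           err = kbase_gpu_gwt_dump(kctx, &dump);
+ *   } while (!err && dump.out.more_data_available);
+ *   kbase_gpu_gwt_stop(kctx);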
+ */ +int kbase_gpu_gwt_dump(struct kbase_context *kctx, + union kbase_ioctl_cinstr_gwt_dump *gwt_dump); + +#endif /* _KBASE_GWT_H */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hw.c b/drivers/gpu/arm/b_r26p0/mali_kbase_hw.c new file mode 100644 index 000000000000..f8a9248e3c06 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hw.c @@ -0,0 +1,425 @@ +/* + * + * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Run-time work-arounds helpers + */ + +#include +#include +#include "gpu/mali_kbase_gpu_regmap.h" +#include "mali_kbase.h" +#include "mali_kbase_hw.h" + +void kbase_hw_set_features_mask(struct kbase_device *kbdev) +{ + const enum base_hw_feature *features; + u32 gpu_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + features = base_hw_features_tMIx; + break; + case GPU_ID2_PRODUCT_THEX: + features = base_hw_features_tHEx; + break; + case GPU_ID2_PRODUCT_TSIX: + features = base_hw_features_tSIx; + break; + case GPU_ID2_PRODUCT_TDVX: + features = base_hw_features_tDVx; + break; + case GPU_ID2_PRODUCT_TNOX: + features = base_hw_features_tNOx; + break; + case GPU_ID2_PRODUCT_TGOX: + features = base_hw_features_tGOx; + break; + case GPU_ID2_PRODUCT_TTRX: + features = base_hw_features_tTRx; + break; + case GPU_ID2_PRODUCT_TNAX: + features = base_hw_features_tNAx; + break; + case GPU_ID2_PRODUCT_LBEX: + case GPU_ID2_PRODUCT_TBEX: + features = base_hw_features_tBEx; + break; + case GPU_ID2_PRODUCT_TDUX: + features = base_hw_features_tDUx; + break; + case GPU_ID2_PRODUCT_TODX: + case GPU_ID2_PRODUCT_LODX: + features = base_hw_features_tODx; + break; + case GPU_ID2_PRODUCT_TGRX: + features = base_hw_features_tGRx; + break; + case GPU_ID2_PRODUCT_TVAX: + features = base_hw_features_tVAx; + break; + case GPU_ID2_PRODUCT_TTUX: + /* Fallthrough */ + case GPU_ID2_PRODUCT_LTUX: + features = base_hw_features_tTUx; + break; + case GPU_ID2_PRODUCT_TE2X: + features = base_hw_features_tE2x; + break; + default: + features = base_hw_features_generic; + break; + } + + for (; *features != BASE_HW_FEATURE_END; features++) + set_bit(*features, &kbdev->hw_features_mask[0]); + +#if defined(CONFIG_MALI_VECTOR_DUMP) + /* When dumping is enabled, need to disable flush reduction optimization + * for GPUs on which it is safe to have only cache clean operation at + * the end of job chain. + * This is required to make vector dump work. There is some discrepancy + * in the implementation of flush reduction optimization due to + * unclear or ambiguous ARCH spec. 
+ */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, + &kbdev->hw_features_mask[0]); +#endif +} + +/** + * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID + * @kbdev: Device pointer + * + * Return: pointer to an array of hardware issues, terminated by + * BASE_HW_ISSUE_END. + * + * In debugging versions of the driver, unknown versions of a known GPU will + * be treated as the most recent known version not later than the actual + * version. In such circumstances, the GPU ID in @kbdev will also be replaced + * with the most recent known version. + * + * Note: The GPU configuration must have been read by kbase_gpuprops_get_props() + * before calling this function. + */ +static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( + struct kbase_device *kbdev) +{ + const enum base_hw_issue *issues = NULL; + + struct base_hw_product { + u32 product_model; + struct { + u32 version; + const enum base_hw_issue *issues; + } map[7]; + }; + + static const struct base_hw_product base_hw_products[] = { + {GPU_ID2_PRODUCT_TMIX, + {{GPU_ID2_VERSION_MAKE(0, 0, 1), + base_hw_issues_tMIx_r0p0_05dev0}, + {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1}, + {U32_MAX /* sentinel value */, NULL} } }, + + {GPU_ID2_PRODUCT_THEX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2}, + {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TSIX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, + {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, + {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TDVX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TNOX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TGOX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0}, + {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TTRX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TNAX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_LBEX, + {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0}, + {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TBEX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), 
base_hw_issues_tBEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1}, + {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TDUX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TODX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_LODX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TGRX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TVAX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TTUX, + {{GPU_ID2_VERSION_MAKE(2, 0, 0), base_hw_issues_tTUx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_LTUX, + {{GPU_ID2_VERSION_MAKE(3, 0, 0), base_hw_issues_tTUx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TE2X, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tE2x_r0p0}, + {U32_MAX, NULL} } }, + }; + + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + const u32 product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; + const struct base_hw_product *product = NULL; + size_t p; + + /* Stop when we reach the end of the products array. */ + for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) { + if (product_model == base_hw_products[p].product_model) { + product = &base_hw_products[p]; + break; + } + } + + if (product != NULL) { + /* Found a matching product. */ + const u32 version = gpu_id & GPU_ID2_VERSION; + u32 fallback_version = 0; + const enum base_hw_issue *fallback_issues = NULL; + size_t v; + + /* Stop when we reach the end of the map. */ + for (v = 0; product->map[v].version != U32_MAX; ++v) { + + if (version == product->map[v].version) { + /* Exact match so stop. */ + issues = product->map[v].issues; + break; + } + + /* Check whether this is a candidate for most recent + known version not later than the actual + version. */ + if ((version > product->map[v].version) && + (product->map[v].version >= fallback_version)) { +#if MALI_CUSTOMER_RELEASE + /* Match on version's major and minor fields */ + if (((version ^ product->map[v].version) >> + GPU_ID2_VERSION_MINOR_SHIFT) == 0) +#endif + { + fallback_version = product->map[v].version; + fallback_issues = product->map[v].issues; + } + } + } + + if ((issues == NULL) && (fallback_issues != NULL)) { + /* Fall back to the issue set of the most recent known + version not later than the actual version. 
*/ + issues = fallback_issues; + +#if MALI_CUSTOMER_RELEASE + dev_warn(kbdev->dev, + "GPU hardware issue table may need updating:\n" +#else + dev_info(kbdev->dev, +#endif + "r%dp%d status %d is unknown; treating as r%dp%d status %d", + (gpu_id & GPU_ID2_VERSION_MAJOR) >> + GPU_ID2_VERSION_MAJOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_MINOR) >> + GPU_ID2_VERSION_MINOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_STATUS) >> + GPU_ID2_VERSION_STATUS_SHIFT, + (fallback_version & GPU_ID2_VERSION_MAJOR) >> + GPU_ID2_VERSION_MAJOR_SHIFT, + (fallback_version & GPU_ID2_VERSION_MINOR) >> + GPU_ID2_VERSION_MINOR_SHIFT, + (fallback_version & GPU_ID2_VERSION_STATUS) >> + GPU_ID2_VERSION_STATUS_SHIFT); + + gpu_id &= ~GPU_ID2_VERSION; + gpu_id |= fallback_version; + kbdev->gpu_props.props.raw_props.gpu_id = gpu_id; + + kbase_gpuprops_update_core_props_gpu_id( + &kbdev->gpu_props.props); + } + } + return issues; +} + +int kbase_hw_set_issues_mask(struct kbase_device *kbdev) +{ + const enum base_hw_issue *issues; + u32 gpu_id; + u32 impl_tech; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; + + if (impl_tech != IMPLEMENTATION_MODEL) { + issues = kbase_hw_get_issues_for_new_id(kbdev); + if (issues == NULL) { + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + +#if !MALI_CUSTOMER_RELEASE + /* The GPU ID might have been replaced with the last + known version of the same GPU. */ + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; +#endif + } else { + /* Software model */ + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + issues = base_hw_issues_model_tMIx; + break; + case GPU_ID2_PRODUCT_THEX: + issues = base_hw_issues_model_tHEx; + break; + case GPU_ID2_PRODUCT_TSIX: + issues = base_hw_issues_model_tSIx; + break; + case GPU_ID2_PRODUCT_TDVX: + issues = base_hw_issues_model_tDVx; + break; + case GPU_ID2_PRODUCT_TNOX: + issues = base_hw_issues_model_tNOx; + break; + case GPU_ID2_PRODUCT_TGOX: + issues = base_hw_issues_model_tGOx; + break; + case GPU_ID2_PRODUCT_TTRX: + issues = base_hw_issues_model_tTRx; + break; + case GPU_ID2_PRODUCT_TNAX: + issues = base_hw_issues_model_tNAx; + break; + case GPU_ID2_PRODUCT_LBEX: + case GPU_ID2_PRODUCT_TBEX: + issues = base_hw_issues_model_tBEx; + break; + case GPU_ID2_PRODUCT_TDUX: + issues = base_hw_issues_model_tDUx; + break; + case GPU_ID2_PRODUCT_TODX: + case GPU_ID2_PRODUCT_LODX: + issues = base_hw_issues_model_tODx; + break; + case GPU_ID2_PRODUCT_TGRX: + issues = base_hw_issues_model_tGRx; + break; + case GPU_ID2_PRODUCT_TVAX: + issues = base_hw_issues_model_tVAx; + break; + case GPU_ID2_PRODUCT_TTUX: + case GPU_ID2_PRODUCT_LTUX: + issues = base_hw_issues_model_tTUx; + break; + case GPU_ID2_PRODUCT_TE2X: + issues = base_hw_issues_model_tE2x; + break; + default: + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } + + dev_info(kbdev->dev, + "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", + (gpu_id & GPU_ID2_PRODUCT_MAJOR) >> + GPU_ID2_PRODUCT_MAJOR_SHIFT, + (gpu_id & GPU_ID2_ARCH_MAJOR) >> + GPU_ID2_ARCH_MAJOR_SHIFT, + (gpu_id & GPU_ID2_ARCH_MINOR) >> + GPU_ID2_ARCH_MINOR_SHIFT, + (gpu_id & GPU_ID2_ARCH_REV) >> + GPU_ID2_ARCH_REV_SHIFT, + (gpu_id & GPU_ID2_VERSION_MAJOR) >> + GPU_ID2_VERSION_MAJOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_MINOR) >> + GPU_ID2_VERSION_MINOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_STATUS) >> + GPU_ID2_VERSION_STATUS_SHIFT); + + for (; *issues != BASE_HW_ISSUE_END; issues++) + set_bit(*issues, 
&kbdev->hw_issues_mask[0]); + + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hw.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hw.h new file mode 100644 index 000000000000..f386b1624317 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hw.h @@ -0,0 +1,70 @@ +/* + * + * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file + * Run-time work-arounds helpers + */ + +#ifndef _KBASE_HW_H_ +#define _KBASE_HW_H_ + +#include "mali_kbase_defs.h" + +/** + * @brief Tell whether a work-around should be enabled + */ +#define kbase_hw_has_issue(kbdev, issue)\ + test_bit(issue, &(kbdev)->hw_issues_mask[0]) + +/** + * @brief Tell whether a feature is supported + */ +#define kbase_hw_has_feature(kbdev, feature)\ + test_bit(feature, &(kbdev)->hw_features_mask[0]) + +/** + * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID + * @kbdev: Device pointer + * + * Return: 0 if the GPU ID was recognized, otherwise -EINVAL. + * + * The GPU ID is read from the @kbdev. + * + * In debugging versions of the driver, unknown versions of a known GPU with a + * new-format ID will be treated as the most recent known version not later + * than the actual version. In such circumstances, the GPU ID in @kbdev will + * also be replaced with the most recent known version. + * + * Note: The GPU configuration must have been read by + * kbase_gpuprops_get_props() before calling this function. + */ +int kbase_hw_set_issues_mask(struct kbase_device *kbdev); + +/** + * @brief Set the features mask depending on the GPU ID + */ +void kbase_hw_set_features_mask(struct kbase_device *kbdev); + +#endif /* _KBASE_HW_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_backend.h new file mode 100644 index 000000000000..89df2519ab97 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_backend.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * HW access backend common APIs + */ + +#ifndef _KBASE_HWACCESS_BACKEND_H_ +#define _KBASE_HWACCESS_BACKEND_H_ + +/** + * kbase_backend_devfreq_init - Perform backend devfreq related initialization. + * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. + */ +int kbase_backend_devfreq_init(struct kbase_device *kbdev); + +/** + * kbase_backend_devfreq_term - Perform backend-devfreq termination. + * @kbdev: Device pointer + */ +void kbase_backend_devfreq_term(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_BACKEND_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_defs.h new file mode 100644 index 000000000000..124a2d9cf0c3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_defs.h @@ -0,0 +1,51 @@ +/* + * + * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * @file mali_kbase_hwaccess_gpu_defs.h + * HW access common definitions + */ + +#ifndef _KBASE_HWACCESS_DEFS_H_ +#define _KBASE_HWACCESS_DEFS_H_ + +#include + +/** + * struct kbase_hwaccess_data - object encapsulating the GPU backend specific + * data for the HW access layer. + * hwaccess_lock (a spinlock) must be held when + * accessing this structure. + * @active_kctx: pointer to active kbase context which last submitted an + * atom to GPU and while the context is active it can + * submit new atoms to GPU from the irq context also, without + * going through the bottom half of job completion path. + * @backend: GPU backend specific data for HW access layer + */ +struct kbase_hwaccess_data { + struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS]; + + struct kbase_backend_data backend; +}; + +#endif /* _KBASE_HWACCESS_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_gpuprops.h new file mode 100644 index 000000000000..3ae0dbe6886d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_gpuprops.h @@ -0,0 +1,87 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ *//* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2018, 2019-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ * GPU
+ * @kbdev: Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * The caller should ensure that the GPU remains powered-on during this function.
+ *
+ * Return: Zero for success or a Linux error code
+ */
+int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+ struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read
+ * from GPU
+ * @kbdev: Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask. It will power-on the GPU if required.
+ *
+ * Return: Zero for success or a Linux error code
+ */
+int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+ struct kbase_gpuprops_regdump *regdump);
+
+/**
+ * kbase_backend_gpuprops_get_l2_features - Fill @regdump with L2_FEATURES read
+ * from GPU
+ * @kbdev: Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads L2_FEATURES register that is dependent on the hardware
+ * features bitmask. It will power-on the GPU if required.
+ *
+ * Return: Zero on success, Linux error code on failure
+ */
+int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
+ struct kbase_gpuprops_regdump *regdump);
+
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_instr.h
new file mode 100644
index 000000000000..4fd2e3549268
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015, 2017-2018, 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include
+
+/**
+ * struct kbase_instr_hwcnt_enable - Enable hardware counter collection.
+ * @dump_buffer: GPU address to write counters to.
+ * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer.
+ * @fe_bm: counters selection bitmask (Front End).
+ * @shader_bm: counters selection bitmask (Shader).
+ * @tiler_bm: counters selection bitmask (Tiler).
+ * @mmu_l2_bm: counters selection bitmask (MMU_L2).
+ * @use_secondary: use secondary performance counters set for applicable
+ * counter blocks.
+ */
+struct kbase_instr_hwcnt_enable {
+ u64 dump_buffer;
+ u64 dump_buffer_bytes;
+ u32 fe_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 mmu_l2_bm;
+ bool use_secondary;
+};
+
+/**
+ * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @enable: HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_instr_hwcnt_enable *enable);
+
+/**
+ * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx: Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * or call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ * completed.
+ * @kctx: Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ * completed
+ * @kctx: Kbase context
+ * @success: Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+ bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_term() - Terminate the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+/**
+ * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the
+ * hardware counter set.
+ * @kbdev: kbase device + */ +void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev); +#endif + +#endif /* _KBASE_HWACCESS_INSTR_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h new file mode 100644 index 000000000000..3d5934e0e0a1 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_jm.h @@ -0,0 +1,302 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * HW access job manager common APIs + */ + +#ifndef _KBASE_HWACCESS_JM_H_ +#define _KBASE_HWACCESS_JM_H_ + +/** + * kbase_backend_run_atom() - Run an atom on the GPU + * @kbdev: Device pointer + * @atom: Atom to run + * + * Caller must hold the HW access lock + */ +void kbase_backend_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_backend_slot_update - Update state based on slot ringbuffers + * + * @kbdev: Device pointer + * + * Inspect the jobs in the slot ringbuffers and update state. + * + * This will cause jobs to be submitted to hardware if they are unblocked + */ +void kbase_backend_slot_update(struct kbase_device *kbdev); + +/** + * kbase_backend_find_and_release_free_address_space() - Release a free AS + * @kbdev: Device pointer + * @kctx: Context pointer + * + * This function can evict an idle context from the runpool, freeing up the + * address space it was using. + * + * The address space is marked as in use. The caller must either assign a + * context using kbase_gpu_use_ctx(), or release it using + * kbase_ctx_sched_release() + * + * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none + * available + */ +int kbase_backend_find_and_release_free_address_space( + struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the + * provided address space. + * @kbdev: Device pointer + * @kctx: Context pointer. May be NULL + * @as_nr: Free address space to use + * + * kbase_gpu_next_job() will pull atoms from the active context. + * + * Return: true if successful, false if ASID not assigned. + */ +bool kbase_backend_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int as_nr); + +/** + * kbase_backend_use_ctx_sched() - Activate a context. + * @kbdev: Device pointer + * @kctx: Context pointer + * @js: Job slot to activate context on + * + * kbase_gpu_next_job() will pull atoms from the active context. + * + * The context must already be scheduled and assigned to an address space. If + * the context is not scheduled, then kbase_gpu_use_ctx() should be used + * instead. 
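+ *
+ * Illustrative caller pattern (a sketch only, not taken from the driver
+ * sources; it just restates the locking rule documented below):
+ *
+ *   spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *   if (kbase_backend_use_ctx_sched(kbdev, kctx, js))
+ *           ... atoms from kctx can now be pulled for slot js ...
+ *   spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);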
+ * + * Caller must hold hwaccess_lock + * + * Return: true if context is now active, false otherwise (ie if context does + * not have an address space assigned) + */ +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, + struct kbase_context *kctx, int js); + +/** + * kbase_backend_release_ctx_irq - Release a context from the GPU. This will + * de-assign the assigned address space. + * @kbdev: Device pointer + * @kctx: Context pointer + * + * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock + */ +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will + * de-assign the assigned address space. + * @kbdev: Device pointer + * @kctx: Context pointer + * + * Caller must hold kbase_device->mmu_hw_mutex + * + * This function must perform any operations that could not be performed in IRQ + * context by kbase_backend_release_ctx_irq(). + */ +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbase_backend_cache_clean - Perform a cache clean if the given atom requires + * one + * @kbdev: Device pointer + * @katom: Pointer to the failed atom + * + * On some GPUs, the GPU cache must be cleaned following a failed atom. This + * function performs a clean if it is required by @katom. + */ +void kbase_backend_cache_clean(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + + +/** + * kbase_backend_complete_wq() - Perform backend-specific actions required on + * completing an atom. + * @kbdev: Device pointer + * @katom: Pointer to the atom to complete + * + * This function should only be called from kbase_jd_done_worker() or + * js_return_worker(). + * + * Return: true if atom has completed, false if atom should be re-submitted + */ +void kbase_backend_complete_wq(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_backend_complete_wq_post_sched - Perform backend-specific actions + * required on completing an atom, after + * any scheduling has taken place. + * @kbdev: Device pointer + * @core_req: Core requirements of atom + * + * This function should only be called from kbase_jd_done_worker() or + * js_return_worker(). + */ +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req); + +/** + * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU + * and remove any others from the ringbuffers. + * @kbdev: Device pointer + * @end_timestamp: Timestamp of reset + */ +void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); + +/** + * kbase_backend_inspect_tail - Return the atom currently at the tail of slot + * @js + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return : Atom currently at the head of slot @js, or NULL + */ +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, + int js); + +/** + * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a + * slot. + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return : Number of atoms currently on slot + */ +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); + +/** + * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot + * that are currently on the GPU. + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return : Number of atoms currently on slot @js that are currently on the GPU. 
+ */ +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); + +/** + * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs + * has changed. + * @kbdev: Device pointer + * + * Perform any required backend-specific actions (eg starting/stopping + * scheduling timers). + */ +void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); + +/** + * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. + * @kbdev: Device pointer + * + * Perform any required backend-specific actions (eg updating timeouts of + * currently running atoms). + */ +void kbase_backend_timeouts_changed(struct kbase_device *kbdev); + +/** + * kbase_backend_slot_free() - Return the number of jobs that can be currently + * submitted to slot @js. + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return : Number of jobs that can be submitted. + */ +int kbase_backend_slot_free(struct kbase_device *kbdev, int js); + +/** + * kbase_job_check_enter_disjoint - potentially leave disjoint state + * @kbdev: kbase device + * @target_katom: atom which is finishing + * + * Work out whether to leave disjoint state when finishing an atom that was + * originated by kbase_job_check_enter_disjoint(). + */ +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom); + +/** + * kbase_backend_jm_kill_running_jobs_from_kctx - Kill all jobs that are + * currently running on GPU from a context + * @kctx: Context pointer + * + * This is used in response to a page fault to remove all jobs from the faulting + * context from the hardware. + * + * Caller must hold hwaccess_lock. + */ +void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx); + +/** + * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and + * to be descheduled. + * @kctx: Context pointer + * + * This should be called following kbase_js_zap_context(), to ensure the context + * can be safely destroyed. + */ +void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); + +/** + * kbase_backend_get_current_flush_id - Return the current flush ID + * + * @kbdev: Device pointer + * + * Return: the current flush ID to be recorded for each job chain + */ +u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); + +/** + * kbase_job_slot_hardstop - Hard-stop the specified job slot + * @kctx: The kbase context that contains the job(s) that should + * be hard-stopped + * @js: The job slot to hard-stop + * @target_katom: The job that should be hard-stopped (or NULL for all + * jobs from the context) + * Context: + * The job slot lock must be held when calling this function. + */ +void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom); + +/** + * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms + * currently on the GPU + * @kbdev: Device pointer + * + * Return: true if there are any atoms on the GPU, false otherwise + */ +bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_JM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_pm.h new file mode 100644 index 000000000000..bbaf6eaf8d88 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_pm.h @@ -0,0 +1,229 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * @file mali_kbase_hwaccess_pm.h + * HW access power manager common APIs + */ + +#ifndef _KBASE_HWACCESS_PM_H_ +#define _KBASE_HWACCESS_PM_H_ + +#include +#include + +#include + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; + +/* Functions common to all HW access backends */ + +/** + * Initialize the power management framework. + * + * Must be called before any other power management function + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the power management framework was successfully initialized. + */ +int kbase_hwaccess_pm_init(struct kbase_device *kbdev); + +/** + * Terminate the power management framework. + * + * No power management functions may be called after this + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_hwaccess_pm_term(struct kbase_device *kbdev); + +/** + * kbase_hwaccess_pm_powerup - Power up the GPU. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @flags: Flags to pass on to kbase_pm_init_hw + * + * Power up GPU after all modules have been initialized and interrupt handlers + * installed. + * + * Return: 0 if powerup was successful. + */ +int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, + unsigned int flags); + +/** + * Halt the power management framework. + * + * Should ensure that no new interrupts are generated, but allow any currently + * running interrupt handlers to complete successfully. The GPU is forced off by + * the time this function returns, regardless of whether or not the active power + * policy asks for the GPU to be powered off. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); + +/** + * Perform any backend-specific actions to suspend the GPU + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); + +/** + * Perform any backend-specific actions to resume the GPU from a suspend + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); + +/** + * Perform any required actions for activating the GPU. Called when the first + * context goes active. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); + +/** + * Perform any required actions for idling the GPU. Called when the last + * context goes idle. 
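+ *
+ * Illustrative pairing with kbase_hwaccess_pm_gpu_active() (a sketch only,
+ * not taken from the driver sources):
+ *
+ *   kbase_hwaccess_pm_gpu_active(kbdev);    (first context becomes active)
+ *   ... contexts submit work ...
+ *   kbase_hwaccess_pm_gpu_idle(kbdev);      (last context has gone idle)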
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js1 The core mask to use for job slot 1
+ * @param new_core_mask_js2 The core mask to use for job slot 2
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+ u64 new_core_mask_js0, u64 new_core_mask_js1,
+ u64 new_core_mask_js2);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ * @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ * be NULL. The contents of this array must not be
+ * modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ * @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_policy *policy);
+
+/**
+ * kbase_pm_list_policies - Retrieve a static list of the available policies.
+ *
+ * @kbdev: The kbase device structure for the device.
+ * @list: An array pointer to take the list of policies. This may be NULL.
+ * The contents of this array must not be modified.
+ *
+ * Return: The number of policies
+ */
+int kbase_pm_list_policies(struct kbase_device *kbdev,
+ const struct kbase_pm_policy * const **list);
+
+/**
+ * kbase_pm_protected_mode_enable - Enable protected mode
+ *
+ * @kbdev: Address of the instance of a GPU platform device.
+ *
+ * Return: Zero on success or an error code
+ */
+int kbase_pm_protected_mode_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_mode_disable - Disable protected mode
+ *
+ * @kbdev: Address of the instance of a GPU platform device.
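+ *
+ * Illustrative pairing with kbase_pm_protected_mode_enable() (a sketch only,
+ * not taken from the driver sources):
+ *
+ *   if (!kbase_pm_protected_mode_enable(kbdev)) {
+ *           ... run work that requires protected mode ...
+ *           kbase_pm_protected_mode_disable(kbdev);
+ *   }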
+ * + * Return: Zero on success or an error code + */ +int kbase_pm_protected_mode_disable(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_PM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_time.h new file mode 100644 index 000000000000..94b7551b865e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwaccess_time.h @@ -0,0 +1,56 @@ +/* + * + * (C) COPYRIGHT 2014,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * + */ + +#ifndef _KBASE_BACKEND_TIME_H_ +#define _KBASE_BACKEND_TIME_H_ + +/** + * kbase_backend_get_gpu_time() - Get current GPU time + * @kbdev: Device pointer + * @cycle_counter: Pointer to u64 to store cycle counter in + * @system_time: Pointer to u64 to store system time in + * @ts: Pointer to struct timespec to store current monotonic + * time in + */ +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts); + +/** + * kbase_backend_get_gpu_time_norequest() - Get current GPU time without + * request/release cycle counter + * @kbdev: Device pointer + * @cycle_counter: Pointer to u64 to store cycle counter in + * @system_time: Pointer to u64 to store system time in + * @ts: Pointer to struct timespec to store current monotonic + * time in + */ +void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, + u64 *cycle_counter, + u64 *system_time, + struct timespec64 *ts); + +#endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt.c b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt.c new file mode 100644 index 000000000000..2708af78b292 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt.c @@ -0,0 +1,794 @@ +/* + * + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Implementation of hardware counter context and accumulator APIs. 
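+ *
+ * Typical lifecycle of a context (an illustrative sketch only, not taken from
+ * the driver sources; "iface" stands for a backend interface created by the
+ * caller):
+ *
+ *   struct kbase_hwcnt_context *hctx;
+ *
+ *   if (!kbase_hwcnt_context_init(iface, &hctx)) {
+ *           ... acquire the accumulator and perform dumps ...
+ *           kbase_hwcnt_context_term(hctx);
+ *   }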
+ */ + +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_accumulator.h" +#include "mali_kbase_hwcnt_backend.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" +#include "mali_kbase_linux.h" + +#include +#include +#include + +/** + * enum kbase_hwcnt_accum_state - Hardware counter accumulator states. + * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail. + * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled. + * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are + * any enabled counters. + */ +enum kbase_hwcnt_accum_state { + ACCUM_STATE_ERROR, + ACCUM_STATE_DISABLED, + ACCUM_STATE_ENABLED +}; + +/** + * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. + * @backend: Pointer to created counter backend. + * @state: The current state of the accumulator. + * - State transition from disabled->enabled or + * disabled->error requires state_lock. + * - State transition from enabled->disabled or + * enabled->error requires both accum_lock and + * state_lock. + * - Error state persists until next disable. + * @enable_map: The current set of enabled counters. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map_any_enabled. + * @enable_map_any_enabled: True if any counters in the map are enabled, else + * false. If true, and state is ACCUM_STATE_ENABLED, + * then the counter backend will be enabled. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map. + * @scratch_map: Scratch enable map, used as temporary enable map + * storage during dumps. + * - Must only be read or modified while holding + * accum_lock. + * @accum_buf: Accumulation buffer, where dumps will be accumulated + * into on transition to a disable state. + * - Must only be read or modified while holding + * accum_lock. + * @accumulated: True if the accumulation buffer has been accumulated + * into and not subsequently read from yet, else false. + * - Must only be read or modified while holding + * accum_lock. + * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent + * dump that was requested by the user. + * - Must only be read or modified while holding + * accum_lock. + */ +struct kbase_hwcnt_accumulator { + struct kbase_hwcnt_backend *backend; + enum kbase_hwcnt_accum_state state; + struct kbase_hwcnt_enable_map enable_map; + bool enable_map_any_enabled; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool accumulated; + u64 ts_last_dump_ns; +}; + +/** + * struct kbase_hwcnt_context - Hardware counter context structure. + * @iface: Pointer to hardware counter backend interface. + * @state_lock: Spinlock protecting state. + * @disable_count: Disable count of the context. Initialised to 1. + * Decremented when the accumulator is acquired, and incremented + * on release. Incremented on calls to + * kbase_hwcnt_context_disable[_atomic], and decremented on + * calls to kbase_hwcnt_context_enable. + * - Must only be read or modified while holding state_lock. + * @accum_lock: Mutex protecting accumulator. + * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or + * termination. Set to true before accumulator initialisation, + * and false after accumulator termination. 
+ * - Must only be modified while holding both accum_lock and + * state_lock. + * - Can be read while holding either lock. + * @accum: Hardware counter accumulator structure. + */ +struct kbase_hwcnt_context { + const struct kbase_hwcnt_backend_interface *iface; + spinlock_t state_lock; + size_t disable_count; + struct mutex accum_lock; + bool accum_inited; + struct kbase_hwcnt_accumulator accum; +}; + +int kbase_hwcnt_context_init( + const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx) +{ + struct kbase_hwcnt_context *hctx = NULL; + + if (!iface || !out_hctx) + return -EINVAL; + + hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); + if (!hctx) + return -ENOMEM; + + hctx->iface = iface; + spin_lock_init(&hctx->state_lock); + hctx->disable_count = 1; + mutex_init(&hctx->accum_lock); + hctx->accum_inited = false; + + *out_hctx = hctx; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init); + +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return; + + /* Make sure we didn't leak the accumulator */ + WARN_ON(hctx->accum_inited); + kfree(hctx); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term); + +/** + * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. + */ +static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx) +{ + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map); + kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf); + kbase_hwcnt_enable_map_free(&hctx->accum.enable_map); + hctx->iface->term(hctx->accum.backend); + memset(&hctx->accum, 0, sizeof(hctx->accum)); +} + +/** + * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) +{ + int errcode; + + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + errcode = hctx->iface->init( + hctx->iface->info, &hctx->accum.backend); + if (errcode) + goto error; + + hctx->accum.state = ACCUM_STATE_ERROR; + + errcode = kbase_hwcnt_enable_map_alloc( + hctx->iface->metadata, &hctx->accum.enable_map); + if (errcode) + goto error; + + hctx->accum.enable_map_any_enabled = false; + + errcode = kbase_hwcnt_dump_buffer_alloc( + hctx->iface->metadata, &hctx->accum.accum_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc( + hctx->iface->metadata, &hctx->accum.scratch_map); + if (errcode) + goto error; + + hctx->accum.accumulated = false; + + hctx->accum.ts_last_dump_ns = + hctx->iface->timestamp_ns(hctx->accum.backend); + + return 0; + +error: + kbasep_hwcnt_accumulator_term(hctx); + return errcode; +} + +/** + * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the + * disabled state, from the enabled or + * error states. + * @hctx: Non-NULL pointer to hardware counter context. + * @accumulate: True if we should accumulate before disabling, else false. 
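+ *
+ * In summary, this handles the ENABLED/ERROR -> DISABLED transition. If the
+ * backend is currently enabled and @accumulate is true, one final dump is
+ * requested and folded into the accumulation buffer before the backend is
+ * disabled; any error during that final dump is ignored and the accumulator
+ * still ends up in the disabled state.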
+ */ +static void kbasep_hwcnt_accumulator_disable( + struct kbase_hwcnt_context *hctx, bool accumulate) +{ + int errcode = 0; + bool backend_enabled = false; + struct kbase_hwcnt_accumulator *accum; + unsigned long flags; + u64 dump_time_ns; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); + WARN_ON(!hctx->accum_inited); + + accum = &hctx->accum; + + spin_lock_irqsave(&hctx->state_lock, flags); + + WARN_ON(hctx->disable_count != 0); + WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); + + if ((hctx->accum.state == ACCUM_STATE_ENABLED) && + (accum->enable_map_any_enabled)) + backend_enabled = true; + + if (!backend_enabled) + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Early out if the backend is not already enabled */ + if (!backend_enabled) + return; + + if (!accumulate) + goto disable; + + /* Try and accumulate before disabling */ + errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_get(accum->backend, + &accum->accum_buf, &accum->enable_map, accum->accumulated); + if (errcode) + goto disable; + + accum->accumulated = true; + +disable: + hctx->iface->dump_disable(accum->backend); + + /* Regardless of any errors during the accumulate, put the accumulator + * in the disabled state. + */ + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} + +/** + * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the + * enabled state, from the disabled state. + * @hctx: Non-NULL pointer to hardware counter context. + */ +static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) +{ + int errcode = 0; + struct kbase_hwcnt_accumulator *accum; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->state_lock); + WARN_ON(!hctx->accum_inited); + WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED); + + accum = &hctx->accum; + + /* The backend only needs enabling if any counters are enabled */ + if (accum->enable_map_any_enabled) + errcode = hctx->iface->dump_enable_nolock( + accum->backend, &accum->enable_map); + + if (!errcode) + accum->state = ACCUM_STATE_ENABLED; + else + accum->state = ACCUM_STATE_ERROR; +} + +/** + * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date + * values of enabled counters possible, and + * optionally update the set of enabled + * counters. + * @hctx : Non-NULL pointer to the hardware counter context + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * @new_map: Pointer to the new counter enable map. If non-NULL, must have + * the same metadata as the accumulator. If NULL, the set of + * enabled counters will be unchanged. 
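+ *
+ * The overall flow is: snapshot the current enable map, install @new_map (if
+ * given), request and wait for a hardware dump while the backend is enabled,
+ * combine any previously accumulated values with the new dump into @dump_buf,
+ * and re-enable the backend under the new map. On an unexpected failure the
+ * backend is disabled and the accumulator is moved to the error state.
+ *
+ * Return: 0 on success, else error code.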
+ */ +static int kbasep_hwcnt_accumulator_dump( + struct kbase_hwcnt_context *hctx, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf, + const struct kbase_hwcnt_enable_map *new_map) +{ + int errcode = 0; + unsigned long flags; + enum kbase_hwcnt_accum_state state; + bool dump_requested = false; + bool dump_written = false; + bool cur_map_any_enabled; + struct kbase_hwcnt_enable_map *cur_map; + bool new_map_any_enabled = false; + u64 dump_time_ns; + struct kbase_hwcnt_accumulator *accum; + + WARN_ON(!hctx); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata)); + WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata)); + WARN_ON(!hctx->accum_inited); + lockdep_assert_held(&hctx->accum_lock); + + accum = &hctx->accum; + cur_map = &accum->scratch_map; + + /* Save out info about the current enable map */ + cur_map_any_enabled = accum->enable_map_any_enabled; + kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map); + + if (new_map) + new_map_any_enabled = + kbase_hwcnt_enable_map_any_enabled(new_map); + + /* + * We're holding accum_lock, so the accumulator state might transition + * from disabled to enabled during this function (as enabling is lock + * free), but it will never disable (as disabling needs to hold the + * accum_lock), nor will it ever transition from enabled to error (as + * an enable while we're already enabled is impossible). + * + * If we're already disabled, we'll only look at the accumulation buffer + * rather than do a real dump, so a concurrent enable does not affect + * us. + * + * If a concurrent enable fails, we might transition to the error + * state, but again, as we're only looking at the accumulation buffer, + * it's not an issue. + */ + spin_lock_irqsave(&hctx->state_lock, flags); + + state = accum->state; + + /* + * Update the new map now, such that if an enable occurs during this + * dump then that enable will set the new map. If we're already enabled, + * then we'll do it ourselves after the dump. + */ + if (new_map) { + kbase_hwcnt_enable_map_copy( + &accum->enable_map, new_map); + accum->enable_map_any_enabled = new_map_any_enabled; + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Error state, so early out. No need to roll back any map updates */ + if (state == ACCUM_STATE_ERROR) + return -EIO; + + /* Initiate the dump if the backend is enabled. */ + if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { + if (dump_buf) { + errcode = hctx->iface->dump_request( + accum->backend, &dump_time_ns); + dump_requested = true; + } else { + dump_time_ns = hctx->iface->timestamp_ns( + accum->backend); + errcode = hctx->iface->dump_clear(accum->backend); + } + + if (errcode) + goto error; + } else { + dump_time_ns = hctx->iface->timestamp_ns(accum->backend); + } + + /* Copy any accumulation into the dest buffer */ + if (accum->accumulated && dump_buf) { + kbase_hwcnt_dump_buffer_copy( + dump_buf, &accum->accum_buf, cur_map); + dump_written = true; + } + + /* Wait for any requested dumps to complete */ + if (dump_requested) { + WARN_ON(state != ACCUM_STATE_ENABLED); + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto error; + } + + /* If we're enabled and there's a new enable map, change the enabled set + * as soon after the dump has completed as possible. 
+ */ + if ((state == ACCUM_STATE_ENABLED) && new_map) { + /* Backend is only enabled if there were any enabled counters */ + if (cur_map_any_enabled) + hctx->iface->dump_disable(accum->backend); + + /* (Re-)enable the backend if the new map has enabled counters. + * No need to acquire the spinlock, as concurrent enable while + * we're already enabled and holding accum_lock is impossible. + */ + if (new_map_any_enabled) { + errcode = hctx->iface->dump_enable( + accum->backend, new_map); + if (errcode) + goto error; + } + } + + /* Copy, accumulate, or zero into the dest buffer to finish */ + if (dump_buf) { + /* If we dumped, copy or accumulate it into the destination */ + if (dump_requested) { + WARN_ON(state != ACCUM_STATE_ENABLED); + errcode = hctx->iface->dump_get( + accum->backend, + dump_buf, + cur_map, + dump_written); + if (errcode) + goto error; + dump_written = true; + } + + /* If we've not written anything into the dump buffer so far, it + * means there was nothing to write. Zero any enabled counters. + */ + if (!dump_written) + kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map); + } + + /* Write out timestamps */ + *ts_start_ns = accum->ts_last_dump_ns; + *ts_end_ns = dump_time_ns; + + accum->accumulated = false; + accum->ts_last_dump_ns = dump_time_ns; + + return 0; +error: + /* An error was only physically possible if the backend was enabled */ + WARN_ON(state != ACCUM_STATE_ENABLED); + + /* Disable the backend, and transition to the error state */ + hctx->iface->dump_disable(accum->backend); + spin_lock_irqsave(&hctx->state_lock, flags); + + accum->state = ACCUM_STATE_ERROR; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + return errcode; +} + +/** + * kbasep_hwcnt_context_disable() - Increment the disable count of the context. + * @hctx: Non-NULL pointer to hardware counter context. + * @accumulate: True if we should accumulate before disabling, else false. + */ +static void kbasep_hwcnt_context_disable( + struct kbase_hwcnt_context *hctx, bool accumulate) +{ + unsigned long flags; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); + + if (!kbase_hwcnt_context_disable_atomic(hctx)) { + kbasep_hwcnt_accumulator_disable(hctx, accumulate); + + spin_lock_irqsave(&hctx->state_lock, flags); + + /* Atomic disable failed and we're holding the mutex, so current + * disable count must be 0. 
+ */ + WARN_ON(hctx->disable_count != 0); + hctx->disable_count++; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + } +} + +int kbase_hwcnt_accumulator_acquire( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum) +{ + int errcode = 0; + unsigned long flags; + + if (!hctx || !accum) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!hctx->accum_inited) + /* Set accum initing now to prevent concurrent init */ + hctx->accum_inited = true; + else + /* Already have an accum, or already being inited */ + errcode = -EBUSY; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); + + if (errcode) + return errcode; + + errcode = kbasep_hwcnt_accumulator_init(hctx); + + if (errcode) { + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum_inited = false; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); + + return errcode; + } + + spin_lock_irqsave(&hctx->state_lock, flags); + + WARN_ON(hctx->disable_count == 0); + WARN_ON(hctx->accum.enable_map_any_enabled); + + /* Decrement the disable count to allow the accumulator to be accessible + * now that it's fully constructed. + */ + hctx->disable_count--; + + /* + * Make sure the accumulator is initialised to the correct state. + * Regardless of initial state, counters don't need to be enabled via + * the backend, as the initial enable map has no enabled counters. + */ + hctx->accum.state = (hctx->disable_count == 0) ? + ACCUM_STATE_ENABLED : + ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + *accum = &hctx->accum; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire); + +void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) +{ + unsigned long flags; + struct kbase_hwcnt_context *hctx; + + if (!accum) + return; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + mutex_lock(&hctx->accum_lock); + + /* Double release is a programming error */ + WARN_ON(!hctx->accum_inited); + + /* Disable the context to ensure the accumulator is inaccesible while + * we're destroying it. This performs the corresponding disable count + * increment to the decrement done during acquisition. + */ + kbasep_hwcnt_context_disable(hctx, false); + + mutex_unlock(&hctx->accum_lock); + + kbasep_hwcnt_accumulator_term(hctx); + + mutex_lock(&hctx->accum_lock); + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum_inited = false; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release); + +void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) +{ + if (WARN_ON(!hctx)) + return; + + /* Try and atomically disable first, so we can avoid locking the mutex + * if we don't need to. + */ + if (kbase_hwcnt_context_disable_atomic(hctx)) + return; + + mutex_lock(&hctx->accum_lock); + + kbasep_hwcnt_context_disable(hctx, true); + + mutex_unlock(&hctx->accum_lock); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable); + +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) +{ + unsigned long flags; + bool atomic_disabled = false; + + if (WARN_ON(!hctx)) + return false; + + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { + /* + * If disable count is non-zero, we can just bump the disable + * count. 
+ * + * Otherwise, we can't disable in an atomic context. + */ + if (hctx->disable_count != 0) { + hctx->disable_count++; + atomic_disabled = true; + } + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + return atomic_disabled; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic); + +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) +{ + unsigned long flags; + + if (WARN_ON(!hctx)) + return; + + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!WARN_ON(hctx->disable_count == 0)) { + if (hctx->disable_count == 1) + kbasep_hwcnt_accumulator_enable(hctx); + + hctx->disable_count--; + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable); + +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return NULL; + + return hctx->iface->metadata; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata); + +int kbase_hwcnt_accumulator_set_counters( + struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_context *hctx; + + if (!accum || !new_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + if ((new_map->metadata != hctx->iface->metadata) || + (dump_buf && (dump_buf->metadata != hctx->iface->metadata))) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + + errcode = kbasep_hwcnt_accumulator_dump( + hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); + + mutex_unlock(&hctx->accum_lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters); + +int kbase_hwcnt_accumulator_dump( + struct kbase_hwcnt_accumulator *accum, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_context *hctx; + + if (!accum || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + if (dump_buf && (dump_buf->metadata != hctx->iface->metadata)) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + + errcode = kbasep_hwcnt_accumulator_dump( + hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); + + mutex_unlock(&hctx->accum_lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump); + +u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum) +{ + struct kbase_hwcnt_context *hctx; + + if (WARN_ON(!accum)) + return 0; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + return hctx->iface->timestamp_ns(accum->backend); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_accumulator.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_accumulator.h new file mode 100644 index 000000000000..eb82ea4bfd14 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_accumulator.h @@ -0,0 +1,146 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter accumulator API. + */ + +#ifndef _KBASE_HWCNT_ACCUMULATOR_H_ +#define _KBASE_HWCNT_ACCUMULATOR_H_ + +#include + +struct kbase_hwcnt_context; +struct kbase_hwcnt_accumulator; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator + * for a hardware counter context. + * @hctx: Non-NULL pointer to a hardware counter context. + * @accum: Non-NULL pointer to where the pointer to the created accumulator + * will be stored on success. + * + * There can exist at most one instance of the hardware counter accumulator per + * context at a time. + * + * If multiple clients need access to the hardware counters at the same time, + * then an abstraction built on top of the single instance to the hardware + * counter accumulator is required. + * + * No counters will be enabled with the returned accumulator. A subsequent call + * to kbase_hwcnt_accumulator_set_counters must be used to turn them on. + * + * There are four components to a hardware counter dump: + * - A set of enabled counters + * - A start time + * - An end time + * - A dump buffer containing the accumulated counter values for all enabled + * counters between the start and end times. + * + * For each dump, it is guaranteed that all enabled counters were active for the + * entirety of the period between the start and end times. + * + * It is also guaranteed that the start time of dump "n" is always equal to the + * end time of dump "n - 1". + * + * For all dumps, the values of any counters that were not enabled is undefined. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_acquire( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum); + +/** + * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * + * The accumulator must be released before the context the accumulator was + * created from is terminated. + */ +void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); + +/** + * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently + * enabled counters, and enable a new + * set of counters that will be used + * for subsequent dumps. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @new_map: Non-NULL pointer to the new counter enable map. Must have the + * same metadata as the accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * If this function fails for some unexpected reason (i.e. anything other than + * invalid args), then the accumulator will be put into the error state until + * the parent context is next disabled. + * + * Return: 0 on success or error code. 
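+ *
+ * Illustrative usage sketch (not taken from the driver; error handling and
+ * cleanup of the map/buffer are omitted, and "iface" stands for a backend
+ * interface created elsewhere, e.g. by kbase_hwcnt_backend_jm_create()):
+ *
+ *   struct kbase_hwcnt_context *hctx;
+ *   struct kbase_hwcnt_accumulator *acc;
+ *   struct kbase_hwcnt_enable_map map;
+ *   struct kbase_hwcnt_dump_buffer buf;
+ *   u64 ts_start, ts_end;
+ *
+ *   kbase_hwcnt_context_init(&iface, &hctx);
+ *   kbase_hwcnt_accumulator_acquire(hctx, &acc);
+ *   kbase_hwcnt_enable_map_alloc(kbase_hwcnt_context_metadata(hctx), &map);
+ *   kbase_hwcnt_dump_buffer_alloc(kbase_hwcnt_context_metadata(hctx), &buf);
+ *   ... set the wanted counters in map ...
+ *   kbase_hwcnt_accumulator_set_counters(acc, &map, &ts_start, &ts_end, NULL);
+ *   ... later, read back everything accumulated since then ...
+ *   kbase_hwcnt_accumulator_dump(acc, &ts_start, &ts_end, &buf);
+ *   kbase_hwcnt_accumulator_release(acc);
+ *   kbase_hwcnt_context_term(hctx);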
+ */ +int kbase_hwcnt_accumulator_set_counters( + struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled + * counters. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * If this function fails for some unexpected reason (i.e. anything other than + * invalid args), then the accumulator will be put into the error state until + * the parent context is next disabled. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_dump( + struct kbase_hwcnt_accumulator *accum, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend + * timestamp. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * + * Return: Accumulator backend timestamp in nanoseconds. + */ +u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum); + +#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend.h new file mode 100644 index 000000000000..3a921b754b55 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend.h @@ -0,0 +1,220 @@ +/* + * + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Virtual interface for hardware counter backends. + */ + +#ifndef _KBASE_HWCNT_BACKEND_H_ +#define _KBASE_HWCNT_BACKEND_H_ + +#include + +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to information used to + * create an instance of a hardware counter + * backend. + */ +struct kbase_hwcnt_backend_info; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter + * backend, used to perform dumps. + */ +struct kbase_hwcnt_backend; + +/** + * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * All uses of the created hardware counter backend must be externally + * synchronised. 
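+ *
+ * For reference, the call sequence driven by the accumulator in
+ * mali_kbase_hwcnt.c is: init, then (repeatedly) dump_enable or
+ * dump_enable_nolock, dump_request, dump_wait, dump_get and dump_disable,
+ * and finally term.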
+ * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_init_fn)( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend); + +/** + * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. + * @backend: Pointer to backend to be terminated. + */ +typedef void (*kbase_hwcnt_backend_term_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend + * timestamp. + * @backend: Non-NULL pointer to backend. + * + * Return: Backend timestamp in nanoseconds. + */ +typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the + * backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * The enable_map must have been created using the interface's metadata. + * If the backend has already been enabled, an error is returned. + * + * May be called in an atomic context. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_enable_fn)( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping + * with the backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be + * called in an atomic context with the spinlock documented by the specific + * backend interface held. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with + * the backend. + * @backend: Non-NULL pointer to backend. + * + * If the backend is already disabled, does nothing. + * Any undumped counter values since the last dump get will be lost. + */ +typedef void (*kbase_hwcnt_backend_dump_disable_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped + * counters. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_clear_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter + * dump. + * @backend: Non-NULL pointer to backend. + * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was + * requested will be written out to on success. + * + * If the backend is not enabled or another dump is already in progress, + * returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_request_fn)( + struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns); + +/** + * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested + * counter dump has completed. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. 
+ */ +typedef int (*kbase_hwcnt_backend_dump_wait_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the + * counters dumped after the last dump + * request into the dump buffer. + * @backend: Non-NULL pointer to backend. + * @dump_buffer: Non-NULL pointer to destination dump buffer. + * @enable_map: Non-NULL pointer to enable map specifying enabled values. + * @accumulate: True if counters should be accumulated into dump_buffer, rather + * than copied. + * + * If the backend is not enabled, returns an error. + * If a dump is in progress (i.e. dump_wait has not yet returned successfully) + * then the resultant contents of the dump buffer will be undefined. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_get_fn)( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map, + bool accumulate); + +/** + * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual + * interface. + * @metadata: Immutable hardware counter metadata. + * @info: Immutable info used to initialise an instance of the + * backend. + * @init: Function ptr to initialise an instance of the backend. + * @term: Function ptr to terminate an instance of the backend. + * @timestamp_ns: Function ptr to get the current backend timestamp. + * @dump_enable: Function ptr to enable dumping. + * @dump_enable_nolock: Function ptr to enable dumping while the + * backend-specific spinlock is already held. + * @dump_disable: Function ptr to disable dumping. + * @dump_clear: Function ptr to clear counters. + * @dump_request: Function ptr to request a dump. + * @dump_wait: Function ptr to wait until dump to complete. + * @dump_get: Function ptr to copy or accumulate dump into a dump + * buffer. + */ +struct kbase_hwcnt_backend_interface { + const struct kbase_hwcnt_metadata *metadata; + const struct kbase_hwcnt_backend_info *info; + kbase_hwcnt_backend_init_fn init; + kbase_hwcnt_backend_term_fn term; + kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns; + kbase_hwcnt_backend_dump_enable_fn dump_enable; + kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock; + kbase_hwcnt_backend_dump_disable_fn dump_disable; + kbase_hwcnt_backend_dump_clear_fn dump_clear; + kbase_hwcnt_backend_dump_request_fn dump_request; + kbase_hwcnt_backend_dump_wait_fn dump_wait; + kbase_hwcnt_backend_dump_get_fn dump_get; +}; + +#endif /* _KBASE_HWCNT_BACKEND_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c new file mode 100644 index 000000000000..02a42bfdea14 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.c @@ -0,0 +1,707 @@ +/* + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_backend_jm.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#include "mali_kbase_pm_ca.h" +#include "mali_kbase_hwaccess_instr.h" +#include "mali_kbase_hwaccess_time.h" +#include "mali_kbase_ccswe.h" + +#ifdef CONFIG_MALI_NO_MALI +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + +#include "backend/gpu/mali_kbase_pm_internal.h" + +/** + * struct kbase_hwcnt_backend_jm_info - Information used to create an instance + * of a JM hardware counter backend. + * @kbdev: KBase device. + * @use_secondary: True if secondary performance counters should be used, + * else false. Ignored if secondary counters are not supported. + * @metadata: Hardware counter metadata. + * @dump_bytes: Bytes of GPU memory required to perform a + * hardware counter dump. + */ +struct kbase_hwcnt_backend_jm_info { + struct kbase_device *kbdev; + bool use_secondary; + const struct kbase_hwcnt_metadata *metadata; + size_t dump_bytes; +}; + +/** + * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend. + * @info: Info used to create the backend. + * @kctx: KBase context used for GPU memory allocation and + * counter dumping. + * @gpu_dump_va: GPU hardware counter dump buffer virtual address. + * @cpu_dump_va: CPU mapping of gpu_dump_va. + * @vmap: Dump buffer vmap. + * @enabled: True if dumping has been enabled, else false. + * @pm_core_mask: PM state sync-ed shaders core mask for the enabled + * dumping. + * @clk_enable_map: The enable map specifying enabled clock domains. + * @cycle_count_elapsed: + * Cycle count elapsed for a given sample period. + * The top clock cycle, index 0, is read directly from + * hardware, but the other clock domains need to be + * calculated with software estimation. + * @prev_cycle_count: Previous cycle count to calculate the cycle count for + * sample period. + * @rate_listener: Clock rate listener callback state. + * @ccswe_shader_cores: Shader cores cycle count software estimator. 
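+ *
+ * Note on cycle counts: the top clock domain value is read directly from the
+ * GPU via kbase_backend_get_gpu_time_norequest(), while the shader cores
+ * domain is estimated in software by @ccswe_shader_cores, which is kept up to
+ * date through @rate_listener whenever the clock rate changes.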
+ */ +struct kbase_hwcnt_backend_jm { + const struct kbase_hwcnt_backend_jm_info *info; + struct kbase_context *kctx; + u64 gpu_dump_va; + void *cpu_dump_va; + struct kbase_vmap_struct *vmap; + bool enabled; + u64 pm_core_mask; + u64 clk_enable_map; + u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; + u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; + struct kbase_clk_rate_listener rate_listener; + struct kbase_ccswe ccswe_shader_cores; +}; + +/** + * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback + * + * @rate_listener: Callback state + * @clk_index: Clock index + * @clk_rate_hz: Clock frequency(hz) + */ +static void kbasep_hwcnt_backend_jm_on_freq_change( + struct kbase_clk_rate_listener *rate_listener, + u32 clk_index, + u32 clk_rate_hz) +{ + struct kbase_hwcnt_backend_jm *backend_jm = container_of( + rate_listener, struct kbase_hwcnt_backend_jm, rate_listener); + u64 timestamp_ns; + + if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) + return; + + timestamp_ns = ktime_get_raw_ns(); + kbase_ccswe_freq_change( + &backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz); +} + +/** + * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking + * + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * @timestamp_ns: Timestamp(ns) when HWCNT were enabled. + */ +static void kbasep_hwcnt_backend_jm_cc_enable( + struct kbase_hwcnt_backend_jm *backend_jm, + const struct kbase_hwcnt_enable_map *enable_map, + u64 timestamp_ns) +{ + struct kbase_device *kbdev = backend_jm->kctx->kbdev; + u64 clk_enable_map = enable_map->clk_enable_map; + u64 cycle_count; + + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { + /* turn on the cycle counter */ + kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); + /* Read cycle count for top clock domain. */ + kbase_backend_get_gpu_time_norequest( + kbdev, &cycle_count, NULL, NULL); + + backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = + cycle_count; + } + + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + /* software estimation for non-top clock domains */ + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + const struct kbase_clk_data *clk_data = + rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; + u32 cur_freq; + unsigned long flags; + + spin_lock_irqsave(&rtm->lock, flags); + + cur_freq = (u32) clk_data->clock_val; + kbase_ccswe_reset(&backend_jm->ccswe_shader_cores); + kbase_ccswe_freq_change( + &backend_jm->ccswe_shader_cores, + timestamp_ns, + cur_freq); + + kbase_clk_rate_trace_manager_subscribe_no_lock( + rtm, &backend_jm->rate_listener); + + spin_unlock_irqrestore(&rtm->lock, flags); + + /* ccswe was reset. The estimated cycle is zero. */ + backend_jm->prev_cycle_count[ + KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0; + } + + /* Keep clk_enable_map for dump_request. */ + backend_jm->clk_enable_map = clk_enable_map; +} + +/** + * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking + * + * @backend: Non-NULL pointer to backend. 
+ */ +static void kbasep_hwcnt_backend_jm_cc_disable( + struct kbase_hwcnt_backend_jm *backend_jm) +{ + struct kbase_device *kbdev = backend_jm->kctx->kbdev; + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + u64 clk_enable_map = backend_jm->clk_enable_map; + + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { + /* turn off the cycle counter */ + kbase_pm_release_gpu_cycle_counter(backend_jm->kctx->kbdev); + } + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + + kbase_clk_rate_trace_manager_unsubscribe( + rtm, &backend_jm->rate_listener); + } +} + + +/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ +static u64 kbasep_hwcnt_backend_jm_timestamp_ns( + struct kbase_hwcnt_backend *backend) +{ + (void)backend; + return ktime_get_raw_ns(); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int kbasep_hwcnt_backend_jm_dump_enable_nolock( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_hwcnt_physical_enable_map phys; + struct kbase_instr_hwcnt_enable enable; + u64 timestamp_ns; + + if (!backend_jm || !enable_map || backend_jm->enabled || + (enable_map->metadata != backend_jm->info->metadata)) + return -EINVAL; + + kctx = backend_jm->kctx; + kbdev = backend_jm->kctx->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map); + + enable.fe_bm = phys.fe_bm; + enable.shader_bm = phys.shader_bm; + enable.tiler_bm = phys.tiler_bm; + enable.mmu_l2_bm = phys.mmu_l2_bm; + enable.use_secondary = backend_jm->info->use_secondary; + enable.dump_buffer = backend_jm->gpu_dump_va; + enable.dump_buffer_bytes = backend_jm->info->dump_bytes; + + timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); + if (errcode) + goto error; + + backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); + backend_jm->enabled = true; + + kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); + + return 0; +error: + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_jm_dump_enable( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + unsigned long flags; + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + + if (!backend_jm) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock( + backend, enable_map); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void kbasep_hwcnt_backend_jm_dump_disable( + struct kbase_hwcnt_backend *backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (WARN_ON(!backend_jm) || !backend_jm->enabled) + return; + + kbasep_hwcnt_backend_jm_cc_disable(backend_jm); + + errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); + WARN_ON(errcode); + + backend_jm->enabled = false; +} + +/* 
JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int kbasep_hwcnt_backend_jm_dump_clear( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_clear(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int kbasep_hwcnt_backend_jm_dump_request( + struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + const struct kbase_hwcnt_metadata *metadata; + u64 current_cycle_count; + size_t clk; + int ret; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + metadata = backend_jm->info->metadata; + + /* Disable pre-emption, to make the timestamp as accurate as possible */ + preempt_disable(); + { + *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + backend_jm->clk_enable_map, clk)) + continue; + + if (clk == KBASE_CLOCK_DOMAIN_TOP) { + /* Read cycle count for top clock domain. */ + kbase_backend_get_gpu_time_norequest( + kbdev, ¤t_cycle_count, + NULL, NULL); + } else { + /* + * Estimate cycle count for non-top clock + * domain. + */ + current_cycle_count = kbase_ccswe_cycle_at( + &backend_jm->ccswe_shader_cores, + *dump_time_ns); + } + backend_jm->cycle_count_elapsed[clk] = + current_cycle_count - + backend_jm->prev_cycle_count[clk]; + + /* + * Keep the current cycle count for later calculation. + */ + backend_jm->prev_cycle_count[clk] = current_cycle_count; + } + } + preempt_enable(); + + return ret; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int kbasep_hwcnt_backend_jm_dump_wait( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_jm_dump_get( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + size_t clk; + + if (!backend_jm || !dst || !dst_enable_map || + (backend_jm->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + /* Invalidate the kernel buffer before reading from it. */ + kbase_sync_mem_regions( + backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); + + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + continue; + + /* Extract elapsed cycle count for each clock domain. */ + dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk]; + } + + return kbase_hwcnt_gpu_dump_get( + dst, backend_jm->cpu_dump_va, dst_enable_map, + backend_jm->pm_core_mask, accumulate); +} + +/** + * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer. + * @info: Non-NULL pointer to JM backend info. 
+ * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address + * is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_dump_alloc( + const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_context *kctx, + u64 *gpu_dump_va) +{ + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + + WARN_ON(!info); + WARN_ON(!kctx); + WARN_ON(!gpu_dump_va); + + flags = BASE_MEM_PROT_CPU_RD | + BASE_MEM_PROT_GPU_WR | + BASEP_MEM_PERMANENT_KERNEL_MAPPING | + BASE_MEM_CACHED_CPU; + + if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) + flags |= BASE_MEM_UNCACHED_GPU; + + nr_pages = PFN_UP(info->dump_bytes); + + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); + + if (!reg) + return -ENOMEM; + + return 0; +} + +/** + * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: GPU dump buffer virtual address. + */ +static void kbasep_hwcnt_backend_jm_dump_free( + struct kbase_context *kctx, + u64 gpu_dump_va) +{ + WARN_ON(!kctx); + if (gpu_dump_va) + kbase_mem_free(kctx, gpu_dump_va); +} + +/** + * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend. + * @backend: Pointer to JM backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. + */ +static void kbasep_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_jm *backend) +{ + if (!backend) + return; + + if (backend->kctx) { + struct kbase_context *kctx = backend->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + if (backend->cpu_dump_va) + kbase_phy_alloc_mapping_put(kctx, backend->vmap); + + if (backend->gpu_dump_va) + kbasep_hwcnt_backend_jm_dump_free( + kctx, backend->gpu_dump_va); + + kbasep_js_release_privileged_ctx(kbdev, kctx); + kbase_destroy_context(kctx); + } + + kfree(backend); +} + +/** + * kbasep_hwcnt_backend_jm_create() - Create a JM backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_create( + const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_hwcnt_backend_jm **out_backend) +{ + + int errcode; + struct kbase_device *kbdev; + struct kbase_hwcnt_backend_jm *backend = NULL; + + WARN_ON(!info); + WARN_ON(!out_backend); + + kbdev = info->kbdev; + + backend = kzalloc(sizeof(*backend), GFP_KERNEL); + if (!backend) + goto alloc_error; + + backend->info = info; + + backend->kctx = kbase_create_context(kbdev, true, + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); + if (!backend->kctx) + goto alloc_error; + + kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); + + errcode = kbasep_hwcnt_backend_jm_dump_alloc( + info, backend->kctx, &backend->gpu_dump_va); + if (errcode) + goto error; + + backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, + backend->gpu_dump_va, &backend->vmap); + if (!backend->cpu_dump_va) + goto alloc_error; + + kbase_ccswe_init(&backend->ccswe_shader_cores); + backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; + +#ifdef CONFIG_MALI_NO_MALI + /* The dummy model needs the CPU mapping. 
*/ + gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); +#endif + + *out_backend = backend; + return 0; + +alloc_error: + errcode = -ENOMEM; +error: + kbasep_hwcnt_backend_jm_destroy(backend); + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_jm_init( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend = NULL; + + if (!info || !out_backend) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_jm_create( + (const struct kbase_hwcnt_backend_jm_info *) info, &backend); + if (errcode) + return errcode; + + *out_backend = (struct kbase_hwcnt_backend *)backend; + + return 0; +} + +/* JM backend implementation of kbase_hwcnt_backend_term_fn */ +static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) +{ + if (!backend) + return; + + kbasep_hwcnt_backend_jm_dump_disable(backend); + kbasep_hwcnt_backend_jm_destroy( + (struct kbase_hwcnt_backend_jm *)backend); +} + +/** + * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info. + * @info: Pointer to info to destroy. + * + * Can be safely called on a backend info in any state of partial construction. + */ +static void kbasep_hwcnt_backend_jm_info_destroy( + const struct kbase_hwcnt_backend_jm_info *info) +{ + if (!info) + return; + + kbase_hwcnt_gpu_metadata_destroy(info->metadata); + kfree(info); +} + +/** + * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info. + * @kbdev: Non_NULL pointer to kbase device. + * @out_info: Non-NULL pointer to where info is stored on success. + * + * Return 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_info_create( + struct kbase_device *kbdev, + const struct kbase_hwcnt_backend_jm_info **out_info) +{ + int errcode = -ENOMEM; + struct kbase_hwcnt_gpu_info hwcnt_gpu_info; + struct kbase_hwcnt_backend_jm_info *info = NULL; + + WARN_ON(!kbdev); + WARN_ON(!out_info); + + errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); + if (errcode) + return errcode; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto error; + + info->kbdev = kbdev; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + info->use_secondary = true; +#else + info->use_secondary = false; +#endif + + errcode = kbase_hwcnt_gpu_metadata_create( + &hwcnt_gpu_info, info->use_secondary, + &info->metadata, + &info->dump_bytes); + if (errcode) + goto error; + + *out_info = info; + + return 0; +error: + kbasep_hwcnt_backend_jm_info_destroy(info); + return errcode; +} + +int kbase_hwcnt_backend_jm_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface) +{ + int errcode; + const struct kbase_hwcnt_backend_jm_info *info = NULL; + + if (!kbdev || !iface) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info); + + if (errcode) + return errcode; + + iface->metadata = info->metadata; + iface->info = (struct kbase_hwcnt_backend_info *)info; + iface->init = kbasep_hwcnt_backend_jm_init; + iface->term = kbasep_hwcnt_backend_jm_term; + iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns; + iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable; + iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock; + iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable; + iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear; + iface->dump_request = kbasep_hwcnt_backend_jm_dump_request; + iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait; + 
iface->dump_get = kbasep_hwcnt_backend_jm_dump_get; + + return 0; +} + +void kbase_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_interface *iface) +{ + if (!iface) + return; + + kbasep_hwcnt_backend_jm_info_destroy( + (const struct kbase_hwcnt_backend_jm_info *)iface->info); + memset(iface, 0, sizeof(*iface)); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.h new file mode 100644 index 000000000000..f15faeba704a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_backend_jm.h @@ -0,0 +1,61 @@ +/* + * + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Concrete implementation of mali_kbase_hwcnt_backend interface for JM + * backend. + */ + +#ifndef _KBASE_HWCNT_BACKEND_JM_H_ +#define _KBASE_HWCNT_BACKEND_JM_H_ + +#include "mali_kbase_hwcnt_backend.h" + +struct kbase_device; + +/** + * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend + * interface. + * @kbdev: Non-NULL pointer to kbase device. + * @iface: Non-NULL pointer to backend interface structure that is filled in + * on creation success. + * + * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_jm_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend + * interface. + * @iface: Pointer to interface to destroy. + * + * Can be safely called on an all-zeroed interface, or on an already destroyed + * interface. + */ +void kbase_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_interface *iface); + +#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_context.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_context.h new file mode 100644 index 000000000000..bc50ad12c2f4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_context.h @@ -0,0 +1,119 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter context API. + */ + +#ifndef _KBASE_HWCNT_CONTEXT_H_ +#define _KBASE_HWCNT_CONTEXT_H_ + +#include + +struct kbase_hwcnt_backend_interface; +struct kbase_hwcnt_context; + +/** + * kbase_hwcnt_context_init() - Initialise a hardware counter context. + * @iface: Non-NULL pointer to a hardware counter backend interface. + * @out_hctx: Non-NULL pointer to where the pointer to the created context will + * be stored on success. + * + * On creation, the disable count of the context will be 0. + * A hardware counter accumulator can be acquired using a created context. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_context_init( + const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx); + +/** + * kbase_hwcnt_context_term() - Terminate a hardware counter context. + * @hctx: Pointer to context to be terminated. + */ +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by + * the context, so related counter data + * structures can be created. + * @hctx: Non-NULL pointer to the hardware counter context. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable() - Increment the disable count of the context. + * @hctx: Pointer to the hardware counter context. + * + * If a call to this function increments the disable count from 0 to 1, and + * an accumulator has been acquired, then a counter dump will be performed + * before counters are disabled via the backend interface. + * + * Subsequent dumps via the accumulator while counters are disabled will first + * return the accumulated dump, then will return dumps with zeroed counters. + * + * After this function call returns, it is guaranteed that counters will not be + * enabled via the backend interface. + */ +void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the + * context if possible in an atomic + * context. + * @hctx: Pointer to the hardware counter context. + * + * This function will only succeed if hardware counters are effectively already + * disabled, i.e. there is no accumulator, the disable count is already + * non-zero, or the accumulator has no counters set. + * + * After this function call returns true, it is guaranteed that counters will + * not be enabled via the backend interface. + * + * Return: True if the disable count was incremented, else False. + */ +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_enable() - Decrement the disable count of the context. + * @hctx: Pointer to the hardware counter context. + * + * If a call to this function decrements the disable count from 1 to 0, and + * an accumulator has been acquired, then counters will be re-enabled via the + * backend interface. + * + * If an accumulator has been acquired and enabling counters fails for some + * reason, the accumulator will be placed into an error state. + * + * It is only valid to call this function one time for each prior returned call + * to kbase_hwcnt_context_disable. + * + * The spinlock documented in the backend interface that was passed in to + * kbase_hwcnt_context_init() must be held before calling this function. 
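+ *
+ * Note the asymmetry with kbase_hwcnt_context_disable(): that function may
+ * need to take the accumulator mutex, so it must not be called with the
+ * backend spinlock held, whereas this function must be called with that
+ * spinlock held.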
+ */ +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); + +#endif /* _KBASE_HWCNT_CONTEXT_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.c new file mode 100644 index 000000000000..103432875f6d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.c @@ -0,0 +1,786 @@ +/* + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#ifdef CONFIG_MALI_NO_MALI +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif + +#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8 +#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4 +#define KBASE_HWCNT_V4_MAX_GROUPS \ + (KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP) +#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK) +/* Index of the PRFCNT_EN header into a V4 counter block */ +#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2 + +#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK) +/* Index of the PRFCNT_EN header into a V5 counter block */ +#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 + +/** + * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter + * metadata for a v4 GPU. + * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. + * @metadata: Non-NULL pointer to where created metadata is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_metadata_v4_create( + const struct kbase_hwcnt_gpu_v4_info *v4_info, + const struct kbase_hwcnt_metadata **metadata) +{ + size_t grp; + int errcode = -ENOMEM; + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description *grps; + size_t avail_mask_bit; + + WARN_ON(!v4_info); + WARN_ON(!metadata); + + /* Check if there are enough bits in the availability mask to represent + * all the hardware counter blocks in the system. 
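+ * For example, with a 64-bit availability mask and
+ * KBASE_HWCNT_V4_BLOCKS_PER_GROUP = 8, KBASE_HWCNT_V4_MAX_GROUPS works out
+ * as 64 / 8 = 8, so at most 8 core groups can be described.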
+ */ + if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS) + return -EINVAL; + + grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL); + if (!grps) + goto clean_up; + + desc.grp_cnt = v4_info->cg_count; + desc.grps = grps; + + for (grp = 0; grp < v4_info->cg_count; grp++) { + size_t blk; + size_t sc; + const u64 core_mask = v4_info->cgs[grp].core_mask; + struct kbase_hwcnt_block_description *blks = kcalloc( + KBASE_HWCNT_V4_BLOCKS_PER_GROUP, + sizeof(*blks), + GFP_KERNEL); + + if (!blks) + goto clean_up; + + grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; + grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP; + grps[grp].blks = blks; + + for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) { + blks[blk].inst_cnt = 1; + blks[blk].hdr_cnt = + KBASE_HWCNT_V4_HEADERS_PER_BLOCK; + blks[blk].ctr_cnt = + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK; + } + + for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) { + blks[sc].type = core_mask & (1ull << sc) ? + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER : + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + } + + blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER; + blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2; + blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + blks[7].type = (grp == 0) ? + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM : + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + + WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8); + } + + /* Initialise the availability mask */ + desc.avail_mask = 0; + avail_mask_bit = 0; + + for (grp = 0; grp < desc.grp_cnt; grp++) { + size_t blk; + const struct kbase_hwcnt_block_description *blks = + desc.grps[grp].blks; + for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) { + WARN_ON(blks[blk].inst_cnt != 1); + if (blks[blk].type != + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED) + desc.avail_mask |= (1ull << avail_mask_bit); + + avail_mask_bit++; + } + } + + desc.clk_cnt = v4_info->clk_cnt; + + errcode = kbase_hwcnt_metadata_create(&desc, metadata); + + /* Always clean up, as metadata will make a copy of the input args */ +clean_up: + if (grps) { + for (grp = 0; grp < v4_info->cg_count; grp++) + kfree(grps[grp].blks); + kfree(grps); + } + return errcode; +} + +/** + * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a + * V4 GPU. + * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. + * + * Return: Size of buffer the V4 GPU needs to perform a counter dump. + */ +static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes( + const struct kbase_hwcnt_gpu_v4_info *v4_info) +{ + return v4_info->cg_count * + KBASE_HWCNT_V4_BLOCKS_PER_GROUP * + KBASE_HWCNT_V4_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_BYTES; +} + +/** + * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter + * metadata for a v5 GPU. + * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * @use_secondary: True if secondary performance counters should be used, else + * false. Ignored if secondary counters are not supported. + * @metadata: Non-NULL pointer to where created metadata is stored + * on success. + * + * Return: 0 on success, else error code. 
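+ *
+ * Worked example (illustrative values): with l2_count = 2 and
+ * core_mask = 0b1011, non_sc_block_count is 4 (one JM, one Tiler and two
+ * MEMSYS blocks) and sc_block_count is fls64(0b1011) = 4, so the
+ * availability mask becomes 0b10111111: the low 4 bits mark the
+ * always-present blocks, and the upper bits embed the core mask, leaving
+ * the bit for the non-existent shader core 2 clear.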
+ */ +static int kbasep_hwcnt_backend_gpu_metadata_v5_create( + const struct kbase_hwcnt_gpu_v5_info *v5_info, + bool use_secondary, + const struct kbase_hwcnt_metadata **metadata) +{ + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description group; + struct kbase_hwcnt_block_description + blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + size_t non_sc_block_count; + size_t sc_block_count; + + WARN_ON(!v5_info); + WARN_ON(!metadata); + + /* Calculate number of block instances that aren't shader cores */ + non_sc_block_count = 2 + v5_info->l2_count; + /* Calculate number of block instances that are shader cores */ + sc_block_count = fls64(v5_info->core_mask); + + /* + * A system can have up to 64 shader cores, but the 64-bit + * availability mask can't physically represent that many cores as well + * as the other hardware blocks. + * Error out if there are more blocks than our implementation can + * support. + */ + if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + return -EINVAL; + + /* One Job Manager block */ + blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM; + blks[0].inst_cnt = 1; + blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* One Tiler block */ + blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; + blks[1].inst_cnt = 1; + blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* l2_count memsys blks */ + blks[2].type = use_secondary ? + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 : + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; + blks[2].inst_cnt = v5_info->l2_count; + blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* + * There are as many shader cores in the system as there are bits set in + * the core mask. However, the dump buffer memory requirements need to + * take into account the fact that the core mask may be non-contiguous. + * + * For example, a system with a core mask of 0b1011 has the same dump + * buffer memory requirements as a system with 0b1111, but requires more + * memory than a system with 0b0111. However, core 2 of the system with + * 0b1011 doesn't physically exist, and the dump buffer memory that + * accounts for that core will never be written to when we do a counter + * dump. + * + * We find the core mask's last set bit to determine the memory + * requirements, and embed the core mask into the availability mask so + * we can determine later which shader cores physically exist. + */ + blks[3].type = use_secondary ? + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 : + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; + blks[3].inst_cnt = sc_block_count; + blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); + + group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; + group.blks = blks; + + desc.grp_cnt = 1; + desc.grps = &group; + desc.clk_cnt = v5_info->clk_cnt; + + /* The JM, Tiler, and L2s are always available, and are before cores */ + desc.avail_mask = (1ull << non_sc_block_count) - 1; + /* Embed the core mask directly in the availability mask */ + desc.avail_mask |= (v5_info->core_mask << non_sc_block_count); + + return kbase_hwcnt_metadata_create(&desc, metadata); +} + +/** + * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a + * V5 GPU. 
+ * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * + * Return: Size of buffer the V5 GPU needs to perform a counter dump. + */ +static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes( + const struct kbase_hwcnt_gpu_v5_info *v5_info) +{ + WARN_ON(!v5_info); + return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) * + KBASE_HWCNT_V5_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_BYTES; +} + +int kbase_hwcnt_gpu_info_init( + struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info) +{ + size_t clk; + + if (!kbdev || !info) + return -EINVAL; + +#ifdef CONFIG_MALI_NO_MALI + /* NO_MALI uses V5 layout, regardless of the underlying platform. */ + info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; +#else + { + const struct base_gpu_props *props = &kbdev->gpu_props.props; + const size_t l2_count = props->l2_props.num_l2_slices; + const size_t core_mask = + props->coherency_info.group[0].core_mask; + + info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + info->v5.l2_count = l2_count; + info->v5.core_mask = core_mask; + } +#endif + + /* Determine the number of available clock domains. */ + for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { + if (kbdev->pm.clk_rtm.clks[clk] == NULL) + break; + } + info->v5.clk_cnt = clk; + + return 0; +} + +int kbase_hwcnt_gpu_metadata_create( + const struct kbase_hwcnt_gpu_info *info, + bool use_secondary, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes) +{ + int errcode; + const struct kbase_hwcnt_metadata *metadata; + size_t dump_bytes; + + if (!info || !out_metadata || !out_dump_bytes) + return -EINVAL; + + switch (info->type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4); + errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create( + &info->v4, &metadata); + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5); + errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create( + &info->v5, use_secondary, &metadata); + break; + default: + return -EINVAL; + } + if (errcode) + return errcode; + + /* + * Dump abstraction size should be exactly the same size and layout as + * the physical dump size, for backwards compatibility. + */ + WARN_ON(dump_bytes != metadata->dump_buf_bytes); + + *out_metadata = metadata; + *out_dump_bytes = dump_bytes; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create); + +void kbase_hwcnt_gpu_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata) +{ + if (!metadata) + return; + + kbase_hwcnt_metadata_destroy(metadata); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy); + +static bool is_block_type_shader( + const u64 grp_type, + const u64 blk_type, + const size_t blk) +{ + bool is_shader = false; + + switch (grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + /* blk-value in [0, KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP-1] + * corresponds to a shader, or its implementation + * reserved. As such, here we use the blk index value to + * tell the reserved case. 
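+ * For example, a core group with core_mask 0b0011 has blocks 0 and 1 typed
+ * as SHADER and blocks 2 and 3 typed as RESERVED, yet all four occupy
+ * shader-core slots and must be treated as such here.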
+ */ + if (blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER || + (blk < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP && + blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED)) + is_shader = true; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2) + is_shader = true; + break; + default: + /* Warn on unknown group type */ + WARN_ON(true); + } + + return is_shader; +} + +int kbase_hwcnt_gpu_dump_get( + struct kbase_hwcnt_dump_buffer *dst, + void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + u64 pm_core_mask, + bool accumulate) +{ + const struct kbase_hwcnt_metadata *metadata; + const u32 *dump_src; + size_t src_offset, grp, blk, blk_inst; + size_t grp_prev = 0; + u64 core_mask = pm_core_mask; + + if (!dst || !src || !dst_enable_map || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + metadata = dst->metadata; + dump_src = (const u32 *)src; + src_offset = 0; + + kbase_hwcnt_metadata_for_each_block( + metadata, grp, blk, blk_inst) { + const size_t hdr_cnt = + kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + const size_t ctr_cnt = + kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const bool is_shader_core = is_block_type_shader( + kbase_hwcnt_metadata_group_type(metadata, grp), + blk_type, blk); + + if (grp != grp_prev) { + /* grp change would only happen with V4. V5 and + * further are envisaged to be single group + * scenario only. Here needs to drop the lower + * group core-mask by shifting right with + * KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP. + */ + core_mask = pm_core_mask >> + KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; + grp_prev = grp; + } + + /* Early out if no values in the dest block are enabled */ + if (kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = dump_src + src_offset; + + if (!is_shader_core || (core_mask & 1)) { + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, + ctr_cnt); + } else { + kbase_hwcnt_dump_buffer_block_copy( + dst_blk, src_blk, + (hdr_cnt + ctr_cnt)); + } + } else if (!accumulate) { + kbase_hwcnt_dump_buffer_block_zero( + dst_blk, (hdr_cnt + ctr_cnt)); + } + } + + src_offset += (hdr_cnt + ctr_cnt); + if (is_shader_core) + core_mask = core_mask >> 1; + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get); + +/** + * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block + * enable map abstraction to + * a physical block enable + * map. + * @lo: Low 64 bits of block enable map abstraction. + * @hi: High 64 bits of block enable map abstraction. + * + * The abstraction uses 128 bits to enable 128 block values, whereas the + * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. + * Therefore, this conversion is lossy. + * + * Return: 32-bit physical block enable map. 
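+ *
+ * Worked example (illustrative values): lo = 0x1b (abstraction values 0, 1,
+ * 3 and 4 enabled) and hi = 0 set physical bit 0 (covering values 0-3) and
+ * physical bit 1 (covering values 4-7), giving a result of 0x3.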
+ */ +static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical( + u64 lo, + u64 hi) +{ + u32 phys = 0; + u64 dwords[2] = {lo, hi}; + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u64 dword = dwords[dword_idx]; + u16 packed = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u16 mask = + ((dword >> (dword_bit + 0)) & 0x1) | + ((dword >> (dword_bit + 1)) & 0x1) | + ((dword >> (dword_bit + 2)) & 0x1) | + ((dword >> (dword_bit + 3)) & 0x1); + packed |= (mask << hword_bit); + } + phys |= ((u32)packed) << (16 * dword_idx); + } + return phys; +} + +/** + * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical + * block enable map to a + * block enable map + * abstraction. + * @phys: Physical 32-bit block enable map + * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction + * will be stored. + * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction + * will be stored. + */ +static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical( + u32 phys, + u64 *lo, + u64 *hi) +{ + u64 dwords[2] = {0, 0}; + + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u16 packed = phys >> (16 * dword_idx); + u64 dword = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u64 mask = (packed >> (hword_bit)) & 0x1; + + dword |= mask << (dword_bit + 0); + dword |= mask << (dword_bit + 1); + dword |= mask << (dword_bit + 2); + dword |= mask << (dword_bit + 3); + } + dwords[dword_idx] = dword; + } + *lo = dwords[0]; + *hi = dwords[1]; +} + +void kbase_hwcnt_gpu_enable_map_to_physical( + struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 fe_bm = 0; + u64 shader_bm = 0; + u64 tiler_bm = 0; + u64 mmu_l2_bm = 0; + + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = src->metadata; + + kbase_hwcnt_metadata_for_each_block( + metadata, grp, blk, blk_inst) { + const u64 grp_type = kbase_hwcnt_metadata_group_type( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const size_t blk_val_cnt = + kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + src, grp, blk, blk_inst); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: + shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: + mmu_l2_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: + fe_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: + break; + default: + WARN_ON(true); + } + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: + fe_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + 
shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + mmu_l2_bm |= *blk_map; + break; + default: + WARN_ON(true); + } + break; + default: + WARN_ON(true); + } + } + + dst->fe_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0); + dst->shader_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); + dst->tiler_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0); + dst->mmu_l2_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical); + +void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 ignored_hi; + u64 fe_bm; + u64 shader_bm; + u64 tiler_bm; + u64 mmu_l2_bm; + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = dst->metadata; + + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->fe_bm, &fe_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->shader_bm, &shader_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->tiler_bm, &tiler_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi); + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + const u64 grp_type = kbase_hwcnt_metadata_group_type( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const size_t blk_val_cnt = + kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: + *blk_map = mmu_l2_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: + *blk_map = fe_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: + break; + default: + WARN_ON(true); + } + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: + *blk_map = fe_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + *blk_map = mmu_l2_bm; + break; + default: + WARN_ON(true); + } + break; + default: + WARN_ON(true); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical); + +void kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!buf) || WARN_ON(!enable_map) || + WARN_ON(buf->metadata != enable_map->metadata)) + return; + + metadata = buf->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + 
const u64 grp_type = + kbase_hwcnt_metadata_group_type(metadata, grp); + u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance( + buf, grp, blk, blk_inst); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + enable_map, grp, blk, blk_inst); + const u32 prfcnt_en = + kbasep_hwcnt_backend_gpu_block_map_to_physical( + blk_map[0], 0); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; + break; + default: + WARN_ON(true); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.h new file mode 100644 index 000000000000..13c1af3d567f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_gpu.h @@ -0,0 +1,255 @@ +/* + * + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_HWCNT_GPU_H_ +#define _KBASE_HWCNT_GPU_H_ + +#include + +struct kbase_device; +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to + * identify metadata groups. + * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type. + * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. + */ +enum kbase_hwcnt_gpu_group_type { + KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10, + KBASE_HWCNT_GPU_GROUP_TYPE_V5, +}; + +/** + * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types, + * used to identify metadata blocks. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: Shader block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: MMU/L2 block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: Job Manager block. + * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block. + */ +enum kbase_hwcnt_gpu_v4_block_type { + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM, + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED, +}; + +/** + * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, + * used to identify metadata blocks. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. 
+ */ +enum kbase_hwcnt_gpu_v5_block_type { + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, +}; + +/** + * struct kbase_hwcnt_physical_enable_map - Representation of enable map + * directly used by GPU. + * @fe_bm: Front end (JM/CSHW) counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. + * @mmu_l2_bm: MMU_L2 counters selection bitmask. + */ +struct kbase_hwcnt_physical_enable_map { + u32 fe_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; +}; + +/** + * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. + * @cg_count: Core group count. + * @cgs: Non-NULL pointer to array of cg_count coherent group structures. + * @clk_cnt: Number of clock domains available. + * + * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, + * where each core group may have a physically different layout. + */ +struct kbase_hwcnt_gpu_v4_info { + size_t cg_count; + const struct mali_base_gpu_coherent_group *cgs; + u8 clk_cnt; +}; + +/** + * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. + * @l2_count: L2 cache count. + * @core_mask: Shader core mask. May be sparse. + * @clk_cnt: Number of clock domains available. + */ +struct kbase_hwcnt_gpu_v5_info { + size_t l2_count; + u64 core_mask; + u8 clk_cnt; +}; + +/** + * struct kbase_hwcnt_gpu_info - Tagged union with information about the current + * GPU's hwcnt blocks. + * @type: GPU type. + * @v4: Info filled in if a v4 GPU. + * @v5: Info filled in if a v5 GPU. + */ +struct kbase_hwcnt_gpu_info { + enum kbase_hwcnt_gpu_group_type type; + union { + struct kbase_hwcnt_gpu_v4_info v4; + struct kbase_hwcnt_gpu_v5_info v5; + }; +}; + +/** + * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the + * hwcnt metadata. + * @kbdev: Non-NULL pointer to kbase device. + * @info: Non-NULL pointer to data structure to be filled in. + * + * The initialised info struct will only be valid for use while kbdev is valid. + */ +int kbase_hwcnt_gpu_info_init( + struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info); + +/** + * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the + * current GPU. + * @info: Non-NULL pointer to info struct initialised by + * kbase_hwcnt_gpu_info_init. + * @use_secondary: True if secondary performance counters should be used, else + * false. Ignored if secondary counters are not supported. + * @out_metadata: Non-NULL pointer to where created metadata is stored on + * success. + * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump + * buffer is stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_gpu_metadata_create( + const struct kbase_hwcnt_gpu_info *info, + bool use_secondary, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes); + +/** + * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata. + * @metadata: Pointer to metadata to destroy. + */ +void kbase_hwcnt_gpu_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw + * dump buffer in src into the dump buffer + * abstraction in dst. 
+ * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src raw dump buffer, of same length + * as returned in out_dump_bytes parameter of + * kbase_hwcnt_gpu_metadata_create. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @pm_core_mask: PM state synchronized shaders core mask with the dump. + * @accumulate: True if counters in src should be accumulated into dst, + * rather than copied. + * + * The dst and dst_enable_map MUST have been created from the same metadata as + * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get + * the length of src. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_gpu_dump_get( + struct kbase_hwcnt_dump_buffer *dst, + void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + const u64 pm_core_mask, + bool accumulate); + +/** + * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction + * into a physical enable map. + * @dst: Non-NULL pointer to dst physical enable map. + * @src: Non-NULL pointer to src enable map abstraction. + * + * The src must have been created from a metadata returned from a call to + * kbase_hwcnt_gpu_metadata_create. + * + * This is a lossy conversion, as the enable map abstraction has one bit per + * individual counter block value, but the physical enable map uses 1 bit for + * every 4 counters, shared over all instances of a block. + */ +void kbase_hwcnt_gpu_enable_map_to_physical( + struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src); + +/** + * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to + * an enable map abstraction. + * @dst: Non-NULL pointer to dst enable map abstraction. + * @src: Non-NULL pointer to src physical enable map. + * + * The dst must have been created from a metadata returned from a call to + * kbase_hwcnt_gpu_metadata_create. + * + * This is a lossy conversion, as the physical enable map can technically + * support counter blocks with 128 counters each, but no hardware actually uses + * more than 64, so the enable map abstraction has nowhere to store the enable + * information for the 64 non-existent counters. + */ +void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src); + +/** + * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter + * enable headers in a dump buffer to + * reflect the specified enable map. + * @buf: Non-NULL pointer to dump buffer to patch. + * @enable_map: Non-NULL pointer to enable map. + * + * The buf and enable_map must have been created from a metadata returned from + * a call to kbase_hwcnt_gpu_metadata_create. + * + * This function should be used before handing off a dump buffer over the + * kernel-user boundary, to ensure the header is accurate for the enable map + * used by the user. + */ +void kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map); + +#endif /* _KBASE_HWCNT_GPU_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.c b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.c new file mode 100644 index 000000000000..794ef39e365c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.c @@ -0,0 +1,152 @@ +/* + * + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_legacy.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_ioctl.h" + +#include +#include + +/** + * struct kbase_hwcnt_legacy_client - Legacy hardware counter client. + * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned. + * @enable_map: Counter enable map. + * @dump_buf: Dump buffer used to manipulate dumps before copied to user. + * @hvcli: Hardware counter virtualizer client. + */ +struct kbase_hwcnt_legacy_client { + void __user *user_dump_buf; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer dump_buf; + struct kbase_hwcnt_virtualizer_client *hvcli; +}; + +int kbase_hwcnt_legacy_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_ioctl_hwcnt_enable *enable, + struct kbase_hwcnt_legacy_client **out_hlcli) +{ + int errcode; + struct kbase_hwcnt_legacy_client *hlcli; + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_physical_enable_map phys_em; + + if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli) + return -EINVAL; + + metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + + hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL); + if (!hlcli) + return -ENOMEM; + + hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map); + if (errcode) + goto error; + + /* Translate from the ioctl enable map to the internal one */ + phys_em.fe_bm = enable->fe_bm; + phys_em.shader_bm = enable->shader_bm; + phys_em.tiler_bm = enable->tiler_bm; + phys_em.mmu_l2_bm = enable->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em); + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &hlcli->enable_map, &hlcli->hvcli); + if (errcode) + goto error; + + *out_hlcli = hlcli; + return 0; + +error: + kbase_hwcnt_legacy_client_destroy(hlcli); + return errcode; +} + +void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli) +{ + if (!hlcli) + return; + + kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli); + kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf); + kbase_hwcnt_enable_map_free(&hlcli->enable_map); + kfree(hlcli); +} + +int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli) +{ + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; + + if (!hlcli) + return -EINVAL; + + /* Dump into the kernel buffer */ + errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, + &ts_start_ns, &ts_end_ns, &hlcli->dump_buf); + if (errcode) + return errcode; + + /* Patch the dump buf headers, to hide the counters that other hwcnt 
+ * clients are using. + */ + kbase_hwcnt_gpu_patch_dump_headers( + &hlcli->dump_buf, &hlcli->enable_map); + + /* Zero all non-enabled counters (current values are undefined) */ + kbase_hwcnt_dump_buffer_zero_non_enabled( + &hlcli->dump_buf, &hlcli->enable_map); + + /* Copy into the user's buffer */ + errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf, + hlcli->dump_buf.metadata->dump_buf_bytes); + /* Non-zero errcode implies user buf was invalid or too small */ + if (errcode) + return -EFAULT; + + return 0; +} + +int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli) +{ + u64 ts_start_ns; + u64 ts_end_ns; + + if (!hlcli) + return -EINVAL; + + /* Dump with a NULL buffer to clear this client's counters */ + return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, + &ts_start_ns, &ts_end_ns, NULL); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.h new file mode 100644 index 000000000000..7a610ae378a2 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_legacy.h @@ -0,0 +1,94 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Legacy hardware counter interface, giving userspace clients simple, + * synchronous access to hardware counters. + * + * Any functions operating on a single legacy hardware counter client instance + * must be externally synchronised. + * Different clients may safely be used concurrently. + */ + +#ifndef _KBASE_HWCNT_LEGACY_H_ +#define _KBASE_HWCNT_LEGACY_H_ + +struct kbase_hwcnt_legacy_client; +struct kbase_ioctl_hwcnt_enable; +struct kbase_hwcnt_virtualizer; + +/** + * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client. + * @hvirt: Non-NULL pointer to hardware counter virtualizer the client + * should be attached to. + * @enable: Non-NULL pointer to hwcnt_enable structure, containing a valid + * pointer to a user dump buffer large enough to hold a dump, and + * the counters that should be enabled. + * @out_hlcli: Non-NULL pointer to where the pointer to the created client will + * be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_legacy_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_ioctl_hwcnt_enable *enable, + struct kbase_hwcnt_legacy_client **out_hlcli); + +/** + * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter + * client. + * @hlcli: Pointer to the legacy hardware counter client. + * + * Will safely destroy a client in any partial state of construction. + */ +void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli); + +/** + * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the + * client's user buffer.
+ * @hlcli: Non-NULL pointer to the legacy hardware counter client. + * + * This function will synchronously dump hardware counters into the user buffer + * specified on client creation, with the counters specified on client creation. + * + * The counters are automatically cleared after each dump, such that the next + * dump performed will return the counter values accumulated between the time of + * this function call and the next dump. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli); + +/** + * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter + * dump. + * @hlcli: Non-NULL pointer to the legacy hardware counter client. + * + * This function will synchronously clear the hardware counters, such that the + * next dump performed will return the counter values accumulated between the + * time of this function call and the next dump. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli); + +#endif /* _KBASE_HWCNT_LEGACY_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_reader.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_reader.h new file mode 100644 index 000000000000..8cd3835595f7 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_reader.h @@ -0,0 +1,106 @@ +/* + * + * (C) COPYRIGHT 2015, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_HWCNT_READER_H_ +#define _KBASE_HWCNT_READER_H_ + +#include + +/* The ids of ioctl commands. 
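+ *
+ * A hypothetical userspace sketch (the reader file descriptor fd is assumed
+ * to have been obtained from the kbase driver elsewhere, and error handling
+ * is omitted):
+ *
+ *   uint32_t size = 0;
+ *   ioctl(fd, KBASE_HWCNT_READER_GET_BUFFER_SIZE, &size);
+ *   ioctl(fd, KBASE_HWCNT_READER_DUMP, 0);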
*/ +#define KBASE_HWCNT_READER 0xBE +#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) +#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) +#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) +#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) +#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ + struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ + struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) +#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) +#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) +#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) +#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ + _IOW(KBASE_HWCNT_READER, 0xFF, \ + struct kbase_hwcnt_reader_api_version) + +/** + * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles + * @top: the number of cycles associated with the main clock for the + * GPU + * @shader_cores: the cycles that have elapsed on the GPU shader cores + */ +struct kbase_hwcnt_reader_metadata_cycles { + u64 top; + u64 shader_cores; +}; + +/** + * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata + * @timestamp: time when sample was collected + * @event_id: id of an event that triggered sample collection + * @buffer_idx: position in sampling area where sample buffer was stored + * @cycles: the GPU cycles that occurred since the last sample + */ +struct kbase_hwcnt_reader_metadata { + u64 timestamp; + u32 event_id; + u32 buffer_idx; + struct kbase_hwcnt_reader_metadata_cycles cycles; +}; + +/** + * enum base_hwcnt_reader_event - hwcnt dumping events + * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump + * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump + * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request + * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request + * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events + */ +enum base_hwcnt_reader_event { + BASE_HWCNT_READER_EVENT_MANUAL, + BASE_HWCNT_READER_EVENT_PERIODIC, + BASE_HWCNT_READER_EVENT_PREJOB, + BASE_HWCNT_READER_EVENT_POSTJOB, + + BASE_HWCNT_READER_EVENT_COUNT +}; + +/** + * struct kbase_hwcnt_reader_api_version - hwcnt reader API version + * @version: API version + * @features: available features in this API version + */ +#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1) +struct kbase_hwcnt_reader_api_version { + u32 version; + u32 features; +}; + +#endif /* _KBASE_HWCNT_READER_H_ */ + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_types.c new file mode 100644 index 000000000000..73ea6098397b --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_types.c @@ -0,0 +1,604 @@ +/* + * + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" + +/* Minimum alignment of each block of hardware counters */ +#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ + (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) + +/** + * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment. + * @value: The value to align upwards. + * @alignment: The alignment. + * + * Return: A number greater than or equal to value that is aligned to alignment. + */ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ + (value + ((alignment - (value % alignment)) % alignment)) + +int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **out_metadata) +{ + char *buf; + struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_group_metadata *grp_mds; + size_t grp; + size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ + size_t dump_buf_count; /* Number of u32 values (inc padding) */ + size_t avail_mask_bits; /* Number of availability mask bits */ + + size_t size; + size_t offset; + + if (!desc || !out_metadata) + return -EINVAL; + + /* The maximum number of clock domains is 64. 
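+ * That is one enable bit per clock domain in a 64-bit mask, which is what
+ * the sizeof(u64) * BITS_PER_BYTE comparison below enforces.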
*/ + if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE)) + return -EINVAL; + + /* Calculate the bytes needed to tightly pack the metadata */ + + /* Top level metadata */ + size = 0; + size += sizeof(struct kbase_hwcnt_metadata); + + /* Group metadata */ + size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + /* Block metadata */ + for (grp = 0; grp < desc->grp_cnt; grp++) { + size += sizeof(struct kbase_hwcnt_block_metadata) * + desc->grps[grp].blk_cnt; + } + + /* Single allocation for the entire metadata */ + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Use the allocated memory for the metadata and its members */ + + /* Bump allocate the top level metadata */ + offset = 0; + metadata = (struct kbase_hwcnt_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_metadata); + + /* Bump allocate the group metadata */ + grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + enable_map_count = 0; + dump_buf_count = 0; + avail_mask_bits = 0; + + for (grp = 0; grp < desc->grp_cnt; grp++) { + size_t blk; + + const struct kbase_hwcnt_group_description *grp_desc = + desc->grps + grp; + struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; + + size_t group_enable_map_count = 0; + size_t group_dump_buffer_count = 0; + size_t group_avail_mask_bits = 0; + + /* Bump allocate this group's block metadata */ + struct kbase_hwcnt_block_metadata *blk_mds = + (struct kbase_hwcnt_block_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_block_metadata) * + grp_desc->blk_cnt; + + /* Fill in each block in the group's information */ + for (blk = 0; blk < grp_desc->blk_cnt; blk++) { + const struct kbase_hwcnt_block_description *blk_desc = + grp_desc->blks + blk; + struct kbase_hwcnt_block_metadata *blk_md = + blk_mds + blk; + const size_t n_values = + blk_desc->hdr_cnt + blk_desc->ctr_cnt; + + blk_md->type = blk_desc->type; + blk_md->inst_cnt = blk_desc->inst_cnt; + blk_md->hdr_cnt = blk_desc->hdr_cnt; + blk_md->ctr_cnt = blk_desc->ctr_cnt; + blk_md->enable_map_index = group_enable_map_count; + blk_md->enable_map_stride = + kbase_hwcnt_bitfield_count(n_values); + blk_md->dump_buf_index = group_dump_buffer_count; + blk_md->dump_buf_stride = + KBASE_HWCNT_ALIGN_UPWARDS( + n_values, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + blk_md->avail_mask_index = group_avail_mask_bits; + + group_enable_map_count += + blk_md->enable_map_stride * blk_md->inst_cnt; + group_dump_buffer_count += + blk_md->dump_buf_stride * blk_md->inst_cnt; + group_avail_mask_bits += blk_md->inst_cnt; + } + + /* Fill in the group's information */ + grp_md->type = grp_desc->type; + grp_md->blk_cnt = grp_desc->blk_cnt; + grp_md->blk_metadata = blk_mds; + grp_md->enable_map_index = enable_map_count; + grp_md->dump_buf_index = dump_buf_count; + grp_md->avail_mask_index = avail_mask_bits; + + enable_map_count += group_enable_map_count; + dump_buf_count += group_dump_buffer_count; + avail_mask_bits += group_avail_mask_bits; + } + + /* Fill in the top level metadata's information */ + metadata->grp_cnt = desc->grp_cnt; + metadata->grp_metadata = grp_mds; + metadata->enable_map_bytes = + enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; + metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; + metadata->avail_mask = desc->avail_mask; + metadata->clk_cnt = desc->clk_cnt; + + WARN_ON(size != offset); + /* Due to the block alignment, there should be exactly one enable map + * 
bit per 4 bytes in the dump buffer. + */ + WARN_ON(metadata->dump_buf_bytes != + (metadata->enable_map_bytes * + BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); + + *out_metadata = metadata; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create); + +void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) +{ + kfree(metadata); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy); + +int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map) +{ + u64 *enable_map_buf; + + if (!metadata || !enable_map) + return -EINVAL; + + if (metadata->enable_map_bytes > 0) { + enable_map_buf = + kzalloc(metadata->enable_map_bytes, GFP_KERNEL); + if (!enable_map_buf) + return -ENOMEM; + } else { + enable_map_buf = NULL; + } + + enable_map->metadata = metadata; + enable_map->hwcnt_enable_map = enable_map_buf; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); + +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) +{ + if (!enable_map) + return; + + kfree(enable_map->hwcnt_enable_map); + enable_map->hwcnt_enable_map = NULL; + enable_map->metadata = NULL; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); + +int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; + u8 *buf; + + if (!metadata || !dump_buf) + return -EINVAL; + + dump_buf_bytes = metadata->dump_buf_bytes; + clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; + + /* Make a single allocation for both dump_buf and clk_cnt_buf. */ + buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dump_buf->metadata = metadata; + dump_buf->dump_buf = (u32 *)buf; + dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); + +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) +{ + if (!dump_buf) + return; + + kfree(dump_buf->dump_buf); + memset(dump_buf, 0, sizeof(*dump_buf)); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free); + +int kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, + size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + struct kbase_hwcnt_dump_buffer *buffers; + size_t buf_idx; + unsigned int order; + unsigned long addr; + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; + + if (!metadata || !dump_bufs) + return -EINVAL; + + dump_buf_bytes = metadata->dump_buf_bytes; + clk_cnt_buf_bytes = + sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; + + /* Allocate memory for the dump buffer struct array */ + buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); + if (!buffers) + return -ENOMEM; + + /* Allocate pages for the actual dump buffers, as they tend to be fairly + * large. 
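+ *
+ * The resulting single allocation is laid out as all n dump buffers followed
+ * by all n clock counter buffers:
+ *   [dump_buf 0]...[dump_buf n-1][clk_cnt_buf 0]...[clk_cnt_buf n-1]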
+ */ + order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); + addr = __get_free_pages(GFP_KERNEL, order); + + if (!addr) { + kfree(buffers); + return -ENOMEM; + } + + dump_bufs->page_addr = addr; + dump_bufs->page_order = order; + dump_bufs->buf_cnt = n; + dump_bufs->bufs = buffers; + + /* Set the buffer of each dump buf */ + for (buf_idx = 0; buf_idx < n; buf_idx++) { + const size_t dump_buf_offset = dump_buf_bytes * buf_idx; + const size_t clk_cnt_buf_offset = + (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); + + buffers[buf_idx].metadata = metadata; + buffers[buf_idx].dump_buf = (u32 *)(addr + dump_buf_offset); + buffers[buf_idx].clk_cnt_buf = + (u64 *)(addr + clk_cnt_buf_offset); + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc); + +void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + if (!dump_bufs) + return; + + kfree(dump_bufs->bufs); + free_pages(dump_bufs->page_addr, dump_bufs->page_order); + memset(dump_bufs, 0, sizeof(*dump_bufs)); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free); + +void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } + + memset(dst->clk_cnt_buf, 0, + sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); + +void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst) +{ + if (WARN_ON(!dst)) + return; + + memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); + + memset(dst->clk_cnt_buf, 0, + sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); + +void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + if (kbase_hwcnt_metadata_block_instance_avail( + metadata, grp, blk, blk_inst)) { + /* Block available, so only zero non-enabled values */ + kbase_hwcnt_dump_buffer_block_zero_non_enabled( + dst_blk, blk_em, val_cnt); + } else { + /* Block not available, so zero the entire thing */ + 
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled); + +void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + size_t clk; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + const u32 *src_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); + } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); + +void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + size_t clk; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, val_cnt); + } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + bool clk_enabled = + kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk); + + dst->clk_cnt_buf[clk] = clk_enabled ? 
src->clk_cnt_buf[clk] : 0; + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); + +void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + size_t clk; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + const u32 *src_blk; + size_t hdr_cnt; + size_t ctr_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + ctr_cnt = kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); + +void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + size_t clk; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES) - hdr_cnt); + + kbase_hwcnt_dump_buffer_block_accumulate_strict( + dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); + } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; + else + dst->clk_cnt_buf[clk] = 0; + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_types.h new file mode 100644 index 000000000000..6a2640f88926 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_types.h @@ -0,0 +1,1142 @@ +/* + * + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter types. + * Contains structures for describing the physical layout of hardware counter + * dump buffers and enable maps within a system. + * + * Also contains helper functions for manipulation of these dump buffers and + * enable maps. + * + * Through use of these structures and functions, hardware counters can be + * enabled, copied, accumulated, and generally manipulated in a generic way, + * regardless of the physical counter dump layout. + * + * Terminology: + * + * Hardware Counter System: + * A collection of hardware counter groups, making a full hardware counter + * system. + * Hardware Counter Group: + * A group of Hardware Counter Blocks (e.g. a t62x might have more than one + * core group, so has one counter group per core group, where each group + * may have a different number and layout of counter blocks). + * Hardware Counter Block: + * A block of hardware counters (e.g. shader block, tiler block). + * Hardware Counter Block Instance: + * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have + * 4 shader block instances). + * + * Block Header: + * A header value inside a counter block. Headers don't count anything, + * so it is only valid to copy or zero them. Headers are always the first + * values in the block. + * Block Counter: + * A counter value inside a counter block. Counters can be zeroed, copied, + * or accumulated. Counters are always immediately after the headers in the + * block. + * Block Value: + * A catch-all term for block headers and block counters. + * + * Enable Map: + * An array of u64 bitfields, where each bit either enables exactly one + * block value, or is unused (padding). + * Dump Buffer: + * An array of u32 values, where each u32 corresponds either to one block + * value, or is unused (padding). + * Availability Mask: + * A bitfield, where each bit corresponds to whether a block instance is + * physically available (e.g. an MP3 GPU may have a sparse core mask of + * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the + * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this + * case, the availability mask might be 0b1011111 (the exact layout will + * depend on the specific hardware architecture), with the 3 extra early bits + * corresponding to other block instances in the hardware counter system). + * Metadata: + * Structure describing the physical layout of the enable map and dump buffers + * for a specific hardware counter system. 
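+ *
+ * Worked example (illustrative only, not a real GPU layout): a description
+ * with one group containing a single block type that has 2 instances, each
+ * with 4 headers and 60 counters, would give metadata where
+ *   dump_buf_bytes   = 2 * (4 + 60) * sizeof(u32) = 512 bytes
+ *   enable_map_bytes = 2 * sizeof(u64)            = 16 bytes
+ * i.e. one enable bit per 4 bytes of dump buffer. Exact sizes may be larger
+ * once block alignment padding is applied when the metadata is created.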
+ * + */ + +#ifndef _KBASE_HWCNT_TYPES_H_ +#define _KBASE_HWCNT_TYPES_H_ + +#include +#include +#include +#include +#include "mali_malisw.h" + +/* Number of bytes in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) + +/* Number of bits in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE) + +/* Number of bytes for each counter value */ +#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32)) + +/* Number of bits in an availability mask (i.e. max total number of block + * instances supported in a Hardware Counter System) + */ +#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) + +/** + * struct kbase_hwcnt_block_description - Description of one or more identical, + * contiguous, Hardware Counter Blocks. + * @type: The arbitrary identifier used to identify the type of the block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 32-bit Block Headers in the block. + * @ctr_cnt: The number of 32-bit Block Counters in the block. + */ +struct kbase_hwcnt_block_description { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; +}; + +/** + * struct kbase_hwcnt_group_description - Description of one or more identical, + * contiguous Hardware Counter Groups. + * @type: The arbitrary identifier used to identify the type of the group. + * @blk_cnt: The number of types of Hardware Counter Block in the group. + * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, + * describing each type of Hardware Counter Block in the group. + */ +struct kbase_hwcnt_group_description { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_description *blks; +}; + +/** + * struct kbase_hwcnt_description - Description of a Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, + * describing each Hardware Counter Group in the system. + * @avail_mask: Flat Availability Mask for all block instances in the system. + * @clk_cnt: The number of clock domains in the system. The maximum is 64. + */ +struct kbase_hwcnt_description { + size_t grp_cnt; + const struct kbase_hwcnt_group_description *grps; + u64 avail_mask; + u8 clk_cnt; +}; + +/** + * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout + * of a block in a Hardware Counter System's + * Dump Buffers and Enable Maps. + * @type: The arbitrary identifier used to identify the type of the + * block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 32-bit Block Headers in the block. + * @ctr_cnt: The number of 32-bit Block Counters in the block. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Map bitfields of the Block Instances described by + * this metadata start. + * @enable_map_stride: Stride in u64s between the Enable Maps of each of the + * Block Instances described by this metadata. + * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the + * Dump Buffers of the Block Instances described by this + * metadata start. + * @dump_buf_stride: Stride in u32s between the Dump Buffers of each of the + * Block Instances described by this metadata. + * @avail_mask_index: Index in bits into the parent's Availability Mask where + * the Availability Masks of the Block Instances described + * by this metadata start. 
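+ *
+ * For instance (illustrative, ignoring any alignment padding): a block type
+ * with 64 values per instance needs one u64 enable-map bitfield per
+ * instance, so enable_map_stride would be 1 and dump_buf_stride would be 64.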
+ */ +struct kbase_hwcnt_block_metadata { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; + size_t enable_map_index; + size_t enable_map_stride; + size_t dump_buf_index; + size_t dump_buf_stride; + size_t avail_mask_index; +}; + +/** + * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout + * of a group of blocks in a Hardware + * Counter System's Dump Buffers and Enable + * Maps. + * @type: The arbitrary identifier used to identify the type of the + * group. + * @blk_cnt: The number of types of Hardware Counter Block in the + * group. + * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, + * describing the physical layout of each type of Hardware + * Counter Block in the group. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Maps of the blocks within the group described by + * this metadata start. + * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the + * Dump Buffers of the blocks within the group described by + * metadata start. + * @avail_mask_index: Index in bits into the parent's Availability Mask where + * the Availability Masks of the blocks within the group + * described by this metadata start. + */ +struct kbase_hwcnt_group_metadata { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_metadata *blk_metadata; + size_t enable_map_index; + size_t dump_buf_index; + size_t avail_mask_index; +}; + +/** + * struct kbase_hwcnt_metadata - Metadata describing the physical layout + * of Dump Buffers and Enable Maps within a + * Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, + * describing the physical layout of each Hardware Counter + * Group in the system. + * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. + * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. + * @avail_mask: The Availability Mask for the system. + * @clk_cnt: The number of clock domains in the system. + */ +struct kbase_hwcnt_metadata { + size_t grp_cnt; + const struct kbase_hwcnt_group_metadata *grp_metadata; + size_t enable_map_bytes; + size_t dump_buf_bytes; + u64 avail_mask; + u8 clk_cnt; +}; + +/** + * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64 + * bitfields. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the enable map. + * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an + * array of u64 bitfields, each bit of which enables one hardware + * counter. + * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle + * counter for a given clock domain. + */ +struct kbase_hwcnt_enable_map { + const struct kbase_hwcnt_metadata *metadata; + u64 *hwcnt_enable_map; + u64 clk_enable_map; +}; + +/** + * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32 + * values. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the Dump Buffer. + * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array + * of u32 values. + * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed + * for each clock domain. + */ +struct kbase_hwcnt_dump_buffer { + const struct kbase_hwcnt_metadata *metadata; + u32 *dump_buf; + u64 *clk_cnt_buf; +}; + +/** + * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. 
+ * @page_addr: Address of allocated pages. A single allocation is used for all + * Dump Buffers in the array. + * @page_order: The allocation order of the pages. + * @buf_cnt: The number of allocated Dump Buffers. + * @bufs: Non-NULL pointer to the array of Dump Buffers. + */ +struct kbase_hwcnt_dump_buffer_array { + unsigned long page_addr; + unsigned int page_order; + size_t buf_cnt; + struct kbase_hwcnt_dump_buffer *bufs; +}; + +/** + * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object + * from a description. + * @desc: Non-NULL pointer to a hardware counter description. + * @metadata: Non-NULL pointer to where created metadata will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **metadata); + +/** + * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object. + * @metadata: Pointer to hardware counter metadata + */ +void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_metadata_group_count() - Get the number of groups. + * @metadata: Non-NULL pointer to metadata. + * + * Return: Number of hardware counter groups described by metadata. + */ +#define kbase_hwcnt_metadata_group_count(metadata) \ + ((metadata)->grp_cnt) + +/** + * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * + * Return: Type of the group grp. + */ +#define kbase_hwcnt_metadata_group_type(metadata, grp) \ + ((metadata)->grp_metadata[(grp)].type) + +/** + * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * + * Return: Number of blocks in group grp. + */ +#define kbase_hwcnt_metadata_block_count(metadata, grp) \ + ((metadata)->grp_metadata[(grp)].blk_cnt) + +/** + * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Type of the block blk in group grp. + */ +#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type) + +/** + * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of + * a block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of instances of block blk in group grp. + */ +#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt) + +/** + * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter + * headers. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 counter headers in each instance of block blk in + * group grp. + */ +#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt) + +/** + * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. 
+ * + * Return: Number of u32 counters in each instance of block blk in group + * grp. + */ +#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt) + +/** + * kbase_hwcnt_metadata_block_values_count() - Get the number of values. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 headers plus counters in each instance of block blk + * in group grp. + */ +#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \ + (kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \ + + kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk))) + +/** + * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in + * the metadata. + * @md: Non-NULL pointer to metadata. + * @grp: size_t variable used as group iterator. + * @blk: size_t variable used as block iterator. + * @blk_inst: size_t variable used as block instance iterator. + * + * Iteration order is group, then block, then block instance (i.e. linearly + * through memory). + */ +#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ + for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ + for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ + for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++) + +/** + * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail + * mask corresponding to the block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: The bit index into the avail mask for the block. + */ +static inline size_t kbase_hwcnt_metadata_block_avail_bit( + const struct kbase_hwcnt_metadata *metadata, + size_t grp, + size_t blk) +{ + const size_t bit = + metadata->grp_metadata[grp].avail_mask_index + + metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; + + return bit; +} + +/** + * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is + * available. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if the block instance is available, else false. + */ +static inline bool kbase_hwcnt_metadata_block_instance_avail( + const struct kbase_hwcnt_metadata *metadata, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t bit = kbase_hwcnt_metadata_block_avail_bit( + metadata, grp, blk) + blk_inst; + const u64 mask = 1ull << bit; + + return (metadata->avail_mask & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_alloc() - Allocate an enable map. + * @metadata: Non-NULL pointer to metadata describing the system. + * @enable_map: Non-NULL pointer to enable map to be initialised. Will be + * initialised to all zeroes (i.e. all counters disabled). + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_free() - Free an enable map. + * @enable_map: Enable map to be freed. + * + * Can be safely called on an all-zeroed enable map structure, or on an already + * freed enable map. 
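+ *
+ * Minimal usage sketch (illustrative only; assumes a metadata object created
+ * earlier with kbase_hwcnt_metadata_create(), error handling elided):
+ *
+ *   struct kbase_hwcnt_enable_map map = { 0 };
+ *
+ *   if (!kbase_hwcnt_enable_map_alloc(metadata, &map)) {
+ *           kbase_hwcnt_enable_map_enable_all(&map);
+ *           ... use the map, e.g. to program counters ...
+ *           kbase_hwcnt_enable_map_free(&map);
+ *   }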
+ */ +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block + * instance's enable map. + * @map: Non-NULL pointer to (const) enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: (const) u64* to the bitfield(s) used as the enable map for the + * block instance. + */ +#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ + ((map)->hwcnt_enable_map + \ + (map)->metadata->grp_metadata[(grp)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) + +/** + * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required + * to have at minimum one bit per value. + * @val_cnt: Number of values. + * + * Return: Number of required bitfields. + */ +static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) +{ + return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / + KBASE_HWCNT_BITFIELD_BITS; +} + +/** + * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + */ +static inline void kbase_hwcnt_enable_map_block_disable_all( + struct kbase_hwcnt_enable_map *dst, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + dst->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); +} + +/** + * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. + * @dst: Non-NULL pointer to enable map to zero. + */ +static inline void kbase_hwcnt_enable_map_disable_all( + struct kbase_hwcnt_enable_map *dst) +{ + if (dst->hwcnt_enable_map != NULL) + memset(dst->hwcnt_enable_map, 0, + dst->metadata->enable_map_bytes); + + dst->clk_enable_map = 0; +} + +/** + * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + */ +static inline void kbase_hwcnt_enable_map_block_enable_all( + struct kbase_hwcnt_enable_map *dst, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + dst->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + size_t bitfld_idx; + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - + (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + block_enable_map[bitfld_idx] = block_enable_map_mask; + } +} + +/** + * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in an enable + * map. + * @dst: Non-NULL pointer to enable map. 
+ */ +static inline void kbase_hwcnt_enable_map_enable_all( + struct kbase_hwcnt_enable_map *dst) +{ + size_t grp, blk, blk_inst; + + kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) + kbase_hwcnt_enable_map_block_enable_all( + dst, grp, blk, blk_inst); + + dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; +} + +/** + * kbase_hwcnt_enable_map_copy() - Copy an enable map to another. + * @dst: Non-NULL pointer to destination enable map. + * @src: Non-NULL pointer to source enable map. + * + * The dst and src MUST have been created from the same metadata. + */ +static inline void kbase_hwcnt_enable_map_copy( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + if (dst->hwcnt_enable_map != NULL) { + memcpy(dst->hwcnt_enable_map, + src->hwcnt_enable_map, + dst->metadata->enable_map_bytes); + } + + dst->clk_enable_map = src->clk_enable_map; +} + +/** + * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst. + * @dst: Non-NULL pointer to destination enable map. + * @src: Non-NULL pointer to source enable map. + * + * The dst and src MUST have been created from the same metadata. + */ +static inline void kbase_hwcnt_enable_map_union( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + const size_t bitfld_count = + dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; + size_t i; + + if (dst->hwcnt_enable_map != NULL) { + for (i = 0; i < bitfld_count; i++) + dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i]; + } + + dst->clk_enable_map |= src->clk_enable_map; +} + +/** + * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block + * instance are enabled. + * @enable_map: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if any values in the block are enabled, else false. + */ +static inline bool kbase_hwcnt_enable_map_block_enabled( + const struct kbase_hwcnt_enable_map *enable_map, + size_t grp, + size_t blk, + size_t blk_inst) +{ + bool any_enabled = false; + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + enable_map->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + enable_map, grp, blk, blk_inst); + + size_t bitfld_idx; + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - + (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + any_enabled = any_enabled || + (block_enable_map[bitfld_idx] & block_enable_map_mask); + } + + return any_enabled; +} + +/** + * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled. + * @enable_map: Non-NULL pointer to enable map. + * + * Return: true if any values are enabled, else false. 
+ */ +static inline bool kbase_hwcnt_enable_map_any_enabled( + const struct kbase_hwcnt_enable_map *enable_map) +{ + size_t grp, blk, blk_inst; + const u64 clk_enable_map_mask = + (1ull << enable_map->metadata->clk_cnt) - 1; + + if (enable_map->metadata->clk_cnt > 0 && + (enable_map->clk_enable_map & clk_enable_map_mask)) + return true; + + kbase_hwcnt_metadata_for_each_block( + enable_map->metadata, grp, blk, blk_inst) { + if (kbase_hwcnt_enable_map_block_enabled( + enable_map, grp, blk, blk_inst)) + return true; + } + + return false; +} + +/** + * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block + * instance is enabled. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to check in the block instance. + * + * Return: true if the value was enabled, else false. + */ +static inline bool kbase_hwcnt_enable_map_block_value_enabled( + const u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + return (bitfld[idx] & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to enable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_enable_value( + u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] |= mask; +} + +/** + * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to disable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_disable_value( + u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] &= ~mask; +} + +/** + * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. + * @metadata: Non-NULL pointer to metadata describing the system. + * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be + * initialised to undefined values, so must be used as a copy dest, + * or cleared before use. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. + * @dump_buf: Dump buffer to be freed. + * + * Can be safely called on an all-zeroed dump buffer structure, or on an already + * freed dump buffer. + */ +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. + * @metadata: Non-NULL pointer to metadata describing the system. + * @n: Number of dump buffers to allocate + * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each + * dump buffer in the array will be initialised to undefined values, + * so must be used as a copy dest, or cleared before use. 
+ * + * A single contiguous page allocation will be used for all of the buffers + * inside the array, where: + * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, + size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. + * @dump_bufs: Dump buffer array to be freed. + * + * Can be safely called on an all-zeroed dump buffer array structure, or on an + * already freed dump buffer array. + */ +void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block + * instance's dump buffer. + * @buf: Non-NULL pointer to (const) dump buffer. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: (const) u32* to the dump buffer for the block instance. + */ +#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \ + ((buf)->dump_buf + \ + (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \ + (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \ + (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst)) + +/** + * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero( + u32 *dst_blk, + size_t val_cnt) +{ + memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst. + * After the operation, all values + * (including padding bytes) will be + * zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dump buffer. + */ +void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst); + +/** + * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in + * dst (including padding bytes and + * unavailable blocks). + * After the operation, all enabled + * values will be unchanged. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled + * values in a block. + * After the operation, all + * enabled values will be + * unchanged. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. 
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled( + u32 *dst_blk, + const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val)) + dst_blk[val] = 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_copy( + u32 *dst_blk, + const u32 *src_blk, + size_t val_cnt) +{ + /* Copy all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to + * dst. + * After the operation, all non-enabled + * values (including padding bytes) will + * be zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values + * from src to dst. + * After the operation, all + * non-enabled values will be + * zero. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + * + * After the copy, any disabled values in dst will be zero. + */ +static inline void kbase_hwcnt_dump_buffer_block_copy_strict( + u32 *dst_blk, + const u32 *src_blk, + const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled( + blk_em, val); + + dst_blk[val] = val_enabled ? src_blk[val] : 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and + * accumulate all enabled counters from + * src to dst. 
+ * After the operation, all non-enabled + * values will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and + * accumulate all block counters + * from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate( + u32 *dst_blk, + const u32 *src_blk, + size_t hdr_cnt, + size_t ctr_cnt) +{ + size_t ctr; + /* Copy all the headers in the block instance. + * Values of non-enabled headers are undefined. + */ + memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES); + + /* Accumulate all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { + u32 *dst_ctr = dst_blk + ctr; + const u32 *src_ctr = src_blk + ctr; + + const u32 src_counter = *src_ctr; + const u32 dst_counter = *dst_ctr; + + /* Saturating add */ + u32 accumulated = src_counter + dst_counter; + + if (accumulated < src_counter) + accumulated = U32_MAX; + + *dst_ctr = accumulated; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and + * accumulate all enabled counters + * from src to dst. + * After the operation, all + * non-enabled values (including + * padding bytes) will be zero. + * Slower than the non-strict + * variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block + * headers and accumulate + * all block counters from + * src to dst. + * After the operation, all + * non-enabled values will + * be zero. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. 
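+ *
+ * Enabled counters are accumulated with a saturating add: for example
+ * (illustrative), accumulating a src value of 0x20 into a dst value of
+ * 0xFFFFFFF0 yields U32_MAX rather than wrapping; non-enabled values are
+ * zeroed.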
+ */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( + u32 *dst_blk, + const u32 *src_blk, + const u64 *blk_em, + size_t hdr_cnt, + size_t ctr_cnt) +{ + size_t ctr; + + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, hdr_cnt); + + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { + bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled( + blk_em, ctr); + + u32 *dst_ctr = dst_blk + ctr; + const u32 *src_ctr = src_blk + ctr; + + const u32 src_counter = *src_ctr; + const u32 dst_counter = *dst_ctr; + + /* Saturating add */ + u32 accumulated = src_counter + dst_counter; + + if (accumulated < src_counter) + accumulated = U32_MAX; + + *dst_ctr = ctr_enabled ? accumulated : 0; + } +} + +/** + * @brief Iterate over each clock domain in the metadata. + * + * @param[in] md Non-NULL pointer to metadata. + * @param[in] clk size_t variable used as clock iterator. + */ +#define kbase_hwcnt_metadata_for_each_clock(md, clk) \ + for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) + +/** + * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled + * in clk_enable_map. + * @clk_enable_map: An enable map for clock domains. + * @index: Index of the enable map for clock domain. + * + * Return: true if the index of the clock domain is enabled, else false. + */ +static inline bool kbase_hwcnt_clk_enable_map_enabled( + const u64 clk_enable_map, const size_t index) +{ + if (clk_enable_map & (1ull << index)) + return true; + return false; +} + +#endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_virtualizer.c new file mode 100644 index 000000000000..917e47cda0f9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_virtualizer.c @@ -0,0 +1,790 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_accumulator.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" +#include "mali_kbase_linux.h" + +#include +#include + +/** + * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure. + * @hctx: Hardware counter context being virtualized. + * @dump_threshold_ns: Minimum threshold period for dumps between different + * clients where a new accumulator dump will not be + * performed, and instead accumulated values will be used. + * If 0, rate limiting is disabled. + * @metadata: Hardware counter metadata. + * @lock: Lock acquired at all entrypoints, to protect mutable + * state. + * @client_count: Current number of virtualizer clients. + * @clients: List of virtualizer clients. + * @accum: Hardware counter accumulator. NULL if no clients. 
+ * @scratch_map: Enable map used as scratch space during counter changes. + * @scratch_buf: Dump buffer used as scratch space during dumps. + * @ts_last_dump_ns: End time of most recent dump across all clients. + */ +struct kbase_hwcnt_virtualizer { + struct kbase_hwcnt_context *hctx; + u64 dump_threshold_ns; + const struct kbase_hwcnt_metadata *metadata; + struct mutex lock; + size_t client_count; + struct list_head clients; + struct kbase_hwcnt_accumulator *accum; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer scratch_buf; + u64 ts_last_dump_ns; +}; + +/** + * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure. + * @node: List node used for virtualizer client list. + * @hvirt: Hardware counter virtualizer. + * @enable_map: Enable map with client's current enabled counters. + * @accum_buf: Dump buffer with client's current accumulated counters. + * @has_accum: True if accum_buf contains any accumulated counters. + * @ts_start_ns: Counter collection start time of current dump. + */ +struct kbase_hwcnt_virtualizer_client { + struct list_head node; + struct kbase_hwcnt_virtualizer *hvirt; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool has_accum; + u64 ts_start_ns; +}; + +const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( + struct kbase_hwcnt_virtualizer *hvirt) +{ + if (!hvirt) + return NULL; + + return hvirt->metadata; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata); + +/** + * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. + * @hvcli: Pointer to virtualizer client. + * + * Will safely free a client in any partial state of construction. + */ +static void kbasep_hwcnt_virtualizer_client_free( + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + if (!hvcli) + return; + + kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf); + kbase_hwcnt_enable_map_free(&hvcli->enable_map); + kfree(hvcli); +} + +/** + * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer + * client. + * @metadata: Non-NULL pointer to counter metadata. + * @out_hvcli: Non-NULL pointer to where created client will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_virtualizer_client_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *hvcli = NULL; + + WARN_ON(!metadata); + WARN_ON(!out_hvcli); + + hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL); + if (!hvcli) + return -ENOMEM; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map); + if (errcode) + goto error; + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf); + if (errcode) + goto error; + + *out_hvcli = hvcli; + return 0; +error: + kbasep_hwcnt_virtualizer_client_free(hvcli); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a + * client's accumulation buffer. + * @hvcli: Non-NULL pointer to virtualizer client. + * @dump_buf: Non-NULL pointer to dump buffer to accumulate from. 
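+ *
+ * If the client has no accumulation pending, the dump buffer is copied into
+ * accum_buf; otherwise it is accumulated on top of the existing values, and
+ * has_accum is set either way.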
+ */ +static void kbasep_hwcnt_virtualizer_client_accumulate( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_dump_buffer *dump_buf) +{ + WARN_ON(!hvcli); + WARN_ON(!dump_buf); + lockdep_assert_held(&hvcli->hvirt->lock); + + if (hvcli->has_accum) { + /* If already some accumulation, accumulate */ + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } else { + /* If no accumulation, copy */ + kbase_hwcnt_dump_buffer_copy( + &hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } + hvcli->has_accum = true; +} + +/** + * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter + * accumulator after final client + * removal. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Will safely terminate the accumulator in any partial state of initialisation. + */ +static void kbasep_hwcnt_virtualizer_accumulator_term( + struct kbase_hwcnt_virtualizer *hvirt) +{ + WARN_ON(!hvirt); + lockdep_assert_held(&hvirt->lock); + WARN_ON(hvirt->client_count); + + kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf); + kbase_hwcnt_enable_map_free(&hvirt->scratch_map); + kbase_hwcnt_accumulator_release(hvirt->accum); + hvirt->accum = NULL; +} + +/** + * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter + * accumulator before first client + * addition. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_virtualizer_accumulator_init( + struct kbase_hwcnt_virtualizer *hvirt) +{ + int errcode; + + WARN_ON(!hvirt); + lockdep_assert_held(&hvirt->lock); + WARN_ON(hvirt->client_count); + WARN_ON(hvirt->accum); + + errcode = kbase_hwcnt_accumulator_acquire( + hvirt->hctx, &hvirt->accum); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc( + hvirt->metadata, &hvirt->scratch_map); + if (errcode) + goto error; + + errcode = kbase_hwcnt_dump_buffer_alloc( + hvirt->metadata, &hvirt->scratch_buf); + if (errcode) + goto error; + + return 0; +error: + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the + * virtualizer. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client to add. + * @enable_map: Non-NULL pointer to client's initial enable map. + * + * Return: 0 on success, else error code. 
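+ *
+ * For the first client the enable map is passed to the accumulator as-is;
+ * for later clients the accumulator is re-programmed with the union of all
+ * clients' enable maps, and the dump taken at that switch point is
+ * accumulated into the existing clients' buffers.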
+ */ +static int kbasep_hwcnt_virtualizer_client_add( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode = 0; + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!enable_map); + lockdep_assert_held(&hvirt->lock); + + if (hvirt->client_count == 0) + /* First client added, so initialise the accumulator */ + errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt); + if (errcode) + return errcode; + + hvirt->client_count += 1; + + if (hvirt->client_count == 1) { + /* First client, so just pass the enable map onwards as is */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + enable_map, &ts_start_ns, &ts_end_ns, NULL); + } else { + struct kbase_hwcnt_virtualizer_client *pos; + + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_copy( + &hvirt->scratch_map, enable_map); + list_for_each_entry(pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); + /* Accumulate into only existing clients' accumulation bufs */ + if (!errcode) + list_for_each_entry(pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + } + if (errcode) + goto error; + + list_add(&hvcli->node, &hvirt->clients); + hvcli->hvirt = hvirt; + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + hvcli->has_accum = false; + hvcli->ts_start_ns = ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = ts_end_ns; + + return 0; +error: + hvirt->client_count -= 1; + if (hvirt->client_count == 0) + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the + * virtualizer. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client to remove. 
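+ *
+ * If this was the last client the accumulator is terminated; otherwise the
+ * accumulator is re-programmed with the union of the remaining clients'
+ * enable maps and the resulting dump is accumulated into their buffers.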
+ */ +static void kbasep_hwcnt_virtualizer_client_remove( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + int errcode = 0; + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + lockdep_assert_held(&hvirt->lock); + + list_del(&hvcli->node); + hvirt->client_count -= 1; + + if (hvirt->client_count == 0) { + /* Last client removed, so terminate the accumulator */ + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + } else { + struct kbase_hwcnt_virtualizer_client *pos; + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); + list_for_each_entry(pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); + /* Accumulate into remaining clients' accumulation bufs */ + if (!errcode) + list_for_each_entry(pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = ts_end_ns; + } + WARN_ON(errcode); +} + +/** + * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, + * and enable a new set of + * counters that will be used for + * subsequent dumps. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. + * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. 
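+ *
+ * On success, *ts_start_ns is fixed up to the end timestamp of the client's
+ * previous dump, so consecutive dumps from one client cover contiguous,
+ * non-overlapping time ranges.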
+ */ +static int kbasep_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *pos; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!enable_map); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(enable_map->metadata != hvirt->metadata); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); + list_for_each_entry(pos, &hvirt->clients, node) + /* Ignore the enable map of the selected client */ + if (pos != hvcli) + kbase_hwcnt_enable_map_union( + &hvirt->scratch_map, &pos->enable_map); + + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, + &hvirt->scratch_map, ts_start_ns, ts_end_ns, + &hvirt->scratch_buf); + if (errcode) + return errcode; + + /* Accumulate into all accumulation bufs except the selected client's */ + list_for_each_entry(pos, &hvirt->clients, node) + if (pos != hvcli) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + + /* Finally, write into the dump buf */ + if (dump_buf) { + const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; + + if (hvcli->has_accum) { + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, src, &hvcli->enable_map); + src = &hvcli->accum_buf; + } + kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); + } + hvcli->has_accum = false; + + /* Update the selected client's enable map */ + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + + return errcode; +} + +int kbase_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer *hvirt; + + if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hvirt = hvcli->hvirt; + + if ((enable_map->metadata != hvirt->metadata) || + (dump_buf && (dump_buf->metadata != hvirt->metadata))) + return -EINVAL; + + mutex_lock(&hvirt->lock); + + if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { + /* + * If there's only one client with no prior accumulation, we can + * completely skip the virtualize and just pass through the call + * to the accumulator, saving a fair few copies and + * accumulations. 
+ */ + errcode = kbase_hwcnt_accumulator_set_counters( + hvirt->accum, enable_map, + ts_start_ns, ts_end_ns, dump_buf); + + if (!errcode) { + /* Update the selected client's enable map */ + kbase_hwcnt_enable_map_copy( + &hvcli->enable_map, enable_map); + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + } + } else { + /* Otherwise, do the full virtualize */ + errcode = kbasep_hwcnt_virtualizer_client_set_counters( + hvirt, hvcli, enable_map, + ts_start_ns, ts_end_ns, dump_buf); + } + + mutex_unlock(&hvirt->lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters); + +/** + * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *pos; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + /* Perform the dump */ + errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, + ts_start_ns, ts_end_ns, &hvirt->scratch_buf); + if (errcode) + return errcode; + + /* Accumulate into all accumulation bufs except the selected client's */ + list_for_each_entry(pos, &hvirt->clients, node) + if (pos != hvcli) + kbasep_hwcnt_virtualizer_client_accumulate( + pos, &hvirt->scratch_buf); + + /* Finally, write into the dump buf */ + if (dump_buf) { + const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; + + if (hvcli->has_accum) { + kbase_hwcnt_dump_buffer_accumulate( + &hvcli->accum_buf, src, &hvcli->enable_map); + src = &hvcli->accum_buf; + } + kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); + } + hvcli->has_accum = false; + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the + * client's currently enabled counters + * if it hasn't been rate limited, + * otherwise return the client's most + * recent accumulation. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. 
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + bool rate_limited = true; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + if (hvirt->dump_threshold_ns == 0) { + /* Threshold == 0, so rate limiting disabled */ + rate_limited = false; + } else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) { + /* Last dump was performed by this client, and dumps from an + * individual client are never rate limited + */ + rate_limited = false; + } else { + const u64 ts_ns = + kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum); + const u64 time_since_last_dump_ns = + ts_ns - hvirt->ts_last_dump_ns; + + /* Dump period equals or exceeds the threshold */ + if (time_since_last_dump_ns >= hvirt->dump_threshold_ns) + rate_limited = false; + } + + if (!rate_limited) + return kbasep_hwcnt_virtualizer_client_dump( + hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); + + /* If we've gotten this far, the client must have something accumulated + * otherwise it is a logic error + */ + WARN_ON(!hvcli->has_accum); + + if (dump_buf) + kbase_hwcnt_dump_buffer_copy( + dump_buf, &hvcli->accum_buf, &hvcli->enable_map); + hvcli->has_accum = false; + + *ts_start_ns = hvcli->ts_start_ns; + *ts_end_ns = hvirt->ts_last_dump_ns; + hvcli->ts_start_ns = hvirt->ts_last_dump_ns; + + return 0; +} + +int kbase_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer *hvirt; + + if (!hvcli || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hvirt = hvcli->hvirt; + + if (dump_buf && (dump_buf->metadata != hvirt->metadata)) + return -EINVAL; + + mutex_lock(&hvirt->lock); + + if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { + /* + * If there's only one client with no prior accumulation, we can + * completely skip the virtualize and just pass through the call + * to the accumulator, saving a fair few copies and + * accumulations. 
+ */ + errcode = kbase_hwcnt_accumulator_dump( + hvirt->accum, ts_start_ns, ts_end_ns, dump_buf); + + if (!errcode) { + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + } + } else { + /* Otherwise, do the full virtualize */ + errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited( + hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); + } + + mutex_unlock(&hvirt->lock); + + return errcode; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump); + +int kbase_hwcnt_virtualizer_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *hvcli; + + if (!hvirt || !enable_map || !out_hvcli || + (enable_map->metadata != hvirt->metadata)) + return -EINVAL; + + errcode = kbasep_hwcnt_virtualizer_client_alloc( + hvirt->metadata, &hvcli); + if (errcode) + return errcode; + + mutex_lock(&hvirt->lock); + + errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map); + + mutex_unlock(&hvirt->lock); + + if (errcode) { + kbasep_hwcnt_virtualizer_client_free(hvcli); + return errcode; + } + + *out_hvcli = hvcli; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create); + +void kbase_hwcnt_virtualizer_client_destroy( + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + if (!hvcli) + return; + + mutex_lock(&hvcli->hvirt->lock); + + kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli); + + mutex_unlock(&hvcli->hvirt->lock); + + kbasep_hwcnt_virtualizer_client_free(hvcli); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy); + +int kbase_hwcnt_virtualizer_init( + struct kbase_hwcnt_context *hctx, + u64 dump_threshold_ns, + struct kbase_hwcnt_virtualizer **out_hvirt) +{ + struct kbase_hwcnt_virtualizer *virt; + const struct kbase_hwcnt_metadata *metadata; + + if (!hctx || !out_hvirt) + return -EINVAL; + + metadata = kbase_hwcnt_context_metadata(hctx); + if (!metadata) + return -EINVAL; + + virt = kzalloc(sizeof(*virt), GFP_KERNEL); + if (!virt) + return -ENOMEM; + + virt->hctx = hctx; + virt->dump_threshold_ns = dump_threshold_ns; + virt->metadata = metadata; + + mutex_init(&virt->lock); + INIT_LIST_HEAD(&virt->clients); + + *out_hvirt = virt; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init); + +void kbase_hwcnt_virtualizer_term( + struct kbase_hwcnt_virtualizer *hvirt) +{ + if (!hvirt) + return; + + /* Non-zero client count implies client leak */ + if (WARN_ON(hvirt->client_count != 0)) { + struct kbase_hwcnt_virtualizer_client *pos, *n; + + list_for_each_entry_safe(pos, n, &hvirt->clients, node) + kbase_hwcnt_virtualizer_client_destroy(pos); + } + + WARN_ON(hvirt->client_count != 0); + WARN_ON(hvirt->accum); + + kfree(hvirt); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_virtualizer.h b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_virtualizer.h new file mode 100644 index 000000000000..8f628c3306fc --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_hwcnt_virtualizer.h @@ -0,0 +1,145 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter virtualizer API. + * + * Virtualizes a hardware counter context, so multiple clients can access + * a single hardware counter resource as though each was the exclusive user. + */ + +#ifndef _KBASE_HWCNT_VIRTUALIZER_H_ +#define _KBASE_HWCNT_VIRTUALIZER_H_ + +#include + +struct kbase_hwcnt_context; +struct kbase_hwcnt_virtualizer; +struct kbase_hwcnt_virtualizer_client; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. + * @hctx: Non-NULL pointer to the hardware counter context to + * virtualize. + * @dump_threshold_ns: Minimum threshold period for dumps between different + * clients where a new accumulator dump will not be + * performed, and instead accumulated values will be used. + * If 0, rate limiting will be disabled. + * @out_hvirt: Non-NULL pointer to where the pointer to the created + * virtualizer will be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_init( + struct kbase_hwcnt_context *hctx, + u64 dump_threshold_ns, + struct kbase_hwcnt_virtualizer **out_hvirt); + +/** + * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. + * @hvirt: Pointer to virtualizer to be terminated. + */ +void kbase_hwcnt_virtualizer_term( + struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by + * the virtualizer, so related counter data + * structures can be created. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( + struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @enable_map: Non-NULL pointer to the enable map for the client. Must have the + * same metadata as the virtualizer. + * @out_hvcli: Non-NULL pointer to where the pointer to the created client will + * be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli); + +/** + * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. + * @hvcli: Pointer to the hardware counter client. 
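+ *
+ * An illustrative client lifecycle (a sketch; hvirt, map and buf are assumed
+ * to be an already initialised virtualizer, enable map and dump buffer):
+ *
+ *	struct kbase_hwcnt_virtualizer_client *cli;
+ *	u64 ts_start_ns, ts_end_ns;
+ *
+ *	if (!kbase_hwcnt_virtualizer_client_create(hvirt, map, &cli)) {
+ *		kbase_hwcnt_virtualizer_client_dump(cli, &ts_start_ns,
+ *						    &ts_end_ns, buf);
+ *		kbase_hwcnt_virtualizer_client_destroy(cli);
+ *	}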
+ */ +void kbase_hwcnt_virtualizer_client_destroy( + struct kbase_hwcnt_virtualizer_client *hvcli); + +/** + * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, and + * enable a new set of counters + * that will be used for + * subsequent dumps. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. + * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_dump( + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_ioctl.h b/drivers/gpu/arm/b_r26p0/mali_kbase_ioctl.h new file mode 100644 index 000000000000..e46757bafdb2 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_ioctl.h @@ -0,0 +1,886 @@ +/* + * + * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_IOCTL_H_ +#define _KBASE_IOCTL_H_ + +#ifdef __cpluscplus +extern "C" { +#endif + +#include +#include + +#include "jm/mali_kbase_jm_ioctl.h" + +#define KBASE_IOCTL_TYPE 0x80 + +/** + * struct kbase_ioctl_version_check - Check version compatibility with kernel + * + * @major: Major version number + * @minor: Minor version number + */ +struct kbase_ioctl_version_check { + __u16 major; + __u16 minor; +}; + +#define KBASE_IOCTL_VERSION_CHECK \ + _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) + +/** + * struct kbase_ioctl_set_flags - Set kernel context creation flags + * + * @create_flags: Flags - see base_context_create_flags + */ +struct kbase_ioctl_set_flags { + __u32 create_flags; +}; + +#define KBASE_IOCTL_SET_FLAGS \ + _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) + +/** + * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel + * + * @buffer: Pointer to the buffer to store properties into + * @size: Size of the buffer + * @flags: Flags - must be zero for now + * + * The ioctl will return the number of bytes stored into @buffer or an error + * on failure (e.g. @size is too small). If @size is specified as 0 then no + * data will be written but the return value will be the number of bytes needed + * for all the properties. + * + * @flags may be used in the future to request a different format for the + * buffer. With @flags == 0 the following format is used. + * + * The buffer will be filled with pairs of values, a u32 key identifying the + * property followed by the value. The size of the value is identified using + * the bottom bits of the key. The value then immediately followed the key and + * is tightly packed (there is no padding). All keys and values are + * little-endian. + * + * 00 = u8 + * 01 = u16 + * 10 = u32 + * 11 = u64 + */ +struct kbase_ioctl_get_gpuprops { + __u64 buffer; + __u32 size; + __u32 flags; +}; + +#define KBASE_IOCTL_GET_GPUPROPS \ + _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) + +/** + * union kbase_ioctl_mem_alloc - Allocate memory on the GPU + * + * @va_pages: The number of pages of virtual address space to reserve + * @commit_pages: The number of physical pages to allocate + * @extent: The number of extra pages to allocate on each GPU fault which grows + * the region + * @flags: Flags + * @gpu_va: The GPU virtual address which is allocated + * + * @in: Input parameters + * @out: Output parameters + */ +union kbase_ioctl_mem_alloc { + struct { + __u64 va_pages; + __u64 commit_pages; + __u64 extent; + __u64 flags; + } in; + struct { + __u64 flags; + __u64 gpu_va; + } out; +}; + +#define KBASE_IOCTL_MEM_ALLOC \ + _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) + +/** + * struct kbase_ioctl_mem_query - Query properties of a GPU memory region + * @gpu_addr: A GPU address contained within the region + * @query: The type of query + * @value: The result of the query + * + * Use a %KBASE_MEM_QUERY_xxx flag as input for @query. 
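+ *
+ * Illustrative user-space call (a sketch; fd, gpu_va and flags are assumed to
+ * be an open kbase device fd, a valid GPU address and a caller variable):
+ *
+ *	union kbase_ioctl_mem_query q = { 0 };
+ *
+ *	q.in.gpu_addr = gpu_va;
+ *	q.in.query = KBASE_MEM_QUERY_FLAGS;
+ *	if (ioctl(fd, KBASE_IOCTL_MEM_QUERY, &q) == 0)
+ *		flags = q.out.value;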
+ * + * @in: Input parameters + * @out: Output parameters + */ +union kbase_ioctl_mem_query { + struct { + __u64 gpu_addr; + __u64 query; + } in; + struct { + __u64 value; + } out; +}; + +#define KBASE_IOCTL_MEM_QUERY \ + _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) + +#define KBASE_MEM_QUERY_COMMIT_SIZE ((u64)1) +#define KBASE_MEM_QUERY_VA_SIZE ((u64)2) +#define KBASE_MEM_QUERY_FLAGS ((u64)3) + +/** + * struct kbase_ioctl_mem_free - Free a memory region + * @gpu_addr: Handle to the region to free + */ +struct kbase_ioctl_mem_free { + __u64 gpu_addr; +}; + +#define KBASE_IOCTL_MEM_FREE \ + _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) + +/** + * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader + * @buffer_count: requested number of dumping buffers + * @fe_bm: counters selection bitmask (Front end) + * @shader_bm: counters selection bitmask (Shader) + * @tiler_bm: counters selection bitmask (Tiler) + * @mmu_l2_bm: counters selection bitmask (MMU_L2) + * + * A fd is returned from the ioctl if successful, or a negative value on error + */ +struct kbase_ioctl_hwcnt_reader_setup { + __u32 buffer_count; + __u32 fe_bm; + __u32 shader_bm; + __u32 tiler_bm; + __u32 mmu_l2_bm; +}; + +#define KBASE_IOCTL_HWCNT_READER_SETUP \ + _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) + +/** + * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection + * @dump_buffer: GPU address to write counters to + * @fe_bm: counters selection bitmask (Front end) + * @shader_bm: counters selection bitmask (Shader) + * @tiler_bm: counters selection bitmask (Tiler) + * @mmu_l2_bm: counters selection bitmask (MMU_L2) + */ +struct kbase_ioctl_hwcnt_enable { + __u64 dump_buffer; + __u32 fe_bm; + __u32 shader_bm; + __u32 tiler_bm; + __u32 mmu_l2_bm; +}; + +#define KBASE_IOCTL_HWCNT_ENABLE \ + _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) + +#define KBASE_IOCTL_HWCNT_DUMP \ + _IO(KBASE_IOCTL_TYPE, 10) + +#define KBASE_IOCTL_HWCNT_CLEAR \ + _IO(KBASE_IOCTL_TYPE, 11) + +/** + * struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to. + * @data: Counter samples for the dummy model. + * @size: Size of the counter sample data. + * @padding: Padding. + */ +struct kbase_ioctl_hwcnt_values { + __u64 data; + __u32 size; + __u32 padding; +}; + +#define KBASE_IOCTL_HWCNT_SET \ + _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) + +/** + * struct kbase_ioctl_disjoint_query - Query the disjoint counter + * @counter: A counter of disjoint events in the kernel + */ +struct kbase_ioctl_disjoint_query { + __u32 counter; +}; + +#define KBASE_IOCTL_DISJOINT_QUERY \ + _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) + +/** + * struct kbase_ioctl_get_ddk_version - Query the kernel version + * @version_buffer: Buffer to receive the kernel version string + * @size: Size of the buffer + * @padding: Padding + * + * The ioctl will return the number of bytes written into version_buffer + * (which includes a NULL byte) or a negative error code + * + * The ioctl request code has to be _IOW because the data in ioctl struct is + * being copied to the kernel, even though the kernel then writes out the + * version info to the buffer specified in the ioctl. 
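+ *
+ * Illustrative user-space call (a sketch; fd and buf are assumed to be an
+ * open kbase device fd and a caller-provided char array):
+ *
+ *	struct kbase_ioctl_get_ddk_version args = {
+ *		.version_buffer = (__u64)(uintptr_t)buf,
+ *		.size = sizeof(buf),
+ *	};
+ *	int len = ioctl(fd, KBASE_IOCTL_GET_DDK_VERSION, &args);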
+ */ +struct kbase_ioctl_get_ddk_version { + __u64 version_buffer; + __u32 size; + __u32 padding; +}; + +#define KBASE_IOCTL_GET_DDK_VERSION \ + _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) + +/** + * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory + * allocator (between kernel driver + * version 10.2--11.4) + * @va_pages: Number of VA pages to reserve for JIT + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. + * + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for + * backwards compatibility. + */ +struct kbase_ioctl_mem_jit_init_10_2 { + __u64 va_pages; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2) + +/** + * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory + * allocator (between kernel driver + * version 11.5--11.19) + * @va_pages: Number of VA pages to reserve for JIT + * @max_allocations: Maximum number of concurrent allocations + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) + * @group_id: Group ID to be used for physical allocations + * @padding: Currently unused, must be zero + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. + * + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for + * backwards compatibility. + */ +struct kbase_ioctl_mem_jit_init_11_5 { + __u64 va_pages; + __u8 max_allocations; + __u8 trim_level; + __u8 group_id; + __u8 padding[5]; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5) + +/** + * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory + * allocator + * @va_pages: Number of GPU virtual address pages to reserve for just-in-time + * memory allocations + * @max_allocations: Maximum number of concurrent allocations + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) + * @group_id: Group ID to be used for physical allocations + * @padding: Currently unused, must be zero + * @phys_pages: Maximum number of physical pages to allocate just-in-time + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. + */ +struct kbase_ioctl_mem_jit_init { + __u64 va_pages; + __u8 max_allocations; + __u8 trim_level; + __u8 group_id; + __u8 padding[5]; + __u64 phys_pages; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) + +/** + * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory + * + * @handle: GPU memory handle (GPU VA) + * @user_addr: The address where it is mapped in user space + * @size: The number of bytes to synchronise + * @type: The direction to synchronise: 0 is sync to memory (clean), + * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants. 
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_mem_sync { + __u64 handle; + __u64 user_addr; + __u64 size; + __u8 type; + __u8 padding[7]; +}; + +#define KBASE_IOCTL_MEM_SYNC \ + _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) + +/** + * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer + * + * @gpu_addr: The GPU address of the memory region + * @cpu_addr: The CPU address to locate + * @size: A size in bytes to validate is contained within the region + * @offset: The offset from the start of the memory region to @cpu_addr + * + * @in: Input parameters + * @out: Output parameters + */ +union kbase_ioctl_mem_find_cpu_offset { + struct { + __u64 gpu_addr; + __u64 cpu_addr; + __u64 size; + } in; + struct { + __u64 offset; + } out; +}; + +#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ + _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) + +/** + * struct kbase_ioctl_get_context_id - Get the kernel context ID + * + * @id: The kernel context ID + */ +struct kbase_ioctl_get_context_id { + __u32 id; +}; + +#define KBASE_IOCTL_GET_CONTEXT_ID \ + _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) + +/** + * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd + * + * @flags: Flags + * + * The ioctl returns a file descriptor when successful + */ +struct kbase_ioctl_tlstream_acquire { + __u32 flags; +}; + +#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ + _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) + +#define KBASE_IOCTL_TLSTREAM_FLUSH \ + _IO(KBASE_IOCTL_TYPE, 19) + +/** + * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region + * + * @gpu_addr: The memory region to modify + * @pages: The number of physical pages that should be present + * + * The ioctl may return on the following error codes or 0 for success: + * -ENOMEM: Out of memory + * -EINVAL: Invalid arguments + */ +struct kbase_ioctl_mem_commit { + __u64 gpu_addr; + __u64 pages; +}; + +#define KBASE_IOCTL_MEM_COMMIT \ + _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) + +/** + * union kbase_ioctl_mem_alias - Create an alias of memory regions + * @flags: Flags, see BASE_MEM_xxx + * @stride: Bytes between start of each memory region + * @nents: The number of regions to pack together into the alias + * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info + * @gpu_va: Address of the new alias + * @va_pages: Size of the new alias + * + * @in: Input parameters + * @out: Output parameters + */ +union kbase_ioctl_mem_alias { + struct { + __u64 flags; + __u64 stride; + __u64 nents; + __u64 aliasing_info; + } in; + struct { + __u64 flags; + __u64 gpu_va; + __u64 va_pages; + } out; +}; + +#define KBASE_IOCTL_MEM_ALIAS \ + _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) + +/** + * union kbase_ioctl_mem_import - Import memory for use by the GPU + * @flags: Flags, see BASE_MEM_xxx + * @phandle: Handle to the external memory + * @type: Type of external memory, see base_mem_import_type + * @padding: Amount of extra VA pages to append to the imported buffer + * @gpu_va: Address of the new alias + * @va_pages: Size of the new alias + * + * @in: Input parameters + * @out: Output parameters + */ +union kbase_ioctl_mem_import { + struct { + __u64 flags; + __u64 phandle; + __u32 type; + __u32 padding; + } in; + struct { + __u64 flags; + __u64 gpu_va; + __u64 va_pages; + } out; +}; + +#define KBASE_IOCTL_MEM_IMPORT \ + _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) + +/** + * 
struct kbase_ioctl_mem_flags_change - Change the flags for a memory region + * @gpu_va: The GPU region to modify + * @flags: The new flags to set + * @mask: Mask of the flags to modify + */ +struct kbase_ioctl_mem_flags_change { + __u64 gpu_va; + __u64 flags; + __u64 mask; +}; + +#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ + _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) + +/** + * struct kbase_ioctl_stream_create - Create a synchronisation stream + * @name: A name to identify this stream. Must be NULL-terminated. + * + * Note that this is also called a "timeline", but is named stream to avoid + * confusion with other uses of the word. + * + * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes. + * + * The ioctl returns a file descriptor. + */ +struct kbase_ioctl_stream_create { + char name[32]; +}; + +#define KBASE_IOCTL_STREAM_CREATE \ + _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) + +/** + * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence + * @fd: The file descriptor to validate + */ +struct kbase_ioctl_fence_validate { + int fd; +}; + +#define KBASE_IOCTL_FENCE_VALIDATE \ + _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) + +/** + * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel + * @buffer: Pointer to the information + * @len: Length + * @padding: Padding + * + * The data provided is accessible through a debugfs file + */ +struct kbase_ioctl_mem_profile_add { + __u64 buffer; + __u32 len; + __u32 padding; +}; + +#define KBASE_IOCTL_MEM_PROFILE_ADD \ + _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) + +/** + * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource + * @count: Number of resources + * @address: Array of u64 GPU addresses of the external resources to map + */ +struct kbase_ioctl_sticky_resource_map { + __u64 count; + __u64 address; +}; + +#define KBASE_IOCTL_STICKY_RESOURCE_MAP \ + _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map) + +/** + * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was + * previously permanently mapped + * @count: Number of resources + * @address: Array of u64 GPU addresses of the external resources to unmap + */ +struct kbase_ioctl_sticky_resource_unmap { + __u64 count; + __u64 address; +}; + +#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \ + _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap) + +/** + * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of + * the GPU memory region for + * the given gpu address and + * the offset of that address + * into the region + * + * @gpu_addr: GPU virtual address + * @size: Size in bytes within the region + * @start: Address of the beginning of the memory region enclosing @gpu_addr + * for the length of @offset bytes + * @offset: The offset from the start of the memory region to @gpu_addr + * + * @in: Input parameters + * @out: Output parameters + */ +union kbase_ioctl_mem_find_gpu_start_and_offset { + struct { + __u64 gpu_addr; + __u64 size; + } in; + struct { + __u64 start; + __u64 offset; + } out; +}; + +#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ + _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) + + +#define KBASE_IOCTL_CINSTR_GWT_START \ + _IO(KBASE_IOCTL_TYPE, 33) + +#define KBASE_IOCTL_CINSTR_GWT_STOP \ + _IO(KBASE_IOCTL_TYPE, 34) + +/** + * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses. 
+ * @addr_buffer: Address of buffer to hold addresses of gpu modified areas. + * @size_buffer: Address of buffer to hold size of modified areas (in pages) + * @len: Number of addresses the buffers can hold. + * @more_data_available: Status indicating if more addresses are available. + * @no_of_addr_collected: Number of addresses collected into addr_buffer. + * + * @in: Input parameters + * @out: Output parameters + * + * This structure is used when performing a call to dump GPU write fault + * addresses. + */ +union kbase_ioctl_cinstr_gwt_dump { + struct { + __u64 addr_buffer; + __u64 size_buffer; + __u32 len; + __u32 padding; + + } in; + struct { + __u32 no_of_addr_collected; + __u8 more_data_available; + __u8 padding[27]; + } out; +}; + +#define KBASE_IOCTL_CINSTR_GWT_DUMP \ + _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) + +/** + * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone + * + * @va_pages: Number of VA pages to reserve for EXEC_VA + */ +struct kbase_ioctl_mem_exec_init { + __u64 va_pages; +}; + +#define KBASE_IOCTL_MEM_EXEC_INIT \ + _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) + +/************************ + * MALI_SEC_INTEGRATION * + ************************/ +/* IOCTLs 36-41 are reserved */ +/* IOCTL 42 is free for use */ + +/* + * struct kbase_ioctl_slsi_combination_boost_flags - Update the status of combination boost flag + * @flags: Flags for future expansion + */ +struct kbase_ioctl_slsi_combination_boost_flags { + __u32 flags; +}; + +#define KBASE_IOCTL_SLSI_COMBINATION_BOOST_FLAGS \ + _IOW(KBASE_IOCTL_TYPE, 42, struct kbase_ioctl_slsi_combination_boost_flags) + +/* + * struct kbase_ioctl_slsi_vk_boost_flags - Update the status of vk boost flag + * @flags: Flags for future expansion + */ +struct kbase_ioctl_slsi_vk_boost_flags { + __u32 flags; +}; + +#define KBASE_IOCTL_SLSI_VK_BOOST_FLAGS \ + _IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_slsi_vk_boost_flags) + +/* + * struct kbase_ioctl_slsi_negative_boost_flags - Update the status of negative boost flag + * @flags: Flags for future expansion + */ +struct kbase_ioctl_slsi_negative_boost_flags { + __u32 flags; +}; + +#define KBASE_IOCTL_SLSI_NEGATIVE_BOOST_FLAGS \ + _IOW(KBASE_IOCTL_TYPE, 44, struct kbase_ioctl_slsi_negative_boost_flags) +/** + * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of + * cpu/gpu time (counter values) + * + * @request_flags: Bit-flags indicating the requested types. + * @paddings: Unused, size alignment matching the out. + * @sec: Integer field of the monotonic time, unit in seconds. + * @nsec: Fractional sec of the monotonic time, in nano-seconds. + * @padding: Unused, for u64 alignment + * @timestamp: System wide timestamp (counter) value. + * @cycle_counter: GPU cycle counter value. 
+ * + * @in: Input parameters + * @out: Output parameters + * + */ +union kbase_ioctl_get_cpu_gpu_timeinfo { + struct { + __u32 request_flags; + __u32 paddings[7]; + } in; + struct { + __u64 sec; + __u32 nsec; + __u32 padding; + __u64 timestamp; + __u64 cycle_counter; + } out; +}; + +#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ + _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo) + +/*************** + * test ioctls * + ***************/ +#if MALI_UNIT_TEST +/* These ioctls are purely for test purposes and are not used in the production + * driver, they therefore may change without notice + */ + +#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1) + +/** + * struct kbase_ioctl_tlstream_test - Start a timeline stream test + * + * @tpw_count: number of trace point writers in each context + * @msg_delay: time delay between tracepoints from one writer in milliseconds + * @msg_count: number of trace points written by one writer + * @aux_msg: if non-zero aux messages will be included + */ +struct kbase_ioctl_tlstream_test { + __u32 tpw_count; + __u32 msg_delay; + __u32 msg_count; + __u32 aux_msg; +}; + +#define KBASE_IOCTL_TLSTREAM_TEST \ + _IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test) + +/** + * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes + * @bytes_collected: number of bytes read by user + * @bytes_generated: number of bytes generated by tracepoints + */ +struct kbase_ioctl_tlstream_stats { + __u32 bytes_collected; + __u32 bytes_generated; +}; + +#define KBASE_IOCTL_TLSTREAM_STATS \ + _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) + +#endif /* MALI_UNIT_TEST */ + +/* Customer extension range */ +#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2) + +/* If the integration needs extra ioctl add them there + * like this: + * + * struct my_ioctl_args { + * .... 
+ * } + * + * #define KBASE_IOCTL_MY_IOCTL \ + * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args) + */ + + +/********************************** + * Definitions for GPU properties * + **********************************/ +#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0) +#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1) +#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2) +#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3) + +#define KBASE_GPUPROP_PRODUCT_ID 1 +#define KBASE_GPUPROP_VERSION_STATUS 2 +#define KBASE_GPUPROP_MINOR_REVISION 3 +#define KBASE_GPUPROP_MAJOR_REVISION 4 +/* 5 previously used for GPU speed */ +#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6 +/* 7 previously used for minimum GPU speed */ +#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8 +#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9 +#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10 +#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11 +#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12 + +#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13 +#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14 +#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15 + +#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16 +#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17 + +#define KBASE_GPUPROP_MAX_THREADS 18 +#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19 +#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20 +#define KBASE_GPUPROP_MAX_REGISTERS 21 +#define KBASE_GPUPROP_MAX_TASK_QUEUE 22 +#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23 +#define KBASE_GPUPROP_IMPL_TECH 24 + +#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25 +#define KBASE_GPUPROP_RAW_TILER_PRESENT 26 +#define KBASE_GPUPROP_RAW_L2_PRESENT 27 +#define KBASE_GPUPROP_RAW_STACK_PRESENT 28 +#define KBASE_GPUPROP_RAW_L2_FEATURES 29 +#define KBASE_GPUPROP_RAW_CORE_FEATURES 30 +#define KBASE_GPUPROP_RAW_MEM_FEATURES 31 +#define KBASE_GPUPROP_RAW_MMU_FEATURES 32 +#define KBASE_GPUPROP_RAW_AS_PRESENT 33 +#define KBASE_GPUPROP_RAW_JS_PRESENT 34 +#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35 +#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36 +#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37 +#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38 +#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39 +#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40 +#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41 +#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42 +#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43 +#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44 +#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45 +#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46 +#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47 +#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48 +#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49 +#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50 +#define KBASE_GPUPROP_RAW_TILER_FEATURES 51 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54 +#define KBASE_GPUPROP_RAW_GPU_ID 55 +#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56 +#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57 +#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58 +#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59 +#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60 + +#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61 +#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62 +#define KBASE_GPUPROP_COHERENCY_COHERENCY 63 +#define KBASE_GPUPROP_COHERENCY_GROUP_0 64 +#define KBASE_GPUPROP_COHERENCY_GROUP_1 65 +#define KBASE_GPUPROP_COHERENCY_GROUP_2 66 +#define KBASE_GPUPROP_COHERENCY_GROUP_3 67 +#define KBASE_GPUPROP_COHERENCY_GROUP_4 68 +#define KBASE_GPUPROP_COHERENCY_GROUP_5 69 +#define KBASE_GPUPROP_COHERENCY_GROUP_6 70 +#define 
KBASE_GPUPROP_COHERENCY_GROUP_7 71 +#define KBASE_GPUPROP_COHERENCY_GROUP_8 72 +#define KBASE_GPUPROP_COHERENCY_GROUP_9 73 +#define KBASE_GPUPROP_COHERENCY_GROUP_10 74 +#define KBASE_GPUPROP_COHERENCY_GROUP_11 75 +#define KBASE_GPUPROP_COHERENCY_GROUP_12 76 +#define KBASE_GPUPROP_COHERENCY_GROUP_13 77 +#define KBASE_GPUPROP_COHERENCY_GROUP_14 78 +#define KBASE_GPUPROP_COHERENCY_GROUP_15 79 + +#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81 + +#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 + +#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 +#define KBASE_GPUPROP_TLS_ALLOC 84 + +#ifdef __cpluscplus +} +#endif + +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_jd.c b/drivers/gpu/arm/b_r26p0/mali_kbase_jd.c new file mode 100644 index 000000000000..8744a3cffb4e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_jd.c @@ -0,0 +1,1819 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#ifdef CONFIG_COMPAT +#include +#endif +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "mali_kbase_dma_fence.h" +/* MALI_SEC_INTEGRATION */ +#include +#include "platform/exynos/gpu_integration_defs.h" +#include +#include + +#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) +/* random32 was renamed to prandom_u32 in 3.8 */ +#define prandom_u32 random32 +#endif + +/* Return whether katom will run on the GPU or not. Currently only soft jobs and + * dependency-only atoms do not run on the GPU */ +#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ + ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ + BASE_JD_REQ_DEP))) + +/* + * This is the kernel side of the API. Only entry points are: + * - kbase_jd_submit(): Called from userspace to submit a single bag + * - kbase_jd_done(): Called from interrupt context to track the + * completion of a job. + * Callouts: + * - to the job manager (enqueue a job) + * - to the event subsystem (signals the completion/failure of bag/job-chains). 
+ */ + +static void __user * +get_compat_pointer(struct kbase_context *kctx, const u64 p) +{ +#ifdef CONFIG_COMPAT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + return compat_ptr(p); +#endif + return u64_to_user_ptr(p); +} + +/* Mark an atom as complete, and trace it in kinstr_jm */ +static void jd_mark_atom_complete(struct kbase_jd_atom *katom) +{ + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + kbase_kinstr_jm_atom_complete(katom); + dev_dbg(katom->kctx->kbdev->dev, "Atom %p status to completed\n", + (void *)katom); +} + +/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs + * + * Returns whether the JS needs a reschedule. + * + * Note that the caller must also check the atom status and + * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock + */ +static bool jd_run_atom(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n", + (void *)katom, (void *)kctx); + + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + + if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { + /* Dependency only atom */ + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); + jd_mark_atom_complete(katom); + return 0; + } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* Soft-job */ + if (katom->will_fail_event_code) { + kbase_finish_soft_job(katom); + jd_mark_atom_complete(katom); + return 0; + } + if (kbase_process_soft_job(katom) == 0) { + kbase_finish_soft_job(katom); + jd_mark_atom_complete(katom); + } + return 0; + } + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom); + /* Queue an action about whether we should try scheduling a context */ + return kbasep_js_add_job(kctx, katom); +} + +void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) +{ + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(katom); + kbdev = katom->kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + /* Check whether the atom's other dependencies were already met. If + * katom is a GPU atom then the job scheduler may be able to represent + * the dependencies, hence we may attempt to submit it before they are + * met. Other atoms must have had both dependencies resolved. + */ + if (IS_GPU_ATOM(katom) || + (!kbase_jd_katom_dep_atom(&katom->dep[0]) && + !kbase_jd_katom_dep_atom(&katom->dep[1]))) { + /* katom dep complete, attempt to run it */ + bool resched = false; + + resched = jd_run_atom(katom); + + if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + /* The atom has already finished */ + resched |= jd_done_nolock(katom, NULL); + } + + if (resched) + kbase_js_sched_all(kbdev); + } +} + +void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) +{ +#ifdef CONFIG_MALI_DMA_FENCE + /* Flush dma-fence workqueue to ensure that any callbacks that may have + * been queued are done before continuing. + * Any successfully completed atom would have had all it's callbacks + * completed before the atom was run, so only flush for failed atoms. 
+ */ + if (katom->event_code != BASE_JD_EVENT_DONE) + flush_workqueue(katom->kctx->dma_fence.wq); +#endif /* CONFIG_MALI_DMA_FENCE */ +} + +static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_signal(katom); +#endif /* CONFIG_MALI_DMA_FENCE */ + + kbase_gpu_vm_lock(katom->kctx); + /* only roll back if extres is non-NULL */ + if (katom->extres) { + u32 res_no; + + res_no = katom->nr_extres; + while (res_no-- > 0) { + struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + kbase_unmap_external_resource(katom->kctx, reg, alloc); + } + kfree(katom->extres); + katom->extres = NULL; + } + kbase_gpu_vm_unlock(katom->kctx); +} + +/* + * Set up external resources needed by this job. + * + * jctx.lock must be held when this is called. + */ + +static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) +{ + int err_ret_val = -EINVAL; + u32 res_no; +#ifdef CONFIG_MALI_DMA_FENCE + struct kbase_dma_fence_resv_info info = { + .resv_objs = NULL, + .dma_fence_resv_count = 0, + .dma_fence_excl_bitmap = NULL + }; +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + /* + * When both dma-buf fence and Android native sync is enabled, we + * disable dma-buf fence for contexts that are using Android native + * fences. + */ + const bool implicit_sync = !kbase_ctx_flag(katom->kctx, + KCTX_NO_IMPLICIT_SYNC); +#else /* CONFIG_SYNC || CONFIG_SYNC_FILE*/ + const bool implicit_sync = true; +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#endif /* CONFIG_MALI_DMA_FENCE */ + struct base_external_resource *input_extres; + + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + + /* no resources encoded, early out */ + if (!katom->nr_extres) + return -EINVAL; + + katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL); + if (!katom->extres) + return -ENOMEM; + + /* copy user buffer to the end of our real buffer. 
+ * Make sure the struct sizes haven't changed in a way + * we don't support */ + BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); + input_extres = (struct base_external_resource *) + (((unsigned char *)katom->extres) + + (sizeof(*katom->extres) - sizeof(*input_extres)) * + katom->nr_extres); + + if (copy_from_user(input_extres, + get_compat_pointer(katom->kctx, user_atom->extres_list), + sizeof(*input_extres) * katom->nr_extres) != 0) { + err_ret_val = -EINVAL; + goto early_err_out; + } + +#ifdef CONFIG_MALI_DMA_FENCE + if (implicit_sync) { + info.resv_objs = kmalloc_array(katom->nr_extres, + sizeof(struct reservation_object *), + GFP_KERNEL); + if (!info.resv_objs) { + err_ret_val = -ENOMEM; + goto early_err_out; + } + + info.dma_fence_excl_bitmap = + kcalloc(BITS_TO_LONGS(katom->nr_extres), + sizeof(unsigned long), GFP_KERNEL); + if (!info.dma_fence_excl_bitmap) { + err_ret_val = -ENOMEM; + goto early_err_out; + } + } +#endif /* CONFIG_MALI_DMA_FENCE */ + + /* Take the processes mmap lock */ + down_read(¤t->mm->mmap_sem); + + /* need to keep the GPU VM locked while we set up UMM buffers */ + kbase_gpu_vm_lock(katom->kctx); + for (res_no = 0; res_no < katom->nr_extres; res_no++) { + struct base_external_resource *res = &input_extres[res_no]; + struct kbase_va_region *reg; + struct kbase_mem_phy_alloc *alloc; +#ifdef CONFIG_MALI_DMA_FENCE + bool exclusive; + exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) + ? true : false; +#endif + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, + res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + /* did we find a matching region object? */ + if (kbase_is_region_invalid_or_free(reg)) { + /* roll back */ + goto failed_loop; + } + + if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && + (reg->flags & KBASE_REG_PROTECTED)) { + katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; + } + + alloc = kbase_map_external_resource(katom->kctx, reg, + current->mm); + if (!alloc) { + err_ret_val = -EINVAL; + goto failed_loop; + } + +#ifdef CONFIG_MALI_DMA_FENCE + if (implicit_sync && + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + struct reservation_object *resv; + + resv = reg->gpu_alloc->imported.umm.dma_buf->resv; + if (resv) + kbase_dma_fence_add_reservation(resv, &info, + exclusive); + } +#endif /* CONFIG_MALI_DMA_FENCE */ + + /* finish with updating out array with the data we found */ + /* NOTE: It is important that this is the last thing we do (or + * at least not before the first write) as we overwrite elements + * as we loop and could be overwriting ourself, so no writes + * until the last read for an element. 
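+	 * Because input_extres was placed at the tail of katom->extres (and a
+	 * struct base_external_resource is never larger than a katom->extres
+	 * element, as the BUILD_BUG_ON above checks), every input element that
+	 * has not yet been read lies at or beyond the output elements already
+	 * written, so this in-place conversion cannot clobber input it still
+	 * needs.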
+ * */ + katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ + katom->extres[res_no].alloc = alloc; + } + /* successfully parsed the extres array */ + /* drop the vm lock now */ + kbase_gpu_vm_unlock(katom->kctx); + + /* Release the processes mmap lock */ + up_read(¤t->mm->mmap_sem); + +#ifdef CONFIG_MALI_DMA_FENCE + if (implicit_sync) { + if (info.dma_fence_resv_count) { + int ret; + + ret = kbase_dma_fence_wait(katom, &info); + if (ret < 0) + goto failed_dma_fence_setup; + } + + kfree(info.resv_objs); + kfree(info.dma_fence_excl_bitmap); + } +#endif /* CONFIG_MALI_DMA_FENCE */ + + /* all done OK */ + return 0; + +/* error handling section */ + +#ifdef CONFIG_MALI_DMA_FENCE +failed_dma_fence_setup: + /* Lock the processes mmap lock */ + down_read(¤t->mm->mmap_sem); + + /* lock before we unmap */ + kbase_gpu_vm_lock(katom->kctx); +#endif + + failed_loop: + /* undo the loop work */ + while (res_no-- > 0) { + struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + + kbase_unmap_external_resource(katom->kctx, NULL, alloc); + } + kbase_gpu_vm_unlock(katom->kctx); + + /* Release the processes mmap lock */ + up_read(¤t->mm->mmap_sem); + + early_err_out: + kfree(katom->extres); + katom->extres = NULL; +#ifdef CONFIG_MALI_DMA_FENCE + if (implicit_sync) { + kfree(info.resv_objs); + kfree(info.dma_fence_excl_bitmap); + } +#endif + return err_ret_val; +} + +static inline void jd_resolve_dep(struct list_head *out_list, + struct kbase_jd_atom *katom, + u8 d, bool ctx_is_dying) +{ + u8 other_d = !d; + + while (!list_empty(&katom->dep_head[d])) { + struct kbase_jd_atom *dep_atom; + struct kbase_jd_atom *other_dep_atom; + u8 dep_type; + + dep_atom = list_entry(katom->dep_head[d].next, + struct kbase_jd_atom, dep_item[d]); + list_del(katom->dep_head[d].next); + + dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); + kbase_jd_katom_dep_clear(&dep_atom->dep[d]); + + if (katom->event_code != BASE_JD_EVENT_DONE && + (dep_type != BASE_JD_DEP_TYPE_ORDER)) { +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_cancel_callbacks(dep_atom); +#endif + + dep_atom->event_code = katom->event_code; + KBASE_DEBUG_ASSERT(dep_atom->status != + KBASE_JD_ATOM_STATE_UNUSED); + + dep_atom->will_fail_event_code = dep_atom->event_code; + } + other_dep_atom = (struct kbase_jd_atom *) + kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); + + if (!dep_atom->in_jd_list && (!other_dep_atom || + (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && + !dep_atom->will_fail_event_code && + !other_dep_atom->will_fail_event_code))) { + bool dep_satisfied = true; +#ifdef CONFIG_MALI_DMA_FENCE + int dep_count; + + dep_count = kbase_fence_dep_count_read(dep_atom); + if (likely(dep_count == -1)) { + dep_satisfied = true; + } else { + /* + * There are either still active callbacks, or + * all fences for this @dep_atom has signaled, + * but the worker that will queue the atom has + * not yet run. + * + * Wait for the fences to signal and the fence + * worker to run and handle @dep_atom. If + * @dep_atom was completed due to error on + * @katom, then the fence worker will pick up + * the complete status and error code set on + * @dep_atom above. 
+ */ + dep_satisfied = false; + } +#endif /* CONFIG_MALI_DMA_FENCE */ + + if (dep_satisfied) { + trace_sysgraph(SGR_DEP_RES, + dep_atom->kctx->id, + kbase_jd_atom_id(katom->kctx, dep_atom)); + dep_atom->in_jd_list = true; + list_add_tail(&dep_atom->jd_item, out_list); + } + } + } +} + +/** + * is_dep_valid - Validate that a dependency is valid for early dependency + * submission + * @katom: Dependency atom to validate + * + * A dependency is valid if any of the following are true : + * - It does not exist (a non-existent dependency does not block submission) + * - It is in the job scheduler + * - It has completed, does not have a failure event code, and has not been + * marked to fail in the future + * + * Return: true if valid, false otherwise + */ +static bool is_dep_valid(struct kbase_jd_atom *katom) +{ + /* If there's no dependency then this is 'valid' from the perspective of + * early dependency submission */ + if (!katom) + return true; + + /* Dependency must have reached the job scheduler */ + if (katom->status < KBASE_JD_ATOM_STATE_IN_JS) + return false; + + /* If dependency has completed and has failed or will fail then it is + * not valid */ + if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && + (katom->event_code != BASE_JD_EVENT_DONE || + katom->will_fail_event_code)) + return false; + + return true; +} + +static void jd_try_submitting_deps(struct list_head *out_list, + struct kbase_jd_atom *node) +{ + int i; + + for (i = 0; i < 2; i++) { + struct list_head *pos; + + list_for_each(pos, &node->dep_head[i]) { + struct kbase_jd_atom *dep_atom = list_entry(pos, + struct kbase_jd_atom, dep_item[i]); + + if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { + /*Check if atom deps look sane*/ + bool dep0_valid = is_dep_valid( + dep_atom->dep[0].atom); + bool dep1_valid = is_dep_valid( + dep_atom->dep[1].atom); + bool dep_satisfied = true; +#ifdef CONFIG_MALI_DMA_FENCE + int dep_count; + + dep_count = kbase_fence_dep_count_read( + dep_atom); + if (likely(dep_count == -1)) { + dep_satisfied = true; + } else { + /* + * There are either still active callbacks, or + * all fences for this @dep_atom has signaled, + * but the worker that will queue the atom has + * not yet run. + * + * Wait for the fences to signal and the fence + * worker to run and handle @dep_atom. If + * @dep_atom was completed due to error on + * @katom, then the fence worker will pick up + * the complete status and error code set on + * @dep_atom above. + */ + dep_satisfied = false; + } +#endif /* CONFIG_MALI_DMA_FENCE */ + + if (dep0_valid && dep1_valid && dep_satisfied) { + trace_sysgraph(SGR_DEP_RES, + dep_atom->kctx->id, + kbase_jd_atom_id(dep_atom->kctx, + dep_atom)); + dep_atom->in_jd_list = true; + list_add(&dep_atom->jd_item, out_list); + } + } + } + } +} + +#if MALI_JIT_PRESSURE_LIMIT_BASE +/** + * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. + * + * @katom: An atom that has just finished. + * + * Read back actual just-in-time memory region usage from atoms that provide + * this information, and update the current physical page pressure. + * + * The caller must hold the kbase_jd_context.lock. 
+ */ +static void jd_update_jit_usage(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_va_region *reg; + struct kbase_vmap_struct mapping; + u64 *ptr; + u64 used_pages; + unsigned int idx; + + lockdep_assert_held(&kctx->jctx.lock); + + /* If this atom wrote to JIT memory, find out how much it has written + * and update the usage information in the region. + */ + for (idx = 0; + idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx]; + idx++) { + enum heap_pointer { LOW = 0, HIGH, COUNT }; + size_t size_to_read; + u64 read_val; + + reg = kctx->jit_alloc[katom->jit_ids[idx]]; + + if (!reg) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u has no region\n", + __func__, idx, katom->jit_ids[idx]); + continue; + } + + if (reg == KBASE_RESERVED_REG_JIT_ALLOC) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u has failed to allocate a region\n", + __func__, idx, katom->jit_ids[idx]); + continue; + } + + if (!reg->heap_info_gpu_addr) + continue; + + size_to_read = sizeof(*ptr); + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) + size_to_read = sizeof(u32); + else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) + size_to_read = sizeof(u64[COUNT]); + + ptr = kbase_vmap(kctx, reg->heap_info_gpu_addr, size_to_read, + &mapping); + + if (!ptr) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n", + __func__, idx, katom->jit_ids[idx], + reg->start_pfn << PAGE_SHIFT, + reg->heap_info_gpu_addr); + continue; + } + + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) { + read_val = READ_ONCE(*(u32 *)ptr); + used_pages = PFN_UP(read_val); + } else { + u64 addr_end; + + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + const unsigned long extent_bytes = reg->extent + << PAGE_SHIFT; + const u64 low_ptr = ptr[LOW]; + const u64 high_ptr = ptr[HIGH]; + + /* As either the low or high pointer could + * consume their partition and move onto the + * next chunk, we need to account for both. + * In the case where nothing has been allocated + * from the high pointer the whole chunk could + * be backed unnecessarily - but the granularity + * is the chunk size anyway and any non-zero + * offset of low pointer from the start of the + * chunk would result in the whole chunk being + * backed. + */ + read_val = max(high_ptr, low_ptr); + + /* kbase_check_alloc_sizes() already satisfies + * this, but here to avoid future maintenance + * hazards + */ + WARN_ON(!is_power_of_2(extent_bytes)); + addr_end = ALIGN(read_val, extent_bytes); + } else { + addr_end = read_val = READ_ONCE(*ptr); + } + + if (addr_end >= (reg->start_pfn << PAGE_SHIFT)) + used_pages = PFN_UP(addr_end) - reg->start_pfn; + else + used_pages = reg->used_pages; + } + + trace_mali_jit_report(katom, reg, idx, read_val, used_pages); + kbase_trace_jit_report_gpu_mem(kctx, reg, 0u); + + /* We can never have used more pages than the VA size of the + * region + */ + if (used_pages > reg->nr_pages) { + dev_warn(kctx->kbdev->dev, + "%s: JIT id[%u]=%u start=0x%llx used_pages %llx > %zx (read 0x%llx as %s%s)\n", + __func__, idx, katom->jit_ids[idx], + reg->start_pfn << PAGE_SHIFT, + used_pages, reg->nr_pages, read_val, + (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ? + "size" : "addr", + (reg->flags & KBASE_REG_TILER_ALIGN_TOP) ? + " with align" : ""); + used_pages = reg->nr_pages; + } + /* Note: one real use case has an atom correctly reporting 0 + * pages in use. This happens in normal use-cases but may only + * happen for a few of the application's frames. 
+ */ + + kbase_vunmap(kctx, &mapping); + + kbase_jit_report_update_pressure(kctx, reg, used_pages, 0u); + } + + kbase_jit_retry_pending_alloc(kctx); +} +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +/* + * Perform the necessary handling of an atom that has finished running + * on the GPU. + * + * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller + * is responsible for calling kbase_finish_soft_job *before* calling this function. + * + * The caller must hold the kbase_jd_context.lock. + */ +bool jd_done_nolock(struct kbase_jd_atom *katom, + struct list_head *completed_jobs_ctx) +{ + struct kbase_context *kctx = katom->kctx; + struct list_head completed_jobs; + struct list_head runnable_jobs; + bool need_to_try_schedule_context = false; + int i; + + INIT_LIST_HEAD(&completed_jobs); + INIT_LIST_HEAD(&runnable_jobs); + + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (kbase_ctx_flag(kctx, KCTX_JPL_ENABLED)) + jd_update_jit_usage(katom); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + /* This is needed in case an atom is failed due to being invalid, this + * can happen *before* the jobs that the atom depends on have completed */ + for (i = 0; i < 2; i++) { + if (kbase_jd_katom_dep_atom(&katom->dep[i])) { + list_del(&katom->dep_item[i]); + kbase_jd_katom_dep_clear(&katom->dep[i]); + } + } + + jd_mark_atom_complete(katom); + list_add_tail(&katom->jd_item, &completed_jobs); + + while (!list_empty(&completed_jobs)) { + katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); + list_del(completed_jobs.prev); + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + + for (i = 0; i < 2; i++) + jd_resolve_dep(&runnable_jobs, katom, i, + kbase_ctx_flag(kctx, KCTX_DYING)); + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_post_external_resources(katom); + + while (!list_empty(&runnable_jobs)) { + struct kbase_jd_atom *node; + + node = list_entry(runnable_jobs.next, + struct kbase_jd_atom, jd_item); + list_del(runnable_jobs.next); + node->in_jd_list = false; + + dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n", + node, node->status); + + KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); + if (node->status == KBASE_JD_ATOM_STATE_IN_JS) + continue; + + if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && + !kbase_ctx_flag(kctx, KCTX_DYING)) { + need_to_try_schedule_context |= jd_run_atom(node); + } else { + node->event_code = katom->event_code; + + if (node->core_req & + BASE_JD_REQ_SOFT_JOB) { + WARN_ON(!list_empty(&node->queue)); + kbase_finish_soft_job(node); + } + node->status = KBASE_JD_ATOM_STATE_COMPLETED; + } + + if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { + list_add_tail(&node->jd_item, &completed_jobs); + } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && + !node->will_fail_event_code) { + /* Node successfully submitted, try submitting + * dependencies as they may now be representable + * in JS */ + jd_try_submitting_deps(&runnable_jobs, node); + } + } + + /* Register a completed job as a disjoint event when the GPU + * is in a disjoint state (ie. being reset). + */ + kbase_disjoint_event_potential(kctx->kbdev); + if (completed_jobs_ctx) + list_add_tail(&katom->jd_item, completed_jobs_ctx); + else + kbase_event_post(kctx, katom); + + /* Decrement and check the TOTAL number of jobs. This includes + * those not tracked by the scheduler: 'not ready to run' and + * 'dependency-only' jobs. 
*/ + if (--kctx->jctx.job_nr == 0) + wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter + * that we've got no more jobs (so we can be safely terminated) */ + } + + return need_to_try_schedule_context; +} + +KBASE_EXPORT_TEST_API(jd_done_nolock); + +#ifdef CONFIG_GPU_TRACEPOINTS +enum { + CORE_REQ_DEP_ONLY, + CORE_REQ_SOFT, + CORE_REQ_COMPUTE, + CORE_REQ_FRAGMENT, + CORE_REQ_VERTEX, + CORE_REQ_TILER, + CORE_REQ_FRAGMENT_VERTEX, + CORE_REQ_FRAGMENT_VERTEX_TILER, + CORE_REQ_FRAGMENT_TILER, + CORE_REQ_VERTEX_TILER, + CORE_REQ_UNKNOWN +}; +static const char * const core_req_strings[] = { + "Dependency Only Job", + "Soft Job", + "Compute Shader Job", + "Fragment Shader Job", + "Vertex/Geometry Shader Job", + "Tiler Job", + "Fragment Shader + Vertex/Geometry Shader Job", + "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", + "Fragment Shader + Tiler Job", + "Vertex/Geometry Shader Job + Tiler Job", + "Unknown Job" +}; +static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) +{ + if (core_req & BASE_JD_REQ_SOFT_JOB) + return core_req_strings[CORE_REQ_SOFT]; + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + return core_req_strings[CORE_REQ_COMPUTE]; + switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { + case BASE_JD_REQ_DEP: + return core_req_strings[CORE_REQ_DEP_ONLY]; + case BASE_JD_REQ_FS: + return core_req_strings[CORE_REQ_FRAGMENT]; + case BASE_JD_REQ_CS: + return core_req_strings[CORE_REQ_VERTEX]; + case BASE_JD_REQ_T: + return core_req_strings[CORE_REQ_TILER]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): + return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_FRAGMENT_TILER]; + case (BASE_JD_REQ_CS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_VERTEX_TILER]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; + } + return core_req_strings[CORE_REQ_UNKNOWN]; +} +#endif + +/* Trace an atom submission. */ +static void jd_trace_atom_submit(struct kbase_context *const kctx, + struct kbase_jd_atom *const katom, + int *priority) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, katom, kbase_jd_atom_id(kctx, katom)); + KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + if (priority) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, *priority); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); + kbase_kinstr_jm_atom_queue(katom); +} + +static bool jd_submit_atom(struct kbase_context *const kctx, + const struct base_jd_atom *const user_atom, + const struct base_jd_fragment *const user_jc_incr, + struct kbase_jd_atom *const katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_jd_context *jctx = &kctx->jctx; + int queued = 0; + int i; + int sched_prio; + bool will_fail = false; + + dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom); + + /* Update the TOTAL number of jobs. This includes those not tracked by + * the scheduler: 'not ready to run' and 'dependency-only' jobs. 
 */
+	jctx->job_nr++;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+	katom->start_timestamp.tv64 = 0;
+#else
+	katom->start_timestamp = 0;
+#endif
+	katom->udata = user_atom->udata;
+	katom->kctx = kctx;
+	katom->nr_extres = user_atom->nr_extres;
+	katom->extres = NULL;
+	katom->device_nr = user_atom->device_nr;
+	katom->jc = user_atom->jc;
+	katom->core_req = user_atom->core_req;
+	katom->jobslot = user_atom->jobslot;
+	katom->seq_nr = user_atom->seq_nr;
+	katom->atom_flags = 0;
+	katom->retry_count = 0;
+	katom->need_cache_flush_cores_retained = 0;
+	katom->pre_dep = NULL;
+	katom->post_dep = NULL;
+	katom->x_pre_dep = NULL;
+	katom->x_post_dep = NULL;
+	katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED;
+	katom->softjob_data = NULL;
+
+	trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number);
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+	/* Older API version atoms might have random values where jit_id now
+	 * lives, but we must maintain backwards compatibility - handle the
+	 * issue.
+	 */
+	if (!mali_kbase_supports_jit_pressure_limit(kctx->api_version)) {
+		katom->jit_ids[0] = 0;
+		katom->jit_ids[1] = 0;
+	} else {
+		katom->jit_ids[0] = user_atom->jit_id[0];
+		katom->jit_ids[1] = user_atom->jit_id[1];
+	}
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+	katom->renderpass_id = user_atom->renderpass_id;
+
+	/* Implicitly sets katom->protected_state.enter as well. */
+	katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
+
+	katom->age = kctx->age_count++;
+
+	INIT_LIST_HEAD(&katom->queue);
+	INIT_LIST_HEAD(&katom->jd_item);
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_fence_dep_count_set(katom, -1);
+#endif
+
+	/* Don't do anything if the dependency setup is broken. Both
+	 * dependencies are checked in a separate pass first, because it would
+	 * add extra complexity to unwind the 1st dependency (just added to
+	 * the list) if only the 2nd one turned out to have an invalid config.
+	 */
+	for (i = 0; i < 2; i++) {
+		int dep_atom_number = user_atom->pre_dep[i].atom_id;
+		base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+		if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+				katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+				dev_dbg(kbdev->dev,
+					"Atom %p status to completed\n",
+					(void *)katom);
+
+				/* Wrong dependency setup. Atom will be sent
+				 * back to user space. Do not record any
+				 * dependencies.
*/ + jd_trace_atom_submit(kctx, katom, NULL); + + return jd_done_nolock(katom, NULL); + } + } + } + + /* Add dependencies */ + for (i = 0; i < 2; i++) { + int dep_atom_number = user_atom->pre_dep[i].atom_id; + base_jd_dep_type dep_atom_type; + struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; + + dep_atom_type = user_atom->pre_dep[i].dependency_type; + kbase_jd_katom_dep_clear(&katom->dep[i]); + + if (!dep_atom_number) + continue; + + if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || + dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + + if (dep_atom->event_code == BASE_JD_EVENT_DONE) + continue; + /* don't stop this atom if it has an order dependency + * only to the failed one, try to submit it through + * the normal path + */ + if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && + dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { + continue; + } + + /* Atom has completed, propagate the error code if any */ + katom->event_code = dep_atom->event_code; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", + (void *)katom); + + /* This atom will be sent back to user space. + * Do not record any dependencies. + */ + jd_trace_atom_submit(kctx, katom, NULL); + + will_fail = true; + + } else { + /* Atom is in progress, add this atom to the list */ + list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); + kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type); + queued = 1; + } + } + + if (will_fail) { + if (!queued) { + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* This softjob has failed due to a previous + * dependency, however we should still run the + * prepare & finish functions + */ + int err = kbase_prepare_soft_job(katom); + + if (err >= 0) + kbase_finish_soft_job(katom); + } + + return jd_done_nolock(katom, NULL); + } + + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* This softjob has failed due to a previous + * dependency, however we should still run the + * prepare & finish functions + */ + if (kbase_prepare_soft_job(katom) != 0) { + katom->event_code = + BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } + + katom->will_fail_event_code = katom->event_code; + return false; + } + + /* These must occur after the above loop to ensure that an atom + * that depends on a previous atom with the same number behaves + * as expected + */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom); + + /* For invalid priority, be most lenient and choose the default */ + sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); + if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) + sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; + katom->sched_priority = sched_prio; + + /* Create a new atom. 
*/ + jd_trace_atom_submit(kctx, katom, &katom->sched_priority); + +#if !MALI_INCREMENTAL_RENDERING + /* Reject atoms for incremental rendering if not supported */ + if (katom->core_req & + (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) { + dev_err(kctx->kbdev->dev, + "Rejecting atom with unsupported core_req 0x%x\n", + katom->core_req); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } +#endif /* !MALI_INCREMENTAL_RENDERING */ + + if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { + WARN_ON(katom->jc != 0); + katom->jc_fragment = *user_jc_incr; + } else if (!katom->jc && + (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + /* Reject atoms with job chain = NULL, as these cause issues + * with soft-stop + */ + dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + + /* Reject atoms with an invalid device_nr */ + if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && + (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { + dev_err(kctx->kbdev->dev, + "Rejecting atom with invalid device_nr %d\n", + katom->device_nr); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + + /* Reject atoms with invalid core requirements */ + if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && + (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { + dev_err(kctx->kbdev->dev, + "Rejecting atom with invalid core requirements\n"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; + return jd_done_nolock(katom, NULL); + } + + /* Reject soft-job atom of certain types from accessing external resources */ + if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && + (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || + ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || + ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { + dev_err(kctx->kbdev->dev, + "Rejecting soft-job atom accessing external resources\n"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + /* handle what we need to do to access the external resources */ + if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { + /* setup failed (no access, bad resource, unknown resource types, etc.) */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } + +#if !MALI_JIT_PRESSURE_LIMIT_BASE + if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) && + (user_atom->jit_id[0] || user_atom->jit_id[1])) { + /* JIT pressure limit is disabled, but we are receiving non-0 + * JIT IDs - atom is invalid. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + /* Validate the atom. Function will return error if the atom is + * malformed. + * + * Soft-jobs never enter the job scheduler but have their own initialize method. + * + * If either fail then we immediately complete the atom with an error. 
+ */ + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { + if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } else { + /* Soft-job */ + if (kbase_prepare_soft_job(katom) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } + +#ifdef CONFIG_GPU_TRACEPOINTS + katom->work_id = atomic_inc_return(&jctx->work_id); + trace_gpu_job_enqueue(kctx->id, katom->work_id, + kbasep_map_core_reqs_to_string(katom->core_req)); +#endif + + if (queued && !IS_GPU_ATOM(katom)) + return false; + +#ifdef CONFIG_MALI_DMA_FENCE + if (kbase_fence_dep_count_read(katom) != -1) + return false; + +#endif /* CONFIG_MALI_DMA_FENCE */ + + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + if (kbase_process_soft_job(katom) == 0) { + kbase_finish_soft_job(katom); + return jd_done_nolock(katom, NULL); + } + return false; + } + + if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + bool need_to_try_schedule_context; + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + (void *)katom); + + need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); + /* If job was cancelled then resolve immediately */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + return need_to_try_schedule_context; + } + + /* This is a pure dependency. Resolve it immediately */ + return jd_done_nolock(katom, NULL); +} + +int kbase_jd_submit(struct kbase_context *kctx, + void __user *user_addr, u32 nr_atoms, u32 stride, + bool uk6_atom) +{ + struct kbase_jd_context *jctx = &kctx->jctx; + int err = 0; + int i; + bool need_to_try_schedule_context = false; + struct kbase_device *kbdev; + u32 latest_flush; + + bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || + stride == offsetof(struct base_jd_atom_v2, renderpass_id)); + + /* + * kbase_jd_submit isn't expected to fail and so all errors with the + * jobs are reported by immediately failing them (through event system) + */ + kbdev = kctx->kbdev; + + beenthere(kctx, "%s", "Enter"); + + if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n"); + return -EINVAL; + } + + if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && + stride != sizeof(struct base_jd_atom_v2) && + stride != offsetof(struct base_jd_atom, renderpass_id) && + stride != sizeof(struct base_jd_atom)) { + dev_err(kbdev->dev, + "Stride %u passed to job_submit isn't supported by the kernel\n", + stride); + return -EINVAL; + } + + /* All atoms submitted in this call have the same flush ID */ + latest_flush = kbase_backend_get_current_flush_id(kbdev); + + for (i = 0; i < nr_atoms; i++) { + struct base_jd_atom user_atom; + struct base_jd_fragment user_jc_incr; + struct kbase_jd_atom *katom; + + if (unlikely(jd_atom_is_v2)) { + if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } + + /* no seq_nr in v2 */ + user_atom.seq_nr = 0; + } else { + if (copy_from_user(&user_atom, user_addr, stride) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } + } + + if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { + dev_dbg(kbdev->dev, "No renderpass ID: use 0\n"); + user_atom.renderpass_id = 0; + } else { 
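+			/* The caller supplied an atom layout that includes
+			 * the renderpass_id and padding fields, so validate
+			 * them before use.
+			 */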
+ /* Ensure all padding bytes are 0 for potential future + * extension + */ + size_t j; + + dev_dbg(kbdev->dev, "Renderpass ID is %d\n", + user_atom.renderpass_id); + for (j = 0; j < sizeof(user_atom.padding); j++) { + if (user_atom.padding[j]) { + dev_err(kbdev->dev, + "Bad padding byte %zu: %d\n", + j, user_atom.padding[j]); + err = -EINVAL; + break; + } + } + if (err) + break; + } + + /* In this case 'jc' is the CPU address of a struct + * instead of a GPU address of a job chain. + */ + if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) { + if (copy_from_user(&user_jc_incr, + u64_to_user_ptr(user_atom.jc), + sizeof(user_jc_incr))) { + dev_err(kbdev->dev, + "Invalid jc address 0x%llx passed to job_submit\n", + user_atom.jc); + err = -EFAULT; + break; + } + dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n"); + user_atom.jc = 0; + } + + user_addr = (void __user *)((uintptr_t) user_addr + stride); + + mutex_lock(&jctx->lock); +#ifndef compiletime_assert +#define compiletime_assert_defined +#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ +while (false) +#endif + compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) == + BASE_JD_ATOM_COUNT, + "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); + compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == + sizeof(user_atom.atom_number), + "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); +#ifdef compiletime_assert_defined +#undef compiletime_assert +#undef compiletime_assert_defined +#endif + katom = &jctx->atoms[user_atom.atom_number]; + + /* Record the flush ID for the cache flush optimisation */ + katom->flush_id = latest_flush; + + while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { + /* Atom number is already in use, wait for the atom to + * complete + */ + mutex_unlock(&jctx->lock); + + /* This thread will wait for the atom to complete. Due + * to thread scheduling we are not sure that the other + * thread that owns the atom will also schedule the + * context, so we force the scheduler to be active and + * hence eventually schedule this context at some point + * later. + */ + kbase_js_sched_all(kbdev); + + if (wait_event_killable(katom->completed, + katom->status == + KBASE_JD_ATOM_STATE_UNUSED) != 0) { + /* We're being killed so the result code + * doesn't really matter + */ + return 0; + } + mutex_lock(&jctx->lock); + } + + need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, + &user_jc_incr, katom); + + /* Register a completed job as a disjoint event when the GPU is in a disjoint state + * (ie. being reset). 
+ */ + kbase_disjoint_event_potential(kbdev); + + mutex_unlock(&jctx->lock); + } + + if (need_to_try_schedule_context) + kbase_js_sched_all(kbdev); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_jd_submit); + +void kbase_jd_done_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); + struct kbase_jd_context *jctx; + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + u64 cache_jc = katom->jc; + struct kbasep_js_atom_retained_state katom_retained_state; + bool context_idle; + base_jd_core_req core_req = katom->core_req; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + jctx = &kctx->jctx; + kbdev = kctx->kbdev; + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n", + (void *)katom, (void *)kctx); + + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); + + kbase_backend_complete_wq(kbdev, katom); + + /* + * Begin transaction on JD context and JS context + */ + mutex_lock(&jctx->lock); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_DONE); + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* This worker only gets called on contexts that are scheduled *in*. This is + * because it only happens in response to an IRQ from a job that was + * running. + */ + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + if (katom->event_code == BASE_JD_EVENT_STOPPED) { + unsigned long flags; + + dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n", + (void *)katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + (void *)katom); + kbase_js_unpull(kctx, katom); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&jctx->lock); + + return; + } + + if ((katom->event_code != BASE_JD_EVENT_DONE) && + (!kbase_ctx_flag(katom->kctx, KCTX_DYING))) + dev_err(kbdev->dev, + "t6xx: GPU fault 0x%02lx from job slot %d\n", + (unsigned long)katom->event_code, + katom->slot_nr); + + /* Retain state before the katom disappears */ + kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + + context_idle = kbase_js_complete_atom_wq(kctx, katom); + + KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); + + kbasep_js_remove_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; + /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ + jd_done_nolock(katom, &kctx->completed_jobs); + + /* katom may have been freed now, do not use! */ + + if (context_idle) { + unsigned long flags; + + context_idle = false; + mutex_lock(&js_devdata->queue_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* If kbase_sched() has scheduled this context back in then + * KCTX_ACTIVE will have been set after we marked it as + * inactive, and another pm reference will have been taken, so + * drop our reference. But do not call kbase_jm_idle_ctx(), as + * the context is active and fast-starting is allowed. 
+ * + * If an atom has been fast-started then kctx->atoms_pulled will + * be non-zero but KCTX_ACTIVE will still be false (as the + * previous pm reference has been inherited). Do NOT drop our + * reference, as it has been re-used, and leave the context as + * active. + * + * If no new atoms have been started then KCTX_ACTIVE will still + * be false and atoms_pulled will be zero, so drop the reference + * and call kbase_jm_idle_ctx(). + * + * As the checks are done under both the queue_mutex and + * hwaccess_lock is should be impossible for this to race + * with the scheduler code. + */ + if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || + !atomic_read(&kctx->atoms_pulled)) { + /* Calling kbase_jm_idle_ctx() here will ensure that + * atoms are not fast-started when we drop the + * hwaccess_lock. This is not performed if + * KCTX_ACTIVE is set as in that case another pm + * reference has been taken and a fast-start would be + * valid. + */ + if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) + kbase_jm_idle_ctx(kbdev, kctx); + context_idle = true; + } else { + kbase_ctx_flag_set(kctx, KCTX_ACTIVE); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->queue_mutex); + } + + /* + * Transaction complete + */ + mutex_unlock(&jctx->lock); + + /* Job is now no longer running, so can now safely release the context + * reference, and handle any actions that were logged against the atom's retained state */ + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); + + kbase_js_sched_all(kbdev); + + if (!atomic_dec_return(&kctx->work_count)) { + /* If worker now idle then post all events that jd_done_nolock() + * has queued */ + mutex_lock(&jctx->lock); + while (!list_empty(&kctx->completed_jobs)) { + struct kbase_jd_atom *atom = list_entry( + kctx->completed_jobs.next, + struct kbase_jd_atom, jd_item); + list_del(kctx->completed_jobs.next); + + kbase_event_post(kctx, atom); + } + mutex_unlock(&jctx->lock); + } + + kbase_backend_complete_wq_post_sched(kbdev, core_req); + + if (context_idle) + kbase_pm_context_idle(kbdev); + + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); + + dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n", + (void *)katom, (void *)kctx); +} + +/** + * jd_cancel_worker - Work queue job cancel function. + * @data: a &struct work_struct + * + * Only called as part of 'Zapping' a context (which occurs on termination). + * Operates serially with the kbase_jd_done_worker() on the work queue. + * + * This can only be called on contexts that aren't scheduled. + * + * We don't need to release most of the resources that would occur on + * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be + * running (by virtue of only being called on contexts that aren't + * scheduled). + */ +static void jd_cancel_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); + struct kbase_jd_context *jctx; + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + bool need_to_try_schedule_context; + bool attr_state_changed; + struct kbase_device *kbdev; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + kbdev = kctx->kbdev; + jctx = &kctx->jctx; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + + /* This only gets called on contexts that are scheduled out. 
Hence, we must + * make sure we don't de-ref the number of running jobs (there aren't + * any), nor must we try to schedule out the context (it's already + * scheduled out). + */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + /* Scheduler: Remove the job from the system */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&jctx->lock); + + need_to_try_schedule_context = jd_done_nolock(katom, NULL); + /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to + * schedule the context. There's also no need for the jsctx_mutex to have been taken + * around this too. */ + KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); + + /* katom may have been freed now, do not use! */ + mutex_unlock(&jctx->lock); + + if (attr_state_changed) + kbase_js_sched_all(kbdev); +} + +/** + * kbase_jd_done - Complete a job that has been removed from the Hardware + * @katom: atom which has been completed + * @slot_nr: slot the atom was on + * @end_timestamp: completion time + * @done_code: completion code + * + * This must be used whenever a job has been removed from the Hardware, e.g.: + * An IRQ indicates that the job finished (for both error and 'done' codes), or + * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop. + * + * Some work is carried out immediately, and the rest is deferred onto a + * workqueue + * + * Context: + * This can be called safely from atomic context. + * The caller must hold kbdev->hwaccess_lock + */ +void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, + ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) +{ + struct kbase_context *kctx; + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE, kctx, katom, katom->jc, 0); + + kbase_job_check_leave_disjoint(kbdev, katom); + + katom->slot_nr = slot_nr; + + atomic_inc(&kctx->work_count); + +#ifdef CONFIG_DEBUG_FS + /* a failed job happened and is waiting for dumping*/ + if (!katom->will_fail_event_code && + kbase_debug_job_fault_process(katom, katom->event_code)) + return; +#endif + + WARN_ON(work_pending(&katom->work)); + INIT_WORK(&katom->work, kbase_jd_done_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +KBASE_EXPORT_TEST_API(kbase_jd_done); + +void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(NULL != kctx); + + dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom); + KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + + /* This should only be done from a context that is not scheduled */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + WARN_ON(work_pending(&katom->work)); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + INIT_WORK(&katom->work, jd_cancel_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + + +void kbase_jd_zap_context(struct kbase_context *kctx) +{ + struct kbase_jd_atom *katom; + struct list_head *entry, *tmp; + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(kctx); + + kbdev = kctx->kbdev; + + 
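+	/* Zapping: evict this context's atoms from the job scheduler first,
+	 * then cancel any soft jobs and dma-fence waits still queued outside
+	 * the scheduler, and finally wait for the context's job count to
+	 * reach zero before returning.
+	 */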
KBASE_KTRACE_ADD_JM(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); + + kbase_js_zap_context(kctx); + + mutex_lock(&kctx->jctx.lock); + + /* + * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are + * queued outside the job scheduler. + */ + + del_timer_sync(&kctx->soft_job_timeout); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + katom = list_entry(entry, struct kbase_jd_atom, queue); + kbase_cancel_soft_job(katom); + } + + +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_cancel_all_atoms(kctx); +#endif + + mutex_unlock(&kctx->jctx.lock); + +#ifdef CONFIG_MALI_DMA_FENCE + /* Flush dma-fence workqueue to ensure that any callbacks that may have + * been queued are done before continuing. + */ + flush_workqueue(kctx->dma_fence.wq); +#endif + +#ifdef CONFIG_DEBUG_FS + kbase_debug_job_fault_kctx_unblock(kctx); +#endif + + kbase_jm_wait_for_zero_jobs(kctx); +} + +KBASE_EXPORT_TEST_API(kbase_jd_zap_context); + +int kbase_jd_init(struct kbase_context *kctx) +{ + int i; + int mali_err = 0; + + KBASE_DEBUG_ASSERT(kctx); + + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (NULL == kctx->jctx.job_done_wq) { + mali_err = -ENOMEM; + goto out1; + } + + for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { + init_waitqueue_head(&kctx->jctx.atoms[i].completed); + + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); + + /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ + kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; + kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; + +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + kctx->jctx.atoms[i].dma_fence.context = + dma_fence_context_alloc(1); + atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); +#endif + + /* MALI_SEC_INTEGRATION */ + spin_lock_init(&kctx->jctx.atoms[i].fence_lock); + } + + for (i = 0; i < BASE_JD_RP_COUNT; i++) + kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE; + + mutex_init(&kctx->jctx.lock); + + init_waitqueue_head(&kctx->jctx.zero_jobs_wait); + + spin_lock_init(&kctx->jctx.tb_lock); + + kctx->jctx.job_nr = 0; + INIT_LIST_HEAD(&kctx->completed_jobs); + atomic_set(&kctx->work_count, 0); + + return 0; + + out1: + return mali_err; +} + +KBASE_EXPORT_TEST_API(kbase_jd_init); + +void kbase_jd_exit(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + /* Work queue is emptied by this */ + destroy_workqueue(kctx->jctx.job_done_wq); +} + +KBASE_EXPORT_TEST_API(kbase_jd_exit); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_jd_debugfs.c new file mode 100644 index 000000000000..e9a161f4a518 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_jd_debugfs.c @@ -0,0 +1,244 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifdef CONFIG_DEBUG_FS + +#include +#include +#include +#include +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#include +#endif +#include + +struct kbase_jd_debugfs_depinfo { + u8 id; + char type; +}; + +static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, + struct seq_file *sfile) +{ +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + struct kbase_sync_fence_info info; + int res; + + switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + res = kbase_sync_fence_out_info_get(atom, &info); + if (res == 0) + seq_printf(sfile, "Sa([%p]%d) ", + info.fence, info.status); + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + res = kbase_sync_fence_in_info_get(atom, &info); + if (res == 0) + seq_printf(sfile, "Wa([%p]%d) ", + info.fence, info.status); + break; + default: + break; + } +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ + +#ifdef CONFIG_MALI_DMA_FENCE + if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + struct kbase_fence_cb *cb; + + if (atom->dma_fence.fence) { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence = atom->dma_fence.fence; +#else + struct dma_fence *fence = atom->dma_fence.fence; +#endif + + seq_printf(sfile, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) + "Sd(%u#%u: %s) ", +#else + "Sd(%llu#%u: %s) ", +#endif + fence->context, + fence->seqno, + dma_fence_is_signaled(fence) ? + "signaled" : "active"); + } + + list_for_each_entry(cb, &atom->dma_fence.callbacks, + node) { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence = cb->fence; +#else + struct dma_fence *fence = cb->fence; +#endif + + seq_printf(sfile, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) + "Wd(%u#%u: %s) ", +#else + "Wd(%llu#%u: %s) ", +#endif + fence->context, + fence->seqno, + dma_fence_is_signaled(fence) ? + "signaled" : "active"); + } + } +#endif /* CONFIG_MALI_DMA_FENCE */ + +} + +static void kbasep_jd_debugfs_atom_deps( + struct kbase_jd_debugfs_depinfo *deps, + struct kbase_jd_atom *atom) +{ + struct kbase_context *kctx = atom->kctx; + int i; + + for (i = 0; i < 2; i++) { + deps[i].id = (unsigned)(atom->dep[i].atom ? + kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0); + + switch (atom->dep[i].dep_type) { + case BASE_JD_DEP_TYPE_INVALID: + deps[i].type = ' '; + break; + case BASE_JD_DEP_TYPE_DATA: + deps[i].type = 'D'; + break; + case BASE_JD_DEP_TYPE_ORDER: + deps[i].type = '>'; + break; + default: + deps[i].type = '?'; + break; + } + } +} +/** + * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file. + * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * This function is called to get the contents of the JD atoms debugfs file. 
+ * This is a report of all atoms managed by kbase_jd_context.atoms + * + * Return: 0 if successfully prints data in debugfs entry file, failure + * otherwise + */ +static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) +{ + struct kbase_context *kctx = sfile->private; + struct kbase_jd_atom *atoms; + unsigned long irq_flags; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + /* Print version */ + seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); + + /* Print U/K API version */ + seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, + BASE_UK_VERSION_MINOR); + + /* Print table heading */ + seq_puts(sfile, " ID, Core req, St, CR, Predeps, Start time, Additional info...\n"); + + atoms = kctx->jctx.atoms; + /* General atom states */ + mutex_lock(&kctx->jctx.lock); + /* JS-related states */ + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { + struct kbase_jd_atom *atom = &atoms[i]; + s64 start_timestamp = 0; + struct kbase_jd_debugfs_depinfo deps[2]; + + if (atom->status == KBASE_JD_ATOM_STATE_UNUSED) + continue; + + /* start_timestamp is cleared as soon as the atom leaves UNUSED state + * and set before a job is submitted to the h/w, a non-zero value means + * it is valid */ + if (ktime_to_ns(atom->start_timestamp)) + start_timestamp = ktime_to_ns( + ktime_sub(ktime_get(), atom->start_timestamp)); + + kbasep_jd_debugfs_atom_deps(deps, atom); + + seq_printf(sfile, + "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", + i, atom->core_req, atom->status, + deps[0].type, deps[0].id, + deps[1].type, deps[1].id, + start_timestamp); + + + kbase_jd_debugfs_fence_info(atom, sfile); + + seq_puts(sfile, "\n"); + } + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + + +/** + * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file + * @in: &struct inode pointer + * @file: &struct file pointer + * + * Return: file descriptor + */ +static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private); +} + +static const struct file_operations kbasep_jd_debugfs_atoms_fops = { + .owner = THIS_MODULE, + .open = kbasep_jd_debugfs_atoms_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) +{ + /* Caller already ensures this, but we keep the pattern for + * maintenance safety. + */ + if (WARN_ON(!kctx) || + WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + /* Expose all atoms */ + debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx, + &kbasep_jd_debugfs_atoms_fops); + +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_jd_debugfs.h new file mode 100644 index 000000000000..697bdef4d434 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_jd_debugfs.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_jd_debugfs.h + * Header file for job dispatcher-related entries in debugfs + */ + +#ifndef _KBASE_JD_DEBUGFS_H +#define _KBASE_JD_DEBUGFS_H + +#include + +#define MALI_JD_DEBUGFS_VERSION 3 + +/* Forward declarations */ +struct kbase_context; + +/** + * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system + * + * @kctx Pointer to kbase_context + */ +void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); + +#endif /*_KBASE_JD_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_jm.c b/drivers/gpu/arm/b_r26p0/mali_kbase_jm.c new file mode 100644 index 000000000000..3f17dd763b97 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_jm.c @@ -0,0 +1,151 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * HW access job manager common APIs + */ + +#include +#include "mali_kbase_hwaccess_jm.h" +#include "mali_kbase_jm.h" + +/** + * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot + * @js on the active context. + * @kbdev: Device pointer + * @js: Job slot to run on + * @nr_jobs_to_submit: Number of jobs to attempt to submit + * + * Return: true if slot can still be submitted on, false if slot is now full. 
+ */ +static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, + int nr_jobs_to_submit) +{ + struct kbase_context *kctx; + int i; + + kctx = kbdev->hwaccess.active_kctx[js]; + dev_dbg(kbdev->dev, + "Trying to run the next %d jobs in kctx %p (s:%d)\n", + nr_jobs_to_submit, (void *)kctx, js); + + if (!kctx) + return true; + + for (i = 0; i < nr_jobs_to_submit; i++) { + struct kbase_jd_atom *katom = kbase_js_pull(kctx, js); + + if (!katom) + return true; /* Context has no jobs on this slot */ + + kbase_backend_run_atom(kbdev, katom); + } + + dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js); + return false; +} + +u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) +{ + u32 ret_mask = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); + + while (js_mask) { + int js = ffs(js_mask) - 1; + int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); + + if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) + ret_mask |= (1 << js); + + js_mask &= ~(1 << js); + } + + dev_dbg(kbdev->dev, "Can still submit to mask 0x%x\n", ret_mask); + return ret_mask; +} + +void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!down_trylock(&js_devdata->schedule_sem)) { + kbase_jm_kick(kbdev, js_mask); + up(&js_devdata->schedule_sem); + } +} + +void kbase_jm_try_kick_all(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!down_trylock(&js_devdata->schedule_sem)) { + kbase_jm_kick_all(kbdev); + up(&js_devdata->schedule_sem); + } +} + +void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + if (kbdev->hwaccess.active_kctx[js] == kctx) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)kctx, js); + kbdev->hwaccess.active_kctx[js] = NULL; + } + } +} + +struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n", + (void *)katom, katom->event_code); + + if (katom->event_code != BASE_JD_EVENT_STOPPED && + katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { + return kbase_js_complete_atom(katom, NULL); + } else { + kbase_js_unpull(katom->kctx, katom); + return NULL; + } +} + +struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, ktime_t *end_timestamp) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return kbase_js_complete_atom(katom, end_timestamp); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_jm.h b/drivers/gpu/arm/b_r26p0/mali_kbase_jm.h new file mode 100644 index 000000000000..a3c774483256 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_jm.h @@ -0,0 +1,115 @@ +/* + * + * (C) COPYRIGHT 2014, 2016, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Job manager common APIs + */ + +#ifndef _KBASE_JM_H_ +#define _KBASE_JM_H_ + +/** + * kbase_jm_kick() - Indicate that there are jobs ready to run. + * @kbdev: Device pointer + * @js_mask: Mask of the job slots that can be pulled from. + * + * Caller must hold the hwaccess_lock and schedule_sem semaphore + * + * Return: Mask of the job slots that can still be submitted to. + */ +u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); + +/** + * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job + * slots. + * @kbdev: Device pointer + * + * Caller must hold the hwaccess_lock and schedule_sem semaphore + * + * Return: Mask of the job slots that can still be submitted to. + */ +static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) +{ + return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + +/** + * kbase_jm_try_kick - Attempt to call kbase_jm_kick + * @kbdev: Device pointer + * @js_mask: Mask of the job slots that can be pulled from + * Context: Caller must hold hwaccess_lock + * + * If schedule_sem can be immediately obtained then this function will call + * kbase_jm_kick() otherwise it will do nothing. + */ +void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); + +/** + * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all + * @kbdev: Device pointer + * Context: Caller must hold hwaccess_lock + * + * If schedule_sem can be immediately obtained then this function will call + * kbase_jm_kick_all() otherwise it will do nothing. + */ +void kbase_jm_try_kick_all(struct kbase_device *kbdev); + +/** + * kbase_jm_idle_ctx() - Mark a context as idle. + * @kbdev: Device pointer + * @kctx: Context to mark as idle + * + * No more atoms will be pulled from this context until it is marked as active + * by kbase_js_use_ctx(). + * + * The context should have no atoms currently pulled from it + * (kctx->atoms_pulled == 0). 
+ * + * Caller must hold the hwaccess_lock + */ +void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has + * been soft-stopped or will fail due to a + * dependency + * @kbdev: Device pointer + * @katom: Atom that has been stopped or will be failed + * + * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ +struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_jm_complete() - Complete an atom + * @kbdev: Device pointer + * @katom: Atom that has completed + * @end_timestamp: Timestamp of atom completion + * + * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ +struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, ktime_t *end_timestamp); + +#endif /* _KBASE_JM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_js.c b/drivers/gpu/arm/b_r26p0/mali_kbase_js.c new file mode 100644 index 000000000000..a5ae3ab9e77e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_js.c @@ -0,0 +1,3784 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <tl/mali_kbase_tracepoints.h>
+#include <mali_linux_trace.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_ctx_sched.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+        /* The context was descheduled - caller should try scheduling in a new
+         * one to keep the runpool full */
+        KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+        /* Ctx attributes were changed - caller should try scheduling all
+         * contexts */
+        KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+        KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+        KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+        KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+        BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+        BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+        BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+                struct kbase_device *kbdev, struct kbase_context *kctx,
+                struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+                struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+                kbasep_js_ctx_job_cb callback);
+
+/* Helper for ktrace */
+#if KBASE_KTRACE_ENABLE
+static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx)
+{
+        return atomic_read(&kctx->refcount);
+}
+#else /* KBASE_KTRACE_ENABLE */
+static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx)
+{
+        CSTD_UNUSED(kctx);
+        return 0;
+}
+#endif /* KBASE_KTRACE_ENABLE */
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+        base_jd_core_req core_req = 0u;
+
+        if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+                core_req |= BASE_JD_REQ_V;
+
+        if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+                core_req |= BASE_JD_REQ_CF;
+
+        if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+                core_req |= BASE_JD_REQ_CS;
+
+        if ((features & JS_FEATURE_TILER_JOB) != 0)
+                core_req |= BASE_JD_REQ_T;
+
+        if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+                core_req |= BASE_JD_REQ_FS;
+
+        return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+        mutex_lock(&kbdev->js_data.runpool_mutex);
+        kbase_backend_ctx_count_changed(kbdev);
+        mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/**
+ * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this function returns
+ * true.
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */ +static inline bool +jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) +{ + bool none_to_pull; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); + + dev_dbg(kctx->kbdev->dev, + "Slot %d (prio %d) is %spullable in kctx %p\n", + js, prio, none_to_pull ? "not " : "", kctx); + + return none_to_pull; +} + +/** + * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no + * pullable atoms + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * + * Caller must hold hwaccess_lock + * + * Return: true if the ring buffers for all priorities have no pullable atoms, + * false otherwise. + */ +static inline bool +jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) +{ + int prio; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) + return false; + } + + return true; +} + +/** + * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. + * @kctx: Pointer to kbase context with the queue. + * @js: Job slot id to iterate. + * @prio: Priority id to iterate. + * @callback: Function pointer to callback. + * + * Iterate over a queue and invoke @callback for each entry in the queue, and + * remove the entry from the queue. + * + * If entries are added to the queue while this is running those entries may, or + * may not be covered. To ensure that all entries in the buffer have been + * enumerated when this function returns jsctx->lock must be held when calling + * this function. + * + * The HW access lock must always be held when calling this function. + */ +static void +jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, + kbasep_js_ctx_job_cb callback) +{ + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { + struct rb_node *node = rb_first(&queue->runnable_tree); + struct kbase_jd_atom *entry = rb_entry(node, + struct kbase_jd_atom, runnable_tree_node); + + rb_erase(node, &queue->runnable_tree); + callback(kctx->kbdev, entry); + + /* Runnable end-of-renderpass atoms can also be in the linked + * list of atoms blocked on cross-slot dependencies. Remove them + * to avoid calling the callback twice. + */ + if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) { + WARN_ON(!(entry->core_req & + BASE_JD_REQ_END_RENDERPASS)); + dev_dbg(kctx->kbdev->dev, + "Del runnable atom %p from X_DEP list\n", + (void *)entry); + + list_del(&entry->queue); + entry->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + } + } + + while (!list_empty(&queue->x_dep_head)) { + struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, + struct kbase_jd_atom, queue); + + WARN_ON(!(entry->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + dev_dbg(kctx->kbdev->dev, + "Del blocked atom %p from X_DEP list\n", + (void *)entry); + + list_del(queue->x_dep_head.next); + entry->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + + callback(kctx->kbdev, entry); + } +} + +/** + * jsctx_queue_foreach(): - Execute callback for each entry in every queue + * @kctx: Pointer to kbase context with queue. + * @js: Job slot id to iterate. + * @callback: Function pointer to callback. 
+ * + * Iterate over all the different priorities, and for each call + * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback + * for each entry, and remove the entry from the queue. + */ +static inline void +jsctx_queue_foreach(struct kbase_context *kctx, int js, + kbasep_js_ctx_job_cb callback) +{ + int prio; + + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) + jsctx_queue_foreach_prio(kctx, js, prio, callback); +} + +/** + * jsctx_rb_peek_prio(): - Check buffer and get next atom + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority id to check. + * + * Check the ring buffer for the specified @js and @prio and return a pointer to + * the next atom, unless the ring buffer is empty. + * + * Return: Pointer to next atom in buffer, or NULL if there is no atom. + */ +static inline struct kbase_jd_atom * +jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + struct rb_node *node; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kctx->kbdev->dev, + "Peeking runnable tree of kctx %p for prio %d (s:%d)\n", + (void *)kctx, prio, js); + + node = rb_first(&rb->runnable_tree); + if (!node) { + dev_dbg(kctx->kbdev->dev, "Tree is empty\n"); + return NULL; + } + + return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); +} + +/** + * jsctx_rb_peek(): - Check all priority buffers and get next atom + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * + * Check the ring buffers for all priorities, starting from + * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a + * pointer to the next atom, unless all the priority's ring buffers are empty. + * + * Caller must hold the hwaccess_lock. + * + * Return: Pointer to next atom in buffer, or NULL if there is no atom. + */ +static inline struct kbase_jd_atom * +jsctx_rb_peek(struct kbase_context *kctx, int js) +{ + int prio; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + struct kbase_jd_atom *katom; + + katom = jsctx_rb_peek_prio(kctx, js, prio); + if (katom) + return katom; + } + + return NULL; +} + +/** + * jsctx_rb_pull(): - Mark atom in list as running + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to pull. + * + * Mark an atom previously obtained from jsctx_rb_peek() as running. + * + * @katom must currently be at the head of the ring buffer. + */ +static inline void +jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n", + (void *)katom, (void *)kctx); + + /* Atoms must be pulled in the correct order. 
*/ + WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); + + rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); +} + +#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0) + +static void +jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + + while (*new) { + struct kbase_jd_atom *entry = container_of(*new, + struct kbase_jd_atom, runnable_tree_node); + + parent = *new; + if (LESS_THAN_WRAP(katom->age, entry->age)) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&katom->runnable_tree_node, parent, new); + rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_READY); +} + +/** + * jsctx_rb_unpull(): - Undo marking of atom in list as running + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to unpull. + * + * Undo jsctx_rb_pull() and put @katom back in the queue. + * + * jsctx_rb_unpull() must be called on atoms in the same order the atoms were + * pulled. + */ +static inline void +jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + jsctx_tree_add(kctx, katom); +} + +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, + int js, + bool is_scheduled); +static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); +static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); + +/* + * Functions private to KBase ('Protected' functions) + */ +int kbasep_js_devdata_init(struct kbase_device * const kbdev) +{ + struct kbasep_js_device_data *jsdd; + int i, j; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + jsdd = &kbdev->js_data; + +#ifdef CONFIG_MALI_DEBUG + /* Soft-stop will be disabled on a single context by default unless + * softstop_always is set */ + jsdd->softstop_always = false; +#endif /* CONFIG_MALI_DEBUG */ + jsdd->nr_all_contexts_running = 0; + jsdd->nr_user_contexts_running = 0; + jsdd->nr_contexts_pullable = 0; + atomic_set(&jsdd->nr_contexts_runnable, 0); + /* No ctx allowed to submit */ + jsdd->runpool_irq.submit_allowed = 0u; + memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, + sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); + memset(jsdd->runpool_irq.slot_affinities, 0, + sizeof(jsdd->runpool_irq.slot_affinities)); + memset(jsdd->runpool_irq.slot_affinity_refcount, 0, + sizeof(jsdd->runpool_irq.slot_affinity_refcount)); + INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); + + /* Config attributes */ + jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; + jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; + jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; + jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS; + jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; + jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING; + jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; + jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; 
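/*
 * Editorial sketch, not part of this patch: the default tick counts being
 * initialised here must escalate strictly (soft-stop before hard-stop before
 * GPU reset, for both the normal and dumping variants), which is exactly what
 * kbasep_js_devdata_init() verifies a few lines below before accepting the
 * configuration. The struct and helper here are hypothetical stand-ins for
 * the jsdd->*_ticks fields, shown only to make that ordering rule explicit.
 */
#include <stdbool.h>

struct js_timeout_ticks {
        unsigned int soft_stop; /* ticks before a job is soft-stopped */
        unsigned int hard_stop; /* ticks before a job is hard-stopped */
        unsigned int gpu_reset; /* ticks before the GPU is reset      */
};

/* Valid only if the thresholds escalate strictly: soft < hard < reset. */
static bool js_timeout_ticks_valid(const struct js_timeout_ticks *t)
{
        return t->soft_stop < t->hard_stop && t->hard_stop < t->gpu_reset;
}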
+ jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; + jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS; + atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); + + dev_dbg(kbdev->dev, "JS Config Attribs: "); + dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", + jsdd->scheduling_period_ns); + dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", + jsdd->soft_stop_ticks); + dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", + jsdd->soft_stop_ticks_cl); + dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", + jsdd->hard_stop_ticks_ss); + dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", + jsdd->hard_stop_ticks_cl); + dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", + jsdd->hard_stop_ticks_dumping); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", + jsdd->gpu_reset_ticks_ss); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", + jsdd->gpu_reset_ticks_cl); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", + jsdd->gpu_reset_ticks_dumping); + dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", + jsdd->ctx_timeslice_ns); + dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", + atomic_read(&jsdd->soft_job_timeout_ms)); + + if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && + jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && + jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && + jsdd->hard_stop_ticks_dumping < + jsdd->gpu_reset_ticks_dumping)) { + dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); + return -EINVAL; + } + +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS + dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", + jsdd->soft_stop_ticks, + jsdd->scheduling_period_ns); +#endif +#if KBASE_DISABLE_SCHEDULING_HARD_STOPS + dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. 
Other hard-stops may still occur.", + jsdd->hard_stop_ticks_ss, + jsdd->hard_stop_ticks_dumping, + jsdd->scheduling_period_ns); +#endif +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS + dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing."); +#endif + + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) + jsdd->js_reqs[i] = core_reqs_from_jsn_features( + kbdev->gpu_props.props.raw_props.js_features[i]); + + /* On error, we could continue on: providing none of the below resources + * rely on the ones above */ + + mutex_init(&jsdd->runpool_mutex); + mutex_init(&jsdd->queue_mutex); + sema_init(&jsdd->schedule_sem, 1); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { + for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { + INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); + INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); + } + } + + return 0; +} + +void kbasep_js_devdata_halt(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbasep_js_devdata_term(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + + /* The caller must de-register all contexts before calling this + */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); + KBASE_DEBUG_ASSERT(memcmp( + js_devdata->runpool_irq.ctx_attr_ref_count, + zero_ctx_attr_ref_count, + sizeof(zero_ctx_attr_ref_count)) == 0); + CSTD_UNUSED(zero_ctx_attr_ref_count); +} + +int kbasep_js_kctx_init(struct kbase_context *const kctx) +{ + struct kbase_device *kbdev; + struct kbasep_js_kctx_info *js_kctx_info; + int i, j; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) + INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); + + js_kctx_info = &kctx->jctx.sched_info; + + kctx->slots_pullable = 0; + js_kctx_info->ctx.nr_jobs = 0; + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + kbase_ctx_flag_clear(kctx, KCTX_DYING); + memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, + sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); + + /* Initially, the context is disabled from submission until the create + * flags are set */ + kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); + + /* On error, we could continue on: providing none of the below resources + * rely on the ones above */ + mutex_init(&js_kctx_info->ctx.jsctx_mutex); + + init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); + + for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { + INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); + kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; + } + } + + return 0; +} + +void kbasep_js_kctx_term(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + struct kbasep_js_kctx_info *js_kctx_info; + int js; + bool update_ctx_count = false; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_kctx_info = &kctx->jctx.sched_info; + + /* The caller must de-register all jobs before calling this */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); + + mutex_lock(&kbdev->js_data.queue_mutex); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + for (js = 0; js < 
kbdev->gpu_props.num_job_slots; js++) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { + WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + update_ctx_count = true; + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + } + + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kbdev->js_data.queue_mutex); + + if (update_ctx_count) { + mutex_lock(&kbdev->js_data.runpool_mutex); + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&kbdev->js_data.runpool_mutex); + } +} + +/** + * kbase_js_ctx_list_add_pullable_nolock - Variant of + * kbase_jd_ctx_list_add_pullable() + * where the caller must hold + * hwaccess_lock + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * Caller must hold hwaccess_lock + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n", + (void *)kctx, js); + + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); + + if (!kctx->slots_pullable) { + kbdev->js_data.nr_contexts_pullable++; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } + } + kctx->slots_pullable |= (1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_add_pullable_head_nolock - Variant of + * kbase_js_ctx_list_add_pullable_head() + * where the caller must hold + * hwaccess_lock + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * Caller must hold hwaccess_lock + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_head_nolock( + struct kbase_device *kbdev, struct kbase_context *kctx, int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n", + (void *)kctx, js); + + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); + + if (!kctx->slots_pullable) { + kbdev->js_data.nr_contexts_pullable++; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } + } + kctx->slots_pullable |= (1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_add_pullable_head - Add context to the head of the + * per-slot pullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * If the context is on either the pullable or unpullable queues, then it is + * removed before being added to the head. 
+ * + * This function should be used when a context has been scheduled, but no jobs + * can currently be pulled from it. + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +/** + * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the + * per-slot unpullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * The context must already be on the per-slot pullable queue. It will be + * removed from the pullable queue before being added to the unpullable queue. + * + * This function should be used when a context has been pulled from, and there + * are no jobs remaining on the specified slot. + * + * Caller must hold hwaccess_lock + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n", + (void *)kctx, js); + + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); + + if (kctx->slots_pullable == (1 << js)) { + kbdev->js_data.nr_contexts_pullable--; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } + } + kctx->slots_pullable &= ~(1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable + * or unpullable context queues + * @kbdev: Device pointer + * @kctx: Context to remove from queue + * @js: Job slot to use + * + * The context must already be on one of the queues. + * + * This function should be used when a context has no jobs on the GPU, and no + * jobs remaining for the specified slot. + * + * Caller must hold hwaccess_lock + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); + + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + if (kctx->slots_pullable == (1 << js)) { + kbdev->js_data.nr_contexts_pullable--; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } + } + kctx->slots_pullable &= ~(1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() + * where the caller must hold + * hwaccess_lock + * @kbdev: Device pointer + * @js: Job slot to use + * + * Caller must hold hwaccess_lock + * + * Return: Context to use for specified slot. 
+ * NULL if no contexts present for specified slot + */ +static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( + struct kbase_device *kbdev, + int js) +{ + struct kbase_context *kctx; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) + continue; + + kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next, + struct kbase_context, + jctx.sched_info.ctx.ctx_list_entry[js]); + + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + dev_dbg(kbdev->dev, + "Popped %p from the pullable queue (s:%d)\n", + (void *)kctx, js); + return kctx; + } + return NULL; +} + +/** + * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable + * queue. + * @kbdev: Device pointer + * @js: Job slot to use + * + * Return: Context to use for specified slot. + * NULL if no contexts present for specified slot + */ +static struct kbase_context *kbase_js_ctx_list_pop_head( + struct kbase_device *kbdev, int js) +{ + struct kbase_context *kctx; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return kctx; +} + +/** + * kbase_js_ctx_pullable - Return if a context can be pulled from on the + * specified slot + * @kctx: Context pointer + * @js: Job slot to use + * @is_scheduled: true if the context is currently scheduled + * + * Caller must hold hwaccess_lock + * + * Return: true if context can be pulled from on specified slot + * false otherwise + */ +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, + bool is_scheduled) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_jd_atom *katom; + struct kbase_device *kbdev = kctx->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + js_devdata = &kbdev->js_data; + + if (is_scheduled) { + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + (void *)kctx); + return false; + } + } + katom = jsctx_rb_peek(kctx, js); + if (!katom) { + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + (void *)kctx, js); + return false; /* No pullable atoms */ + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, + "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); + return false; + } + if (atomic_read(&katom->blocked)) { + dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n", + (void *)katom); + return false; /* next atom blocked */ + } + if (kbase_js_atom_blocked_on_x_dep(katom)) { + if (katom->x_pre_dep->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, + "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); + return false; + } + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { + dev_dbg(kbdev->dev, + "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); + return false; + } + } + + dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + + return true; +} + +static bool kbase_js_dep_validate(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool ret = true; + bool 
has_dep = false, has_x_dep = false; + int js = kbase_js_get_slot(kbdev, katom); + int prio = katom->sched_priority; + int i; + + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + + if (dep_atom) { + int dep_js = kbase_js_get_slot(kbdev, dep_atom); + int dep_prio = dep_atom->sched_priority; + + dev_dbg(kbdev->dev, + "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, dep_js); + + /* Dependent atom must already have been submitted */ + if (!(dep_atom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + dev_dbg(kbdev->dev, + "Blocker not submitted yet\n"); + ret = false; + break; + } + + /* Dependencies with different priorities can't + be represented in the ringbuffer */ + if (prio != dep_prio) { + dev_dbg(kbdev->dev, + "Different atom priorities\n"); + ret = false; + break; + } + + if (js == dep_js) { + /* Only one same-slot dependency can be + * represented in the ringbuffer */ + if (has_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot deps\n"); + ret = false; + break; + } + /* Each dependee atom can only have one + * same-slot dependency */ + if (dep_atom->post_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot successors\n"); + ret = false; + break; + } + has_dep = true; + } else { + /* Only one cross-slot dependency can be + * represented in the ringbuffer */ + if (has_x_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot deps\n"); + ret = false; + break; + } + /* Each dependee atom can only have one + * cross-slot dependency */ + if (dep_atom->x_post_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot successors\n"); + ret = false; + break; + } + /* The dependee atom can not already be in the + * HW access ringbuffer */ + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + dev_dbg(kbdev->dev, + "Blocker already in ringbuffer (state:%d)\n", + dep_atom->gpu_rb_state); + ret = false; + break; + } + /* The dependee atom can not already have + * completed */ + if (dep_atom->status != + KBASE_JD_ATOM_STATE_IN_JS) { + dev_dbg(kbdev->dev, + "Blocker already completed (status:%d)\n", + dep_atom->status); + ret = false; + break; + } + + has_x_dep = true; + } + + /* Dependency can be represented in ringbuffers */ + } + } + + /* If dependencies can be represented by ringbuffer then clear them from + * atom structure */ + if (ret) { + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + + if (dep_atom) { + int dep_js = kbase_js_get_slot(kbdev, dep_atom); + + dev_dbg(kbdev->dev, + "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, + dep_js); + + if ((js != dep_js) && + (dep_atom->status != + KBASE_JD_ATOM_STATE_COMPLETED) + && (dep_atom->status != + KBASE_JD_ATOM_STATE_HW_COMPLETED) + && (dep_atom->status != + KBASE_JD_ATOM_STATE_UNUSED)) { + + katom->atom_flags |= + KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n", + (void *)katom); + + katom->x_pre_dep = dep_atom; + dep_atom->x_post_dep = katom; + if (kbase_jd_katom_dep_type( + &katom->dep[i]) == + BASE_JD_DEP_TYPE_DATA) + katom->atom_flags |= + KBASE_KATOM_FLAG_FAIL_BLOCKER; + } + if ((kbase_jd_katom_dep_type(&katom->dep[i]) + == BASE_JD_DEP_TYPE_DATA) && + (js == dep_js)) { + katom->pre_dep = dep_atom; + dep_atom->post_dep = katom; + } + + list_del(&katom->dep_item[i]); + kbase_jd_katom_dep_clear(&katom->dep[i]); + } + } + } else { + dev_dbg(kbdev->dev, + "Deps of atom %p (s:%d) could not be represented\n", + (void *)katom, js); + } + + return 
ret; +} + +void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) +{ + struct kbase_device *kbdev = kctx->kbdev; + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Move kctx to the pullable/upullable list as per the new priority */ + if (new_priority != kctx->priority) { + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kctx->slots_pullable & (1 << js)) + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js][new_priority]); + else + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_unpullable[js][new_priority]); + } + + kctx->priority = new_priority; + } +} + +void kbase_js_update_ctx_priority(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW; + int prio; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) { + /* Determine the new priority for context, as per the priority + * of currently in-use atoms. + */ + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + if (kctx->atoms_count[prio]) { + new_priority = prio; + break; + } + } + } + + kbase_js_set_ctx_priority(kctx, new_priority); +} + +/** + * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler + * @start_katom: Pointer to the atom to be added. + * Return: 0 if successful or a negative value on failure. + */ +static int js_add_start_rp(struct kbase_jd_atom *const start_katom) +{ + struct kbase_context *const kctx = start_katom->kctx; + struct kbase_jd_renderpass *rp; + struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return -EINVAL; + + if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS) + return -EINVAL; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (rp->state != KBASE_JD_RP_COMPLETE) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n", + (void *)start_katom, start_katom->renderpass_id); + + /* The following members are read when updating the job slot + * ringbuffer/fifo therefore they require additional locking. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + rp->state = KBASE_JD_RP_START; + rp->start_katom = start_katom; + rp->end_katom = NULL; + INIT_LIST_HEAD(&rp->oom_reg_list); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return 0; +} + +/** + * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler + * @end_katom: Pointer to the atom to be added. + * Return: 0 if successful or a negative value on failure. 
+ */ +static int js_add_end_rp(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_jd_renderpass *rp; + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return -EINVAL; + + if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS) + return -EINVAL; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (rp->state == KBASE_JD_RP_COMPLETE) + return -EINVAL; + + if (rp->end_katom == NULL) { + /* We can't be in a retry state until the fragment job chain + * has completed. + */ + unsigned long flags; + + WARN_ON(rp->state == KBASE_JD_RP_RETRY); + WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM); + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + rp->end_katom = end_katom; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else + WARN_ON(rp->end_katom != end_katom); + + return 0; +} + +bool kbasep_js_add_job(struct kbase_context *kctx, + struct kbase_jd_atom *atom) +{ + unsigned long flags; + struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + int err = 0; + + bool enqueue_required = false; + bool timer_sync = false; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + lockdep_assert_held(&kctx->jctx.lock); + + kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + if (atom->core_req & BASE_JD_REQ_START_RENDERPASS) + err = js_add_start_rp(atom); + else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS) + err = js_add_end_rp(atom); + + if (err < 0) { + atom->event_code = BASE_JD_EVENT_JOB_INVALID; + atom->status = KBASE_JD_ATOM_STATE_COMPLETED; + goto out_unlock; + } + + /* + * Begin Runpool transaction + */ + mutex_lock(&js_devdata->runpool_mutex); + + /* Refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); + ++(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + /* Lock for state available during IRQ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (++kctx->atoms_count[atom->sched_priority] == 1) + kbase_js_update_ctx_priority(kctx); + + if (!kbase_js_dep_validate(kctx, atom)) { + /* Dependencies could not be represented */ + --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, + "Remove atom %p from kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + /* Setting atom status back to queued as it still has unresolved + * dependencies */ + atom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom); + + /* Undo the count, as the atom will get added again later but + * leave the context priority adjusted or boosted, in case if + * this was the first higher priority atom received for this + * context. 
+ * This will prevent the scenario of priority inversion, where + * another context having medium priority atoms keeps getting + * scheduled over this context, which is having both lower and + * higher priority atoms, but higher priority atoms are blocked + * due to dependency on lower priority atoms. With priority + * boost the high priority atom will get to run at earliest. + */ + kctx->atoms_count[atom->sched_priority]--; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + + goto out_unlock; + } + + enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); + + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, + kbase_ktrace_get_ctx_refcnt(kctx)); + + /* Context Attribute Refcounting */ + kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); + + if (enqueue_required) { + if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, atom->slot_nr); + else + timer_sync = kbase_js_ctx_list_add_unpullable_nolock( + kbdev, kctx, atom->slot_nr); + } + /* If this context is active and the atom is the first on its slot, + * kick the job manager to attempt to fast-start the atom */ + if (enqueue_required && kctx == + kbdev->hwaccess.active_kctx[atom->slot_nr]) + kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + /* End runpool transaction */ + + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { + if (kbase_ctx_flag(kctx, KCTX_DYING)) { + /* A job got added while/after kbase_job_zap_context() + * was called on a non-scheduled context. Kill that job + * by killing the context. */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, + false); + } else if (js_kctx_info->ctx.nr_jobs == 1) { + /* Handle Refcount going from 0 to 1: schedule the + * context on the Queue */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); + + /* Queue was updated - caller must try to + * schedule the head context */ + WARN_ON(!enqueue_required); + } + } +out_unlock: + dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n", + kctx, enqueue_required ? 
"" : "not "); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_unlock(&js_devdata->queue_mutex); + + return enqueue_required; +} + +void kbasep_js_remove_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *atom) +{ + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, + kbase_ktrace_get_ctx_refcnt(kctx)); + + /* De-refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); + --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, + "Remove atom %p from kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (--kctx->atoms_count[atom->sched_priority] == 0) + kbase_js_update_ctx_priority(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + unsigned long flags; + struct kbasep_js_atom_retained_state katom_retained_state; + bool attr_state_changed; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + kbasep_js_remove_job(kbdev, kctx, katom); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* The atom has 'finished' (will not be re-run), so no need to call + * kbasep_js_has_atom_finished(). + * + * This is because it returns false for soft-stopped atoms, but we + * want to override that, because we're cancelling an atom regardless of + * whether it was soft-stopped or not */ + attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, + &katom_retained_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return attr_state_changed; +} + +/** + * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after + * releasing a context and/or atom + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @katom_retained_state: Retained state from the atom + * @runpool_ctx_attr_change: True if the runpool context attributes have changed + * + * This collates a set of actions that must happen whilst hwaccess_lock is held. + * + * This includes running more jobs when: + * - The previously released kctx caused a ctx attribute change, + * - The released atom caused a ctx attribute change, + * - Slots were previously blocked due to affinity restrictions, + * - Submission during IRQ handling failed. + * + * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were + * changed. 
The caller should try scheduling all contexts + */ +static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state, + bool runpool_ctx_attr_change) +{ + struct kbasep_js_device_data *js_devdata; + kbasep_js_release_result result = 0; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom_retained_state != NULL); + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) { + /* A change in runpool ctx attributes might mean we can + * run more jobs than before */ + result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, + kctx, NULL, 0u, 0); + } + return result; +} + +/** + * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference + * on a ctx and an atom's "retained state", only + * taking the runpool and as transaction mutexes + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @katom_retained_state: Retained state from the atom + * + * This also starts more jobs running in the case of an ctx-attribute state change + * + * This does none of the followup actions for scheduling: + * - It does not schedule in a new context + * - It does not requeue or handle dying contexts + * + * For those tasks, just call kbasep_js_runpool_release_ctx() instead + * + * Has following requirements + * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr + * - Context has a non-zero refcount + * - Caller holds js_kctx_info->ctx.jsctx_mutex + * - Caller holds js_devdata->runpool_mutex + * + * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating + * the result of releasing a context that whether the caller should try + * scheduling a new context or should try scheduling all contexts. + */ +static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + kbasep_js_release_result release_result = 0u; + bool runpool_ctx_attr_change = false; + int kctx_as_nr; + int new_ref_count; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + kctx_as_nr = kctx->as_nr; + KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + + /* + * Transaction begins on AS and runpool_irq + * + * Assert about out calling contract + */ + mutex_lock(&kbdev->pm.lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); + KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + + /* Update refcount */ + kbase_ctx_sched_release_ctx(kctx); + new_ref_count = atomic_read(&kctx->refcount); + + /* Release the atom if it finished (i.e. 
wasn't soft-stopped) */ + if (kbasep_js_has_atom_finished(katom_retained_state)) + runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( + kbdev, kctx, katom_retained_state); + + if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && +#ifdef CONFIG_MALI_ARBITER_SUPPORT + !kbase_pm_is_gpu_lost(kbdev) && +#endif + !kbase_pm_is_suspending(kbdev)) { + /* Context is kept scheduled into an address space even when + * there are no jobs, in this case we have to handle the + * situation where all jobs have been evicted from the GPU and + * submission is disabled. + * + * At this point we re-enable submission to allow further jobs + * to be executed + */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + } + + /* Make a set of checks to see if the context should be scheduled out. + * Note that there'll always be at least 1 reference to the context + * which was previously acquired by kbasep_js_schedule_ctx(). */ + if (new_ref_count == 1 && + (!kbasep_js_is_submit_allowed(js_devdata, kctx) || +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_pm_is_gpu_lost(kbdev) || +#endif + kbase_pm_is_suspending(kbdev))) { + int num_slots = kbdev->gpu_props.num_job_slots; + int slot; + + /* Last reference, and we've been told to remove this context + * from the Run Pool */ + dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", + kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, + kbasep_js_is_submit_allowed(js_devdata, kctx)); + + KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); + + kbase_backend_release_ctx_irq(kbdev, kctx); + + for (slot = 0; slot < num_slots; slot++) { + if (kbdev->hwaccess.active_kctx[slot] == kctx) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)kctx, slot); + kbdev->hwaccess.active_kctx[slot] = NULL; + } + } + + /* Ctx Attribute handling + * + * Releasing atoms attributes must either happen before this, or + * after the KCTX_SHEDULED flag is changed, otherwise we + * double-decount the attributes + */ + runpool_ctx_attr_change |= + kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); + + /* Releasing the context and katom retained state can allow + * more jobs to run */ + release_result |= + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, + kctx, katom_retained_state, + runpool_ctx_attr_change); + + /* + * Transaction ends on AS and runpool_irq: + * + * By this point, the AS-related data is now clear and ready + * for re-use. + * + * Since releases only occur once for each previous successful + * retain, and no more retains are allowed on this context, no + * other thread will be operating in this + * code whilst we are + */ + + /* Recalculate pullable status for all slots */ + for (slot = 0; slot < num_slots; slot++) { + if (kbase_js_ctx_pullable(kctx, slot, false)) + kbase_js_ctx_list_add_pullable_nolock(kbdev, + kctx, slot); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_backend_release_ctx_noirq(kbdev, kctx); + + mutex_unlock(&kbdev->pm.lock); + + /* Note: Don't reuse kctx_as_nr now */ + + /* Synchronize with any timers */ + kbase_backend_ctx_count_changed(kbdev); + + /* update book-keeping info */ + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + /* Signal any waiter that the context is not scheduled, so is + * safe for termination - once the jsctx_mutex is also dropped, + * and jobs have finished. 
*/ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Queue an action to occur after we've dropped the lock */ + release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | + KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + } else { + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, + katom_retained_state, runpool_ctx_attr_change); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->pm.lock); + } + + return release_result; +} + +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_atom_retained_state katom_retained_state; + + /* Setup a dummy katom_retained_state */ + kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + + kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + &katom_retained_state); +} + +void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, bool has_pm_ref) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + /* This is called if and only if you've you've detached the context from + * the Runpool Queue, and not added it back to the Runpool + */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + if (kbase_ctx_flag(kctx, KCTX_DYING)) { + /* Dying: don't requeue, but kill all jobs on the context. This + * happens asynchronously */ + dev_dbg(kbdev->dev, + "JS: ** Killing Context %p on RunPool Remove **", kctx); + kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); + } +} + +void kbasep_js_runpool_release_ctx_and_katom_retained_state( + struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + kbasep_js_release_result release_result; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + katom_retained_state); + + /* Drop the runpool mutex to allow requeing kctx */ + mutex_unlock(&js_devdata->runpool_mutex); + + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + + /* Drop the jsctx_mutex to allow scheduling in a new context */ + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL) + kbase_js_sched_all(kbdev); +} + +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_atom_retained_state katom_retained_state; + + kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, + &katom_retained_state); +} + +/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into + * kbase_js_sched_all() */ +static void kbasep_js_runpool_release_ctx_no_schedule( + struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + kbasep_js_release_result release_result; + struct kbasep_js_atom_retained_state katom_retained_state_struct; + struct kbasep_js_atom_retained_state *katom_retained_state = + &katom_retained_state_struct; + + 
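/*
 * Editorial sketch, not part of this patch: the release helpers above hand
 * back a kbasep_js_release_result bit-pattern, and callers such as
 * kbasep_js_runpool_release_ctx_and_katom_retained_state() test the
 * WAS_DESCHEDULED and SCHED_ALL bits to decide what follow-up work to do.
 * The flag values below mirror that scheme, but act_on_release_result() and
 * its callback parameters are hypothetical; they only illustrate the calling
 * pattern. A bit-pattern is used so that a single return value can request
 * several follow-up actions at once.
 */
typedef unsigned int release_result_t;

#define RELEASE_RESULT_WAS_DESCHEDULED (1u << 0)
#define RELEASE_RESULT_SCHED_ALL       (1u << 1)

static void act_on_release_result(release_result_t result, void *ctx,
                                  void (*requeue_or_kill_ctx)(void *ctx),
                                  void (*sched_all)(void))
{
        /* The context left the run pool: requeue it, or kill its jobs if it
         * is dying.
         */
        if (result & RELEASE_RESULT_WAS_DESCHEDULED)
                requeue_or_kill_ctx(ctx);

        /* A context-attribute change may let more jobs run: try to schedule
         * every context.
         */
        if (result & RELEASE_RESULT_SCHED_ALL)
                sched_all();
}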
KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + kbasep_js_atom_retained_state_init_invalid(katom_retained_state); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + katom_retained_state); + + /* Drop the runpool mutex to allow requeing kctx */ + mutex_unlock(&js_devdata->runpool_mutex); + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + + /* Drop the jsctx_mutex to allow scheduling in a new context */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* NOTE: could return release_result if the caller would like to know + * whether it should schedule a new context, but currently no callers do + */ +} + +void kbase_js_set_timeouts(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_backend_timeouts_changed(kbdev); +} + +static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + bool kctx_suspended = false; + int as_nr; + + dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + /* Pick available address space for this context */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + as_nr = kbase_ctx_sched_retain_ctx(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + if (as_nr == KBASEP_AS_NR_INVALID) { + as_nr = kbase_backend_find_and_release_free_address_space( + kbdev, kctx); + if (as_nr != KBASEP_AS_NR_INVALID) { + /* Attempt to retain the context again, this should + * succeed */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + as_nr = kbase_ctx_sched_retain_ctx(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + WARN_ON(as_nr == KBASEP_AS_NR_INVALID); + } + } + if (as_nr == KBASEP_AS_NR_INVALID) + return false; /* No address spaces currently available */ + + /* + * Atomic transaction on the Context and Run Pool begins + */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Check to see if context is dying due to kbase_job_zap_context() */ + if (kbase_ctx_flag(kctx, KCTX_DYING)) { + /* Roll back the transaction so far and return */ + kbase_ctx_sched_release_ctx(kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return false; + } + + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, + 0u, + kbase_ktrace_get_ctx_refcnt(kctx)); + + kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); + + /* Assign context to previously chosen address space */ + if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { + /* Roll back the transaction so far and return */ + kbase_ctx_sched_release_ctx(kctx); + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + 
mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return false; + } + + kbdev->hwaccess.active_kctx[js] = kctx; + + KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); + + /* Cause any future waiter-on-termination to wait until the context is + * descheduled */ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Re-check for suspending: a suspend could've occurred, and all the + * contexts could've been removed from the runpool before we took this + * lock. In this case, we don't want to allow this context to run jobs, + * we just want it out immediately. + * + * The DMB required to read the suspend flag was issued recently as part + * of the hwaccess_lock locking. If a suspend occurs *after* that lock + * was taken (i.e. this condition doesn't execute), then the + * kbasep_js_suspend() code will cleanup this context instead (by virtue + * of it being called strictly after the suspend flag is set, and will + * wait for this lock to drop) */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { +#else + if (kbase_pm_is_suspending(kbdev)) { +#endif + /* Cause it to leave at some later point */ + bool retained; + + retained = kbase_ctx_sched_inc_refcount_nolock(kctx); + KBASE_DEBUG_ASSERT(retained); + + kbasep_js_clear_submit_allowed(js_devdata, kctx); + kctx_suspended = true; + } + + kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js); + + /* Transaction complete */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* Synchronize with any timers */ + kbase_backend_ctx_count_changed(kbdev); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + /* Note: after this point, the context could potentially get scheduled + * out immediately */ + + if (kctx_suspended) { + /* Finishing forcing out the context due to a suspend. Use a + * variant of kbasep_js_runpool_release_ctx() that doesn't + * schedule a new context, to prevent a risk of recursion back + * into this function */ + kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); + return false; + } + return true; +} + +static bool kbase_js_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_backend_use_ctx_sched(kbdev, kctx, js)) { + + dev_dbg(kbdev->dev, + "kctx %p already has ASID - mark as active (s:%d)\n", + (void *)kctx, js); + + if (kbdev->hwaccess.active_kctx[js] != kctx) { + kbdev->hwaccess.active_kctx[js] = kctx; + kbase_ctx_flag_clear(kctx, + KCTX_PULLED_SINCE_ACTIVE_JS0 << js); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return true; /* Context already scheduled */ + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return kbasep_js_schedule_ctx(kbdev, kctx, js); +} + +void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info; + struct kbasep_js_device_data *js_devdata; + bool is_scheduled; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* This should only happen in response to a system call + * from a user-space thread. + * In a non-arbitrated environment this can never happen + * whilst suspending. 
+ * + * In an arbitrated environment, user-space threads can run + * while we are suspended (for example GPU not available + * to this VM), however in that case we will block on + * the wait event for KCTX_SCHEDULED, since no context + * can be scheduled until we have the GPU again. + */ + if (kbdev->arb.arb_if == NULL) + if (WARN_ON(kbase_pm_is_suspending(kbdev))) + return; +#else + /* This should only happen in response to a system call + * from a user-space thread. + * In a non-arbitrated environment this can never happen + * whilst suspending. + */ + if (WARN_ON(kbase_pm_is_suspending(kbdev))) + return; +#endif + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* Mark the context as privileged */ + kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); + + is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); + if (!is_scheduled) { + /* Add the context to the pullable list */ + if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) + kbase_js_sync_timers(kbdev); + + /* Fast-starting requires the jsctx_mutex to be dropped, + * because it works on multiple ctxs */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + /* Try to schedule the context in */ + kbase_js_sched_all(kbdev); + + /* Wait for the context to be scheduled in */ + wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, + kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + } else { + /* Already scheduled in - We need to retain it to keep the + * corresponding address space */ + WARN_ON(!kbase_ctx_sched_inc_refcount(kctx)); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + } +} +KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); + +void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + + /* We don't need to use the address space anymore */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* Release the context - it will be scheduled out */ + kbasep_js_runpool_release_ctx(kbdev, kctx); + + kbase_js_sched_all(kbdev); +} +KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx); + +void kbasep_js_suspend(struct kbase_device *kbdev) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + int i; + u16 retained = 0u; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Prevent all contexts from submitting */ + js_devdata->runpool_irq.submit_allowed = 0; + + /* Retain each of the contexts, so we can cause it to leave even if it + * had no refcount to begin with */ + for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { + struct kbase_context *kctx = kbdev->as_to_kctx[i]; + + retained = retained << 1; + + if (kctx && !(kbdev->as_free & (1u << i))) { + kbase_ctx_sched_retain_ctx_refcount(kctx); + retained |= 1u; + /* This loop will not have an effect on the privileged + * contexts as they would have an extra ref count + * compared to the normal contexts, so they will hold + * on to their address spaces. MMU will re-enabled for + * them on resume. + */ + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* De-ref the previous retain to ensure each context gets pulled out + * sometime later. 
*/ + for (i = 0; + i < BASE_MAX_NR_AS; + ++i, retained = retained >> 1) { + struct kbase_context *kctx = kbdev->as_to_kctx[i]; + + if (retained & 1u) + kbasep_js_runpool_release_ctx(kbdev, kctx); + } + + /* Caller must wait for all Power Manager active references to be + * dropped */ +} + +void kbasep_js_resume(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + int js, prio; + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + mutex_lock(&js_devdata->queue_mutex); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + struct kbase_context *kctx, *n; + unsigned long flags; + +#ifndef CONFIG_MALI_ARBITER_SUPPORT + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + list_for_each_entry_safe(kctx, n, + &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { + struct kbasep_js_kctx_info *js_kctx_info; + bool timer_sync = false; + + /* Drop lock so we can take kctx mutexes */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync = + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* Take lock before accessing list again */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#else + bool timer_sync = false; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + list_for_each_entry_safe(kctx, n, + &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { + + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (timer_sync) { + mutex_lock(&js_devdata->runpool_mutex); + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + } +#endif + } + } + mutex_unlock(&js_devdata->queue_mutex); + + /* Restart atom processing */ + kbase_js_sched_all(kbdev); + + /* JS Resume complete */ +} + +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + if ((katom->core_req & BASE_JD_REQ_FS) && + (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | + BASE_JD_REQ_T))) + return false; + + if ((katom->core_req & BASE_JD_REQ_JOB_SLOT) && + (katom->jobslot >= BASE_JM_MAX_NR_SLOTS)) + return false; + + return true; +} + +static int kbase_js_get_slot(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + if (katom->core_req & BASE_JD_REQ_JOB_SLOT) + return katom->jobslot; + + if (katom->core_req & BASE_JD_REQ_FS) + return 0; + + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + if (katom->device_nr == 1 && + kbdev->gpu_props.num_core_groups == 2) + return 2; + } + + return 1; +} + +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + bool enqueue_required, add_required = true; + + 
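+ /* Determine the target slot first: whether that slot currently has
+ * nothing to pull decides if the context needs (re-)enqueueing.
+ */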
katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->jctx.lock); + + /* If slot will transition from unpullable to pullable then add to + * pullable list */ + if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { + enqueue_required = true; + } else { + enqueue_required = false; + } + + if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || + (katom->pre_dep && (katom->pre_dep->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + + dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n", + (void *)katom, js); + + list_add_tail(&katom->queue, &queue->x_dep_head); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + if (kbase_js_atom_blocked_on_x_dep(katom)) { + enqueue_required = false; + add_required = false; + } + } else { + dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n", + (void *)katom); + } + + if (add_required) { + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + + /* Add atom to ring buffer. */ + jsctx_tree_add(kctx, katom); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } + + dev_dbg(kctx->kbdev->dev, + "Enqueue of kctx %p is %srequired to submit atom %p\n", + kctx, enqueue_required ? "" : "not ", katom); + + return enqueue_required; +} + +/** + * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the + * runnable_tree, ready for execution + * @katom: Atom to submit + * + * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, + * but is still present in the x_dep list. If @katom has a same-slot dependent + * atom then that atom (and any dependents) will also be moved. + */ +static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) +{ + struct kbase_context *const kctx = katom->kctx; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + while (katom) { + WARN_ON(!(katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + + if (!kbase_js_atom_blocked_on_x_dep(katom)) { + dev_dbg(kctx->kbdev->dev, + "Del atom %p from X_DEP list in js_move_to_tree\n", + (void *)katom); + + list_del(&katom->queue); + katom->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + /* For incremental rendering, an end-of-renderpass atom + * may have had its dependency on start-of-renderpass + * ignored and may therefore already be in the tree. + */ + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + jsctx_tree_add(kctx, katom); + katom->atom_flags |= + KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } + } else { + dev_dbg(kctx->kbdev->dev, + "Atom %p blocked on x-dep in js_move_to_tree\n", + (void *)katom); + break; + } + + katom = katom->post_dep; + } +} + + +/** + * kbase_js_evict_deps - Evict dependencies of a failed atom. + * @kctx: Context pointer + * @katom: Pointer to the atom that has failed. + * @js: The job slot the katom was run on. + * @prio: Priority of the katom. + * + * Remove all post dependencies of an atom from the context ringbuffers. + * + * The original atom's event_code will be propogated to all dependent atoms. 
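+ * Any cross-slot dependent atom that is still queued also has its
+ * X_DEP_BLOCKED flag cleared and, if it carried a data dependency,
+ * inherits the failure code.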
+ * + * Context: Caller must hold the HW access lock + */ +static void kbase_js_evict_deps(struct kbase_context *kctx, + struct kbase_jd_atom *katom, int js, int prio) +{ + struct kbase_jd_atom *x_dep = katom->x_post_dep; + struct kbase_jd_atom *next_katom = katom->post_dep; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (next_katom) { + KBASE_DEBUG_ASSERT(next_katom->status != + KBASE_JD_ATOM_STATE_HW_COMPLETED); + next_katom->will_fail_event_code = katom->event_code; + + } + + /* Has cross slot depenency. */ + if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + /* Remove dependency.*/ + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + trace_sysgraph(SGR_DEP_RES, kctx->id, + kbase_jd_atom_id(kctx, x_dep)); + + dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n", + (void *)x_dep); + + /* Fail if it had a data dependency. */ + if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { + x_dep->will_fail_event_code = katom->event_code; + } + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) + kbase_js_move_to_tree(x_dep); + } +} + +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) +{ + struct kbase_jd_atom *katom; + struct kbasep_js_device_data *js_devdata; + struct kbase_device *kbdev; + int pulled; + + KBASE_DEBUG_ASSERT(kctx); + + kbdev = kctx->kbdev; + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n", + (void *)kctx, js); + + js_devdata = &kbdev->js_data; + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + (void *)kctx); + return NULL; + } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) +#else + if (kbase_pm_is_suspending(kbdev)) +#endif + return NULL; + + katom = jsctx_rb_peek(kctx, js); + if (!katom) { + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + (void *)kctx, js); + return NULL; + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, + "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); + return NULL; + } + if (atomic_read(&katom->blocked)) { + dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n", + (void *)katom); + return NULL; + } + + /* Due to ordering restrictions when unpulling atoms on failure, we do + * not allow multiple runs of fail-dep atoms from the same context to be + * present on the same slot */ + if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { + struct kbase_jd_atom *prev_atom = + kbase_backend_inspect_tail(kbdev, js); + + if (prev_atom && prev_atom->kctx != kctx) + return NULL; + } + + if (kbase_js_atom_blocked_on_x_dep(katom)) { + if (katom->x_pre_dep->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, + "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); + return NULL; + } + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kbdev, js)) { + dev_dbg(kbdev->dev, + "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); + return NULL; + } + } + + kbase_ctx_flag_set(kctx, KCTX_PULLED); + kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js)); + + pulled = atomic_inc_return(&kctx->atoms_pulled); + if (pulled == 1 && !kctx->slots_pullable) { + 
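+ /* First atom pulled whilst no slot was pullable: count the context
+ * as runnable from now on.
+ */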
WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } + atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); + kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++; + jsctx_rb_pull(kctx, katom); + + kbase_ctx_sched_retain_ctx_refcount(kctx); + + katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF; + + katom->ticks = 0; + + dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + + return katom; +} + +/** + * js_return_of_start_rp() - Handle soft-stop of an atom that starts a + * renderpass + * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped + * + * This function is called to switch to incremental rendering if the tiler job + * chain at the start of a renderpass has used too much memory. It prevents the + * tiler job being pulled for execution in the job scheduler again until the + * next phase of incremental rendering is complete. + * + * If the end-of-renderpass atom is already in the job scheduler (because a + * previous attempt at tiling used too much memory during the same renderpass) + * then it is unblocked; otherwise, it is run by handing it to the scheduler. + */ +static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) +{ + struct kbase_context *const kctx = start_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + struct kbase_jd_atom *end_katom; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (WARN_ON(rp->start_katom != start_katom)) + return; + + dev_dbg(kctx->kbdev->dev, + "JS return start atom %p in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return; + + /* The tiler job might have been soft-stopped for some reason other + * than running out of memory. + */ + if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) { + dev_dbg(kctx->kbdev->dev, + "JS return isn't OOM in state %d of RP %d\n", + (int)rp->state, start_katom->renderpass_id); + return; + } + + dev_dbg(kctx->kbdev->dev, + "JS return confirm OOM in state %d of RP %d\n", + (int)rp->state, start_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && + rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) + return; + + /* Prevent the tiler job being pulled for execution in the + * job scheduler again. + */ + dev_dbg(kbdev->dev, "Blocking start atom %p\n", + (void *)start_katom); + atomic_inc(&start_katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? + KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; + + /* Was the fragment job chain submitted to kbase yet? */ + end_katom = rp->end_katom; + if (end_katom) { + dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n", + (void *)end_katom); + + if (rp->state == KBASE_JD_RP_RETRY_OOM) { + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. 
+ */ + dev_dbg(kbdev->dev, "Unblocking end atom %p\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + WARN_ON(!(end_katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + end_katom->slot_nr); + + /* Expect the fragment job chain to be scheduled without + * further action because this function is called when + * returning an atom to the job scheduler ringbuffer. + */ + end_katom = NULL; + } else { + WARN_ON(end_katom->status != + KBASE_JD_ATOM_STATE_QUEUED && + end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (end_katom) + kbase_jd_dep_clear_locked(end_katom); +} + +/** + * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass + * @end_katom: Pointer to the end-of-renderpass atom that was completed + * + * This function is called to continue incremental rendering if the tiler job + * chain at the start of a renderpass used too much memory. It resets the + * mechanism for detecting excessive memory usage then allows the soft-stopped + * tiler job chain to be pulled for execution again. + * + * The start-of-renderpass atom must already been submitted to kbase. + */ +static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + struct kbase_jd_atom *start_katom; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return; + + dev_dbg(kctx->kbdev->dev, + "JS return end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_OOM && + rp->state != KBASE_JD_RP_RETRY_OOM)) + return; + + /* Reduce the number of mapped pages in the memory regions that + * triggered out-of-memory last time so that we can detect excessive + * memory usage again. + */ + kbase_gpu_vm_lock(kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (!list_empty(&rp->oom_reg_list)) { + struct kbase_va_region *reg = + list_first_entry(&rp->oom_reg_list, + struct kbase_va_region, link); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, + "Reset backing to %zu pages for region %p\n", + reg->threshold_pages, (void *)reg); + + if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) + kbase_mem_shrink(kctx, reg, reg->threshold_pages); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + dev_dbg(kbdev->dev, "Deleting region %p from list\n", + (void *)reg); + list_del_init(®->link); + kbase_va_region_alloc_put(kctx, reg); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_gpu_vm_unlock(kctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + rp->state = KBASE_JD_RP_RETRY; + dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state); + + /* Allow the start of the renderpass to be pulled for execution again + * to begin/continue incremental rendering. 
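+ * The start atom was blocked in js_return_of_start_rp() when the
+ * out-of-memory condition was confirmed.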
+ */ + start_katom = rp->start_katom; + if (!WARN_ON(!start_katom)) { + dev_dbg(kbdev->dev, "Unblocking start atom %p\n", + (void *)start_katom); + atomic_dec(&start_katom->blocked); + (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, + start_katom->slot_nr); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static void js_return_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + struct kbasep_js_atom_retained_state retained_state; + int js = katom->slot_nr; + int prio = katom->sched_priority; + bool timer_sync = false; + bool context_idle = false; + unsigned long flags; + base_jd_core_req core_req = katom->core_req; + + dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n", + __func__, (void *)katom, katom->event_code); + + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); + + kbase_backend_complete_wq(kbdev, katom); + + kbasep_js_atom_retained_state_copy(&retained_state, katom); + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + atomic_dec(&kctx->atoms_pulled); + atomic_dec(&kctx->atoms_pulled_slot[js]); + + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) + atomic_dec(&katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--; + + if (!atomic_read(&kctx->atoms_pulled_slot[js]) && + jsctx_rb_none_to_pull(kctx, js)) + timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); + + /* If this slot has been blocked due to soft-stopped atoms, and all + * atoms have now been processed, then unblock the slot */ + if (!kctx->atoms_pulled_slot_pri[js][prio] && + kctx->blocked_js[js][prio]) { + kctx->blocked_js[js][prio] = false; + + /* Only mark the slot as pullable if the context is not idle - + * that case is handled below */ + if (atomic_read(&kctx->atoms_pulled) && + kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + + if (!atomic_read(&kctx->atoms_pulled)) { + dev_dbg(kbdev->dev, + "No atoms currently pulled from context %p\n", + (void *)kctx); + + if (!kctx->slots_pullable) { + dev_dbg(kbdev->dev, + "Context %p %s counted as runnable\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? + "is" : "isn't"); + + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + timer_sync = true; + } + + if (kctx->as_nr != KBASEP_AS_NR_INVALID && + !kbase_ctx_flag(kctx, KCTX_DYING)) { + int num_slots = kbdev->gpu_props.num_job_slots; + int slot; + + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + kbasep_js_set_submit_allowed(js_devdata, kctx); + + for (slot = 0; slot < num_slots; slot++) { + if (kbase_js_ctx_pullable(kctx, slot, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, slot); + } + } + + kbase_jm_idle_ctx(kbdev, kctx); + + context_idle = true; + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (context_idle) { + dev_dbg(kbdev->dev, + "Context %p %s counted as active\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_ACTIVE) ? 
+ "is" : "isn't"); + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + kbase_pm_context_idle(kbdev); + } + + if (timer_sync) + kbase_js_sync_timers(kbdev); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) { + mutex_lock(&kctx->jctx.lock); + js_return_of_start_rp(katom); + mutex_unlock(&kctx->jctx.lock); + } else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) { + mutex_lock(&kctx->jctx.lock); + js_return_of_end_rp(katom); + mutex_unlock(&kctx->jctx.lock); + } + + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; + dev_dbg(kbdev->dev, "JS: retained state %s finished", + kbasep_js_has_atom_finished(&retained_state) ? + "has" : "hasn't"); + + WARN_ON(kbasep_js_has_atom_finished(&retained_state)); + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, + &retained_state); + + kbase_js_sched_all(kbdev); + + kbase_backend_complete_wq_post_sched(kbdev, core_req); + + dev_dbg(kbdev->dev, "Leaving %s for atom %p\n", + __func__, (void *)katom); +} + +void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n", + (void *)katom, (void *)kctx); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + jsctx_rb_unpull(kctx, katom); + + WARN_ON(work_pending(&katom->work)); + + /* Block re-submission until workqueue has run */ + atomic_inc(&katom->blocked); + + kbase_job_check_leave_disjoint(kctx->kbdev, katom); + + INIT_WORK(&katom->work, js_return_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +/** + * js_complete_start_rp() - Handle completion of atom that starts a renderpass + * @kctx: Context pointer + * @start_katom: Pointer to the atom that completed + * + * Put any references to virtual memory regions that might have been added by + * kbase_job_slot_softstop_start_rp() because the tiler job chain completed + * despite any pending soft-stop request. + * + * If the atom that just completed was soft-stopped during a previous attempt to + * run it then there should be a blocked end-of-renderpass atom waiting for it, + * which we must unblock to process the output of the tiler job chain. 
+ * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool js_complete_start_rp(struct kbase_context *kctx, + struct kbase_jd_atom *const start_katom) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + bool timer_sync = false; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return false; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (WARN_ON(rp->start_katom != start_katom)) + return false; + + dev_dbg(kctx->kbdev->dev, + "Start atom %p is done in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return false; + + if (rp->state == KBASE_JD_RP_PEND_OOM || + rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + unsigned long flags; + + dev_dbg(kctx->kbdev->dev, + "Start atom %p completed before soft-stop\n", + (void *)start_katom); + + kbase_gpu_vm_lock(kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (!list_empty(&rp->oom_reg_list)) { + struct kbase_va_region *reg = + list_first_entry(&rp->oom_reg_list, + struct kbase_va_region, link); + + WARN_ON(reg->flags & KBASE_REG_VA_FREED); + dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n", + (void *)reg); + list_del_init(®->link); + kbase_va_region_alloc_put(kctx, reg); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_gpu_vm_unlock(kctx); + } else { + dev_dbg(kctx->kbdev->dev, + "Start atom %p did not exceed memory threshold\n", + (void *)start_katom); + + WARN_ON(rp->state != KBASE_JD_RP_START && + rp->state != KBASE_JD_RP_RETRY); + } + + if (rp->state == KBASE_JD_RP_RETRY || + rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + struct kbase_jd_atom *const end_katom = rp->end_katom; + + if (!WARN_ON(!end_katom)) { + unsigned long flags; + + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. + */ + dev_dbg(kbdev->dev, "Unblocking end atom %p!\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, end_katom->slot_nr); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } + + return timer_sync; +} + +/** + * js_complete_end_rp() - Handle final completion of atom that ends a renderpass + * @kctx: Context pointer + * @end_katom: Pointer to the atom that completed for the last time + * + * This function must only be called if the renderpass actually completed + * without the tiler job chain at the start using too much memory; otherwise + * completion of the end-of-renderpass atom is handled similarly to a soft-stop. 
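+ *
+ * Context: Caller must hold the context's jctx.lock.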
+ */ +static void js_complete_end_rp(struct kbase_context *kctx, + struct kbase_jd_atom *const end_katom) +{ + struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return; + + dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || + WARN_ON(rp->state == KBASE_JD_RP_OOM) || + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) + return; + + /* Rendering completed without running out of memory. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON(!list_empty(&rp->oom_reg_list)); + rp->state = KBASE_JD_RP_COMPLETE; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Renderpass %d is complete\n", + end_katom->renderpass_id); +} + +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + struct kbasep_js_kctx_info *js_kctx_info; + struct kbasep_js_device_data *js_devdata; + struct kbase_device *kbdev; + unsigned long flags; + bool timer_sync = false; + int atom_slot; + bool context_idle = false; + int prio = katom->sched_priority; + + kbdev = kctx->kbdev; + atom_slot = katom->slot_nr; + + dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n", + __func__, (void *)katom, atom_slot); + + /* Update the incremental rendering state machine. 
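+ * The renderpass handlers take the hwaccess_lock themselves, so this
+ * must happen before the scheduler locks are taken below.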
+ */ + if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) + timer_sync |= js_complete_start_rp(kctx, katom); + else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) + js_complete_end_rp(kctx, katom); + + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n", + (void *)katom); + + context_idle = !atomic_dec_return(&kctx->atoms_pulled); + atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); + kctx->atoms_pulled_slot_pri[atom_slot][prio]--; + + if (!atomic_read(&kctx->atoms_pulled) && + !kctx->slots_pullable) { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + timer_sync = true; + } + + /* If this slot has been blocked due to soft-stopped atoms, and + * all atoms have now been processed, then unblock the slot */ + if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] + && kctx->blocked_js[atom_slot][prio]) { + dev_dbg(kbdev->dev, + "kctx %p is no longer blocked from submitting on slot %d at priority %d\n", + (void *)kctx, atom_slot, prio); + + kctx->blocked_js[atom_slot][prio] = false; + if (kbase_js_ctx_pullable(kctx, atom_slot, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, atom_slot); + } + } + WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + + if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && + jsctx_rb_none_to_pull(kctx, atom_slot)) { + if (!list_empty( + &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) + timer_sync |= kbase_js_ctx_list_remove_nolock( + kctx->kbdev, kctx, atom_slot); + } + + /* + * If submission is disabled on this context (most likely due to an + * atom failure) and there are now no atoms left in the system then + * re-enable submission so that context can be scheduled again. + */ + if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && + !atomic_read(&kctx->atoms_pulled) && + !kbase_ctx_flag(kctx, KCTX_DYING)) { + int js; + + kbasep_js_set_submit_allowed(js_devdata, kctx); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + } else if (katom->x_post_dep && + kbasep_js_is_submit_allowed(js_devdata, kctx)) { + int js; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + } + + /* Mark context as inactive. The pm reference will be dropped later in + * jd_done_worker(). + */ + if (context_idle) { + dev_dbg(kbdev->dev, "kctx %p is no longer active\n", + (void *)kctx); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + + dev_dbg(kbdev->dev, "Leaving %s\n", __func__); + return context_idle; +} + +/** + * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has + * completed for the last time. + * + * @end_katom: Pointer to the atom that completed on the hardware. 
+ * + * An atom that ends a renderpass may be run on the hardware several times + * before notifying userspace or allowing dependent atoms to be executed. + * + * This function is used to decide whether or not to allow end-of-renderpass + * atom completion. It only returns false if the atom at the start of the + * renderpass was soft-stopped because it used too much memory during the most + * recent attempt at tiling. + * + * Return: True if the atom completed for the last time. + */ +static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return true; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return true; + + dev_dbg(kbdev->dev, + "JS complete end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, + end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return true; + + /* Failure of end-of-renderpass atoms must not return to the + * start of the renderpass. + */ + if (end_katom->event_code != BASE_JD_EVENT_DONE) + return true; + + if (rp->state != KBASE_JD_RP_OOM && + rp->state != KBASE_JD_RP_RETRY_OOM) + return true; + + dev_dbg(kbdev->dev, "Suppressing end atom completion\n"); + return false; +} + +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + ktime_t *end_timestamp) +{ + /* MALI_SEC_INTEGRATION : MALI_SEC_CL_BOOST */ + u64 microseconds_spent = 0; + struct kbase_device *kbdev; + struct kbase_context *kctx = katom->kctx; + struct kbase_jd_atom *x_dep = katom->x_post_dep; + + kbdev = kctx->kbdev; + dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n", + (void *)katom, (void *)kctx, (void *)x_dep); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && + !js_end_rp_is_complete(katom)) { + katom->event_code = BASE_JD_EVENT_END_RP_DONE; + kbase_js_unpull(kctx, katom); + return NULL; + } + + if (katom->will_fail_event_code) + katom->event_code = katom->will_fail_event_code; + + katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom); + + if (katom->event_code != BASE_JD_EVENT_DONE) { + kbase_js_evict_deps(kctx, katom, katom->slot_nr, + katom->sched_priority); + } + + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, + katom->slot_nr, 0, TL_JS_EVENT_STOP); + + trace_sysgraph_gpu(SGR_COMPLETE, kctx->id, + kbase_jd_atom_id(katom->kctx, katom), katom->slot_nr); + + /* MALI_SEC_INTEGRATION : MALI_SEC_CL_BOOST */ + /* Calculate the job's time used */ + if (end_timestamp != NULL) { + /* Only calculating it for jobs that really run on the HW (e.g. 
+ * removed from next jobs never actually ran, so really did take + * zero time) */ + ktime_t tick_diff = ktime_sub(*end_timestamp, + katom->start_timestamp); + + microseconds_spent = ktime_to_ns(tick_diff); + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->cl_boost_update_utilization) + kbdev->vendor_callbacks->cl_boost_update_utilization(kbdev, katom, microseconds_spent); + + do_div(microseconds_spent, 1000); + + /* Round up time spent to the minimum timer resolution */ + if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US) + microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US; + } + + kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); + + /* Unblock cross dependency if present */ + if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || + !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && + (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) { + bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false); + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + trace_sysgraph(SGR_DEP_RES, kctx->id, + kbase_jd_atom_id(katom->kctx, x_dep)); + dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n", + (void *)x_dep); + + kbase_js_move_to_tree(x_dep); + + if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false)) + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + x_dep->slot_nr); + + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n", + (void *)x_dep); + return x_dep; + } + } else { + dev_dbg(kbdev->dev, + "No cross-slot dep to unblock for atom %p\n", + (void *)katom); + } + + return NULL; +} + +/** + * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot + * dependency + * @katom: Pointer to an atom in the slot ringbuffer + * + * A cross-slot dependency is ignored if necessary to unblock incremental + * rendering. If the atom at the start of a renderpass used too much memory + * and was soft-stopped then the atom at the end of a renderpass is submitted + * to hardware regardless of its dependency on the start-of-renderpass atom. + * This can happen multiple times for the same pair of atoms. + * + * Return: true to block the atom or false to allow it to be submitted to + * hardware + */ +bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency", + (void *)katom); + return false; + } + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { + dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency", + (void *)katom); + return true; + } + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + /* We can read a subset of renderpass state without holding + * higher-level locks (but not end_katom, for example). + */ + + WARN_ON(rp->state == KBASE_JD_RP_COMPLETE); + + dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", + (int)rp->state); + + if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) + return true; + + /* Tiler ran out of memory so allow the fragment job chain to run + * if it only depends on the tiler job chain. 
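+ * A cross-slot dependency on any other atom is still honoured.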
+ */ + if (katom->x_pre_dep != rp->start_katom) { + dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n", + (void *)katom->x_pre_dep, (void *)rp->start_katom); + return true; + } + + dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n", + (void *)katom->x_pre_dep); + + return false; +} + +void kbase_js_sched(struct kbase_device *kbdev, int js_mask) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; + bool timer_sync = false; + bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; + int js; + + dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n", + __func__, (void *)kbdev, (unsigned int)js_mask); + + js_devdata = &kbdev->js_data; + + down(&js_devdata->schedule_sem); + mutex_lock(&js_devdata->queue_mutex); + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + last_active[js] = kbdev->hwaccess.active_kctx[js]; + ctx_waiting[js] = false; + } + + while (js_mask) { + js = ffs(js_mask) - 1; + + while (1) { + struct kbase_context *kctx; + unsigned long flags; + bool context_idle = false; + + kctx = kbase_js_ctx_list_pop_head(kbdev, js); + + if (!kctx) { + js_mask &= ~(1 << js); + dev_dbg(kbdev->dev, + "No kctx on pullable list (s:%d)\n", + js); + break; + } + + if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { + context_idle = true; + + dev_dbg(kbdev->dev, + "kctx %p is not active (s:%d)\n", + (void *)kctx, js); + + if (kbase_pm_context_active_handle_suspend( + kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + dev_dbg(kbdev->dev, + "Suspend pending (s:%d)\n", js); + /* Suspend pending - return context to + * queue and stop scheduling */ + mutex_lock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + if (kbase_js_ctx_list_add_pullable_head( + kctx->kbdev, kctx, js)) + kbase_js_sync_timers(kbdev); + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + up(&js_devdata->schedule_sem); + return; + } + kbase_ctx_flag_set(kctx, KCTX_ACTIVE); + } + + if (!kbase_js_use_ctx(kbdev, kctx, js)) { + mutex_lock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + + dev_dbg(kbdev->dev, + "kctx %p cannot be used at this time\n", + kctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_js_ctx_pullable(kctx, js, false) + || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) + timer_sync |= + kbase_js_ctx_list_add_pullable_head_nolock( + kctx->kbdev, kctx, js); + else + timer_sync |= + kbase_js_ctx_list_add_unpullable_nolock( + kctx->kbdev, kctx, js); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + if (context_idle) { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + kbase_pm_context_idle(kbdev); + } + + /* No more jobs can be submitted on this slot */ + js_mask &= ~(1 << js); + break; + } + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_ctx_flag_clear(kctx, KCTX_PULLED); + + if (!kbase_jm_kick(kbdev, 1 << js)) { + dev_dbg(kbdev->dev, + "No more jobs can be submitted (s:%d)\n", + js); + js_mask &= ~(1 << js); + } + if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { + bool pullable; + + dev_dbg(kbdev->dev, + "No atoms pulled from kctx %p (s:%d)\n", + (void *)kctx, js); + + pullable = kbase_js_ctx_pullable(kctx, js, + true); + + /* Failed to pull jobs - push to head of list. + * Unless this context is already 'active', in + * which case it's effectively already scheduled + * so push it to the back of the list. 
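+ * A context counts as 'active' here if it was the last active
+ * context on this slot and has pulled atoms since becoming active.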
*/ + if (pullable && kctx == last_active[js] && + kbase_ctx_flag(kctx, + (KCTX_PULLED_SINCE_ACTIVE_JS0 << + js))) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kctx->kbdev, + kctx, js); + else if (pullable) + timer_sync |= + kbase_js_ctx_list_add_pullable_head_nolock( + kctx->kbdev, + kctx, js); + else + timer_sync |= + kbase_js_ctx_list_add_unpullable_nolock( + kctx->kbdev, + kctx, js); + + /* If this context is not the active context, + * but the active context is pullable on this + * slot, then we need to remove the active + * marker to prevent it from submitting atoms in + * the IRQ handler, which would prevent this + * context from making progress. */ + if (last_active[js] && kctx != last_active[js] + && kbase_js_ctx_pullable( + last_active[js], js, true)) + ctx_waiting[js] = true; + + if (context_idle) { + kbase_jm_idle_ctx(kbdev, kctx); + spin_unlock_irqrestore( + &kbdev->hwaccess_lock, + flags); + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + kbase_pm_context_idle(kbdev); + } else { + spin_unlock_irqrestore( + &kbdev->hwaccess_lock, + flags); + } + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + + js_mask &= ~(1 << js); + break; /* Could not run atoms on this slot */ + } + + dev_dbg(kbdev->dev, "Push kctx %p to back of list\n", + (void *)kctx); + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kctx->kbdev, kctx, js); + else + timer_sync |= + kbase_js_ctx_list_add_unpullable_nolock( + kctx->kbdev, kctx, js); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + } + } + + if (timer_sync) + kbase_js_sync_timers(kbdev); + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + if (kbdev->hwaccess.active_kctx[js] == last_active[js] && + ctx_waiting[js]) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)last_active[js], js); + kbdev->hwaccess.active_kctx[js] = NULL; + } + } + + mutex_unlock(&js_devdata->queue_mutex); + up(&js_devdata->schedule_sem); +} + +void kbase_js_zap_context(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + + /* + * Critical assumption: No more submission is possible outside of the + * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) + * whilst the struct kbase_context is terminating. + */ + + /* First, atomically do the following: + * - mark the context as dying + * - try to evict it from the queue */ + mutex_lock(&kctx->jctx.lock); + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + kbase_ctx_flag_set(kctx, KCTX_DYING); + + dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); + + /* + * At this point we know: + * - If eviction succeeded, it was in the queue, but now no + * longer is + * - We must cancel the jobs here. No Power Manager active reference to + * release. + * - This happens asynchronously - kbase_jd_zap_context() will wait for + * those jobs to be killed. + * - If eviction failed, then it wasn't in the queue. It is one + * of the following: + * - a. it didn't have any jobs, and so is not in the Queue or + * the Run Pool (not scheduled) + * - Hence, no more work required to cancel jobs. No Power Manager + * active reference to release. + * - b. it was in the middle of a scheduling transaction (and thus must + * have at least 1 job). 
This can happen from a syscall or a + * kernel thread. We still hold the jsctx_mutex, and so the thread + * must be waiting inside kbasep_js_try_schedule_head_ctx(), + * before checking whether the runpool is full. That thread will + * continue after we drop the mutex, and will notice the context + * is dying. It will rollback the transaction, killing all jobs at + * the same time. kbase_jd_zap_context() will wait for those jobs + * to be killed. + * - Hence, no more work required to cancel jobs, or to release the + * Power Manager active reference. + * - c. it is scheduled, and may or may not be running jobs + * - We must cause it to leave the runpool by stopping it from + * submitting any more jobs. When it finally does leave, + * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs + * (because it is dying), release the Power Manager active reference, + * and will not requeue the context in the queue. + * kbase_jd_zap_context() will wait for those jobs to be killed. + * - Hence, work required just to make it leave the runpool. Cancelling + * jobs and releasing the Power manager active reference will be + * handled when it leaves the runpool. + */ + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { + unsigned long flags; + int js; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (!list_empty( + &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init( + &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* The following events require us to kill off remaining jobs + * and update PM book-keeping: + * - we evicted it correctly (it must have jobs to be in the + * Queue) + * + * These events need no action, but take this path anyway: + * - Case a: it didn't have any jobs, and was never in the Queue + * - Case b: scheduling transaction will be partially rolled- + * back (this already cancels the jobs) + */ + + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); + + /* Only cancel jobs when we evicted from the + * queue. No Power Manager active reference was held. + * + * Having is_dying set ensures that this kills, and + * doesn't requeue */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); + } else { + unsigned long flags; + bool was_retained; + + /* Case c: didn't evict, but it is scheduled - it's in the Run + * Pool */ + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); + + /* Disable the ctx from submitting any more jobs */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + /* Retain and (later) release the context whilst it is is now + * disallowed from submitting jobs - ensures that someone + * somewhere will be removing the context later on */ + was_retained = kbase_ctx_sched_inc_refcount_nolock(kctx); + + /* Since it's scheduled and we have the jsctx_mutex, it must be + * retained successfully */ + KBASE_DEBUG_ASSERT(was_retained); + + dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); + + /* Cancel any remaining running jobs for this kctx - if any. 
+ * Submit is disallowed which takes effect immediately, so no + * more new jobs will appear after we do this. */ + kbase_backend_jm_kill_running_jobs_from_kctx(kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); + + dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", + kctx); + + kbasep_js_runpool_release_ctx(kbdev, kctx); + } + + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); + + /* After this, you must wait on both the + * kbase_jd_context::zero_jobs_wait and the + * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs + * to be destroyed, and the context to be de-scheduled (if it was on the + * runpool). + * + * kbase_jd_zap_context() will do this. */ +} + +static inline int trace_get_refcnt(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + return atomic_read(&kctx->refcount); +} + +/** + * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context + * @kctx: Pointer to context. + * @callback: Pointer to function to call for each job. + * + * Call a function on all jobs belonging to a non-queued, non-running + * context, and detach the jobs from the context as it goes. + * + * Due to the locks that might be held at the time of the call, the callback + * may need to defer work on a workqueue to complete its actions (e.g. when + * cancelling jobs) + * + * Atoms will be removed from the queue, so this must only be called when + * cancelling jobs (which occurs as part of context destruction). + * + * The locking conditions on the caller are as follows: + * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. + */ +static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, + kbasep_js_ctx_job_cb callback) +{ + struct kbase_device *kbdev; + unsigned long flags; + u32 js; + + kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, + 0u, trace_get_refcnt(kbdev, kctx)); + + /* Invoke callback on jobs on each slot in turn */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + jsctx_queue_foreach(kctx, js, callback); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_js.h b/drivers/gpu/arm/b_r26p0/mali_kbase_js.h new file mode 100644 index 000000000000..541acd4afed7 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_js.h @@ -0,0 +1,40 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_js.h + * Job Scheduler APIs. 
+ */ + +#ifndef _KBASE_JS_H_ +#define _KBASE_JS_H_ + +#include "context/mali_kbase_context.h" +#include "mali_kbase_defs.h" +#include "mali_kbase_debug.h" +#include +#include "jm/mali_kbase_jm_js.h" +#include "jm/mali_kbase_js_defs.h" + +#endif /* _KBASE_JS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.c new file mode 100644 index 000000000000..141d04a385cb --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.c @@ -0,0 +1,283 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +#include +#include + +/* + * Private functions follow + */ + +/** + * @brief Check whether a ctx has a certain attribute, and if so, retain that + * attribute on the runpool. + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx is scheduled on the runpool + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. + */ +static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { + KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); + ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + + if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { + /* First refcount indicates a state change */ + runpool_state_changed = true; + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); + } + } + + return runpool_state_changed; +} + +/** + * @brief Check whether a ctx has a certain attribute, and if so, release that + * attribute on the runpool. + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx is scheduled on the runpool + * + * @return true indicates a change in ctx attributes state of the runpool. 
+ * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. + */ +static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { + KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); + --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + + if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { + /* Last de-refcount indicates a state change */ + runpool_state_changed = true; + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); + } + } + + return runpool_state_changed; +} + +/** + * @brief Retain a certain attribute on a ctx, also retaining it on the runpool + * if the context is scheduled. + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + * + * @return true indicates a change in ctx attributes state of the runpool. + * This may allow the scheduler to submit more jobs than previously. + * @return false indicates no change in ctx attributes state of the runpool. + */ +static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); + + ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); + runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); + } + + return runpool_state_changed; +} + +/* + * @brief Release a certain attribute on a ctx, also releasing it from the runpool + * if the context is scheduled. + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + * + * @return true indicates a change in ctx attributes state of the runpool. + * This may allow the scheduler to submit more jobs than previously. + * @return false indicates no change in ctx attributes state of the runpool. 
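
/*
 * A standalone illustration (hypothetical code, not part of the driver) of the
 * contract documented above: the runpool-level attribute only changes state on
 * the 0 -> 1 and 1 -> 0 transitions of its reference count, which is exactly
 * what the boolean return values of the retain/release helpers report.
 */
static s8 example_attr_refcount;

static bool example_runpool_retain(void)
{
	/* turns "on" only when the first scheduled context holds it */
	return ++example_attr_refcount == 1;
}

static bool example_runpool_release(void)
{
	/* turns "off" only when the last scheduled context drops it */
	return --example_attr_refcount == 0;
}
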
+ */ +static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); + + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + lockdep_assert_held(&kbdev->hwaccess_lock); + /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ + runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); + } + + /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ + --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + + return runpool_state_changed; +} + +/* + * More commonly used public functions + */ + +void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + bool runpool_state_changed; + int i; + + /* Retain any existing attributes */ + for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { + /* The context is being scheduled in, so update the runpool with the new attributes */ + runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + + /* We don't need to know about state changed, because retaining a + * context occurs on scheduling it, and that itself will also try + * to run new atoms */ + CSTD_UNUSED(runpool_state_changed); + } + } +} + +bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + bool runpool_state_changed = false; + int i; + + /* Release any existing attributes */ + for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { + /* The context is being scheduled out, so update the runpool on the removed attributes */ + runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + } + } + + return runpool_state_changed; +} + +void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + bool runpool_state_changed = false; + base_jd_core_req core_req; + + KBASE_DEBUG_ASSERT(katom); + core_req = katom->core_req; + + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + else + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + /* Atom that can run on slot1 or slot2, and can use all cores */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + + /* We don't need to know about state changed, because retaining an + * atom occurs on adding it, and that itself will also try to run + * new 
atoms */ + CSTD_UNUSED(runpool_state_changed); +} + +bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) +{ + bool runpool_state_changed = false; + base_jd_core_req core_req; + + KBASE_DEBUG_ASSERT(katom_retained_state); + core_req = katom_retained_state->core_req; + + /* No-op for invalid atoms */ + if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false) + return false; + + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + else + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + /* Atom that can run on slot1 or slot2, and can use all cores */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + + return runpool_state_changed; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.h new file mode 100644 index 000000000000..25fd39787c71 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_js_ctx_attr.h @@ -0,0 +1,155 @@ +/* + * + * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_js_ctx_attr.h + * Job Scheduler Context Attribute APIs + */ + +#ifndef _KBASE_JS_CTX_ATTR_H_ +#define _KBASE_JS_CTX_ATTR_H_ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js + * @{ + */ + +/** + * Retain all attributes of a context + * + * This occurs on scheduling in the context on the runpool (but after + * is_scheduled is set) + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx->is_scheduled is true + */ +void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * Release all attributes of a context + * + * This occurs on scheduling out the context from the runpool (but before + * is_scheduled is cleared) + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx->is_scheduled is true + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. 
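
/*
 * A minimal usage sketch (hypothetical helper, not part of the driver) for the
 * runpool attribute queries declared later in this header. In this kernel
 * series the "runpool_irq spinlock" requirement corresponds to
 * kbdev->hwaccess_lock, as the lockdep assertions in mali_kbase_js_ctx_attr.c
 * show.
 */
static inline bool example_compute_ctx_on_runpool(struct kbase_device *kbdev)
{
	unsigned long flags;
	bool on;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	on = kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
						   KBASEP_JS_CTX_ATTR_COMPUTE);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return on;
}
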
+ */ +bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * Retain all attributes of an atom + * + * This occurs on adding an atom to a context + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + */ +void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); + +/** + * Release all attributes of an atom, given its retained state. + * + * This occurs after (permanently) removing an atom from a context + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + * + * This is a no-op when \a katom_retained_state is invalid. + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. + */ +bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); + +/** + * Requires: + * - runpool_irq spinlock + */ +static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + + return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; +} + +/** + * Requires: + * - runpool_irq spinlock + */ +static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) +{ + /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ + return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); +} + +/** + * Requires: + * - jsctx mutex + */ +static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_kctx_info = &kctx->jctx.sched_info; + + /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ + return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); +} + + /** @} *//* end group kbase_js */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_JS_DEFS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm.c b/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm.c new file mode 100644 index 000000000000..1e91a7cd7d36 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm.c @@ -0,0 +1,896 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * mali_kbase_kinstr_jm.c + * Kernel driver public interface to job manager atom tracing + */ + +#include "mali_kbase_kinstr_jm.h" +#include "mali_kbase_kinstr_jm_reader.h" + +#include "mali_kbase.h" +#include "mali_kbase_linux.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE +#include +#else +// Stringify the expression if no message is given. +#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) +#define __static_assert(e, msg, ...) _Static_assert(e, msg) +#endif + +#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +typedef unsigned int __poll_t; +#endif + +#ifndef ENOTSUP +#define ENOTSUP EOPNOTSUPP +#endif + +/* The module printing prefix */ +#define PR_ "mali_kbase_kinstr_jm: " + +/* Allows us to perform ASM goto for the tracing + * https://www.kernel.org/doc/Documentation/static-keys.txt + */ +#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE +DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key); +#else +struct static_key basep_kinstr_jm_reader_static_key = STATIC_KEY_INIT_FALSE; +#define static_branch_inc(key) static_key_slow_inc(key) +#define static_branch_dec(key) static_key_slow_dec(key) +#endif /* KERNEL_VERSION(4 ,3, 0) <= LINUX_VERSION_CODE */ + +#define KBASE_KINSTR_JM_VERSION 1 + +/** + * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing + * @readers: a bitlocked list of opened readers. Readers are attached to the + * private data of a file descriptor that the user opens with the + * KBASE_IOCTL_KINSTR_JM_FD IO control call. + * @refcount: reference count for the context. Any reader will have a link + * back to the context so that they can remove themselves from the + * list. + * + * This is opaque outside this compilation unit + */ +struct kbase_kinstr_jm { + struct hlist_bl_head readers; + struct kref refcount; +}; + +/** + * struct kbase_kinstr_jm_atom_state_change - Represents an atom changing to a + * new state + * @timestamp: Raw monotonic nanoseconds of the state change + * @state: The state that the atom has moved to + * @atom: The atom number that has changed state + * @flags: Flags associated with the state change. See + * KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines. + * @reserved: Reserved for future use. + * @data: Extra data for the state change. Active member depends on state. + * + * We can add new fields to the structure and old user code will gracefully + * ignore the new fields. + * + * We can change the size of the structure and old user code will gracefully + * skip over the new size via `struct kbase_kinstr_jm_fd_out->size`. + * + * If we remove fields, the version field in `struct + * kbase_kinstr_jm_fd_out->version` will be incremented and old user code will + * gracefully fail and tell the user that the kernel API is too new and has + * backwards-incompatible changes. Note that one userspace can opt to handle + * multiple kernel major versions of the structure. + * + * If we need to change the _meaning_ of one of the fields, i.e. 
the state + * machine has had a incompatible change, we can keep the same members in the + * structure and update the version as above. User code will no longer + * recognise that it has the supported field and can gracefully explain to the + * user that the kernel API is no longer supported. + * + * When making changes to this structure, make sure they are either: + * - additions to the end (for minor version bumps (i.e. only a size increase)) + * such that the layout of existing fields doesn't change, or; + * - update the version reported to userspace so that it can fail explicitly. + */ +struct kbase_kinstr_jm_atom_state_change { + u64 timestamp; + s8 state; /* enum kbase_kinstr_jm_reader_atom_state */ + u8 atom; + u8 flags; + u8 reserved[1]; + /* Tagged union based on state. Ensure members are aligned correctly! */ + union { + struct { + u8 slot; + } start; + u8 padding[4]; + } data; +}; +static_assert( + ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); + +#define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0) + +/** + * struct reader_changes - The circular buffer of kernel atom state changes + * @data: The allocated buffer. This is allocated when the user requests + * the reader file descriptor. It is released when the user calls + * close() on the fd. When accessing this, lock the producer spin + * lock to prevent races on the allocated memory. The consume lock + * does not need to be held because newly-inserted data will always + * be outside the currenly-read range. + * @producer: The producing spinlock which allows us to push changes into the + * buffer at the same time as a user read occurring. This needs to + * be locked when saving/restoring the IRQ because we can receive an + * interrupt from the GPU when an atom completes. The CPU could have + * a task preempted that is holding this lock. + * @consumer: The consuming mutex which locks around the user read(). + * Must be held when updating the tail of the circular buffer. + * @head: The head of the circular buffer. Can be used with Linux @c CIRC_ + * helpers. The producer should lock and update this with an SMP + * store when a new change lands. The consumer can read with an + * SMP load. This allows the producer to safely insert new changes + * into the circular buffer. + * @tail: The tail of the circular buffer. Can be used with Linux @c CIRC_ + * helpers. The producer should do a READ_ONCE load and the consumer + * should SMP store. + * @size: The number of changes that are allowed in @c data. Can be used + * with Linux @c CIRC_ helpers. Will always be a power of two. The + * producer lock should be held when updating this and stored with + * an SMP release memory barrier. This means that the consumer can + * do an SMP load. + * @threshold: The number of changes above which threads polling on the reader + * file descriptor will be woken up. + */ +struct reader_changes { + struct kbase_kinstr_jm_atom_state_change *data; + spinlock_t producer; + struct mutex consumer; + u32 head; + u32 tail; + u32 size; + u32 threshold; +}; + +/** + * reader_changes_is_valid_size() - Determines if requested changes buffer size + * is valid. + * @size: The requested memory size + * + * We have a constraint that the underlying physical buffer must be a + * power of two so that we can use the efficient circular buffer helpers that + * the kernel provides. It also needs to be representable within a u32. 
+ * + * Return: + * * true - the size is valid + * * false - the size is invalid + */ +static inline bool reader_changes_is_valid_size(const size_t size) +{ + typedef struct reader_changes changes_t; + const size_t elem_size = sizeof(*((changes_t *)0)->data); + const size_t size_size = sizeof(((changes_t *)0)->size); + const size_t size_max = (1ull << (size_size * 8)) - 1; + + return is_power_of_2(size) && /* Is a power of two */ + ((size / elem_size) <= size_max); /* Small enough */ +} + +/** + * reader_changes_init() - Initializes the reader changes and allocates the + * changes buffer + * @changes: The context pointer, must point to a zero-inited allocated reader + * changes structure. We may support allocating the structure in the + * future. + * @size: The requested changes buffer size + * + * Return: + * (0, U16_MAX] - the number of data elements allocated + * -EINVAL - a pointer was invalid + * -ENOTSUP - we do not support allocation of the context + * -ERANGE - the requested memory size was invalid + * -ENOMEM - could not allocate the memory + * -EADDRINUSE - the buffer memory was already allocated + */ +static int reader_changes_init(struct reader_changes *const changes, + const size_t size) +{ + BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0); + + if (!reader_changes_is_valid_size(size)) { + pr_warn(PR_ "invalid size %zu\n", size); + return -ERANGE; + } + + changes->data = vmalloc(size); + if (!changes->data) + return -ENOMEM; + + spin_lock_init(&changes->producer); + mutex_init(&changes->consumer); + + changes->size = size / sizeof(*changes->data); + changes->threshold = min(((size_t)(changes->size)) / 4, + ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); + + return changes->size; +} + +/** + * reader_changes_term() - Cleans up a reader changes structure + * @changes: The context to clean up + * + * Releases the allocated state changes memory + */ +static void reader_changes_term(struct reader_changes *const changes) +{ + struct kbase_kinstr_jm_atom_state_change *data = NULL; + unsigned long irq; + + /* + * Although changes->data is used on the consumer side, too, no active + * consumer is possible by the time we clean up the reader changes, so + * no need to take the consumer lock. However, we do need the producer + * lock because the list removal can race with list traversal. + */ + spin_lock_irqsave(&changes->producer, irq); + swap(changes->data, data); + spin_unlock_irqrestore(&changes->producer, irq); + + mutex_destroy(&changes->consumer); + vfree(data); +} + +/** + * reader_changes_count_locked() - Retrieves the count of state changes from the + * tail to the physical end of the buffer + * @changes: The state changes context + * + * The consumer mutex must be held. Uses the CIRC_CNT_TO_END macro to + * determine the count, so there may be more items. However, that's the maximum + * number that can be read in one contiguous read. 
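
/*
 * A self-contained sketch (not driver code) of the index discipline described
 * above for struct reader_changes: the element count is a power of two so
 * indices can be wrapped with "& (size - 1)", and CIRC_CNT_TO_END() (from
 * <linux/circ_buf.h>) only reports the run of entries up to the physical end
 * of the buffer, so draining everything can take up to two contiguous passes.
 */
static void example_drain_all(
	const struct kbase_kinstr_jm_atom_state_change *data,
	u32 head, u32 *tail, u32 size)
{
	u32 run;

	/* size must be a power of two for the masking below to be a correct
	 * wrap-around. */
	while ((run = CIRC_CNT_TO_END(head, *tail, size)) > 0) {
		/* consume data[*tail] .. data[*tail + run - 1] here */
		*tail = (*tail + run) & (size - 1);
	}
}
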
+ * + * Return: the number of changes in the circular buffer until the end of the + * allocation + */ +static u32 reader_changes_count_locked(struct reader_changes *const changes) +{ + u32 head; + + lockdep_assert_held_once(&changes->consumer); + + head = smp_load_acquire(&changes->head); + + return CIRC_CNT_TO_END(head, changes->tail, changes->size); +} + +/** + * reader_changes_count() - Retrieves the count of state changes from the + * tail to the physical end of the buffer + * @changes: The state changes context + * + * Return: the number of changes in the circular buffer until the end of the + * allocation + */ +static u32 reader_changes_count(struct reader_changes *const changes) +{ + u32 ret; + + mutex_lock(&changes->consumer); + ret = reader_changes_count_locked(changes); + mutex_unlock(&changes->consumer); + return ret; +} + +/** + * reader_changes_push() - Pushes a change into the reader circular buffer. + * @changes: The buffer to insert the change into + * @change: Kernel atom change to insert + * @wait_queue: The queue to be kicked when changes should be read from + * userspace. Kicked when a threshold is reached or there is + * overflow. + */ +static void reader_changes_push( + struct reader_changes *const changes, + const struct kbase_kinstr_jm_atom_state_change *const change, + wait_queue_head_t *const wait_queue) +{ + u32 head, tail, size, space; + unsigned long irq; + struct kbase_kinstr_jm_atom_state_change *data; + + spin_lock_irqsave(&changes->producer, irq); + + /* We may be called for a reader_changes that's awaiting cleanup. */ + data = changes->data; + if (!data) + goto unlock; + + size = changes->size; + head = changes->head; + tail = smp_load_acquire(&changes->tail); + + space = CIRC_SPACE(head, tail, size); + if (space >= 1) { + data[head] = *change; + if (space == 1) { + data[head].flags |= + KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; + pr_warn(PR_ "overflow of circular buffer\n"); + } + smp_store_release(&changes->head, (head + 1) & (size - 1)); + } + + /* Wake for either overflow or over-threshold cases. */ + if (CIRC_CNT(head + 1, tail, size) >= changes->threshold) + wake_up_interruptible(wait_queue); + +unlock: + spin_unlock_irqrestore(&changes->producer, irq); +} + +/** + * struct reader - Allows the kernel state changes to be read by user space. + * @node: The node in the @c readers locked list + * @rcu_head: storage for the RCU callback to free this reader (see kfree_rcu) + * @changes: The circular buffer of user changes + * @wait_queue: A wait queue for poll + * @context: a pointer to the parent context that created this reader. Can be + * used to remove the reader from the list of readers. Reference + * counted. + * + * The reader is a circular buffer in kernel space. State changes are pushed + * into the buffer. The flow from user space is: + * + * * Request file descriptor with KBASE_IOCTL_KINSTR_JM_FD. This will + * allocate the kernel side circular buffer with a size specified in the + * ioctl argument. + * * The user will then poll the file descriptor for data + * * Upon receiving POLLIN, perform a read() on the file descriptor to get + * the data out. 
+ * * The buffer memory will be freed when the file descriptor is closed + */ +struct reader { + struct hlist_bl_node node; + struct rcu_head rcu_head; + struct reader_changes changes; + wait_queue_head_t wait_queue; + struct kbase_kinstr_jm *context; +}; + +static struct kbase_kinstr_jm * +kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx); +static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx); +static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, + struct reader *const reader); +static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, + struct reader *const reader); + +/** + * reader_term() - Terminate a instrumentation job manager reader context. + * @reader: Pointer to context to be terminated. + */ +static void reader_term(struct reader *const reader) +{ + if (!reader) + return; + + kbase_kinstr_jm_readers_del(reader->context, reader); + reader_changes_term(&reader->changes); + kbase_kinstr_jm_ref_put(reader->context); + + kfree_rcu(reader, rcu_head); +} + +/** + * reader_init() - Initialise a instrumentation job manager reader context. + * @out_reader: Non-NULL pointer to where the pointer to the created context + * will be stored on success. + * @ctx: the pointer to the parent context. Reference count will be + * increased if initialization is successful + * @num_changes: The number of changes to allocate a buffer for + * + * Return: 0 on success, else error code. + */ +static int reader_init(struct reader **const out_reader, + struct kbase_kinstr_jm *const ctx, + size_t const num_changes) +{ + struct reader *reader = NULL; + const size_t change_size = sizeof(struct kbase_kinstr_jm_atom_state_change); + int status; + + if (!out_reader || !ctx || !num_changes) + return -EINVAL; + + reader = kzalloc(sizeof(*reader), GFP_KERNEL); + if (!reader) + return -ENOMEM; + + INIT_HLIST_BL_NODE(&reader->node); + init_waitqueue_head(&reader->wait_queue); + + reader->context = kbase_kinstr_jm_ref_get(ctx); + + status = reader_changes_init(&reader->changes, num_changes * change_size); + if (status < 0) + goto fail; + + status = kbase_kinstr_jm_readers_add(ctx, reader); + if (status < 0) + goto fail; + + *out_reader = reader; + + return 0; + +fail: + kbase_kinstr_jm_ref_put(reader->context); + kfree(reader); + return status; +} + +/** + * reader_release() - Invoked when the reader file descriptor is released + * @node: The inode that the file descriptor that the file corresponds to. In + * our case our reader file descriptor is backed by an anonymous node so + * not much is in this. + * @file: the file data. Our reader context is held in the private data + * Return: zero on success + */ +static int reader_release(struct inode *const node, struct file *const file) +{ + struct reader *const reader = file->private_data; + + reader_term(reader); + file->private_data = NULL; + + return 0; +} + +/** + * reader_changes_copy_to_user() - Copy any changes from a changes structure to + * the user-provided buffer. + * @changes: The changes structure from which to copy. + * @buffer: The user buffer to copy the data to. + * @buffer_size: The number of bytes in the buffer. + * Return: The number of bytes copied or negative errno on failure. 
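
/*
 * A small arithmetic sketch (not driver code) of the rounding used in the copy
 * loop below: because the entry size is a power of two, masking the user
 * buffer size with ~(entry_size - 1) rounds it down to a whole number of
 * entries, which is why read() always returns a multiple of the entry size.
 */
static size_t example_usable_bytes(size_t buffer_size, size_t entry_size)
{
	/* entry_size must be a power of two for the mask to be exact */
	return buffer_size & ~(entry_size - 1);
}
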
+ */ +static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, + char __user *buffer, + size_t buffer_size) +{ + ssize_t ret = 0; + struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE( + changes->data); + size_t const entry_size = sizeof(*src_buf); + size_t changes_tail, changes_count, read_size; + + /* Needed for the quick buffer capacity calculation below. + * Note that we can't use is_power_of_2() since old compilers don't + * understand it's a constant expression. + */ +#define is_power_of_two(x) ((x) && !((x) & ((x) - 1))) + static_assert(is_power_of_two( + sizeof(struct kbase_kinstr_jm_atom_state_change))); +#undef is_power_of_two + + lockdep_assert_held_once(&changes->consumer); + + /* Read continuously until either: + * - we've filled the output buffer, or + * - there are no changes when we check. + * + * If more changes arrive while we're copying to the user, we can copy + * those as well, space permitting. + */ + do { + changes_tail = changes->tail; + changes_count = reader_changes_count_locked(changes); + read_size = min(changes_count * entry_size, + buffer_size & ~(entry_size - 1)); + + if (!read_size) + break; + + if (copy_to_user(buffer, &(src_buf[changes_tail]), read_size)) + return -EFAULT; + + buffer += read_size; + buffer_size -= read_size; + ret += read_size; + changes_tail = (changes_tail + read_size / entry_size) & + (changes->size - 1); + smp_store_release(&changes->tail, changes_tail); + } while (read_size); + + return ret; +} + +/** + * reader_read() - Handles a read call on the reader file descriptor + * + * @filp: The file that the read was performed on + * @buffer: The destination buffer + * @buffer_size: The maximum number of bytes to read + * @offset: The offset into the 'file' to read from. + * + * Note the destination buffer needs to be fully mapped in userspace or the read + * will fault. + * + * Return: + * * The number of bytes read or: + * * -EBADF - the file descriptor did not have an attached reader + * * -EFAULT - memory access fault + * * -EAGAIN - if the file is set to nonblocking reads with O_NONBLOCK and there + * is no data available + * + * Note: The number of bytes read will always be a multiple of the size of an + * entry. 
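
/*
 * A userspace-side sketch (illustrative only, error handling omitted) of
 * consuming this file descriptor under the read semantics described above:
 * poll for POLLIN, read whole entries, and step through the buffer using the
 * entry size reported by KBASE_IOCTL_KINSTR_JM_FD rather than a compile-time
 * struct size, so newer kernels with larger entries still work.
 */
#include <poll.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

static void example_consume(int fd, size_t entry_size)
{
	char buffer[4096];
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		/* The kernel only ever returns whole entries. */
		ssize_t bytes = read(fd, buffer, sizeof(buffer));
		ssize_t offset;

		for (offset = 0; offset < bytes; offset += entry_size) {
			/* The first 8 bytes of each entry are the raw
			 * monotonic timestamp; decode the remaining fields
			 * according to the version reported by the ioctl. */
			uint64_t timestamp;

			memcpy(&timestamp, buffer + offset, sizeof(timestamp));
			/* ... decode state/atom/flags here ... */
		}
	}
}
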
+ */ +static ssize_t reader_read(struct file *const filp, + char __user *const buffer, + size_t const buffer_size, + loff_t *const offset) +{ + struct reader *const reader = filp->private_data; + struct reader_changes *changes; + ssize_t ret; + + if (!reader) + return -EBADF; + + if (buffer_size < sizeof(struct kbase_kinstr_jm_atom_state_change)) + return -ENOBUFS; + +#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE + if (!access_ok(buffer, buffer_size)) + return -EIO; +#else + if (!access_ok(VERIFY_WRITE, buffer, buffer_size)) + return -EIO; +#endif + + changes = &reader->changes; + + mutex_lock(&changes->consumer); + if (!reader_changes_count_locked(changes)) { + if (filp->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + goto exit; + } + + if (wait_event_interruptible( + reader->wait_queue, + !!reader_changes_count_locked(changes))) { + ret = -EINTR; + goto exit; + } + } + + ret = reader_changes_copy_to_user(changes, buffer, buffer_size); + +exit: + mutex_unlock(&changes->consumer); + return ret; +} + +/** + * reader_poll() - Handles a poll call on the reader file descriptor + * @file: The file that the poll was performed on + * @wait: The poll table + * + * The results of the poll will be unreliable if there is no mapped memory as + * there is no circular buffer to push atom state changes into. + * + * Return: + * * 0 - no data ready + * * POLLIN - state changes have been buffered + * * -EBADF - the file descriptor did not have an attached reader + * * -EINVAL - the IO control arguments were invalid + */ +static __poll_t reader_poll(struct file *const file, + struct poll_table_struct *const wait) +{ + struct reader *reader; + struct reader_changes *changes; + + if (unlikely(!file || !wait)) + return -EINVAL; + + reader = file->private_data; + if (unlikely(!reader)) + return -EBADF; + + changes = &reader->changes; + + if (reader_changes_count(changes) >= changes->threshold) + return POLLIN; + + poll_wait(file, &reader->wait_queue, wait); + + return (reader_changes_count(changes) > 0) ? POLLIN : 0; +} + +/* The file operations virtual function table */ +static const struct file_operations file_operations = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = reader_read, + .poll = reader_poll, + .release = reader_release +}; + +/* The maximum amount of readers that can be created on a context. */ +static const size_t kbase_kinstr_jm_readers_max = 16; + +/** + * kbasep_kinstr_jm_release() - Invoked when the reference count is dropped + * @ref: the context reference count + */ +static void kbase_kinstr_jm_release(struct kref *const ref) +{ + struct kbase_kinstr_jm *const ctx = + container_of(ref, struct kbase_kinstr_jm, refcount); + + kfree(ctx); +} + +/** + * kbase_kinstr_jm_ref_get() - Reference counts the instrumentation context + * @ctx: the context to reference count + * Return: the reference counted context + */ +static struct kbase_kinstr_jm * +kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx) +{ + if (likely(ctx)) + kref_get(&ctx->refcount); + return ctx; +} + +/** + * kbase_kinstr_jm_ref_put() - Dereferences the instrumentation context + * @ctx: the context to lower the reference count on + */ +static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx) +{ + if (likely(ctx)) + kref_put(&ctx->refcount, kbase_kinstr_jm_release); +} + +/** + * kbase_kinstr_jm_readers_add() - Adds a reader to the list of readers + * @ctx: the instrumentation context + * @reader: the reader to add + * + * Return: + * 0 - success + * -ENOMEM - too many readers already added. 
+ */ +static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, + struct reader *const reader) +{ + struct hlist_bl_head *const readers = &ctx->readers; + struct hlist_bl_node *node; + struct reader *temp; + size_t count = 0; + + hlist_bl_lock(readers); + + hlist_bl_for_each_entry_rcu(temp, node, readers, node) + ++count; + + if (kbase_kinstr_jm_readers_max < count) { + hlist_bl_unlock(readers); + return -ENOMEM; + } + + hlist_bl_add_head_rcu(&reader->node, readers); + + hlist_bl_unlock(readers); + + static_branch_inc(&basep_kinstr_jm_reader_static_key); + + return 0; +} + +/** + * readers_del() - Deletes a reader from the list of readers + * @ctx: the instrumentation context + * @reader: the reader to delete + */ +static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, + struct reader *const reader) +{ + struct hlist_bl_head *const readers = &ctx->readers; + + hlist_bl_lock(readers); + hlist_bl_del_rcu(&reader->node); + hlist_bl_unlock(readers); + + static_branch_dec(&basep_kinstr_jm_reader_static_key); +} + +int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, + union kbase_kinstr_jm_fd *jm_fd_arg) +{ + struct kbase_kinstr_jm_fd_in const *in; + struct reader *reader; + size_t const change_size = sizeof(struct + kbase_kinstr_jm_atom_state_change); + int status; + int fd; + int i; + + if (!ctx || !jm_fd_arg) + return -EINVAL; + + in = &jm_fd_arg->in; + + if (!is_power_of_2(in->count)) + return -EINVAL; + + for (i = 0; i < sizeof(in->padding); ++i) + if (in->padding[i]) + return -EINVAL; + + status = reader_init(&reader, ctx, in->count); + if (status < 0) + return status; + + jm_fd_arg->out.version = KBASE_KINSTR_JM_VERSION; + jm_fd_arg->out.size = change_size; + memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding)); + + fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader, + O_CLOEXEC); + if (fd < 0) + reader_term(reader); + + return fd; +} + +int kbase_kinstr_jm_init(struct kbase_kinstr_jm **const out_ctx) +{ + struct kbase_kinstr_jm *ctx = NULL; + + if (!out_ctx) + return -EINVAL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + INIT_HLIST_BL_HEAD(&ctx->readers); + kref_init(&ctx->refcount); + + *out_ctx = ctx; + + return 0; +} + +void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx) +{ + kbase_kinstr_jm_ref_put(ctx); +} + +void kbasep_kinstr_jm_atom_state( + struct kbase_jd_atom *const katom, + const enum kbase_kinstr_jm_reader_atom_state state) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm; + const u8 id = kbase_jd_atom_id(kctx, katom); + struct kbase_kinstr_jm_atom_state_change change = { + .timestamp = ktime_get_raw_ns(), .atom = id, .state = state + }; + struct reader *reader; + struct hlist_bl_node *node; + + WARN(KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT < state || 0 > state, + PR_ "unsupported katom (%u) state (%i)", id, state); + + switch (state) { + case KBASE_KINSTR_JM_READER_ATOM_STATE_START: + change.data.start.slot = katom->jobslot; + break; + default: + break; + } + + rcu_read_lock(); + hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node) + reader_changes_push( + &reader->changes, &change, &reader->wait_queue); + rcu_read_unlock(); +} + +KBASE_EXPORT_TEST_API(kbasep_kinstr_jm_atom_state); + +void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const katom) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + const int slot = katom->slot_nr; + 
struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); + + BUILD_BUG_ON(SLOT_RB_SIZE != 2); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) + return; + if (WARN_ON(!submitted)) + return; + + if (submitted == katom) + kbase_kinstr_jm_atom_state_start(katom); +} + +void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const katom) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + const int slot = katom->slot_nr; + struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); + struct kbase_jd_atom *const queued = kbase_gpu_inspect(kbdev, slot, 1); + + BUILD_BUG_ON(SLOT_RB_SIZE != 2); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) + return; + if (WARN_ON(!submitted)) + return; + if (WARN_ON((submitted != katom) && (queued != katom))) + return; + + if (queued == katom) + return; + + if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_kinstr_jm_atom_state_stop(katom); + if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_kinstr_jm_atom_state_start(queued); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm.h b/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm.h new file mode 100644 index 000000000000..555edfeef77c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm.h @@ -0,0 +1,283 @@ +/* + * + * (C) COPYRIGHT 2019,2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * mali_kbase_kinstr_jm.h + * Kernel driver public interface to job manager atom tracing. This API provides + * a method to get the atom state changes into user space. + * + * The flow of operation is: + * + * | kernel | user | + * | ----------------------------------- | ----------------------------------- | + * | Initialize API with | | + * | kbase_kinstr_jm_init() | | + * | | | + * | Kernel code injects states with | | + * | kbase_kinstr_jm_atom_state_*() APIs | | + * | | Call ioctl() to get file descriptor | + * | | via KBASE_IOCTL_KINSTR_JM_FD | + * | Allocates a reader attached to FD | | + * | Allocates circular buffer and | | + * | patches, via ASM goto, the | | + * | kbase_kinstr_jm_atom_state_*() | | + * | | loop: | + * | | Call poll() on FD for POLLIN | + * | When threshold of changes is hit, | | + * | the poll is interrupted with | | + * | POLLIN. 
If circular buffer is | | + * | full then store the missed count | | + * | and interrupt poll | Call read() to get data from | + * | | circular buffer via the fd | + * | Kernel advances tail of circular | | + * | buffer | | + * | | Close file descriptor | + * | Deallocates circular buffer | | + * | | | + * | Terminate API with | | + * | kbase_kinstr_jm_term() | | + * + * All tracepoints are guarded on a static key. The static key is activated when + * a user space reader gets created. This means that there is negligible cost + * inserting the tracepoints into code when there are no readers. + */ + +#ifndef _KBASE_KINSTR_JM_H_ +#define _KBASE_KINSTR_JM_H_ + +#include "mali_kbase_kinstr_jm_reader.h" + +#ifdef __KERNEL__ +#include +#include +#else +/* empty wrapper macros for userspace */ +#define static_branch_unlikely(key) (1) +#define KERNEL_VERSION(a, b, c) (0) +#define LINUX_VERSION_CODE (1) +#endif /* __KERNEL__ */ + +/* Forward declarations */ +struct kbase_context; +struct kbase_kinstr_jm; +struct kbase_jd_atom; +union kbase_kinstr_jm_fd; + +/** + * kbase_kinstr_jm_init() - Initialise an instrumentation job manager context. + * @ctx: Non-NULL pointer to where the pointer to the created context will + * be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_kinstr_jm_init(struct kbase_kinstr_jm **ctx); + +/** + * kbase_kinstr_jm_term() - Terminate an instrumentation job manager context. + * @ctx: Pointer to context to be terminated. + */ +void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx); + +/** + * kbase_kinstr_jm_get_fd() - Retrieves a file descriptor that can be used to + * read the atom state changes from userspace + * + * @ctx: Pointer to the initialized context + * @jm_fd_arg: Pointer to the union containing the in/out params + * Return: -1 on failure, valid file descriptor on success + */ +int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, + union kbase_kinstr_jm_fd *jm_fd_arg); + +/** + * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state + * @atom: The atom that has changed state + * @state: The new state of the atom + * + * This performs the actual storage of the state ready for user space to + * read the data. It is only called when the static key is enabled from + * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this + * function directly. + */ +void kbasep_kinstr_jm_atom_state( + struct kbase_jd_atom *const atom, + const enum kbase_kinstr_jm_reader_atom_state state); + +/* Allows ASM goto patching to reduce tracing overhead. This is + * incremented/decremented when readers are created and terminated. This really + * shouldn't be changed externally, but if you do, make sure you use + * a static_key_inc()/static_key_dec() pair. + */ +#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE +extern struct static_key_false basep_kinstr_jm_reader_static_key; +#else +/* Pre-4.3 kernels have a different API for static keys, but work + * mostly the same with less type safety. 
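
/*
 * A sketch (hypothetical function) of the pattern the inline wrappers below
 * follow: the call into the kbasep_*() implementation is guarded by the reader
 * static key, so with no reader attached the cost at each call site is a
 * single patched branch.
 */
static inline void example_trace_queue(struct kbase_jd_atom *const atom)
{
	if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key))
		kbasep_kinstr_jm_atom_state(
			atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE);
}
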
*/ +extern struct static_key basep_kinstr_jm_reader_static_key; +#define static_branch_unlikely(key) static_key_false(key) +#endif /* KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE */ + +/** + * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state + * @atom: The atom that has changed state + * @state: The new state of the atom + * + * This uses a static key to reduce overhead when tracing is disabled + */ +static inline void kbase_kinstr_jm_atom_state( + struct kbase_jd_atom *const atom, + const enum kbase_kinstr_jm_reader_atom_state state) +{ + if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) + kbasep_kinstr_jm_atom_state(atom, state); +} + +/** + * kbase_kinstr_jm_atom_state_queue() - Signifies that an atom has entered a + * hardware or software queue. + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_queue( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); +} + +/** + * kbase_kinstr_jm_atom_state_start() - Signifies that work has started on an + * atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_start( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); +} + +/** + * kbase_kinstr_jm_atom_state_stop() - Signifies that work has stopped on an + * atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_stop( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); +} + +/** + * kbase_kinstr_jm_atom_state_complete() - Signifies that all work has completed + * on an atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_complete( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); +} + +/** + * kbase_kinstr_jm_atom_queue() - A software *or* hardware atom is queued for + * execution + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_queue(atom); +} + +/** + * kbase_kinstr_jm_atom_complete() - A software *or* hardware atom is fully + * completed + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_complete( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_complete(atom); +} + +/** + * kbase_kinstr_jm_atom_sw_start() - A software atom has started work + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_sw_start( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_start(atom); +} + +/** + * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_sw_stop( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_stop(atom); +} + +/** + * kbasep_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted + * @atom: The atom that has been submitted + * + * This private implementation should not be called directly, it is protected + * by a static key in kbase_kinstr_jm_atom_hw_submit(). Use that instead. 
+ */ +void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom); + +/** + * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted + * @atom: The atom that has been submitted + */ +static inline void kbase_kinstr_jm_atom_hw_submit( + struct kbase_jd_atom *const atom) +{ + if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) + kbasep_kinstr_jm_atom_hw_submit(atom); +} + +/** + * kbasep_kinstr_jm_atom_hw_release() - A hardware atom has been released + * @atom: The atom that has been released + * + * This private implementation should not be called directly, it is protected + * by a static key in kbase_kinstr_jm_atom_hw_release(). Use that instead. + */ +void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom); + +/** + * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released + * @atom: The atom that has been released + */ +static inline void kbase_kinstr_jm_atom_hw_release( + struct kbase_jd_atom *const atom) +{ + if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) + kbasep_kinstr_jm_atom_hw_release(atom); +} + +#endif /* _KBASE_KINSTR_JM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm_reader.h b/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm_reader.h new file mode 100644 index 000000000000..e267e6bc44de --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_kinstr_jm_reader.h @@ -0,0 +1,70 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * mali_kbase_kinstr_jm_reader.h + * Provides an ioctl API to read kernel atom state changes. The flow of the + * API is: + * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD`` + * 2. Determine the buffer structure layout via the above ioctl's returned + * size and version fields in ``struct kbase_kinstr_jm_fd_out`` + * 4. Poll the file descriptor for ``POLLIN`` + * 5. Get data with read() on the fd + * 6. Use the structure version to understand how to read the data from the + * buffer + * 7. Repeat 4-6 + * 8. Close the file descriptor + */ + +#ifndef _KBASE_KINSTR_JM_READER_H_ +#define _KBASE_KINSTR_JM_READER_H_ + +/** + * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom + * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has + * entered a hardware queue + * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started + * on an atom + * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped + * on an atom + * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has + * completed on an atom + * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations + * + * We can add new states to the end of this if they do not break the existing + * state machine. 
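
/*
 * A userspace-side sketch (illustrative only) of steps 1-2 of the flow listed
 * above: obtain the reader file descriptor for an already-open Mali device fd
 * and record the entry size before reading. The uapi header providing
 * KBASE_IOCTL_KINSTR_JM_FD and union kbase_kinstr_jm_fd, and the assumption
 * that the ioctl returns the new file descriptor on success, are assumptions
 * of this sketch.
 */
#include <stddef.h>
#include <sys/ioctl.h>

static int example_open_reader(int mali_dev_fd, size_t *entry_size)
{
	/* count must be a power of two; the padding stays zero-initialised */
	union kbase_kinstr_jm_fd arg = { .in = { .count = 256 } };
	int reader_fd;

	reader_fd = ioctl(mali_dev_fd, KBASE_IOCTL_KINSTR_JM_FD, &arg);
	if (reader_fd >= 0)
		*entry_size = arg.out.size;	/* use this, not sizeof() */

	return reader_fd;
}
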
Old user mode code can gracefully ignore states they do not + * understand. + * + * If we need to make a breaking change to the state machine, we can do that by + * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will + * mean that old user mode code will fail to understand the new state field in + * the structure and gracefully not use the state change API. + */ +enum kbase_kinstr_jm_reader_atom_state { + KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE, + KBASE_KINSTR_JM_READER_ATOM_STATE_START, + KBASE_KINSTR_JM_READER_ATOM_STATE_STOP, + KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE, + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT +}; + +#endif /* _KBASE_KINSTR_JM_READER_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_linux.h b/drivers/gpu/arm/b_r26p0/mali_kbase_linux.h new file mode 100644 index 000000000000..003ac9e68a76 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_linux.h @@ -0,0 +1,48 @@ +/* + * + * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_linux.h + * Base kernel APIs, Linux implementation. + */ + +#ifndef _KBASE_LINUX_H_ +#define _KBASE_LINUX_H_ + +/* All things that are needed for the Linux port. */ +#include +#include +#include +#include +#include + +#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API)) + #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) +#else + #define KBASE_EXPORT_TEST_API(func) +#endif + +#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) + +#endif /* _KBASE_LINUX_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c new file mode 100644 index 000000000000..8cf7e5d59b09 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c @@ -0,0 +1,4582 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Base kernel memory APIs + */ +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_OF +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Alignment of objects allocated by the GPU inside a just-in-time memory + * region whose size is given by an end address + * + * This is the alignment of objects allocated by the GPU, but possibly not + * fully written to. When taken into account with + * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes + * that the JIT memory report size can exceed the actual backed memory size. + */ +#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u) + +/* + * Maximum size of objects allocated by the GPU inside a just-in-time memory + * region whose size is given by an end address + * + * This is the maximum size of objects allocated by the GPU, but possibly not + * fully written to. When taken into account with + * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes + * that the JIT memory report size can exceed the actual backed memory size. + */ +#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u) + + +/* Forward declarations */ +static void free_partial_locked(struct kbase_context *kctx, + struct kbase_mem_pool *pool, struct tagged_addr tp); + +static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) +{ +#if defined(CONFIG_ARM64) + /* VA_BITS can be as high as 48 bits, but all bits are available for + * both user and kernel. + */ + size_t cpu_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + /* x86_64 can access 48 bits of VA, but the 48th is used to denote + * kernel (1) vs userspace (0), so the max here is 47. + */ + size_t cpu_va_bits = 47; +#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32) + size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE; +#else +#error "Unknown CPU VA width for this architecture" +#endif + +#ifdef CONFIG_64BIT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + cpu_va_bits = 32; +#endif + + return cpu_va_bits; +} + +/* This function finds out which RB tree the given pfn from the GPU VA belongs + * to based on the memory zone the pfn refers to */ +static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, + u64 gpu_pfn) +{ + struct rb_root *rbtree = NULL; + + /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA + * zone if this has been initialized. + */ + if (gpu_pfn >= kctx->exec_va_start) + rbtree = &kctx->reg_rbtree_exec; + else { + u64 same_va_end; + +#ifdef CONFIG_64BIT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) +#endif /* CONFIG_64BIT */ + same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; +#ifdef CONFIG_64BIT + else + same_va_end = kctx->same_va_end; +#endif /* CONFIG_64BIT */ + + if (gpu_pfn >= same_va_end) + rbtree = &kctx->reg_rbtree_custom; + else + rbtree = &kctx->reg_rbtree_same; + } + + return rbtree; +} + +/* This function inserts a region into the tree. */ +static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) +{ + u64 start_pfn = new_reg->start_pfn; + struct rb_node **link = NULL; + struct rb_node *parent = NULL; + struct rb_root *rbtree = NULL; + + rbtree = new_reg->rbtree; + + link = &(rbtree->rb_node); + /* Find the right place in the tree using tree search */ + while (*link) { + struct kbase_va_region *old_reg; + + parent = *link; + old_reg = rb_entry(parent, struct kbase_va_region, rblink); + + /* RBTree requires no duplicate entries. 
*/ + KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); + + if (old_reg->start_pfn > start_pfn) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Put the new node there, and rebalance tree */ + rb_link_node(&(new_reg->rblink), parent, link); + + rb_insert_color(&(new_reg->rblink), rbtree); +} + +static struct kbase_va_region *find_region_enclosing_range_rbtree( + struct rb_root *rbtree, u64 start_pfn, size_t nr_pages) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + u64 end_pfn = start_pfn + nr_pages; + + rbnode = rbtree->rb_node; + + while (rbnode) { + u64 tmp_start_pfn, tmp_end_pfn; + + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + tmp_start_pfn = reg->start_pfn; + tmp_end_pfn = reg->start_pfn + reg->nr_pages; + + /* If start is lower than this, go left. */ + if (start_pfn < tmp_start_pfn) + rbnode = rbnode->rb_left; + /* If end is higher than this, then go right. */ + else if (end_pfn > tmp_end_pfn) + rbnode = rbnode->rb_right; + else /* Enclosing */ + return reg; + } + + return NULL; +} + +struct kbase_va_region *kbase_find_region_enclosing_address( + struct rb_root *rbtree, u64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_node *rbnode; + struct kbase_va_region *reg; + + rbnode = rbtree->rb_node; + + while (rbnode) { + u64 tmp_start_pfn, tmp_end_pfn; + + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + tmp_start_pfn = reg->start_pfn; + tmp_end_pfn = reg->start_pfn + reg->nr_pages; + + /* If start is lower than this, go left. */ + if (gpu_pfn < tmp_start_pfn) + rbnode = rbnode->rb_left; + /* If end is higher than this, then go right. */ + else if (gpu_pfn >= tmp_end_pfn) + rbnode = rbnode->rb_right; + else /* Enclosing */ + return reg; + } + + return NULL; +} + +/* Find region enclosing given address. 
*/ +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( + struct kbase_context *kctx, u64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_root *rbtree = NULL; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + lockdep_assert_held(&kctx->reg_lock); + + rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + + return kbase_find_region_enclosing_address(rbtree, gpu_addr); +} + +KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address); + +struct kbase_va_region *kbase_find_region_base_address( + struct rb_root *rbtree, u64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_node *rbnode = NULL; + struct kbase_va_region *reg = NULL; + + rbnode = rbtree->rb_node; + + while (rbnode) { + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + if (reg->start_pfn > gpu_pfn) + rbnode = rbnode->rb_left; + else if (reg->start_pfn < gpu_pfn) + rbnode = rbnode->rb_right; + else + return reg; + } + + return NULL; +} + +/* Find region with given base address */ +struct kbase_va_region *kbase_region_tracker_find_region_base_address( + struct kbase_context *kctx, u64 gpu_addr) +{ + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_root *rbtree = NULL; + + lockdep_assert_held(&kctx->reg_lock); + + rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + + return kbase_find_region_base_address(rbtree, gpu_addr); +} + +KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address); + +/* Find region meeting given requirements */ +static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( + struct kbase_va_region *reg_reqs, + size_t nr_pages, size_t align_offset, size_t align_mask, + u64 *out_start_pfn) +{ + struct rb_node *rbnode = NULL; + struct kbase_va_region *reg = NULL; + struct rb_root *rbtree = NULL; + + /* Note that this search is a linear search, as we do not have a target + address in mind, so does not benefit from the rbtree search */ + rbtree = reg_reqs->rbtree; + + for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + if ((reg->nr_pages >= nr_pages) && + (reg->flags & KBASE_REG_FREE)) { + /* Check alignment */ + u64 start_pfn = reg->start_pfn; + + /* When align_offset == align, this sequence is + * equivalent to: + * (start_pfn + align_mask) & ~(align_mask) + * + * Otherwise, it aligns to n*align + offset, for the + * lowest value n that makes this still >start_pfn */ + start_pfn += align_mask; + start_pfn -= (start_pfn - align_offset) & (align_mask); + + if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) { + /* Can't end at 4GB boundary */ + if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB)) + start_pfn += align_offset; + + /* Can't start at 4GB boundary */ + if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB)) + start_pfn += align_offset; + + if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) || + !(start_pfn & BASE_MEM_PFN_MASK_4GB)) + continue; + } else if (reg_reqs->flags & + KBASE_REG_GPU_VA_SAME_4GB_PAGE) { + u64 end_pfn = start_pfn + nr_pages - 1; + + if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) != + (end_pfn & ~BASE_MEM_PFN_MASK_4GB)) + start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB; + } + + if ((start_pfn >= reg->start_pfn) && + (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) && + ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) { + *out_start_pfn = start_pfn; + return reg; + } + } + } + + return NULL; +} + +/** + * @brief Remove a region object from the global list. 
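+ * (Return value note, derived from the implementation below: returns 0 on
+ * success, or -ENOMEM if no merge with an adjacent free region was possible
+ * and a replacement free placeholder region could not be allocated.)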
+ * + * The region reg is removed, possibly by merging with other free and + * compatible adjacent regions. It must be called with the context + * region lock held. The associated memory is not released (see + * kbase_free_alloced_region). Internal use only. + */ +int kbase_remove_va_region(struct kbase_va_region *reg) +{ + struct rb_node *rbprev; + struct kbase_va_region *prev = NULL; + struct rb_node *rbnext; + struct kbase_va_region *next = NULL; + struct rb_root *reg_rbtree = NULL; + + int merged_front = 0; + int merged_back = 0; + int err = 0; + + reg_rbtree = reg->rbtree; + + /* Try to merge with the previous block first */ + rbprev = rb_prev(&(reg->rblink)); + if (rbprev) { + prev = rb_entry(rbprev, struct kbase_va_region, rblink); + if (prev->flags & KBASE_REG_FREE) { + /* We're compatible with the previous VMA, + * merge with it */ + WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != + (reg->flags & KBASE_REG_ZONE_MASK)); + prev->nr_pages += reg->nr_pages; + rb_erase(&(reg->rblink), reg_rbtree); + reg = prev; + merged_front = 1; + } + } + + /* Try to merge with the next block second */ + /* Note we do the lookup here as the tree may have been rebalanced. */ + rbnext = rb_next(&(reg->rblink)); + if (rbnext) { + /* We're compatible with the next VMA, merge with it */ + next = rb_entry(rbnext, struct kbase_va_region, rblink); + if (next->flags & KBASE_REG_FREE) { + WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != + (reg->flags & KBASE_REG_ZONE_MASK)); + next->start_pfn = reg->start_pfn; + next->nr_pages += reg->nr_pages; + rb_erase(&(reg->rblink), reg_rbtree); + merged_back = 1; + if (merged_front) { + /* We already merged with prev, free it */ + kfree(reg); + } + } + } + + /* If we failed to merge then we need to add a new block */ + if (!(merged_front || merged_back)) { + /* + * We didn't merge anything. Add a new free + * placeholder and remove the original one. + */ + struct kbase_va_region *free_reg; + + free_reg = kbase_alloc_free_region(reg_rbtree, + reg->start_pfn, reg->nr_pages, + reg->flags & KBASE_REG_ZONE_MASK); + if (!free_reg) { + err = -ENOMEM; + goto out; + } + rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree); + } + + out: + return err; +} + +KBASE_EXPORT_TEST_API(kbase_remove_va_region); + +/** + * kbase_insert_va_region_nolock - Insert a VA region to the list, + * replacing the existing one. + * + * @new_reg: The new region to insert + * @at_reg: The region to replace + * @start_pfn: The Page Frame Number to insert at + * @nr_pages: The number of pages of the region + */ +static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg, + struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages) +{ + struct rb_root *reg_rbtree = NULL; + int err = 0; + + reg_rbtree = at_reg->rbtree; + + /* Must be a free region */ + KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0); + /* start_pfn should be contained within at_reg */ + KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages)); + /* at least nr_pages from start_pfn should be contained within at_reg */ + KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages); + + new_reg->start_pfn = start_pfn; + new_reg->nr_pages = nr_pages; + + /* Regions are a whole use, so swap and delete old one. 
*/ + if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) { + rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), + reg_rbtree); + kfree(at_reg); + } + /* New region replaces the start of the old one, so insert before. */ + else if (at_reg->start_pfn == start_pfn) { + at_reg->start_pfn += nr_pages; + KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages); + at_reg->nr_pages -= nr_pages; + + kbase_region_tracker_insert(new_reg); + } + /* New region replaces the end of the old one, so insert after. */ + else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { + at_reg->nr_pages -= nr_pages; + + kbase_region_tracker_insert(new_reg); + } + /* New region splits the old one, so insert and create new */ + else { + struct kbase_va_region *new_front_reg; + + new_front_reg = kbase_alloc_free_region(reg_rbtree, + at_reg->start_pfn, + start_pfn - at_reg->start_pfn, + at_reg->flags & KBASE_REG_ZONE_MASK); + + if (new_front_reg) { + at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; + at_reg->start_pfn = start_pfn + nr_pages; + + kbase_region_tracker_insert(new_front_reg); + kbase_region_tracker_insert(new_reg); + } else { + err = -ENOMEM; + } + } + + return err; +} + +/** + * kbase_add_va_region - Add a VA region to the region list for a context. + * + * @kctx: kbase context containing the region + * @reg: the region to add + * @addr: the address to insert the region at + * @nr_pages: the number of pages in the region + * @align: the minimum alignment in pages + */ +int kbase_add_va_region(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 addr, + size_t nr_pages, size_t align) +{ + int err = 0; + struct kbase_device *kbdev = kctx->kbdev; + int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx); + int gpu_pc_bits = + kbdev->gpu_props.props.core_props.log2_program_counter_size; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + + lockdep_assert_held(&kctx->reg_lock); + + /* The executable allocation from the SAME_VA zone would already have an + * appropriately aligned GPU VA chosen for it. + * Also the executable allocation from EXEC_VA zone doesn't need the + * special alignment. + */ + if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && + ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { + if (cpu_va_bits > gpu_pc_bits) { + align = max(align, (size_t)((1ULL << gpu_pc_bits) + >> PAGE_SHIFT)); + } + } + + do { + err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages, + align); + if (err != -ENOMEM) + break; + + /* + * If the allocation is not from the same zone as JIT + * then don't retry, we're out of VA and there is + * nothing which can be done about it. + */ + if ((reg->flags & KBASE_REG_ZONE_MASK) != + KBASE_REG_ZONE_CUSTOM_VA) + break; + } while (kbase_jit_evict(kctx)); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_add_va_region); + +/** + * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree + * + * Insert a region into the rbtree that was specified when the region was + * created. If addr is 0 a free area in the rbtree is used, otherwise the + * specified address is used. 
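+ * For example (an illustrative summary of the two paths implemented below):
+ * with addr == 0 the zone's regions are walked linearly for the first free
+ * region that can hold nr_pages at the requested alignment, whereas a
+ * non-zero addr requires the enclosing region to already exist and still be
+ * free, otherwise -ENOMEM is returned.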
+ * + * @kbdev: The kbase device + * @reg: The region to add + * @addr: The address to add the region at, or 0 to map at any available address + * @nr_pages: The size of the region in pages + * @align: The minimum alignment in pages + */ +int kbase_add_va_region_rbtree(struct kbase_device *kbdev, + struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align) +{ + struct device *const dev = kbdev->dev; + struct rb_root *rbtree = NULL; + struct kbase_va_region *tmp; + u64 gpu_pfn = addr >> PAGE_SHIFT; + int err = 0; + + rbtree = reg->rbtree; + + if (!align) + align = 1; + + /* must be a power of 2 */ + KBASE_DEBUG_ASSERT(is_power_of_2(align)); + KBASE_DEBUG_ASSERT(nr_pages > 0); + + /* Path 1: Map a specific address. Find the enclosing region, + * which *must* be free. + */ + if (gpu_pfn) { + KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); + + tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn, + nr_pages); + if (kbase_is_region_invalid(tmp)) { + dev_warn(dev, "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); + err = -ENOMEM; + goto exit; + } else if (!kbase_is_region_free(tmp)) { + dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", + tmp->start_pfn, tmp->flags, + tmp->nr_pages, gpu_pfn, nr_pages); + err = -ENOMEM; + goto exit; + } + + err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn, + nr_pages); + if (err) { + dev_warn(dev, "Failed to insert va region"); + err = -ENOMEM; + } + } else { + /* Path 2: Map any free address which meets the requirements. */ + u64 start_pfn; + size_t align_offset = align; + size_t align_mask = align - 1; + + if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { + WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", + __func__, + (unsigned long)align); + align_mask = reg->extent - 1; + align_offset = reg->extent - reg->initial_commit; + } + + tmp = kbase_region_tracker_find_region_meeting_reqs(reg, + nr_pages, align_offset, align_mask, + &start_pfn); + if (tmp) { + err = kbase_insert_va_region_nolock(reg, tmp, + start_pfn, nr_pages); + if (unlikely(err)) { + dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages", + start_pfn, nr_pages); + } + } else { + dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n", + nr_pages, align_offset, align_mask); + err = -ENOMEM; + } + } + +exit: + return err; +} + +/** + * @brief Initialize the internal region tracker data structure. + */ +static void kbase_region_tracker_ds_init(struct kbase_context *kctx, + struct kbase_va_region *same_va_reg, + struct kbase_va_region *custom_va_reg) +{ + kctx->reg_rbtree_same = RB_ROOT; + kbase_region_tracker_insert(same_va_reg); + + /* Although custom_va_reg and exec_va_reg don't always exist, + * initialize unconditionally because of the mem_view debugfs + * implementation which relies on them being empty. + * + * The difference between the two is that the EXEC_VA region + * is never initialized at this stage. 
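+ * (The EXEC_VA zone itself is only carved out later, on demand, by
+ * kbase_region_tracker_init_exec() below.)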
+ */ + kctx->reg_rbtree_custom = RB_ROOT; + kctx->reg_rbtree_exec = RB_ROOT; + + if (custom_va_reg) + kbase_region_tracker_insert(custom_va_reg); +} + +static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + + do { + rbnode = rb_first(rbtree); + if (rbnode) { + rb_erase(rbnode, rbtree); + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + WARN_ON(reg->va_refcnt != 1); + /* Reset the start_pfn - as the rbtree is being + * destroyed and we've already erased this region, there + * is no further need to attempt to remove it. + * This won't affect the cleanup if the region was + * being used as a sticky resource as the cleanup + * related to sticky resources anyways need to be + * performed before the term of region tracker. + */ + reg->start_pfn = 0; + kbase_free_alloced_region(reg); + } + } while (rbnode); +} + +void kbase_region_tracker_term(struct kbase_context *kctx) +{ + kbase_gpu_vm_lock(kctx); + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); + kbase_gpu_vm_unlock(kctx); +} + +void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) +{ + kbase_region_tracker_erase_rbtree(rbtree); +} + +static size_t kbase_get_same_va_bits(struct kbase_context *kctx) +{ + return min(kbase_get_num_cpu_va_bits(kctx), + (size_t) kctx->kbdev->gpu_props.mmu.va_bits); +} + +int kbase_region_tracker_init(struct kbase_context *kctx) +{ + struct kbase_va_region *same_va_reg; + struct kbase_va_region *custom_va_reg = NULL; + size_t same_va_bits = kbase_get_same_va_bits(kctx); + u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; + u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; + u64 same_va_pages; + int err; + + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); + + same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; + /* all have SAME_VA */ + same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1, + same_va_pages, + KBASE_REG_ZONE_SAME_VA); + + if (!same_va_reg) { + err = -ENOMEM; + goto fail_unlock; + } + +#ifdef CONFIG_64BIT + /* 32-bit clients have custom VA zones */ + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { +#endif + if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { + err = -EINVAL; + goto fail_free_same_va; + } + /* If the current size of TMEM is out of range of the + * virtual address space addressable by the MMU then + * we should shrink it to fit + */ + if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) + custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; + + custom_va_reg = kbase_alloc_free_region( + &kctx->reg_rbtree_custom, + KBASE_REG_ZONE_CUSTOM_VA_BASE, + custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + + if (!custom_va_reg) { + err = -ENOMEM; + goto fail_free_same_va; + } +#ifdef CONFIG_64BIT + } else { + custom_va_size = 0; + } +#endif + + kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); + + kctx->same_va_end = same_va_pages + 1; + kctx->gpu_va_end = kctx->same_va_end + custom_va_size; + kctx->exec_va_start = U64_MAX; + kctx->jit_va = false; + + + kbase_gpu_vm_unlock(kctx); + return 0; + +fail_free_same_va: + kbase_free_alloced_region(same_va_reg); +fail_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + +#ifdef CONFIG_64BIT +static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, + u64 jit_va_pages) +{ + struct 
kbase_va_region *same_va; + struct kbase_va_region *custom_va_reg; + + lockdep_assert_held(&kctx->reg_lock); + + /* First verify that a JIT_VA zone has not been created already. */ + if (kctx->jit_va) + return -EINVAL; + + /* + * Modify the same VA free region after creation. Be careful to ensure + * that allocations haven't been made as they could cause an overlap + * to happen with existing same VA allocations and the custom VA zone. + */ + same_va = kbase_region_tracker_find_region_base_address(kctx, + PAGE_SIZE); + if (!same_va) + return -ENOMEM; + + if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages) + return -ENOMEM; + + /* It's safe to adjust the same VA zone now */ + same_va->nr_pages -= jit_va_pages; + kctx->same_va_end -= jit_va_pages; + + /* + * Create a custom VA zone at the end of the VA for allocations which + * JIT can use so it doesn't have to allocate VA from the kernel. + */ + custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, + kctx->same_va_end, + jit_va_pages, + KBASE_REG_ZONE_CUSTOM_VA); + + /* + * The context will be destroyed if we fail here so no point + * reverting the change we made to same_va. + */ + if (!custom_va_reg) + return -ENOMEM; + + kbase_region_tracker_insert(custom_va_reg); + return 0; +} +#endif + +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + int max_allocations, int trim_level, int group_id, + u64 phys_pages_limit) +{ + int err = 0; + + if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL) + return -EINVAL; + + if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + + if (phys_pages_limit > jit_va_pages) + return -EINVAL; + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (phys_pages_limit != jit_va_pages) + kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + kbase_gpu_vm_lock(kctx); + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); +#endif + /* + * Nothing to do for 32-bit clients, JIT uses the existing + * custom VA zone. + */ + + if (!err) { + kctx->jit_max_allocations = max_allocations; + kctx->trim_level = trim_level; + kctx->jit_va = true; + kctx->jit_group_id = group_id; +#if MALI_JIT_PRESSURE_LIMIT_BASE + kctx->jit_phys_pages_limit = phys_pages_limit; + dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", + phys_pages_limit); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + } + + kbase_gpu_vm_unlock(kctx); + + return err; +} + +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) +{ + struct kbase_va_region *shrinking_va_reg; + struct kbase_va_region *exec_va_reg; + u64 exec_va_start, exec_va_base_addr; + int err; + + /* The EXEC_VA zone shall be created by making space at the end of the + * address space. Firstly, verify that the number of EXEC_VA pages + * requested by the client is reasonable and then make sure that it is + * not greater than the address space itself before calculating the base + * address of the new zone. + */ + if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) + return -EINVAL; + + kbase_gpu_vm_lock(kctx); + + /* First verify that a JIT_VA zone has not been created already. 
*/ + if (kctx->jit_va) { + err = -EPERM; + goto exit_unlock; + } + + if (exec_va_pages > kctx->gpu_va_end) { + err = -ENOMEM; + goto exit_unlock; + } + + exec_va_start = kctx->gpu_va_end - exec_va_pages; + exec_va_base_addr = exec_va_start << PAGE_SHIFT; + + shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx, + exec_va_base_addr); + if (!shrinking_va_reg) { + err = -ENOMEM; + goto exit_unlock; + } + + /* Make sure that the EXEC_VA region is still uninitialized */ + if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) == + KBASE_REG_ZONE_EXEC_VA) { + err = -EPERM; + goto exit_unlock; + } + + if (shrinking_va_reg->nr_pages <= exec_va_pages) { + err = -ENOMEM; + goto exit_unlock; + } + + exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, + exec_va_start, + exec_va_pages, + KBASE_REG_ZONE_EXEC_VA); + if (!exec_va_reg) { + err = -ENOMEM; + goto exit_unlock; + } + + shrinking_va_reg->nr_pages -= exec_va_pages; +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + kctx->same_va_end -= exec_va_pages; +#endif + kctx->exec_va_start = exec_va_start; + + kbase_region_tracker_insert(exec_va_reg); + err = 0; + +exit_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + + +int kbase_mem_init(struct kbase_device *kbdev) +{ + int err = 0; + struct kbasep_mem_device *memdev; +#ifdef CONFIG_OF + struct device_node *mgm_node = NULL; +#endif + + KBASE_DEBUG_ASSERT(kbdev); + + memdev = &kbdev->memdev; + + kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults, + KBASE_MEM_POOL_MAX_SIZE_KCTX); + + /* Initialize memory usage */ + atomic_set(&memdev->used_pages, 0); + + spin_lock_init(&kbdev->gpu_mem_usage_lock); + kbdev->total_gpu_pages = 0; + kbdev->process_root = RB_ROOT; + kbdev->dma_buf_root = RB_ROOT; + mutex_init(&kbdev->dma_buf_lock); + +#ifdef IR_THRESHOLD + atomic_set(&memdev->ir_threshold, IR_THRESHOLD); +#else + atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD); +#endif + + kbdev->mgm_dev = &kbase_native_mgm_dev; + +#ifdef CONFIG_OF + /* Check to see whether or not a platform-specific memory group manager + * is configured and available. 
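+ * A minimal device-tree sketch of that wiring (node and label names below
+ * are illustrative only; the "physical-memory-group-manager" property name
+ * is the one read by this code):
+ *
+ *   mgm: physical-memory-group-manager {
+ *           compatible = "...";
+ *   };
+ *
+ *   gpu {
+ *           physical-memory-group-manager = <&mgm>;
+ *   };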
+ */ + mgm_node = of_parse_phandle(kbdev->dev->of_node, + "physical-memory-group-manager", 0); + if (!mgm_node) { + dev_info(kbdev->dev, + "No memory group manager is configured\n"); + } else { + struct platform_device *const pdev = + of_find_device_by_node(mgm_node); + + if (!pdev) { + dev_err(kbdev->dev, + "The configured memory group manager was not found\n"); + } else { + kbdev->mgm_dev = platform_get_drvdata(pdev); + if (!kbdev->mgm_dev) { + dev_info(kbdev->dev, + "Memory group manager is not ready\n"); + err = -EPROBE_DEFER; + } else if (!try_module_get(kbdev->mgm_dev->owner)) { + dev_err(kbdev->dev, + "Failed to get memory group manager module\n"); + err = -ENODEV; + kbdev->mgm_dev = NULL; + } else { + dev_info(kbdev->dev, + "Memory group manager successfully loaded\n"); + } + } + of_node_put(mgm_node); + } +#endif + + if (likely(!err)) { + struct kbase_mem_pool_group_config mem_pool_defaults; + + kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults, + KBASE_MEM_POOL_MAX_SIZE_KBDEV); + + err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, + &mem_pool_defaults, NULL); + } + + return err; +} + +void kbase_mem_halt(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbase_mem_term(struct kbase_device *kbdev) +{ + struct kbasep_mem_device *memdev; + int pages; + + KBASE_DEBUG_ASSERT(kbdev); + + memdev = &kbdev->memdev; + + pages = atomic_read(&memdev->used_pages); + if (pages != 0) + dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + + kbase_mem_pool_group_term(&kbdev->mem_pools); + + WARN_ON(kbdev->total_gpu_pages); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); + mutex_destroy(&kbdev->dma_buf_lock); + + if (kbdev->mgm_dev) + module_put(kbdev->mgm_dev->owner); +} +KBASE_EXPORT_TEST_API(kbase_mem_term); + +/** + * @brief Allocate a free region object. + * + * The allocated object is not part of any list yet, and is flagged as + * KBASE_REG_FREE. No mapping is allocated yet. + * + * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA or KBASE_REG_ZONE_EXEC_VA. 
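+ *
+ * An illustrative usage note, taken from kbase_region_tracker_init() above:
+ *
+ *   same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1,
+ *                                         same_va_pages, KBASE_REG_ZONE_SAME_VA);
+ *
+ * allocates the free placeholder that seeds the SAME_VA zone.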
+ * + */ +struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, + u64 start_pfn, size_t nr_pages, int zone) +{ + struct kbase_va_region *new_reg; + + KBASE_DEBUG_ASSERT(rbtree != NULL); + + /* zone argument should only contain zone related region flags */ + KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); + KBASE_DEBUG_ASSERT(nr_pages > 0); + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); + + new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); + + if (!new_reg) + return NULL; + + new_reg->va_refcnt = 1; + new_reg->cpu_alloc = NULL; /* no alloc bound yet */ + new_reg->gpu_alloc = NULL; /* no alloc bound yet */ + new_reg->rbtree = rbtree; + new_reg->flags = zone | KBASE_REG_FREE; + + new_reg->flags |= KBASE_REG_GROWABLE; + + new_reg->start_pfn = start_pfn; + new_reg->nr_pages = nr_pages; + + INIT_LIST_HEAD(&new_reg->jit_node); + INIT_LIST_HEAD(&new_reg->link); + + return new_reg; +} + +KBASE_EXPORT_TEST_API(kbase_alloc_free_region); + +static struct kbase_context *kbase_reg_flags_to_kctx( + struct kbase_va_region *reg) +{ + struct kbase_context *kctx = NULL; + struct rb_root *rbtree = reg->rbtree; + + switch (reg->flags & KBASE_REG_ZONE_MASK) { + case KBASE_REG_ZONE_CUSTOM_VA: + kctx = container_of(rbtree, struct kbase_context, + reg_rbtree_custom); + break; + case KBASE_REG_ZONE_SAME_VA: + kctx = container_of(rbtree, struct kbase_context, + reg_rbtree_same); + break; + case KBASE_REG_ZONE_EXEC_VA: + kctx = container_of(rbtree, struct kbase_context, + reg_rbtree_exec); + break; + default: + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); + break; + } + + return kctx; +} + +/** + * @brief Free a region object. + * + * The described region must be freed of any mapping. + * + * If the region is not flagged as KBASE_REG_FREE, the region's + * alloc object will be released. + * It is a bug if no alloc object exists for non-free regions. + * + */ +void kbase_free_alloced_region(struct kbase_va_region *reg) +{ + if (!(reg->flags & KBASE_REG_FREE)) { + struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + + if (WARN_ON(!kctx)) + return; + + if (WARN_ON(kbase_is_region_invalid(reg))) + return; + + dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + (void *)reg); + + mutex_lock(&kctx->jit_evict_lock); + + /* + * The physical allocation should have been removed from the + * eviction list before this function is called. However, in the + * case of abnormal process termination or the app leaking the + * memory kbase_mem_free_region is not called so it can still be + * on the list at termination time of the region tracker. + */ + if (!list_empty(&reg->gpu_alloc->evict_node)) { + mutex_unlock(&kctx->jit_evict_lock); + + /* + * Unlink the physical allocation before unmaking it + * evictable so that the allocation isn't grown back to + * its last backed size as we're going to unmap it + * anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must + * unmake it before trying to free it. + * If the memory hasn't been reclaimed it will be + * unmapped and freed below, if it has been reclaimed + * then the operations below are no-ops. 
+ */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } else { + mutex_unlock(&kctx->jit_evict_lock); + } + + /* + * Remove the region from the sticky resource metadata + * list should it be there. + */ + kbase_sticky_resource_release_force(kctx, NULL, + reg->start_pfn << PAGE_SHIFT); + + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + + reg->flags |= KBASE_REG_VA_FREED; + kbase_va_region_alloc_put(kctx, reg); + } else { + kfree(reg); + } +} + +KBASE_EXPORT_TEST_API(kbase_free_alloced_region); + +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) +{ + int err; + size_t i = 0; + unsigned long attr; + unsigned long mask = ~KBASE_REG_MEMATTR_MASK; + unsigned long gwt_mask = ~0; + int group_id; + struct kbase_mem_phy_alloc *alloc; + +#ifdef CONFIG_MALI_CINSTR_GWT + if (kctx->gwt_enabled) + gwt_mask = ~KBASE_REG_GPU_WR; +#endif + + if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && + (reg->flags & KBASE_REG_SHARE_BOTH)) + attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); + else + attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + + err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); + if (err) + return err; + + alloc = reg->gpu_alloc; + group_id = alloc->group_id; + + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + u64 const stride = alloc->imported.alias.stride; + + KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); + for (i = 0; i < alloc->imported.alias.nents; i++) { + if (alloc->imported.alias.aliased[i].alloc) { + err = kbase_mmu_insert_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn + (i * stride), + alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, + alloc->imported.alias.aliased[i].length, + reg->flags & gwt_mask, + kctx->as_nr, + group_id); + if (err) + goto bad_insert; + + kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); + } else { + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + i * stride, + kctx->aliasing_sink_page, + alloc->imported.alias.aliased[i].length, + (reg->flags & mask & gwt_mask) | attr, + group_id); + + if (err) + goto bad_insert; + } + } + } else { + err = kbase_mmu_insert_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, + kctx->as_nr, + group_id); + if (err) + goto bad_insert; + kbase_mem_phy_alloc_gpu_mapped(alloc); + } + + if (reg->flags & KBASE_REG_IMPORT_PAD && + !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM && + reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + /* For padded imported dma-buf memory, map the dummy aliasing + * page from the end of the dma-buf pages, to the end of the + * region using a read only mapping. + * + * Only map when it's imported dma-buf memory that is currently + * mapped. + * + * Assume reg->gpu_alloc->nents is the number of actual pages + * in the dma-buf memory. 
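+ * (In other words, GPU pages [nents, nr_pages) of the region end up backed
+ * read-only by the single aliasing sink page.)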
+ */ + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + reg->gpu_alloc->nents, + kctx->aliasing_sink_page, + reg->nr_pages - reg->gpu_alloc->nents, + (reg->flags | KBASE_REG_GPU_RD) & + ~KBASE_REG_GPU_WR, + KBASE_MEM_GROUP_SINK); + if (err) + goto bad_insert; + } + + return err; + +bad_insert: + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, reg->nr_pages, + kctx->as_nr); + + if (alloc->type == KBASE_MEM_TYPE_ALIAS) { + KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); + while (i--) + if (alloc->imported.alias.aliased[i].alloc) + kbase_mem_phy_alloc_gpu_unmapped(alloc->imported.alias.aliased[i].alloc); + } + + kbase_remove_va_region(reg); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_mmap); + +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable); + +int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int err = 0; + size_t i; + + if (reg->start_pfn == 0) + return 0; + + if (!reg->gpu_alloc) + return -EINVAL; + + /* Tear down GPU page tables, depending on memory type. */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_ALIAS: /* Fall-through */ + case KBASE_MEM_TYPE_IMPORTED_UMM: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, reg->nr_pages, kctx->as_nr); + break; + default: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, kbase_reg_current_backed_size(reg), + kctx->as_nr); + break; + } + + /* Update tracking, and other cleanup, depending on memory type. */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_ALIAS: + KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); + for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) + if (reg->gpu_alloc->imported.alias.aliased[i].alloc) + kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + struct kbase_alloc_import_user_buf *user_buf = + &reg->gpu_alloc->imported.user_buf; + + if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { + user_buf->current_mapping_usage_count &= + ~PINNED_ON_IMPORT; + + /* The allocation could still have active mappings. */ + if (user_buf->current_mapping_usage_count == 0) { + kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, + (reg->flags & KBASE_REG_GPU_WR)); + } + } + } + /* Fall-through */ + default: + kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); + break; + } + + return err; +} + +static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( + struct kbase_context *kctx, + unsigned long uaddr, size_t size, u64 *offset) +{ + struct vm_area_struct *vma; + struct kbase_cpu_mapping *map; + unsigned long vm_pgoff_in_region; + unsigned long vm_off_in_region; + unsigned long map_start; + size_t map_size; + + lockdep_assert_held(&current->mm->mmap_sem); + + if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ + return NULL; + + vma = find_vma_intersection(current->mm, uaddr, uaddr+size); + + if (!vma || vma->vm_start > uaddr) + return NULL; + if (vma->vm_ops != &kbase_vm_ops) + /* Not ours! */ + return NULL; + + map = vma->vm_private_data; + + if (map->kctx != kctx) + /* Not from this context! 
*/ + return NULL; + + vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn; + vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT; + map_start = vma->vm_start - vm_off_in_region; + map_size = map->region->nr_pages << PAGE_SHIFT; + + if ((uaddr + size) > (map_start + map_size)) + /* Not within the CPU mapping */ + return NULL; + + *offset = (uaddr - vma->vm_start) + vm_off_in_region; + + return map; +} + +int kbasep_find_enclosing_cpu_mapping_offset( + struct kbase_context *kctx, + unsigned long uaddr, size_t size, u64 *offset) +{ + struct kbase_cpu_mapping *map; + + kbase_os_mem_map_lock(kctx); + + map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset); + + kbase_os_mem_map_unlock(kctx); + + if (!map) + return -EINVAL; + + return 0; +} + +KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); + +int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx, + u64 gpu_addr, size_t size, u64 *start, u64 *offset) +{ + struct kbase_va_region *region; + + kbase_gpu_vm_lock(kctx); + + region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); + + if (!region) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + + *start = region->start_pfn << PAGE_SHIFT; + + *offset = gpu_addr - *start; + + if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + + kbase_gpu_vm_unlock(kctx); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset); + +void kbase_sync_single(struct kbase_context *kctx, + struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa, + off_t offset, size_t size, enum kbase_sync_type sync_fn) +{ + struct page *cpu_page; + phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa); + phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa); + + cpu_page = pfn_to_page(PFN_DOWN(cpu_pa)); + + if (likely(cpu_pa == gpu_pa)) { + dma_addr_t dma_addr; + + BUG_ON(!cpu_page); + BUG_ON(offset + size > PAGE_SIZE); + + dma_addr = kbase_dma_addr(cpu_page) + offset; + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, + size, DMA_BIDIRECTIONAL); + else if (sync_fn == KBASE_SYNC_TO_DEVICE) + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + size, DMA_BIDIRECTIONAL); + } else { + void *src = NULL; + void *dst = NULL; + struct page *gpu_page; + + if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) + return; + + gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); + + if (sync_fn == KBASE_SYNC_TO_DEVICE) { + src = ((unsigned char *)kmap(cpu_page)) + offset; + dst = ((unsigned char *)kmap(gpu_page)) + offset; + } else if (sync_fn == KBASE_SYNC_TO_CPU) { + dma_sync_single_for_cpu(kctx->kbdev->dev, + kbase_dma_addr(gpu_page) + offset, + size, DMA_BIDIRECTIONAL); + src = ((unsigned char *)kmap(gpu_page)) + offset; + dst = ((unsigned char *)kmap(cpu_page)) + offset; + } + memcpy(dst, src, size); + kunmap(gpu_page); + kunmap(cpu_page); + if (sync_fn == KBASE_SYNC_TO_DEVICE) + dma_sync_single_for_device(kctx->kbdev->dev, + kbase_dma_addr(gpu_page) + offset, + size, DMA_BIDIRECTIONAL); + } +} + +static int kbase_do_syncset(struct kbase_context *kctx, + struct basep_syncset *sset, enum kbase_sync_type sync_fn) +{ + int err = 0; + struct kbase_va_region *reg; + struct kbase_cpu_mapping *map; + unsigned long start; + size_t size; + struct tagged_addr *cpu_pa; + struct tagged_addr *gpu_pa; + u64 page_off, page_count; + u64 i; + u64 offset; + + kbase_os_mem_map_lock(kctx); + kbase_gpu_vm_lock(kctx); + + /* find the region 
where the virtual address is contained */ + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + sset->mem_handle.basep.handle); + if (kbase_is_region_invalid_or_free(reg)) { + dev_warn(kctx->kbdev->dev, "Can't find a valid region at VA 0x%016llX", + sset->mem_handle.basep.handle); + err = -EINVAL; + goto out_unlock; + } + + /* + * Handle imported memory before checking for KBASE_REG_CPU_CACHED. The + * CPU mapping cacheability is defined by the owner of the imported + * memory, and not by kbase, therefore we must assume that any imported + * memory may be cached. + */ + if (kbase_mem_is_imported(reg->gpu_alloc->type)) { + err = kbase_mem_do_sync_imported(kctx, reg, sync_fn); + goto out_unlock; + } + + if (!(reg->flags & KBASE_REG_CPU_CACHED)) + goto out_unlock; + + start = (uintptr_t)sset->user_addr; + size = (size_t)sset->size; + + map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); + if (!map) { + dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", + start, sset->mem_handle.basep.handle); + err = -EINVAL; + goto out_unlock; + } + + page_off = offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; + page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + cpu_pa = kbase_get_cpu_phy_pages(reg); + gpu_pa = kbase_get_gpu_phy_pages(reg); + + if (page_off > reg->nr_pages || + page_off + page_count > reg->nr_pages) { + /* Sync overflows the region */ + err = -EINVAL; + goto out_unlock; + } + + /* Sync first page */ + if (as_phys_addr_t(cpu_pa[page_off])) { + size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + + kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], + offset, sz, sync_fn); + } + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + /* we grow upwards, so bail on first non-present page */ + if (!as_phys_addr_t(cpu_pa[page_off + i])) + break; + + kbase_sync_single(kctx, cpu_pa[page_off + i], + gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn); + } + + /* Sync last page (if any) */ + if (page_count > 1 && + as_phys_addr_t(cpu_pa[page_off + page_count - 1])) { + size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; + + kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], + gpu_pa[page_off + page_count - 1], 0, sz, + sync_fn); + } + +out_unlock: + kbase_gpu_vm_unlock(kctx); + kbase_os_mem_map_unlock(kctx); + return err; +} + +int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset) +{ + int err = -EINVAL; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(sset != NULL); + + if (sset->mem_handle.basep.handle & ~PAGE_MASK) { + dev_warn(kctx->kbdev->dev, + "mem_handle: passed parameter is invalid"); + return -EINVAL; + } + + switch (sset->type) { + case BASE_SYNCSET_OP_MSYNC: + err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE); + break; + + case BASE_SYNCSET_OP_CSYNC: + err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU); + break; + + default: + dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); + break; + } + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_sync_now); + +/* vm lock must be held */ +int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int err; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + __func__, (void *)reg, (void *)kctx); + lockdep_assert_held(&kctx->reg_lock); + + if (reg->flags & KBASE_REG_NO_USER_FREE) { + dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is 
forbidden!\n"); + return -EINVAL; + } + + /* + * Unlink the physical allocation before unmaking it evictable so + * that the allocation isn't grown back to its last backed size + * as we're going to unmap it anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must unmake it + * before trying to free it. + * If the memory hasn't been reclaimed it will be unmapped and freed + * below, if it has been reclaimed then the operations below are no-ops. + */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + + err = kbase_gpu_munmap(kctx, reg); + if (err) { + dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n"); + goto out; + } + + /* This will also free the physical pages */ + kbase_free_alloced_region(reg); + + out: + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mem_free_region); + +/** + * @brief Free the region from the GPU and unregister it. + * + * This function implements the free operation on a memory segment. + * It will loudly fail if called with outstanding mappings. + */ +int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) +{ + int err = 0; + struct kbase_va_region *reg; + + KBASE_DEBUG_ASSERT(kctx != NULL); + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + __func__, gpu_addr, (void *)kctx); + + if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { + dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); + return -EINVAL; + } + + if (0 == gpu_addr) { + dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); + return -EINVAL; + } + kbase_gpu_vm_lock(kctx); + + if (gpu_addr >= BASE_MEM_COOKIE_BASE && + gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { + int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); + + reg = kctx->pending_regions[cookie]; + if (!reg) { + err = -EINVAL; + goto out_unlock; + } + + /* ask to unlink the cookie as we'll free it */ + + kctx->pending_regions[cookie] = NULL; + bitmap_set(kctx->cookies, cookie, 1); + + kbase_free_alloced_region(reg); + } else { + /* A real GPU va */ + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) { + dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", + gpu_addr); + err = -EINVAL; + goto out_unlock; + } + + if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { + /* SAME_VA must be freed through munmap */ + dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, + gpu_addr); + err = -EINVAL; + goto out_unlock; + } + err = kbase_mem_free_region(kctx, reg); + } + + out_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mem_free); + +int kbase_update_region_flags(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned long flags) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); + + reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); + /* all memory is now growable */ + reg->flags |= KBASE_REG_GROWABLE; + + if (flags & BASE_MEM_GROW_ON_GPF) + reg->flags |= KBASE_REG_PF_GROW; + + if (flags & BASE_MEM_PROT_CPU_WR) + reg->flags |= KBASE_REG_CPU_WR; + + if (flags & BASE_MEM_PROT_CPU_RD) + reg->flags |= KBASE_REG_CPU_RD; + + if 
(flags & BASE_MEM_PROT_GPU_WR) + reg->flags |= KBASE_REG_GPU_WR; + + if (flags & BASE_MEM_PROT_GPU_RD) + reg->flags |= KBASE_REG_GPU_RD; + + if (0 == (flags & BASE_MEM_PROT_GPU_EX)) + reg->flags |= KBASE_REG_GPU_NX; + + if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { + if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED && + !(flags & BASE_MEM_UNCACHED_GPU)) + return -EINVAL; + } else if (flags & (BASE_MEM_COHERENT_SYSTEM | + BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { + reg->flags |= KBASE_REG_SHARE_BOTH; + } + + if (!(reg->flags & KBASE_REG_SHARE_BOTH) && + flags & BASE_MEM_COHERENT_LOCAL) { + reg->flags |= KBASE_REG_SHARE_IN; + } + + if (flags & BASE_MEM_TILER_ALIGN_TOP) + reg->flags |= KBASE_REG_TILER_ALIGN_TOP; + + + /* Set up default MEMATTR usage */ + if (!(reg->flags & KBASE_REG_GPU_CACHED)) { + if (kctx->kbdev->mmu_mode->flags & + KBASE_MMU_MODE_HAS_NON_CACHEABLE) { + /* Override shareability, and MEMATTR for uncached */ + reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); + reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + } else { + dev_warn(kctx->kbdev->dev, + "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); + return -EINVAL; + } + } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && + (reg->flags & KBASE_REG_SHARE_BOTH)) { + reg->flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + } else { + reg->flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); + } + + if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) + reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; + + if (flags & BASEP_MEM_NO_USER_FREE) + reg->flags |= KBASE_REG_NO_USER_FREE; + + if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) + reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; + + return 0; +} + +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested) +{ + int new_page_count __maybe_unused; + size_t nr_left = nr_pages_requested; + int res; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct tagged_addr *tp; + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || + WARN_ON(alloc->imported.native.kctx == NULL) || + WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } + + if (alloc->reg) { + if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) + goto invalid_request; + } + + kctx = alloc->imported.native.kctx; + kbdev = kctx->kbdev; + + if (nr_pages_requested == 0) + goto done; /*nothing to do*/ + + new_page_count = atomic_add_return( + nr_pages_requested, &kctx->used_pages); + atomic_add(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters before we allocate pages so that this + * allocation is visible to the OOM killer */ + kbase_process_page_usage_inc(kctx, nr_pages_requested); + + tp = alloc->pages + alloc->nents; + +#ifdef CONFIG_MALI_2MB_ALLOC + /* Check if we have enough pages requested so we can allocate a large + * page (512 * 4KB = 2MB ) + */ + if (nr_left >= (SZ_2M / SZ_4K)) { + int nr_lp = nr_left / (SZ_2M / SZ_4K); + + res = kbase_mem_pool_alloc_pages( + &kctx->mem_pools.large[alloc->group_id], + nr_lp * (SZ_2M / SZ_4K), + tp, + true); + + if (res > 0) { + nr_left -= res; + tp += res; + } + + if (nr_left) { + struct kbase_sub_alloc *sa, *temp_sa; + + spin_lock(&kctx->mem_partials_lock); + + list_for_each_entry_safe(sa, temp_sa, + &kctx->mem_partials, link) { + int pidx = 0; + + while (nr_left) { + pidx = find_next_zero_bit(sa->sub_pages, + SZ_2M / SZ_4K, + pidx); + bitmap_set(sa->sub_pages, pidx, 1); + *tp++ = 
as_tagged_tag(page_to_phys(sa->page + + pidx), + FROM_PARTIAL); + nr_left--; + + if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) { + /* unlink from partial list when full */ + list_del_init(&sa->link); + break; + } + } + } + spin_unlock(&kctx->mem_partials_lock); + } + + /* only if we actually have a chunk left <512. If more it indicates + * that we couldn't allocate a 2MB above, so no point to retry here. + */ + if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { + /* create a new partial and suballocate the rest from it */ + struct page *np = NULL; + + do { + int err; + + np = kbase_mem_pool_alloc( + &kctx->mem_pools.large[ + alloc->group_id]); + if (np) + break; + + err = kbase_mem_pool_grow( + &kctx->mem_pools.large[alloc->group_id], + 1); + if (err) + break; + } while (1); + + if (np) { + int i; + struct kbase_sub_alloc *sa; + struct page *p; + + sa = kmalloc(sizeof(*sa), GFP_KERNEL); + if (!sa) { + kbase_mem_pool_free( + &kctx->mem_pools.large[ + alloc->group_id], + np, + false); + goto no_new_partial; + } + + /* store pointers back to the control struct */ + np->lru.next = (void *)sa; + for (p = np; p < np + SZ_2M / SZ_4K; p++) + p->lru.prev = (void *)np; + INIT_LIST_HEAD(&sa->link); + bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); + sa->page = np; + + for (i = 0; i < nr_left; i++) + *tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL); + + bitmap_set(sa->sub_pages, 0, nr_left); + nr_left = 0; + + /* expose for later use */ + spin_lock(&kctx->mem_partials_lock); + list_add(&sa->link, &kctx->mem_partials); + spin_unlock(&kctx->mem_partials_lock); + } + } + } +no_new_partial: +#endif + + if (nr_left) { + res = kbase_mem_pool_alloc_pages( + &kctx->mem_pools.small[alloc->group_id], + nr_left, tp, false); + if (res <= 0) + goto alloc_failed; + } + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + + alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + +done: + return 0; + +alloc_failed: + /* rollback needed if got one or more 2MB but failed later */ + if (nr_left != nr_pages_requested) { + size_t nr_pages_to_free = nr_pages_requested - nr_left; + + alloc->nents += nr_pages_to_free; + + kbase_process_page_usage_inc(kctx, nr_pages_to_free); + atomic_add(nr_pages_to_free, &kctx->used_pages); + atomic_add(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_free_phy_pages_helper(alloc, nr_pages_to_free); + } + + kbase_process_page_usage_dec(kctx, nr_pages_requested); + atomic_sub(nr_pages_requested, &kctx->used_pages); + atomic_sub(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + +invalid_request: + return -ENOMEM; +} + +struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, + size_t nr_pages_requested, + struct kbase_sub_alloc **prealloc_sa) +{ + int new_page_count __maybe_unused; + size_t nr_left = nr_pages_requested; + int res; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct tagged_addr *tp; + struct tagged_addr *new_pages = NULL; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); + + lockdep_assert_held(&pool->pool_lock); + +#if !defined(CONFIG_MALI_2MB_ALLOC) + WARN_ON(pool->order); +#endif + + if (alloc->reg) { + if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) + goto invalid_request; + } + + kctx = alloc->imported.native.kctx; + kbdev = kctx->kbdev; + + lockdep_assert_held(&kctx->mem_partials_lock); + + if 
(nr_pages_requested == 0) + goto done; /*nothing to do*/ + + new_page_count = atomic_add_return( + nr_pages_requested, &kctx->used_pages); + atomic_add(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters before we allocate pages so that this + * allocation is visible to the OOM killer + */ + kbase_process_page_usage_inc(kctx, nr_pages_requested); + + tp = alloc->pages + alloc->nents; + new_pages = tp; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pool->order) { + int nr_lp = nr_left / (SZ_2M / SZ_4K); + + res = kbase_mem_pool_alloc_pages_locked(pool, + nr_lp * (SZ_2M / SZ_4K), + tp); + + if (res > 0) { + nr_left -= res; + tp += res; + } + + if (nr_left) { + struct kbase_sub_alloc *sa, *temp_sa; + + list_for_each_entry_safe(sa, temp_sa, + &kctx->mem_partials, link) { + int pidx = 0; + + while (nr_left) { + pidx = find_next_zero_bit(sa->sub_pages, + SZ_2M / SZ_4K, + pidx); + bitmap_set(sa->sub_pages, pidx, 1); + *tp++ = as_tagged_tag(page_to_phys( + sa->page + pidx), + FROM_PARTIAL); + nr_left--; + + if (bitmap_full(sa->sub_pages, + SZ_2M / SZ_4K)) { + /* unlink from partial list when + * full + */ + list_del_init(&sa->link); + break; + } + } + } + } + + /* only if we actually have a chunk left <512. If more it + * indicates that we couldn't allocate a 2MB above, so no point + * to retry here. + */ + if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { + /* create a new partial and suballocate the rest from it + */ + struct page *np = NULL; + + np = kbase_mem_pool_alloc_locked(pool); + + if (np) { + int i; + struct kbase_sub_alloc *const sa = *prealloc_sa; + struct page *p; + + /* store pointers back to the control struct */ + np->lru.next = (void *)sa; + for (p = np; p < np + SZ_2M / SZ_4K; p++) + p->lru.prev = (void *)np; + INIT_LIST_HEAD(&sa->link); + bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); + sa->page = np; + + for (i = 0; i < nr_left; i++) + *tp++ = as_tagged_tag( + page_to_phys(np + i), + FROM_PARTIAL); + + bitmap_set(sa->sub_pages, 0, nr_left); + nr_left = 0; + /* Indicate to user that we'll free this memory + * later. 
+ */ + *prealloc_sa = NULL; + + /* expose for later use */ + list_add(&sa->link, &kctx->mem_partials); + } + } + if (nr_left) + goto alloc_failed; + } else { +#endif + res = kbase_mem_pool_alloc_pages_locked(pool, + nr_left, + tp); + if (res <= 0) + goto alloc_failed; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + + alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + +done: + return new_pages; + +alloc_failed: + /* rollback needed if got one or more 2MB but failed later */ + if (nr_left != nr_pages_requested) { + size_t nr_pages_to_free = nr_pages_requested - nr_left; + + struct tagged_addr *start_free = alloc->pages + alloc->nents; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pool->order) { + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + kbase_mem_pool_free_pages_locked( + pool, 512, + start_free, + false, /* not dirty */ + true); /* return to pool */ + nr_pages_to_free -= 512; + start_free += 512; + } else if (is_partial(*start_free)) { + free_partial_locked(kctx, pool, + *start_free); + nr_pages_to_free--; + start_free++; + } + } + } else { +#endif + kbase_mem_pool_free_pages_locked(pool, + nr_pages_to_free, + start_free, + false, /* not dirty */ + true); /* return to pool */ +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + } + + kbase_process_page_usage_dec(kctx, nr_pages_requested); + atomic_sub(nr_pages_requested, &kctx->used_pages); + atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); + +invalid_request: + return NULL; +} + +static void free_partial(struct kbase_context *kctx, int group_id, struct + tagged_addr tp) +{ + struct page *p, *head_page; + struct kbase_sub_alloc *sa; + + p = as_page(tp); + head_page = (struct page *)p->lru.prev; + sa = (struct kbase_sub_alloc *)head_page->lru.next; + spin_lock(&kctx->mem_partials_lock); + clear_bit(p - head_page, sa->sub_pages); + if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { + list_del(&sa->link); + kbase_mem_pool_free( + &kctx->mem_pools.large[group_id], + head_page, + true); + kfree(sa); + } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == + SZ_2M / SZ_4K - 1) { + /* expose the partial again */ + list_add(&sa->link, &kctx->mem_partials); + } + spin_unlock(&kctx->mem_partials_lock); +} + +int kbase_free_phy_pages_helper( + struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_to_free) +{ + struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; + bool syncback; + bool reclaimed = (alloc->evicted != 0); + struct tagged_addr *start_free; + int new_page_count __maybe_unused; + size_t freed = 0; + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || + WARN_ON(alloc->imported.native.kctx == NULL) || + WARN_ON(alloc->nents < nr_pages_to_free) || + WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } + + /* early out if nothing to do */ + if (0 == nr_pages_to_free) + return 0; + + start_free = alloc->pages + alloc->nents - nr_pages_to_free; + + syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + /* pad start_free to a valid start location */ + while (nr_pages_to_free && is_huge(*start_free) && + !is_huge_head(*start_free)) { + nr_pages_to_free--; + start_free++; + } + + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + /* This is a 2MB entry, so free all the 512 pages that + * it points to + */ + kbase_mem_pool_free_pages( + &kctx->mem_pools.large[alloc->group_id], + 
512, + start_free, + syncback, + reclaimed); + nr_pages_to_free -= 512; + start_free += 512; + freed += 512; + } else if (is_partial(*start_free)) { + free_partial(kctx, alloc->group_id, *start_free); + nr_pages_to_free--; + start_free++; + freed++; + } else { + struct tagged_addr *local_end_free; + + local_end_free = start_free; + while (nr_pages_to_free && + !is_huge(*local_end_free) && + !is_partial(*local_end_free)) { + local_end_free++; + nr_pages_to_free--; + } + kbase_mem_pool_free_pages( + &kctx->mem_pools.small[alloc->group_id], + local_end_free - start_free, + start_free, + syncback, + reclaimed); + freed += local_end_free - start_free; + start_free += local_end_free - start_free; + } + } + + alloc->nents -= freed; + + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. + */ + if (!reclaimed) { + kbase_process_page_usage_dec(kctx, freed); + new_page_count = atomic_sub_return(freed, + &kctx->used_pages); + atomic_sub(freed, + &kctx->kbdev->memdev.used_pages); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); + } + + return 0; +} + +static void free_partial_locked(struct kbase_context *kctx, + struct kbase_mem_pool *pool, struct tagged_addr tp) +{ + struct page *p, *head_page; + struct kbase_sub_alloc *sa; + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + p = as_page(tp); + head_page = (struct page *)p->lru.prev; + sa = (struct kbase_sub_alloc *)head_page->lru.next; + clear_bit(p - head_page, sa->sub_pages); + if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { + list_del(&sa->link); + kbase_mem_pool_free_locked(pool, head_page, true); + kfree(sa); + } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == + SZ_2M / SZ_4K - 1) { + /* expose the partial again */ + list_add(&sa->link, &kctx->mem_partials); + } +} + +void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free) +{ + struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; + bool syncback; + bool reclaimed = (alloc->evicted != 0); + struct tagged_addr *start_free; + size_t freed = 0; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); + KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + /* early out if nothing to do */ + if (!nr_pages_to_free) + return; + + start_free = pages; + + syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + /* pad start_free to a valid start location */ + while (nr_pages_to_free && is_huge(*start_free) && + !is_huge_head(*start_free)) { + nr_pages_to_free--; + start_free++; + } + + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + /* This is a 2MB entry, so free all the 512 pages that + * it points to + */ + WARN_ON(!pool->order); + kbase_mem_pool_free_pages_locked(pool, + 512, + start_free, + syncback, + reclaimed); + nr_pages_to_free -= 512; + start_free += 512; + freed += 512; + } else if (is_partial(*start_free)) { + WARN_ON(!pool->order); + free_partial_locked(kctx, pool, *start_free); + nr_pages_to_free--; + start_free++; + freed++; + } else { + struct tagged_addr *local_end_free; + + WARN_ON(pool->order); + local_end_free = start_free; + while (nr_pages_to_free && + 
!is_huge(*local_end_free) && + !is_partial(*local_end_free)) { + local_end_free++; + nr_pages_to_free--; + } + kbase_mem_pool_free_pages_locked(pool, + local_end_free - start_free, + start_free, + syncback, + reclaimed); + freed += local_end_free - start_free; + start_free += local_end_free - start_free; + } + } + + alloc->nents -= freed; + + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. + */ + if (!reclaimed) { + int new_page_count; + + kbase_process_page_usage_dec(kctx, freed); + new_page_count = atomic_sub_return(freed, + &kctx->used_pages); + atomic_sub(freed, + &kctx->kbdev->memdev.used_pages); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); + } +} + + +void kbase_mem_kref_free(struct kref *kref) +{ + struct kbase_mem_phy_alloc *alloc; + + alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); + + switch (alloc->type) { + case KBASE_MEM_TYPE_NATIVE: { + + if (!WARN_ON(!alloc->imported.native.kctx)) { + if (alloc->permanent_map) + kbase_phy_alloc_mapping_term( + alloc->imported.native.kctx, + alloc); + + /* + * The physical allocation must have been removed from + * the eviction list before trying to free it. + */ + mutex_lock( + &alloc->imported.native.kctx->jit_evict_lock); + WARN_ON(!list_empty(&alloc->evict_node)); + mutex_unlock( + &alloc->imported.native.kctx->jit_evict_lock); + + kbase_process_page_usage_dec( + alloc->imported.native.kctx, + alloc->imported.native.nr_struct_pages); + } + kbase_free_phy_pages_helper(alloc, alloc->nents); + break; + } + case KBASE_MEM_TYPE_ALIAS: { + /* just call put on the underlying phy allocs */ + size_t i; + struct kbase_aliased *aliased; + + aliased = alloc->imported.alias.aliased; + if (aliased) { + for (i = 0; i < alloc->imported.alias.nents; i++) + if (aliased[i].alloc) + kbase_mem_phy_alloc_put(aliased[i].alloc); + vfree(aliased); + } + break; + } + case KBASE_MEM_TYPE_RAW: + /* raw pages, external cleanup */ + break; + case KBASE_MEM_TYPE_IMPORTED_UMM: + if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { + WARN_ONCE(alloc->imported.umm.current_mapping_usage_count != 1, + "WARNING: expected excatly 1 mapping, got %d", + alloc->imported.umm.current_mapping_usage_count); + dma_buf_unmap_attachment( + alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, + DMA_BIDIRECTIONAL); + kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, + alloc); + } + dma_buf_detach(alloc->imported.umm.dma_buf, + alloc->imported.umm.dma_attachment); + dma_buf_put(alloc->imported.umm.dma_buf); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + if (alloc->imported.user_buf.mm) + mmdrop(alloc->imported.user_buf.mm); + if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) + vfree(alloc->imported.user_buf.pages); + else + kfree(alloc->imported.user_buf.pages); + break; + default: + WARN(1, "Unexecpted free of type %d\n", alloc->type); + break; + } + + /* Free based on allocation type */ + if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) + vfree(alloc); + else + kfree(alloc); +} + +KBASE_EXPORT_TEST_API(kbase_mem_kref_free); + +int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(vsize > 0); + + /* validate user provided arguments */ + if (size > vsize || vsize > reg->nr_pages) + goto out_term; + + /* Prevent vsize*sizeof from wrapping around. 
+ * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. + */ + if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) + goto out_term; + + KBASE_DEBUG_ASSERT(0 != vsize); + + if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) + goto out_term; + + reg->cpu_alloc->reg = reg; + if (reg->cpu_alloc != reg->gpu_alloc) { + if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) + goto out_rollback; + reg->gpu_alloc->reg = reg; + } + + return 0; + +out_rollback: + kbase_free_phy_pages_helper(reg->cpu_alloc, size); +out_term: + return -1; +} + +KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); + +bool kbase_check_alloc_flags(unsigned long flags) +{ + /* Only known input flags should be set. */ + if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) + return false; + + /* At least one flag should be set */ + if (flags == 0) + return false; + + /* Either the GPU or CPU must be reading from the allocated memory */ + if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) + return false; + + /* Either the GPU or CPU must be writing to the allocated memory */ + if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + /* GPU executable memory cannot: + * - Be written by the GPU + * - Be grown on GPU page fault + */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & + (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) + return false; + + /* GPU executable memory also cannot have the top of its initial + * commit aligned to 'extent' + */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & + BASE_MEM_TILER_ALIGN_TOP)) + return false; + + /* To have an allocation lie within a 4GB chunk is required only for + * TLS memory, which will never be used to contain executable code. + */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & + BASE_MEM_PROT_GPU_EX)) + return false; + + /* TLS memory should also not be used for tiler heap */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & + BASE_MEM_TILER_ALIGN_TOP)) + return false; + + /* GPU should have at least read or write access otherwise there is no + reason for allocating. */ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ + if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) + return false; + + /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported + * memory */ + if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == + BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + return false; + + /* Should not combine BASE_MEM_COHERENT_LOCAL with + * BASE_MEM_COHERENT_SYSTEM */ + if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == + (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) + return false; + + return true; +} + +bool kbase_check_import_flags(unsigned long flags) +{ + /* Only known input flags should be set. */ + if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) + return false; + + /* At least one flag should be set */ + if (flags == 0) + return false; + + /* Imported memory cannot be GPU executable */ + if (flags & BASE_MEM_PROT_GPU_EX) + return false; + + /* Imported memory cannot grow on page fault */ + if (flags & BASE_MEM_GROW_ON_GPF) + return false; + + /* Imported memory cannot be aligned to the end of its initial commit */ + if (flags & BASE_MEM_TILER_ALIGN_TOP) + return false; + + /* GPU should have at least read or write access otherwise there is no + reason for importing. 
*/ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + /* Protected memory cannot be read by the CPU */ + if ((flags & BASE_MEM_PROTECTED) && (flags & BASE_MEM_PROT_CPU_RD)) + return false; + + return true; +} + +int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, + u64 va_pages, u64 commit_pages, u64 large_extent) +{ + struct device *dev = kctx->kbdev->dev; + int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; + struct kbase_va_region test_reg; + + /* kbase_va_region's extent member can be of variable size, so check against that type */ + test_reg.extent = large_extent; + +#define KBASE_MSG_PRE "GPU allocation attempted with " + + if (0 == va_pages) { + dev_warn(dev, KBASE_MSG_PRE "0 va_pages!"); + return -EINVAL; + } + + if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { + dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", + (unsigned long long)va_pages); + return -ENOMEM; + } + + /* Note: commit_pages is checked against va_pages during + * kbase_alloc_phy_pages() */ + + /* Limit GPU executable allocs to GPU PC size */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld", + (unsigned long long)va_pages, + (unsigned long long)gpu_pc_pages_max); + + return -EINVAL; + } + + if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extent == 0)) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF but extent == 0\n"); + return -EINVAL; + } + + if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extent == 0)) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); + return -EINVAL; + } + + if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && + test_reg.extent != 0) { + dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n"); + return -EINVAL; + } + + /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */ + if (flags & BASE_MEM_TILER_ALIGN_TOP) { +#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " + unsigned long small_extent; + + if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) { + dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld", + (unsigned long long)large_extent, + BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES); + return -EINVAL; + } + /* For use with is_power_of_2, which takes unsigned long, so + * must ensure e.g. 
on 32-bit kernel it'll fit in that type */ + small_extent = (unsigned long)large_extent; + + if (!is_power_of_2(small_extent)) { + dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2", + small_extent); + return -EINVAL; + } + + if (commit_pages > large_extent) { + dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld", + (unsigned long)commit_pages, + (unsigned long)large_extent); + return -EINVAL; + } +#undef KBASE_MSG_PRE_FLAG + } + + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && + (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space", + (unsigned long long)va_pages); + return -EINVAL; + } + + return 0; +#undef KBASE_MSG_PRE +} + +/** + * @brief Acquire the per-context region list lock + */ +void kbase_gpu_vm_lock(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_lock(&kctx->reg_lock); +} + +KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); + +/** + * @brief Release the per-context region list lock + */ +void kbase_gpu_vm_unlock(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_unlock(&kctx->reg_lock); +} + +KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); + +#ifdef CONFIG_DEBUG_FS +struct kbase_jit_debugfs_data { + int (*func)(struct kbase_jit_debugfs_data *); + struct mutex lock; + struct kbase_context *kctx; + u64 active_value; + u64 pool_value; + u64 destroy_value; + char buffer[50]; +}; + +static int kbase_jit_debugfs_common_open(struct inode *inode, + struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) +{ + struct kbase_jit_debugfs_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->func = func; + mutex_init(&data->lock); + data->kctx = (struct kbase_context *) inode->i_private; + + file->private_data = data; + + return nonseekable_open(inode, file); +} + +static ssize_t kbase_jit_debugfs_common_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + struct kbase_jit_debugfs_data *data; + size_t size; + int ret; + + data = (struct kbase_jit_debugfs_data *) file->private_data; + mutex_lock(&data->lock); + + if (*ppos) { + size = strnlen(data->buffer, sizeof(data->buffer)); + } else { + if (!data->func) { + ret = -EACCES; + goto out_unlock; + } + + if (data->func(data)) { + ret = -EACCES; + goto out_unlock; + } + + size = scnprintf(data->buffer, sizeof(data->buffer), + "%llu,%llu,%llu", data->active_value, + data->pool_value, data->destroy_value); + } + + ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); + +out_unlock: + mutex_unlock(&data->lock); + return ret; +} + +static int kbase_jit_debugfs_common_release(struct inode *inode, + struct file *file) +{ + kfree(file->private_data); + return 0; +} + +#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + return kbase_jit_debugfs_common_open(inode, file, __func); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = kbase_jit_debugfs_common_release, \ + .read = kbase_jit_debugfs_common_read, \ + .write = NULL, \ + .llseek = generic_file_llseek, \ +} + +static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct list_head *tmp; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each(tmp, &kctx->jit_active_head) { + data->active_value++; + } + + 
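/* The three list walks in this function produce the values that
+ * kbase_jit_debugfs_common_read() formats as the "active,pool,destroy"
+ * triple returned to user space.
+ */
+ 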
list_for_each(tmp, &kctx->jit_pool_head) { + data->pool_value++; + } + + list_for_each(tmp, &kctx->jit_destroy_head) { + data->destroy_value++; + } + mutex_unlock(&kctx->jit_evict_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, + kbase_jit_debugfs_count_get); + +static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->nr_pages; + } + mutex_unlock(&kctx->jit_evict_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, + kbase_jit_debugfs_vm_get); + +static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_evict_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, + kbase_jit_debugfs_phys_get); + +#if MALI_JIT_PRESSURE_LIMIT_BASE +static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jctx.lock); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->used_pages; + } + mutex_unlock(&kctx->jit_evict_lock); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_used_fops, + kbase_jit_debugfs_used_get); + +static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed, bool shrink); + +static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jctx.lock); + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; + + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + SIZE_MAX, &freed, false); + + if (err) { + /* Failed to calculate, try the next region */ + continue; + } + + data->active_value += freed; + } + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, + kbase_jit_debugfs_trim_get); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +void kbase_jit_debugfs_init(struct kbase_context *kctx) +{ + /* prevent unprivileged use of debug file system + * in old kernel version + */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0444; +#else + const mode_t mode = 0400; +#endif + + /* Caller already ensures this, but we keep the pattern for + * maintenance 
safety. + */ + if (WARN_ON(!kctx) || + WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + + + /* Debugfs entry for getting the number of JIT allocations. */ + debugfs_create_file("mem_jit_count", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_count_fops); + + /* + * Debugfs entry for getting the total number of virtual pages + * used by JIT allocations. + */ + debugfs_create_file("mem_jit_vm", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_vm_fops); + + /* + * Debugfs entry for getting the number of physical pages used + * by JIT allocations. + */ + debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_phys_fops); +#if MALI_JIT_PRESSURE_LIMIT_BASE + /* + * Debugfs entry for getting the number of pages used + * by JIT allocations for estimating the physical pressure + * limit. + */ + debugfs_create_file("mem_jit_used", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_used_fops); + + /* + * Debugfs entry for getting the number of pages that could + * be trimmed to free space for more JIT allocations. + */ + debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_trim_fops); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ +} +#endif /* CONFIG_DEBUG_FS */ + +/** + * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations + * @work: Work item + * + * This function does the work of freeing JIT allocations whose physical + * backing has been released. + */ +static void kbase_jit_destroy_worker(struct work_struct *work) +{ + struct kbase_context *kctx; + struct kbase_va_region *reg; + + kctx = container_of(work, struct kbase_context, jit_work); + do { + mutex_lock(&kctx->jit_evict_lock); + if (list_empty(&kctx->jit_destroy_head)) { + mutex_unlock(&kctx->jit_evict_lock); + break; + } + + reg = list_first_entry(&kctx->jit_destroy_head, + struct kbase_va_region, jit_node); + + list_del(®->jit_node); + mutex_unlock(&kctx->jit_evict_lock); + + kbase_gpu_vm_lock(kctx); + reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } while (1); +} + +int kbase_jit_init(struct kbase_context *kctx) +{ + mutex_lock(&kctx->jit_evict_lock); + INIT_LIST_HEAD(&kctx->jit_active_head); + INIT_LIST_HEAD(&kctx->jit_pool_head); + INIT_LIST_HEAD(&kctx->jit_destroy_head); + INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); + + INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head); + INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc); + mutex_unlock(&kctx->jit_evict_lock); + + kctx->jit_max_allocations = 0; + kctx->jit_current_allocations = 0; + kctx->trim_level = 0; + + return 0; +} + +/* Check if the allocation from JIT pool is of the same size as the new JIT + * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets + * the alignment requirements. 
+ */ +static bool meet_size_and_tiler_align_top_requirements( + const struct kbase_va_region *walker, + const struct base_jit_alloc_info *info) +{ + bool meet_reqs = true; + + if (walker->nr_pages != info->va_pages) + meet_reqs = false; + + if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) { + size_t align = info->extent; + size_t align_mask = align - 1; + + if ((walker->start_pfn + info->commit_pages) & align_mask) + meet_reqs = false; + } + + return meet_reqs; +} + +#if MALI_JIT_PRESSURE_LIMIT_BASE +/* Function will guarantee *@freed will not exceed @pages_needed + */ +static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed, bool shrink) +{ + int err = 0; + size_t available_pages = 0u; + const size_t old_pages = kbase_reg_current_backed_size(reg); + size_t new_pages = old_pages; + size_t to_free = 0u; + size_t max_allowed_pages = old_pages; + + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + + /* Is this a JIT allocation that has been reported on? */ + if (reg->used_pages == reg->nr_pages) + goto out; + + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) { + /* For address based memory usage calculation, the GPU + * allocates objects of up to size 's', but aligns every object + * to alignment 'a', with a < s. + * + * It also doesn't have to write to all bytes in an object of + * size 's'. + * + * Hence, we can observe the GPU's address for the end of used + * memory being up to (s - a) bytes into the first unallocated + * page. + * + * We allow for this and only warn when it exceeds this bound + * (rounded up to page sized units). Note, this is allowed to + * exceed reg->nr_pages. + */ + max_allowed_pages += PFN_UP( + KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - + KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* The GPU could report being ready to write to the next + * 'extent' sized chunk, but didn't actually write to it, so we + * can report up to 'extent' size pages more than the backed + * size. + * + * Note, this is allowed to exceed reg->nr_pages. + */ + max_allowed_pages += reg->extent; + + /* Also note that in these GPUs, the GPU may make a large (>1 + * page) initial allocation but not actually write out to all + * of it. Hence it might report that a much higher amount of + * memory was used than actually was written to. This does not + * result in a real warning because on growing this memory we + * round up the size of the allocation up to an 'extent' sized + * chunk, hence automatically bringing the backed size up to + * the reported size. + */ + } + + if (old_pages < reg->used_pages) { + /* Prevent overflow on available_pages, but only report the + * problem if it's in a scenario where used_pages should have + * been consistent with the backed size + * + * Note: In case of a size-based report, this legitimately + * happens in common use-cases: we allow for up to this size of + * memory being used, but depending on the content it doesn't + * have to use all of it. 
+ * + * Hence, we're much more quiet about that in the size-based + * report case - it's not indicating a real problem, it's just + * for information + */ + if (max_allowed_pages < reg->used_pages) { + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) + dev_warn(kctx->kbdev->dev, + "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + max_allowed_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + else + dev_dbg(kctx->kbdev->dev, + "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + } + /* In any case, no error condition to report here, caller can + * try other regions + */ + + goto out; + } + available_pages = old_pages - reg->used_pages; + to_free = min(available_pages, pages_needed); + + if (shrink) { + new_pages -= to_free; + + err = kbase_mem_shrink(kctx, reg, new_pages); + } +out: + trace_mali_jit_trim_from_region(reg, to_free, old_pages, + available_pages, new_pages); + *freed = to_free; + return err; +} + + +/** + * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been + * freed + * @kctx: Pointer to the kbase context whose active JIT allocations will be + * checked. + * @pages_needed: The maximum number of pages to trim. + * + * This functions checks all active JIT allocations in @kctx for unused pages + * at the end, and trim the backed memory regions of those allocations down to + * the used portion and free the unused pages into the page pool. + * + * Specifying @pages_needed allows us to stop early when there's enough + * physical memory freed to sufficiently bring down the total JIT physical page + * usage (e.g. 
to below the pressure limit) + * + * Return: Total number of successfully freed pages + */ +static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, + size_t pages_needed) +{ + struct kbase_va_region *reg, *tmp; + size_t total_freed = 0; + + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; + + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + pages_needed, &freed, true); + + if (err) { + /* Failed to trim, try the next region */ + continue; + } + + total_freed += freed; + WARN_ON(freed > pages_needed); + pages_needed -= freed; + if (!pages_needed) + break; + } + + trace_mali_jit_trim(total_freed); + + return total_freed; +} +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +static int kbase_jit_grow(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + struct kbase_va_region *reg, + struct kbase_sub_alloc **prealloc_sas) +{ + size_t delta; + size_t pages_required; + size_t old_size; + struct kbase_mem_pool *pool; + int ret = -ENOMEM; + struct tagged_addr *gpu_pages; + + if (info->commit_pages > reg->nr_pages) { + /* Attempted to grow larger than maximum size */ + return -EINVAL; + } + + lockdep_assert_held(&kctx->reg_lock); + + /* Make the physical backing no longer reclaimable */ + if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) + goto update_failed; + + if (reg->gpu_alloc->nents >= info->commit_pages) + goto done; + + /* Grow the backing */ + old_size = reg->gpu_alloc->nents; + + /* Allocate some more pages */ + delta = info->commit_pages - reg->gpu_alloc->nents; + pages_required = delta; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pages_required >= (SZ_2M / SZ_4K)) { + pool = &kctx->mem_pools.large[kctx->jit_group_id]; + /* Round up to number of 2 MB pages required */ + pages_required += ((SZ_2M / SZ_4K) - 1); + pages_required /= (SZ_2M / SZ_4K); + } else { +#endif + pool = &kctx->mem_pools.small[kctx->jit_group_id]; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + if (reg->cpu_alloc != reg->gpu_alloc) + pages_required *= 2; + + spin_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + + /* As we can not allocate memory from the kernel with the vm_lock held, + * grow the pool to the required size with the lock dropped. We hold the + * pool lock to prevent another thread from allocating from the pool + * between the grow and allocation. 
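+ * Each pass of the loop below drops the pool lock, the mem_partials
+ * spinlock and the VM lock before calling kbase_mem_pool_grow(), then
+ * re-takes them and re-checks the pool size, since another thread may
+ * have grown or drained the pool in the meantime.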
+ */ + while (kbase_mem_pool_size(pool) < pages_required) { + int pool_delta = pages_required - kbase_mem_pool_size(pool); + int ret; + + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); + + kbase_gpu_vm_unlock(kctx); + ret = kbase_mem_pool_grow(pool, pool_delta); + kbase_gpu_vm_lock(kctx); + + if (ret) + goto update_failed; + + spin_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + } + + gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, + delta, &prealloc_sas[0]); + if (!gpu_pages) { + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); + goto update_failed; + } + + if (reg->cpu_alloc != reg->gpu_alloc) { + struct tagged_addr *cpu_pages; + + cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, + pool, delta, &prealloc_sas[1]); + if (!cpu_pages) { + kbase_free_phy_pages_helper_locked(reg->gpu_alloc, + pool, gpu_pages, delta); + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); + goto update_failed; + } + } + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); + + ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, + old_size); + /* + * The grow failed so put the allocation back in the + * pool and return failure. + */ + if (ret) + goto update_failed; + +done: + ret = 0; + + /* Update attributes of JIT allocation taken from the pool */ + reg->initial_commit = info->commit_pages; + reg->extent = info->extent; + +update_failed: + return ret; +} + +static void trace_jit_stats(struct kbase_context *kctx, + u32 bin_id, u32 max_allocations) +{ + const u32 alloc_count = + kctx->jit_current_allocations_per_bin[bin_id]; + struct kbase_device *kbdev = kctx->kbdev; + + struct kbase_va_region *walker; + u32 va_pages = 0; + u32 ph_pages = 0; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + if (walker->jit_bin_id != bin_id) + continue; + + va_pages += walker->nr_pages; + ph_pages += walker->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_evict_lock); + + KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id, + max_allocations, alloc_count, va_pages, ph_pages); +} + +#if MALI_JIT_PRESSURE_LIMIT_BASE +/** + * get_jit_phys_backing() - calculate the physical backing of all JIT + * allocations + * + * @kctx: Pointer to the kbase context whose active JIT allocations will be + * checked + * + * Return: number of pages that are committed by JIT allocations + */ +static size_t get_jit_phys_backing(struct kbase_context *kctx) +{ + struct kbase_va_region *walker; + size_t backing = 0; + + lockdep_assert_held(&kctx->jit_evict_lock); + + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + backing += kbase_reg_current_backed_size(walker); + } + + return backing; +} + +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, + size_t needed_pages) +{ + size_t jit_backing = 0; + size_t pages_to_trim = 0; + + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + + jit_backing = get_jit_phys_backing(kctx); + + /* It is possible that this is the case - if this is the first + * allocation after "ignore_pressure_limit" allocation. 
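+ * If the current backing already exceeds the pressure limit, trim the
+ * overshoot plus all of the newly needed pages; otherwise trim only the
+ * part of needed_pages that the remaining headroom cannot absorb.
+ * Worked example (hypothetical numbers): limit 1000, backing 900,
+ * needed_pages 150 -> headroom 100, so 50 pages are trimmed.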
+ */ + if (jit_backing > kctx->jit_phys_pages_limit) { + pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + + needed_pages; + } else { + size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; + + if (needed_pages > backed_diff) + pages_to_trim += needed_pages - backed_diff; + } + + if (pages_to_trim) { + size_t trimmed_pages = + kbase_mem_jit_trim_pages(kctx, pages_to_trim); + + /* This should never happen - we already asserted that + * we are not violating JIT pressure limit in earlier + * checks, which means that in-flight JIT allocations + * must have enough unused pages to satisfy the new + * allocation + */ + WARN_ON(trimmed_pages < pages_to_trim); + } +} +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +/** + * jit_allow_allocate() - check whether basic conditions are satisfied to allow + * a new JIT allocation + * + * @kctx: Pointer to the kbase context + * @info: Pointer to JIT allocation information for the new allocation + * @ignore_pressure_limit: Flag to indicate whether JIT pressure limit check + * should be ignored + * + * Return: true if allocation can be executed, false otherwise + */ +static bool jit_allow_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) +{ + lockdep_assert_held(&kctx->jctx.lock); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit && + ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || + (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { + dev_dbg(kctx->kbdev->dev, + "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n", + kctx->jit_current_phys_pressure + info->va_pages, + kctx->jit_phys_pages_limit); + return false; + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { + /* Too many current allocations */ + dev_dbg(kctx->kbdev->dev, + "Max JIT allocations limit reached: active allocations %d, max allocations %d\n", + kctx->jit_current_allocations, + kctx->jit_max_allocations); + return false; + } + + if (info->max_allocations > 0 && + kctx->jit_current_allocations_per_bin[info->bin_id] >= + info->max_allocations) { + /* Too many current allocations in this bin */ + dev_dbg(kctx->kbdev->dev, + "Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n", + info->bin_id, + kctx->jit_current_allocations_per_bin[info->bin_id], + info->max_allocations); + return false; + } + + return true; +} + +static struct kbase_va_region * +find_reasonable_region(const struct base_jit_alloc_info *info, + struct list_head *pool_head, bool ignore_usage_id) +{ + struct kbase_va_region *closest_reg = NULL; + struct kbase_va_region *walker; + size_t current_diff = SIZE_MAX; + + list_for_each_entry(walker, pool_head, jit_node) { + if ((ignore_usage_id || + walker->jit_usage_id == info->usage_id) && + walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements(walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been met, + * it's suitable but other allocations might be a + * better fit. 
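+ * "Fit" is measured as the absolute difference between the region's
+ * committed pages (gpu_alloc->nents) and info->commit_pages; the walk
+ * keeps the candidate with the smallest difference and stops early on
+ * an exact match (diff == 0). For example, for commit_pages == 64 a
+ * region backed with 60 pages (diff 4) beats one backed with 128
+ * (diff 64).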
+ */ + min_size = min_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + closest_reg = walker; + } + + /* The allocation is an exact match */ + if (current_diff == 0) + break; + } + } + + return closest_reg; +} + +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) +{ + struct kbase_va_region *reg = NULL; + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; + + lockdep_assert_held(&kctx->jctx.lock); + + if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) + return NULL; + +#ifdef CONFIG_MALI_2MB_ALLOC + /* Preallocate memory for the sub-allocation structs */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + if (!prealloc_sas[i]) + goto end; + } +#endif + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + + /* + * Scan the pool for an existing allocation which meets our + * requirements and remove it. + */ + if (info->usage_id != 0) + /* First scan for an allocation with the same usage ID */ + reg = find_reasonable_region(info, &kctx->jit_pool_head, false); + + if (!reg) + /* No allocation with the same usage ID, or usage IDs not in + * use. Search for an allocation we can reuse. + */ + reg = find_reasonable_region(info, &kctx->jit_pool_head, true); + + if (reg) { +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t needed_pages = 0; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + int ret; + + /* + * Remove the found region from the pool and add it to the + * active list. + */ + list_move(®->jit_node, &kctx->jit_active_head); + + WARN_ON(reg->gpu_alloc->evicted); + + /* + * Remove the allocation from the eviction list as it's no + * longer eligible for eviction. This must be done before + * dropping the jit_evict_lock + */ + list_del_init(®->gpu_alloc->evict_node); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) { + if (info->commit_pages > reg->gpu_alloc->nents) + needed_pages = info->commit_pages - + reg->gpu_alloc->nents; + + /* Update early the recycled JIT region's estimate of + * used_pages to ensure it doesn't get trimmed + * undesirably. This is needed as the recycled JIT + * region has been added to the active list but the + * number of used pages for it would be zero, so it + * could get trimmed instead of other allocations only + * to be regrown later resulting in a breach of the JIT + * physical pressure limit. + * Also that trimming would disturb the accounting of + * physical pages, i.e. the VM stats, as the number of + * backing pages would have changed when the call to + * kbase_mem_evictable_unmark_reclaim is made. + * + * The second call to update pressure at the end of + * this function would effectively be a nop. + */ + kbase_jit_report_update_pressure( + kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + + kbase_jit_request_phys_increase_locked(kctx, + needed_pages); + } +#endif + mutex_unlock(&kctx->jit_evict_lock); + + /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock', + * so any state protected by that lock might need to be + * re-evaluated if more code is added here in future. 
+ */ + ret = kbase_jit_grow(kctx, info, reg, prealloc_sas); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) + kbase_jit_done_phys_increase(kctx, needed_pages); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + kbase_gpu_vm_unlock(kctx); + + if (ret < 0) { + /* + * An update to an allocation from the pool failed, + * chances are slim a new allocation would fair any + * better so return the allocation to the pool and + * return the function with failure. + */ + dev_dbg(kctx->kbdev->dev, + "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", + info->va_pages, info->commit_pages); +#if MALI_JIT_PRESSURE_LIMIT_BASE + /* Undo the early change made to the recycled JIT + * region's estimate of used_pages. + */ + if (!ignore_pressure_limit) { + kbase_jit_report_update_pressure( + kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + mutex_lock(&kctx->jit_evict_lock); + list_move(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); + reg = NULL; + goto end; + } + } else { + /* No suitable JIT allocation was found so create a new one */ + u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | + BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | + BASE_MEM_COHERENT_LOCAL | + BASEP_MEM_NO_USER_FREE; + u64 gpu_addr; + + if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) + flags |= BASE_MEM_TILER_ALIGN_TOP; + + flags |= base_mem_group_id_set(kctx->jit_group_id); +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) { + flags |= BASEP_MEM_PERFORM_JIT_TRIM; + /* The corresponding call to 'done_phys_increase' would + * be made inside the kbase_mem_alloc(). + */ + kbase_jit_request_phys_increase_locked( + kctx, info->commit_pages); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, + info->extent, &flags, &gpu_addr); + if (!reg) { + /* Most likely not enough GPU virtual space left for + * the new JIT allocation. + */ + dev_dbg(kctx->kbdev->dev, + "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", + info->va_pages, info->commit_pages); + goto end; + } + + if (!ignore_pressure_limit) { + /* Due to enforcing of pressure limit, kbase_mem_alloc + * was instructed to perform the trimming which in turn + * would have ensured that the new JIT allocation is + * already in the jit_active_head list, so nothing to + * do here. 
+ */ + WARN_ON(list_empty(®->jit_node)); + } else { + mutex_lock(&kctx->jit_evict_lock); + list_add(®->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_evict_lock); + } + } + + trace_mali_jit_alloc(reg, info->id); + + kctx->jit_current_allocations++; + kctx->jit_current_allocations_per_bin[info->bin_id]++; + + trace_jit_stats(kctx, info->bin_id, info->max_allocations); + + reg->jit_usage_id = info->usage_id; + reg->jit_bin_id = info->bin_id; + reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC; +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; + reg->heap_info_gpu_addr = info->heap_info_gpu_addr; + kbase_jit_report_update_pressure(kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +end: + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) + kfree(prealloc_sas[i]); + + return reg; +} + +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + u64 old_pages; + + /* JIT id not immediately available here, so use 0u */ + trace_mali_jit_free(reg, 0u); + + /* Get current size of JIT region */ + old_pages = kbase_reg_current_backed_size(reg); + if (reg->initial_commit < old_pages) { + /* Free trim_level % of region, but don't go below initial + * commit size + */ + u64 new_size = MAX(reg->initial_commit, + div_u64(old_pages * (100 - kctx->trim_level), 100)); + u64 delta = old_pages - new_size; + + if (delta) + kbase_mem_shrink(kctx, reg, old_pages - delta); + } + +#if MALI_JIT_PRESSURE_LIMIT_BASE + reg->heap_info_gpu_addr = 0; + kbase_jit_report_update_pressure(kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + kctx->jit_current_allocations--; + kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; + + trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX); + + kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); + + kbase_gpu_vm_lock(kctx); + reg->flags |= KBASE_REG_DONT_NEED; + reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC; + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); + kbase_gpu_vm_unlock(kctx); + + /* + * Add the allocation to the eviction list and the jit pool, after this + * point the shrink can reclaim it, or it may be reused. + */ + mutex_lock(&kctx->jit_evict_lock); + + /* This allocation can't already be on a list. */ + WARN_ON(!list_empty(®->gpu_alloc->evict_node)); + list_add(®->gpu_alloc->evict_node, &kctx->evict_list); + + list_move(®->jit_node, &kctx->jit_pool_head); + + mutex_unlock(&kctx->jit_evict_lock); +} + +void kbase_jit_backing_lost(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + + if (WARN_ON(!kctx)) + return; + + lockdep_assert_held(&kctx->jit_evict_lock); + + /* + * JIT allocations will always be on a list, if the region + * is not on a list then it's not a JIT allocation. + */ + if (list_empty(®->jit_node)) + return; + + /* + * Freeing the allocation requires locks we might not be able + * to take now, so move the allocation to the free list and kick + * the worker which will do the freeing. 
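+ * The worker is kbase_jit_destroy_worker(), which drains
+ * jit_destroy_head and frees each region with the VM lock held.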
+ */ + list_move(®->jit_node, &kctx->jit_destroy_head); + + schedule_work(&kctx->jit_work); +} + +bool kbase_jit_evict(struct kbase_context *kctx) +{ + struct kbase_va_region *reg = NULL; + + lockdep_assert_held(&kctx->reg_lock); + + /* Free the oldest allocation from the pool */ + mutex_lock(&kctx->jit_evict_lock); + if (!list_empty(&kctx->jit_pool_head)) { + reg = list_entry(kctx->jit_pool_head.prev, + struct kbase_va_region, jit_node); + list_del(®->jit_node); + list_del_init(®->gpu_alloc->evict_node); + } + mutex_unlock(&kctx->jit_evict_lock); + + if (reg) { + reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, reg); + } + + return (reg != NULL); +} + +void kbase_jit_term(struct kbase_context *kctx) +{ + struct kbase_va_region *walker; + + /* Free all allocations for this context */ + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + /* Free all allocations from the pool */ + while (!list_empty(&kctx->jit_pool_head)) { + walker = list_first_entry(&kctx->jit_pool_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + list_del_init(&walker->gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + walker->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, walker); + mutex_lock(&kctx->jit_evict_lock); + } + + /* Free all allocations from active list */ + while (!list_empty(&kctx->jit_active_head)) { + walker = list_first_entry(&kctx->jit_active_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + list_del_init(&walker->gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + walker->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, walker); + mutex_lock(&kctx->jit_evict_lock); + } +#if MALI_JIT_PRESSURE_LIMIT_BASE + WARN_ON(kctx->jit_phys_pages_to_be_allocated); +#endif + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + + /* + * Flush the freeing of allocations whose backing has been freed + * (i.e. everything in jit_destroy_head). + */ + cancel_work_sync(&kctx->jit_work); +} + +#if MALI_JIT_PRESSURE_LIMIT_BASE +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags) +{ + /* Offset to the location used for a JIT report within the GPU memory + * + * This constants only used for this debugging function - not useful + * anywhere else in kbase + */ + const u64 jit_report_gpu_mem_offset = sizeof(u64)*2; + + u64 addr_start; + struct kbase_vmap_struct mapping; + u64 *ptr; + + if (reg->heap_info_gpu_addr == 0ull) + goto out; + + /* Nothing else to trace in the case the memory just contains the + * size. Other tracepoints already record the relevant area of memory. 
+ */ + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) + goto out; + + addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; + + ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, + &mapping); + if (!ptr) { + dev_warn(kctx->kbdev->dev, + "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", + __func__, reg->start_pfn << PAGE_SHIFT, + addr_start); + goto out; + } + + trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, + ptr, flags); + + kbase_vunmap(kctx, &mapping); +out: + return; +} +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +#if MALI_JIT_PRESSURE_LIMIT_BASE +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags) +{ + u64 diff; + + lockdep_assert_held(&kctx->jctx.lock); + + trace_mali_jit_report_pressure(reg, new_used_pages, + kctx->jit_current_phys_pressure + new_used_pages - + reg->used_pages, + flags); + + if (WARN_ON(new_used_pages > reg->nr_pages)) + return; + + if (reg->used_pages > new_used_pages) { + /* We reduced the number of used pages */ + diff = reg->used_pages - new_used_pages; + + if (!WARN_ON(diff > kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure -= diff; + + reg->used_pages = new_used_pages; + } else { + /* We increased the number of used pages */ + diff = new_used_pages - reg->used_pages; + + if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure += diff; + + reg->used_pages = new_used_pages; + } + +} +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +bool kbase_has_exec_va_zone(struct kbase_context *kctx) +{ + bool has_exec_va_zone; + + kbase_gpu_vm_lock(kctx); + has_exec_va_zone = (kctx->exec_va_start != U64_MAX); + kbase_gpu_vm_unlock(kctx); + + return has_exec_va_zone; +} + + +int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + struct page **pages = alloc->imported.user_buf.pages; + unsigned long address = alloc->imported.user_buf.address; + struct mm_struct *mm = alloc->imported.user_buf.mm; + long pinned_pages; + long i; + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) + return -EINVAL; + + if (alloc->nents) { + if (WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages)) + return -EINVAL; + else + return 0; + } + + if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) + return -EINVAL; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + pinned_pages = get_user_pages(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, +#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ +KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#else + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); +#endif +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#else + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? 
FOLL_WRITE : 0, + pages, NULL, NULL); +#endif + + if (pinned_pages <= 0) + return pinned_pages; + + if (pinned_pages != alloc->imported.user_buf.nr_pages) { + for (i = 0; i < pinned_pages; i++) + put_page(pages[i]); + return -ENOMEM; + } + + alloc->nents = pinned_pages; + + return 0; +} + +static int kbase_jd_user_buf_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + long pinned_pages; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + struct tagged_addr *pa; + long i; + unsigned long address; + struct device *dev; + unsigned long offset; + unsigned long local_size; + unsigned long gwt_mask = ~0; + int err = kbase_jd_user_buf_pin_pages(kctx, reg); + + if (err) + return err; + + alloc = reg->gpu_alloc; + pa = kbase_get_gpu_phy_pages(reg); + address = alloc->imported.user_buf.address; + pinned_pages = alloc->nents; + pages = alloc->imported.user_buf.pages; + dev = kctx->kbdev->dev; + offset = address & ~PAGE_MASK; + local_size = alloc->imported.user_buf.size; + + for (i = 0; i < pinned_pages; i++) { + dma_addr_t dma_addr; + unsigned long min; + + min = MIN(PAGE_SIZE - offset, local_size); + dma_addr = dma_map_page(dev, pages[i], + offset, min, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto unwind; + + alloc->imported.user_buf.dma_addrs[i] = dma_addr; + pa[i] = as_tagged(page_to_phys(pages[i])); + + local_size -= min; + offset = 0; + } + +#ifdef CONFIG_MALI_CINSTR_GWT + if (kctx->gwt_enabled) + gwt_mask = ~KBASE_REG_GPU_WR; +#endif + + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + pa, kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id); + if (err == 0) + return 0; + + /* fall down */ +unwind: + alloc->nents = 0; + while (i--) { + dma_unmap_page(kctx->kbdev->dev, + alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + + while (++i < pinned_pages) { + put_page(pages[i]); + pages[i] = NULL; + } + + return err; +} + +/* This function would also perform the work of unpinning pages on Job Manager + * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT + * have a corresponding call to kbase_jd_user_buf_unpin_pages(). 
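+ *
+ * Illustrative pairing only (editorial sketch drawn from the call sites in
+ * this file, no additional API is assumed): the pages pinned by
+ * kbase_jd_user_buf_pin_pages() on the map path are dropped here with
+ * put_page() once their DMA mappings are torn down, e.g.
+ *
+ *   alloc = kbase_map_external_resource(kctx, reg, locked_mm);
+ *     -> kbase_jd_user_buf_map() -> kbase_jd_user_buf_pin_pages()
+ *   ...
+ *   kbase_unmap_external_resource(kctx, reg, alloc);
+ *     -> kbase_jd_user_buf_unmap() -> dma_unmap_page() and put_page()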
+ */ +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable) +{ + long i; + struct page **pages; + unsigned long size = alloc->imported.user_buf.size; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + pages = alloc->imported.user_buf.pages; + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { + unsigned long local_size; + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + + local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); + dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + DMA_BIDIRECTIONAL); + if (writeable) + set_page_dirty_lock(pages[i]); + put_page(pages[i]); + pages[i] = NULL; + + size -= local_size; + } + alloc->nents = 0; +} + +int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, + void *src_page, size_t *to_copy, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset) +{ + void *target_page = kmap(dest_pages[*target_page_nr]); + size_t chunk = PAGE_SIZE-offset; + + if (!target_page) { + pr_err("%s: kmap failure", __func__); + return -ENOMEM; + } + + chunk = min(chunk, *to_copy); + + memcpy(target_page + offset, src_page, chunk); + *to_copy -= chunk; + + kunmap(dest_pages[*target_page_nr]); + + *target_page_nr += 1; + if (*target_page_nr >= nr_pages || *to_copy == 0) + return 0; + + target_page = kmap(dest_pages[*target_page_nr]); + if (!target_page) { + pr_err("%s: kmap failure", __func__); + return -ENOMEM; + } + + KBASE_DEBUG_ASSERT(target_page); + + chunk = min(offset, *to_copy); + memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); + *to_copy -= chunk; + + kunmap(dest_pages[*target_page_nr]); + + return 0; +} + +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm) +{ + int err; + + lockdep_assert_held(&kctx->reg_lock); + + /* decide what needs to happen for this resource */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && + (!reg->gpu_alloc->nents)) + goto exit; + + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + err = kbase_jd_user_buf_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; + goto exit; + } + } + } + break; + case KBASE_MEM_TYPE_IMPORTED_UMM: { + err = kbase_mem_umm_map(kctx, reg); + if (err) + goto exit; + break; + } + default: + goto exit; + } + + return kbase_mem_phy_alloc_get(reg->gpu_alloc); +exit: + return NULL; +} + +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +{ + switch (alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_UMM: { + kbase_mem_umm_unmap(kctx, reg, alloc); + } + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + alloc->imported.user_buf.current_mapping_usage_count--; + + if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + bool writeable = true; + + if (!kbase_is_region_invalid_or_free(reg) && + reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + kbase_reg_current_backed_size(reg), + kctx->as_nr); + + if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) + writeable = false; + + kbase_jd_user_buf_unmap(kctx, alloc, writeable); + } + } + break; + default: + break; + } + kbase_mem_phy_alloc_put(alloc); +} + +struct 
kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr) +{ + struct kbase_ctx_ext_res_meta *meta = NULL; + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Walk the per context external resource metadata list for the + * metadata which matches the region which is being acquired. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { + if (walker->gpu_addr == gpu_addr) { + meta = walker; + meta->ref++; + break; + } + } + + /* No metadata exists so create one. */ + if (!meta) { + struct kbase_va_region *reg; + + /* Find the region */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto failed; + + /* Allocate the metadata object */ + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto failed; + + /* + * Fill in the metadata object and acquire a reference + * for the physical resource. + */ + meta->alloc = kbase_map_external_resource(kctx, reg, NULL); + meta->ref = 1; + + if (!meta->alloc) + goto fail_map; + + meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + + list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); + } + + return meta; + +fail_map: + kfree(meta); +failed: + return NULL; +} + +static struct kbase_ctx_ext_res_meta * +find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Walk the per context external resource metadata list for the + * metadata which matches the region which is being released. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) + if (walker->gpu_addr == gpu_addr) + return walker; + + return NULL; +} + +static void release_sticky_resource_meta(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta) +{ + struct kbase_va_region *reg; + + /* Drop the physical memory reference and free the metadata. */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, + meta->gpu_addr); + + kbase_unmap_external_resource(kctx, reg, meta->alloc); + list_del(&meta->ext_res_node); + kfree(meta); +} + +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +{ + lockdep_assert_held(&kctx->reg_lock); + + /* Search of the metadata if one isn't provided. */ + if (!meta) + meta = find_sticky_resource_meta(kctx, gpu_addr); + + /* No metadata so just return. */ + if (!meta) + return false; + + if (--meta->ref != 0) + return true; + + release_sticky_resource_meta(kctx, meta); + + return true; +} + +bool kbase_sticky_resource_release_force(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +{ + lockdep_assert_held(&kctx->reg_lock); + + /* Search of the metadata if one isn't provided. */ + if (!meta) + meta = find_sticky_resource_meta(kctx, gpu_addr); + + /* No metadata so just return. */ + if (!meta) + return false; + + release_sticky_resource_meta(kctx, meta); + + return true; +} + +int kbase_sticky_resource_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->ext_res_meta_head); + + return 0; +} + +void kbase_sticky_resource_term(struct kbase_context *kctx) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Free any sticky resources which haven't been unmapped. + * + * Note: + * We don't care about refcounts at this point as no future + * references to the meta data will be made. 
+ * Region termination would find these if we didn't free them
+ * here, but it's more efficient if we do the clean up here.
+ */
+	while (!list_empty(&kctx->ext_res_meta_head)) {
+		walker = list_first_entry(&kctx->ext_res_meta_head,
+				struct kbase_ctx_ext_res_meta, ext_res_node);
+
+		kbase_sticky_resource_release_force(kctx, walker, 0);
+	}
+}
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h
new file mode 100644
index 000000000000..a057f611ba83
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h
@@ -0,0 +1,1891 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
+		int pages);
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid.
*/ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ + +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ + +/* This must always be a power of 2 */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/** + * A CPU mapping + */ +struct kbase_cpu_mapping { + struct list_head mappings_list; + struct kbase_mem_phy_alloc *alloc; + struct kbase_context *kctx; + struct kbase_va_region *region; + int count; + int free_on_close; +}; + +enum kbase_memory_type { + KBASE_MEM_TYPE_NATIVE, + KBASE_MEM_TYPE_IMPORTED_UMM, + KBASE_MEM_TYPE_IMPORTED_USER_BUF, + KBASE_MEM_TYPE_ALIAS, + KBASE_MEM_TYPE_RAW +}; + +/* internal structure, mirroring base_mem_aliasing_info, + * but with alloc instead of a gpu va (handle) */ +struct kbase_aliased { + struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ + u64 offset; /* in pages */ + u64 length; /* in pages */ +}; + +/** + * @brief Physical pages tracking object properties + */ +#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 0) +#define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) + +/* struct kbase_mem_phy_alloc - Physical pages tracking object. + * + * Set up to track N pages. + * N not stored here, the creator holds that info. + * This object only tracks how many elements are actually valid (present). + * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc + * is not shared with another region or client. CPU mappings are OK to + * exist when changing, as long as the tracked mappings objects are + * updated as part of the change. + * + * @kref: number of users of this alloc + * @gpu_mappings: count number of times mapped on the GPU + * @nents: 0..N + * @pages: N elements, only 0..nents are valid + * @mappings: List of CPU mappings of this physical memory allocation. + * @evict_node: Node used to store this allocation on the eviction list + * @evicted: Physical backing size when the pages where evicted + * @reg: Back reference to the region structure which created this + * allocation, or NULL if it has been freed. + * @type: type of buffer + * @permanent_map: Kernel side mapping of the alloc, shall never be + * referred directly. kbase_phy_alloc_mapping_get() & + * kbase_phy_alloc_mapping_put() pair should be used + * around access to the kernel-side CPU mapping so that + * mapping doesn't disappear whilst it is being accessed. + * @properties: Bitmask of properties, e.g. KBASE_MEM_PHY_ALLOC_LARGE. + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). 
+ * @imported: member in union valid based on @a type + */ +struct kbase_mem_phy_alloc { + struct kref kref; + atomic_t gpu_mappings; + size_t nents; + struct tagged_addr *pages; + struct list_head mappings; + struct list_head evict_node; + size_t evicted; + struct kbase_va_region *reg; + enum kbase_memory_type type; + struct kbase_vmap_struct *permanent_map; + u8 properties; + u8 group_id; + + union { + struct { + struct kbase_context *kctx; + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + unsigned int current_mapping_usage_count; + struct sg_table *sgt; + bool need_sync; + } umm; + struct { + u64 stride; + size_t nents; + struct kbase_aliased *aliased; + } alias; + struct { + struct kbase_context *kctx; + /* Number of pages in this structure, including *pages. + * Used for kernel memory tracking. + */ + size_t nr_struct_pages; + } native; + struct kbase_alloc_import_user_buf { + unsigned long address; + unsigned long size; + unsigned long nr_pages; + struct page **pages; + /* top bit (1<<31) of current_mapping_usage_count + * specifies that this import was pinned on import + * See PINNED_ON_IMPORT + */ + u32 current_mapping_usage_count; + struct mm_struct *mm; + dma_addr_t *dma_addrs; + } user_buf; + } imported; +}; + +/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is + * used to signify that a buffer was pinned when it was imported. Since the + * reference count is limited by the number of atoms that can be submitted at + * once there should be no danger of overflowing into this bit. + * Stealing the top bit also has the benefit that + * current_mapping_usage_count != 0 if and only if the buffer is mapped. + */ +#define PINNED_ON_IMPORT (1<<31) + +/** + * enum kbase_jit_report_flags - Flags for just-in-time memory allocation + * pressure limit functions + * @KBASE_JIT_REPORT_ON_ALLOC_OR_FREE: Notifying about an update happening due + * to a just-in-time memory allocation or free + * + * Used to control flow within pressure limit related functions, or to provide + * extra debugging information + */ +enum kbase_jit_report_flags { + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) +}; + +static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) + atomic_inc(&alloc->gpu_mappings); +} + +static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) + if (0 > atomic_dec_return(&alloc->gpu_mappings)) { + pr_err("Mismatched %s:\n", __func__); + dump_stack(); + } +} + +/** + * kbase_mem_is_imported - Indicate whether a memory type is imported + * + * @type: the memory type + * + * Return: true if the memory type is imported, false otherwise + */ +static inline bool kbase_mem_is_imported(enum kbase_memory_type type) +{ + return (type == KBASE_MEM_TYPE_IMPORTED_UMM) || + (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); +} + +void kbase_mem_kref_free(struct kref *kref); + +int kbase_mem_init(struct kbase_device *kbdev); +void kbase_mem_halt(struct kbase_device *kbdev); +void kbase_mem_term(struct kbase_device *kbdev); + +static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) +{ + kref_get(&alloc->kref); + return alloc; +} + +static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct 
kbase_mem_phy_alloc *alloc) +{ + kref_put(&alloc->kref, kbase_mem_kref_free); + return NULL; +} + +/** + * A GPU memory region, and attributes for CPU mappings. + * + * @rblink: Node in a red-black tree of memory regions within the same zone of + * the GPU's virtual address space. + * @link: Links to neighboring items in a list of growable memory regions + * that triggered incremental rendering by growing too much. + * @rbtree: Backlink to the red-black tree of memory regions. + * @start_pfn: The Page Frame Number in GPU virtual address space. + * @nr_pages: The size of the region in pages. + * @initial_commit: Initial commit, for aligning the start address and + * correctly growing KBASE_REG_TILER_ALIGN_TOP regions. + * @threshold_pages: If non-zero and the amount of memory committed to a region + * that can grow on page fault exceeds this number of pages + * then the driver switches to incremental rendering. + * @extent: Number of pages allocated on page fault. + * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. + * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region. + * @jit_node: Links to neighboring regions in the just-in-time memory pool. + * @jit_usage_id: The last just-in-time memory usage ID for this region. + * @jit_bin_id: The just-in-time memory bin this region came from. + * @va_refcnt: Number of users of this region. Protected by reg_lock. + */ +struct kbase_va_region { + struct rb_node rblink; + struct list_head link; + struct rb_root *rbtree; + u64 start_pfn; + size_t nr_pages; + size_t initial_commit; + size_t threshold_pages; + +/* Free region */ +#define KBASE_REG_FREE (1ul << 0) +/* CPU write access */ +#define KBASE_REG_CPU_WR (1ul << 1) +/* GPU write access */ +#define KBASE_REG_GPU_WR (1ul << 2) +/* No eXecute flag */ +#define KBASE_REG_GPU_NX (1ul << 3) +/* Is CPU cached? */ +#define KBASE_REG_CPU_CACHED (1ul << 4) +/* Is GPU cached? + * Some components within the GPU might only be able to access memory that is + * GPU cacheable. Refer to the specific GPU implementation for more details. + */ +#define KBASE_REG_GPU_CACHED (1ul << 5) + +#define KBASE_REG_GROWABLE (1ul << 6) +/* Can grow on pf? */ +#define KBASE_REG_PF_GROW (1ul << 7) + +/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ +#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) + +/* inner shareable coherency */ +#define KBASE_REG_SHARE_IN (1ul << 9) +/* inner & outer shareable coherency */ +#define KBASE_REG_SHARE_BOTH (1ul << 10) + +/* Space for 4 different zones */ +#define KBASE_REG_ZONE_MASK (3ul << 11) +#define KBASE_REG_ZONE(x) (((x) & 3) << 11) + +/* GPU read access */ +#define KBASE_REG_GPU_RD (1ul<<13) +/* CPU read access */ +#define KBASE_REG_CPU_RD (1ul<<14) + +/* Index of chosen MEMATTR for this region (0..7) */ +#define KBASE_REG_MEMATTR_MASK (7ul << 16) +#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) +#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) + +#define KBASE_REG_PROTECTED (1ul << 19) + +#define KBASE_REG_DONT_NEED (1ul << 20) + +/* Imported buffer is padded? */ +#define KBASE_REG_IMPORT_PAD (1ul << 21) + +/* Bit 22 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) + +/* The top of the initial commit is aligned to extent pages. + * Extent must be a power of 2 */ +#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) + +/* Whilst this flag is set the GPU allocation is not supposed to be freed by + * user space. 
The flag will remain set for the lifetime of JIT allocations. + */ +#define KBASE_REG_NO_USER_FREE (1ul << 24) + +/* Memory has permanent kernel side mapping */ +#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) + +/* GPU VA region has been freed by the userspace, but still remains allocated + * due to the reference held by CPU mappings created on the GPU VA region. + * + * A region with this flag set has had kbase_gpu_munmap() called on it, but can + * still be looked-up in the region tracker as a non-free region. Hence must + * not create or update any more GPU mappings on such regions because they will + * not be unmapped when the region is finally destroyed. + * + * Since such regions are still present in the region tracker, new allocations + * attempted with BASE_MEM_SAME_VA might fail if their address intersects with + * a region with this flag set. + * + * In addition, this flag indicates the gpu_alloc member might no longer valid + * e.g. in infinite cache simulation. + */ +#define KBASE_REG_VA_FREED (1ul << 26) + +/* If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. + */ +#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + +/* Allocation is actively used for JIT memory */ +#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) + +#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) + +/* only used with 32-bit clients */ +/* + * On a 32bit platform, custom VA should be wired from 4GB + * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface + * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference). + * So we put the default limit to the maximum possible on Linux and shrink + * it down, if required by the GPU, during initialization. + */ + +#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) +#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +/* end 32-bit clients only */ + +/* The starting address and size of the GPU-executable zone are dynamic + * and depend on the platform and the number of pages requested by the + * user process, with an upper limit of 4 GB. + */ +#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) +#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ + + + unsigned long flags; + size_t extent; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + struct list_head jit_node; + u16 jit_usage_id; + u8 jit_bin_id; +#if MALI_JIT_PRESSURE_LIMIT_BASE + /* Pointer to an object in GPU memory defining an end of an allocated + * region + * + * The object can be one of: + * - u32 value defining the size of the region + * - u64 pointer first unused byte in the region + * + * The interpretation of the object depends on + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in jit_info_flags - if it is + * set, the heap info object should be interpreted as size. + */ + u64 heap_info_gpu_addr; + + /* The current estimate of the number of pages used, which in normal + * use is either: + * - the initial estimate == va_pages + * - the actual pages used, as found by a JIT usage report + * + * Note that since the value is calculated from GPU memory after a JIT + * usage report, at any point in time it is allowed to take a random + * value that is no greater than va_pages (e.g. 
it may be greater than + * gpu_alloc->nents) + */ + size_t used_pages; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + int va_refcnt; +}; + +/* Special marker for failed JIT allocations that still must be marked as + * in-use + */ +#define KBASE_RESERVED_REG_JIT_ALLOC ((struct kbase_va_region *)-1) + +static inline bool kbase_is_region_free(struct kbase_va_region *reg) +{ + return (!reg || reg->flags & KBASE_REG_FREE); +} + +static inline bool kbase_is_region_invalid(struct kbase_va_region *reg) +{ + return (!reg || reg->flags & KBASE_REG_VA_FREED); +} + +static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) +{ + /* Possibly not all functions that find regions would be using this + * helper, so they need to be checked when maintaining this function. + */ + return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); +} + +int kbase_remove_va_region(struct kbase_va_region *reg); +static inline void kbase_region_refcnt_free(struct kbase_va_region *reg) +{ + /* If region was mapped then remove va region*/ + if (reg->start_pfn) + kbase_remove_va_region(reg); + + /* To detect use-after-free in debug builds */ + KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); + kfree(reg); +} + +static inline struct kbase_va_region *kbase_va_region_alloc_get( + struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(!region->va_refcnt); + + /* non-atomic as kctx->reg_lock is held */ + dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n", + region->va_refcnt, (void *)region); + region->va_refcnt++; + + return region; +} + +static inline struct kbase_va_region *kbase_va_region_alloc_put( + struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(region->va_refcnt <= 0); + WARN_ON(region->flags & KBASE_REG_FREE); + + /* non-atomic as kctx->reg_lock is held */ + region->va_refcnt--; + dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n", + region->va_refcnt, (void *)region); + if (!region->va_refcnt) + kbase_region_refcnt_free(region); + + return NULL; +} + +/* Common functions */ +static inline struct tagged_addr *kbase_get_cpu_phy_pages( + struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->cpu_alloc->pages; +} + +static inline struct tagged_addr *kbase_get_gpu_phy_pages( + struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->gpu_alloc->pages; +} + +static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + /* if no alloc object the backed size naturally is 0 */ + if (!reg->cpu_alloc) + return 0; + + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->cpu_alloc->nents; +} + +#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ + +static inline struct kbase_mem_phy_alloc *kbase_alloc_create( + struct kbase_context *kctx, size_t nr_pages, + enum kbase_memory_type type, int group_id) +{ + struct kbase_mem_phy_alloc *alloc; + size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; + size_t 
per_page_size = sizeof(*alloc->pages);
+
+	/* Imported pages may have page private data already in use */
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+		alloc_size += nr_pages *
+				sizeof(*alloc->imported.user_buf.dma_addrs);
+		per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+	}
+
+	/*
+	 * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+	 * wrapping around.
+	 */
+	if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+			/ per_page_size))
+		return ERR_PTR(-ENOMEM);
+
+	/* Allocate based on the size to reduce internal fragmentation of vmem */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc = vzalloc(alloc_size);
+	else
+		alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+	if (!alloc)
+		return ERR_PTR(-ENOMEM);
+
+	if (type == KBASE_MEM_TYPE_NATIVE) {
+		alloc->imported.native.nr_struct_pages =
+				(alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		kbase_process_page_usage_inc(kctx,
+				alloc->imported.native.nr_struct_pages);
+	}
+
+	/* Store allocation method */
+	if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+		alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+	kref_init(&alloc->kref);
+	atomic_set(&alloc->gpu_mappings, 0);
+	alloc->nents = 0;
+	alloc->pages = (void *)(alloc + 1);
+	INIT_LIST_HEAD(&alloc->mappings);
+	alloc->type = type;
+	alloc->group_id = group_id;
+
+	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+		alloc->imported.user_buf.dma_addrs =
+				(void *) (alloc->pages + nr_pages);
+
+	return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+		struct kbase_context *kctx, int group_id)
+{
+	KBASE_DEBUG_ASSERT(reg);
+	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+	reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
+			KBASE_MEM_TYPE_NATIVE, group_id);
+	if (IS_ERR(reg->cpu_alloc))
+		return PTR_ERR(reg->cpu_alloc);
+	else if (!reg->cpu_alloc)
+		return -ENOMEM;
+
+	reg->cpu_alloc->imported.native.kctx = kctx;
+	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
+	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
+		reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
+				KBASE_MEM_TYPE_NATIVE, group_id);
+		if (IS_ERR_OR_NULL(reg->gpu_alloc)) {
+			kbase_mem_phy_alloc_put(reg->cpu_alloc);
+			return -ENOMEM;
+		}
+		reg->gpu_alloc->imported.native.kctx = kctx;
+	} else {
+		reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	}
+
+	mutex_lock(&kctx->jit_evict_lock);
+	INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
+	INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	reg->flags &= ~KBASE_REG_FREE;
+
+	return 0;
+}
+
+/*
+ * Max size for kbdev memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * Max size for kctx memory pool (in pages)
+ */
+#define KBASE_MEM_POOL_MAX_SIZE_KCTX  (SZ_64M >> PAGE_SHIFT)
+
+/*
+ * The order required for a 2MB page allocation (2^order * 4KB = 2MB)
+ */
+#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER	9
+
+/*
+ * The order required for a 4KB page allocation
+ */
+#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER	0
+
+/**
+ * kbase_mem_pool_config_set_max_size - Set maximum number of free pages in
+ *                                      initial configuration of a memory pool
+ *
+ * @config:   Initial configuration for a physical memory pool
+ * @max_size: Maximum number of free pages that a pool created from
+ *            @config can hold
+ */
+static inline void kbase_mem_pool_config_set_max_size(
+	struct kbase_mem_pool_config *const config, size_t const max_size)
+{
+	WRITE_ONCE(config->max_size,
max_size); +} + +/** + * kbase_mem_pool_config_get_max_size - Get maximum number of free pages from + * initial configuration of a memory pool + * + * @config: Initial configuration for a physical memory pool + * + * Return: Maximum number of free pages that a pool created from @config + * can hold + */ +static inline size_t kbase_mem_pool_config_get_max_size( + const struct kbase_mem_pool_config *const config) +{ + return READ_ONCE(config->max_size); +} + +/** + * kbase_mem_pool_init - Create a memory pool for a kbase device + * @pool: Memory pool to initialize + * @config: Initial configuration for the memory pool + * @order: Page order for physical page size (order=0=>4kB, order=9=>2MB) + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @kbdev: Kbase device where memory is used + * @next_pool: Pointer to the next pool or NULL. + * + * Allocations from @pool are in whole pages. Each @pool has a free list where + * pages can be quickly allocated from. The free list is initially empty and + * filled whenever pages are freed back to the pool. The number of free pages + * in the pool will in general not exceed @max_size, but the pool may in + * certain corner cases grow above @max_size. + * + * If @next_pool is not NULL, we will allocate from @next_pool before going to + * the memory group manager. Similarly pages can spill over to @next_pool when + * @pool is full. Pages are zeroed before they spill over to another pool, to + * prevent leaking information between applications. + * + * A shrinker is registered so that Linux mm can reclaim pages from the pool as + * needed. + * + * Return: 0 on success, negative -errno on error + */ +int kbase_mem_pool_init(struct kbase_mem_pool *pool, + const struct kbase_mem_pool_config *config, + unsigned int order, + int group_id, + struct kbase_device *kbdev, + struct kbase_mem_pool *next_pool); + +/** + * kbase_mem_pool_term - Destroy a memory pool + * @pool: Memory pool to destroy + * + * Pages in the pool will spill over to @next_pool (if available) or freed to + * the kernel. + */ +void kbase_mem_pool_term(struct kbase_mem_pool *pool); + +/** + * kbase_mem_pool_alloc - Allocate a page from memory pool + * @pool: Memory pool to allocate from + * + * Allocations from the pool are made as follows: + * 1. If there are free pages in the pool, allocate a page from @pool. + * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page + * from @next_pool. + * 3. Return NULL if no memory in the pool + * + * Return: Pointer to allocated page, or NULL if allocation failed. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_alloc_locked() instead. + */ +struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); + +/** + * kbase_mem_pool_alloc_locked - Allocate a page from memory pool + * @pool: Memory pool to allocate from + * + * If there are free pages in the pool, this function allocates a page from + * @pool. This function does not use @next_pool. + * + * Return: Pointer to allocated page, or NULL if allocation failed. + * + * Note : Caller must hold the pool lock. + */ +struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool); + +/** + * kbase_mem_pool_free - Free a page to memory pool + * @pool: Memory pool where page should be freed + * @page: Page to free to the pool + * @dirty: Whether some of the page may be dirty in the cache. 
+ * + * Pages are freed to the pool as follows: + * 1. If @pool is not full, add @page to @pool. + * 2. Otherwise, if @next_pool is not NULL and not full, add @page to + * @next_pool. + * 3. Finally, free @page to the kernel. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_free_locked() instead. + */ +void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, + bool dirty); + +/** + * kbase_mem_pool_free_locked - Free a page to memory pool + * @pool: Memory pool where page should be freed + * @p: Page to free to the pool + * @dirty: Whether some of the page may be dirty in the cache. + * + * If @pool is not full, this function adds @page to @pool. Otherwise, @page is + * freed to the kernel. This function does not use @next_pool. + * + * Note : Caller must hold the pool lock. + */ +void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, + bool dirty); + +/** + * kbase_mem_pool_alloc_pages - Allocate pages from memory pool + * @pool: Memory pool to allocate from + * @nr_4k_pages: Number of pages to allocate + * @pages: Pointer to array where the physical address of the allocated + * pages will be stored. + * @partial_allowed: If fewer pages allocated is allowed + * + * Like kbase_mem_pool_alloc() but optimized for allocating many pages. + * + * Return: + * On success number of pages allocated (could be less than nr_pages if + * partial_allowed). + * On error an error code. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_alloc_pages_locked() instead. + * + * The caller must not hold vm_lock, as this could cause a deadlock if + * the kernel OoM killer runs. If the caller must allocate pages while holding + * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. + */ +int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, + struct tagged_addr *pages, bool partial_allowed); + +/** + * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool + * @pool: Memory pool to allocate from + * @nr_4k_pages: Number of pages to allocate + * @pages: Pointer to array where the physical address of the allocated + * pages will be stored. + * + * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This + * version does not allocate new pages from the kernel, and therefore will never + * trigger the OoM killer. Therefore, it can be run while the vm_lock is held. + * + * As new pages can not be allocated, the caller must ensure there are + * sufficient pages in the pool. Usage of this function should look like : + * + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * while (kbase_mem_pool_size(pool) < pages_required) { + * kbase_mem_pool_unlock(pool) + * kbase_gpu_vm_unlock(kctx); + * kbase_mem_pool_grow(pool) + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * } + * kbase_mem_pool_alloc_pages_locked(pool) + * kbase_mem_pool_unlock(pool) + * Perform other processing that requires vm_lock... + * kbase_gpu_vm_unlock(kctx); + * + * This ensures that the pool can be grown to the required size and that the + * allocation can complete without another thread using the newly grown pages. + * + * Return: + * On success number of pages allocated. + * On error an error code. + * + * Note : Caller must hold the pool lock. 
+ */ +int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, + size_t nr_4k_pages, struct tagged_addr *pages); + +/** + * kbase_mem_pool_free_pages - Free pages to memory pool + * @pool: Memory pool where pages should be freed + * @nr_pages: Number of pages to free + * @pages: Pointer to array holding the physical addresses of the pages to + * free. + * @dirty: Whether any pages may be dirty in the cache. + * @reclaimed: Whether the pages where reclaimable and thus should bypass + * the pool and go straight to the kernel. + * + * Like kbase_mem_pool_free() but optimized for freeing many pages. + */ +void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, + struct tagged_addr *pages, bool dirty, bool reclaimed); + +/** + * kbase_mem_pool_free_pages_locked - Free pages to memory pool + * @pool: Memory pool where pages should be freed + * @nr_pages: Number of pages to free + * @pages: Pointer to array holding the physical addresses of the pages to + * free. + * @dirty: Whether any pages may be dirty in the cache. + * @reclaimed: Whether the pages where reclaimable and thus should bypass + * the pool and go straight to the kernel. + * + * Like kbase_mem_pool_free() but optimized for freeing many pages. + */ +void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, bool dirty, + bool reclaimed); + +/** + * kbase_mem_pool_size - Get number of free pages in memory pool + * @pool: Memory pool to inspect + * + * Note: the size of the pool may in certain corner cases exceed @max_size! + * + * Return: Number of free pages in the pool + */ +static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) +{ + return READ_ONCE(pool->cur_size); +} + +/** + * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool + * @pool: Memory pool to inspect + * + * Return: Maximum number of free pages in the pool + */ +static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) +{ + return pool->max_size; +} + + +/** + * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool + * @pool: Memory pool to inspect + * @max_size: Maximum number of free pages the pool can hold + * + * If @max_size is reduced, the pool will be shrunk to adhere to the new limit. + * For details see kbase_mem_pool_shrink(). + */ +void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); + +/** + * kbase_mem_pool_grow - Grow the pool + * @pool: Memory pool to grow + * @nr_to_grow: Number of pages to add to the pool + * + * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to + * become larger than the maximum size specified. + * + * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages + */ +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); + +/** + * kbase_mem_pool_trim - Grow or shrink the pool to a new size + * @pool: Memory pool to trim + * @new_size: New number of pages in the pool + * + * If @new_size > @cur_size, fill the pool with new pages from the kernel, but + * not above the max_size for the pool. + * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. + */ +void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); + +/** + * kbase_mem_pool_mark_dying - Mark that this pool is dying + * @pool: Memory pool + * + * This will cause any ongoing allocation operations (eg growing on page fault) + * to be terminated. 
+ */ +void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); + +/** + * kbase_mem_alloc_page - Allocate a new page for a device + * @pool: Memory pool to allocate a page from + * + * Most uses should use kbase_mem_pool_alloc to allocate a page. However that + * function can fail in the event the pool is empty. + * + * Return: A new page or NULL if no memory + */ +struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); + +/** + * kbase_region_tracker_init - Initialize the region tracker data structure + * @kctx: kbase context + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_region_tracker_init(struct kbase_context *kctx); + +/** + * kbase_region_tracker_init_jit - Initialize the just-in-time memory + * allocation region + * @kctx: Kbase context. + * @jit_va_pages: Size of the JIT region in pages. + * @max_allocations: Maximum number of allocations allowed for the JIT region. + * Valid range is 0..%BASE_JIT_ALLOC_COUNT. + * @trim_level: Trim level for the JIT region. + * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL. + * @group_id: The physical group ID from which to allocate JIT memory. + * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @phys_pages_limit: Maximum number of physical pages to use to back the JIT + * region. Must not exceed @jit_va_pages. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + int max_allocations, int trim_level, int group_id, + u64 phys_pages_limit); + +/** + * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region + * @kctx: kbase context + * @exec_va_pages: Size of the JIT region in pages. + * It must not be greater than 4 GB. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); + +/** + * kbase_region_tracker_term - Terminate the JIT region + * @kctx: kbase context + */ +void kbase_region_tracker_term(struct kbase_context *kctx); + +/** + * kbase_region_tracker_term_rbtree - Free memory for a region tracker + * + * This will free all the regions within the region tracker + * + * @rbtree: Region tracker tree root + */ +void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); + +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( + struct kbase_context *kctx, u64 gpu_addr); +struct kbase_va_region *kbase_find_region_enclosing_address( + struct rb_root *rbtree, u64 gpu_addr); + +/** + * @brief Check that a pointer is actually a valid region. + * + * Must be called with context lock held. 
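+ *
+ * Minimal lookup sketch (editorial illustration using only functions
+ * declared in this header):
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ *   if (kbase_is_region_invalid_or_free(reg))
+ *           goto out_unlock;
+ *   ...use reg...
+ * out_unlock:
+ *   kbase_gpu_vm_unlock(kctx);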
+ */ +struct kbase_va_region *kbase_region_tracker_find_region_base_address( + struct kbase_context *kctx, u64 gpu_addr); +struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, + u64 gpu_addr); + +struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, + u64 start_pfn, size_t nr_pages, int zone); +void kbase_free_alloced_region(struct kbase_va_region *reg); +int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align); +int kbase_add_va_region_rbtree(struct kbase_device *kbdev, + struct kbase_va_region *reg, u64 addr, size_t nr_pages, + size_t align); + +bool kbase_check_alloc_flags(unsigned long flags); +bool kbase_check_import_flags(unsigned long flags); + +/** + * kbase_check_alloc_sizes - check user space sizes parameters for an + * allocation + * + * @kctx: kbase context + * @flags: The flags passed from user space + * @va_pages: The size of the requested region, in pages. + * @commit_pages: Number of pages to commit initially. + * @extent: Number of pages to grow by on GPU page fault and/or alignment + * (depending on flags) + * + * Makes checks on the size parameters passed in from user space for a memory + * allocation call, with respect to the flags requested. + * + * Return: 0 if sizes are valid for these flags, negative error code otherwise + */ +int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, + u64 va_pages, u64 commit_pages, u64 extent); + +/** + * kbase_update_region_flags - Convert user space flags to kernel region flags + * + * @kctx: kbase context + * @reg: The region to update the flags on + * @flags: The flags passed from user space + * + * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and + * this function will fail if the system does not support system coherency. + * + * Return: 0 if successful, -EINVAL if the flags are not supported + */ +int kbase_update_region_flags(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned long flags); + +void kbase_gpu_vm_lock(struct kbase_context *kctx); +void kbase_gpu_vm_unlock(struct kbase_context *kctx); + +int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); + +/** + * @brief Register region and map it on the GPU. + * + * Call kbase_add_va_region() and map the region on the GPU. + */ +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); + +/** + * @brief Remove the region from the GPU and unregister it. + * + * Must be called with context lock held. + */ +int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_mmu_update - Configure an address space on the GPU to the specified + * MMU tables + * + * The caller has the following locking conditions: + * - It must hold kbase_device->mmu_hw_mutex + * - It must hold the hwaccess_lock + * + * @kbdev: Kbase device structure + * @mmut: The set of MMU tables to be configured on the address space + * @as_nr: The address space to be configured + */ +void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + int as_nr); + +/** + * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. + * @kctx: Kbase context + * + * Disable and perform the required cache maintenance to remove the all + * data from provided kbase context from the GPU caches. 
+ * + * The caller has the following locking conditions: + * - It must hold kbase_device->mmu_hw_mutex + * - It must hold the hwaccess_lock + */ +void kbase_mmu_disable(struct kbase_context *kctx); + +/** + * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified + * address space. + * @kbdev: Kbase device + * @as_nr: The address space number to set to unmapped. + * + * This function must only be called during reset/power-up and it used to + * ensure the registers are in a known state. + * + * The caller must hold kbdev->mmu_hw_mutex. + */ +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +/** Dump the MMU tables to a buffer + * + * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the + * buffer is too small then the return value will be NULL. + * + * The GPU vm lock must be held when calling this function. + * + * The buffer returned should be freed with @ref vfree when it is no longer required. + * + * @param[in] kctx The kbase context to dump + * @param[in] nr_pages The number of pages to allocate for the buffer. + * + * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too + * small) + */ +void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); + +/** + * kbase_sync_now - Perform cache maintenance on a memory region + * + * @kctx: The kbase context of the region + * @sset: A syncset structure describing the region and direction of the + * synchronisation required + * + * Return: 0 on success or error code + */ +int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); +void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa, + struct tagged_addr gpu_pa, off_t offset, size_t size, + enum kbase_sync_type sync_fn); + +/* OS specific functions */ +int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); +int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); +void kbase_os_mem_map_lock(struct kbase_context *kctx); +void kbase_os_mem_map_unlock(struct kbase_context *kctx); + +/** + * @brief Update the memory allocation counters for the current process + * + * OS specific call to updates the current memory allocation counters for the current process with + * the supplied delta. + * + * @param[in] kctx The kbase context + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); + +/** + * @brief Add to the memory allocation counters for the current process + * + * OS specific call to add to the current memory allocation counters for the current process by + * the supplied amount. + * + * @param[in] kctx The kernel base context used for the allocation. + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) +{ + kbasep_os_process_page_usage_update(kctx, pages); +} + +/** + * @brief Subtract from the memory allocation counters for the current process + * + * OS specific call to subtract from the current memory allocation counters for the current process by + * the supplied amount. + * + * @param[in] kctx The kernel base context used for the allocation. + * @param[in] pages The desired delta to apply to the memory usage counters. 
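+ *
+ * For example (editorial note based only on this file): kbase_alloc_create()
+ * calls kbase_process_page_usage_inc() for the struct pages backing a native
+ * allocation, and a matching kbase_process_page_usage_dec() is expected when
+ * that tracking object is released, so that the per-process counters stay
+ * balanced.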
+ */ + +static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) +{ + kbasep_os_process_page_usage_update(kctx, 0 - pages); +} + +/** + * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU + * mapping of a memory allocation containing a given address range + * + * Searches for a CPU mapping of any part of any region that fully encloses the + * CPU virtual address range specified by @uaddr and @size. Returns a failure + * indication if only part of the address range lies within a CPU mapping. + * + * @kctx: The kernel base context used for the allocation. + * @uaddr: Start of the CPU virtual address range. + * @size: Size of the CPU virtual address range (in bytes). + * @offset: The offset from the start of the allocation to the specified CPU + * virtual address. + * + * Return: 0 if offset was obtained successfully. Error code otherwise. + */ +int kbasep_find_enclosing_cpu_mapping_offset( + struct kbase_context *kctx, + unsigned long uaddr, size_t size, u64 *offset); + +/** + * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of + * the start of GPU virtual memory region which encloses @gpu_addr for the + * @size length in bytes + * + * Searches for the memory region in GPU virtual memory space which contains + * the region defined by the @gpu_addr and @size, where @gpu_addr is the + * beginning and @size the length in bytes of the provided region. If found, + * the location of the start address of the GPU virtual memory region is + * passed in @start pointer and the location of the offset of the region into + * the GPU virtual memory region is passed in @offset pointer. + * + * @kctx: The kernel base context within which the memory is searched. + * @gpu_addr: GPU virtual address for which the region is sought; defines + * the beginning of the provided region. + * @size: The length (in bytes) of the provided region for which the + * GPU virtual memory region is sought. + * @start: Pointer to the location where the address of the start of + * the found GPU virtual memory region is. + * @offset: Pointer to the location where the offset of @gpu_addr into + * the found GPU virtual memory region is. + */ +int kbasep_find_enclosing_gpu_mapping_start_and_offset( + struct kbase_context *kctx, + u64 gpu_addr, size_t size, u64 *start, u64 *offset); + +/** + * kbase_alloc_phy_pages_helper - Allocates physical pages. + * @alloc: allocation object to add pages to + * @nr_pages_requested: number of physical pages to allocate + * + * Allocates \a nr_pages_requested and updates the alloc object. + * + * Return: 0 if all pages have been successfully allocated. Error code otherwise + * + * Note : The caller must not hold vm_lock, as this could cause a deadlock if + * the kernel OoM killer runs. If the caller must allocate pages while holding + * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. + * + * This function cannot be used from interrupt context + */ +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested); + +/** + * kbase_alloc_phy_pages_helper_locked - Allocates physical pages. + * @alloc: allocation object to add pages to + * @pool: Memory pool to allocate from + * @nr_pages_requested: number of physical pages to allocate + * @prealloc_sa: Information about the partial allocation if the amount + * of memory requested is not a multiple of 2MB. One + * instance of struct kbase_sub_alloc must be allocated by + * the caller iff CONFIG_MALI_2MB_ALLOC is enabled. 
+ * + * Allocates \a nr_pages_requested and updates the alloc object. This function + * does not allocate new pages from the kernel, and therefore will never trigger + * the OoM killer. Therefore, it can be run while the vm_lock is held. + * + * As new pages can not be allocated, the caller must ensure there are + * sufficient pages in the pool. Usage of this function should look like : + * + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * while (kbase_mem_pool_size(pool) < pages_required) { + * kbase_mem_pool_unlock(pool) + * kbase_gpu_vm_unlock(kctx); + * kbase_mem_pool_grow(pool) + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * } + * kbase_alloc_phy_pages_helper_locked(pool) + * kbase_mem_pool_unlock(pool) + * Perform other processing that requires vm_lock... + * kbase_gpu_vm_unlock(kctx); + * + * This ensures that the pool can be grown to the required size and that the + * allocation can complete without another thread using the newly grown pages. + * + * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then + * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be + * alloc->imported.native.kctx->mem_pool. + * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be + * pre-allocated because we must not sleep (due to the usage of kmalloc()) + * whilst holding pool->pool_lock. + * @prealloc_sa shall be set to NULL if it has been consumed by this function + * to indicate that the caller must not free it. + * + * Return: Pointer to array of allocated pages. NULL on failure. + * + * Note : Caller must hold pool->pool_lock + */ +struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, + size_t nr_pages_requested, + struct kbase_sub_alloc **prealloc_sa); + +/** +* @brief Free physical pages. +* +* Frees \a nr_pages and updates the alloc object. +* +* @param[in] alloc allocation object to free pages from +* @param[in] nr_pages_to_free number of physical pages to free +* +* Return: 0 on success, otherwise a negative error code +*/ +int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); + +/** + * kbase_free_phy_pages_helper_locked - Free pages allocated with + * kbase_alloc_phy_pages_helper_locked() + * @alloc: Allocation object to free pages from + * @pool: Memory pool to return freed pages to + * @pages: Pages allocated by kbase_alloc_phy_pages_helper_locked() + * @nr_pages_to_free: Number of physical pages to free + * + * This function atomically frees pages allocated with + * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page + * array that is returned by that function. @pool must be the pool that the + * pages were originally allocated from. + * + * If the mem_pool has been unlocked since the allocation then + * kbase_free_phy_pages_helper() should be used instead. + */ +void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free); + +static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) +{ + SetPagePrivate(p); + if (sizeof(dma_addr_t) > sizeof(p->private)) { + /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the + * private field stays the same. 
So we have to be clever and + * use the fact that we only store DMA addresses of whole pages, + * so the low bits should be zero */ + KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); + set_page_private(p, dma_addr >> PAGE_SHIFT); + } else { + set_page_private(p, dma_addr); + } +} + +static inline dma_addr_t kbase_dma_addr(struct page *p) +{ + if (sizeof(dma_addr_t) > sizeof(p->private)) + return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; + + return (dma_addr_t)page_private(p); +} + +static inline void kbase_clear_dma_addr(struct page *p) +{ + ClearPagePrivate(p); +} + +/** + * @brief Process a page fault. + * + * @param[in] data work_struct passed by queue_work() + */ +void page_fault_worker(struct work_struct *data); + +/** + * @brief Process a bus fault. + * + * @param[in] data work_struct passed by queue_work() + */ +void bus_fault_worker(struct work_struct *data); + +/** + * @brief Flush MMU workqueues. + * + * This function will cause any outstanding page or bus faults to be processed. + * It should be called prior to powering off the GPU. + * + * @param[in] kbdev Device pointer + */ +void kbase_flush_mmu_wqs(struct kbase_device *kbdev); + +/** + * kbase_sync_single_for_device - update physical memory and give GPU ownership + * @kbdev: Device pointer + * @handle: DMA address of region + * @size: Size of region to sync + * @dir: DMA data direction + */ + +void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir); + +/** + * kbase_sync_single_for_cpu - update physical memory and give CPU ownership + * @kbdev: Device pointer + * @handle: DMA address of region + * @size: Size of region to sync + * @dir: DMA data direction + */ + +void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir); + +#ifdef CONFIG_DEBUG_FS +/** + * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. + * @kctx: kbase context + */ +void kbase_jit_debugfs_init(struct kbase_context *kctx); +#endif /* CONFIG_DEBUG_FS */ + +/** + * kbase_jit_init - Initialize the JIT memory pool management + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_jit_init(struct kbase_context *kctx); + +/** + * kbase_jit_allocate - Allocate JIT memory + * @kctx: kbase context + * @info: JIT allocation information + * @ignore_pressure_limit: Whether the JIT memory pressure limit is ignored + * + * Return: JIT allocation on success or NULL on failure. + */ +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit); + +/** + * kbase_jit_free - Free a JIT allocation + * @kctx: kbase context + * @reg: JIT allocation + * + * Frees a JIT allocation and places it into the free pool for later reuse. + */ +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing + * @reg: JIT allocation + */ +void kbase_jit_backing_lost(struct kbase_va_region *reg); + +/** + * kbase_jit_evict - Evict a JIT allocation from the pool + * @kctx: kbase context + * + * Evict the least recently used JIT allocation from the pool. This can be + * required if normal VA allocations are failing due to VA exhaustion. + * + * Return: True if a JIT allocation was freed, false otherwise. 
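+ *
+ * A caller-side sketch (illustrative only; try_alloc() is a placeholder,
+ * the real callers in this driver retry their own allocation path):
+ *
+ *   reg = try_alloc(kctx, info);
+ *   while (!reg && kbase_jit_evict(kctx))
+ *       reg = try_alloc(kctx, info);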
+ */ +bool kbase_jit_evict(struct kbase_context *kctx); + +/** + * kbase_jit_term - Terminate the JIT memory pool management + * @kctx: kbase context + */ +void kbase_jit_term(struct kbase_context *kctx); + +#if MALI_JIT_PRESSURE_LIMIT_BASE +/** + * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of + * kbase_trace_jit_report_gpu_mem() that should only be called once the + * corresponding tracepoint is verified to be enabled + * @kctx: kbase context + * @reg: Just-in-time memory region to trace + * @flags: combination of values from enum kbase_jit_report_flags + */ +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +/** + * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used + * to make a JIT report + * @kctx: kbase context + * @reg: Just-in-time memory region to trace + * @flags: combination of values from enum kbase_jit_report_flags + * + * Information is traced using the trace_mali_jit_report_gpu_mem() tracepoint. + * + * In case that tracepoint is not enabled, this function should have the same + * low overheads as a tracepoint itself (i.e. use of 'jump labels' to avoid + * conditional branches) + * + * This can take the reg_lock on @kctx, do not use in places where this lock is + * already held. + * + * Note: this has to be a macro because at this stage the tracepoints have not + * been included. Also gives no opportunity for the compiler to mess up + * inlining it. + */ +#if MALI_JIT_PRESSURE_LIMIT_BASE +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + do { \ + if (trace_mali_jit_report_gpu_mem_enabled()) \ + kbase_trace_jit_report_gpu_mem_trace_enabled( \ + (kctx), (reg), (flags)); \ + } while (0) +#else +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + CSTD_NOP(kctx, reg, flags) +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +#if MALI_JIT_PRESSURE_LIMIT_BASE +/** + * kbase_jit_report_update_pressure - safely update the JIT physical page + * pressure and JIT region's estimate of used_pages + * @kctx: kbase context, to update the current physical pressure + * @reg: Just-in-time memory region to update with @new_used_pages + * @new_used_pages: new value of number of pages used in the JIT region + * @flags: combination of values from enum kbase_jit_report_flags + * + * Takes care of: + * - correctly updating the pressure given the current reg->used_pages and + * new_used_pages + * - then updating the %kbase_va_region used_pages member + * + * Precondition: + * - new_used_pages <= reg->nr_pages + */ +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags); + +/** + * jit_trim_necessary_pages() - calculate and trim the least pages possible to + * satisfy a new JIT allocation + * + * @kctx: Pointer to the kbase context + * @needed_pages: Number of JIT physical pages by which trimming is requested. + * The actual number of pages trimmed could differ. + * + * Before allocating a new just-in-time memory region or reusing a previous + * one, ensure that the total JIT physical page usage also will not exceed the + * pressure limit. + * + * If there are no reported-on allocations, then we already guarantee this will + * be the case - because our current pressure then only comes from the va_pages + * of each JIT region, hence JIT physical page usage is guaranteed to be + * bounded by this. 
+ * + * However as soon as JIT allocations become "reported on", the pressure is + * lowered to allow new JIT regions to be allocated. It is after such a point + * that the total JIT physical page usage could (either now or in the future on + * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly + * allocated JIT regions. Hence, trim any "reported on" regions. + * + * Any pages freed will go into the pool and be allocated from there in + * kbase_mem_alloc(). + */ +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, + size_t needed_pages); + +/* + * Same as kbase_jit_request_phys_increase(), except that Caller is supposed + * to take jit_evict_lock also on @kctx before calling this function. + */ +static inline void +kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, + size_t needed_pages) +{ + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + + kctx->jit_phys_pages_to_be_allocated += needed_pages; + + kbase_jit_trim_necessary_pages(kctx, + kctx->jit_phys_pages_to_be_allocated); +} + +/** + * kbase_jit_request_phys_increase() - Increment the backing pages count and do + * the required trimming before allocating pages for a JIT allocation. + * + * @kctx: Pointer to the kbase context + * @needed_pages: Number of pages to be allocated for the JIT allocation. + * + * This function needs to be called before allocating backing pages for a + * just-in-time memory region. The backing pages are currently allocated when, + * + * - A new JIT region is created. + * - An old JIT region is reused from the cached pool. + * - GPU page fault occurs for the active JIT region. + * - Backing is grown for the JIT region through the commit ioctl. + * + * This function would ensure that the total JIT physical page usage does not + * exceed the pressure limit even when the backing pages get allocated + * simultaneously for multiple JIT allocations from different threads. + * + * There should be a matching call to kbase_jit_done_phys_increase(), after + * the pages have been allocated and accounted against the active JIT + * allocation. + * + * Caller is supposed to take reg_lock on @kctx before calling this function. + */ +static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx, + size_t needed_pages) +{ + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + + mutex_lock(&kctx->jit_evict_lock); + kbase_jit_request_phys_increase_locked(kctx, needed_pages); + mutex_unlock(&kctx->jit_evict_lock); +} + +/** + * kbase_jit_done_phys_increase() - Decrement the backing pages count after the + * allocation of pages for a JIT allocation. + * + * @kctx: Pointer to the kbase context + * @needed_pages: Number of pages that were allocated for the JIT allocation. + * + * This function should be called after backing pages have been allocated and + * accounted against the active JIT allocation. + * The call should be made when the following have been satisfied: + * when the allocation is on the jit_active_head. + * when additional needed_pages have been allocated. + * kctx->reg_lock was held during the above and has not yet been unlocked. + * Failure to call this function before unlocking the kctx->reg_lock when + * either the above have changed may result in over-accounting the memory. + * This ensures kbase_jit_trim_necessary_pages() gets a consistent count of + * the memory. 
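+ *
+ * A minimal pairing sketch (illustrative only; locking and the actual page
+ * allocation are the caller's responsibility):
+ *
+ *   kbase_jit_request_phys_increase(kctx, needed_pages);
+ *   ... allocate and account the backing pages against the JIT region ...
+ *   kbase_jit_done_phys_increase(kctx, needed_pages);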
+ * + * A matching call to kbase_jit_request_phys_increase() should have been made, + * before the allocation of backing pages. + * + * Caller is supposed to take reg_lock on @kctx before calling this function. + */ +static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx, + size_t needed_pages) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(kctx->jit_phys_pages_to_be_allocated < needed_pages); + + kctx->jit_phys_pages_to_be_allocated -= needed_pages; +} +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +/** + * kbase_has_exec_va_zone - EXEC_VA zone predicate + * + * Determine whether an EXEC_VA zone has been created for the GPU address space + * of the given kbase context. + * + * @kctx: kbase context + * + * Return: True if the kbase context has an EXEC_VA zone. + */ +bool kbase_has_exec_va_zone(struct kbase_context *kctx); + +/** + * kbase_map_external_resource - Map an external resource to the GPU. + * @kctx: kbase context. + * @reg: The region to map. + * @locked_mm: The mm_struct which has been locked for this operation. + * + * Return: The physical allocation which backs the region on success or NULL + * on failure. + */ +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm); + +/** + * kbase_unmap_external_resource - Unmap an external resource from the GPU. + * @kctx: kbase context. + * @reg: The region to unmap or NULL if it has already been released. + * @alloc: The physical allocation being unmapped. + */ +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); + + +/** + * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer. + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * To successfully pin the pages for a user buffer the current mm_struct must + * be the same as the mm_struct of the user buffer. After successfully pinning + * the pages further calls to this function succeed without doing work. + * + * Return: zero on success or negative number on failure. + */ +int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_sticky_resource_init - Initialize sticky resource management. + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_sticky_resource_init(struct kbase_context *kctx); + +/** + * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. + * @kctx: kbase context. + * @gpu_addr: The GPU address of the external resource. + * + * Return: The metadata object which represents the binding between the + * external resource and the kbase context on success or NULL on failure. + */ +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr); + +/** + * kbase_sticky_resource_release - Release a reference on a sticky resource. + * @kctx: kbase context. + * @meta: Binding metadata. + * @gpu_addr: GPU address of the external resource. + * + * If meta is NULL then gpu_addr will be used to scan the metadata list and + * find the matching metadata (if any), otherwise the provided meta will be + * used and gpu_addr will be ignored. + * + * Return: True if the release found the metadata and the reference was dropped. 
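+ *
+ * A minimal acquire/release pairing sketch (illustrative only; error
+ * handling and the locking expected by these helpers are omitted):
+ *
+ *   meta = kbase_sticky_resource_acquire(kctx, gpu_addr);
+ *   if (meta) {
+ *       ... use the external resource ...
+ *       kbase_sticky_resource_release(kctx, meta, gpu_addr);
+ *   }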
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release_force - Release a sticky resource.
+ * @kctx: kbase context.
+ * @meta: Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * Return: True if the release found the metadata and the resource was
+ * released.
+ */
+bool kbase_sticky_resource_release_force(struct kbase_context *kctx,
+		struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_pool_lock - Lock a memory pool
+ * @pool: Memory pool to lock
+ */
+static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
+{
+	spin_lock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_pool_unlock - Release a memory pool
+ * @pool: Memory pool to unlock
+ */
+static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool)
+{
+	spin_unlock(&pool->pool_lock);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc);
+
+
+/**
+ * kbase_mem_umm_map - Map dma-buf
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region of the imported dma-buf to map
+ *
+ * Map a dma-buf on the GPU. The mappings are reference counted.
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int kbase_mem_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg);
+
+/**
+ * kbase_mem_umm_unmap - Unmap dma-buf
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region of the imported dma-buf to unmap
+ * @alloc: Pointer to the alloc to release
+ *
+ * Unmap a dma-buf from the GPU. The mappings are reference counted.
+ *
+ * @reg must be the original region with GPU mapping of @alloc; or NULL. If
+ * @reg is NULL, or doesn't match @alloc, the GPU page table entries matching
+ * @reg will not be updated.
+ *
+ * @alloc must be a valid physical allocation of type
+ * KBASE_MEM_TYPE_IMPORTED_UMM that was previously mapped by
+ * kbase_mem_umm_map(). The dma-buf attachment referenced by @alloc will
+ * release its mapping reference, and if the refcount reaches 0, also be
+ * unmapped, regardless of the value of @reg.
+ */
+void kbase_mem_umm_unmap(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_mem_do_sync_imported - Sync caches for imported memory
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region with imported memory to sync
+ * @sync_fn: The type of sync operation to perform
+ *
+ * Sync CPU caches for supported (currently only dma-buf (UMM)) memory.
+ * Attempting to sync unsupported imported memory types will result in an error
+ * code, -EINVAL.
+ *
+ * Return: 0 on success, or a negative error code.
+ */
+int kbase_mem_do_sync_imported(struct kbase_context *kctx,
+		struct kbase_va_region *reg, enum kbase_sync_type sync_fn);
+
+/**
+ * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to
+ * an unaligned address at a given offset from the start of a target page.
+ * + * @dest_pages: Pointer to the array of pages to which the content is + * to be copied from the provided @src_page. + * @src_page: Pointer to the page which correspond to the source page + * from which the copying will take place. + * @to_copy: Total number of bytes pending to be copied from + * @src_page to @target_page_nr within @dest_pages. + * This will get decremented by number of bytes we + * managed to copy from source page to target pages. + * @nr_pages: Total number of pages present in @dest_pages. + * @target_page_nr: Target page number to which @src_page needs to be + * copied. This will get incremented by one if + * we are successful in copying from source page. + * @offset: Offset in bytes into the target pages from which the + * copying is to be performed. + * + * Return: 0 on success, or a negative error code. + */ +int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, + void *src_page, size_t *to_copy, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset); + +#endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c new file mode 100644 index 000000000000..f712794487d9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c @@ -0,0 +1,3072 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_mem_linux.c + * Base kernel memory APIs, Linux implementation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) +#include +#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ + (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) +/* Enable workaround for ion for kernels prior to v5.0.0 and from v5.3.0 + * onwards. + * + * For kernels prior to v4.12, workaround is needed as ion lacks the cache + * maintenance in begin_cpu_access and end_cpu_access methods. + * + * For kernels prior to v4.17.2, workaround is needed to avoid the potentially + * disruptive warnings which can come if begin_cpu_access and end_cpu_access + * methods are not called in pairs. + * Note that some long term maintenance kernel versions (e.g. 4.9.x, 4.14.x) + * only require this workaround on their earlier releases. However it is still + * safe to use it on such releases, and it simplifies the version check. + * + * For kernels later than v4.17.2, workaround is needed as ion can potentially + * end up calling dma_sync_sg_for_* for a dma-buf importer that hasn't mapped + * the attachment. 
This would result in a kernel panic as ion populates the + * dma_address when the attachment is mapped and kernel derives the physical + * address for cache maintenance from the dma_address. + * With some multi-threaded tests it has been seen that the same dma-buf memory + * gets imported twice on Mali DDK side and so the problem of sync happening + * with an importer having an unmapped attachment comes at the time of 2nd + * import. The same problem can if there is another importer of dma-buf + * memory. + * + * Workaround can be safely disabled for kernels between v5.0.0 and v5.2.2, + * as all the above stated issues are not there. + * + * dma_sync_sg_for_* calls will be made directly as a workaround using the + * Kbase's attachment to dma-buf that was previously mapped. + */ +#define KBASE_MEM_ION_SYNC_WORKAROUND +#endif + +#define IR_THRESHOLD_STEPS (256u) + + +static int kbase_vmap_phy_pages(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 offset_bytes, size_t size, + struct kbase_vmap_struct *map); +static void kbase_vunmap_phy_pages(struct kbase_context *kctx, + struct kbase_vmap_struct *map); + +static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); + +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/* Retrieve the associated region pointer if the GPU address corresponds to + * one of the event memory pages. The enclosing region, if found, shouldn't + * have been marked as free. + */ +static struct kbase_va_region *kbase_find_event_mem_region( + struct kbase_context *kctx, u64 gpu_addr) +{ + + return NULL; +} + +/** + * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping + * of the physical allocation belonging to a + * region + * @kctx: The kernel base context @reg belongs to. + * @reg: The region whose physical allocation is to be mapped + * @vsize: The size of the requested region, in pages + * @size: The size in pages initially committed to the region + * + * Return: 0 on success, otherwise an error code indicating failure + * + * Maps the physical allocation backing a non-free @reg, so it may be + * accessed directly from the kernel. This is only supported for physical + * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of + * physical allocation. + * + * The mapping is stored directly in the allocation that backs @reg. The + * refcount is not incremented at this point. Instead, use of the mapping should + * be surrounded by kbase_phy_alloc_mapping_get() and + * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the + * client is accessing it. + * + * Both cached and uncached regions are allowed, but any sync operations are the + * responsibility of the client using the permanent mapping. + * + * A number of checks are made to ensure that a region that needs a permanent + * mapping can actually be supported: + * - The region must be created as fully backed + * - The region must not be growable + * + * This function will fail if those checks are not satisfied. 
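+ *
+ * A caller-side sketch of using the resulting permanent mapping, based on
+ * the get/put helpers defined later in this file (illustrative only):
+ *
+ *   ptr = kbase_phy_alloc_mapping_get(kctx, gpu_addr, &mapping);
+ *   if (ptr) {
+ *       ... access the allocation through ptr ...
+ *       kbase_phy_alloc_mapping_put(kctx, mapping);
+ *   }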
+ * + * On success, the region will also be forced into a certain kind: + * - It will no longer be growable + */ +static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t vsize, size_t size) +{ + size_t size_bytes = (size << PAGE_SHIFT); + struct kbase_vmap_struct *kern_mapping; + int err = 0; + + /* Can only map in regions that are always fully committed + * Don't setup the mapping twice + * Only support KBASE_MEM_TYPE_NATIVE allocations + */ + if (vsize != size || reg->cpu_alloc->permanent_map != NULL || + reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + return -EINVAL; + + if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - + atomic_read(&kctx->permanent_mapped_pages))) { + dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages", + (u64)size, + KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, + atomic_read(&kctx->permanent_mapped_pages)); + return -ENOMEM; + } + + kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); + if (!kern_mapping) + return -ENOMEM; + + err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping); + if (err < 0) + goto vmap_fail; + + /* No support for growing or shrinking mapped regions */ + reg->flags &= ~KBASE_REG_GROWABLE; + + reg->cpu_alloc->permanent_map = kern_mapping; + atomic_add(size, &kctx->permanent_mapped_pages); + + return 0; +vmap_fail: + kfree(kern_mapping); + return err; +} + +void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + WARN_ON(!alloc->permanent_map); + kbase_vunmap_phy_pages(kctx, alloc->permanent_map); + kfree(alloc->permanent_map); + + alloc->permanent_map = NULL; + + /* Mappings are only done on cpu_alloc, so don't need to worry about + * this being reduced a second time if a separate gpu_alloc is + * freed + */ + WARN_ON(alloc->nents > atomic_read(&kctx->permanent_mapped_pages)); + atomic_sub(alloc->nents, &kctx->permanent_mapped_pages); +} + +void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, + u64 gpu_addr, + struct kbase_vmap_struct **out_kern_mapping) +{ + struct kbase_va_region *reg; + void *kern_mem_ptr = NULL; + struct kbase_vmap_struct *kern_mapping; + u64 mapping_offset; + + WARN_ON(!kctx); + WARN_ON(!out_kern_mapping); + + kbase_gpu_vm_lock(kctx); + + /* First do a quick lookup in the list of event memory regions */ + reg = kbase_find_event_mem_region(kctx, gpu_addr); + + if (!reg) { + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + } + + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + kern_mapping = reg->cpu_alloc->permanent_map; + if (kern_mapping == NULL) + goto out_unlock; + + mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + + /* Refcount the allocations to prevent them disappearing */ + WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc); + WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc); + (void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc); + (void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc); + + kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset); + *out_kern_mapping = kern_mapping; +out_unlock: + kbase_gpu_vm_unlock(kctx); + return kern_mem_ptr; +} + +void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, + struct kbase_vmap_struct *kern_mapping) +{ + WARN_ON(!kctx); + WARN_ON(!kern_mapping); + + WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx); + WARN_ON(kern_mapping != 
kern_mapping->cpu_alloc->permanent_map); + + kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc); + kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc); + + /* kern_mapping and the gpu/cpu phy allocs backing it must not be used + * from now on + */ +} + +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, + u64 *gpu_va) +{ + int zone; + struct kbase_va_region *reg; + struct rb_root *rbtree; + struct device *dev; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(flags); + KBASE_DEBUG_ASSERT(gpu_va); + + dev = kctx->kbdev->dev; + dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n", + va_pages, commit_pages, extent, *flags); + + if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) + *gpu_va = 0; /* return 0 on failure */ + else + dev_err(dev, + "Keeping requested GPU VA of 0x%llx\n", + (unsigned long long)*gpu_va); + + if (!kbase_check_alloc_flags(*flags)) { + dev_warn(dev, + "kbase_mem_alloc called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { + /* Mask coherency flags if infinite cache is enabled to prevent + * the skipping of syncs from BASE side. + */ + *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | + BASE_MEM_COHERENT_SYSTEM); + } +#endif + + if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { + /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); + goto bad_flags; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM; + } + + if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent)) + goto bad_sizes; + +#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED + /* Ensure that memory is fully physically-backed. 
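+	 * When CONFIG_MALI_MEMORY_FULLY_BACKED is set, grow-on-GPU-page-fault
+	 * allocations are committed in full up front by raising commit_pages
+	 * to the full virtual size of the region.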
*/ + if (*flags & BASE_MEM_GROW_ON_GPF) + commit_pages = va_pages; +#endif + + /* find out which VA zone to use */ + if (*flags & BASE_MEM_SAME_VA) { + rbtree = &kctx->reg_rbtree_same; + zone = KBASE_REG_ZONE_SAME_VA; + } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { + rbtree = &kctx->reg_rbtree_exec; + zone = KBASE_REG_ZONE_EXEC_VA; + } else { + rbtree = &kctx->reg_rbtree_custom; + zone = KBASE_REG_ZONE_CUSTOM_VA; + } + + reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va), + va_pages, zone); + + if (!reg) { + dev_err(dev, "Failed to allocate free region"); + goto no_region; + } + + if (kbase_update_region_flags(kctx, reg, *flags) != 0) + goto invalid_flags; + + if (kbase_reg_prepare_native(reg, kctx, + base_mem_group_id_get(*flags)) != 0) { + dev_err(dev, "Failed to prepare region"); + goto prepare_failed; + } + + if (*flags & BASE_MEM_GROW_ON_GPF) { + unsigned int const ir_threshold = atomic_read( + &kctx->kbdev->memdev.ir_threshold); + + reg->threshold_pages = ((va_pages * ir_threshold) + + (IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; + } else + reg->threshold_pages = 0; + + if (*flags & BASE_MEM_GROW_ON_GPF) { + /* kbase_check_alloc_sizes() already checks extent is valid for + * assigning to reg->extent */ + reg->extent = extent; + } else if (*flags & BASE_MEM_TILER_ALIGN_TOP) { + reg->extent = extent; + } else { + reg->extent = 0; + } + + if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { + dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", + (unsigned long long)commit_pages, + (unsigned long long)va_pages); + goto no_mem; + } + reg->initial_commit = commit_pages; + + kbase_gpu_vm_lock(kctx); + + if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { + /* Permanent kernel mappings must happen as soon as + * reg->cpu_alloc->pages is ready. Currently this happens after + * kbase_alloc_phy_pages(). If we move that to setup pages + * earlier, also move this call too + */ + int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, + commit_pages); + if (err < 0) { + kbase_gpu_vm_unlock(kctx); + goto no_kern_mapping; + } + } + + + /* mmap needed to setup VA? 
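+	 * For BASE_MEM_SAME_VA allocations no GPU mapping is created at this
+	 * point; a cookie is handed back instead and the region is parked in
+	 * kctx->pending_regions until user space mmap()s the cookie offset,
+	 * which is when the actual CPU and GPU mappings are established.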
*/ + if (*flags & BASE_MEM_SAME_VA) { + unsigned long cookie, cookie_nr; + + /* Bind to a cookie */ + if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { + dev_err(dev, "No cookies available for allocation!"); + kbase_gpu_vm_unlock(kctx); + goto no_cookie; + } + /* return a cookie */ + cookie_nr = find_first_bit(kctx->cookies, BITS_PER_LONG); + bitmap_clear(kctx->cookies, cookie_nr, 1); + BUG_ON(kctx->pending_regions[cookie_nr]); + kctx->pending_regions[cookie_nr] = reg; + + /* relocate to correct base */ + cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); + cookie <<= PAGE_SHIFT; + + *gpu_va = (u64) cookie; + } else /* we control the VA */ { + if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1) != 0) { + dev_warn(dev, "Failed to map memory on GPU"); + kbase_gpu_vm_unlock(kctx); + goto no_mmap; + } + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { + kbase_jit_done_phys_increase(kctx, commit_pages); + + mutex_lock(&kctx->jit_evict_lock); + WARN_ON(!list_empty(®->jit_node)); + list_add(®->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_evict_lock); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + kbase_gpu_vm_unlock(kctx); + return reg; + +no_mmap: +no_cookie: +no_kern_mapping: +no_mem: +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { + kbase_gpu_vm_lock(kctx); + kbase_jit_done_phys_increase(kctx, commit_pages); + kbase_gpu_vm_unlock(kctx); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); +invalid_flags: +prepare_failed: + kfree(reg); +no_region: +bad_sizes: +bad_flags: + return NULL; +} +KBASE_EXPORT_TEST_API(kbase_mem_alloc); + +int kbase_mem_query(struct kbase_context *kctx, + u64 gpu_addr, u64 query, u64 * const out) +{ + struct kbase_va_region *reg; + int ret = -EINVAL; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(out); + + if (gpu_addr & ~PAGE_MASK) { + dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid"); + return -EINVAL; + } + + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + switch (query) { + case KBASE_MEM_QUERY_COMMIT_SIZE: + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) { + *out = kbase_reg_current_backed_size(reg); + } else { + size_t i; + struct kbase_aliased *aliased; + *out = 0; + aliased = reg->cpu_alloc->imported.alias.aliased; + for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++) + *out += aliased[i].length; + } + break; + case KBASE_MEM_QUERY_VA_SIZE: + *out = reg->nr_pages; + break; + case KBASE_MEM_QUERY_FLAGS: + { + *out = 0; + if (KBASE_REG_CPU_WR & reg->flags) + *out |= BASE_MEM_PROT_CPU_WR; + if (KBASE_REG_CPU_RD & reg->flags) + *out |= BASE_MEM_PROT_CPU_RD; + if (KBASE_REG_CPU_CACHED & reg->flags) + *out |= BASE_MEM_CACHED_CPU; + if (KBASE_REG_GPU_WR & reg->flags) + *out |= BASE_MEM_PROT_GPU_WR; + if (KBASE_REG_GPU_RD & reg->flags) + *out |= BASE_MEM_PROT_GPU_RD; + if (!(KBASE_REG_GPU_NX & reg->flags)) + *out |= BASE_MEM_PROT_GPU_EX; + if (KBASE_REG_SHARE_BOTH & reg->flags) + *out |= BASE_MEM_COHERENT_SYSTEM; + if (KBASE_REG_SHARE_IN & reg->flags) + *out |= BASE_MEM_COHERENT_LOCAL; + if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { + /* Prior to this version, this was known about by + * user-side but we did not return them. 
Returning + * it caused certain clients that were not expecting + * it to fail, so we omit it as a special-case for + * compatibility reasons + */ + if (KBASE_REG_PF_GROW & reg->flags) + *out |= BASE_MEM_GROW_ON_GPF; + } + if (mali_kbase_supports_mem_protected(kctx->api_version)) { + /* Prior to this version, this was known about by + * user-side but we did not return them. Returning + * it caused certain clients that were not expecting + * it to fail, so we omit it as a special-case for + * compatibility reasons + */ + if (KBASE_REG_PROTECTED & reg->flags) + *out |= BASE_MEM_PROTECTED; + } + if (KBASE_REG_TILER_ALIGN_TOP & reg->flags) + *out |= BASE_MEM_TILER_ALIGN_TOP; + if (!(KBASE_REG_GPU_CACHED & reg->flags)) + *out |= BASE_MEM_UNCACHED_GPU; + if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) + *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; + + *out |= base_mem_group_id_set(reg->cpu_alloc->group_id); + + WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, + "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); + *out &= BASE_MEM_FLAGS_QUERYABLE; + break; + } + default: + *out = 0; + goto out_unlock; + } + + ret = 0; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + return ret; +} + +/** + * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the + * Ephemeral memory eviction list. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages which can be freed. + */ +static +unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + unsigned long pages = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + + mutex_lock(&kctx->jit_evict_lock); + + list_for_each_entry(alloc, &kctx->evict_list, evict_node) + pages += alloc->nents; + + mutex_unlock(&kctx->jit_evict_lock); + return pages; +} + +/** + * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction + * list for pages and try to reclaim them. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages freed (can be less then requested) or -1 if the + * shrinker failed to free pages in its pool. + * + * Note: + * This function accesses region structures without taking the region lock, + * this is required as the OOM killer can call the shrinker after the region + * lock has already been held. + * This is safe as we can guarantee that a region on the eviction list will + * not be freed (kbase_mem_free_region removes the allocation from the list + * before destroying it), or modified by other parts of the driver. + * The eviction list itself is guarded by the eviction lock and the MMU updates + * are protected by their own lock. + */ +static +unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + struct kbase_mem_phy_alloc *tmp; + unsigned long freed = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + mutex_lock(&kctx->jit_evict_lock); + + list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { + int err; + + err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, + 0, alloc->nents); + if (err != 0) { + /* + * Failed to remove GPU mapping, tell the shrinker + * to stop trying to shrink our slab even though we + * have pages in it. + */ + freed = -1; + goto out_unlock; + } + + /* + * Update alloc->evicted before freeing the backing so the + * helper can determine that it needs to bypass the accounting + * and memory pool. 
+ */ + alloc->evicted = alloc->nents; + + kbase_free_phy_pages_helper(alloc, alloc->evicted); + freed += alloc->evicted; + list_del_init(&alloc->evict_node); + + /* + * Inform the JIT allocator this region has lost backing + * as it might need to free the allocation. + */ + kbase_jit_backing_lost(alloc->reg); + + /* Enough pages have been freed so stop now */ + if (freed > sc->nr_to_scan) + break; + } +out_unlock: + mutex_unlock(&kctx->jit_evict_lock); + + return freed; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_evictable_reclaim_count_objects(s, sc); + + return kbase_mem_evictable_reclaim_scan_objects(s, sc); +} +#endif + +int kbase_mem_evictable_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->evict_list); + mutex_init(&kctx->jit_evict_lock); + + /* Register shrinker */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; +#else + kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; + kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; +#endif + kctx->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + kctx->reclaim.batch = 0; +#endif + register_shrinker(&kctx->reclaim); + return 0; +} + +void kbase_mem_evictable_deinit(struct kbase_context *kctx) +{ + unregister_shrinker(&kctx->reclaim); +} + +/** + * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. + * @alloc: The physical allocation + */ +void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; + int __maybe_unused new_page_count; + + kbase_process_page_usage_dec(kctx, alloc->nents); + new_page_count = atomic_sub_return(alloc->nents, + &kctx->used_pages); + atomic_sub(alloc->nents, &kctx->kbdev->memdev.used_pages); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + kbase_trace_gpu_mem_usage_dec(kbdev, kctx, alloc->nents); +} + +/** + * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. + * @alloc: The physical allocation + */ +static +void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; + int __maybe_unused new_page_count; + + new_page_count = atomic_add_return(alloc->nents, + &kctx->used_pages); + atomic_add(alloc->nents, &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters so that the allocation is accounted for + * against the process and thus is visible to the OOM killer, + */ + kbase_process_page_usage_inc(kctx, alloc->nents); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents); +} + +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.native.kctx; + + lockdep_assert_held(&kctx->reg_lock); + + kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, + 0, gpu_alloc->nents); + + mutex_lock(&kctx->jit_evict_lock); + /* This allocation can't already be on a list. 
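+	 * It is only added to the eviction list here, and it is taken off
+	 * again either in kbase_mem_evictable_unmake() or when the shrinker
+	 * reclaims its backing.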
*/ + WARN_ON(!list_empty(&gpu_alloc->evict_node)); + + /* + * Add the allocation to the eviction list, after this point the shrink + * can reclaim it. + */ + list_add(&gpu_alloc->evict_node, &kctx->evict_list); + mutex_unlock(&kctx->jit_evict_lock); + kbase_mem_evictable_mark_reclaim(gpu_alloc); + + gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; + return 0; +} + +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.native.kctx; + int err = 0; + + lockdep_assert_held(&kctx->reg_lock); + + mutex_lock(&kctx->jit_evict_lock); + /* + * First remove the allocation from the eviction list as it's no + * longer eligible for eviction. + */ + list_del_init(&gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + + if (gpu_alloc->evicted == 0) { + /* + * The backing is still present, update the VM stats as it's + * in use again. + */ + kbase_mem_evictable_unmark_reclaim(gpu_alloc); + } else { + /* If the region is still alive ... */ + if (gpu_alloc->reg) { + /* ... allocate replacement backing ... */ + err = kbase_alloc_phy_pages_helper(gpu_alloc, + gpu_alloc->evicted); + + /* + * ... and grow the mapping back to its + * pre-eviction size. + */ + if (!err) + err = kbase_mem_grow_gpu_mapping(kctx, + gpu_alloc->reg, + gpu_alloc->evicted, 0); + + gpu_alloc->evicted = 0; + } + } + + /* If the region is still alive remove the DONT_NEED attribute. */ + if (gpu_alloc->reg) + gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; + + return (err == 0); +} + +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) +{ + struct kbase_va_region *reg; + int ret = -EINVAL; + unsigned int real_flags = 0; + unsigned int new_flags = 0; + bool prev_needed, new_needed; + + KBASE_DEBUG_ASSERT(kctx); + + if (!gpu_addr) + return -EINVAL; + + if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) + return -EINVAL; + + /* nuke other bits */ + flags &= mask; + + /* check for only supported flags */ + if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) + goto out; + + /* mask covers bits we don't support? */ + if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) + goto out; + + /* convert flags */ + if (BASE_MEM_COHERENT_SYSTEM & flags) + real_flags |= KBASE_REG_SHARE_BOTH; + else if (BASE_MEM_COHERENT_LOCAL & flags) + real_flags |= KBASE_REG_SHARE_IN; + + /* now we can lock down the context, and find the region */ + down_write(¤t->mm->mmap_sem); + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + /* Is the region being transitioning between not needed and needed? 
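+	 * In other words, is KBASE_REG_DONT_NEED being set or cleared on it?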
*/ + prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; + new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; + if (prev_needed != new_needed) { + /* Aliased allocations can't be made ephemeral */ + if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) + goto out_unlock; + + if (new_needed) { + /* Only native allocations can be marked not needed */ + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { + ret = -EINVAL; + goto out_unlock; + } + ret = kbase_mem_evictable_make(reg->gpu_alloc); + if (ret) + goto out_unlock; + } else { + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } + + /* limit to imported memory */ + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) + goto out_unlock; + + /* shareability flags are ignored for GPU uncached memory */ + if (!(reg->flags & KBASE_REG_GPU_CACHED)) { + ret = 0; + goto out_unlock; + } + + /* no change? */ + if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { + ret = 0; + goto out_unlock; + } + + new_flags = reg->flags & ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); + new_flags |= real_flags; + + /* Currently supporting only imported memory */ + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) { + ret = -EINVAL; + goto out_unlock; + } + + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { + /* Future use will use the new flags, existing mapping + * will NOT be updated as memory should not be in use + * by the GPU when updating the flags. + */ + WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count); + ret = 0; + } else if (reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + /* + * When CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is not enabled the + * dma-buf GPU mapping should always be present, check that + * this is the case and warn and skip the page table update if + * not. + * + * Then update dma-buf GPU mapping with the new flags. + * + * Note: The buffer must not be in use on the GPU when + * changing flags. If the buffer is in active use on + * the GPU, there is a risk that the GPU may trigger a + * shareability fault, as it will see the same + * addresses from buffer with different shareability + * properties. + */ + dev_dbg(kctx->kbdev->dev, + "Updating page tables on mem flag change\n"); + ret = kbase_mmu_update_pages(kctx, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + new_flags, + reg->gpu_alloc->group_id); + if (ret) + dev_warn(kctx->kbdev->dev, + "Failed to update GPU page tables on flag change: %d\n", + ret); + } else + WARN_ON(!reg->gpu_alloc->imported.umm.current_mapping_usage_count); + + /* If everything is good, then set the new flags on the region. */ + if (!ret) + reg->flags = new_flags; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + up_write(¤t->mm->mmap_sem); +out: + return ret; +} + +#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) + +int kbase_mem_do_sync_imported(struct kbase_context *kctx, + struct kbase_va_region *reg, enum kbase_sync_type sync_fn) +{ + int ret = -EINVAL; + struct dma_buf *dma_buf; + enum dma_data_direction dir = DMA_BIDIRECTIONAL; + + lockdep_assert_held(&kctx->reg_lock); + + /* We assume that the same physical allocation object is used for both + * GPU and CPU for imported buffers. 
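+	 * (i.e. reg->cpu_alloc == reg->gpu_alloc, which the WARN_ON below
+	 * checks).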
+ */ + WARN_ON(reg->cpu_alloc != reg->gpu_alloc); + + /* Currently only handle dma-bufs */ + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) + return ret; + /* + * Attempting to sync with CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND + * enabled can expose us to a Linux Kernel issue between v4.6 and + * v4.19. We will not attempt to support cache syncs on dma-bufs that + * are mapped on demand (i.e. not on import), even on pre-4.6, neither + * on 4.20 or newer kernels, because this makes it difficult for + * userspace to know when they can rely on the cache sync. + * Instead, only support syncing when we always map dma-bufs on import, + * or if the particular buffer is mapped right now. + */ + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND) && + !reg->gpu_alloc->imported.umm.current_mapping_usage_count) + return ret; + + dma_buf = reg->gpu_alloc->imported.umm.dma_buf; + + switch (sync_fn) { + case KBASE_SYNC_TO_DEVICE: + dev_dbg(kctx->kbdev->dev, + "Syncing imported buffer at GPU VA %llx to GPU\n", + reg->start_pfn); + +#ifdef KBASE_MEM_ION_SYNC_WORKAROUND + if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { + struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; + struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; + + dma_sync_sg_for_device(attachment->dev, sgt->sgl, + sgt->nents, dir); + ret = 0; + } +#else + /* Though the below version check could be superfluous depending upon the version condition + * used for enabling KBASE_MEM_ION_SYNC_WORKAROUND, we still keep this check here to allow + * ease of modification for non-ION systems or systems where ION has been patched. + */ +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) + dma_buf_end_cpu_access(dma_buf, + 0, dma_buf->size, + dir); + ret = 0; +#else + ret = dma_buf_end_cpu_access(dma_buf, + dir); +#endif +#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ + break; + case KBASE_SYNC_TO_CPU: + dev_dbg(kctx->kbdev->dev, + "Syncing imported buffer at GPU VA %llx to CPU\n", + reg->start_pfn); + +#ifdef KBASE_MEM_ION_SYNC_WORKAROUND + if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { + struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; + struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; + + dma_sync_sg_for_cpu(attachment->dev, sgt->sgl, + sgt->nents, dir); + ret = 0; + } +#else + ret = dma_buf_begin_cpu_access(dma_buf, +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) + 0, dma_buf->size, +#endif + dir); +#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ + break; + }; + + if (unlikely(ret)) + dev_warn(kctx->kbdev->dev, + "Failed to sync mem region %pK at GPU VA %llx: %d\n", + reg, reg->start_pfn, ret); + + return ret; +} + +/** + * kbase_mem_umm_unmap_attachment - Unmap dma-buf attachment + * @kctx: Pointer to kbase context + * @alloc: Pointer to allocation with imported dma-buf memory to unmap + * + * This will unmap a dma-buf. Must be called after the GPU page tables for the + * region have been torn down. 
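+ *
+ * This is the counterpart of kbase_mem_umm_map_attachment(): the sg_table
+ * obtained from dma_buf_map_attachment() is released with
+ * dma_buf_unmap_attachment(), the cached page array is poisoned and the
+ * dma-buf usage accounting is dropped.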
+ */ +static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct tagged_addr *pa = alloc->pages; + + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + + kbase_remove_dma_buf_usage(kctx, alloc); + + memset(pa, 0xff, sizeof(*pa) * alloc->nents); + alloc->nents = 0; +} + +/** + * kbase_mem_umm_map_attachment - Prepare attached dma-buf for GPU mapping + * @kctx: Pointer to kbase context + * @reg: Pointer to region with imported dma-buf memory to map + * + * Map the dma-buf and prepare the page array with the tagged Mali physical + * addresses for GPU mapping. + * + * Return: 0 on success, or negative error code + */ +static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + struct sg_table *sgt; + struct scatterlist *s; + int i; + struct tagged_addr *pa; + int err; + size_t count = 0; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + + WARN_ON_ONCE(alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM); + WARN_ON_ONCE(alloc->imported.umm.sgt); + + sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, + DMA_BIDIRECTIONAL); + if (IS_ERR_OR_NULL(sgt)) + return -EINVAL; + + /* save for later */ + alloc->imported.umm.sgt = sgt; + + pa = kbase_get_gpu_phy_pages(reg); + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + size_t j, pages = PFN_UP(sg_dma_len(s)); + + WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), + "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", + sg_dma_len(s)); + + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long) sg_dma_address(s)); + + for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) + *pa++ = as_tagged(sg_dma_address(s) + + (j << PAGE_SHIFT)); + WARN_ONCE(j < pages, + "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size); + } + + if (!(reg->flags & KBASE_REG_IMPORT_PAD) && + WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size)) { + err = -EINVAL; + goto err_unmap_attachment; + } + + /* Update nents as we now have pages to map */ + alloc->nents = count; + kbase_add_dma_buf_usage(kctx, alloc); + + return 0; + +err_unmap_attachment: + kbase_mem_umm_unmap_attachment(kctx, alloc); + + return err; +} + +int kbase_mem_umm_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + int err; + struct kbase_mem_phy_alloc *alloc; + unsigned long gwt_mask = ~0; + + lockdep_assert_held(&kctx->reg_lock); + + alloc = reg->gpu_alloc; + + alloc->imported.umm.current_mapping_usage_count++; + if (alloc->imported.umm.current_mapping_usage_count != 1) { + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || + alloc->imported.umm.need_sync) { + if (!kbase_is_region_invalid_or_free(reg)) { + err = kbase_mem_do_sync_imported(kctx, reg, + KBASE_SYNC_TO_DEVICE); + WARN_ON_ONCE(err); + } + } + return 0; + } + + err = kbase_mem_umm_map_attachment(kctx, reg); + if (err) + goto bad_map_attachment; + +#ifdef CONFIG_MALI_CINSTR_GWT + if (kctx->gwt_enabled) + gwt_mask = ~KBASE_REG_GPU_WR; +#endif + + err = kbase_mmu_insert_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, + kctx->as_nr, + alloc->group_id); + if (err) + goto bad_insert; + + if (reg->flags & KBASE_REG_IMPORT_PAD && + 
!WARN_ON(reg->nr_pages < alloc->nents)) { + /* For padded imported dma-buf memory, map the dummy aliasing + * page from the end of the dma-buf pages, to the end of the + * region using a read only mapping. + * + * Assume alloc->nents is the number of actual pages in the + * dma-buf memory. + */ + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + alloc->nents, + kctx->aliasing_sink_page, + reg->nr_pages - alloc->nents, + (reg->flags | KBASE_REG_GPU_RD) & + ~KBASE_REG_GPU_WR, + KBASE_MEM_GROUP_SINK); + if (err) + goto bad_pad_insert; + } + + return 0; + +bad_pad_insert: + kbase_mmu_teardown_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + alloc->nents, + kctx->as_nr); +bad_insert: + kbase_mem_umm_unmap_attachment(kctx, alloc); +bad_map_attachment: + alloc->imported.umm.current_mapping_usage_count--; + + return err; +} + +void kbase_mem_umm_unmap(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +{ + alloc->imported.umm.current_mapping_usage_count--; + if (alloc->imported.umm.current_mapping_usage_count) { + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || + alloc->imported.umm.need_sync) { + if (!kbase_is_region_invalid_or_free(reg)) { + int err = kbase_mem_do_sync_imported(kctx, reg, + KBASE_SYNC_TO_CPU); + WARN_ON_ONCE(err); + } + } + return; + } + + if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { + int err; + + err = kbase_mmu_teardown_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + reg->nr_pages, + kctx->as_nr); + WARN_ON(err); + } + + kbase_mem_umm_unmap_attachment(kctx, alloc); +} + +static int get_umm_memory_group_id(struct kbase_context *kctx, + struct dma_buf *dma_buf) +{ + int group_id = BASE_MEM_GROUP_DEFAULT; + + if (kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id) { + struct memory_group_manager_import_data mgm_import_data; + + mgm_import_data.type = + MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF; + mgm_import_data.u.dma_buf = dma_buf; + + group_id = kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id( + kctx->kbdev->mgm_dev, &mgm_import_data); + } + + return group_id; +} + +/** + * kbase_mem_from_umm - Import dma-buf memory into kctx + * @kctx: Pointer to kbase context to import memory into + * @fd: File descriptor of dma-buf to import + * @va_pages: Pointer where virtual size of the region will be output + * @flags: Pointer to memory flags + * @padding: Number of read only padding pages to be inserted at the end of the + * GPU mapping of the dma-buf + * + * Return: Pointer to new kbase_va_region object of the imported dma-buf, or + * NULL on error. + * + * This function imports a dma-buf into kctx, and created a kbase_va_region + * object that wraps the dma-buf. 
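+ *
+ * The import sequence, in outline (illustrative summary; see the function
+ * body below for the error handling this leaves out):
+ *
+ *   dma_buf = dma_buf_get(fd);
+ *   dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+ *   reg = kbase_alloc_free_region(...);
+ *   reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
+ *           KBASE_MEM_TYPE_IMPORTED_UMM, group_id);
+ *
+ * and, unless CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled, the attachment
+ * is mapped immediately via kbase_mem_umm_map_attachment().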
+ */ +static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, + int fd, u64 *va_pages, u64 *flags, u32 padding) +{ + struct kbase_va_region *reg; + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + bool shared_zone = false; + bool need_sync = false; + int group_id; + + /* 64-bit address range is the max */ + if (*va_pages > (U64_MAX / PAGE_SIZE)) + return NULL; + + dma_buf = dma_buf_get(fd); + if (IS_ERR_OR_NULL(dma_buf)) + return NULL; + + dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); + if (IS_ERR_OR_NULL(dma_attachment)) { + dma_buf_put(dma_buf); + return NULL; + } + + *va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding; + if (!*va_pages) { + dma_buf_detach(dma_buf, dma_attachment); + dma_buf_put(dma_buf); + return NULL; + } + + /* ignore SAME_VA */ + *flags &= ~BASE_MEM_SAME_VA; + + /* + * Force CPU cached flag. + * + * We can't query the dma-buf exporter to get details about the CPU + * cache attributes of CPU mappings, so we have to assume that the + * buffer may be cached, and call into the exporter for cache + * maintenance, and rely on the exporter to do the right thing when + * handling our calls. + */ + *flags |= BASE_MEM_CACHED_CPU; + + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; + + if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + need_sync = true; + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* + * 64-bit tasks require us to reserve VA on the CPU that we use + * on the GPU. + */ + shared_zone = true; + } +#endif + + if (shared_zone) { + *flags |= BASE_MEM_NEED_MMAP; + reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, + 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + } else { + reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, + 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + } + + if (!reg) { + dma_buf_detach(dma_buf, dma_attachment); + dma_buf_put(dma_buf); + return NULL; + } + + group_id = get_umm_memory_group_id(kctx, dma_buf); + + reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, + KBASE_MEM_TYPE_IMPORTED_UMM, group_id); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc; + + reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + if (kbase_update_region_flags(kctx, reg, *flags) != 0) + goto error_out; + + /* No pages to map yet */ + reg->gpu_alloc->nents = 0; + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ + + if (*flags & BASE_MEM_PROTECTED) + reg->flags |= KBASE_REG_PROTECTED; + + if (padding) + reg->flags |= KBASE_REG_IMPORT_PAD; + + reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; + reg->gpu_alloc->imported.umm.sgt = NULL; + reg->gpu_alloc->imported.umm.dma_buf = dma_buf; + reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; + reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; + reg->gpu_alloc->imported.umm.need_sync = need_sync; + reg->gpu_alloc->imported.umm.kctx = kctx; + reg->extent = 0; + + if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { + int err; + + reg->gpu_alloc->imported.umm.current_mapping_usage_count = 1; + + err = kbase_mem_umm_map_attachment(kctx, reg); + if (err) { + dev_warn(kctx->kbdev->dev, + "Failed to map dma-buf %pK on GPU: %d\n", + dma_buf, err); + goto error_out; + } + + *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + } + + return reg; + +error_out: + kbase_mem_phy_alloc_put(reg->gpu_alloc); + kbase_mem_phy_alloc_put(reg->cpu_alloc); +no_alloc: + kfree(reg); + + return NULL; +} + +u32 
kbase_get_cache_line_alignment(struct kbase_device *kbdev) +{ + u32 cpu_cache_line_size = cache_line_size(); + u32 gpu_cache_line_size = + (1UL << kbdev->gpu_props.props.l2_props.log2_line_size); + + return ((cpu_cache_line_size > gpu_cache_line_size) ? + cpu_cache_line_size : + gpu_cache_line_size); +} + +static struct kbase_va_region *kbase_mem_from_user_buffer( + struct kbase_context *kctx, unsigned long address, + unsigned long size, u64 *va_pages, u64 *flags) +{ + long i; + struct kbase_va_region *reg; + struct rb_root *rbtree; + long faulted_pages; + int zone = KBASE_REG_ZONE_CUSTOM_VA; + bool shared_zone = false; + u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); + struct kbase_alloc_import_user_buf *user_buf; + struct page **pages = NULL; + + /* Flag supported only for dma-buf imported memory */ + if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + return NULL; + + if ((address & (cache_line_alignment - 1)) != 0 || + (size & (cache_line_alignment - 1)) != 0) { + if (*flags & BASE_MEM_UNCACHED_GPU) { + dev_warn(kctx->kbdev->dev, + "User buffer is not cache line aligned and marked as GPU uncached\n"); + goto bad_size; + } + + /* Coherency must be enabled to handle partial cache lines */ + if (*flags & (BASE_MEM_COHERENT_SYSTEM | + BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { + /* Force coherent system required flag, import will + * then fail if coherency isn't available + */ + *flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED; + } else { + dev_warn(kctx->kbdev->dev, + "User buffer is not cache line aligned and no coherency enabled\n"); + goto bad_size; + } + } + + *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - + PFN_DOWN(address); + if (!*va_pages) + goto bad_size; + + if (*va_pages > (UINT64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + + /* SAME_VA generally not supported with imported memory (no known use cases) */ + *flags &= ~BASE_MEM_SAME_VA; + + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* + * 64-bit tasks require us to reserve VA on the CPU that we use + * on the GPU. 
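+		 *
+		 * Setting shared_zone here means the region is taken from
+		 * kctx->reg_rbtree_same and BASE_MEM_NEED_MMAP is raised, so
+		 * the final GPU VA is only fixed once user space mmap()s the
+		 * cookie returned by kbase_mem_import() (see kbasep_reg_mmap()
+		 * later in this file).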
+		 */
+		shared_zone = true;
+	}
+#endif
+
+	if (shared_zone) {
+		*flags |= BASE_MEM_NEED_MMAP;
+		zone = KBASE_REG_ZONE_SAME_VA;
+		rbtree = &kctx->reg_rbtree_same;
+	} else
+		rbtree = &kctx->reg_rbtree_custom;
+
+	reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
+
+	if (!reg)
+		goto no_region;
+
+	reg->gpu_alloc = kbase_alloc_create(
+		kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+		BASE_MEM_GROUP_DEFAULT);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->flags &= ~KBASE_REG_FREE;
+	reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+	reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+	user_buf = &reg->gpu_alloc->imported.user_buf;
+
+	user_buf->size = size;
+	user_buf->address = address;
+	user_buf->nr_pages = *va_pages;
+	user_buf->mm = current->mm;
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	atomic_inc(&current->mm->mm_count);
+#else
+	mmgrab(current->mm);
+#endif
+	if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		user_buf->pages = vmalloc(*va_pages * sizeof(struct page *));
+	else
+		user_buf->pages = kmalloc_array(*va_pages,
+				sizeof(struct page *), GFP_KERNEL);
+
+	if (!user_buf->pages)
+		goto no_page_array;
+
+	/* If the region is coherent with the CPU then the memory is imported
+	 * and mapped onto the GPU immediately.
+	 * Otherwise get_user_pages is called as a sanity check, but with
+	 * NULL as the pages argument which will fault the pages, but not
+	 * pin them. The memory will then be pinned only around the jobs that
+	 * specify the region as an external resource.
+	 */
+	if (reg->flags & KBASE_REG_SHARE_BOTH) {
+		pages = user_buf->pages;
+		*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+	}
+
+	down_read(&current->mm->mmap_sem);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
+KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#endif
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#else
+	faulted_pages = get_user_pages(address, *va_pages,
+			reg->flags & KBASE_REG_GPU_WR ?
FOLL_WRITE : 0,
+			pages, NULL);
+#endif
+
+	up_read(&current->mm->mmap_sem);
+
+	if (faulted_pages != *va_pages)
+		goto fault_mismatch;
+
+	reg->gpu_alloc->nents = 0;
+	reg->extent = 0;
+
+	if (pages) {
+		struct device *dev = kctx->kbdev->dev;
+		unsigned long local_size = user_buf->size;
+		unsigned long offset = user_buf->address & ~PAGE_MASK;
+		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
+
+		/* Top bit signifies that this was pinned on import */
+		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+
+		for (i = 0; i < faulted_pages; i++) {
+			dma_addr_t dma_addr;
+			unsigned long min;
+
+			min = MIN(PAGE_SIZE - offset, local_size);
+			dma_addr = dma_map_page(dev, pages[i],
+					offset, min,
+					DMA_BIDIRECTIONAL);
+			if (dma_mapping_error(dev, dma_addr))
+				goto unwind_dma_map;
+
+			user_buf->dma_addrs[i] = dma_addr;
+			pa[i] = as_tagged(page_to_phys(pages[i]));
+
+			local_size -= min;
+			offset = 0;
+		}
+
+		reg->gpu_alloc->nents = faulted_pages;
+	}
+
+	return reg;
+
+unwind_dma_map:
+	while (i--) {
+		dma_unmap_page(kctx->kbdev->dev,
+				user_buf->dma_addrs[i],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+fault_mismatch:
+	if (pages) {
+		for (i = 0; i < faulted_pages; i++)
+			put_page(pages[i]);
+	}
+no_page_array:
+invalid_flags:
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+	kfree(reg);
+no_region:
+bad_size:
+	return NULL;
+
+}
+
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+		u64 nents, struct base_mem_aliasing_info *ai,
+		u64 *num_pages)
+{
+	struct kbase_va_region *reg;
+	u64 gpu_va;
+	size_t i;
+	bool coherent;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(flags);
+	KBASE_DEBUG_ASSERT(ai);
+	KBASE_DEBUG_ASSERT(num_pages);
+
+	/* mask to only allowed flags */
+	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+			BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+			BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+		dev_warn(kctx->kbdev->dev,
+				"kbase_mem_alias called with bad flags (%llx)",
+				(unsigned long long)*flags);
+		goto bad_flags;
+	}
+	coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+	if (!stride)
+		goto bad_stride;
+
+	if (!nents)
+		goto bad_nents;
+
+	if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+		/* 64-bit address range is the max */
+		goto bad_size;
+
+	/* calculate the number of pages this alias will cover */
+	*num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		/* 64-bit tasks must MMAP anyway, but not expose this address to
+		 * clients */
+		*flags |= BASE_MEM_NEED_MMAP;
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
+				*num_pages,
+				KBASE_REG_ZONE_SAME_VA);
+	} else {
+#else
+	if (1) {
+#endif
+		reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				0, *num_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+	}
+
+	if (!reg)
+		goto no_reg;
+
+	/* zero-sized page array, as we don't need one/can support one */
+	reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS,
+		BASE_MEM_GROUP_DEFAULT);
+	if (IS_ERR_OR_NULL(reg->gpu_alloc))
+		goto no_alloc_obj;
+
+	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto invalid_flags;
+
+	reg->gpu_alloc->imported.alias.nents = nents;
+	reg->gpu_alloc->imported.alias.stride = stride;
+	reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) *
nents); + if (!reg->gpu_alloc->imported.alias.aliased) + goto no_aliased_array; + + kbase_gpu_vm_lock(kctx); + + /* validate and add src handles */ + for (i = 0; i < nents; i++) { + if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { + if (ai[i].handle.basep.handle != + BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) + goto bad_handle; /* unsupported magic handle */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the + stride */ + reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; + } else { + struct kbase_va_region *aliasing_reg; + struct kbase_mem_phy_alloc *alloc; + + aliasing_reg = kbase_region_tracker_find_region_base_address( + kctx, + (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); + + /* validate found region */ + if (kbase_is_region_invalid_or_free(aliasing_reg)) + goto bad_handle; /* Not found/already free */ + if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + goto bad_handle; /* Ephemeral region */ + if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) + goto bad_handle; /* GPU uncached memory */ + if (!aliasing_reg->gpu_alloc) + goto bad_handle; /* No alloc */ + if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto bad_handle; /* Not a native alloc */ + if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) + goto bad_handle; + /* Non-coherent memory cannot alias + coherent memory, and vice versa.*/ + + /* check size against stride */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the + stride */ + + alloc = aliasing_reg->gpu_alloc; + + /* check against the alloc's size */ + if (ai[i].offset > alloc->nents) + goto bad_handle; /* beyond end */ + if (ai[i].offset + ai[i].length > alloc->nents) + goto bad_handle; /* beyond end */ + + reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); + reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; + reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; + } + } + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* Bind to a cookie */ + if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { + dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); + goto no_cookie; + } + /* return a cookie */ + gpu_va = find_first_bit(kctx->cookies, BITS_PER_LONG); + bitmap_clear(kctx->cookies, gpu_va, 1); + BUG_ON(kctx->pending_regions[gpu_va]); + kctx->pending_regions[gpu_va] = reg; + + /* relocate to correct base */ + gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); + gpu_va <<= PAGE_SHIFT; + } else /* we control the VA */ { +#else + if (1) { +#endif + if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { + dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); + goto no_mmap; + } + /* return real GPU VA */ + gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + reg->flags &= ~KBASE_REG_FREE; + reg->flags &= ~KBASE_REG_GROWABLE; + + kbase_gpu_vm_unlock(kctx); + + return gpu_va; + +#ifdef CONFIG_64BIT +no_cookie: +#endif +no_mmap: +bad_handle: + kbase_gpu_vm_unlock(kctx); +no_aliased_array: +invalid_flags: + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); +no_alloc_obj: + kfree(reg); +no_reg: +bad_size: +bad_nents: +bad_stride: +bad_flags: + return 0; +} + +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, + u64 *flags) +{ + struct kbase_va_region 
*reg; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(gpu_va); + KBASE_DEBUG_ASSERT(va_pages); + KBASE_DEBUG_ASSERT(flags); + + if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && + kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) + *flags |= BASE_MEM_SAME_VA; + + if (!kbase_check_import_flags(*flags)) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_import called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + + if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { + /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_import call required coherent mem when unavailable"); + goto bad_flags; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM; + } + + if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { + dev_warn(kctx->kbdev->dev, + "padding is only supported for UMM"); + goto bad_flags; + } + + switch (type) { + case BASE_MEM_IMPORT_TYPE_UMM: { + int fd; + + if (get_user(fd, (int __user *)phandle)) + reg = NULL; + else + reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, + padding); + } + break; + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + struct base_mem_import_user_buffer user_buffer; + void __user *uptr; + + if (copy_from_user(&user_buffer, phandle, + sizeof(user_buffer))) { + reg = NULL; + } else { +#ifdef CONFIG_COMPAT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + uptr = compat_ptr(user_buffer.ptr); + else +#endif + uptr = u64_to_user_ptr(user_buffer.ptr); + + reg = kbase_mem_from_user_buffer(kctx, + (unsigned long)uptr, user_buffer.length, + va_pages, flags); + } + break; + } + default: { + reg = NULL; + break; + } + } + + if (!reg) + goto no_reg; + + kbase_gpu_vm_lock(kctx); + + /* mmap needed to setup VA? 
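+	 * The cookie path below publishes the region via kctx->pending_regions
+	 * and hands user space an mmap offset instead of a real GPU VA;
+	 * illustratively, for cookie bit n:
+	 *
+	 *   gpu_va = (n + PFN_DOWN(BASE_MEM_COOKIE_BASE)) << PAGE_SHIFT;
+	 *
+	 * kbasep_reg_mmap() reverses this when the mapping is finally created.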
	 */
+	if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+		/* Bind to a cookie */
+		if (bitmap_empty(kctx->cookies, BITS_PER_LONG))
+			goto no_cookie;
+		/* return a cookie */
+		*gpu_va = find_first_bit(kctx->cookies, BITS_PER_LONG);
+		bitmap_clear(kctx->cookies, *gpu_va, 1);
+		BUG_ON(kctx->pending_regions[*gpu_va]);
+		kctx->pending_regions[*gpu_va] = reg;
+
+		/* relocate to correct base */
+		*gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+		*gpu_va <<= PAGE_SHIFT;
+
+	} else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) {
+		/* we control the VA, mmap now to the GPU */
+		if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	} else {
+		/* we control the VA, but nothing to mmap yet */
+		if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+			goto no_gpu_va;
+		/* return real GPU VA */
+		*gpu_va = reg->start_pfn << PAGE_SHIFT;
+	}
+
+	/* clear out private flags */
+	*flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return 0;
+
+no_gpu_va:
+no_cookie:
+	kbase_gpu_vm_unlock(kctx);
+	kbase_mem_phy_alloc_put(reg->cpu_alloc);
+	kbase_mem_phy_alloc_put(reg->gpu_alloc);
+	kfree(reg);
+no_reg:
+bad_flags:
+	*gpu_va = 0;
+	*va_pages = 0;
+	*flags = 0;
+	return -ENOMEM;
+}
+
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	struct tagged_addr *phy_pages;
+	u64 delta = new_pages - old_pages;
+	int ret = 0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* Map the new pages into the GPU */
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
+		reg->start_pfn + old_pages, phy_pages + old_pages, delta,
+		reg->flags, kctx->as_nr, reg->gpu_alloc->group_id);
+
+	return ret;
+}
+
+void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 gpu_va_start = reg->start_pfn;
+
+	if (new_pages == old_pages)
+		/* Nothing to do */
+		return;
+
+	unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+			(gpu_va_start + new_pages)<<PAGE_SHIFT,
+			(old_pages - new_pages)<<PAGE_SHIFT, 1);
+}
+
+int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+		struct kbase_va_region *reg,
+		u64 new_pages, u64 old_pages)
+{
+	u64 delta = old_pages - new_pages;
+	int ret = 0;
+
+	ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+			reg->start_pfn + new_pages, delta, kctx->as_nr);
+
+	return ret;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
+{
+	u64 old_pages;
+	u64 delta = 0;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	bool read_locked = false;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	if (gpu_addr & ~PAGE_MASK) {
+		dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid");
+		return -EINVAL;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (kbase_is_region_invalid_or_free(reg))
+		goto out_unlock;
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+		goto out_unlock;
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE))
+		goto out_unlock;
+
+	if (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)
+		goto out_unlock;
+
+	/* Would overflow the VA region */
+	if (new_pages > reg->nr_pages)
+		goto out_unlock;
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1)
+		goto out_unlock;
+	/* can't grow regions which are ephemeral */
+	if (reg->flags & KBASE_REG_DONT_NEED)
+		goto out_unlock;
+
+#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
+	/* Reject resizing commit size */
+	if
 (reg->flags & KBASE_REG_PF_GROW)
+		new_pages = reg->nr_pages;
+#endif
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (new_pages > old_pages) {
+		delta = new_pages - old_pages;
+
+		/*
+		 * No update to the mm so downgrade the writer lock to a read
+		 * lock so other readers aren't blocked after this point.
+		 */
+		downgrade_write(&current->mm->mmap_sem);
+		read_locked = true;
+
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				res = -ENOMEM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+
+		/* No update required for CPU mappings, that's done on fault. */
+
+		/* Update GPU mapping. */
+		res = kbase_mem_grow_gpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		/* On error free the new pages */
+		if (res) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+	} else {
+		res = kbase_mem_shrink(kctx, reg, new_pages);
+		if (res)
+			res = -ENOMEM;
+	}
+
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	if (read_locked)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
+
+	return res;
+}
+
+int kbase_mem_shrink(struct kbase_context *const kctx,
+		struct kbase_va_region *const reg, u64 const new_pages)
+{
+	u64 delta, old_pages;
+	int err;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	if (WARN_ON(!kctx))
+		return -EINVAL;
+
+	if (WARN_ON(!reg))
+		return -EINVAL;
+
+	old_pages = kbase_reg_current_backed_size(reg);
+	if (WARN_ON(old_pages < new_pages))
+		return -EINVAL;
+
+	delta = old_pages - new_pages;
+
+	/* Update the GPU mapping */
+	err = kbase_mem_shrink_gpu_mapping(kctx, reg,
+			new_pages, old_pages);
+	if (err >= 0) {
+		/* Update all CPU mapping(s) */
+		kbase_mem_shrink_cpu_mapping(kctx, reg,
+				new_pages, old_pages);
+
+		kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+		if (reg->cpu_alloc != reg->gpu_alloc)
+			kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+	}
+
+	return err;
+}
+
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+	/* non-atomic as we're under Linux' mm lock */
+	map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+	struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+	KBASE_DEBUG_ASSERT(map);
+	KBASE_DEBUG_ASSERT(map->count > 0);
+
+	/* non-atomic as we're under Linux' mm lock */
+	if (--map->count)
+		return;
+
+	KBASE_DEBUG_ASSERT(map->kctx);
+	KBASE_DEBUG_ASSERT(map->alloc);
+
+	kbase_gpu_vm_lock(map->kctx);
+
+	if (map->free_on_close) {
+		KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+				KBASE_REG_ZONE_SAME_VA);
+		/* Avoid freeing memory on the process death which results in
+		 * GPU Page Fault.
Memory will be freed in kbase_destroy_context + */ + if (!(current->flags & PF_EXITING)) + kbase_mem_free_region(map->kctx, map->region); + } + + list_del(&map->mappings_list); + + kbase_va_region_alloc_put(map->kctx, map->region); + kbase_gpu_vm_unlock(map->kctx); + + kbase_mem_phy_alloc_put(map->alloc); + kfree(map); +} + +static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, + struct kbase_va_region *reg, + pgoff_t *start_off, + size_t nr_pages) +{ + struct kbase_aliased *aliased = + reg->cpu_alloc->imported.alias.aliased; + + if (!reg->cpu_alloc->imported.alias.stride || + reg->nr_pages < (*start_off + nr_pages)) { + return NULL; + } + + while (*start_off >= reg->cpu_alloc->imported.alias.stride) { + aliased++; + *start_off -= reg->cpu_alloc->imported.alias.stride; + } + + if (!aliased->alloc) { + /* sink page not available for dumping map */ + return NULL; + } + + if ((*start_off + nr_pages) > aliased->length) { + /* not fully backed by physical pages */ + return NULL; + } + + return aliased; +} + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ +#else +static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; +#endif + struct kbase_cpu_mapping *map = vma->vm_private_data; + pgoff_t map_start_pgoff; + pgoff_t fault_pgoff; + size_t i; + pgoff_t addr; + size_t nents; + struct tagged_addr *pages; + vm_fault_t ret = VM_FAULT_SIGBUS; + struct memory_group_manager_device *mgm_dev; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + KBASE_DEBUG_ASSERT(map->kctx); + KBASE_DEBUG_ASSERT(map->alloc); + + map_start_pgoff = vma->vm_pgoff - map->region->start_pfn; + + kbase_gpu_vm_lock(map->kctx); + if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { + struct kbase_aliased *aliased = + get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); + + if (!aliased) + goto exit; + + nents = aliased->length; + pages = aliased->alloc->pages + aliased->offset; + } else { + nents = map->alloc->nents; + pages = map->alloc->pages; + } + + fault_pgoff = map_start_pgoff + (vmf->pgoff - vma->vm_pgoff); + + if (fault_pgoff >= nents) + goto exit; + + /* Fault on access to DONT_NEED regions */ + if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) + goto exit; + + /* We are inserting all valid pages from the start of CPU mapping and + * not from the fault location (the mmap handler was previously doing + * the same). 
+ */ + i = map_start_pgoff; + addr = (pgoff_t)(vma->vm_start >> PAGE_SHIFT); + mgm_dev = map->kctx->kbdev->mgm_dev; + while (i < nents && (addr < vma->vm_end >> PAGE_SHIFT)) { + + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, + map->alloc->group_id, vma, addr << PAGE_SHIFT, + PFN_DOWN(as_phys_addr_t(pages[i])), vma->vm_page_prot); + + if (ret != VM_FAULT_NOPAGE) + goto exit; + + i++; addr++; + } + +exit: + kbase_gpu_vm_unlock(map->kctx); + return ret; +} + +const struct vm_operations_struct kbase_vm_ops = { + .open = kbase_cpu_vm_open, + .close = kbase_cpu_vm_close, + .fault = kbase_cpu_vm_fault +}; + +static int kbase_cpu_mmap(struct kbase_context *kctx, + struct kbase_va_region *reg, + struct vm_area_struct *vma, + void *kaddr, + size_t nr_pages, + unsigned long aligned_offset, + int free_on_close) +{ + struct kbase_cpu_mapping *map; + int err = 0; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + + if (!map) { + WARN_ON(1); + err = -ENOMEM; + goto out; + } + + /* + * VM_DONTCOPY - don't make this mapping available in fork'ed processes + * VM_DONTEXPAND - disable mremap on this region + * VM_IO - disables paging + * VM_DONTDUMP - Don't include in core dumps (3.7 only) + * VM_MIXEDMAP - Support mixing struct page*s and raw pfns. + * This is needed to support using the dedicated and + * the OS based memory backends together. + */ + /* + * This will need updating to propagate coherency flags + * See MIDBASE-1057 + */ + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; +#else + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +#endif + vma->vm_ops = &kbase_vm_ops; + vma->vm_private_data = map; + + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { + pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn + + (aligned_offset >> PAGE_SHIFT); + struct kbase_aliased *aliased = + get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages); + + if (!aliased) { + err = -EINVAL; + kfree(map); + goto out; + } + } + + if (!(reg->flags & KBASE_REG_CPU_CACHED) && + (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { + /* We can't map vmalloc'd memory uncached. + * Other memory will have been returned from + * kbase_mem_pool which would be + * suitable for mapping uncached. + */ + BUG_ON(kaddr); + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + } + + if (!kaddr) { + vma->vm_flags |= VM_PFNMAP; + } else { + WARN_ON(aligned_offset); + /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ + vma->vm_flags |= VM_MIXEDMAP; + /* vmalloc remaping is easy... */ + err = remap_vmalloc_range(vma, kaddr, 0); + WARN_ON(err); + } + + if (err) { + kfree(map); + goto out; + } + + map->region = kbase_va_region_alloc_get(kctx, reg); + map->free_on_close = free_on_close; + map->kctx = kctx; + map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + map->count = 1; /* start with one ref */ + + if (reg->flags & KBASE_REG_CPU_CACHED) + map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + list_add(&map->mappings_list, &map->alloc->mappings); + + out: + return err; +} + +#ifdef CONFIG_MALI_VECTOR_DUMP +static void kbase_free_unused_jit_allocations(struct kbase_context *kctx) +{ + /* Free all cached/unused JIT allocations as their contents are not + * really needed for the replay. The GPU writes to them would already + * have been captured through the GWT mechanism. 
+ * This considerably reduces the size of mmu-snapshot-file and it also + * helps avoid segmentation fault issue during vector dumping of + * complex contents when the unused JIT allocations are accessed to + * dump their contents (as they appear in the page tables snapshot) + * but they got freed by the shrinker under low memory scenarios + * (which do occur with complex contents). + */ + while (kbase_jit_evict(kctx)) + ; +} +#endif + +static int kbase_mmu_dump_mmap(struct kbase_context *kctx, + struct vm_area_struct *vma, + struct kbase_va_region **const reg, + void **const kmap_addr) +{ + struct kbase_va_region *new_reg; + void *kaddr; + u32 nr_pages; + size_t size; + int err = 0; + + dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n"); + size = (vma->vm_end - vma->vm_start); + nr_pages = size >> PAGE_SHIFT; + +#ifdef CONFIG_MALI_VECTOR_DUMP + kbase_free_unused_jit_allocations(kctx); +#endif + + kaddr = kbase_mmu_dump(kctx, nr_pages); + + if (!kaddr) { + err = -ENOMEM; + goto out; + } + + new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages, + KBASE_REG_ZONE_SAME_VA); + if (!new_reg) { + err = -ENOMEM; + WARN_ON(1); + goto out; + } + + new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW, + BASE_MEM_GROUP_DEFAULT); + if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { + err = -ENOMEM; + new_reg->cpu_alloc = NULL; + WARN_ON(1); + goto out_no_alloc; + } + + new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); + + new_reg->flags &= ~KBASE_REG_FREE; + new_reg->flags |= KBASE_REG_CPU_CACHED; + if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { + err = -ENOMEM; + WARN_ON(1); + goto out_va_region; + } + + *kmap_addr = kaddr; + *reg = new_reg; + + dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n"); + return 0; + +out_no_alloc: +out_va_region: + kbase_free_alloced_region(new_reg); +out: + return err; +} + + +void kbase_os_mem_map_lock(struct kbase_context *kctx) +{ + struct mm_struct *mm = current->mm; + (void)kctx; + down_read(&mm->mmap_sem); +} + +void kbase_os_mem_map_unlock(struct kbase_context *kctx) +{ + struct mm_struct *mm = current->mm; + (void)kctx; + up_read(&mm->mmap_sem); +} + +static int kbasep_reg_mmap(struct kbase_context *kctx, + struct vm_area_struct *vma, + struct kbase_va_region **regm, + size_t *nr_pages, size_t *aligned_offset) + +{ + int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + struct kbase_va_region *reg; + int err = 0; + + *aligned_offset = 0; + + dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n"); + + /* SAME_VA stuff, fetch the right region */ + reg = kctx->pending_regions[cookie]; + if (!reg) { + err = -ENOMEM; + goto out; + } + + if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) { + /* incorrect mmap size */ + /* leave the cookie for a potential later + * mapping, or to be reclaimed later when the + * context is freed */ + err = -ENOMEM; + goto out; + } + + if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { + /* VM flags inconsistent with region flags */ + err = -EPERM; + dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", + __FILE__, __LINE__); + goto out; + } + + /* adjust down nr_pages to what we have physically */ + *nr_pages = kbase_reg_current_backed_size(reg); + + if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, + reg->nr_pages, 1) != 0) { + dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); + /* Unable to map in GPU space. 
*/ + WARN_ON(1); + err = -ENOMEM; + goto out; + } + /* no need for the cookie anymore */ + kctx->pending_regions[cookie] = NULL; + bitmap_set(kctx->cookies, cookie, 1); + + /* + * Overwrite the offset with the region start_pfn, so we effectively + * map from offset 0 in the region. However subtract the aligned + * offset so that when user space trims the mapping the beginning of + * the trimmed VMA has the correct vm_pgoff; + */ + vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); +out: + *regm = reg; + dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n"); + + return err; +} + +int kbase_context_mmap(struct kbase_context *const kctx, + struct vm_area_struct *const vma) +{ + struct kbase_va_region *reg = NULL; + void *kaddr = NULL; + size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int err = 0; + int free_on_close = 0; + struct device *dev = kctx->kbdev->dev; + size_t aligned_offset = 0; + + dev_dbg(dev, "kbase_mmap\n"); + + if (!(vma->vm_flags & VM_READ)) + vma->vm_flags &= ~VM_MAYREAD; + if (!(vma->vm_flags & VM_WRITE)) + vma->vm_flags &= ~VM_MAYWRITE; + + if (0 == nr_pages) { + err = -EINVAL; + goto out; + } + + if (!(vma->vm_flags & VM_SHARED)) { + err = -EINVAL; + goto out; + } + + kbase_gpu_vm_lock(kctx); + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { + /* The non-mapped tracking helper page */ + err = kbase_tracking_page_setup(kctx, vma); + goto out_unlock; + } + + /* if not the MTP, verify that the MTP has been mapped */ + rcu_read_lock(); + /* catches both when the special page isn't present or + * when we've forked */ + if (rcu_dereference(kctx->process_mm) != current->mm) { + err = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } + rcu_read_unlock(); + + switch (vma->vm_pgoff) { + case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): + case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; + case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): + /* MMU dump */ + err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); + if (0 != err) + goto out_unlock; + /* free the region on munmap */ + free_on_close = 1; + break; + case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... + PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { + err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, + &aligned_offset); + if (0 != err) + goto out_unlock; + /* free the region on munmap */ + free_on_close = 1; + break; + } + default: { + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + (u64)vma->vm_pgoff << PAGE_SHIFT); + + if (!kbase_is_region_invalid_or_free(reg)) { + /* will this mapping overflow the size of the region? 
*/ + if (nr_pages > (reg->nr_pages - + (vma->vm_pgoff - reg->start_pfn))) { + err = -ENOMEM; + goto out_unlock; + } + + if ((vma->vm_flags & VM_READ && + !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && + !(reg->flags & KBASE_REG_CPU_WR))) { + /* VM flags inconsistent with region flags */ + err = -EPERM; + dev_err(dev, "%s:%d inconsistent VM flags\n", + __FILE__, __LINE__); + goto out_unlock; + } + + if (KBASE_MEM_TYPE_IMPORTED_UMM == + reg->cpu_alloc->type) { + if (0 != (vma->vm_pgoff - reg->start_pfn)) { + err = -EINVAL; + dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n", + __FILE__, __LINE__); + goto out_unlock; + } + err = dma_buf_mmap( + reg->cpu_alloc->imported.umm.dma_buf, + vma, vma->vm_pgoff - reg->start_pfn); + goto out_unlock; + } + + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + /* initial params check for aliased dumping map */ + if (nr_pages > reg->gpu_alloc->imported.alias.stride || + !reg->gpu_alloc->imported.alias.stride || + !nr_pages) { + err = -EINVAL; + dev_warn(dev, "mmap aliased: invalid params!\n"); + goto out_unlock; + } + } + else if (reg->cpu_alloc->nents < + (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + /* limit what we map to the amount currently backed */ + if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) + nr_pages = 0; + else + nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); + } + } else { + err = -ENOMEM; + goto out_unlock; + } + } /* default */ + } /* switch */ + + err = kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, + free_on_close); + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { + /* MMU dump - userspace should now have a reference on + * the pages, so we can now free the kernel mapping */ + vfree(kaddr); + } + +out_unlock: + kbase_gpu_vm_unlock(kctx); +out: + if (err) + dev_err(dev, "mmap failed %d\n", err); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_context_mmap); + +void kbase_sync_mem_regions(struct kbase_context *kctx, + struct kbase_vmap_struct *map, enum kbase_sync_type dest) +{ + size_t i; + off_t const offset = map->offset_in_page; + size_t const page_count = PFN_UP(offset + map->size); + + /* Sync first page */ + size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size); + struct tagged_addr cpu_pa = map->cpu_pages[0]; + struct tagged_addr gpu_pa = map->gpu_pages[0]; + + kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest); + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + cpu_pa = map->cpu_pages[i]; + gpu_pa = map->gpu_pages[i]; + kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest); + } + + /* Sync last page (if any) */ + if (page_count > 1) { + cpu_pa = map->cpu_pages[page_count - 1]; + gpu_pa = map->gpu_pages[page_count - 1]; + sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1; + kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest); + } +} + +static int kbase_vmap_phy_pages(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 offset_bytes, size_t size, + struct kbase_vmap_struct *map) +{ + unsigned long page_index; + unsigned int offset_in_page = offset_bytes & ~PAGE_MASK; + size_t page_count = PFN_UP(offset_in_page + size); + struct tagged_addr *page_array; + struct page **pages; + void *cpu_addr = NULL; + pgprot_t prot; + size_t i; + + if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc) + return -EINVAL; + + /* check if page_count calculation will wrap */ + if (size > ((size_t)-1 / PAGE_SIZE)) + return -EINVAL; 
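+
+	/* page_count above counts every page the range touches: e.g. with
+	 * offset_in_page == PAGE_SIZE - 2 and size == 3 the mapping spans a
+	 * page boundary, so PFN_UP(offset_in_page + size) == 2 even though
+	 * size < PAGE_SIZE.
+	 */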
+ + page_index = offset_bytes >> PAGE_SHIFT; + + /* check if page_index + page_count will wrap */ + if (-1UL - page_count < page_index) + return -EINVAL; + + if (page_index + page_count > kbase_reg_current_backed_size(reg)) + return -ENOMEM; + + if (reg->flags & KBASE_REG_DONT_NEED) + return -EINVAL; + + prot = PAGE_KERNEL; + if (!(reg->flags & KBASE_REG_CPU_CACHED)) { + /* Map uncached */ + prot = pgprot_writecombine(prot); + } + + page_array = kbase_get_cpu_phy_pages(reg); + if (!page_array) + return -ENOMEM; + + pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < page_count; i++) + pages[i] = as_page(page_array[page_index + i]); + + /* Note: enforcing a RO prot_request onto prot is not done, since: + * - CPU-arch-specific integration required + * - kbase_vmap() requires no access checks to be made/enforced */ + + cpu_addr = vmap(pages, page_count, VM_MAP, prot); + + kfree(pages); + + if (!cpu_addr) + return -ENOMEM; + + map->offset_in_page = offset_in_page; + map->cpu_alloc = reg->cpu_alloc; + map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; + map->gpu_alloc = reg->gpu_alloc; + map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; + map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page); + map->size = size; + map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) && + !kbase_mem_is_imported(map->gpu_alloc->type); + + if (map->sync_needed) + kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); + + return 0; +} + +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map) +{ + struct kbase_va_region *reg; + void *addr = NULL; + u64 offset_bytes; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + int err; + + kbase_gpu_vm_lock(kctx); + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + /* check access permissions can be satisfied + * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} + */ + if ((reg->flags & prot_request) != prot_request) + goto out_unlock; + + offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map); + if (err < 0) + goto fail_vmap_phy_pages; + + addr = map->addr; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + return addr; + +fail_vmap_phy_pages: + kbase_gpu_vm_unlock(kctx); + kbase_mem_phy_alloc_put(cpu_alloc); + kbase_mem_phy_alloc_put(gpu_alloc); + + return NULL; +} + +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct kbase_vmap_struct *map) +{ + /* 0 is specified for prot_request to indicate no access checks should + * be made. 
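+	 * Callers that do need the check (for example when the kernel intends
+	 * to write to memory user space may have imported read-only) should
+	 * call kbase_vmap_prot() directly, roughly:
+	 *
+	 *   struct kbase_vmap_struct map;
+	 *   void *ptr = kbase_vmap_prot(kctx, gpu_addr, size,
+	 *                               KBASE_REG_CPU_WR, &map);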
+ * + * As mentioned in kbase_vmap_prot() this means that a kernel-side + * CPU-RO mapping is not enforced to allow this to work */ + return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); +} +KBASE_EXPORT_TEST_API(kbase_vmap); + +static void kbase_vunmap_phy_pages(struct kbase_context *kctx, + struct kbase_vmap_struct *map) +{ + void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); + vunmap(addr); + + if (map->sync_needed) + kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); + + map->offset_in_page = 0; + map->cpu_pages = NULL; + map->gpu_pages = NULL; + map->addr = NULL; + map->size = 0; + map->sync_needed = false; +} + +void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) +{ + kbase_vunmap_phy_pages(kctx, map); + map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); + map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); +} +KBASE_EXPORT_TEST_API(kbase_vunmap); + +static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)) + /* To avoid the build breakage due to an unexported kernel symbol + * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards, + * we inline here the equivalent of 'add_mm_counter()' from linux + * kernel V5.4.0~8. + */ + atomic_long_add(value, &mm->rss_stat.count[member]); +#else + add_mm_counter(mm, member, value); +#endif +} + +void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) +{ + struct mm_struct *mm; + + rcu_read_lock(); + mm = rcu_dereference(kctx->process_mm); + if (mm) { + atomic_add(pages, &kctx->nonmapped_pages); +#ifdef SPLIT_RSS_COUNTING + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); +#else + spin_lock(&mm->page_table_lock); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); + spin_unlock(&mm->page_table_lock); +#endif + } + rcu_read_unlock(); +} + +static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) +{ + int pages; + struct mm_struct *mm; + + spin_lock(&kctx->mm_update_lock); + mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); + if (!mm) { + spin_unlock(&kctx->mm_update_lock); + return; + } + + rcu_assign_pointer(kctx->process_mm, NULL); + spin_unlock(&kctx->mm_update_lock); + synchronize_rcu(); + + pages = atomic_xchg(&kctx->nonmapped_pages, 0); +#ifdef SPLIT_RSS_COUNTING + kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); +#else + spin_lock(&mm->page_table_lock); + kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + spin_unlock(&mm->page_table_lock); +#endif +} + +static void kbase_special_vm_close(struct vm_area_struct *vma) +{ + struct kbase_context *kctx; + + kctx = vma->vm_private_data; + kbasep_os_process_page_usage_drain(kctx); +} + +static const struct vm_operations_struct kbase_vm_special_ops = { + .close = kbase_special_vm_close, +}; + +static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) +{ + /* check that this is the only tracking page */ + spin_lock(&kctx->mm_update_lock); + if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { + spin_unlock(&kctx->mm_update_lock); + return -EFAULT; + } + + rcu_assign_pointer(kctx->process_mm, current->mm); + + spin_unlock(&kctx->mm_update_lock); + + /* no real access */ + vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; +#else + vma->vm_flags |= 
VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +#endif + vma->vm_ops = &kbase_vm_special_ops; + vma->vm_private_data = kctx; + + return 0; +} + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.h new file mode 100644 index 000000000000..0febd3059e7c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.h @@ -0,0 +1,467 @@ +/* + * + * (C) COPYRIGHT 2010, 2012-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_mem_linux.h + * Base kernel memory APIs, Linux implementation. + */ + +#ifndef _KBASE_MEM_LINUX_H_ +#define _KBASE_MEM_LINUX_H_ + +/** A HWC dump mapping */ +struct kbase_hwc_dma_mapping { + void *cpu_va; + dma_addr_t dma_pa; + size_t size; +}; + +/* MALI_SEC_INTEGRATION */ +struct kbase_mem_phy_alloc; + +/** + * kbase_mem_alloc - Create a new allocation for GPU + * + * @kctx: The kernel context + * @va_pages: The number of pages of virtual address space to reserve + * @commit_pages: The number of physical pages to allocate upfront + * @extent: The number of extra pages to allocate on each GPU fault which + * grows the region. + * @flags: bitmask of BASE_MEM_* flags to convey special requirements & + * properties for the new allocation. + * @gpu_va: Start address of the memory region which was allocated from GPU + * virtual address space. + * + * Return: 0 on success or error code + */ +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, + u64 *gpu_va); + +/** + * kbase_mem_query - Query properties of a GPU memory region + * + * @kctx: The kernel context + * @gpu_addr: A GPU address contained within the memory region + * @query: The type of query, from KBASE_MEM_QUERY_* flags, which could be + * regarding the amount of backing physical memory allocated so far + * for the region or the size of the region or the flags associated + * with the region. + * @out: Pointer to the location to store the result of query. + * + * Return: 0 on success or error code + */ +int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, + u64 *const out); + +/** + * kbase_mem_import - Import the external memory for use by the GPU + * + * @kctx: The kernel context + * @type: Type of external memory + * @phandle: Handle to the external memory interpreted as per the type. + * @padding: Amount of extra VA pages to append to the imported buffer + * @gpu_va: GPU address assigned to the imported external memory + * @va_pages: Size of the memory region reserved from the GPU address space + * @flags: bitmask of BASE_MEM_* flags to convey special requirements & + * properties for the new allocation representing the external + * memory. 
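+ *
+ * Illustrative call for a dma-buf import (a sketch only; ufd is a
+ * hypothetical __user pointer to an int holding the dma-buf fd, since
+ * @phandle is interpreted per @type):
+ *
+ *   u64 gpu_va, va_pages, flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+ *   int err = kbase_mem_import(kctx, BASE_MEM_IMPORT_TYPE_UMM, ufd,
+ *                              0, &gpu_va, &va_pages, &flags);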
+ * Return: 0 on success or error code + */ +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, + u64 *flags); + +/** + * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more + * memory regions + * + * @kctx: The kernel context + * @flags: bitmask of BASE_MEM_* flags. + * @stride: Bytes between start of each memory region + * @nents: The number of regions to pack together into the alias + * @ai: Pointer to the struct containing the memory aliasing info + * @num_pages: Number of pages the alias will cover + * + * Return: 0 on failure or otherwise the GPU VA for the alias + */ +u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); + +/** + * kbase_mem_flags_change - Change the flags for a memory region + * + * @kctx: The kernel context + * @gpu_addr: A GPU address contained within the memory region to modify. + * @flags: The new flags to set + * @mask: Mask of the flags, from BASE_MEM_*, to modify. + * + * Return: 0 on success or error code + */ +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); + +/** + * kbase_mem_commit - Change the physical backing size of a region + * + * @kctx: The kernel context + * @gpu_addr: Handle to the memory region + * @new_pages: Number of physical pages to back the region with + * + * Return: 0 on success or error code + */ +int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); + +/** + * kbase_mem_shrink - Shrink the physical backing size of a region + * + * @kctx: The kernel context + * @reg: The GPU region + * @new_pages: Number of physical pages to back the region with + * + * Return: 0 on success or error code + */ +int kbase_mem_shrink(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_pages); + +/** + * kbase_context_mmap - Memory map method, gets invoked when mmap system call is + * issued on device file /dev/malixx. + * @kctx: The kernel context + * @vma: Pointer to the struct containing the info where the GPU allocation + * will be mapped in virtual address space of CPU. + * + * Return: 0 on success or error code + */ +int kbase_context_mmap(struct kbase_context *kctx, struct vm_area_struct *vma); + +/** + * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction + * mechanism. + * @kctx: The kbase context to initialize. + * + * Return: Zero on success or -errno on failure. + */ +int kbase_mem_evictable_init(struct kbase_context *kctx); + +/** + * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction + * mechanism. + * @kctx: The kbase context to de-initialize. + */ +void kbase_mem_evictable_deinit(struct kbase_context *kctx); + +/** + * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the grow + * @old_pages: The number of pages before the grow + * + * Return: 0 on success, -errno on error. + * + * Expand the GPU mapping to encompass the new psychical pages which have + * been added to the allocation. + * + * Note: Caller must be holding the region lock. 
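+ *
+ * A sketch of the expected calling pattern, mirroring kbase_mem_commit():
+ *
+ *   kbase_gpu_vm_lock(kctx);
+ *   if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) == 0)
+ *           err = kbase_mem_grow_gpu_mapping(kctx, reg,
+ *                                            old_pages + delta, old_pages);
+ *   kbase_gpu_vm_unlock(kctx);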
+ */ +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_mem_evictable_make - Make a physical allocation eligible for eviction + * @gpu_alloc: The physical allocation to make evictable + * + * Return: 0 on success, -errno on error. + * + * Take the provided region and make all the physical pages within it + * reclaimable by the kernel, updating the per-process VM stats as well. + * Remove any CPU mappings (as these can't be removed in the shrinker callback + * as mmap_sem might already be taken) but leave the GPU mapping intact as + * and until the shrinker reclaims the allocation. + * + * Note: Must be called with the region lock of the containing context. + */ +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); + +/** + * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for + * eviction. + * @alloc: The physical allocation to remove eviction eligibility from. + * + * Return: True if the allocation had its backing restored and false if + * it hasn't. + * + * Make the physical pages in the region no longer reclaimable and update the + * per-process stats, if the shrinker has already evicted the memory then + * re-allocate it if the region is still alive. + * + * Note: Must be called with the region lock of the containing context. + */ +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); + +struct kbase_vmap_struct { + off_t offset_in_page; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + struct tagged_addr *cpu_pages; + struct tagged_addr *gpu_pages; + void *addr; + size_t size; + bool sync_needed; +}; + + +/** + * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the + * requested access permissions are supported + * @kctx: Context the VA range belongs to + * @gpu_addr: Start address of VA range + * @size: Size of VA range + * @prot_request: Flags indicating how the caller will then access the memory + * @map: Structure to be given to kbase_vunmap() on freeing + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Map a GPU VA Range into the kernel. The VA range must be contained within a + * GPU memory region. Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * This is safer than using kmap() on the pages directly, + * because the pages here are refcounted to prevent freeing (and hence reuse + * elsewhere in the system) until an kbase_vunmap() + * + * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check + * whether the region should allow the intended access, and return an error if + * disallowed. This is essential for security of imported memory, particularly + * a user buf from SHM mapped into the process as RO. In that case, write + * access must be checked if the intention is for kernel to write to the + * memory. + * + * The checks are also there to help catch access errors on memory where + * security is not a concern: imported memory that is always RW, and memory + * that was allocated and owned by the process attached to @kctx. In this case, + * it helps to identify memory that was was mapped with the wrong access type. + * + * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases + * where either the security of memory is solely dependent on those flags, or + * when userspace code was expecting only the GPU to access the memory (e.g. 
HW + * workarounds). + * + * All cache maintenance operations shall be ignored if the + * memory region has been imported. + * + */ +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map); + +/** + * kbase_vmap - Map a GPU VA range into the kernel safely + * @kctx: Context the VA range belongs to + * @gpu_addr: Start address of VA range + * @size: Size of VA range + * @map: Structure to be given to kbase_vunmap() on freeing + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Map a GPU VA Range into the kernel. The VA range must be contained within a + * GPU memory region. Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * This is safer than using kmap() on the pages directly, + * because the pages here are refcounted to prevent freeing (and hence reuse + * elsewhere in the system) until an kbase_vunmap() + * + * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no + * checks to ensure the security of e.g. imported user bufs from RO SHM. + * + * Note: All cache maintenance operations shall be ignored if the memory region + * has been imported. + */ +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct kbase_vmap_struct *map); + +/** + * kbase_vunmap - Unmap a GPU VA range from the kernel + * @kctx: Context the VA range belongs to + * @map: Structure describing the mapping from the corresponding kbase_vmap() + * call + * + * Unmaps a GPU VA range from the kernel, given its @map structure obtained + * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * The reference taken on pages during kbase_vmap() is released. + * + * Note: All cache maintenance operations shall be ignored if the memory region + * has been imported. + */ +void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); + +extern const struct vm_operations_struct kbase_vm_ops; + +/** + * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode + * CPU mapping. + * @kctx: Context the CPU mapping belongs to. + * @map: Structure describing the CPU mapping, setup previously by the + * kbase_vmap() call. + * @dest: Indicates the type of maintenance required (i.e. flush or invalidate) + * + * Note: The caller shall ensure that CPU mapping is not revoked & remains + * active whilst the maintenance is in progress. + */ +void kbase_sync_mem_regions(struct kbase_context *kctx, + struct kbase_vmap_struct *map, enum kbase_sync_type dest); + +/** + * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Shrink (or completely remove) all CPU mappings which reference the shrunk + * part of the allocation. + */ +void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a + * physical allocation + * @kctx: The kernel base context associated with the mapping + * @alloc: Pointer to the allocation to terminate + * + * This function will unmap the kernel mapping, and free any structures used to + * track it. 
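+ *
+ * For reference, consumers of the permanent mapping are expected to
+ * bracket their accesses with the get/put helpers declared below,
+ * roughly:
+ *
+ *   struct kbase_vmap_struct *mapping;
+ *   void *ptr = kbase_phy_alloc_mapping_get(kctx, gpu_addr, &mapping);
+ *
+ *   if (ptr) {
+ *           (access the memory through ptr)
+ *           kbase_phy_alloc_mapping_put(kctx, mapping);
+ *   }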
+ */
+void kbase_phy_alloc_mapping_term(struct kbase_context *kctx,
+		struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent
+ * mapping of a physical allocation
+ * @kctx: The kernel base context @gpu_addr will be looked up in
+ * @gpu_addr: The gpu address to lookup for the kernel-side CPU mapping
+ * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer
+ * which will be used for a call to
+ * kbase_phy_alloc_mapping_put()
+ *
+ * Return: Pointer to a kernel-side accessible location that directly
+ * corresponds to @gpu_addr, or NULL on failure
+ *
+ * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access
+ * that location kernel-side. Only certain kinds of memory have a permanent
+ * kernel mapping; refer to the internal functions
+ * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more
+ * information.
+ *
+ * If this function succeeds, a CPU access to the returned pointer will access
+ * the actual location represented by @gpu_addr. That is, the return value does
+ * not require any offset added to it to access the location specified in
+ * @gpu_addr.
+ *
+ * The client must take care to either apply any necessary sync operations when
+ * accessing the data, or ensure that the enclosing region was coherent with
+ * the GPU, or uncached in the CPU.
+ *
+ * The refcount on the physical allocations backing the region is taken, so
+ * that they do not disappear whilst the client is accessing them. Once the
+ * client has finished accessing the memory, it must be released with a call to
+ * kbase_phy_alloc_mapping_put().
+ *
+ * Whilst this is expected to execute quickly (the mapping was already setup
+ * when the physical allocation was created), the call is not IRQ-safe due to
+ * the region lookup involved.
+ *
+ * A NULL return value may indicate that:
+ * - a userside process has freed the allocation, and so @gpu_addr is no longer
+ * valid
+ * - the region containing @gpu_addr does not support a permanent kernel mapping
+ */
+void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr,
+		struct kbase_vmap_struct **out_kern_mapping);
+
+/**
+ * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a
+ * physical allocation
+ * @kctx: The kernel base context associated with the mapping
+ * @kern_mapping: Pointer to the struct kbase_vmap_struct obtained from a call
+ * to kbase_phy_alloc_mapping_get()
+ *
+ * Releases the reference to the allocations backing @kern_mapping that was
+ * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used
+ * when the client no longer needs to access the kernel-side CPU pointer.
+ *
+ * If this was the last reference on the underlying physical allocations, they
+ * will go through the normal allocation free steps, which also includes an
+ * unmap of the permanent kernel mapping for those allocations.
+ *
+ * Due to these operations, the function is not IRQ-safe. However it is
+ * expected to execute quickly in the normal case, i.e. when the region holding
+ * the physical allocation is still present.
+ */
+void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
+		struct kbase_vmap_struct *kern_mapping);
+
+/**
+ * kbase_get_cache_line_alignment - Return cache line alignment
+ *
+ * Helper function to return the maximum cache line alignment considering
+ * both CPU and GPU cache sizes.
+ *
+ * Return: CPU and GPU cache line alignment, in bytes.
+ * + * @kbdev: Device pointer. + */ +u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev); + +#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) +static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, pgprot_t pgprot) +{ + int err; + +#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ + (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) + if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot)) + return VM_FAULT_SIGBUS; + + err = vm_insert_pfn(vma, addr, pfn); +#else + err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); +#endif + + if (unlikely(err == -ENOMEM)) + return VM_FAULT_OOM; + if (unlikely(err < 0 && err != -EBUSY)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} +#endif + +#endif /* _KBASE_MEM_LINUX_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h new file mode 100644 index 000000000000..70116030f233 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_lowlevel.h @@ -0,0 +1,166 @@ +/* + * + * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_MEM_LOWLEVEL_H +#define _KBASE_MEM_LOWLEVEL_H + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +#include + +/** + * @brief Flags for kbase_phy_allocator_pages_alloc + */ +#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ +#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ +#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ + +#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) + +#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ + +enum kbase_sync_type { + KBASE_SYNC_TO_CPU, + KBASE_SYNC_TO_DEVICE +}; + +struct tagged_addr { phys_addr_t tagged_addr; }; + +#define HUGE_PAGE (1u << 0) +#define HUGE_HEAD (1u << 1) +#define FROM_PARTIAL (1u << 2) + +/* + * Note: if macro for converting physical address to page is not defined + * in the kernel itself, it is defined hereby. This is to avoid build errors + * which are reported during builds for some architectures. + */ +#ifndef phys_to_page +#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) +#endif + +/** + * as_phys_addr_t - Retrieve the physical address from tagged address by + * masking the lower order 12 bits. + * @t: tagged address to be translated. + * + * Return: physical address corresponding to tagged address. 
+ */
+static inline phys_addr_t as_phys_addr_t(struct tagged_addr t)
+{
+	return t.tagged_addr & PAGE_MASK;
+}
+
+/**
+ * as_page - Retrieve the struct page from a tagged address
+ * @t: tagged address to be translated.
+ *
+ * Return: pointer to struct page corresponding to tagged address.
+ */
+static inline struct page *as_page(struct tagged_addr t)
+{
+	return phys_to_page(as_phys_addr_t(t));
+}
+
+/**
+ * as_tagged - Convert the physical address to tagged address type; as there
+ * is no tag info present, the lower order 12 bits will be 0
+ * @phys: physical address to be converted to tagged type
+ *
+ * This is used for 4KB physical pages allocated by the Driver or imported pages
+ * and is needed as the physical pages tracking object stores the reference for
+ * physical pages using tagged address type in lieu of the type generally used
+ * for physical addresses.
+ *
+ * Return: address of tagged address type.
+ */
+static inline struct tagged_addr as_tagged(phys_addr_t phys)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = phys & PAGE_MASK;
+	return t;
+}
+
+/**
+ * as_tagged_tag - Form the tagged address by storing the tag or metadata in the
+ * lower order 12 bits of physical address
+ * @phys: physical address to be converted to tagged address
+ * @tag: tag to be stored along with the physical address.
+ *
+ * The tag info is used while freeing up the pages.
+ *
+ * Return: tagged address storing physical address & tag.
+ */
+static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag)
+{
+	struct tagged_addr t;
+
+	t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK);
+	return t;
+}
+
+/**
+ * is_huge - Check if the physical page is one of the 512 4KB pages of the
+ * large page which was not split to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page belongs to a large page, or false
+ */
+static inline bool is_huge(struct tagged_addr t)
+{
+	return t.tagged_addr & HUGE_PAGE;
+}
+
+/**
+ * is_huge_head - Check if the physical page is the first 4KB page of the
+ * 512 4KB pages within a large page which was not split
+ * to be used partially
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page is the first page of a large page, or false
+ */
+static inline bool is_huge_head(struct tagged_addr t)
+{
+	int mask = HUGE_HEAD | HUGE_PAGE;
+
+	return mask == (t.tagged_addr & mask);
+}
+
+/**
+ * is_partial - Check if the physical page is one of the 512 pages of the
+ * large page which was split in 4KB pages to be used
+ * partially for allocations >= 2 MB in size.
+ * @t: tagged address storing the tag in the lower order bits.
+ *
+ * Return: true if page was taken from large page used partially, or false
+ */
+static inline bool is_partial(struct tagged_addr t)
+{
+	return t.tagged_addr & FROM_PARTIAL;
+}
+
+#endif /* _KBASE_MEM_LOWLEVEL_H */
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c
new file mode 100644
index 000000000000..0723e32e2003
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c
@@ -0,0 +1,856 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define pool_dbg(pool, format, ...) \ + dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ + (pool->next_pool) ? "kctx" : "kbdev", \ + kbase_mem_pool_size(pool), \ + kbase_mem_pool_max_size(pool), \ + ##__VA_ARGS__) + +#define NOT_DIRTY false +#define NOT_RECLAIMED false + +static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) +{ + ssize_t max_size = kbase_mem_pool_max_size(pool); + ssize_t cur_size = kbase_mem_pool_size(pool); + + return max(max_size - cur_size, (ssize_t)0); +} + +static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool) +{ + return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool); +} + +static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) +{ + return kbase_mem_pool_size(pool) == 0; +} + +static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, + struct page *p) +{ + lockdep_assert_held(&pool->pool_lock); + + list_add(&p->lru, &pool->page_list); + pool->cur_size++; + + pool_dbg(pool, "added page\n"); +} + +static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) +{ + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_locked(pool, p); + kbase_mem_pool_unlock(pool); +} + +static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, + struct list_head *page_list, size_t nr_pages) +{ + lockdep_assert_held(&pool->pool_lock); + + list_splice(page_list, &pool->page_list); + pool->cur_size += nr_pages; + + pool_dbg(pool, "added %zu pages\n", nr_pages); +} + +static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, + struct list_head *page_list, size_t nr_pages) +{ + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); + kbase_mem_pool_unlock(pool); +} + +static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) +{ + struct page *p; + + lockdep_assert_held(&pool->pool_lock); + + if (kbase_mem_pool_is_empty(pool)) + return NULL; + + p = list_first_entry(&pool->page_list, struct page, lru); + list_del_init(&p->lru); + pool->cur_size--; + + pool_dbg(pool, "removed page\n"); + + return p; +} + +static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool) +{ + struct page *p; + + kbase_mem_pool_lock(pool); + p = kbase_mem_pool_remove_locked(pool); + kbase_mem_pool_unlock(pool); + + return p; +} + +static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, + struct page *p) +{ + struct device *dev = pool->kbdev->dev; + dma_sync_single_for_device(dev, kbase_dma_addr(p), + (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); +} + +static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, + struct page *p) +{ + int i; + + for (i = 0; i < (1U << pool->order); i++) + clear_highpage(p+i); + + kbase_mem_pool_sync_page(pool, p); +} + +static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, + struct page *p) +{ + /* Zero page before spilling */ + kbase_mem_pool_zero_page(next_pool, p); + + kbase_mem_pool_add(next_pool, p); +} + +struct page 
*kbase_mem_alloc_page(struct kbase_mem_pool *pool) +{ + struct page *p; + gfp_t gfp; + struct kbase_device *const kbdev = pool->kbdev; + struct device *const dev = kbdev->dev; + dma_addr_t dma_addr; + int i; + +#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) + /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ + gfp = GFP_USER | __GFP_ZERO; +#else + gfp = GFP_HIGHUSER | __GFP_ZERO; +#endif + + /* don't warn on higher order failures */ + if (pool->order) + gfp |= __GFP_NOWARN; + + p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, + pool->group_id, gfp, pool->order); + if (!p) + return NULL; + + dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order), + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(dev, dma_addr)) { + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, + pool->group_id, p, pool->order); + return NULL; + } + + WARN_ON(dma_addr != page_to_phys(p)); + for (i = 0; i < (1u << pool->order); i++) + kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i); + + return p; +} + +static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, + struct page *p) +{ + struct kbase_device *const kbdev = pool->kbdev; + struct device *const dev = kbdev->dev; + dma_addr_t dma_addr = kbase_dma_addr(p); + int i; + + dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order), + DMA_BIDIRECTIONAL); + for (i = 0; i < (1u << pool->order); i++) + kbase_clear_dma_addr(p+i); + + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, + pool->group_id, p, pool->order); + + pool_dbg(pool, "freed page to kernel\n"); +} + +static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, + size_t nr_to_shrink) +{ + struct page *p; + size_t i; + + lockdep_assert_held(&pool->pool_lock); + + for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { + p = kbase_mem_pool_remove_locked(pool); + kbase_mem_pool_free_page(pool, p); + } + + return i; +} + +static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, + size_t nr_to_shrink) +{ + size_t nr_freed; + + kbase_mem_pool_lock(pool); + nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink); + kbase_mem_pool_unlock(pool); + + return nr_freed; +} + +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, + size_t nr_to_grow) +{ + struct page *p; + size_t i; + + kbase_mem_pool_lock(pool); + + pool->dont_reclaim = true; + for (i = 0; i < nr_to_grow; i++) { + if (pool->dying) { + pool->dont_reclaim = false; + kbase_mem_pool_shrink_locked(pool, nr_to_grow); + kbase_mem_pool_unlock(pool); + + return -ENOMEM; + } + kbase_mem_pool_unlock(pool); + + p = kbase_mem_alloc_page(pool); + if (!p) { + kbase_mem_pool_lock(pool); + pool->dont_reclaim = false; + kbase_mem_pool_unlock(pool); + + return -ENOMEM; + } + + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_locked(pool, p); + } + pool->dont_reclaim = false; + kbase_mem_pool_unlock(pool); + + return 0; +} + +void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) +{ + size_t cur_size; + int err = 0; + + cur_size = kbase_mem_pool_size(pool); + + if (new_size > pool->max_size) + new_size = pool->max_size; + + if (new_size < cur_size) + kbase_mem_pool_shrink(pool, cur_size - new_size); + else if (new_size > cur_size) + err = kbase_mem_pool_grow(pool, new_size - cur_size); + + if (err) { + size_t grown_size = kbase_mem_pool_size(pool); + + dev_warn(pool->kbdev->dev, + "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n", + (new_size - cur_size), (grown_size - cur_size)); + } +} 
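(Editorial aside, not part of the submitted patch.) kbase_mem_pool_grow() and kbase_mem_pool_trim() above are the two knobs a caller has for pre-sizing a pool: grow allocates pages from the kernel and adds them to the pool, dropping the pool lock around each page allocation, while trim clamps the requested size to max_size and frees any surplus pages back to the kernel. A minimal usage sketch follows; the function name and the caller's sizing policy are hypothetical:

/*
 * Illustrative sketch only: pre-fill a pool before a burst of allocations,
 * then shrink it back to its previous size afterwards.
 */
static int example_prefill_then_restore(struct kbase_mem_pool *pool,
		size_t nr_pages)
{
	size_t before = kbase_mem_pool_size(pool);
	int err;

	/* Top the pool up so the upcoming allocations are served from the
	 * pool instead of hitting the kernel allocator in the hot path.
	 */
	if (before < nr_pages) {
		err = kbase_mem_pool_grow(pool, nr_pages - before);
		if (err)
			return err; /* -ENOMEM if the kernel could not supply pages */
	}

	/* ... caller performs its allocations against the pool here ... */

	/* Return the pool to its previous size; trim frees surplus pages
	 * back to the kernel.
	 */
	kbase_mem_pool_trim(pool, before);

	return 0;
}

Because kbase_mem_pool_trim() never grows a pool beyond max_size, restoring the previously recorded size stays safe even if max_size was lowered in the meantime.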
+ +void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) +{ + size_t cur_size; + size_t nr_to_shrink; + + kbase_mem_pool_lock(pool); + + pool->max_size = max_size; + + cur_size = kbase_mem_pool_size(pool); + if (max_size < cur_size) { + nr_to_shrink = cur_size - max_size; + kbase_mem_pool_shrink_locked(pool, nr_to_shrink); + } + + kbase_mem_pool_unlock(pool); +} + + +static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_mem_pool *pool; + size_t pool_size; + + pool = container_of(s, struct kbase_mem_pool, reclaim); + + kbase_mem_pool_lock(pool); + if (pool->dont_reclaim && !pool->dying) { + kbase_mem_pool_unlock(pool); + return 0; + } + pool_size = kbase_mem_pool_size(pool); + kbase_mem_pool_unlock(pool); + + return pool_size; +} + +static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_mem_pool *pool; + unsigned long freed; + + pool = container_of(s, struct kbase_mem_pool, reclaim); + + kbase_mem_pool_lock(pool); + if (pool->dont_reclaim && !pool->dying) { + kbase_mem_pool_unlock(pool); + return 0; + } + + pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); + + freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan); + + kbase_mem_pool_unlock(pool); + + pool_dbg(pool, "reclaim freed %ld pages\n", freed); + + return freed; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_pool_reclaim_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_pool_reclaim_count_objects(s, sc); + + return kbase_mem_pool_reclaim_scan_objects(s, sc); +} +#endif + +int kbase_mem_pool_init(struct kbase_mem_pool *pool, + const struct kbase_mem_pool_config *config, + unsigned int order, + int group_id, + struct kbase_device *kbdev, + struct kbase_mem_pool *next_pool) +{ + if (WARN_ON(group_id < 0) || + WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } + + pool->cur_size = 0; + pool->max_size = kbase_mem_pool_config_get_max_size(config); + pool->order = order; + pool->group_id = group_id; + pool->kbdev = kbdev; + pool->next_pool = next_pool; + pool->dying = false; + + spin_lock_init(&pool->pool_lock); + INIT_LIST_HEAD(&pool->page_list); + + /* Register shrinker */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink; +#else + pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; + pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; +#endif + pool->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + pool->reclaim.batch = 0; +#endif + register_shrinker(&pool->reclaim); + + pool_dbg(pool, "initialized\n"); + + return 0; +} + +void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool) +{ + kbase_mem_pool_lock(pool); + pool->dying = true; + kbase_mem_pool_unlock(pool); +} + +void kbase_mem_pool_term(struct kbase_mem_pool *pool) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + struct page *p, *tmp; + size_t nr_to_spill = 0; + LIST_HEAD(spill_list); + LIST_HEAD(free_list); + int i; + + pool_dbg(pool, "terminate()\n"); + + unregister_shrinker(&pool->reclaim); + + kbase_mem_pool_lock(pool); + pool->max_size = 0; + + if (next_pool && !kbase_mem_pool_is_full(next_pool)) { + /* Spill to next pool (may overspill) */ + nr_to_spill = 
kbase_mem_pool_capacity(next_pool); + nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill); + + /* Zero pages first without holding the next_pool lock */ + for (i = 0; i < nr_to_spill; i++) { + p = kbase_mem_pool_remove_locked(pool); + list_add(&p->lru, &spill_list); + } + } + + while (!kbase_mem_pool_is_empty(pool)) { + /* Free remaining pages to kernel */ + p = kbase_mem_pool_remove_locked(pool); + list_add(&p->lru, &free_list); + } + + kbase_mem_pool_unlock(pool); + + if (next_pool && nr_to_spill) { + list_for_each_entry(p, &spill_list, lru) + kbase_mem_pool_zero_page(pool, p); + + /* Add new page list to next_pool */ + kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); + + pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); + } + + list_for_each_entry_safe(p, tmp, &free_list, lru) { + list_del_init(&p->lru); + kbase_mem_pool_free_page(pool, p); + } + + pool_dbg(pool, "terminated\n"); +} + +struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) +{ + struct page *p; + + do { + pool_dbg(pool, "alloc()\n"); + p = kbase_mem_pool_remove(pool); + + if (p) + return p; + + pool = pool->next_pool; + } while (pool); + + return NULL; +} + +struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) +{ + struct page *p; + + lockdep_assert_held(&pool->pool_lock); + + pool_dbg(pool, "alloc_locked()\n"); + p = kbase_mem_pool_remove_locked(pool); + + if (p) + return p; + + return NULL; +} + +void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, + bool dirty) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + + pool_dbg(pool, "free()\n"); + + if (!kbase_mem_pool_is_full(pool)) { + /* Add to our own pool */ + if (dirty) + kbase_mem_pool_sync_page(pool, p); + + kbase_mem_pool_add(pool, p); + } else if (next_pool && !kbase_mem_pool_is_full(next_pool)) { + /* Spill to next pool */ + kbase_mem_pool_spill(next_pool, p); + } else { + /* Free page */ + kbase_mem_pool_free_page(pool, p); + } +} + +void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, + bool dirty) +{ + pool_dbg(pool, "free_locked()\n"); + + lockdep_assert_held(&pool->pool_lock); + + if (!kbase_mem_pool_is_full(pool)) { + /* Add to our own pool */ + if (dirty) + kbase_mem_pool_sync_page(pool, p); + + kbase_mem_pool_add_locked(pool, p); + } else { + /* Free page */ + kbase_mem_pool_free_page(pool, p); + } +} + +int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, + struct tagged_addr *pages, bool partial_allowed) +{ + struct page *p; + size_t nr_from_pool; + size_t i = 0; + int err = -ENOMEM; + size_t nr_pages_internal; + + nr_pages_internal = nr_4k_pages / (1u << (pool->order)); + + if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) + return -EINVAL; + + pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages); + pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal); + + /* Get pages from this pool */ + kbase_mem_pool_lock(pool); + nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); + while (nr_from_pool--) { + int j; + p = kbase_mem_pool_remove_locked(pool); + if (pool->order) { + pages[i++] = as_tagged_tag(page_to_phys(p), + HUGE_HEAD | HUGE_PAGE); + for (j = 1; j < (1u << pool->order); j++) + pages[i++] = as_tagged_tag(page_to_phys(p) + + PAGE_SIZE * j, + HUGE_PAGE); + } else { + pages[i++] = as_tagged(page_to_phys(p)); + } + } + kbase_mem_pool_unlock(pool); + + if (i != nr_4k_pages && pool->next_pool) { + /* Allocate via next pool */ + err = kbase_mem_pool_alloc_pages(pool->next_pool, + 
nr_4k_pages - i, pages + i, partial_allowed); + + if (err < 0) + goto err_rollback; + + i += err; + } else { + /* Get any remaining pages from kernel */ + while (i != nr_4k_pages) { + p = kbase_mem_alloc_page(pool); + if (!p) { + if (partial_allowed) + goto done; + else + goto err_rollback; + } + + if (pool->order) { + int j; + + pages[i++] = as_tagged_tag(page_to_phys(p), + HUGE_PAGE | + HUGE_HEAD); + for (j = 1; j < (1u << pool->order); j++) { + phys_addr_t phys; + + phys = page_to_phys(p) + PAGE_SIZE * j; + pages[i++] = as_tagged_tag(phys, + HUGE_PAGE); + } + } else { + pages[i++] = as_tagged(page_to_phys(p)); + } + } + } + +done: + pool_dbg(pool, "alloc_pages(%zu) done\n", i); + return i; + +err_rollback: + kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); + return err; +} + +int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, + size_t nr_4k_pages, struct tagged_addr *pages) +{ + struct page *p; + size_t i; + size_t nr_pages_internal; + + lockdep_assert_held(&pool->pool_lock); + + nr_pages_internal = nr_4k_pages / (1u << (pool->order)); + + if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) + return -EINVAL; + + pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages); + pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n", + nr_pages_internal); + + if (kbase_mem_pool_size(pool) < nr_pages_internal) { + pool_dbg(pool, "Failed alloc\n"); + return -ENOMEM; + } + + for (i = 0; i < nr_pages_internal; i++) { + int j; + + p = kbase_mem_pool_remove_locked(pool); + if (pool->order) { + *pages++ = as_tagged_tag(page_to_phys(p), + HUGE_HEAD | HUGE_PAGE); + for (j = 1; j < (1u << pool->order); j++) { + *pages++ = as_tagged_tag(page_to_phys(p) + + PAGE_SIZE * j, + HUGE_PAGE); + } + } else { + *pages++ = as_tagged(page_to_phys(p)); + } + } + + return nr_4k_pages; +} + +static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, + bool zero, bool sync) +{ + struct page *p; + size_t nr_to_pool = 0; + LIST_HEAD(new_page_list); + size_t i; + + if (!nr_pages) + return; + + pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", + nr_pages, zero, sync); + + /* Zero/sync pages first without holding the pool lock */ + for (i = 0; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge_head(pages[i]) || !is_huge(pages[i])) { + p = as_page(pages[i]); + if (zero) + kbase_mem_pool_zero_page(pool, p); + else if (sync) + kbase_mem_pool_sync_page(pool, p); + + list_add(&p->lru, &new_page_list); + nr_to_pool++; + } + pages[i] = as_tagged(0); + } + + /* Add new page list to pool */ + kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); + + pool_dbg(pool, "add_array(%zu) added %zu pages\n", + nr_pages, nr_to_pool); +} + +static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, + bool zero, bool sync) +{ + struct page *p; + size_t nr_to_pool = 0; + LIST_HEAD(new_page_list); + size_t i; + + lockdep_assert_held(&pool->pool_lock); + + if (!nr_pages) + return; + + pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n", + nr_pages, zero, sync); + + /* Zero/sync pages first */ + for (i = 0; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge_head(pages[i]) || !is_huge(pages[i])) { + p = as_page(pages[i]); + if (zero) + kbase_mem_pool_zero_page(pool, p); + else if (sync) + kbase_mem_pool_sync_page(pool, p); + + list_add(&p->lru, &new_page_list); + nr_to_pool++; + } + pages[i] = 
as_tagged(0); + } + + /* Add new page list to pool */ + kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool); + + pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n", + nr_pages, nr_to_pool); +} + +void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, + struct tagged_addr *pages, bool dirty, bool reclaimed) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + struct page *p; + size_t nr_to_pool; + LIST_HEAD(to_pool_list); + size_t i = 0; + + pool_dbg(pool, "free_pages(%zu):\n", nr_pages); + + if (!reclaimed) { + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); + + kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + + i += nr_to_pool; + + if (i != nr_pages && next_pool) { + /* Spill to next pool (may overspill) */ + nr_to_pool = kbase_mem_pool_capacity(next_pool); + nr_to_pool = min(nr_pages - i, nr_to_pool); + + kbase_mem_pool_add_array(next_pool, nr_to_pool, + pages + i, true, dirty); + i += nr_to_pool; + } + } + + /* Free any remaining pages to kernel */ + for (; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge(pages[i]) && !is_huge_head(pages[i])) { + pages[i] = as_tagged(0); + continue; + } + + p = as_page(pages[i]); + + kbase_mem_pool_free_page(pool, p); + pages[i] = as_tagged(0); + } + + pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); +} + + +void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, bool dirty, + bool reclaimed) +{ + struct page *p; + size_t nr_to_pool; + LIST_HEAD(to_pool_list); + size_t i = 0; + + lockdep_assert_held(&pool->pool_lock); + + pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages); + + if (!reclaimed) { + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); + + kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false, + dirty); + + i += nr_to_pool; + } + + /* Free any remaining pages to kernel */ + for (; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge(pages[i]) && !is_huge_head(pages[i])) { + pages[i] = as_tagged(0); + continue; + } + + p = as_page(pages[i]); + + kbase_mem_pool_free_page(pool, p); + pages[i] = as_tagged(0); + } + + pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_debugfs.c new file mode 100644 index 000000000000..5879fdf85b1d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_debugfs.c @@ -0,0 +1,191 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +#include "mali_kbase_mem_pool_debugfs.h" +#include "mali_kbase_debugfs_helper.h" + +void kbase_mem_pool_debugfs_trim(void *const array, size_t const index, + size_t const value) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; + + kbase_mem_pool_trim(&mem_pools[index], value); +} + +void kbase_mem_pool_debugfs_set_max_size(void *const array, + size_t const index, size_t const value) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; + + kbase_mem_pool_set_max_size(&mem_pools[index], value); +} + +size_t kbase_mem_pool_debugfs_size(void *const array, size_t const index) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; + + return kbase_mem_pool_size(&mem_pools[index]); +} + +size_t kbase_mem_pool_debugfs_max_size(void *const array, size_t const index) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; + + return kbase_mem_pool_max_size(&mem_pools[index]); +} + +void kbase_mem_pool_config_debugfs_set_max_size(void *const array, + size_t const index, size_t const value) +{ + struct kbase_mem_pool_config *const configs = array; + + if (WARN_ON(!configs) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; + + kbase_mem_pool_config_set_max_size(&configs[index], value); +} + +size_t kbase_mem_pool_config_debugfs_max_size(void *const array, + size_t const index) +{ + struct kbase_mem_pool_config *const configs = array; + + if (WARN_ON(!configs) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; + + return kbase_mem_pool_config_get_max_size(&configs[index]); +} + +static int kbase_mem_pool_debugfs_size_show(struct seq_file *sfile, void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_size); +} + +static ssize_t kbase_mem_pool_debugfs_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_trim); + return err ? err : count; +} + +static int kbase_mem_pool_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbase_mem_pool_debugfs_size_show, + in->i_private); +} + +static const struct file_operations kbase_mem_pool_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbase_mem_pool_debugfs_open, + .read = seq_read, + .write = kbase_mem_pool_debugfs_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile, + void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); +} + +static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + return err ? 
err : count; +} + +static int kbase_mem_pool_debugfs_max_size_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbase_mem_pool_debugfs_max_size_show, + in->i_private); +} + +static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { + .owner = THIS_MODULE, + .open = kbase_mem_pool_debugfs_max_size_open, + .read = seq_read, + .write = kbase_mem_pool_debugfs_max_size_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbase_mem_pool_debugfs_init(struct dentry *parent, + struct kbase_context *kctx) +{ + /* prevent unprivileged use of debug file in old kernel version */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0644; +#else + const mode_t mode = 0600; +#endif + + debugfs_create_file("mem_pool_size", mode, parent, + &kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops); + + debugfs_create_file("mem_pool_max_size", mode, parent, + &kctx->mem_pools.small, &kbase_mem_pool_debugfs_max_size_fops); + + debugfs_create_file("lp_mem_pool_size", mode, parent, + &kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops); + + debugfs_create_file("lp_mem_pool_max_size", mode, parent, + &kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_debugfs.h new file mode 100644 index 000000000000..2932945b3185 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_debugfs.h @@ -0,0 +1,123 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MEM_POOL_DEBUGFS_H_ +#define _KBASE_MEM_POOL_DEBUGFS_H_ + +#include + +/** + * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool + * @parent: Parent debugfs dentry + * @kctx: The kbase context + * + * Adds four debugfs files under @parent: + * - mem_pool_size: get/set the current sizes of @kctx: mem_pools + * - mem_pool_max_size: get/set the max sizes of @kctx: mem_pools + * - lp_mem_pool_size: get/set the current sizes of @kctx: lp_mem_pool + * - lp_mem_pool_max_size: get/set the max sizes of @kctx:lp_mem_pool + */ +void kbase_mem_pool_debugfs_init(struct dentry *parent, + struct kbase_context *kctx); + +/** + * kbase_mem_pool_debugfs_trim - Grow or shrink a memory pool to a new size + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value: New number of pages in the pool. + * + * If @value > current size, fill the pool with new pages from the kernel, but + * not above the max_size for the pool. 
+ * If @value < current size, shrink the pool by freeing pages to the kernel. + */ +void kbase_mem_pool_debugfs_trim(void *array, size_t index, size_t value); + +/** + * kbase_mem_pool_debugfs_set_max_size - Set maximum number of free pages in + * memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value: Maximum number of free pages the pool can hold. + * + * If the maximum size is reduced, the pool will be shrunk to adhere to the + * new limit. For details see kbase_mem_pool_shrink(). + */ +void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index, + size_t value); + +/** + * kbase_mem_pool_debugfs_size - Get number of free pages in a memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Note: the size of the pool may in certain corner cases exceed @max_size! + * + * Return: Number of free pages in the pool + */ +size_t kbase_mem_pool_debugfs_size(void *array, size_t index); + +/** + * kbase_mem_pool_debugfs_max_size - Get maximum number of free pages in a + * memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: Maximum number of free pages in the pool + */ +size_t kbase_mem_pool_debugfs_max_size(void *array, size_t index); + +/** + * kbase_mem_pool_config_debugfs_set_max_size - Set maximum number of free pages + * in initial configuration of pool + * + * @array: Array of initial configurations for a set of physical memory pools. + * @index: A memory group ID to be used as an index into the array. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value : Maximum number of free pages that a memory pool created from the + * selected configuration can hold. + */ +void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index, + size_t value); + +/** + * kbase_mem_pool_config_debugfs_max_size - Get maximum number of free pages + * from initial configuration of pool + * + * @array: Array of initial configurations for a set of physical memory pools. + * @index: A memory group ID to be used as an index into the array. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: Maximum number of free pages that a memory pool created from the + * selected configuration can hold. + */ +size_t kbase_mem_pool_config_debugfs_max_size(void *array, size_t index); + +#endif /*_KBASE_MEM_POOL_DEBUGFS_H_ */ + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_group.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_group.c new file mode 100644 index 000000000000..aa2554805b5b --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_group.c @@ -0,0 +1,115 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include + +void kbase_mem_pool_group_config_set_max_size( + struct kbase_mem_pool_group_config *const configs, + size_t const max_size) +{ + size_t const large_max_size = max_size >> + (KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER - + KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_config_set_max_size(&configs->small[gid], + max_size); + + kbase_mem_pool_config_set_max_size(&configs->large[gid], + large_max_size); + } +} + +int kbase_mem_pool_group_init( + struct kbase_mem_pool_group *const mem_pools, + struct kbase_device *const kbdev, + const struct kbase_mem_pool_group_config *const configs, + struct kbase_mem_pool_group *next_pools) +{ + int gid, err = 0; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + err = kbase_mem_pool_init(&mem_pools->small[gid], + &configs->small[gid], + KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, + gid, + kbdev, + next_pools ? &next_pools->small[gid] : NULL); + + if (!err) { + err = kbase_mem_pool_init(&mem_pools->large[gid], + &configs->large[gid], + KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, + gid, + kbdev, + next_pools ? &next_pools->large[gid] : NULL); + if (err) + kbase_mem_pool_term(&mem_pools->small[gid]); + } + + /* Break out of the loop early to avoid incrementing the count + * of memory pool pairs successfully initialized. + */ + if (err) + break; + } + + if (err) { + /* gid gives the number of memory pool pairs successfully + * initialized, which is one greater than the array index of the + * last group. + */ + while (gid-- > 0) { + kbase_mem_pool_term(&mem_pools->small[gid]); + kbase_mem_pool_term(&mem_pools->large[gid]); + } + } + + return err; +} + +void kbase_mem_pool_group_mark_dying( + struct kbase_mem_pool_group *const mem_pools) +{ + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_mark_dying(&mem_pools->small[gid]); + kbase_mem_pool_mark_dying(&mem_pools->large[gid]); + } +} + +void kbase_mem_pool_group_term( + struct kbase_mem_pool_group *const mem_pools) +{ + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_term(&mem_pools->small[gid]); + kbase_mem_pool_term(&mem_pools->large[gid]); + } +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_group.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_group.h new file mode 100644 index 000000000000..0484f5940ad1 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool_group.h @@ -0,0 +1,92 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_MEM_POOL_GROUP_H_
+#define _KBASE_MEM_POOL_GROUP_H_
+
+#include
+
+/**
+ * kbase_mem_pool_group_config_set_max_size - Set the initial configuration for
+ * a set of memory pools
+ *
+ * This function sets the initial configuration for every memory pool so that
+ * the maximum amount of free memory that each pool can hold is identical.
+ * The equivalent number of 2 MiB pages is calculated automatically for the
+ * purpose of configuring the large page pools.
+ *
+ * @configs: Initial configuration for the set of memory pools
+ * @max_size: Maximum number of free 4 KiB pages each pool can hold
+ */
+void kbase_mem_pool_group_config_set_max_size(
+	struct kbase_mem_pool_group_config *configs, size_t max_size);
+
+/**
+ * kbase_mem_pool_group_init - Initialize a set of memory pools
+ *
+ * Initializes a complete set of physical memory pools. Memory pools are used to
+ * allow efficient reallocation of previously-freed physical pages. A pair of
+ * memory pools is initialized for each physical memory group: one for 4 KiB
+ * pages and one for 2 MiB pages.
+ *
+ * If @next_pools is not NULL then a request to allocate memory from an
+ * empty pool in @mem_pools will attempt to allocate from the equivalent pool
+ * in @next_pools before going to the memory group manager. Similarly
+ * pages can spill over to the equivalent pool in @next_pools when a pool
+ * is full in @mem_pools. Pages are zeroed before they spill over to another
+ * pool, to prevent leaking information between applications.
+ *
+ * @mem_pools: Set of memory pools to initialize
+ * @kbdev: Kbase device where memory is used
+ * @configs: Initial configuration for the set of memory pools
+ * @next_pools: Set of memory pools from which to allocate memory if there
+ * is no free memory in one of the @mem_pools
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools,
+	struct kbase_device *kbdev,
+	const struct kbase_mem_pool_group_config *configs,
+	struct kbase_mem_pool_group *next_pools);
+
+/**
+ * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying
+ *
+ * Marks a complete set of physical memory pools previously initialized by
+ * @kbase_mem_pool_group_init as dying. This will cause any ongoing allocation
+ * operations (e.g. growing on page fault) to be terminated.
+ *
+ * @mem_pools: Set of memory pools to mark
+ */
+void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *mem_pools);
+
+/**
+ * kbase_mem_pool_group_term - Terminate a set of memory pools
+ *
+ * Terminates a complete set of physical memory pools previously initialized by
+ * @kbase_mem_pool_group_init.
+ *
+ * @mem_pools: Set of memory pools to terminate
+ */
+void kbase_mem_pool_group_term(struct kbase_mem_pool_group *mem_pools);
+
+#endif /* _KBASE_MEM_POOL_GROUP_H_ */
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs.c
new file mode 100644
index 000000000000..4770bae441cf
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +#ifdef CONFIG_DEBUG_FS + +/** Show callback for the @c mem_profile debugfs file. + * + * This function is called to get the contents of the @c mem_profile debugfs + * file. This is a report of current memory usage and distribution in userspace. + * + * @param sfile The debugfs entry + * @param data Data associated with the entry + * + * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise + */ +static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) +{ + struct kbase_context *kctx = sfile->private; + + /* MALI_SEC_INTEGRATION : DESTROYED CONTEXT */ + struct kbase_device *kbdev = gpu_get_device_structure(); + mutex_lock(&kbdev->kctx_list_lock); + if (kctx == NULL) { + mutex_unlock(&kbdev->kctx_list_lock); + return 0; + } else { + if (kbdev->vendor_callbacks->mem_profile_check_kctx) { + if (!kbdev->vendor_callbacks->mem_profile_check_kctx(kctx)) { + mutex_unlock(&kbdev->kctx_list_lock); + return 0; + } + } + + if (kctx->destroying_context == true) { + mutex_unlock(&kbdev->kctx_list_lock); + return 0; + } + atomic_inc(&kctx->mem_profile_showing_state); + } + mutex_unlock(&kbdev->kctx_list_lock); + + mutex_lock(&kctx->mem_profile_lock); + + /* MALI_SEC_INTEGRATION */ + if (kctx->mem_profile_data) { + seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); + + seq_putc(sfile, '\n'); + } + + mutex_unlock(&kctx->mem_profile_lock); + + /* MALI_SEC_INTEGRATION */ + atomic_dec(&kctx->mem_profile_showing_state); + + return 0; +} + +/* + * File operations related to debugfs entry for mem_profile + */ +static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_mem_profile_seq_show, in->i_private); +} + +static const struct file_operations kbasep_mem_profile_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_mem_profile_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) +{ + int err = 0; + + mutex_lock(&kctx->mem_profile_lock); + + dev_dbg(kctx->kbdev->dev, "initialised: %d", + kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + + if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { + err = -ENOMEM; + } else if (!debugfs_create_file("mem_profile", 0444, + kctx->kctx_dentry, kctx, + &kbasep_mem_profile_debugfs_fops)) { + err = -EAGAIN; + } else { + kbase_ctx_flag_set(kctx, + KCTX_MEM_PROFILE_INITIALIZED); + } + } + + if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { + kfree(kctx->mem_profile_data); + kctx->mem_profile_data = data; + kctx->mem_profile_size = size; + } else { + kfree(data); + } + + dev_dbg(kctx->kbdev->dev, "returning: %d, 
initialised: %d", + err, kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + + mutex_unlock(&kctx->mem_profile_lock); + + return err; +} + +void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) +{ + mutex_lock(&kctx->mem_profile_lock); + + dev_dbg(kctx->kbdev->dev, "initialised: %d", + kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + + kfree(kctx->mem_profile_data); + kctx->mem_profile_data = NULL; + kctx->mem_profile_size = 0; + + mutex_unlock(&kctx->mem_profile_lock); +} + +#else /* CONFIG_DEBUG_FS */ + +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) +{ + kfree(data); + return 0; +} +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs.h new file mode 100644 index 000000000000..1462247c3bca --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs.h @@ -0,0 +1,64 @@ +/* + * + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_mem_profile_debugfs.h + * Header file for mem profiles entries in debugfs + * + */ + +#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H +#define _KBASE_MEM_PROFILE_DEBUGFS_H + +#include +#include + +/** + * @brief Remove entry from Mali memory profile debugfs + */ +void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); + +/** + * @brief Insert @p data to the debugfs file so it can be read by userspace + * + * The function takes ownership of @p data and frees it later when new data + * is inserted. + * + * If the debugfs entry corresponding to the @p kctx doesn't exist, + * an attempt will be made to create it. + * + * @param kctx The context whose debugfs file @p data should be inserted to + * @param data A NULL-terminated string to be inserted to the debugfs file, + * without the trailing new line character + * @param size The length of the @p data string + * @return 0 if @p data inserted correctly + * -EAGAIN in case of error + * @post @ref mem_profile_initialized will be set to @c true + * the first time this function succeeds. + */ +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size); + +#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs_buf_size.h new file mode 100644 index 000000000000..d55cc854c415 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_profile_debugfs_buf_size.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014, 2018-2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_mem_profile_debugfs_buf_size.h + * Header file for the size of the buffer to accumulate the histogram report text in + */ + +#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ +#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ + +/** + * The size of the buffer to accumulate the histogram report text in + * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT + */ +#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \ + ((size_t) (64 + ((80 + (56 * 64)) * 53) + 56)) + +#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/ + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mipe_gen_header.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mipe_gen_header.h new file mode 100644 index 000000000000..ec5212275751 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mipe_gen_header.h @@ -0,0 +1,217 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +/* clang-format off */ + +#include "mali_kbase_mipe_proto.h" + +/** + * This header generates MIPE tracepoint declaration BLOB at + * compile time. + * + * It is intentional that there is no header guard. + * The header could be included multiple times for + * different blobs compilation. + * + * Before including this header MIPE_HEADER_* parameters must be + * defined. See documentation below: + */ + +/** + * The name of the variable where the result BLOB will be stored. + */ +#if !defined(MIPE_HEADER_BLOB_VAR_NAME) +#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" +#endif + +/** + * A compiler attribute for the BLOB variable. + * + * e.g. __attribute__((section("my_section"))) + * + * Default value is no attribute. + */ +#if !defined(MIPE_HEADER_BLOB_VAR_ATTRIBUTE) +#define MIPE_HEADER_BLOB_VAR_ATTRIBUTE +#endif + +/** + * MIPE stream id. + * + * See enum tl_stream_id. + */ +#if !defined(MIPE_HEADER_STREAM_ID) +#error "MIPE_HEADER_STREAM_ID must be defined!" +#endif + +/** + * MIPE packet class. + * + * See enum tl_packet_class. 
+ */ +#if !defined(MIPE_HEADER_PKT_CLASS) +#error "MIPE_HEADER_PKT_CLASS must be defined!" +#endif + +/** + * The list of tracepoints to process. + * + * It should be defined as follows: + * #define MIPE_HEADER_TRACEPOINT_LIST \ + * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * etc. + * + * Where the first argument is tracepoints name, the second + * argument is a short tracepoint description, the third argument + * argument types (see MIPE documentation), and the fourth argument + * is comma separated argument names. + */ +#if !defined(MIPE_HEADER_TRACEPOINT_LIST) +#error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" +#endif + +/** + * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. + */ +#if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) +#error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" +#endif + +/** + * The list of enums to process. + * + * It should be defined as follows: + * #define MIPE_HEADER_ENUM_LIST \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * etc. + * + * Where enum_arg_name is the name of a tracepoint argument being used with + * this enum. enum_value is a valid C enum value. + * + * Default value is an empty list. + */ +#if defined(MIPE_HEADER_ENUM_LIST) + +/** + * Tracepoint message ID used for enums declaration. + */ +#if !defined(MIPE_HEADER_ENUM_MSG_ID) +#error "MIPE_HEADER_ENUM_MSG_ID must be defined!" +#endif + +#else +#define MIPE_HEADER_ENUM_LIST +#endif + +/** + * The MIPE tracepoint declaration BLOB. + */ +const struct +{ + u32 _mipe_w0; + u32 _mipe_w1; + u8 _protocol_version; + u8 _pointer_size; + u32 _tp_count; +#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ + struct { \ + u32 _name; \ + u32 _size_string_name; \ + char _string_name[sizeof(#name)]; \ + u32 _size_desc; \ + char _desc[sizeof(desc)]; \ + u32 _size_arg_types; \ + char _arg_types[sizeof(arg_types)]; \ + u32 _size_arg_names; \ + char _arg_names[sizeof(arg_names)]; \ + } __attribute__ ((__packed__)) __ ## name; + +#define ENUM_DESC(arg_name, value) \ + struct { \ + u32 _msg_id; \ + u32 _arg_name_len; \ + char _arg_name[sizeof(#arg_name)]; \ + u32 _value; \ + u32 _value_str_len; \ + char _value_str[sizeof(#value)]; \ + } __attribute__ ((__packed__)) __ ## arg_name ## _ ## value; + + MIPE_HEADER_TRACEPOINT_LIST + MIPE_HEADER_ENUM_LIST +#undef TRACEPOINT_DESC +#undef ENUM_DESC +} __attribute__((packed)) MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { + ._mipe_w0 = MIPE_PACKET_HEADER_W0( + TL_PACKET_FAMILY_TL, + MIPE_HEADER_PKT_CLASS, + TL_PACKET_TYPE_HEADER, + MIPE_HEADER_STREAM_ID), + ._mipe_w1 = MIPE_PACKET_HEADER_W1( + sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE, + 0), + ._protocol_version = SWTRACE_VERSION, + ._pointer_size = sizeof(void *), + ._tp_count = MIPE_HEADER_TRACEPOINT_LIST_SIZE, +#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ + .__ ## name = { \ + ._name = name, \ + ._size_string_name = sizeof(#name), \ + ._string_name = #name, \ + ._size_desc = sizeof(desc), \ + ._desc = desc, \ + ._size_arg_types = sizeof(arg_types), \ + ._arg_types = arg_types, \ + ._size_arg_names = sizeof(arg_names), \ + ._arg_names = arg_names \ + }, +#define ENUM_DESC(arg_name, value) \ + .__ ## arg_name ## _ ## value = { \ + ._msg_id = MIPE_HEADER_ENUM_MSG_ID, \ + ._arg_name_len = sizeof(#arg_name), \ + ._arg_name = #arg_name, \ + ._value = value, \ + ._value_str_len = 
sizeof(#value), \ + ._value_str = #value \ + }, + + MIPE_HEADER_TRACEPOINT_LIST + MIPE_HEADER_ENUM_LIST +#undef TRACEPOINT_DESC +#undef ENUM_DESC +}; + +#undef MIPE_HEADER_BLOB_VAR_NAME +#undef MIPE_HEADER_BLOB_VAR_ATTRIBUTE +#undef MIPE_HEADER_STREAM_ID +#undef MIPE_HEADER_PKT_CLASS +#undef MIPE_HEADER_TRACEPOINT_LIST +#undef MIPE_HEADER_TRACEPOINT_LIST_SIZE +#undef MIPE_HEADER_ENUM_LIST +#undef MIPE_HEADER_ENUM_MSG_ID + +/* clang-format on */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mipe_proto.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mipe_proto.h new file mode 100644 index 000000000000..54667cfc6304 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mipe_proto.h @@ -0,0 +1,127 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +/* clang-format off */ + +#if !defined(_KBASE_MIPE_PROTO_H) +#define _KBASE_MIPE_PROTO_H + +#define _BITFIELD_MASK_FIELD(pos, len) \ + (((1u << len) - 1) << pos) + +#define _BITFIELD_SET_FIELD(pos, len, value) \ + (_BITFIELD_MASK_FIELD(pos, len) & (((u32) value) << pos)) + +#define BITFIELD_SET(field_name, value) \ + _BITFIELD_SET_FIELD(field_name ## _POS, field_name ## _LEN, value) + +/* The version of swtrace protocol used in timeline stream. */ +#define SWTRACE_VERSION 3 + +/* Packet header - first word. + * These values must be defined according to MIPE documentation. + */ +#define PACKET_STREAMID_POS 0 +#define PACKET_STREAMID_LEN 8 +#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN) +#define PACKET_RSVD1_LEN 8 +#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN) +#define PACKET_TYPE_LEN 3 +#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN) +#define PACKET_CLASS_LEN 7 +#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN) +#define PACKET_FAMILY_LEN 6 + +/* Packet header - second word + * These values must be defined according to MIPE documentation. + */ +#define PACKET_LENGTH_POS 0 +#define PACKET_LENGTH_LEN 24 +#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN) +#define PACKET_SEQBIT_LEN 1 +#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN) +#define PACKET_RSVD2_LEN 7 + +/* First word of a MIPE packet */ +#define MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id) \ + (0 \ + | BITFIELD_SET(PACKET_FAMILY, pkt_family) \ + | BITFIELD_SET(PACKET_CLASS, pkt_class) \ + | BITFIELD_SET(PACKET_TYPE, pkt_type) \ + | BITFIELD_SET(PACKET_STREAMID, stream_id)) + +/* Second word of a MIPE packet */ +#define MIPE_PACKET_HEADER_W1(packet_length, seqbit) \ + (0 \ + | BITFIELD_SET(PACKET_LENGTH, packet_length) \ + | BITFIELD_SET(PACKET_SEQBIT, seqbit)) + +/* The number of bytes reserved for packet header. 
+ * These value must be defined according to MIPE documentation. + */ +#define PACKET_HEADER_SIZE 8 /* bytes */ + +/* The number of bytes reserved for packet sequence number. + * These value must be defined according to MIPE documentation. + */ +#define PACKET_NUMBER_SIZE 4 /* bytes */ + +/* Timeline packet family ids. + * Values are significant! Check MIPE documentation. + */ +enum tl_packet_family { + TL_PACKET_FAMILY_CTRL = 0, /* control packets */ + TL_PACKET_FAMILY_TL = 1, /* timeline packets */ + TL_PACKET_FAMILY_COUNT +}; + +/* Packet classes used in timeline streams. + * Values are significant! Check MIPE documentation. + */ +enum tl_packet_class { + TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */ + TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */ +}; + +/* Packet types used in timeline streams. + * Values are significant! Check MIPE documentation. + */ +enum tl_packet_type { + TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */ + TL_PACKET_TYPE_BODY = 1, /* stream's body */ + TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ +}; + +/* Stream ID types (timeline family). */ +enum tl_stream_id { + TL_STREAM_ID_USER = 0, /* User-space driver Timeline stream. */ + TL_STREAM_ID_KERNEL = 1, /* Kernel-space driver Timeline stream. */ + TL_STREAM_ID_CSFFW = 2, /* CSF firmware driver Timeline stream. */ +}; + +#endif /* _KBASE_MIPE_PROTO_H */ + +/* clang-format on */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_native_mgm.c b/drivers/gpu/arm/b_r26p0/mali_kbase_native_mgm.c new file mode 100644 index 000000000000..38ae46e0ddf1 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_native_mgm.c @@ -0,0 +1,153 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include +#include + +/** + * kbase_native_mgm_alloc - Native physical memory allocation method + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @gfp_mask: Bitmask of Get Free Page flags affecting allocator behavior. + * @order: Page order for physical page size (order=0 means 4 KiB, + * order=9 means 2 MiB). + * + * Delegates all memory allocation requests to the kernel's alloc_pages + * function. + * + * Return: Pointer to allocated page, or NULL if allocation failed. + */ +static struct page *kbase_native_mgm_alloc( + struct memory_group_manager_device *mgm_dev, int group_id, + gfp_t gfp_mask, unsigned int order) +{ + /* + * Check that the base and the mgm defines, from separate header files, + * for the max number of memory groups are compatible. 
+ */ + BUILD_BUG_ON(BASE_MEM_GROUP_COUNT != MEMORY_GROUP_MANAGER_NR_GROUPS); + /* + * Check that the mask used for storing the memory group ID is big + * enough for the largest possible memory group ID. + */ + BUILD_BUG_ON((BASEP_CONTEXT_MMU_GROUP_ID_MASK + >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + < (BASE_MEM_GROUP_COUNT - 1)); + + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + + return alloc_pages(gfp_mask, order); +} + +/** + * kbase_native_mgm_free - Native physical memory freeing method + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @page: Address of the struct associated with a page of physical + * memory that was allocated by calling kbase_native_mgm_alloc + * with the same argument values. + * @order: Page order for physical page size (order=0 means 4 KiB, + * order=9 means 2 MiB). + * + * Delegates all memory freeing requests to the kernel's __free_pages function. + */ +static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev, + int group_id, struct page *page, unsigned int order) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + + __free_pages(page, order); +} + +/** + * kbase_native_mgm_vmf_insert_pfn_prot - Native method to map a page on the CPU + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @vma: The virtual memory area to insert the page into. + * @addr: An address contained in @vma to assign to the inserted page. + * @pfn: The kernel Page Frame Number to insert at @addr in @vma. + * @pgprot: Protection flags for the inserted page. + * + * Called from a CPU virtual memory page fault handler. Delegates all memory + * mapping requests to the kernel's vmf_insert_pfn_prot function. + * + * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page table + * entry was successfully installed. + */ +static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot( + struct memory_group_manager_device *mgm_dev, int group_id, + struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + + return vmf_insert_pfn_prot(vma, addr, pfn, pgprot); +} + +/** + * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table + * entry + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @mmu_level: The level of the MMU page table where the page is getting mapped. + * @pte: The prepared page table entry. + * + * This function simply returns the @pte without modification. + * + * Return: A GPU page table entry to be stored in a page table. 
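+ *
+ * A platform-specific memory group manager could, for instance, fold the
+ * group ID into implementation-defined attribute bits of the entry, e.g.
+ * (purely hypothetical shift name):
+ *
+ *   return pte | ((u64)group_id << EXAMPLE_PTE_GROUP_SHIFT);
+ *
+ * whereas the native implementation deliberately returns the entry
+ * unchanged.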
+ */ +static u64 +kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, + int group_id, int mmu_level, u64 pte) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + CSTD_UNUSED(mmu_level); + + return pte; +} + +struct memory_group_manager_device kbase_native_mgm_dev = { + .ops = { + .mgm_alloc_page = kbase_native_mgm_alloc, + .mgm_free_page = kbase_native_mgm_free, + .mgm_get_import_memory_id = NULL, + .mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot, + .mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte, + }, + .data = NULL +}; diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_native_mgm.h b/drivers/gpu/arm/b_r26p0/mali_kbase_native_mgm.h new file mode 100644 index 000000000000..431b1f4cb5db --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_native_mgm.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_NATIVE_MGM_H_ +#define _KBASE_NATIVE_MGM_H_ + +#include + +/** + * kbase_native_mgm_dev - Native memory group manager device + * + * An implementation of the memory group manager interface that is intended for + * internal use when no platform-specific memory group manager is available. + * + * It ignores the specified group ID and delegates to the kernel's physical + * memory allocation and freeing functions. + */ +extern struct memory_group_manager_device kbase_native_mgm_dev; + +#endif /* _KBASE_NATIVE_MGM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_platform_fake.c b/drivers/gpu/arm/b_r26p0/mali_kbase_platform_fake.c new file mode 100644 index 000000000000..fbb090e6c21f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_platform_fake.c @@ -0,0 +1,124 @@ +/* + * + * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include + + +/* + * This file is included only for type definitions and functions belonging to + * specific platform folders. Do not add dependencies with symbols that are + * defined somewhere else. 
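+ *
+ * For illustration only (all values below are hypothetical), a platform
+ * folder would typically provide something along these lines for the
+ * non-device-tree case:
+ *
+ *   static struct kbase_io_resources io_resources = {
+ *           .job_irq_number   = 68,
+ *           .mmu_irq_number   = 69,
+ *           .gpu_irq_number   = 70,
+ *           .io_memory_region = {
+ *                   .start = 0xFC010000,
+ *                   .end   = 0xFC010000 + (4096 * 4) - 1
+ *           }
+ *   };
+ *
+ * kbasep_config_parse_io_resources() below converts such a structure into
+ * the fixed-order Linux resource array (I/O memory region, job IRQ, MMU
+ * IRQ, GPU IRQ) consumed by the platform device.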
+ */ +#include + +#define PLATFORM_CONFIG_RESOURCE_COUNT 4 +#define PLATFORM_CONFIG_IRQ_RES_COUNT 3 + +static struct platform_device *mali_device; + +#ifndef CONFIG_OF +/** + * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources + * + * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function + * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. + * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. + * + * @param[in] io_resource Input IO resource data + * @param[out] linux_resources Pointer to output array of Linux resource structures + */ +static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) +{ + if (!io_resources || !linux_resources) { + pr_err("%s: couldn't find proper resources\n", __func__); + return; + } + + memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); + + linux_resources[0].start = io_resources->io_memory_region.start; + linux_resources[0].end = io_resources->io_memory_region.end; + linux_resources[0].flags = IORESOURCE_MEM; + + linux_resources[1].start = io_resources->job_irq_number; + linux_resources[1].end = io_resources->job_irq_number; + linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + + linux_resources[2].start = io_resources->mmu_irq_number; + linux_resources[2].end = io_resources->mmu_irq_number; + linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + + linux_resources[3].start = io_resources->gpu_irq_number; + linux_resources[3].end = io_resources->gpu_irq_number; + linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; +} +#endif /* CONFIG_OF */ + +int kbase_platform_register(void) +{ + struct kbase_platform_config *config; +#ifndef CONFIG_OF + struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; +#endif + int err; + + config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ + if (config == NULL) { + pr_err("%s: couldn't get platform config\n", __func__); + return -ENODEV; + } + + mali_device = platform_device_alloc("mali", 0); + if (mali_device == NULL) + return -ENOMEM; + +#ifndef CONFIG_OF + kbasep_config_parse_io_resources(config->io_resources, resources); + err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); + if (err) { + platform_device_put(mali_device); + mali_device = NULL; + return err; + } +#endif /* CONFIG_OF */ + + err = platform_device_add(mali_device); + if (err) { + platform_device_unregister(mali_device); + mali_device = NULL; + return err; + } + + return 0; +} +EXPORT_SYMBOL(kbase_platform_register); + +void kbase_platform_unregister(void) +{ + if (mali_device) + platform_device_unregister(mali_device); +} +EXPORT_SYMBOL(kbase_platform_unregister); diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_pm.c b/drivers/gpu/arm/b_r26p0/mali_kbase_pm.c new file mode 100644 index 000000000000..521bbf69ea7a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_pm.c @@ -0,0 +1,284 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_pm.c + * Base kernel power management APIs + */ + +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#include + +int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) +{ + return kbase_hwaccess_pm_powerup(kbdev, flags); +} + +void kbase_pm_halt(struct kbase_device *kbdev) +{ + kbase_hwaccess_pm_halt(kbdev); +} + +void kbase_pm_context_active(struct kbase_device *kbdev) +{ + (void)kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); +} + +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) +{ + int c; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, + suspend_handler, current->pid); + kbase_pm_lock(kbdev); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) + return 1; + + if (kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev)) { +#else + if (kbase_pm_is_suspending(kbdev)) { +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + switch (suspend_handler) { + case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: + if (kbdev->pm.active_count != 0) + break; + /* FALLTHROUGH */ + case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: + kbase_pm_unlock(kbdev); + return 1; + + case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: + /* FALLTHROUGH */ + default: + KBASE_DEBUG_ASSERT_MSG(false, "unreachable"); + break; + } + } + c = ++kbdev->pm.active_count; + KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_ACTIVE, NULL, c); + + if (c == 1) { + /* First context active: Power on the GPU and + * any cores requested by the policy + */ + kbase_hwaccess_pm_gpu_active(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + kbase_clk_rate_trace_manager_gpu_active(kbdev); + } + + kbase_pm_unlock(kbdev); + dev_dbg(kbdev->dev, "%s %d\n", __func__, kbdev->pm.active_count); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_pm_context_active); + +void kbase_pm_context_idle(struct kbase_device *kbdev) +{ + int c; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + + kbase_pm_lock(kbdev); + + c = --kbdev->pm.active_count; + KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_IDLE, NULL, c); + + KBASE_DEBUG_ASSERT(c >= 0); + + if (c == 0) { + /* Last context has gone idle */ + kbase_hwaccess_pm_gpu_idle(kbdev); + kbase_clk_rate_trace_manager_gpu_idle(kbdev); + + /* Wake up anyone waiting for this to become 0 (e.g. suspend). + * The waiters must synchronize with us by locking the pm.lock + * after waiting. + */ + wake_up(&kbdev->pm.zero_active_count_wait); + } + + kbase_pm_unlock(kbdev); + dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, + kbdev->pm.active_count, current->pid); +} + +KBASE_EXPORT_TEST_API(kbase_pm_context_idle); + +void kbase_pm_driver_suspend(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + + /* Suspend vinstr. 
This blocks until the vinstr worker and timer are + * no longer running. + */ + kbase_vinstr_suspend(kbdev->vinstr_ctx); + + /* Disable GPU hardware counters. + * This call will block until counters are disabled. + */ + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + mutex_lock(&kbdev->pm.lock); + if (WARN_ON(kbase_pm_is_suspending(kbdev))) { + mutex_unlock(&kbdev->pm.lock); + return; + } + kbdev->pm.suspending = true; + mutex_unlock(&kbdev->pm.lock); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) { + int i; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_disjoint_state_up(kbdev); + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_softstop(kbdev, i, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + /* From now on, the active count will drop towards zero. Sometimes, + * it'll go up briefly before going down again. However, once + * it reaches zero it will stay there - guaranteeing that we've idled + * all pm references + */ + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_ADD(kbdev, LSI_PM_SUSPEND, NULL, 0); + + /* Suspend job scheduler and associated components, so that it releases all + * the PM active count references */ + kbasep_js_suspend(kbdev); + + /* Wait for the active count to reach zero. This is not the same as + * waiting for a power down, since not all policies power down when this + * reaches zero. + */ + dev_dbg(kbdev->dev, ">wait_event - waiting for active_count == 0 (pid = %d)\n", + current->pid); + wait_event(kbdev->pm.zero_active_count_wait, + kbdev->pm.active_count == 0); + dev_dbg(kbdev->dev, ">wait_event - waiting done\n"); + + /* NOTE: We synchronize with anything that was just finishing a + * kbase_pm_context_idle() call by locking the pm.lock below + */ + kbase_hwaccess_pm_suspend(kbdev); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) { + mutex_lock(&kbdev->pm.arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_stopped(kbdev); + mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock); + } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +} + +void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) +{ + unsigned long flags; + + /* MUST happen before any pm_context_active calls occur */ + kbase_hwaccess_pm_resume(kbdev); + + /* Initial active call, to power on the GPU/cores if needed */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + (void)kbase_pm_context_active_handle_suspend(kbdev, + (arb_gpu_start ? 
+ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE)); +#else + kbase_pm_context_active(kbdev); +#endif + + /* Resume any blocked atoms (which may cause contexts to be scheduled in + * and dependent atoms to run) + */ + kbase_resume_suspended_soft_jobs(kbdev); + + /* Resume the Job Scheduler and associated components, and start running + * atoms + */ + kbasep_js_resume(kbdev); + + /* Matching idle call, to power off the GPU/cores if we didn't actually + * need it and the policy doesn't want it on + */ + kbase_pm_context_idle(kbdev); + + /* Re-enable GPU hardware counters */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Resume vinstr */ + kbase_vinstr_resume(kbdev->vinstr_ctx); +} + +void kbase_pm_suspend(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT); + else + kbase_pm_driver_suspend(kbdev); +#else + kbase_pm_driver_suspend(kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +} + +void kbase_pm_resume(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_RESUME_EVENT); + else + kbase_pm_driver_resume(kbdev, false); +#else + kbase_pm_driver_resume(kbdev, false); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_pm.h b/drivers/gpu/arm/b_r26p0/mali_kbase_pm.h new file mode 100644 index 000000000000..257f959cc5a4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_pm.h @@ -0,0 +1,244 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_pm.h + * Power management API definitions + */ + +#ifndef _KBASE_PM_H_ +#define _KBASE_PM_H_ + +#include "mali_kbase_hwaccess_pm.h" + +#define PM_ENABLE_IRQS 0x01 +#define PM_HW_ISSUES_DETECT 0x02 + + +/** Initialize the power management framework. + * + * Must be called before any other power management function + * + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) + * + * @return 0 if the power management framework was successfully initialized. + */ +int kbase_pm_init(struct kbase_device *kbdev); + +/** Power up GPU after all modules have been initialized and interrupt handlers installed. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @param flags Flags to pass on to kbase_pm_init_hw + * + * @return 0 if powerup was successful. + */ +int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); + +/** + * Halt the power management framework. 
+ * Should ensure that no new interrupts are generated, + * but allow any currently running interrupt handlers to complete successfully. + * The GPU is forced off by the time this function returns, regardless of + * whether or not the active power policy asks for the GPU to be powered off. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_halt(struct kbase_device *kbdev); + +/** Terminate the power management framework. + * + * No power management functions may be called after this + * (except @ref kbase_pm_init) + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_term(struct kbase_device *kbdev); + +/** Increment the count of active contexts. + * + * This function should be called when a context is about to submit a job. + * It informs the active power policy that the GPU is going to be in use shortly + * and the policy is expected to start turning on the GPU. + * + * This function will block until the GPU is available. + * + * This function ASSERTS if a suspend is occuring/has occurred whilst this is + * in use. Use kbase_pm_contect_active_unless_suspending() instead. + * + * @note a Suspend is only visible to Kernel threads; user-space threads in a + * syscall cannot witness a suspend, because they are frozen before the suspend + * begins. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_context_active(struct kbase_device *kbdev); + + +/** Handler codes for doing kbase_pm_context_active_handle_suspend() */ +enum kbase_pm_suspend_handler { + /** A suspend is not expected/not possible - this is the same as + * kbase_pm_context_active() + */ + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, + /** If we're suspending, fail and don't increase the active count */ + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, + /** If we're suspending, succeed and allow the active count to increase + * if it didn't go from 0->1 (i.e., we didn't re-activate the GPU). + * + * This should only be used when there is a bounded time on the activation + * (e.g. guarantee it's going to be idled very soon after) + */ + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /** Special case when Arbiter has notified we can use GPU. + * Active count should always start at 0 in this case. + */ + KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED, +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +}; + +/** Suspend 'safe' variant of kbase_pm_context_active() + * + * If a suspend is in progress, this allows for various different ways of + * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details. + * + * We returns a status code indicating whether we're allowed to keep the GPU + * active during the suspend, depending on the handler code. If the status code + * indicates a failure, the caller must abort whatever operation it was + * attempting, and potentially queue it up for after the OS has resumed. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param suspend_handler The handler code for how to handle a suspend that might occur + * @return zero Indicates success + * @return non-zero Indicates failure due to the system being suspending/suspended. + */ +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); + +/** Decrement the reference count of active contexts. 
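+ *
+ * Each successful kbase_pm_context_active*() call must eventually be
+ * balanced by exactly one call to this function. A minimal, illustrative
+ * pairing (error handling elided, and the suspend handler choice is only
+ * an example) might be:
+ *
+ *   if (kbase_pm_context_active_handle_suspend(kbdev,
+ *                   KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE))
+ *           return;            (suspending - do not touch the GPU)
+ *   ... access GPU registers / submit work ...
+ *   kbase_pm_context_idle(kbdev);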
+ * + * This function should be called when a context becomes idle. + * After this call the GPU may be turned off by the power policy so the calling + * code should ensure that it does not access the GPU's registers. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_context_idle(struct kbase_device *kbdev); + +/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline + * function + */ + +/** + * Suspend the GPU and prevent any further register accesses to it from Kernel + * threads. + * + * This is called in response to an OS suspend event, and calls into the various + * kbase components to complete the suspend. + * + * @note the mechanisms used here rely on all user-space threads being frozen + * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up + * the GPU e.g. via atom submission. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_suspend(struct kbase_device *kbdev); + +/** + * Resume the GPU, allow register accesses to it, and resume running atoms on + * the GPU. + * + * This is called in response to an OS resume event, and calls into the various + * kbase components to complete the resume. + * + * Also called when using VM arbiter, when GPU access has been granted. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_resume(struct kbase_device *kbdev); + +/** + * kbase_pm_vsync_callback - vsync callback + * + * @buffer_updated: 1 if a new frame was displayed, 0 otherwise + * @data: Pointer to the kbase device as returned by kbase_find_device() + * + * Callback function used to notify the power management code that a vsync has + * occurred on the display. + */ +void kbase_pm_vsync_callback(int buffer_updated, void *data); + +/** + * kbase_pm_driver_suspend() - Put GPU and driver in suspend state + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) + * + * Suspend the GPU and prevent any further register accesses to it from Kernel + * threads. + * + * This is called in response to an OS suspend event, and calls into the various + * kbase components to complete the suspend. + * + * Despite kbase_pm_suspend(), it will ignore to update Arbiter + * status if MALI_ARBITER_SUPPORT is enabled. + * + * @note the mechanisms used here rely on all user-space threads being frozen + * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up + * the GPU e.g. via atom submission. + */ +void kbase_pm_driver_suspend(struct kbase_device *kbdev); + +/** + * kbase_pm_driver_resume() - Put GPU and driver in resume + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) + * + * Resume the GPU, allow register accesses to it, and resume running atoms on + * the GPU. + * + * This is called in response to an OS resume event, and calls into the various + * kbase components to complete the resume. + * + * Also called when using VM arbiter, when GPU access has been granted. + * + * Despite kbase_pm_resume(), it will ignore to update Arbiter + * status if MALI_ARBITER_SUPPORT is enabled. 
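+ *
+ * In other words, unlike kbase_pm_resume() this function performs the
+ * resume work directly and does not route the event through the Arbiter;
+ * kbase_pm_resume() is the entry point that checks whether an arbiter
+ * interface is present and either forwards the event to the Arbiter or
+ * calls this function with arb_gpu_start set to false.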
+ */ +void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +/** + * kbase_pm_handle_gpu_lost() - Handle GPU Lost for the VM + * @kbdev: Device pointer + * + * Handles the case that the Arbiter has forced the GPU away from the VM, + * so that interrupts will not be received and registers are no longer + * accessible because replaced by dummy RAM. + * Kill any running tasks and put the driver into a GPU powered-off state. + */ +void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#endif /* _KBASE_PM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_regs_history_debugfs.c new file mode 100644 index 000000000000..1d114a65a6d3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_regs_history_debugfs.c @@ -0,0 +1,136 @@ +/* + * + * (C) COPYRIGHT 2016, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase.h" + +#include "mali_kbase_regs_history_debugfs.h" + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +#include + + +static int regs_history_size_get(void *data, u64 *val) +{ + struct kbase_io_history *const h = data; + + *val = h->size; + + return 0; +} + +static int regs_history_size_set(void *data, u64 val) +{ + struct kbase_io_history *const h = data; + + return kbase_io_history_resize(h, (u16)val); +} + + +DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, + regs_history_size_get, + regs_history_size_set, + "%llu\n"); + + +/** + * regs_history_show - show callback for the register access history file. + * + * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * This function is called to dump all recent accesses to the GPU registers. + * + * @return 0 if successfully prints data in debugfs entry file, failure + * otherwise + */ +static int regs_history_show(struct seq_file *sfile, void *data) +{ + struct kbase_io_history *const h = sfile->private; + u16 i; + size_t iters; + unsigned long flags; + + if (!h->enabled) { + seq_puts(sfile, "The register access history is disabled\n"); + goto out; + } + + spin_lock_irqsave(&h->lock, flags); + + iters = (h->size > h->count) ? h->count : h->size; + seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 
'w' : 'r'; + + seq_printf(sfile, "%6i: %c: reg 0x%016lx val %08x\n", i, access, + (unsigned long)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); + +out: + return 0; +} + + +/** + * regs_history_open - open operation for regs_history debugfs file + * + * @in: &struct inode pointer + * @file: &struct file pointer + * + * @return file descriptor + */ +static int regs_history_open(struct inode *in, struct file *file) +{ + return single_open(file, ®s_history_show, in->i_private); +} + + +static const struct file_operations regs_history_fops = { + .owner = THIS_MODULE, + .open = ®s_history_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->io_history.enabled); + debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->io_history, ®s_history_size_fops); + debugfs_create_file("regs_history", S_IRUGO, + kbdev->mali_debugfs_directory, &kbdev->io_history, + ®s_history_fops); +} + + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_regs_history_debugfs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_regs_history_debugfs.h new file mode 100644 index 000000000000..a0078cb8600d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_regs_history_debugfs.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Header file for register access history support via debugfs + * + * This interface is made available via /sys/kernel/debug/mali#/regs_history*. + * + * Usage: + * - regs_history_enabled: whether recording of register accesses is enabled. + * Write 'y' to enable, 'n' to disable. + * - regs_history_size: size of the register history buffer, must be > 0 + * - regs_history: return the information about last accesses to the registers. 
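+ *
+ * An illustrative session from user space (the "mali0" directory name is
+ * an example and depends on the probed device) might be:
+ *
+ *   echo y   > /sys/kernel/debug/mali0/regs_history_enabled
+ *   echo 256 > /sys/kernel/debug/mali0/regs_history_size
+ *   cat /sys/kernel/debug/mali0/regs_history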
+ */ + +#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H +#define _KBASE_REGS_HISTORY_DEBUGFS_H + +struct kbase_device; + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +/** + * kbasep_regs_history_debugfs_init - add debugfs entries for register history + * + * @kbdev: Pointer to kbase_device containing the register history + */ +void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); + +#else /* CONFIG_DEBUG_FS */ + +#define kbasep_regs_history_debugfs_init CSTD_NOP + +#endif /* CONFIG_DEBUG_FS */ + +#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_reset_gpu.h b/drivers/gpu/arm/b_r26p0/mali_kbase_reset_gpu.h new file mode 100644 index 000000000000..df72eecc5e0f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_reset_gpu.h @@ -0,0 +1,139 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_RESET_GPU_H_ +#define _KBASE_RESET_GPU_H_ + +/** + * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. + * @kbdev: Device pointer + * + * Caller is expected to hold the kbdev->hwaccess_lock. + * + * Return: a boolean which should be interpreted as follows: + * - true - Prepared for reset, kbase_reset_gpu should be called. + * - false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. + */ +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); + +/** + * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. + * @kbdev: Device pointer + * + * Return: a boolean which should be interpreted as follows: + * - true - Prepared for reset, kbase_reset_gpu should be called. + * - false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. + */ +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu - Reset the GPU + * @kbdev: Device pointer + * + * This function should be called after kbase_prepare_to_reset_gpu if it returns + * true. It should never be called without a corresponding call to + * kbase_prepare_to_reset_gpu (only on Job Manager GPUs). + * + * After this function is called the caller should call kbase_reset_gpu_wait() + * to know when the reset has completed. + */ +void kbase_reset_gpu(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_locked - Reset the GPU + * @kbdev: Device pointer + * + * This function should be called after kbase_prepare_to_reset_gpu_locked if it + * returns true. It should never be called without a corresponding call to + * kbase_prepare_to_reset_gpu (only on Job Manager GPUs). + * Caller is expected to hold the kbdev->hwaccess_lock. + * + * After this function is called, the caller should call kbase_reset_gpu_wait() + * to know when the reset has completed. 
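+ *
+ * As an illustrative sketch, the usual calling pattern for the unlocked
+ * variants of these helpers is roughly:
+ *
+ *   if (kbase_prepare_to_reset_gpu(kbdev))
+ *           kbase_reset_gpu(kbdev);
+ *   kbase_reset_gpu_wait(kbdev);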
+ */ +void kbase_reset_gpu_locked(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_silent - Reset the GPU silently + * @kbdev: Device pointer + * + * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs) + * and don't emit messages into the kernel log while doing the reset. + * + * This function should be used in cases where we are doing a controlled reset + * of the GPU as part of normal processing (e.g. exiting protected mode) where + * the driver will have ensured the scheduler has been idled and all other + * users of the GPU (e.g. instrumentation) have been suspended. + * + * Return: 0 if the reset was started successfully + * -EAGAIN if another reset is currently in progress + */ +int kbase_reset_gpu_silent(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_is_active - Reports if the GPU is being reset + * @kbdev: Device pointer + * + * Return: True if the GPU is in the process of being reset (or if the reset of + * GPU failed, not applicable to Job Manager GPUs). + */ +bool kbase_reset_gpu_is_active(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_wait - Wait for a GPU reset to complete + * @kbdev: Device pointer + * + * This function may wait indefinitely. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_reset_gpu_wait(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism. + * + * @kbdev: Device pointer + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_reset_gpu_init(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_term - Terminate the GPU reset handling mechanism. + * + * @kbdev: Device pointer + */ +void kbase_reset_gpu_term(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_register_complete_cb - Register the callback function to be + * invoked on completion of GPU reset. + * + * @kbdev: Device pointer + * @complete_callback: Pointer to the callback function + */ +void kbase_reset_gpu_register_complete_cb(struct kbase_device *kbdev, + int (*complete_callback)(struct kbase_device *kbdev)); + +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_smc.c b/drivers/gpu/arm/b_r26p0/mali_kbase_smc.c new file mode 100644 index 000000000000..b5c7b1289846 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_smc.c @@ -0,0 +1,91 @@ +/* + * + * (C) COPYRIGHT 2015, 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifdef CONFIG_ARM64 + +#include +#include + +#include + +/* __asmeq is not available on Kernel versions >= 4.20 */ +#ifndef __asmeq +/* + * This is used to ensure the compiler did actually allocate the register we + * asked it for some inline assembly sequences. Apparently we can't trust the + * compiler from one version to another so a bit of paranoia won't hurt. 
This + * string is meant to be concatenated with the inline asm string and will + * cause compilation to stop on mismatch. (for details, see gcc PR 15089) + */ +#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" +#endif + +static noinline u64 invoke_smc_fid(u64 function_id, + u64 arg0, u64 arg1, u64 arg2) +{ + register u64 x0 asm("x0") = function_id; + register u64 x1 asm("x1") = arg0; + register u64 x2 asm("x2") = arg1; + register u64 x3 asm("x3") = arg2; + + asm volatile( + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "smc #0\n" + : "+r" (x0) + : "r" (x1), "r" (x2), "r" (x3)); + + return x0; +} + +u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) +{ + /* Is fast call (bit 31 set) */ + KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL); + /* bits 16-23 must be zero for fast calls */ + KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); + + return invoke_smc_fid(fid, arg0, arg1, arg2); +} + +u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, + u64 arg0, u64 arg1, u64 arg2) +{ + u32 fid = 0; + + /* Only the six bits allowed should be used. */ + KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); + + fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ + if (smc64) + fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */ + fid |= oen; /* Bit 29:24: OEN */ + /* Bit 23:16: Must be zero for fast calls */ + fid |= (function_number); /* Bit 15:0: function number */ + + return kbase_invoke_smc_fid(fid, arg0, arg1, arg2); +} + +#endif /* CONFIG_ARM64 */ + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_smc.h b/drivers/gpu/arm/b_r26p0/mali_kbase_smc.h new file mode 100644 index 000000000000..221eb21a8c7f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_smc.h @@ -0,0 +1,72 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_SMC_H_ +#define _KBASE_SMC_H_ + +#ifdef CONFIG_ARM64 + +#include + +#define SMC_FAST_CALL (1 << 31) +#define SMC_64 (1 << 30) + +#define SMC_OEN_OFFSET 24 +#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ +#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) +#define SMC_OEN_STD (4 << SMC_OEN_OFFSET) + + +/** + * kbase_invoke_smc_fid - Perform a secure monitor call + * @fid: The SMC function to call, see SMC Calling convention. + * @arg0: First argument to the SMC. + * @arg1: Second argument to the SMC. + * @arg2: Third argument to the SMC. + * + * See SMC Calling Convention for details. + * + * Return: the return value from the SMC. + */ +u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); + +/** + * kbase_invoke_smc_fid - Perform a secure monitor call + * @oen: Owning Entity number (SIP, STD etc). + * @function_number: The function number within the OEN. + * @smc64: use SMC64 calling convention instead of SMC32. 
+ * @arg0: First argument to the SMC. + * @arg1: Second argument to the SMC. + * @arg2: Third argument to the SMC. + * + * See SMC Calling Convention for details. + * + * Return: the return value from the SMC call. + */ +u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, + u64 arg0, u64 arg1, u64 arg2); + +#endif /* CONFIG_ARM64 */ + +#endif /* _KBASE_SMC_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c new file mode 100644 index 000000000000..639278363427 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c @@ -0,0 +1,1816 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include + +#include +#include +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * @file mali_kbase_softjobs.c + * + * This file implements the logic behind software only jobs that are + * executed within the driver rather than being handed over to the GPU. + */ + +static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_del(&katom->queue); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + /* Record the start time of this atom so we could cancel it at + * the right time. + */ + katom->start_timestamp = ktime_get(); + + /* Add the atom to the waiting list before the timer is + * (re)started to make sure that it gets processed. 
+ */ + kbasep_add_waiting_soft_job(katom); + + /* Schedule timeout of this atom after a period if it is not active */ + if (!timer_pending(&kctx->soft_job_timeout)) { + int timeout_ms = atomic_read( + &kctx->kbdev->js_data.soft_job_timeout_ms); + mod_timer(&kctx->soft_job_timeout, + jiffies + msecs_to_jiffies(timeout_ms)); + } +} + +static int kbasep_read_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char *status) +{ + unsigned char *mapped_evt; + struct kbase_vmap_struct map; + + mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + if (!mapped_evt) + return -EFAULT; + + *status = *mapped_evt; + + kbase_vunmap(kctx, &map); + + return 0; +} + +static int kbasep_write_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char new_status) +{ + unsigned char *mapped_evt; + struct kbase_vmap_struct map; + + if ((new_status != BASE_JD_SOFT_EVENT_SET) && + (new_status != BASE_JD_SOFT_EVENT_RESET)) + return -EINVAL; + + mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); + if (!mapped_evt) + return -EFAULT; + + *mapped_evt = new_status; + + kbase_vunmap(kctx, &map); + + return 0; +} + +static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) +{ + struct kbase_vmap_struct map; + void *user_result; + struct timespec64 ts; + struct base_dump_cpu_gpu_counters data; + u64 system_time; + u64 cycle_counter; + u64 jc = katom->jc; + struct kbase_context *kctx = katom->kctx; + int pm_active_err; + + memset(&data, 0, sizeof(data)); + + /* Take the PM active reference as late as possible - otherwise, it could + * delay suspend until we process the atom (which may be at the end of a + * long chain of dependencies */ + pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + if (pm_active_err) { + struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; + + /* We're suspended - queue this on the list of suspended jobs + * Use dep_item[1], because dep_item[0] was previously in use + * for 'waiting_soft_jobs'. + */ + mutex_lock(&js_devdata->runpool_mutex); + list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); + mutex_unlock(&js_devdata->runpool_mutex); + + /* Also adding this to the list of waiting soft job */ + kbasep_add_waiting_soft_job(katom); + + return pm_active_err; + } + + kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, + &ts); + + kbase_pm_context_idle(kctx->kbdev); + + data.sec = ts.tv_sec; + data.usec = ts.tv_nsec / 1000; + data.system_time = system_time; + data.cycle_counter = cycle_counter; + + /* Assume this atom will be cancelled until we know otherwise */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + /* GPU_WR access is checked on the range for returning the result to + * userspace for the following reasons: + * - security, this is currently how imported user bufs are checked. 
+ * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ + user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); + if (!user_result) + return 0; + + memcpy(user_result, &data, sizeof(data)); + + kbase_vunmap(kctx, &map); + + /* Atom was fine - mark it as done */ + katom->event_code = BASE_JD_EVENT_DONE; + + return 0; +} + +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +/* Called by the explicit fence mechanism when a fence wait has completed */ +void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + mutex_lock(&kctx->jctx.lock); + kbasep_remove_waiting_soft_job(katom); + kbase_finish_soft_job(katom); + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(kctx->kbdev); + mutex_unlock(&kctx->jctx.lock); +} +#endif + +static void kbasep_soft_event_complete_job(struct work_struct *work) +{ + struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + int resched; + + mutex_lock(&kctx->jctx.lock); + resched = jd_done_nolock(katom, NULL); + mutex_unlock(&kctx->jctx.lock); + + if (resched) + kbase_js_sched_all(kctx->kbdev); +} + +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) +{ + int cancel_timer = 1; + struct list_head *entry, *tmp; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + struct kbase_jd_atom *katom = list_entry( + entry, struct kbase_jd_atom, queue); + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_EVENT_WAIT: + if (katom->jc == evt) { + list_del(&katom->queue); + + katom->event_code = BASE_JD_EVENT_DONE; + INIT_WORK(&katom->work, + kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, + &katom->work); + } else { + /* There are still other waiting jobs, we cannot + * cancel the timer yet. + */ + cancel_timer = 0; + } + break; +#ifdef CONFIG_MALI_FENCE_DEBUG + case BASE_JD_REQ_SOFT_FENCE_WAIT: + /* Keep the timer running if fence debug is enabled and + * there are waiting fence jobs. + */ + cancel_timer = 0; + break; +#endif + } + } + + if (cancel_timer) + del_timer(&kctx->soft_job_timeout); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +#ifdef CONFIG_MALI_FENCE_DEBUG +static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct device *dev = kctx->kbdev->dev; + int i; + + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep; + + list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { + if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || + dep->status == KBASE_JD_ATOM_STATE_COMPLETED) + continue; + + if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) + == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { + /* Found blocked trigger fence. 
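+				 * Report it as the victim so the blocked
+				 * dependency chain can be traced in the log.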
*/ + struct kbase_sync_fence_info info; + + if (!kbase_sync_fence_in_info_get(dep, &info)) { + dev_warn(dev, + "\tVictim trigger atom %d fence [%p] %s: %s\n", + kbase_jd_atom_id(kctx, dep), + info.fence, + info.name, + kbase_sync_status_string(info.status)); + } + } + + kbase_fence_debug_check_atom(dep); + } + } +} + +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_SEC_JOB_STATUS_CHECK +extern int gpu_job_fence_status_dump(struct sync_file *timeout_sync_file); +#endif +static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct device *dev = katom->kctx->kbdev->dev; + int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); + unsigned long lflags; + struct kbase_sync_fence_info info; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_SEC_JOB_STATUS_CHECK + gpu_job_fence_status_dump(NULL); +#endif + + if (kbase_sync_fence_in_info_get(katom, &info)) { + /* Fence must have signaled just after timeout. */ + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + return; + } + + dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", + kctx->tgid, kctx->id, + kbase_jd_atom_id(kctx, katom), + info.fence, timeout_ms); + dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", + info.fence, info.name, + kbase_sync_status_string(info.status)); + + /* Search for blocked trigger atoms */ + kbase_fence_debug_check_atom(katom); + + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + + kbase_sync_fence_in_dump(katom); +} + +struct kbase_fence_debug_work { + struct kbase_jd_atom *katom; + struct work_struct work; +}; + +static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) +{ + struct kbase_fence_debug_work *w = container_of(work, + struct kbase_fence_debug_work, work); + struct kbase_jd_atom *katom = w->katom; + struct kbase_context *kctx = katom->kctx; + + mutex_lock(&kctx->jctx.lock); + kbase_fence_debug_wait_timeout(katom); + mutex_unlock(&kctx->jctx.lock); + + kfree(w); +} + +static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) +{ + struct kbase_fence_debug_work *work; + struct kbase_context *kctx = katom->kctx; + + /* Enqueue fence debug worker. Use job_done_wq to get + * debug print ordered with job completion. + */ + work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); + /* Ignore allocation failure. 
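+	 * This path runs from the soft job timeout timer with the
+	 * waiting_soft_jobs_lock spinlock held (hence GFP_ATOMIC); missing a
+	 * debug dump is preferable to blocking here.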
*/ + if (work) { + work->katom = katom; + INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); + queue_work(kctx->jctx.job_done_wq, &work->work); + } +} +#endif /* CONFIG_MALI_FENCE_DEBUG */ + +void kbasep_soft_job_timeout_worker(struct timer_list *timer) +{ + struct kbase_context *kctx = container_of(timer, struct kbase_context, + soft_job_timeout); + u32 timeout_ms = (u32)atomic_read( + &kctx->kbdev->js_data.soft_job_timeout_ms); + ktime_t cur_time = ktime_get(); + bool restarting = false; + unsigned long lflags; + struct list_head *entry, *tmp; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + struct kbase_jd_atom *katom = list_entry(entry, + struct kbase_jd_atom, queue); + s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, + katom->start_timestamp)); + + if (elapsed_time < (s64)timeout_ms) { + restarting = true; + continue; + } + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_EVENT_WAIT: + /* Take it out of the list to ensure that it + * will be cancelled in all cases + */ + list_del(&katom->queue); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + INIT_WORK(&katom->work, kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, &katom->work); + break; +#ifdef CONFIG_MALI_FENCE_DEBUG + case BASE_JD_REQ_SOFT_FENCE_WAIT: + kbase_fence_debug_timeout(katom); + break; +#endif + } + } + + if (restarting) + mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + unsigned char status; + + /* The status of this soft-job is stored in jc */ + if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return 0; + } + + if (status == BASE_JD_SOFT_EVENT_SET) + return 0; /* Event already set, nothing to do */ + + kbasep_add_waiting_with_timeout(katom); + + return 1; +} + +static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, + unsigned char new_status) +{ + /* Complete jobs waiting on the same event */ + struct kbase_context *kctx = katom->kctx; + + if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return; + } + + if (new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events(kctx, katom->jc); +} + +/** + * kbase_soft_event_update() - Update soft event state + * @kctx: Pointer to context + * @event: Event to update + * @new_status: New status value of event + * + * Update the event, and wake up any atoms waiting for the event. + * + * Return: 0 on success, a negative error code on failure. 
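+ * (-ENOENT is returned if the event status could not be written back to
+ * the GPU-visible memory at @event).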
+ */ +int kbase_soft_event_update(struct kbase_context *kctx, + u64 event, + unsigned char new_status) +{ + int err = 0; + + mutex_lock(&kctx->jctx.lock); + + if (kbasep_write_soft_event_status(kctx, event, new_status)) { + err = -ENOENT; + goto out; + } + + if (new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events(kctx, event); + +out: + mutex_unlock(&kctx->jctx.lock); + + return err; +} + +static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) +{ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); +} + +static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers = katom->softjob_data; + unsigned int i; + unsigned int nr = katom->nr_extres; + + if (!buffers) + return; + + kbase_gpu_vm_lock(katom->kctx); + for (i = 0; i < nr; i++) { + int p; + struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc; + + if (!buffers[i].pages) + break; + for (p = 0; p < buffers[i].nr_pages; p++) { + struct page *pg = buffers[i].pages[p]; + + if (pg) + put_page(pg); + } + if (buffers[i].is_vmalloc) + vfree(buffers[i].pages); + else + kfree(buffers[i].pages); + if (gpu_alloc) { + switch (gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + { + kbase_free_user_buffer(&buffers[i]); + break; + } + default: + /* Nothing to be done. */ + break; + } + kbase_mem_phy_alloc_put(gpu_alloc); + } + } + kbase_gpu_vm_unlock(katom->kctx); + kfree(buffers); + + katom->softjob_data = NULL; +} + +static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers; + struct base_jd_debug_copy_buffer *user_buffers = NULL; + unsigned int i; + unsigned int nr = katom->nr_extres; + int ret = 0; + void __user *user_structs = (void __user *)(uintptr_t)katom->jc; + + if (!user_structs) + return -EINVAL; + + buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); + if (!buffers) { + ret = -ENOMEM; + goto out_cleanup; + } + katom->softjob_data = buffers; + + user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); + + if (!user_buffers) { + ret = -ENOMEM; + goto out_cleanup; + } + + ret = copy_from_user(user_buffers, user_structs, + sizeof(*user_buffers)*nr); + if (ret) { + ret = -EFAULT; + goto out_cleanup; + } + + for (i = 0; i < nr; i++) { + u64 addr = user_buffers[i].address; + u64 page_addr = addr & PAGE_MASK; + u64 end_page_addr = addr + user_buffers[i].size - 1; + u64 last_page_addr = end_page_addr & PAGE_MASK; + int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; + int pinned_pages; + struct kbase_va_region *reg; + struct base_external_resource user_extres; + + if (!addr) + continue; + + if (last_page_addr < page_addr) { + ret = -EINVAL; + goto out_cleanup; + } + + buffers[i].nr_pages = nr_pages; + buffers[i].offset = addr & ~PAGE_MASK; + if (buffers[i].offset >= PAGE_SIZE) { + ret = -EINVAL; + goto out_cleanup; + } + buffers[i].size = user_buffers[i].size; + + if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD / + sizeof(struct page *))) { + buffers[i].is_vmalloc = true; + buffers[i].pages = vzalloc(nr_pages * + sizeof(struct page *)); + } else { + buffers[i].is_vmalloc = false; + buffers[i].pages = kcalloc(nr_pages, + sizeof(struct page *), GFP_KERNEL); + } + + if (!buffers[i].pages) { + ret = -ENOMEM; + goto out_cleanup; + } + + pinned_pages = get_user_pages_fast(page_addr, + nr_pages, + 1, /* Write */ + buffers[i].pages); + if (pinned_pages < 0) { + /* get_user_pages_fast has failed - page array 
is not + * valid. Don't try to release any pages. + */ + buffers[i].nr_pages = 0; + + ret = pinned_pages; + goto out_cleanup; + } + if (pinned_pages != nr_pages) { + /* Adjust number of pages, so that we only attempt to + * release pages in the array that we know are valid. + */ + buffers[i].nr_pages = pinned_pages; + + ret = -EINVAL; + goto out_cleanup; + } + + user_extres = user_buffers[i].extres; + if (user_extres.ext_resource == 0ULL) { + ret = -EINVAL; + goto out_cleanup; + } + + kbase_gpu_vm_lock(katom->kctx); + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, user_extres.ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + + if (kbase_is_region_invalid_or_free(reg) || + reg->gpu_alloc == NULL) { + ret = -EINVAL; + goto out_unlock; + } + + buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + buffers[i].nr_extres_pages = reg->nr_pages; + + if (reg->nr_pages*PAGE_SIZE != buffers[i].size) + dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); + + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + { + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + unsigned long nr_pages = + alloc->imported.user_buf.nr_pages; + + if (alloc->imported.user_buf.mm != current->mm) { + ret = -EINVAL; + goto out_unlock; + } + buffers[i].extres_pages = kcalloc(nr_pages, + sizeof(struct page *), GFP_KERNEL); + if (!buffers[i].extres_pages) { + ret = -ENOMEM; + goto out_unlock; + } + + ret = get_user_pages_fast( + alloc->imported.user_buf.address, + nr_pages, 0, + buffers[i].extres_pages); + if (ret != nr_pages) { + /* Adjust number of pages, so that we only + * attempt to release pages in the array that we + * know are valid. + */ + if (ret < 0) + buffers[i].nr_extres_pages = 0; + else + buffers[i].nr_extres_pages = ret; + + goto out_unlock; + } + ret = 0; + break; + } + default: + /* Nothing to be done. 
*/ + break; + } + kbase_gpu_vm_unlock(katom->kctx); + } + kfree(user_buffers); + + return ret; + +out_unlock: + kbase_gpu_vm_unlock(katom->kctx); + +out_cleanup: + /* Frees allocated memory for kbase_debug_copy_job struct, including + * members, and sets jc to 0 */ + kbase_debug_copy_finish(katom); + kfree(user_buffers); + + return ret; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, + unsigned long page_num, struct page **page) +{ + struct sg_table *sgt = gpu_alloc->imported.umm.sgt; + struct sg_page_iter sg_iter; + unsigned long page_index = 0; + + if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) + return NULL; + + if (!sgt) + return NULL; + + if (WARN_ON(page_num >= gpu_alloc->nents)) + return NULL; + + for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) { + if (page_index == page_num) { + *page = sg_page_iter_page(&sg_iter); + + return kmap(*page); + } + page_index++; + } + + return NULL; +} +#endif + +int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data) +{ + unsigned int i; + unsigned int target_page_nr = 0; + struct page **pages = buf_data->pages; + u64 offset = buf_data->offset; + size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; + size_t to_copy = min(extres_size, buf_data->size); + struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc; + int ret = 0; + size_t dma_to_copy; + + KBASE_DEBUG_ASSERT(pages != NULL); + + kbase_gpu_vm_lock(kctx); + if (!gpu_alloc) { + ret = -EINVAL; + goto out_unlock; + } + + switch (gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + { + for (i = 0; i < buf_data->nr_extres_pages && + target_page_nr < buf_data->nr_pages; i++) { + struct page *pg = buf_data->extres_pages[i]; + void *extres_page = kmap(pg); + + if (extres_page) { + ret = kbase_mem_copy_to_pinned_user_pages( + pages, extres_page, &to_copy, + buf_data->nr_pages, + &target_page_nr, offset); + kunmap(pg); + if (ret) + goto out_unlock; + } + } + } + break; + case KBASE_MEM_TYPE_IMPORTED_UMM: { + struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf; + + KBASE_DEBUG_ASSERT(dma_buf != NULL); + if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE) + dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch"); + + dma_to_copy = min(dma_buf->size, + (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); + ret = dma_buf_begin_cpu_access(dma_buf, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) + 0, dma_to_copy, +#endif + DMA_FROM_DEVICE); + if (ret) + goto out_unlock; + + for (i = 0; i < dma_to_copy/PAGE_SIZE && + target_page_nr < buf_data->nr_pages; i++) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + struct page *pg; + void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); +#else + void *extres_page = dma_buf_kmap(dma_buf, i); +#endif + if (extres_page) { + ret = kbase_mem_copy_to_pinned_user_pages( + pages, extres_page, &to_copy, + buf_data->nr_pages, + &target_page_nr, offset); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) + kunmap(pg); +#else + dma_buf_kunmap(dma_buf, i, extres_page); +#endif + if (ret) + goto out_unlock; + } + } + dma_buf_end_cpu_access(dma_buf, +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) + 0, dma_to_copy, +#endif + DMA_FROM_DEVICE); + break; + } + default: + ret = -EINVAL; + } +out_unlock: + kbase_gpu_vm_unlock(kctx); + return ret; +} + +static int kbase_debug_copy(struct kbase_jd_atom *katom) +{ + struct 
kbase_debug_copy_buffer *buffers = katom->softjob_data; + unsigned int i; + + if (WARN_ON(!buffers)) + return -EINVAL; + + for (i = 0; i < katom->nr_extres; i++) { + int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); + + if (res) + return res; + } + + return 0; +} + +#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) + +int kbasep_jit_alloc_validate(struct kbase_context *kctx, + struct base_jit_alloc_info *info) +{ + int j; + /* If the ID is zero, then fail the job */ + if (info->id == 0) + return -EINVAL; + + /* Sanity check that the PA fits within the VA */ + if (info->va_pages < info->commit_pages) + return -EINVAL; + + /* Ensure the GPU address is correctly aligned */ + if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0) + return -EINVAL; + + /* Interface version 2 (introduced with kernel driver version 11.5) + * onward has padding and a flags member to validate. + * + * Note: To support earlier versions the extra bytes will have been set + * to 0 by the caller. + */ + + /* Check padding is all zeroed */ + for (j = 0; j < sizeof(info->padding); j++) { + if (info->padding[j] != 0) + return -EINVAL; + } + + /* Only valid flags shall be set */ + if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) + return -EINVAL; + +#if !MALI_JIT_PRESSURE_LIMIT_BASE + /* If just-in-time memory allocation pressure limit feature is disabled, + * heap_info_gpu_addr must be zeroed-out + */ + if (info->heap_info_gpu_addr) + return -EINVAL; +#endif + + /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr + * cannot be 0 + */ + if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && + !info->heap_info_gpu_addr) + return -EINVAL; + + return 0; +} + + +#if (KERNEL_VERSION(3, 18, 63) > LINUX_VERSION_CODE) +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) +#endif + +/* + * Sizes of user data to copy for each just-in-time memory interface version + * + * In interface version 2 onwards this is the same as the struct size, allowing + * copying of arrays of structures from userspace. + * + * In interface version 1 the structure size was variable, and hence arrays of + * structures cannot be supported easily, and were not a feature present in + * version 1 anyway. + */ +static const size_t jit_info_copy_size_for_jit_version[] = { + /* in jit_version 1, the structure did not have any end padding, hence + * it could be a different size on 32 and 64-bit clients. 
We therefore + * do not copy past the last member + */ + [1] = offsetofend(struct base_jit_alloc_info_10_2, id), + [2] = sizeof(struct base_jit_alloc_info_11_5), + [3] = sizeof(struct base_jit_alloc_info) +}; + +static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) +{ + __user u8 *data = (__user u8 *)(uintptr_t) katom->jc; + struct base_jit_alloc_info *info; + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + u32 count; + int ret; + u32 i; + size_t jit_info_user_copy_size; + + WARN_ON(kctx->jit_version >= + ARRAY_SIZE(jit_info_copy_size_for_jit_version)); + jit_info_user_copy_size = + jit_info_copy_size_for_jit_version[kctx->jit_version]; + WARN_ON(jit_info_user_copy_size > sizeof(*info)); + + /* For backwards compatibility, and to prevent reading more than 1 jit + * info struct on jit version 1 + */ + if (katom->nr_extres == 0 || kctx->jit_version == 1) + katom->nr_extres = 1; + count = katom->nr_extres; + + /* Sanity checks */ + if (!data || count > kctx->jit_max_allocations || + count > ARRAY_SIZE(kctx->jit_alloc)) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + info = kmalloc_array(count, sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail; + } + + katom->softjob_data = info; + + for (i = 0; i < count; i++, info++, data += jit_info_user_copy_size) { + if (copy_from_user(info, data, jit_info_user_copy_size) != 0) { + ret = -EINVAL; + goto free_info; + } + /* Clear any remaining bytes when user struct is smaller than + * kernel struct. For jit version 1, this also clears the + * padding bytes + */ + memset(((u8 *)info) + jit_info_user_copy_size, 0, + sizeof(*info) - jit_info_user_copy_size); + + ret = kbasep_jit_alloc_validate(kctx, info); + if (ret) + goto free_info; + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom, + info->va_pages, info->commit_pages, info->extent, + info->id, info->bin_id, info->max_allocations, + info->flags, info->usage_id); + } + + katom->jit_blocked = false; + + lockdep_assert_held(&kctx->jctx.lock); + list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); + + /* + * Note: + * The provided info->gpu_alloc_addr isn't validated here as + * userland can cache allocations which means that even + * though the region is valid it doesn't represent the + * same thing it used to. + * + * Complete validation of va_pages, commit_pages and extent + * isn't done here as it will be done during the call to + * kbase_mem_alloc. 
+ */ + return 0; + +free_info: + kfree(katom->softjob_data); + katom->softjob_data = NULL; +fail: + return ret; +} + +static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom) +{ + if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) != + BASE_JD_REQ_SOFT_JIT_FREE)) + return NULL; + + return (u8 *) katom->softjob_data; +} + +static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct list_head *target_list_head = NULL; + struct kbase_jd_atom *entry; + + list_for_each_entry(entry, &kctx->jctx.jit_pending_alloc, queue) { + if (katom->age < entry->age) { + target_list_head = &entry->queue; + break; + } + } + + if (target_list_head == NULL) + target_list_head = &kctx->jctx.jit_pending_alloc; + + list_add_tail(&katom->queue, target_list_head); +} + +static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct base_jit_alloc_info *info; + struct kbase_va_region *reg; + struct kbase_vmap_struct mapping; + u64 *ptr, new_addr; + u32 count = katom->nr_extres; + u32 i; + bool ignore_pressure_limit = false; + + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom)); + + if (katom->jit_blocked) { + list_del(&katom->queue); + katom->jit_blocked = false; + } + + info = katom->softjob_data; + if (WARN_ON(!info)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return 0; + } + + for (i = 0; i < count; i++, info++) { + /* The JIT ID is still in use so fail the allocation */ + if (kctx->jit_alloc[info->id]) { + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return 0; + } + } + +#if MALI_JIT_PRESSURE_LIMIT_BASE + /** + * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit + * is disabled at the context scope, then bypass JIT pressure limit + * logic in kbase_jit_allocate(). + */ + if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) + || (kctx->jit_current_allocations == 0)) { + ignore_pressure_limit = true; + } +#else + ignore_pressure_limit = true; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + for (i = 0, info = katom->softjob_data; i < count; i++, info++) { + if (kctx->jit_alloc[info->id]) { + /* The JIT ID is duplicated in this atom. Roll back + * previous allocations and fail. + */ + u32 j; + + info = katom->softjob_data; + for (j = 0; j < i; j++, info++) { + kbase_jit_free(kctx, kctx->jit_alloc[info->id]); + kctx->jit_alloc[info->id] = + KBASE_RESERVED_REG_JIT_ALLOC; + } + + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return 0; + } + + /* Create a JIT allocation */ + reg = kbase_jit_allocate(kctx, info, ignore_pressure_limit); + if (!reg) { + struct kbase_jd_atom *jit_atom; + bool can_block = false; + + lockdep_assert_held(&kctx->jctx.lock); + + list_for_each_entry(jit_atom, &kctx->jctx.jit_atoms_head, jit_node) { + if (jit_atom == katom) + break; + + if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == + BASE_JD_REQ_SOFT_JIT_FREE) { + u8 *free_ids = kbase_jit_free_get_ids(jit_atom); + + if (free_ids && *free_ids && + kctx->jit_alloc[*free_ids]) { + /* A JIT free which is active and + * submitted before this atom + */ + can_block = true; + break; + } + } + } + + if (!can_block) { + /* Mark the failed allocation as well as the + * other un-attempted allocations in the set, + * so we know they are in use even if the + * allocation itself failed. 
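+				 * KBASE_RESERVED_REG_JIT_ALLOC acts as a
+				 * sentinel: the matching JIT_FREE soft job
+				 * will clear these IDs without attempting to
+				 * free a real region.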
+ */ + for (; i < count; i++, info++) { + kctx->jit_alloc[info->id] = + KBASE_RESERVED_REG_JIT_ALLOC; + } + + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + dev_warn_ratelimited(kbdev->dev, "JIT alloc softjob failed: atom id %d\n", + kbase_jd_atom_id(kctx, katom)); + return 0; + } + + /* There are pending frees for an active allocation + * so we should wait to see whether they free the + * memory. Add to the list of atoms for which JIT + * allocation is pending. + */ + kbase_jit_add_to_pending_alloc_list(katom); + katom->jit_blocked = true; + + /* Rollback, the whole set will be re-attempted */ + while (i-- > 0) { + info--; + kbase_jit_free(kctx, kctx->jit_alloc[info->id]); + kctx->jit_alloc[info->id] = NULL; + } + + return 1; + } + + /* Bind it to the user provided ID. */ + kctx->jit_alloc[info->id] = reg; + } + + for (i = 0, info = katom->softjob_data; i < count; i++, info++) { + u64 entry_mmu_flags = 0; + /* + * Write the address of the JIT allocation to the user provided + * GPU allocation. + */ + ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), + &mapping); + if (!ptr) { + /* + * Leave the allocations "live" as the JIT free atom + * will be submitted anyway. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return 0; + } + + reg = kctx->jit_alloc[info->id]; + new_addr = reg->start_pfn << PAGE_SHIFT; + *ptr = new_addr; + +#if defined(CONFIG_MALI_VECTOR_DUMP) + /* + * Retrieve the mmu flags for JIT allocation + * only if dumping is enabled + */ + entry_mmu_flags = kbase_mmu_create_ate(kbdev, + (struct tagged_addr){ 0 }, reg->flags, + MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id); +#endif + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom, + info->gpu_alloc_addr, new_addr, info->flags, + entry_mmu_flags, info->id, info->commit_pages, + info->extent, info->va_pages); + kbase_vunmap(kctx, &mapping); + + kbase_trace_jit_report_gpu_mem(kctx, reg, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + } + + katom->event_code = BASE_JD_EVENT_DONE; + + return 0; +} + +static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) +{ + struct base_jit_alloc_info *info; + + lockdep_assert_held(&katom->kctx->jctx.lock); + + if (WARN_ON(!katom->softjob_data)) + return; + + /* Remove atom from jit_atoms_head list */ + list_del(&katom->jit_node); + + if (katom->jit_blocked) { + list_del(&katom->queue); + katom->jit_blocked = false; + } + + info = katom->softjob_data; + /* Free the info structure */ + kfree(info); +} + +static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + __user void *data = (__user void *)(uintptr_t) katom->jc; + u8 *ids; + u32 count = MAX(katom->nr_extres, 1); + u32 i; + int ret; + + /* Sanity checks */ + if (count > ARRAY_SIZE(kctx->jit_alloc)) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); + if (!ids) { + ret = -ENOMEM; + goto fail; + } + + lockdep_assert_held(&kctx->jctx.lock); + katom->softjob_data = ids; + + /* For backwards compatibility */ + if (katom->nr_extres) { + /* Fail the job if there is no list of ids */ + if (!data) { + ret = -EINVAL; + goto free_info; + } + + if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) { + ret = -EINVAL; + goto free_info; + } + } else { + katom->nr_extres = 1; + *ids = (u8)katom->jc; + } + for (i = 0; i < count; i++) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, katom, ids[i]); + + 
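+	/* Track the atom on the context's JIT atom list so that a blocked
+	 * JIT_ALLOC can spot frees that were submitted ahead of it (see
+	 * kbase_jit_allocate_process()).
+	 */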
list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); + + return 0; + +free_info: + kfree(katom->softjob_data); + katom->softjob_data = NULL; +fail: + return ret; +} + +static void kbase_jit_free_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u8 *ids = kbase_jit_free_get_ids(katom); + u32 count = katom->nr_extres; + u32 i; + + if (ids == NULL) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + + for (i = 0; i < count; i++, ids++) { + /* + * If the ID is zero or it is not in use yet then fail the job. + */ + if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + } +} + +static void kbasep_jit_finish_worker(struct work_struct *work) +{ + struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + int resched; + + mutex_lock(&kctx->jctx.lock); + kbase_finish_soft_job(katom); + resched = jd_done_nolock(katom, NULL); + mutex_unlock(&kctx->jctx.lock); + + if (resched) + kbase_js_sched_all(kctx->kbdev); +} + +void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) +{ + LIST_HEAD(jit_pending_alloc_list); + struct list_head *i, *tmp; + + list_splice_tail_init(&kctx->jctx.jit_pending_alloc, + &jit_pending_alloc_list); + + list_for_each_safe(i, tmp, &jit_pending_alloc_list) { + struct kbase_jd_atom *pending_atom = list_entry(i, + struct kbase_jd_atom, queue); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom); + kbase_kinstr_jm_atom_sw_start(pending_atom); + if (kbase_jit_allocate_process(pending_atom) == 0) { + /* Atom has completed */ + INIT_WORK(&pending_atom->work, + kbasep_jit_finish_worker); + queue_work(kctx->jctx.job_done_wq, &pending_atom->work); + } + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom); + kbase_kinstr_jm_atom_sw_stop(pending_atom); + } +} + +static void kbase_jit_free_finish(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u8 *ids; + size_t j; + + lockdep_assert_held(&kctx->jctx.lock); + + ids = kbase_jit_free_get_ids(katom); + if (WARN_ON(ids == NULL)) { + return; + } + + /* Remove this atom from the jit_atoms_head list */ + list_del(&katom->jit_node); + + for (j = 0; j != katom->nr_extres; ++j) { + if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) { + /* + * If the ID is valid but the allocation request failed + * still succeed this soft job but don't try and free + * the allocation. + */ + if (kctx->jit_alloc[ids[j]] != + KBASE_RESERVED_REG_JIT_ALLOC) { + KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev, + kctx->jit_alloc[ids[j]]-> + gpu_alloc->nents, ids[j]); + kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]); + } + kctx->jit_alloc[ids[j]] = NULL; + } + } + /* Free the list of ids */ + kfree(ids); + + kbase_jit_retry_pending_alloc(kctx); +} + +static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) +{ + __user struct base_external_resource_list *user_ext_res; + struct base_external_resource_list *ext_res; + u64 count = 0; + size_t copy_size; + int ret; + + user_ext_res = (__user struct base_external_resource_list *) + (uintptr_t) katom->jc; + + /* Fail the job if there is no info structure */ + if (!user_ext_res) { + ret = -EINVAL; + goto fail; + } + + if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { + ret = -EINVAL; + goto fail; + } + + /* Is the number of external resources in range? 
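+	 * count sizes the kzalloc() below, so an unchecked value from user
+	 * space could otherwise request an arbitrarily large allocation.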
*/ + if (!count || count > BASE_EXT_RES_COUNT_MAX) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + copy_size = sizeof(*ext_res); + copy_size += sizeof(struct base_external_resource) * (count - 1); + ext_res = kzalloc(copy_size, GFP_KERNEL); + if (!ext_res) { + ret = -ENOMEM; + goto fail; + } + + if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* + * Overwrite the count with the first value incase it was changed + * after the fact. + */ + ext_res->count = count; + + katom->softjob_data = ext_res; + + return 0; + +free_info: + kfree(ext_res); +fail: + return ret; +} + +static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) +{ + struct base_external_resource_list *ext_res; + int i; + bool failed = false; + + ext_res = katom->softjob_data; + if (!ext_res) + goto failed_jc; + + kbase_gpu_vm_lock(katom->kctx); + + for (i = 0; i < ext_res->count; i++) { + u64 gpu_addr; + + gpu_addr = ext_res->ext_res[i].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + if (map) { + if (!kbase_sticky_resource_acquire(katom->kctx, + gpu_addr)) + goto failed_loop; + } else + if (!kbase_sticky_resource_release_force(katom->kctx, NULL, + gpu_addr)) + failed = true; + } + + /* + * In the case of unmap we continue unmapping other resources in the + * case of failure but will always report failure if _any_ unmap + * request fails. + */ + if (failed) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + else + katom->event_code = BASE_JD_EVENT_DONE; + + kbase_gpu_vm_unlock(katom->kctx); + + return; + +failed_loop: + while (i > 0) { + u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + + kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr); + + --i; + } + + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_vm_unlock(katom->kctx); + +failed_jc: + return; +} + +static void kbase_ext_res_finish(struct kbase_jd_atom *katom) +{ + struct base_external_resource_list *ext_res; + + ext_res = katom->softjob_data; + /* Free the info structure */ + kfree(ext_res); +} + +int kbase_process_soft_job(struct kbase_jd_atom *katom) +{ + int ret = 0; + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); + kbase_kinstr_jm_atom_sw_start(katom); + + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom)); + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + ret = kbase_dump_cpu_gpu_time(katom); + break; + +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + katom->event_code = kbase_sync_fence_out_trigger(katom, + katom->event_code == BASE_JD_EVENT_DONE ? 
+ 0 : -EFAULT); + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + { + ret = kbase_sync_fence_in_wait(katom); + + if (ret == 1) { +#ifdef CONFIG_MALI_FENCE_DEBUG + kbasep_add_waiting_with_timeout(katom); +#else + kbasep_add_waiting_soft_job(katom); +#endif + } + break; + } +#endif + case BASE_JD_REQ_SOFT_EVENT_WAIT: + ret = kbasep_soft_event_wait(katom); + break; + case BASE_JD_REQ_SOFT_EVENT_SET: + kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); + break; + case BASE_JD_REQ_SOFT_EVENT_RESET: + kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + { + int res = kbase_debug_copy(katom); + + if (res) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + break; + } + case BASE_JD_REQ_SOFT_JIT_ALLOC: + ret = kbase_jit_allocate_process(katom); + break; + case BASE_JD_REQ_SOFT_JIT_FREE: + kbase_jit_free_process(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_process(katom, true); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_process(katom, false); + break; + } + + /* Atom is complete */ + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); + kbase_kinstr_jm_atom_sw_stop(katom); + return ret; +} + +void kbase_cancel_soft_job(struct kbase_jd_atom *katom) +{ + /* MALI_SEC_INTEGRATION */ + pgd_t *pgd; + struct mm_struct *mm = katom->kctx->process_mm; + + pgd = pgd_offset(mm, (unsigned long)katom); + if (pgd_none(*pgd) || pgd_bad(*pgd)) { + printk("Abnormal katom\n"); + printk("katom->kctx: 0x%p, katom->kctx->tgid: %d, katom->kctx->process_mm: 0x%p, pgd: 0x%px\n", katom->kctx, katom->kctx->tgid, katom->kctx->process_mm, pgd); + return; + } + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + case BASE_JD_REQ_SOFT_FENCE_WAIT: + kbase_sync_fence_in_cancel_wait(katom); + break; +#endif + case BASE_JD_REQ_SOFT_EVENT_WAIT: + kbasep_soft_event_cancel_job(katom); + break; + default: + /* This soft-job doesn't support cancellation! */ + KBASE_DEBUG_ASSERT(0); + } +} + +int kbase_prepare_soft_job(struct kbase_jd_atom *katom) +{ + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + { + if (!IS_ALIGNED(katom->jc, cache_line_size())) + return -EINVAL; + } + break; +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + { + struct base_fence fence; + int fd; + + if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + return -EINVAL; + + fd = kbase_sync_fence_out_create(katom, + fence.basep.stream_fd); + if (fd < 0) + return -EINVAL; + + fence.basep.fd = fd; + if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { + kbase_sync_fence_out_remove(katom); + kbase_sync_fence_close_fd(fd); + fence.basep.fd = -EINVAL; + return -EINVAL; + } + } + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + { + struct base_fence fence; + int ret; + + if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + return -EINVAL; + + /* Get a reference to the fence object */ + ret = kbase_sync_fence_in_from_fd(katom, + fence.basep.fd); + if (ret < 0) + return ret; + +#ifdef CONFIG_MALI_DMA_FENCE + /* + * Set KCTX_NO_IMPLICIT_FENCE in the context the first + * time a soft fence wait job is observed. This will + * prevent the implicit dma-buf fence to conflict with + * the Android native sync fences. 
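+			 * (The flag tested and set below is named
+			 * KCTX_NO_IMPLICIT_SYNC.)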
+ */ + if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC)) + kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC); +#endif /* CONFIG_MALI_DMA_FENCE */ + } + break; +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ + case BASE_JD_REQ_SOFT_JIT_ALLOC: + return kbase_jit_allocate_prepare(katom); + case BASE_JD_REQ_SOFT_JIT_FREE: + return kbase_jit_free_prepare(katom); + case BASE_JD_REQ_SOFT_EVENT_WAIT: + case BASE_JD_REQ_SOFT_EVENT_SET: + case BASE_JD_REQ_SOFT_EVENT_RESET: + if (katom->jc == 0) + return -EINVAL; + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + return kbase_debug_copy_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + return kbase_ext_res_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + return kbase_ext_res_prepare(katom); + default: + /* Unsupported soft-job */ + return -EINVAL; + } + return 0; +} + +void kbase_finish_soft_job(struct kbase_jd_atom *katom) +{ + trace_sysgraph(SGR_COMPLETE, katom->kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + /* Nothing to do */ + break; +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + /* If fence has not yet been signaled, do it now */ + kbase_sync_fence_out_trigger(katom, katom->event_code == + BASE_JD_EVENT_DONE ? 0 : -EFAULT); + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + /* Release katom's reference to fence object */ + kbase_sync_fence_in_remove(katom); + break; +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ + case BASE_JD_REQ_SOFT_DEBUG_COPY: + kbase_debug_copy_finish(katom); + break; + case BASE_JD_REQ_SOFT_JIT_ALLOC: + kbase_jit_allocate_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_finish(katom); + break; + case BASE_JD_REQ_SOFT_JIT_FREE: + kbase_jit_free_finish(katom); + break; + } +} + +void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) +{ + LIST_HEAD(local_suspended_soft_jobs); + struct kbase_jd_atom *tmp_iter; + struct kbase_jd_atom *katom_iter; + struct kbasep_js_device_data *js_devdata; + bool resched = false; + + KBASE_DEBUG_ASSERT(kbdev); + + js_devdata = &kbdev->js_data; + + /* Move out the entire list */ + mutex_lock(&js_devdata->runpool_mutex); + list_splice_init(&js_devdata->suspended_soft_jobs_list, + &local_suspended_soft_jobs); + mutex_unlock(&js_devdata->runpool_mutex); + + /* + * Each atom must be detached from the list and ran separately - + * it could be re-added to the old list, but this is unlikely + */ + list_for_each_entry_safe(katom_iter, tmp_iter, + &local_suspended_soft_jobs, dep_item[1]) { + struct kbase_context *kctx = katom_iter->kctx; + + mutex_lock(&kctx->jctx.lock); + + /* Remove from the global list */ + list_del(&katom_iter->dep_item[1]); + /* Remove from the context's list of waiting soft jobs */ + kbasep_remove_waiting_soft_job(katom_iter); + + if (kbase_process_soft_job(katom_iter) == 0) { + kbase_finish_soft_job(katom_iter); + resched |= jd_done_nolock(katom_iter, NULL); + } + mutex_unlock(&kctx->jctx.lock); + } + + if (resched) + kbase_js_sched_all(kbdev); +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_strings.c b/drivers/gpu/arm/b_r26p0/mali_kbase_strings.c new file mode 100644 index 000000000000..22caa4a6d814 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_strings.c @@ -0,0 +1,28 @@ + /* + * + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include "mali_kbase_strings.h" + +#define KBASE_DRV_NAME "mali" +#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" + +const char kbase_drv_name[] = KBASE_DRV_NAME; +const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_strings.h b/drivers/gpu/arm/b_r26p0/mali_kbase_strings.h new file mode 100644 index 000000000000..d2f1825314fe --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_strings.h @@ -0,0 +1,24 @@ +/* + * + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +extern const char kbase_drv_name[]; +extern const char kbase_timeline_name[]; diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_sync.h b/drivers/gpu/arm/b_r26p0/mali_kbase_sync.h new file mode 100644 index 000000000000..80b54d0de5f8 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_sync.h @@ -0,0 +1,223 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_sync.h + * + * This file contains our internal "API" for explicit fences. + * It hides the implementation details of the actual explicit fence mechanism + * used (Android fences or sync file with DMA fences). 
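+ * The CONFIG_SYNC backend is implemented in mali_kbase_sync_android.c and
+ * the CONFIG_SYNC_FILE backend in mali_kbase_sync_file.c; both provide the
+ * functions declared here.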
+ */ + +#ifndef MALI_KBASE_SYNC_H +#define MALI_KBASE_SYNC_H + +#include +#ifdef CONFIG_SYNC +#include +#endif +#ifdef CONFIG_SYNC_FILE +#include "mali_kbase_fence_defs.h" +#include +#endif + +#include "mali_kbase.h" + +/** + * struct kbase_sync_fence_info - Information about a fence + * @fence: Pointer to fence (type is void*, as underlaying struct can differ) + * @name: The name given to this fence when it was created + * @status: < 0 means error, 0 means active, 1 means signaled + * + * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get() + * to get the information. + */ +struct kbase_sync_fence_info { + void *fence; + char name[32]; + int status; +}; + +/** + * kbase_sync_fence_stream_create() - Create a stream object + * @name: Name of stream (only used to ease debugging/visualization) + * @out_fd: A file descriptor representing the created stream object + * + * Can map down to a timeline implementation in some implementations. + * Exposed as a file descriptor. + * Life-time controlled via the file descriptor: + * - dup to add a ref + * - close to remove a ref + * + * return: 0 on success, < 0 on error + */ +int kbase_sync_fence_stream_create(const char *name, int *const out_fd); + +/** + * kbase_sync_fence_out_create Create an explicit output fence to specified atom + * @katom: Atom to assign the new explicit fence to + * @stream_fd: File descriptor for stream object to create fence on + * + * return: Valid file descriptor to fence or < 0 on error + */ +int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); + +/** + * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom + * @katom: Atom to assign the existing explicit fence to + * @fd: File descriptor to an existing fence + * + * Assigns an explicit input fence to atom. + * This can later be waited for by calling @kbase_sync_fence_in_wait + * + * return: 0 on success, < 0 on error + */ +int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); + +/** + * kbase_sync_fence_validate() - Validate a fd to be a valid fence + * @fd: File descriptor to check + * + * This function is only usable to catch unintentional user errors early, + * it does not stop malicious code changing the fd after this function returns. + * + * return 0: if fd is for a valid fence, < 0 if invalid + */ +int kbase_sync_fence_validate(int fd); + +/** + * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom + * @katom: Atom with an explicit fence to signal + * @result: < 0 means signal with error, 0 >= indicates success + * + * Signal output fence attached on katom and remove the fence from the atom. + * + * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE + */ +enum base_jd_event_code +kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); + +/** + * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled + * @katom: Atom with explicit fence to wait for + * + * If the fence is already signaled, then 0 is returned, and the caller must + * continue processing of the katom. + * + * If the fence isn't already signaled, then this kbase_sync framework will + * take responsibility to continue the processing once the fence is signaled. 
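+ * (typically by queuing kbase_sync_fence_wait_worker() on the context's
+ * job_done_wq once the underlying fence callback fires).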
+ * + * return: 0 if already signaled, otherwise 1 + */ +int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom); + +/** + * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits + * @katom: Atom to cancel wait for + * + * This function is fully responsible for continuing processing of this atom + * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all) + */ +void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom); + +/** + * kbase_sync_fence_in_remove() - Remove the input fence from the katom + * @katom: Atom to remove explicit input fence for + * + * This will also release the corresponding reference. + */ +void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); + +/** + * kbase_sync_fence_out_remove() - Remove the output fence from the katom + * @katom: Atom to remove explicit output fence for + * + * This will also release the corresponding reference. + */ +void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); + +/** + * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence + * @fd: File descriptor to close + */ +static inline void kbase_sync_fence_close_fd(int fd) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + ksys_close(fd); +#else + sys_close(fd); +#endif +} + +/** + * kbase_sync_fence_in_info_get() - Retrieves information about input fence + * @katom: Atom to get fence information from + * @info: Struct to be filled with fence information + * + * return: 0 on success, < 0 on error + */ +int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info); + +/** + * kbase_sync_fence_out_info_get() - Retrieves information about output fence + * @katom: Atom to get fence information from + * @info: Struct to be filled with fence information + * + * return: 0 on success, < 0 on error + */ +int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info); + +#if defined(CONFIG_SYNC_FILE) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +void kbase_sync_fence_info_get(struct fence *fence, + struct kbase_sync_fence_info *info); +#else +void kbase_sync_fence_info_get(struct dma_fence *fence, + struct kbase_sync_fence_info *info); +#endif +#endif + +/** + * kbase_sync_status_string() - Get string matching @status + * @status: Value of fence status. + * + * return: Pointer to string describing @status. + */ +const char *kbase_sync_status_string(int status); + + +/* + * Internal worker used to continue processing of atom. + */ +void kbase_sync_fence_wait_worker(struct work_struct *data); + +#ifdef CONFIG_MALI_FENCE_DEBUG +/** + * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state + * @katom: Atom to trigger fence debug dump for + */ +void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); +#endif + +#endif /* MALI_KBASE_SYNC_H */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_sync_android.c b/drivers/gpu/arm/b_r26p0/mali_kbase_sync_android.c new file mode 100644 index 000000000000..75940fb08a05 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_sync_android.c @@ -0,0 +1,542 @@ +/* + * + * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Code for supporting explicit Android fences (CONFIG_SYNC) + * Known to be good for kernels 4.5 and earlier. + * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels + * (see mali_kbase_sync_file.c) + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sync.h" +#include +#include + +struct mali_sync_timeline { + struct sync_timeline timeline; + atomic_t counter; + atomic_t signaled; +}; + +struct mali_sync_pt { + struct sync_pt pt; + int order; + int result; +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +/* For backwards compatibility with kernels before 3.17. After 3.17 + * sync_pt_parent is included in the kernel. */ +static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) +{ + return pt->parent; +} +#endif + +static struct mali_sync_timeline *to_mali_sync_timeline( + struct sync_timeline *timeline) +{ + return container_of(timeline, struct mali_sync_timeline, timeline); +} + +static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) +{ + return container_of(pt, struct mali_sync_pt, pt); +} + +static struct sync_pt *timeline_dup(struct sync_pt *pt) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_pt *new_mpt; + struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), + sizeof(struct mali_sync_pt)); + + if (!new_pt) + return NULL; + + new_mpt = to_mali_sync_pt(new_pt); + new_mpt->order = mpt->order; + new_mpt->result = mpt->result; + + return new_pt; +} + +static int timeline_has_signaled(struct sync_pt *pt) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_timeline *mtl = to_mali_sync_timeline( + sync_pt_parent(pt)); + int result = mpt->result; + + int diff = atomic_read(&mtl->signaled) - mpt->order; + + if (diff >= 0) + return (result < 0) ? result : 1; + + return 0; +} + +static int timeline_compare(struct sync_pt *a, struct sync_pt *b) +{ + struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt); + struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt); + + int diff = ma->order - mb->order; + + if (diff == 0) + return 0; + + return (diff < 0) ? -1 : 1; +} + +static void timeline_value_str(struct sync_timeline *timeline, char *str, + int size) +{ + struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline); + + snprintf(str, size, "%d", atomic_read(&mtl->signaled)); +} + +static void pt_value_str(struct sync_pt *pt, char *str, int size) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + + snprintf(str, size, "%d(%d)", mpt->order, mpt->result); +} + +static struct sync_timeline_ops mali_timeline_ops = { + .driver_name = "Mali", + .dup = timeline_dup, + .has_signaled = timeline_has_signaled, + .compare = timeline_compare, + .timeline_value_str = timeline_value_str, + .pt_value_str = pt_value_str, +}; + +/* Allocates a timeline for Mali + * + * One timeline should be allocated per API context. 
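+ * The timeline is handed to user space as a stream file descriptor by
+ * kbase_sync_fence_stream_create() and is destroyed when that fd is closed.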
+ */ +static struct sync_timeline *mali_sync_timeline_alloc(const char *name) +{ + struct sync_timeline *tl; + struct mali_sync_timeline *mtl; + + tl = sync_timeline_create(&mali_timeline_ops, + sizeof(struct mali_sync_timeline), name); + if (!tl) + return NULL; + + /* Set the counter in our private struct */ + mtl = to_mali_sync_timeline(tl); + atomic_set(&mtl->counter, 0); + atomic_set(&mtl->signaled, 0); + + return tl; +} + +static int kbase_stream_close(struct inode *inode, struct file *file) +{ + struct sync_timeline *tl; + + tl = (struct sync_timeline *)file->private_data; + sync_timeline_destroy(tl); + return 0; +} + +static const struct file_operations stream_fops = { + .owner = THIS_MODULE, + .release = kbase_stream_close, +}; + +int kbase_sync_fence_stream_create(const char *name, int *const out_fd) +{ + struct sync_timeline *tl; + + if (!out_fd) + return -EINVAL; + + tl = mali_sync_timeline_alloc(name); + if (!tl) + return -EINVAL; + + *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC); + + if (*out_fd < 0) { + sync_timeline_destroy(tl); + return -EINVAL; + } + + return 0; +} + +/* Allocates a sync point within the timeline. + * + * The timeline must be the one allocated by kbase_sync_timeline_alloc + * + * Sync points must be triggered in *exactly* the same order as they are + * allocated. + */ +static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent) +{ + struct sync_pt *pt = sync_pt_create(parent, + sizeof(struct mali_sync_pt)); + struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent); + struct mali_sync_pt *mpt; + + if (!pt) + return NULL; + + mpt = to_mali_sync_pt(pt); + mpt->order = atomic_inc_return(&mtl->counter); + mpt->result = 0; + + return pt; +} + +int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) +{ + struct sync_timeline *tl; + struct sync_pt *pt; + struct sync_fence *fence; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) + struct files_struct *files; + struct fdtable *fdt; +#endif + int fd; + struct file *tl_file; + + tl_file = fget(tl_fd); + if (tl_file == NULL) + return -EBADF; + + if (tl_file->f_op != &stream_fops) { + fd = -EBADF; + goto out; + } + + tl = tl_file->private_data; + + pt = kbase_sync_pt_alloc(tl); + if (!pt) { + fd = -EFAULT; + goto out; + } + + fence = sync_fence_create("mali_fence", pt); + if (!fence) { + sync_pt_free(pt); + fd = -EFAULT; + goto out; + } + + /* from here the fence owns the sync_pt */ + + /* create a fd representing the fence */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) { + sync_fence_put(fence); + goto out; + } +#else + fd = get_unused_fd(); + if (fd < 0) { + sync_fence_put(fence); + goto out; + } + + files = current->files; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) + __set_close_on_exec(fd, fdt); +#else + FD_SET(fd, fdt->close_on_exec); +#endif + spin_unlock(&files->file_lock); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ + + /* bind fence to the new fd */ + sync_fence_install(fence, fd); + + katom->fence = sync_fence_fdget(fd); + if (katom->fence == NULL) { + /* The only way the fence can be NULL is if userspace closed it + * for us, so we don't need to clear it up */ + fd = -EINVAL; + goto out; + } + +out: + fput(tl_file); + + return fd; +} + +int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) +{ + katom->fence = sync_fence_fdget(fd); + return katom->fence ? 
0 : -ENOENT; +} + +int kbase_sync_fence_validate(int fd) +{ + struct sync_fence *fence; + + fence = sync_fence_fdget(fd); + if (!fence) + return -EINVAL; + + sync_fence_put(fence); + return 0; +} + +/* Returns true if the specified timeline is allocated by Mali */ +static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) +{ + return timeline->ops == &mali_timeline_ops; +} + +/* Signals a particular sync point + * + * Sync points must be triggered in *exactly* the same order as they are + * allocated. + * + * If they are signaled in the wrong order then a message will be printed in + * debug builds and otherwise attempts to signal order sync_pts will be ignored. + * + * result can be negative to indicate error, any other value is interpreted as + * success. + */ +static void kbase_sync_signal_pt(struct sync_pt *pt, int result) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_timeline *mtl = to_mali_sync_timeline( + sync_pt_parent(pt)); + int signaled; + int diff; + + mpt->result = result; + + do { + signaled = atomic_read(&mtl->signaled); + + diff = signaled - mpt->order; + + if (diff > 0) { + /* The timeline is already at or ahead of this point. + * This should not happen unless userspace has been + * signaling fences out of order, so warn but don't + * violate the sync_pt API. + * The warning is only in debug builds to prevent + * a malicious user being able to spam dmesg. + */ +#ifdef CONFIG_MALI_DEBUG + pr_err("Fences were triggered in a different order to allocation!"); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + } while (atomic_cmpxchg(&mtl->signaled, + signaled, mpt->order) != signaled); +} + +enum base_jd_event_code +kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) +{ + struct sync_pt *pt; + struct sync_timeline *timeline; + + if (!katom->fence) + return BASE_JD_EVENT_JOB_CANCELLED; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + if (!list_is_singular(&katom->fence->pt_list_head)) { +#else + if (katom->fence->num_fences != 1) { +#endif + /* Not exactly one item in the list - so it didn't (directly) + * come from us */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + pt = list_first_entry(&katom->fence->pt_list_head, + struct sync_pt, pt_list); +#else + pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); +#endif + timeline = sync_pt_parent(pt); + + if (!kbase_sync_timeline_is_ours(timeline)) { + /* Fence has a sync_pt which isn't ours! */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + + kbase_sync_signal_pt(pt, result); + + sync_timeline_signal(timeline); + + kbase_sync_fence_out_remove(katom); + + return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; +} + +static inline int kbase_fence_get_status(struct sync_fence *fence) +{ + if (!fence) + return -ENOENT; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + return fence->status; +#else + return atomic_read(&fence->status); +#endif +} + +static void kbase_fence_wait_callback(struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + struct kbase_jd_atom *katom = container_of(waiter, + struct kbase_jd_atom, sync_waiter); + struct kbase_context *kctx = katom->kctx; + + /* Propagate the fence status to the atom. + * If negative then cancel this atom and its dependencies. 
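+ * A zero or positive fence status leaves katom->event_code unchanged.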
+ */ + if (kbase_fence_get_status(fence) < 0) + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + /* To prevent a potential deadlock we schedule the work onto the + * job_done_wq workqueue + * + * The issue is that we may signal the timeline while holding + * kctx->jctx.lock and the callbacks are run synchronously from + * sync_timeline_signal. So we simply defer the work. + */ + + INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) +{ + int ret; + + sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback); + + ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter); + + if (ret == 1) { + /* Already signaled */ + return 0; + } + + if (ret < 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + /* We should cause the dependent jobs in the bag to be failed, + * to do this we schedule the work queue to complete this job */ + INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + } + + return 1; +} + +void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) +{ + if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) { + /* The wait wasn't cancelled - leave the cleanup for + * kbase_fence_wait_callback */ + return; + } + + /* Wait was cancelled - zap the atoms */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + kbasep_remove_waiting_soft_job(katom); + kbase_finish_soft_job(katom); + + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); +} + +void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) +{ + if (katom->fence) { + sync_fence_put(katom->fence); + katom->fence = NULL; + } +} + +void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) +{ + if (katom->fence) { + sync_fence_put(katom->fence); + katom->fence = NULL; + } +} + +int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info) +{ + if (!katom->fence) + return -ENOENT; + + info->fence = katom->fence; + info->status = kbase_fence_get_status(katom->fence); + strlcpy(info->name, katom->fence->name, sizeof(info->name)); + + return 0; +} + +int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info) +{ + if (!katom->fence) + return -ENOENT; + + info->fence = katom->fence; + info->status = kbase_fence_get_status(katom->fence); + strlcpy(info->name, katom->fence->name, sizeof(info->name)); + + return 0; +} + +#ifdef CONFIG_MALI_FENCE_DEBUG +void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) +{ + /* Dump out the full state of all the Android sync fences. + * The function sync_dump() isn't exported to modules, so force + * sync_fence_wait() to time out to trigger sync_dump(). + */ + if (katom->fence) + sync_fence_wait(katom->fence, 1); +} +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_sync_common.c b/drivers/gpu/arm/b_r26p0/mali_kbase_sync_common.c new file mode 100644 index 000000000000..2e1ede5bdb70 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_sync_common.c @@ -0,0 +1,49 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * @file mali_kbase_sync_common.c + * + * Common code for our explicit fence functionality + */ + +#include +#include "mali_kbase.h" +#include "mali_kbase_sync.h" + +void kbase_sync_fence_wait_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom; + + katom = container_of(data, struct kbase_jd_atom, work); + kbase_soft_event_wait_callback(katom); +} + +const char *kbase_sync_status_string(int status) +{ + if (status == 0) + return "active"; + else if (status > 0) + return "signaled"; + else + return "error"; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_sync_file.c b/drivers/gpu/arm/b_r26p0/mali_kbase_sync_file.c new file mode 100644 index 000000000000..24ace1585438 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_sync_file.c @@ -0,0 +1,378 @@ +/* + * + * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE) + * Introduced in kernel 4.9. + * Android explicit fences (CONFIG_SYNC) can be used for older kernels + * (see mali_kbase_sync_android.c) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mali_kbase_fence_defs.h" +#include "mali_kbase_sync.h" +#include "mali_kbase_fence.h" +#include "mali_kbase.h" + +static const struct file_operations stream_fops = { + .owner = THIS_MODULE +}; + +int kbase_sync_fence_stream_create(const char *name, int *const out_fd) +{ + if (!out_fd) + return -EINVAL; + + *out_fd = anon_inode_getfd(name, &stream_fops, NULL, + O_RDONLY | O_CLOEXEC); + if (*out_fd < 0) + return -EINVAL; + + return 0; +} + +int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct sync_file *sync_file; + int fd; + + fence = kbase_fence_out_new(katom); + if (!fence) + return -ENOMEM; + +#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) + /* Take an extra reference to the fence on behalf of the sync_file. + * This is only needed on older kernels where sync_file_create() + * does not take its own reference. This was changed in v4.9.68, + * where sync_file_create() now takes its own reference. 
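+ * The matching dma_fence_put() after sync_file_create() below drops this
+ * extra reference again.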
+ */ + dma_fence_get(fence); +#endif + + /* create a sync_file fd representing the fence */ + sync_file = sync_file_create(fence); + +/* MALI_SEC_INTEGRATION */ +#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) + /* Take an extra reference to the fence on behalf of the sync_file. + * This is only needed on older kernels where sync_file_create() + * does not take its own reference. This was changed in v4.9.68, + * where sync_file_create() now takes its own reference. + */ + dma_fence_put(fence); +#endif + + if (!sync_file) { +/* MALI_SEC_INTEGRATION */ +#if (KERNEL_VERSION(4, 9, 67) < LINUX_VERSION_CODE) + dma_fence_put(fence); +#endif + kbase_fence_out_remove(katom); + return -ENOMEM; + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + fput(sync_file->file); + kbase_fence_out_remove(katom); + return fd; + } + + fd_install(fd, sync_file->file); + + return fd; +} + +int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence = sync_file_get_fence(fd); +#else + struct dma_fence *fence = sync_file_get_fence(fd); +#endif + + if (!fence) + return -ENOENT; + + kbase_fence_fence_in_set(katom, fence); + + return 0; +} + +int kbase_sync_fence_validate(int fd) +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence = sync_file_get_fence(fd); +#else + struct dma_fence *fence = sync_file_get_fence(fd); +#endif + + if (!fence) + return -EINVAL; + + dma_fence_put(fence); + + return 0; /* valid */ +} + +enum base_jd_event_code +kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) +{ + int res; + + if (!kbase_fence_out_is_ours(katom)) { + /* Not our fence */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + + res = kbase_fence_out_signal(katom, result); + if (unlikely(res < 0)) { + dev_warn(katom->kctx->kbdev->dev, + "fence_signal() failed with %d\n", res); + } + + kbase_sync_fence_out_remove(katom); + + return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; +} + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +static void kbase_fence_wait_callback(struct fence *fence, + struct fence_cb *cb) +#else +static void kbase_fence_wait_callback(struct dma_fence *fence, + struct dma_fence_cb *cb) +#endif +{ + struct kbase_fence_cb *kcb = container_of(cb, + struct kbase_fence_cb, + fence_cb); + struct kbase_jd_atom *katom = kcb->katom; + struct kbase_context *kctx = katom->kctx; + + /* Cancel atom if fence is erroneous */ +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ + (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) + if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error) +#else + if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0) +#endif + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + if (kbase_fence_dep_count_dec_and_test(katom)) { + /* We take responsibility of handling this */ + kbase_fence_dep_count_set(katom, -1); + + /* To prevent a potential deadlock we schedule the work onto the + * job_done_wq workqueue + * + * The issue is that we may signal the timeline while holding + * kctx->jctx.lock and the callbacks are run synchronously from + * sync_timeline_signal. So we simply defer the work. 
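+ * The worker, kbase_sync_fence_wait_worker(), completes the atom from
+ * workqueue context via kbase_soft_event_wait_callback().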
+ */ + INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); + } +} + +int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) +{ + int err; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + + fence = kbase_fence_in_get(katom); + if (!fence) + return 0; /* no input fence to wait for, good to go! */ + + kbase_fence_dep_count_set(katom, 1); + + err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback); + + kbase_fence_put(fence); + + if (likely(!err)) { + /* Test if the callbacks are already triggered */ + if (kbase_fence_dep_count_dec_and_test(katom)) { + kbase_fence_free_callbacks(katom); + kbase_fence_dep_count_set(katom, -1); + return 0; /* Already signaled, good to go right now */ + } + + /* Callback installed, so we just need to wait for it... */ + } else { + /* Failure */ + kbase_fence_free_callbacks(katom); + kbase_fence_dep_count_set(katom, -1); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + /* We should cause the dependent jobs in the bag to be failed, + * to do this we schedule the work queue to complete this job */ + + INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + } + + return 1; /* completion to be done later by callback/worker */ +} + +void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) +{ + if (!kbase_fence_free_callbacks(katom)) { + /* The wait wasn't cancelled - + * leave the cleanup for kbase_fence_wait_callback */ + return; + } + + /* Take responsibility of completion */ + kbase_fence_dep_count_set(katom, -1); + + /* Wait was cancelled - zap the atoms */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + kbasep_remove_waiting_soft_job(katom); + kbase_finish_soft_job(katom); + + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); +} + +void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom) +{ + kbase_fence_out_remove(katom); +} + +void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) +{ + kbase_fence_free_callbacks(katom); + kbase_fence_in_remove(katom); +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +void kbase_sync_fence_info_get(struct fence *fence, + struct kbase_sync_fence_info *info) +#else +void kbase_sync_fence_info_get(struct dma_fence *fence, + struct kbase_sync_fence_info *info) +#endif +{ + info->fence = fence; + + /* translate into CONFIG_SYNC status: + * < 0 : error + * 0 : active + * 1 : signaled + */ + if (dma_fence_is_signaled(fence)) { +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ + (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) + int status = fence->error; +#else + int status = fence->status; +#endif + if (status < 0) + info->status = status; /* signaled with error */ + else + info->status = 1; /* signaled with success */ + } else { + info->status = 0; /* still active (unsignaled) */ + } + +#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) + scnprintf(info->name, sizeof(info->name), "%u#%u", + fence->context, fence->seqno); +#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) + scnprintf(info->name, sizeof(info->name), "%llu#%u", + fence->context, fence->seqno); +#else + scnprintf(info->name, sizeof(info->name), "%llu#%llu", + fence->context, fence->seqno); +#endif +} + +int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info) +{ +#if (LINUX_VERSION_CODE < 
KERNEL_VERSION(4, 10, 0)) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + + fence = kbase_fence_in_get(katom); + if (!fence) + return -ENOENT; + + kbase_sync_fence_info_get(fence, info); + + kbase_fence_put(fence); + + return 0; +} + +int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info) +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + + fence = kbase_fence_out_get(katom); + if (!fence) + return -ENOENT; + + kbase_sync_fence_info_get(fence, info); + + kbase_fence_put(fence); + + return 0; +} + + +#ifdef CONFIG_MALI_FENCE_DEBUG +void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) +{ + /* Not implemented */ +} +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_trace_gpu_mem.c b/drivers/gpu/arm/b_r26p0/mali_kbase_trace_gpu_mem.c new file mode 100644 index 000000000000..0a053da8dfa9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_trace_gpu_mem.c @@ -0,0 +1,227 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include + +/** + * struct kbase_dma_buf - Object instantiated when a dma-buf imported allocation + * is mapped to GPU for the first time within a process. + * Another instantiation is done for the case when that + * allocation is mapped for the first time to GPU. + * + * @dma_buf: Reference to dma_buf been imported. + * @dma_buf_node: Link node to maintain a rb_tree of kbase_dma_buf. + * @import_count: The number of times the dma_buf was imported. + */ +struct kbase_dma_buf { + struct dma_buf *dma_buf; + struct rb_node dma_buf_node; + u32 import_count; +}; + +/** + * kbase_delete_dma_buf_mapping - Delete a dma buffer mapping. + * + * @kctx: Pointer to kbase context. + * @dma_buf: Pointer to a dma buffer mapping. + * @tree: Pointer to root of rb_tree containing the dma_buf's mapped. + * + * when we un-map any dma mapping we need to remove them from rb_tree, + * rb_tree is maintained at kbase_device level and kbase_process level + * by passing the root of kbase_device or kbase_process we can remove + * the node from the tree. 
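+ *
+ * Return: true if the last import of @dma_buf was dropped and its node was
+ * erased from @tree, false otherwise.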
+ */ +static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, + struct dma_buf *dma_buf, + struct rb_root *tree) +{ + struct kbase_dma_buf *buf_node = NULL; + struct rb_node *node = tree->rb_node; + bool mapping_removed = false; + + lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + + while (node) { + buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + + if (dma_buf == buf_node->dma_buf) { + WARN_ON(!buf_node->import_count); + + buf_node->import_count--; + + if (!buf_node->import_count) { + rb_erase(&buf_node->dma_buf_node, tree); + kfree(buf_node); + mapping_removed = true; + } + + break; + } + + if (dma_buf < buf_node->dma_buf) + node = node->rb_left; + else + node = node->rb_right; + } + + WARN_ON(!buf_node); + return mapping_removed; +} + +/** + * kbase_capture_dma_buf_mapping - capture a dma buffer mapping. + * + * @kctx: Pointer to kbase context. + * @dma_buf: Pointer to a dma buffer mapping. + * @root: Pointer to root of rb_tree containing the dma_buf's. + * + * We maintain a kbase_device level and kbase_process level rb_tree + * of all unique dma_buf's mapped to gpu memory. So when attach any + * dma_buf add it the rb_tree's. To add the unique mapping we need + * check if the mapping is not a duplicate and then add them. + */ +static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, + struct dma_buf *dma_buf, + struct rb_root *root) +{ + struct kbase_dma_buf *buf_node = NULL; + struct rb_node *node = root->rb_node; + bool unique_buf_imported = true; + + lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + + while (node) { + buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + + if (dma_buf == buf_node->dma_buf) { + unique_buf_imported = false; + break; + } + + if (dma_buf < buf_node->dma_buf) + node = node->rb_left; + else + node = node->rb_right; + } + + if (unique_buf_imported) { + struct kbase_dma_buf *buf_node = + kzalloc(sizeof(*buf_node), GFP_KERNEL); + + if (buf_node == NULL) { + dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n"); + /* Dont account for it if we fail to allocate memory */ + unique_buf_imported = false; + } else { + struct rb_node **new = &(root->rb_node), *parent = NULL; + + buf_node->dma_buf = dma_buf; + buf_node->import_count = 1; + while (*new) { + struct kbase_dma_buf *node; + + parent = *new; + node = rb_entry(parent, struct kbase_dma_buf, + dma_buf_node); + if (dma_buf < node->dma_buf) + new = &(*new)->rb_left; + else + new = &(*new)->rb_right; + } + rb_link_node(&buf_node->dma_buf_node, parent, new); + rb_insert_color(&buf_node->dma_buf_node, root); + } + } else if (!WARN_ON(!buf_node)) { + buf_node->import_count++; + } + + return unique_buf_imported; +} + +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool dev_mapping_removed, prcs_mapping_removed; + + mutex_lock(&kbdev->dma_buf_lock); + + dev_mapping_removed = kbase_delete_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + + prcs_mapping_removed = kbase_delete_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + + WARN_ON(dev_mapping_removed && !prcs_mapping_removed); + + spin_lock(&kbdev->gpu_mem_usage_lock); + if (dev_mapping_removed) + kbdev->total_gpu_pages -= alloc->nents; + + if (prcs_mapping_removed) + kctx->kprcs->total_gpu_pages -= alloc->nents; + + if (dev_mapping_removed || prcs_mapping_removed) + kbase_trace_gpu_mem_usage(kbdev, kctx); + 
spin_unlock(&kbdev->gpu_mem_usage_lock); + + mutex_unlock(&kbdev->dma_buf_lock); +} + +void kbase_add_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool unique_dev_dmabuf, unique_prcs_dmabuf; + + mutex_lock(&kbdev->dma_buf_lock); + + /* add dma_buf to device and process. */ + unique_dev_dmabuf = kbase_capture_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + + unique_prcs_dmabuf = kbase_capture_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + + WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf); + + spin_lock(&kbdev->gpu_mem_usage_lock); + if (unique_dev_dmabuf) + kbdev->total_gpu_pages += alloc->nents; + + if (unique_prcs_dmabuf) + kctx->kprcs->total_gpu_pages += alloc->nents; + + if (unique_prcs_dmabuf || unique_dev_dmabuf) + kbase_trace_gpu_mem_usage(kbdev, kctx); + spin_unlock(&kbdev->gpu_mem_usage_lock); + + mutex_unlock(&kbdev->dma_buf_lock); +} + +#ifndef CONFIG_TRACE_GPU_MEM +#define CREATE_TRACE_POINTS +#include "mali_gpu_mem_trace.h" +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_trace_gpu_mem.h b/drivers/gpu/arm/b_r26p0/mali_kbase_trace_gpu_mem.h new file mode 100644 index 000000000000..b621525ae5df --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_trace_gpu_mem.h @@ -0,0 +1,101 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_TRACE_GPU_MEM_H_ +#define _KBASE_TRACE_GPU_MEM_H_ + +#ifdef CONFIG_TRACE_GPU_MEM +#include +#else +#include "mali_gpu_mem_trace.h" +#endif + +#define DEVICE_TGID ((u32) 0U) + +static void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + lockdep_assert_held(&kbdev->gpu_mem_usage_lock); + + trace_gpu_mem_total(kbdev->id, DEVICE_TGID, + kbdev->total_gpu_pages << PAGE_SHIFT); + + if (likely(kctx)) + trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid, + kctx->kprcs->total_gpu_pages << PAGE_SHIFT); +} + +static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, + struct kbase_context *kctx, size_t pages) +{ + spin_lock(&kbdev->gpu_mem_usage_lock); + + if (likely(kctx)) + kctx->kprcs->total_gpu_pages -= pages; + + kbdev->total_gpu_pages -= pages; + + kbase_trace_gpu_mem_usage(kbdev, kctx); + + spin_unlock(&kbdev->gpu_mem_usage_lock); +} + +static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, + struct kbase_context *kctx, size_t pages) +{ + spin_lock(&kbdev->gpu_mem_usage_lock); + + if (likely(kctx)) + kctx->kprcs->total_gpu_pages += pages; + + kbdev->total_gpu_pages += pages; + + kbase_trace_gpu_mem_usage(kbdev, kctx); + + spin_unlock(&kbdev->gpu_mem_usage_lock); +} + +/** + * kbase_remove_dma_buf_usage - Remove a dma-buf entry captured. 
+ * + * @kctx: Pointer to the kbase context + * @alloc: Pointer to the alloc to unmap + * + * Remove reference to dma buf been unmapped from kbase_device level + * rb_tree and Kbase_process level dma buf rb_tree. + */ +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_add_dma_buf_usage - Add a dma-buf entry captured. + * + * @kctx: Pointer to the kbase context + * @alloc: Pointer to the alloc to map in + * + * Add reference to dma buf been mapped to kbase_device level + * rb_tree and Kbase_process level dma buf rb_tree. + */ +void kbase_add_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +#endif /* _KBASE_TRACE_GPU_MEM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_uku.h b/drivers/gpu/arm/b_r26p0/mali_kbase_uku.h new file mode 100644 index 000000000000..41058ebd96ab --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_uku.h @@ -0,0 +1,126 @@ +/* + * + * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +#ifndef _KBASE_UKU_H_ +#define _KBASE_UKU_H_ + +#include "mali_uk.h" +#include "mali_base_kernel.h" + +/* This file needs to support being included from kernel and userside (which use different defines) */ +#if defined(CONFIG_MALI_ERROR_INJECT) +#define SUPPORT_MALI_ERROR_INJECT +#endif /* defined(CONFIG_MALI_ERROR_INJECT) */ +#if defined(CONFIG_MALI_NO_MALI) +#define SUPPORT_MALI_NO_MALI +#elif defined(MALI_NO_MALI) +#if MALI_NO_MALI +#define SUPPORT_MALI_NO_MALI +#endif +#endif + +#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT) +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif + +#include "mali_kbase_gpuprops_types.h" + +enum kbase_uk_function_id { + KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), + KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1), + KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2), + KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3), + KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4), + KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5), + KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6), + + /* UK_FUNC_ID + 7 not in use since BASE_LEGACY_UK6_SUPPORT dropped */ + + KBASE_FUNC_SYNC = (UK_FUNC_ID + 8), + + KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9), + + KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10), + KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11), + KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12), + + KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14), + + KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15), + + KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16), + KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18), + + KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19), + KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20), + KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21), + + /* UK_FUNC_ID + 22 not in use since BASE_LEGACY_UK8_SUPPORT dropped */ + + KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 
23), + KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24), + KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25), + KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26), + /* to be used only for testing + * purposes, otherwise these controls + * are set through gator API */ + + KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27), + KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28), + KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29), + + KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), + + KBASE_FUNC_TLSTREAM_ACQUIRE_V10_4 = (UK_FUNC_ID + 32), +#if MALI_UNIT_TEST + KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), + KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), +#endif /* MALI_UNIT_TEST */ + KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), + + KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), + +#ifdef SUPPORT_MALI_NO_MALI + KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37), +#endif + + KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38), + + KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), + + KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 40), + + KBASE_FUNC_SET_MIN_LOCK, + KBASE_FUNC_UNSET_MIN_LOCK, + + KBASE_FUNC_STEP_UP_MAX_GPU_LIMIT, + KBASE_FUNC_RESTORE_MAX_GPU_LIMIT, + + KBASE_FUNC_SET_VK_BOOST_LOCK, + KBASE_FUNC_UNSET_VK_BOOST_LOCK, + + KBASE_FUNC_MAX +}; + +#endif /* _KBASE_UKU_H_ */ + diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_utility.h b/drivers/gpu/arm/b_r26p0/mali_kbase_utility.h new file mode 100644 index 000000000000..8d4f044376a9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_utility.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_UTILITY_H +#define _KBASE_UTILITY_H + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +static inline void kbase_timer_setup(struct timer_list *timer, + void (*callback)(struct timer_list *timer)) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) + setup_timer(timer, (void (*)(unsigned long)) callback, + (unsigned long) timer); +#else + timer_setup(timer, callback, 0); +#endif +} + +#ifndef WRITE_ONCE + #ifdef ASSIGN_ONCE + #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) + #else + #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) + #endif +#endif + +#ifndef READ_ONCE + #define READ_ONCE(x) ACCESS_ONCE(x) +#endif + +#endif /* _KBASE_UTILITY_H */ diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.c b/drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.c new file mode 100644 index 000000000000..72cec138dceb --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.c @@ -0,0 +1,1083 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_vinstr.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_reader.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_ioctl.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Hwcnt reader API version */ +#define HWCNT_READER_API 1 + +/* The minimum allowed interval between dumps (equivalent to 10KHz) */ +#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC) + +/* The maximum allowed buffers per client */ +#define MAX_BUFFER_COUNT 32 + +/** + * struct kbase_vinstr_context - IOCTL interface for userspace hardware + * counters. + * @hvirt: Hardware counter virtualizer used by vinstr. + * @metadata: Hardware counter metadata provided by virtualizer. + * @lock: Lock protecting all vinstr state. + * @suspend_count: Suspend reference count. If non-zero, timer and worker are + * prevented from being re-scheduled. + * @client_count: Number of vinstr clients. + * @clients: List of vinstr clients. + * @dump_timer: Timer that enqueues dump_work to a workqueue. + * @dump_work: Worker for performing periodic counter dumps. + */ +struct kbase_vinstr_context { + struct kbase_hwcnt_virtualizer *hvirt; + const struct kbase_hwcnt_metadata *metadata; + struct mutex lock; + size_t suspend_count; + size_t client_count; + struct list_head clients; + struct hrtimer dump_timer; + struct work_struct dump_work; +}; + +/** + * struct kbase_vinstr_client - A vinstr client attached to a vinstr context. + * @vctx: Vinstr context client is attached to. + * @hvcli: Hardware counter virtualizer client. + * @node: Node used to attach this client to list in vinstr + * context. + * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic + * client. + * @next_dump_time_ns: Time in ns when this client's next periodic dump must + * occur. If 0, not a periodic client. + * @enable_map: Counters enable map. + * @dump_bufs: Array of dump buffers allocated by this client. + * @dump_bufs_meta: Metadata of dump buffers. + * @meta_idx: Index of metadata being accessed by userspace. + * @read_idx: Index of buffer read by userspace. + * @write_idx: Index of buffer being written by dump worker. + * @waitq: Client's notification queue. 
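+ *
+ * The @meta_idx, @read_idx and @write_idx counters are free-running; buffer
+ * slots are addressed modulo the number of buffers in @dump_bufs.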
+ */ +struct kbase_vinstr_client { + struct kbase_vinstr_context *vctx; + struct kbase_hwcnt_virtualizer_client *hvcli; + struct list_head node; + u64 next_dump_time_ns; + u32 dump_interval_ns; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer_array dump_bufs; + struct kbase_hwcnt_reader_metadata *dump_bufs_meta; + atomic_t meta_idx; + atomic_t read_idx; + atomic_t write_idx; + wait_queue_head_t waitq; +}; + +static unsigned int kbasep_vinstr_hwcnt_reader_poll( + struct file *filp, + poll_table *wait); + +static long kbasep_vinstr_hwcnt_reader_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long arg); + +static int kbasep_vinstr_hwcnt_reader_mmap( + struct file *filp, + struct vm_area_struct *vma); + +static int kbasep_vinstr_hwcnt_reader_release( + struct inode *inode, + struct file *filp); + +/* Vinstr client file operations */ +static const struct file_operations vinstr_client_fops = { + .owner = THIS_MODULE, + .poll = kbasep_vinstr_hwcnt_reader_poll, + .unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, + .compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl, + .mmap = kbasep_vinstr_hwcnt_reader_mmap, + .release = kbasep_vinstr_hwcnt_reader_release, +}; + +/** + * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds. + * + * Return: Current time in nanoseconds. + */ +static u64 kbasep_vinstr_timestamp_ns(void) +{ + return ktime_get_raw_ns(); +} + +/** + * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time. + * @cur_ts_ns: Current time in nanoseconds. + * @interval: Interval between dumps in nanoseconds. + * + * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump + * time that occurs after cur_ts_ns. + */ +static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval) +{ + /* Non-periodic client */ + if (interval == 0) + return 0; + + /* + * Return the next interval after the current time relative to t=0. + * This means multiple clients with the same period will synchronise, + * regardless of when they were started, allowing the worker to be + * scheduled less frequently. + */ + do_div(cur_ts_ns, interval); + return (cur_ts_ns + 1) * interval; +} + +/** + * kbasep_vinstr_client_dump() - Perform a dump for a client. + * @vcli: Non-NULL pointer to a vinstr client. + * @event_id: Event type that triggered the dump. + * + * Return: 0 on success, else error code. + */ +static int kbasep_vinstr_client_dump( + struct kbase_vinstr_client *vcli, + enum base_hwcnt_reader_event event_id) +{ + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; + unsigned int write_idx; + unsigned int read_idx; + struct kbase_hwcnt_dump_buffer *dump_buf; + struct kbase_hwcnt_reader_metadata *meta; + u8 clk_cnt; + + WARN_ON(!vcli); + lockdep_assert_held(&vcli->vctx->lock); + + write_idx = atomic_read(&vcli->write_idx); + read_idx = atomic_read(&vcli->read_idx); + + /* Check if there is a place to copy HWC block into. */ + if (write_idx - read_idx == vcli->dump_bufs.buf_cnt) + return -EBUSY; + write_idx %= vcli->dump_bufs.buf_cnt; + + dump_buf = &vcli->dump_bufs.bufs[write_idx]; + meta = &vcli->dump_bufs_meta[write_idx]; + + errcode = kbase_hwcnt_virtualizer_client_dump( + vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf); + if (errcode) + return errcode; + + /* Patch the dump buf headers, to hide the counters that other hwcnt + * clients are using. 
+ */ + kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map); + + /* Zero all non-enabled counters (current values are undefined) */ + kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map); + + clk_cnt = vcli->vctx->metadata->clk_cnt; + + meta->timestamp = ts_end_ns; + meta->event_id = event_id; + meta->buffer_idx = write_idx; + meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0; + meta->cycles.shader_cores = + (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0; + + /* Notify client. Make sure all changes to memory are visible. */ + wmb(); + atomic_inc(&vcli->write_idx); + wake_up_interruptible(&vcli->waitq); + return 0; +} + +/** + * kbasep_vinstr_client_clear() - Reset all the client's counters to zero. + * @vcli: Non-NULL pointer to a vinstr client. + * + * Return: 0 on success, else error code. + */ +static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli) +{ + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!vcli); + lockdep_assert_held(&vcli->vctx->lock); + + /* A virtualizer dump with a NULL buffer will just clear the virtualizer + * client's buffer. + */ + return kbase_hwcnt_virtualizer_client_dump( + vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL); +} + +/** + * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic + * vinstr clients, then reschedule the dump + * worker appropriately. + * @vctx: Non-NULL pointer to the vinstr context. + * + * If there are no periodic clients, then the dump worker will not be + * rescheduled. Else, the dump worker will be rescheduled for the next periodic + * client dump. + */ +static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx) +{ + u64 cur_ts_ns; + u64 earliest_next_ns = U64_MAX; + struct kbase_vinstr_client *pos; + + WARN_ON(!vctx); + lockdep_assert_held(&vctx->lock); + + cur_ts_ns = kbasep_vinstr_timestamp_ns(); + + /* + * Update each client's next dump time, and find the earliest next + * dump time if any of the clients have a non-zero interval. + */ + list_for_each_entry(pos, &vctx->clients, node) { + const u64 cli_next_ns = + kbasep_vinstr_next_dump_time_ns( + cur_ts_ns, pos->dump_interval_ns); + + /* Non-zero next dump time implies a periodic client */ + if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns)) + earliest_next_ns = cli_next_ns; + + pos->next_dump_time_ns = cli_next_ns; + } + + /* Cancel the timer if it is already pending */ + hrtimer_cancel(&vctx->dump_timer); + + /* Start the timer if there are periodic clients and vinstr is not + * suspended. + */ + if ((earliest_next_ns != U64_MAX) && + (vctx->suspend_count == 0) && + !WARN_ON(earliest_next_ns < cur_ts_ns)) + hrtimer_start( + &vctx->dump_timer, + ns_to_ktime(earliest_next_ns - cur_ts_ns), + HRTIMER_MODE_REL); +} + +/** + * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients + * that need to be dumped, then reschedules itself. + * @work: Work structure. + */ +static void kbasep_vinstr_dump_worker(struct work_struct *work) +{ + struct kbase_vinstr_context *vctx = + container_of(work, struct kbase_vinstr_context, dump_work); + struct kbase_vinstr_client *pos; + u64 cur_time_ns; + + mutex_lock(&vctx->lock); + + cur_time_ns = kbasep_vinstr_timestamp_ns(); + + /* Dump all periodic clients whose next dump time is before the current + * time. 
+ */ + list_for_each_entry(pos, &vctx->clients, node) { + if ((pos->next_dump_time_ns != 0) && + (pos->next_dump_time_ns < cur_time_ns)) + kbasep_vinstr_client_dump( + pos, BASE_HWCNT_READER_EVENT_PERIODIC); + } + + /* Update the next dump times of all periodic clients, then reschedule + * this worker at the earliest next dump time. + */ + kbasep_vinstr_reschedule_worker(vctx); + + mutex_unlock(&vctx->lock); +} + +/** + * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for + * execution as soon as possible. + * @timer: Timer structure. + */ +static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) +{ + struct kbase_vinstr_context *vctx = + container_of(timer, struct kbase_vinstr_context, dump_timer); + + /* We don't need to check vctx->suspend_count here, as the suspend + * function will ensure that any worker enqueued here is immediately + * cancelled, and the worker itself won't reschedule this timer if + * suspend_count != 0. + */ +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &vctx->dump_work); +#else + queue_work(system_highpri_wq, &vctx->dump_work); +#endif + return HRTIMER_NORESTART; +} + +/** + * kbasep_vinstr_client_destroy() - Destroy a vinstr client. + * @vcli: vinstr client. Must not be attached to a vinstr context. + */ +static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) +{ + if (!vcli) + return; + + kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); + kfree(vcli->dump_bufs_meta); + kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs); + kbase_hwcnt_enable_map_free(&vcli->enable_map); + kfree(vcli); +} + +/** + * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to + * the vinstr context. + * @vctx: Non-NULL pointer to vinstr context. + * @setup: Non-NULL pointer to hardware counter ioctl setup structure. + * setup->buffer_count must not be 0. + * @out_vcli: Non-NULL pointer to where created client will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_vinstr_client_create( + struct kbase_vinstr_context *vctx, + struct kbase_ioctl_hwcnt_reader_setup *setup, + struct kbase_vinstr_client **out_vcli) +{ + int errcode; + struct kbase_vinstr_client *vcli; + struct kbase_hwcnt_physical_enable_map phys_em; + + WARN_ON(!vctx); + WARN_ON(!setup); + WARN_ON(setup->buffer_count == 0); + + vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); + if (!vcli) + return -ENOMEM; + + vcli->vctx = vctx; + + errcode = kbase_hwcnt_enable_map_alloc( + vctx->metadata, &vcli->enable_map); + if (errcode) + goto error; + + phys_em.fe_bm = setup->fe_bm; + phys_em.shader_bm = setup->shader_bm; + phys_em.tiler_bm = setup->tiler_bm; + phys_em.mmu_l2_bm = setup->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); + + /* Enable all the available clk_enable_map. 
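+ * One bit is set per clock domain reported by the counter metadata.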
*/ + vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1; + + errcode = kbase_hwcnt_dump_buffer_array_alloc( + vctx->metadata, setup->buffer_count, &vcli->dump_bufs); + if (errcode) + goto error; + + errcode = -ENOMEM; + vcli->dump_bufs_meta = kmalloc_array( + setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL); + if (!vcli->dump_bufs_meta) + goto error; + + errcode = kbase_hwcnt_virtualizer_client_create( + vctx->hvirt, &vcli->enable_map, &vcli->hvcli); + if (errcode) + goto error; + + init_waitqueue_head(&vcli->waitq); + + *out_vcli = vcli; + return 0; +error: + kbasep_vinstr_client_destroy(vcli); + return errcode; +} + +int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx) +{ + struct kbase_vinstr_context *vctx; + const struct kbase_hwcnt_metadata *metadata; + + if (!hvirt || !out_vctx) + return -EINVAL; + + metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + if (!metadata) + return -EINVAL; + + vctx = kzalloc(sizeof(*vctx), GFP_KERNEL); + if (!vctx) + return -ENOMEM; + + vctx->hvirt = hvirt; + vctx->metadata = metadata; + + mutex_init(&vctx->lock); + INIT_LIST_HEAD(&vctx->clients); + hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + vctx->dump_timer.function = kbasep_vinstr_dump_timer; + INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker); + + *out_vctx = vctx; + return 0; +} + +void kbase_vinstr_term(struct kbase_vinstr_context *vctx) +{ + if (!vctx) + return; + + cancel_work_sync(&vctx->dump_work); + + /* Non-zero client count implies client leak */ + if (WARN_ON(vctx->client_count != 0)) { + struct kbase_vinstr_client *pos, *n; + + list_for_each_entry_safe(pos, n, &vctx->clients, node) { + list_del(&pos->node); + vctx->client_count--; + kbasep_vinstr_client_destroy(pos); + } + } + + WARN_ON(vctx->client_count != 0); + kfree(vctx); +} + +void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) +{ + if (WARN_ON(!vctx)) + return; + + mutex_lock(&vctx->lock); + + if (!WARN_ON(vctx->suspend_count == SIZE_MAX)) + vctx->suspend_count++; + + mutex_unlock(&vctx->lock); + + /* Always sync cancel the timer and then the worker, regardless of the + * new suspend count. + * + * This ensures concurrent calls to kbase_vinstr_suspend() always block + * until vinstr is fully suspended. + * + * The timer is cancelled before the worker, as the timer + * unconditionally re-enqueues the worker, but the worker checks the + * suspend_count that we just incremented before rescheduling the timer. + * + * Therefore if we cancel the worker first, the timer might re-enqueue + * the worker before we cancel the timer, but the opposite is not + * possible. + */ + hrtimer_cancel(&vctx->dump_timer); + cancel_work_sync(&vctx->dump_work); +} + +void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) +{ + if (WARN_ON(!vctx)) + return; + + mutex_lock(&vctx->lock); + + if (!WARN_ON(vctx->suspend_count == 0)) { + vctx->suspend_count--; + + /* Last resume, so re-enqueue the worker if we have any periodic + * clients. 
+ */ + if (vctx->suspend_count == 0) { + struct kbase_vinstr_client *pos; + bool has_periodic_clients = false; + + list_for_each_entry(pos, &vctx->clients, node) { + if (pos->dump_interval_ns != 0) { + has_periodic_clients = true; + break; + } + } + + if (has_periodic_clients) +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &vctx->dump_work); +#else + queue_work(system_highpri_wq, &vctx->dump_work); +#endif + } + } + + mutex_unlock(&vctx->lock); +} + +int kbase_vinstr_hwcnt_reader_setup( + struct kbase_vinstr_context *vctx, + struct kbase_ioctl_hwcnt_reader_setup *setup) +{ + int errcode; + int fd; + struct kbase_vinstr_client *vcli = NULL; + + if (!vctx || !setup || + (setup->buffer_count == 0) || + (setup->buffer_count > MAX_BUFFER_COUNT)) + return -EINVAL; + + errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); + if (errcode) + goto error; + + /* Add the new client. No need to reschedule worker, as not periodic */ + mutex_lock(&vctx->lock); + + vctx->client_count++; + list_add(&vcli->node, &vctx->clients); + + mutex_unlock(&vctx->lock); + + /* Expose to user-space only once the client is fully initialized */ + errcode = anon_inode_getfd( + "[mali_vinstr_desc]", + &vinstr_client_fops, + vcli, + O_RDONLY | O_CLOEXEC); + if (errcode < 0) + goto client_installed_error; + + fd = errcode; + + return fd; + +client_installed_error: + mutex_lock(&vctx->lock); + + vctx->client_count--; + list_del(&vcli->node); + + mutex_unlock(&vctx->lock); +error: + kbasep_vinstr_client_destroy(vcli); + return errcode; +} + +/** + * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready + * buffers. + * @cli: Non-NULL pointer to vinstr client. + * + * Return: Non-zero if client has at least one dumping buffer filled that was + * not notified to user yet. + */ +static int kbasep_vinstr_hwcnt_reader_buffer_ready( + struct kbase_vinstr_client *cli) +{ + WARN_ON(!cli); + return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_dump( + struct kbase_vinstr_client *cli) +{ + int errcode; + + mutex_lock(&cli->vctx->lock); + + errcode = kbasep_vinstr_client_dump( + cli, BASE_HWCNT_READER_EVENT_MANUAL); + + mutex_unlock(&cli->vctx->lock); + return errcode; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_clear( + struct kbase_vinstr_client *cli) +{ + int errcode; + + mutex_lock(&cli->vctx->lock); + + errcode = kbasep_vinstr_client_clear(cli); + + mutex_unlock(&cli->vctx->lock); + return errcode; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @buffer: Non-NULL pointer to userspace buffer. + * @size: Size of buffer. + * + * Return: 0 on success, else error code. 
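+ * -EAGAIN is returned when no new buffer is available, -EBUSY when the
+ * previously taken buffer has not yet been put back.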
+ */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + struct kbase_vinstr_client *cli, + void __user *buffer, + size_t size) +{ + unsigned int meta_idx = atomic_read(&cli->meta_idx); + unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; + + struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; + const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); + const size_t min_size = min(size, meta_size); + + /* Metadata sanity check. */ + WARN_ON(idx != meta->buffer_idx); + + /* Check if there is any buffer available. */ + if (unlikely(atomic_read(&cli->write_idx) == meta_idx)) + return -EAGAIN; + + /* Check if previously taken buffer was put back. */ + if (unlikely(atomic_read(&cli->read_idx) != meta_idx)) + return -EBUSY; + + /* Clear user buffer to zero. */ + if (unlikely(meta_size < size && clear_user(buffer, size))) + return -EFAULT; + + /* Copy next available buffer's metadata to user. */ + if (unlikely(copy_to_user(buffer, meta, min_size))) + return -EFAULT; + + atomic_inc(&cli->meta_idx); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @buffer: Non-NULL pointer to userspace buffer. + * @size: Size of buffer. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + struct kbase_vinstr_client *cli, + void __user *buffer, + size_t size) +{ + unsigned int read_idx = atomic_read(&cli->read_idx); + unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; + + struct kbase_hwcnt_reader_metadata *meta; + const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); + const size_t max_size = max(size, meta_size); + int ret = 0; + u8 stack_kbuf[64]; + u8 *kbuf = NULL; + size_t i; + + /* Check if any buffer was taken. */ + if (unlikely(atomic_read(&cli->meta_idx) == read_idx)) + return -EPERM; + + if (likely(max_size <= sizeof(stack_kbuf))) { + /* Use stack buffer when the size is small enough. */ + if (unlikely(meta_size > size)) + memset(stack_kbuf, 0, sizeof(stack_kbuf)); + kbuf = stack_kbuf; + } else { + kbuf = kzalloc(max_size, GFP_KERNEL); + if (unlikely(!kbuf)) + return -ENOMEM; + } + + /* + * Copy user buffer to zero cleared kernel buffer which has enough + * space for both user buffer and kernel metadata. + */ + if (unlikely(copy_from_user(kbuf, buffer, size))) { + ret = -EFAULT; + goto out; + } + + /* + * Make sure any "extra" data passed from userspace is zero. + * It's meaningful only in case meta_size < size. + */ + for (i = meta_size; i < size; i++) { + /* Check if user data beyond meta size is zero. */ + if (unlikely(kbuf[i] != 0)) { + ret = -EINVAL; + goto out; + } + } + + /* Check if correct buffer is put back. */ + meta = (struct kbase_hwcnt_reader_metadata *)kbuf; + if (unlikely(idx != meta->buffer_idx)) { + ret = -EINVAL; + goto out; + } + + atomic_inc(&cli->read_idx); +out: + if (unlikely(kbuf != stack_kbuf)) + kfree(kbuf); + return ret; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @interval: Periodic dumping interval (disable periodic dumping if 0). + * + * Return: 0 always. 
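+ * Non-zero intervals shorter than DUMP_INTERVAL_MIN_NS are clamped to that
+ * minimum.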
+ */ +static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + struct kbase_vinstr_client *cli, + u32 interval) +{ + mutex_lock(&cli->vctx->lock); + + if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS)) + interval = DUMP_INTERVAL_MIN_NS; + /* Update the interval, and put in a dummy next dump time */ + cli->dump_interval_ns = interval; + cli->next_dump_time_ns = 0; + + /* + * If it's a periodic client, kick off the worker early to do a proper + * timer reschedule. Return value is ignored, as we don't care if the + * worker is already queued. + */ + if ((interval != 0) && (cli->vctx->suspend_count == 0)) +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, &cli->vctx->dump_work); +#else + queue_work(system_highpri_wq, &cli->vctx->dump_work); +#endif + + mutex_unlock(&cli->vctx->lock); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @event_id: ID of event to enable. + * + * Return: 0 always. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event( + struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id) +{ + /* No-op, as events aren't supported */ + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl + * command. + * @cli: Non-NULL pointer to vinstr client. + * @event_id: ID of event to disable. + * + * Return: 0 always. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( + struct kbase_vinstr_client *cli, + enum base_hwcnt_reader_event event_id) +{ + /* No-op, as events aren't supported */ + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @hwver: Non-NULL pointer to user buffer where HW version will be stored. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( + struct kbase_vinstr_client *cli, + u32 __user *hwver) +{ + u32 ver = 0; + const enum kbase_hwcnt_gpu_group_type type = + kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); + + switch (type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + ver = 4; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + ver = 5; + break; + default: + WARN_ON(true); + } + + if (ver != 0) { + return put_user(ver, hwver); + } else { + return -EINVAL; + } +} + +/** + * The hwcnt reader's ioctl command - get API version. + * @cli: The non-NULL pointer to the client + * @arg: Command's argument. + * @size: Size of arg. + * + * @return 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( + struct kbase_vinstr_client *cli, unsigned long arg, size_t size) +{ + long ret = -EINVAL; + u8 clk_cnt = cli->vctx->metadata->clk_cnt; + + if (size == sizeof(u32)) { + ret = put_user(HWCNT_READER_API, (u32 __user *)arg); + } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { + struct kbase_hwcnt_reader_api_version api_version = { + .version = HWCNT_READER_API, + .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, + }; + + if (clk_cnt > 0) + api_version.features |= + KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP; + if (clk_cnt > 1) + api_version.features |= + KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; + + ret = put_user(api_version, + (struct kbase_hwcnt_reader_api_version __user *) + arg); + } + return ret; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. 
+ * @filp: Non-NULL pointer to file structure. + * @cmd: User command. + * @arg: Command's argument. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long arg) +{ + long rcode; + struct kbase_vinstr_client *cli; + + if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) + return -EINVAL; + + cli = filp->private_data; + if (!cli) + return -EINVAL; + + switch (_IOC_NR(cmd)) { + case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( + cli, arg, _IOC_SIZE(cmd)); + break; + case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( + cli, (u32 __user *)arg); + break; + case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE): + rcode = put_user( + (u32)cli->vctx->metadata->dump_buf_bytes, + (u32 __user *)arg); + break; + case _IOC_NR(KBASE_HWCNT_READER_DUMP): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); + break; + case _IOC_NR(KBASE_HWCNT_READER_CLEAR): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); + break; + case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + cli, (void __user *)arg, _IOC_SIZE(cmd)); + break; + case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + cli, (void __user *)arg, _IOC_SIZE(cmd)); + break; + case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + cli, (u32)arg); + break; + case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( + cli, (enum base_hwcnt_reader_event)arg); + break; + case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( + cli, (enum base_hwcnt_reader_event)arg); + break; + default: + pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); + rcode = -EINVAL; + break; + } + + return rcode; +} + +/** + * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll. + * @filp: Non-NULL pointer to file structure. + * @wait: Non-NULL pointer to poll table. + * + * Return: POLLIN if data can be read without blocking, 0 if data can not be + * read without blocking, else error code. + */ +static unsigned int kbasep_vinstr_hwcnt_reader_poll( + struct file *filp, + poll_table *wait) +{ + struct kbase_vinstr_client *cli; + + if (!filp || !wait) + return -EINVAL; + + cli = filp->private_data; + if (!cli) + return -EINVAL; + + poll_wait(filp, &cli->waitq, wait); + if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli)) + return POLLIN; + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap. + * @filp: Non-NULL pointer to file structure. + * @vma: Non-NULL pointer to vma structure. + * + * Return: 0 on success, else error code. 
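+ *
+ * The requested offset and length are validated against the total size of the
+ * client's dump buffers (buf_cnt * dump_buf_bytes) before the backing pages
+ * are remapped into the caller's address space.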
+ */ +static int kbasep_vinstr_hwcnt_reader_mmap( + struct file *filp, + struct vm_area_struct *vma) +{ + struct kbase_vinstr_client *cli; + unsigned long vm_size, size, addr, pfn, offset; + + if (!filp || !vma) + return -EINVAL; + + cli = filp->private_data; + if (!cli) + return -EINVAL; + + vm_size = vma->vm_end - vma->vm_start; + size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes; + + if (vma->vm_pgoff > (size >> PAGE_SHIFT)) + return -EINVAL; + + offset = vma->vm_pgoff << PAGE_SHIFT; + if (vm_size > size - offset) + return -EINVAL; + + addr = __pa(cli->dump_bufs.page_addr + offset); + pfn = addr >> PAGE_SHIFT; + + return remap_pfn_range( + vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot); +} + +/** + * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release. + * @inode: Non-NULL pointer to inode structure. + * @filp: Non-NULL pointer to file structure. + * + * Return: 0 always. + */ +static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, + struct file *filp) +{ + struct kbase_vinstr_client *vcli = filp->private_data; + + mutex_lock(&vcli->vctx->lock); + + vcli->vctx->client_count--; + list_del(&vcli->node); + + mutex_unlock(&vcli->vctx->lock); + + kbasep_vinstr_client_destroy(vcli); + + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.h b/drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.h new file mode 100644 index 000000000000..81d315f95567 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_kbase_vinstr.h @@ -0,0 +1,91 @@ +/* + * + * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Vinstr, used to provide an ioctl for userspace access to periodic hardware + * counters. + */ + +#ifndef _KBASE_VINSTR_H_ +#define _KBASE_VINSTR_H_ + +struct kbase_vinstr_context; +struct kbase_hwcnt_virtualizer; +struct kbase_ioctl_hwcnt_reader_setup; + +/** + * kbase_vinstr_init() - Initialise a vinstr context. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr + * context will be stored on success. + * + * On creation, the suspend count of the context will be 0. + * + * Return: 0 on success, else error code. + */ +int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx); + +/** + * kbase_vinstr_term() - Terminate a vinstr context. + * @vctx: Pointer to the vinstr context to be terminated. + */ +void kbase_vinstr_term(struct kbase_vinstr_context *vctx); + +/** + * kbase_vinstr_suspend() - Increment the suspend count of the context. + * @vctx: Non-NULL pointer to the vinstr context to be suspended. 
+ *
+ * After this function call returns, it is guaranteed that all timers and
+ * workers in vinstr will be cancelled, and will not be re-triggered until
+ * after the context has been resumed. In effect, this means no new counter
+ * dumps will occur for any existing or subsequently added periodic clients.
+ */
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx);
+
+/**
+ * kbase_vinstr_resume() - Decrement the suspend count of the context.
+ * @vctx: Non-NULL pointer to the vinstr context to be resumed.
+ *
+ * If a call to this function decrements the suspend count from 1 to 0, then
+ * normal operation of vinstr will be resumed (i.e. counter dumps will once
+ * again be automatically triggered for all periodic clients).
+ *
+ * It is only valid to call this function one time for each prior returned call
+ * to kbase_vinstr_suspend.
+ */
+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx);
+
+/**
+ * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader
+ *                                     client.
+ * @vinstr_ctx: Non-NULL pointer to the vinstr context.
+ * @setup:      Non-NULL pointer to the hwcnt reader configuration.
+ *
+ * Return: file descriptor on success, else a (negative) error code.
+ */
+int kbase_vinstr_hwcnt_reader_setup(
+	struct kbase_vinstr_context *vinstr_ctx,
+	struct kbase_ioctl_hwcnt_reader_setup *setup);
+
+#endif /* _KBASE_VINSTR_H_ */
diff --git a/drivers/gpu/arm/b_r26p0/mali_linux_systrace.h b/drivers/gpu/arm/b_r26p0/mali_linux_systrace.h
new file mode 100644
index 000000000000..33c603a1fd5c
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/mali_linux_systrace.h
@@ -0,0 +1,110 @@
+#if !defined(_MALI_SYSTRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_SYSTRACE_H
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_systrace
+#undef TRACE_SYSTEM_STRING
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_systrace
+
+
+/**
+ * mali_job_systrace_event_start/stop - job start/stop events called from
+ * mali_kbase_core_linux.c. The trailing fields of each event form a unique
+ * key so that matching start (S) and finish (F) pairs can be correlated in
+ * systrace.
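+ *
+ * With illustrative values (tgid 1234, event string "fragment-job", job_id 7,
+ * deps 5/1 and 0/0, GLES ctx handle 0xabcd, ctx_id 3, timestamp 123456789)
+ * the start event below renders as:
+ *   tracing_mark_write: S|1234|fragment-job|7|5|1|0|0|abcd|12473123456789
+ * where the final run of digits is tgid, job_id, ctx_id and the timestamp
+ * concatenated without separators.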
+ */ +TRACE_EVENT(mali_job_systrace_event_start, + + TP_PROTO(char *ev, unsigned int tgid, unsigned int pid, unsigned char job_id, unsigned int ctx_id, + unsigned long cookies, unsigned long long start_timestamp, unsigned int dep_0_id, unsigned int dep_0_type, unsigned int dep_1_id , unsigned int dep_1_type, unsigned int gles_ctx_handle), + TP_ARGS(ev, tgid, pid, job_id, ctx_id, cookies, start_timestamp, dep_0_id, dep_0_type, dep_1_id, dep_1_type, gles_ctx_handle), + TP_STRUCT__entry( + __string(ev_str, ev) + __field(unsigned int, tgid) + __field(unsigned int, pid) + __field(unsigned char, job_id) + __field(unsigned int, ctx_id) + __field(unsigned long, cookies) + __field(unsigned long long, start_timestamp) + __field(unsigned int, dep_0_id) + __field(unsigned int, dep_0_type) + __field(unsigned int, dep_1_id) + __field(unsigned int, dep_1_type) + __field(unsigned int, gles_ctx_handle) + + ), + TP_fast_assign( + __assign_str(ev_str, ev); + __entry->tgid = tgid; + __entry->pid = pid; + __entry->job_id = job_id; + __entry->ctx_id = ctx_id; + __entry->cookies = cookies; + __entry->start_timestamp = start_timestamp; + __entry->dep_0_id = dep_0_id; + __entry->dep_0_type = dep_0_type; + __entry->dep_1_id = dep_1_id; + __entry->dep_1_type = dep_1_type; + __entry->gles_ctx_handle = gles_ctx_handle; + ), + // unique key for S|F syntax + // S| tgid | vertex/fragment-job | atom_id(job_id) | dep_0_id | dep_0_type | dep_1_id| dep_1_type |__entry->gles_ctx_handle | job_id (atom_id)| ctx_id| timestamp + TP_printk("tracing_mark_write: S|%d|%s|%d|%d|%d|%d|%d|%x|%d%d%d%llu", __entry->tgid, __get_str(ev_str), + __entry->job_id, __entry->dep_0_id, __entry->dep_0_type, __entry->dep_1_id, __entry->dep_1_type, __entry->gles_ctx_handle, + __entry->tgid, __entry->job_id, __entry->ctx_id, __entry->start_timestamp) + ); + + +TRACE_EVENT(mali_job_systrace_event_stop, + + TP_PROTO(char *ev, unsigned int tgid, unsigned int pid, unsigned char job_id, unsigned int ctx_id, + unsigned long cookies, unsigned long long start_timestamp, unsigned int dep_0_id, unsigned int dep_0_type, unsigned int dep_1_id , unsigned int dep_1_type, unsigned int gles_ctx_handle), + TP_ARGS(ev, tgid, pid, job_id, ctx_id, cookies, start_timestamp, dep_0_id, dep_0_type, dep_1_id, dep_1_type, gles_ctx_handle), + TP_STRUCT__entry( + __string(ev_str, ev) + __field(unsigned int, tgid) + __field(unsigned int, pid) + __field(unsigned char, job_id) + __field(unsigned int, ctx_id) + __field(unsigned long, cookies) + __field(unsigned long long, start_timestamp) + __field(unsigned int, dep_0_id) + __field(unsigned int, dep_0_type) + __field(unsigned int, dep_1_id) + __field(unsigned int, dep_1_type) + __field(unsigned int, gles_ctx_handle) + + ), + TP_fast_assign( + __assign_str(ev_str, ev); + __entry->tgid = tgid; + __entry->pid = pid; + __entry->job_id = job_id; + __entry->ctx_id = ctx_id; + __entry->cookies = cookies; + __entry->start_timestamp = start_timestamp; + __entry->dep_0_id = dep_0_id; + __entry->dep_0_type = dep_0_type; + __entry->dep_1_id = dep_1_id; + __entry->dep_1_type = dep_1_type; + __entry->gles_ctx_handle = gles_ctx_handle; + ), + // unique key for S|F syntax + // F| tgid | vertex/fragment-job | atom_id(job_id) | dep_0_id | dep_0_type | dep_1_id| dep_1_type |__entry->gles_ctx_handle | job_id (atom_id)| ctx_id| timestamp + TP_printk("tracing_mark_write: F|%d|%s|%d|%d|%d|%d|%d|%x|%d%d%d%llu", __entry->tgid, __get_str(ev_str), + __entry->job_id, __entry->dep_0_id, __entry->dep_0_type, __entry->dep_1_id, __entry->dep_1_type, 
__entry->gles_ctx_handle, + __entry->tgid, __entry->job_id, __entry->ctx_id, __entry->start_timestamp) + ); + +#endif /* _MALI_SYSTRACE_H */ + +#undef TRACE_INCLUDE_PATH +#undef linux +#define TRACE_INCLUDE_PATH . +#undef TRACE_SYSTEM_STRING + +/* This part must be outside protection */ +#include diff --git a/drivers/gpu/arm/b_r26p0/mali_linux_trace.h b/drivers/gpu/arm/b_r26p0/mali_linux_trace.h new file mode 100644 index 000000000000..b58abc41acba --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_linux_trace.h @@ -0,0 +1,532 @@ +/* + * + * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mali + +#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MALI_H + +#include + +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#define MALI_JOB_SLOTS_EVENT_CHANGED + +/** + * mali_job_slots_event - Reports change of job slot status. + * @gpu_id: Kbase device id + * @event_id: ORed together bitfields representing a type of event, + * made with the GATOR_MAKE_EVENT() macro. + */ +TRACE_EVENT(mali_job_slots_event, + TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid, + u8 job_id), + TP_ARGS(gpu_id, event_id, tgid, pid, job_id), + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(u32, event_id) + __field(u32, tgid) + __field(u32, pid) + __field(u8, job_id) + ), + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->event_id = event_id; + __entry->tgid = tgid; + __entry->pid = pid; + __entry->job_id = job_id; + ), + TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u", + __entry->gpu_id, __entry->event_id, + __entry->tgid, __entry->pid, __entry->job_id) +); + +/** + * mali_pm_status - Reports change of power management status. + * @gpu_id: Kbase device id + * @event_id: Core type (shader, tiler, L2 cache) + * @value: 64bits bitmask reporting either power status of + * the cores (1-ON, 0-OFF) + */ +TRACE_EVENT(mali_pm_status, + TP_PROTO(u32 gpu_id, u32 event_id, u64 value), + TP_ARGS(gpu_id, event_id, value), + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(u32, event_id) + __field(u64, value) + ), + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->event_id = event_id; + __entry->value = value; + ), + TP_printk("gpu=%u event %u = %llu", + __entry->gpu_id, __entry->event_id, __entry->value) +); + +/** + * mali_page_fault_insert_pages - Reports an MMU page fault + * resulting in new pages being mapped. 
+ * @gpu_id: Kbase device id + * @event_id: MMU address space number + * @value: Number of newly allocated pages + */ +TRACE_EVENT(mali_page_fault_insert_pages, + TP_PROTO(u32 gpu_id, s32 event_id, u64 value), + TP_ARGS(gpu_id, event_id, value), + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(s32, event_id) + __field(u64, value) + ), + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->event_id = event_id; + __entry->value = value; + ), + TP_printk("gpu=%u event %d = %llu", + __entry->gpu_id, __entry->event_id, __entry->value) +); + +/** + * mali_total_alloc_pages_change - Reports that the total number of + * allocated pages has changed. + * @gpu_id: Kbase device id + * @event_id: Total number of pages allocated + */ +TRACE_EVENT(mali_total_alloc_pages_change, + TP_PROTO(u32 gpu_id, s64 event_id), + TP_ARGS(gpu_id, event_id), + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(s64, event_id) + ), + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->event_id = event_id; + ), + TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id) +); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + +/* + * MMU subsystem tracepoints + */ + +/* Fault status and exception code helpers + * + * Must be macros to allow use by user-side tracepoint tools + * + * bits 0:1 masked off code, and used for the level + * + * Tracepoint files get included more than once - protect against multiple + * definition + */ +#ifndef __TRACE_MALI_MMU_HELPERS +#define __TRACE_MALI_MMU_HELPERS +/* Complex macros should be enclosed in parenthesis. + * + * We need to have those parentheses removed for our arrays of symbolic look-ups + * for __print_symbolic() whilst also being able to use them outside trace code + */ +#define _ENSURE_PARENTHESIS(args...) args + +#define KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(code) \ + (!KBASE_MMU_FAULT_CODE_VALID(code) ? "UNKNOWN,level=" : \ + __print_symbolic(((code) & ~3u), \ + KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS)) +#define KBASE_MMU_FAULT_CODE_LEVEL(code) \ + (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code) & 0x3u)) + +#define KBASE_MMU_FAULT_STATUS_CODE(status) \ + ((status) & 0xFFu) +#define KBASE_MMU_FAULT_STATUS_DECODED_STRING(status) \ + (((status) & (1u << 10)) ? 
"DECODER_FAULT" : "SLAVE_FAULT") + +#define KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(status) \ + KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT( \ + KBASE_MMU_FAULT_STATUS_CODE(status)) + +#define KBASE_MMU_FAULT_STATUS_LEVEL(status) \ + KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status)) + +#define KBASE_MMU_FAULT_STATUS_ACCESS(status) \ + ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) +#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ + {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ + {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ + {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) +#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ + __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ + KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) + +#define KBASE_MMU_FAULT_CODE_VALID(code) \ + ((code >= 0xC0 && code <= 0xEF) && \ + (!(code >= 0xC5 && code <= 0xC6)) && \ + (!(code >= 0xCC && code <= 0xCF)) && \ + (!(code >= 0xD4 && code <= 0xD7)) && \ + (!(code >= 0xDC && code <= 0xDF))) +#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {0xC0, "TRANSLATION_FAULT_" }, \ + {0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \ + {0xC8, "PERMISSION_FAULT_" }, \ + {0xD0, "TRANSTAB_BUS_FAULT_" }, \ + {0xD8, "ACCESS_FLAG_" }, \ + {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ + {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ + {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ + {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) +#endif /* __TRACE_MALI_MMU_HELPERS */ + +/* trace_mali_mmu_page_fault_grow + * + * Tracepoint about a successful grow of a region due to a GPU page fault + */ +TRACE_EVENT(mali_mmu_page_fault_grow, + TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, + size_t new_pages), + TP_ARGS(reg, fault, new_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, fault_addr) + __field(u64, fault_extra_addr) + __field(size_t, new_pages) + __field(u32, status) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->fault_addr = fault->addr; + __entry->fault_extra_addr = fault->extra_addr; + __entry->new_pages = new_pages; + __entry->status = fault->status; + ), + TP_printk("start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x", + __entry->start_addr, __entry->fault_addr, + __entry->fault_extra_addr, __entry->new_pages, + __entry->status, + KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status), + KBASE_MMU_FAULT_STATUS_CODE(__entry->status), + KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status), + KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status), + KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8, + KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), + __entry->status >> 16) +); + + + + +/* + * Just-in-time memory allocation subsystem tracepoints + */ + +/* Just-in-time memory allocation soft-job template. Override the TP_printk + * further if need be. jit_id can be 0. 
+ */ +DECLARE_EVENT_CLASS(mali_jit_softjob_template, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(size_t, nr_pages) + __field(size_t, backed_pages) + __field(u8, jit_id) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->nr_pages = reg->nr_pages; + __entry->backed_pages = kbase_reg_current_backed_size(reg); + __entry->jit_id = jit_id; + ), + TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->jit_id, __entry->start_addr, __entry->nr_pages, + __entry->backed_pages) +); + +/* trace_mali_jit_alloc() + * + * Tracepoint about a just-in-time memory allocation soft-job successfully + * allocating memory + */ +DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id)); + +/* trace_mali_jit_free() + * + * Tracepoint about memory that was allocated just-in-time being freed + * (which may happen either on free soft-job, or during rollback error + * paths of an allocation soft-job, etc) + * + * Free doesn't immediately have the just-in-time memory allocation ID so + * it's currently suppressed from the output - set jit_id to 0 + */ +DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); + +#if MALI_JIT_PRESSURE_LIMIT_BASE +/* trace_mali_jit_report + * + * Tracepoint about the GPU data structure read to form a just-in-time memory + * allocation report, and its calculated physical page usage + */ +TRACE_EVENT(mali_jit_report, + TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg, + unsigned int id_idx, u64 read_val, u64 used_pages), + TP_ARGS(katom, reg, id_idx, read_val, used_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, read_val) + __field(u64, used_pages) + __field(unsigned long, flags) + __field(u8, id_idx) + __field(u8, jit_id) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->read_val = read_val; + __entry->used_pages = used_pages; + __entry->flags = reg->flags; + __entry->id_idx = id_idx; + __entry->jit_id = katom->jit_ids[id_idx]; + ), + TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu", + __entry->start_addr, __entry->id_idx, __entry->jit_id, + __print_symbolic(__entry->flags, + { 0, "address"}, + { KBASE_REG_TILER_ALIGN_TOP, "address with align" }, + { KBASE_REG_HEAP_INFO_IS_SIZE, "size" }, + { KBASE_REG_HEAP_INFO_IS_SIZE | + KBASE_REG_TILER_ALIGN_TOP, + "size with align (invalid)" } + ), + __entry->read_val, __entry->used_pages) +); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +#if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE) +TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif + +#if MALI_JIT_PRESSURE_LIMIT_BASE +/* trace_mali_jit_report_pressure + * + * Tracepoint about change in physical memory pressure, due to the information + * about a region changing. 
Examples include: + * - a report on a region that was allocated just-in-time + * - just-in-time allocation of a region + * - free of a region that was allocated just-in-time + */ +TRACE_EVENT(mali_jit_report_pressure, + TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages, + u64 new_pressure, unsigned int flags), + TP_ARGS(reg, new_used_pages, new_pressure, flags), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, used_pages) + __field(u64, new_used_pages) + __field(u64, new_pressure) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->used_pages = reg->used_pages; + __entry->new_used_pages = new_used_pages; + __entry->new_pressure = new_pressure; + __entry->flags = flags; + ), + TP_printk("start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s", + __entry->start_addr, __entry->used_pages, + __entry->new_used_pages, __entry->new_pressure, + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) +); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +#ifndef __TRACE_SYSGRAPH_ENUM +#define __TRACE_SYSGRAPH_ENUM +/* Enum of sysgraph message IDs */ +enum sysgraph_msg { + SGR_ARRIVE, + SGR_DEP_RES, + SGR_SUBMIT, + SGR_COMPLETE, + SGR_POST, + SGR_ACTIVE, + SGR_INACTIVE +}; +#endif /* __TRACE_SYSGRAPH_ENUM */ + +/* A template for SYSGRAPH events + * + * Most of the sysgraph events contain only one input argument + * which is atom_id therefore they will be using a common template + */ +TRACE_EVENT(sysgraph, + TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, + unsigned int atom_id), + TP_ARGS(message, proc_id, atom_id), + TP_STRUCT__entry( + __field(unsigned int, proc_id) + __field(enum sysgraph_msg, message) + __field(unsigned int, atom_id) + ), + TP_fast_assign( + __entry->proc_id = proc_id; + __entry->message = message; + __entry->atom_id = atom_id; + ), + TP_printk("msg=%u proc_id=%u, param1=%d\n", __entry->message, + __entry->proc_id, __entry->atom_id) +); + +/* A template for SYSGRAPH GPU events + * + * Sysgraph events that record start/complete events + * on GPU also record a js value in addition to the + * atom id. 
+ */ +TRACE_EVENT(sysgraph_gpu, + TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, + unsigned int atom_id, unsigned int js), + TP_ARGS(message, proc_id, atom_id, js), + TP_STRUCT__entry( + __field(unsigned int, proc_id) + __field(enum sysgraph_msg, message) + __field(unsigned int, atom_id) + __field(unsigned int, js) + ), + TP_fast_assign( + __entry->proc_id = proc_id; + __entry->message = message; + __entry->atom_id = atom_id; + __entry->js = js; + ), + TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d\n", + __entry->message, __entry->proc_id, + __entry->atom_id, __entry->js) +); + +/* Tracepoint files get included more than once - protect against multiple + * definition + */ +#undef KBASE_JIT_REPORT_GPU_MEM_SIZE + +/* Size in bytes of the memory surrounding the location used for a just-in-time + * memory allocation report + */ +#define KBASE_JIT_REPORT_GPU_MEM_SIZE (4 * sizeof(u64)) + +/* trace_mali_jit_report_gpu_mem + * + * Tracepoint about the GPU memory nearby the location used for a just-in-time + * memory allocation report + */ +TRACE_EVENT(mali_jit_report_gpu_mem, + TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags), + TP_ARGS(base_addr, reg_addr, gpu_mem, flags), + TP_STRUCT__entry( + __field(u64, base_addr) + __field(u64, reg_addr) + __array(u64, mem_values, + KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64)) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->base_addr = base_addr; + __entry->reg_addr = reg_addr; + memcpy(__entry->mem_values, gpu_mem, + sizeof(__entry->mem_values)); + __entry->flags = flags; + ), + TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s", + __entry->reg_addr, __entry->base_addr, + __print_array(__entry->mem_values, + ARRAY_SIZE(__entry->mem_values), sizeof(u64)), + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) +); + +/* trace_mali_jit_trim_from_region + * + * Tracepoint about trimming physical pages from a region + */ +TRACE_EVENT(mali_jit_trim_from_region, + TP_PROTO(struct kbase_va_region *reg, size_t freed_pages, + size_t old_pages, size_t available_pages, size_t new_pages), + TP_ARGS(reg, freed_pages, old_pages, available_pages, new_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(size_t, freed_pages) + __field(size_t, old_pages) + __field(size_t, available_pages) + __field(size_t, new_pages) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->freed_pages = freed_pages; + __entry->old_pages = old_pages; + __entry->available_pages = available_pages; + __entry->new_pages = new_pages; + ), + TP_printk("start=0x%llx freed_pages=%zu old_pages=%zu available_pages=%zu new_pages=%zu", + __entry->start_addr, __entry->freed_pages, __entry->old_pages, + __entry->available_pages, __entry->new_pages) +); + +/* trace_mali_jit_trim + * + * Tracepoint about total trimmed physical pages + */ +TRACE_EVENT(mali_jit_trim, + TP_PROTO(size_t freed_pages), + TP_ARGS(freed_pages), + TP_STRUCT__entry( + __field(size_t, freed_pages) + ), + TP_fast_assign( + __entry->freed_pages = freed_pages; + ), + TP_printk("freed_pages=%zu", __entry->freed_pages) +); + +#include "debug/mali_kbase_debug_linux_ktrace.h" + +#endif /* _TRACE_MALI_H */ + +#undef TRACE_INCLUDE_PATH +/* lwn.net/Articles/383362 suggests this should remain as '.', and instead + * extend CFLAGS + */ +#define TRACE_INCLUDE_PATH . 
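A usage note, not part of this patch: like mali_power_gpu_frequency_trace.c further below, exactly one translation unit must define CREATE_TRACE_POINTS before including this header so that the tracepoint bodies are emitted once; every other user simply includes it. A minimal sketch, assuming the header is on the include path and using an illustrative wrapper name:

/* In exactly one .c file of the driver: */
#define CREATE_TRACE_POINTS
#include "mali_linux_trace.h"

static void example_report_jit_trim(size_t freed_pages)
{
	/* Compiles to a disabled static branch until the event is enabled
	 * at runtime via tracefs.
	 */
	trace_mali_jit_trim(freed_pages);
}

Other callers in the driver then use the generated trace_mali_*() helpers directly, with no CREATE_TRACE_POINTS of their own.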
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_linux_trace + +/* This part must be outside protection */ +#include diff --git a/drivers/gpu/arm/b_r26p0/mali_malisw.h b/drivers/gpu/arm/b_r26p0/mali_malisw.h new file mode 100644 index 000000000000..3a4db10bdb3d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_malisw.h @@ -0,0 +1,109 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Kernel-wide include for common macros and types. + */ + +#ifndef _MALISW_H_ +#define _MALISW_H_ + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#define U8_MAX ((u8)~0U) +#define S8_MAX ((s8)(U8_MAX>>1)) +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define U16_MAX ((u16)~0U) +#define S16_MAX ((s16)(U16_MAX>>1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define U32_MAX ((u32)~0U) +#define S32_MAX ((s32)(U32_MAX>>1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define U64_MAX ((u64)~0ULL) +#define S64_MAX ((s64)(U64_MAX>>1)) +#define S64_MIN ((s64)(-S64_MAX - 1)) +#endif /* LINUX_VERSION_CODE */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) +#define SIZE_MAX (~(size_t)0) +#endif /* LINUX_VERSION_CODE */ + +/** + * MIN - Return the lesser of two values. + * + * As a macro it may evaluate its arguments more than once. + * Refer to MAX macro for more details + */ +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +/** + * MAX - Return the greater of two values. + * + * As a macro it may evaluate its arguments more than once. + * If called on the same two arguments as MIN it is guaranteed to return + * the one that MIN didn't return. This is significant for types where not + * all values are comparable e.g. NaNs in floating-point types. But if you want + * to retrieve the min and max of two values, consider using a conditional swap + * instead. + */ +#define MAX(x, y) ((x) < (y) ? (y) : (x)) + +/** + * @hideinitializer + * Function-like macro for suppressing unused variable warnings. Where possible + * such variables should be removed; this macro is present for cases where we + * much support API backwards compatibility. + */ +#define CSTD_UNUSED(x) ((void)(x)) + +/** + * @hideinitializer + * Function-like macro for use where "no behavior" is desired. This is useful + * when compile time macros turn a function-like macro in to a no-op, but + * where having no statement is otherwise invalid. + */ +#define CSTD_NOP(...) ((void)#__VA_ARGS__) + +/** + * @hideinitializer + * Function-like macro for stringizing a single level macro. + * @code + * #define MY_MACRO 32 + * CSTD_STR1( MY_MACRO ) + * > "MY_MACRO" + * @endcode + */ +#define CSTD_STR1(x) #x + +/** + * @hideinitializer + * Function-like macro for stringizing a macro's value. 
This should not be used + * if the macro is defined in a way which may have no value; use the + * alternative @c CSTD_STR2N macro should be used instead. + * @code + * #define MY_MACRO 32 + * CSTD_STR2( MY_MACRO ) + * > "32" + * @endcode + */ +#define CSTD_STR2(x) CSTD_STR1(x) + +#endif /* _MALISW_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mali_power_gpu_frequency_trace.c b/drivers/gpu/arm/b_r26p0/mali_power_gpu_frequency_trace.c new file mode 100644 index 000000000000..b6fb5a094fab --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_power_gpu_frequency_trace.c @@ -0,0 +1,27 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Create the trace point if not configured in kernel */ +#ifndef CONFIG_TRACE_POWER_GPU_FREQUENCY +#define CREATE_TRACE_POINTS +#include "mali_power_gpu_frequency_trace.h" +#endif diff --git a/drivers/gpu/arm/b_r26p0/mali_power_gpu_frequency_trace.h b/drivers/gpu/arm/b_r26p0/mali_power_gpu_frequency_trace.h new file mode 100644 index 000000000000..3b90ae437db9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_power_gpu_frequency_trace.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _TRACE_POWER_GPU_FREQUENCY_MALI +#define _TRACE_POWER_GPU_FREQUENCY_MALI +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_power_gpu_frequency_trace +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
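Stepping back to mali_malisw.h above: its MIN()/MAX() documentation warns that, as macros, they may evaluate their arguments more than once. A minimal stand-alone user-space sketch (not part of the patch; it simply restates the macro) of why that matters:

#include <stdio.h>

/* Same shape as the MIN() macro defined in mali_malisw.h above. */
#define MIN(x, y) ((x) < (y) ? (x) : (y))

int main(void)
{
	int i = 0;

	/* After expansion, i++ appears twice: once in the comparison and
	 * once in the selected branch, so i is incremented twice.
	 */
	int lowest = MIN(i++, 10);

	printf("lowest=%d i=%d\n", lowest, i);	/* prints lowest=1 i=2 */
	return 0;
}

This is the reason the kernel's own min()/max() helpers use typeof temporaries; callers of the driver's MIN()/MAX() must avoid arguments with side effects.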
+ +#if !defined(_TRACE_POWER_GPU_FREQUENCY_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_GPU_FREQUENCY_H + +#include + +DECLARE_EVENT_CLASS(gpu, + + TP_PROTO(unsigned int state, unsigned int gpu_id), + + TP_ARGS(state, gpu_id), + + TP_STRUCT__entry( + __field( u32, state ) + __field( u32, gpu_id ) + ), + + TP_fast_assign( + __entry->state = state; + __entry->gpu_id = gpu_id; + ), + + TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, + (unsigned long)__entry->gpu_id) +); + +DEFINE_EVENT(gpu, gpu_frequency, + + TP_PROTO(unsigned int frequency, unsigned int gpu_id), + + TP_ARGS(frequency, gpu_id) +); + +#endif /* _TRACE_POWER_GPU_FREQUENCY_H */ + +/* This part must be outside protection */ +#include diff --git a/drivers/gpu/arm/b_r26p0/mali_uk.h b/drivers/gpu/arm/b_r26p0/mali_uk.h new file mode 100644 index 000000000000..8f4744fba0a4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mali_uk.h @@ -0,0 +1,147 @@ +/* + * + * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_uk.h + * Types and definitions that are common across OSs for both the user + * and kernel side of the User-Kernel interface. + */ + +#ifndef _UK_H_ +#define _UK_H_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @defgroup uk_api User-Kernel Interface API + * + * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device + * drivers developed as part of the Midgard DDK. Currently that includes the Base driver. + * + * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent + * kernel-side API (UKK) via an OS-specific communication mechanism. + * + * This API is internal to the Midgard DDK and is not exposed to any applications. + * + * @{ + */ + +/** + * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The + * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this + * identifier to select a UKK client to the uku_open() function. + * + * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id + * enumeration and the uku_open() implemenation for the various OS ports need to be updated to + * provide a mapping of the identifier to the OS specific device name. + * + */ +enum uk_client_id { + /** + * Value used to identify the Base driver UK client. + */ + UK_CLIENT_MALI_T600_BASE, + + /** The number of uk clients supported. This must be the last member of the enum */ + UK_CLIENT_COUNT +}; + +/* MALI_SEC_INTEGRATION */ +/** + * Each function callable through the UK interface has a unique number. 
+ * Functions provided by UK clients start from number UK_FUNC_ID. + * Numbers below UK_FUNC_ID are used for internal UK functions. + */ +enum uk_func { + UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */ + /** + * Each UK client numbers the functions they provide starting from + * number UK_FUNC_ID. This number is then eventually assigned to the + * id field of the union uk_header structure when preparing to make a + * UK call. See your UK client for a list of their function numbers. + */ + UK_FUNC_ID = 512 +}; + +/** + * Arguments for a UK call are stored in a structure. This structure consists + * of a fixed size header and a payload. The header carries a 32-bit number + * identifying the UK function to be called (see uk_func). When the UKK client + * receives this header and executed the requested UK function, it will use + * the same header to store the result of the function in the form of a + * int return code. The size of this structure is such that the + * first member of the payload following the header can be accessed efficiently + * on a 32 and 64-bit kernel and the structure has the same size regardless + * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined + * accordingly in the OS specific mali_uk_os.h header file. + */ +union uk_header { + /** + * 32-bit number identifying the UK function to be called. + * Also see uk_func. + */ + u32 id; + /** + * The int return code returned by the called UK function. + * See the specification of the particular UK function you are + * calling for the meaning of the error codes returned. All + * UK functions return 0 on success. + */ + u32 ret; + /* + * Used to ensure 64-bit alignment of this union. Do not remove. + * This field is used for padding and does not need to be initialized. + */ + u64 sizer; +}; + +/** + * This structure carries a 16-bit major and minor number and is sent along with an internal UK call + * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side. + */ +struct uku_version_check_args { + union uk_header header; + /**< UK call header */ + u16 major; + /**< This field carries the user-side major version on input and the kernel-side major version on output */ + u16 minor; + /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */ + u8 padding[4]; +}; /* MALI_SEC_INTEGRATION */ + +/** @} end group uk_api */ + +/** @} *//* end group base_api */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _UK_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c new file mode 100644 index 000000000000..cc233c993b10 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c @@ -0,0 +1,432 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Base kernel MMU management specific for Job Manager GPU. + */ + +#include +#include +#include +#include +#include +#include "../mali_kbase_mmu_internal.h" +#include "mali_kbase_device_internal.h" + +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup) +{ + /* Set up the required caching policies at the correct indices + * in the memattr register. + */ + setup->memattr = + (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << + (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (AS_MEMATTR_FORCE_TO_CACHE_ALL << + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_WRITE_ALLOC << + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << + (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (AS_MEMATTR_AARCH64_OUTER_WA << + (AS_MEMATTR_INDEX_OUTER_WA * 8)) | + (AS_MEMATTR_AARCH64_NON_CACHEABLE << + (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)); + + setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; + setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; +} + +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault) +{ + struct kbase_device *const kbdev = kctx->kbdev; + u32 const status = fault->status; + u32 const exception_type = (status & 0xFF); + u32 const exception_data = (status >> 8) & 0xFFFFFF; + int const as_no = as->number; + unsigned long flags; + + /* terminal fault, print info about the fault */ + dev_err(kbdev->dev, + "GPU bus fault in AS%d at VA 0x%016llX\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "exception data 0x%X\n" + "pid: %d\n", + as_no, fault->addr, + status, + exception_type, kbase_gpu_exception_name(exception_type), + exception_data, + kctx->pid); + + /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter + * dumping AS transaction begin + */ + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Set the MMU into unmapped mode */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_disable(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); +} + +/** + * The caller must ensure it's retained the ctx to prevent it from being + * scheduled out whilst it's being worked on. 
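+ *
+ * The raw fault status is decoded as: bits 7:0 exception type, bits 9:8
+ * access type and bits 31:16 source ID, matching the field extraction done
+ * below before the fault is reported.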
+ */ +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault) +{ + unsigned long flags; + u32 exception_type; + u32 access_type; + u32 source_id; + int as_no; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + + as_no = as->number; + kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; + + /* Make sure the context was active */ + if (WARN_ON(atomic_read(&kctx->refcount) <= 0)) + return; + + /* decode the fault status */ + exception_type = fault->status & 0xFF; + access_type = (fault->status >> 8) & 0x3; + source_id = (fault->status >> 16); + + /* terminal fault, print info about the fault */ + dev_err(kbdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n" + "pid: %d\n", + as_no, fault->addr, + reason_str, + fault->status, + exception_type, kbase_gpu_exception_name(exception_type), + access_type, kbase_gpu_access_type_name(fault->status), + source_id, + kctx->pid); + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->update_status) + kbdev->vendor_callbacks->update_status(kbdev, "completion_code", exception_type); + + /* MALI_SEC_INTEGRATION */ + if (kbdev->vendor_callbacks->debug_pagetable_info) + kbdev->vendor_callbacks->debug_pagetable_info(kctx, fault->addr); + + /* hardware counters dump fault handling */ + if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) { + if ((fault->addr >= kbdev->hwcnt.addr) && + (fault->addr < (kbdev->hwcnt.addr + + kbdev->hwcnt.addr_bytes))) + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; + } + + /* Stop the kctx from submitting more jobs and cause it to be scheduled + * out/rescheduled - this will occur on releasing the context's refcount + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + /* Kill any running jobs from the context. Submit is disallowed, so no + * more jobs from this context can appear in the job slots from this + * point on + */ + kbase_backend_jm_kill_running_jobs_from_kctx(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + /* switch to UNMAPPED mode, will abort all jobs and stop + * any hw counter dumping + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_disable(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* AS transaction end */ + /* Clear down the fault */ + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); +} + +void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + dev_dbg(kbdev->dev, + "Entering %s kctx %p, as %p\n", + __func__, (void *)kctx, (void *)as); + + if (!kctx) { + dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", + kbase_as_has_bus_fault(as, fault) ? + "Bus error" : "Page fault", + as->number, fault->addr); + + /* Since no ctx was found, the MMU must be disabled. 
*/ + WARN_ON(as->current_setup.transtab); + + if (kbase_as_has_bus_fault(as, fault)) { + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + } else if (kbase_as_has_page_fault(as, fault)) { + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + } + + return; + } + + if (kbase_as_has_bus_fault(as, fault)) { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + /* + * hw counters dumping in progress, signal the + * other thread that it failed + */ + if ((kbdev->hwcnt.kctx == kctx) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) + kbdev->hwcnt.backend.state = + KBASE_INSTR_STATE_FAULT; + + /* + * Stop the kctx from submitting more jobs and cause it + * to be scheduled out/rescheduled when all references + * to it are released + */ + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) + dev_warn(kbdev->dev, + "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", + as->number, fault->addr, + fault->extra_addr); + else + dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", + as->number, fault->addr); + + /* + * We need to switch to UNMAPPED mode - but we do this in a + * worker so that we can sleep + */ + WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); + atomic_inc(&kbdev->faults_pending); + } else { + WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); + atomic_inc(&kbdev->faults_pending); + } + + dev_dbg(kbdev->dev, + "Leaving %s kctx %p, as %p\n", + __func__, (void *)kctx, (void *)as); +} + +static void validate_protected_page_fault(struct kbase_device *kbdev) +{ + /* GPUs which support (native) protected mode shall not report page + * fault addresses unless it has protected debug mode and protected + * debug mode is turned on + */ + u32 protected_debug_mode = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + protected_debug_mode = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN; + } + + if (!protected_debug_mode) { + /* fault_addr should never be reported in protected mode. 
+ * However, we just continue by printing an error message + */ + dev_err(kbdev->dev, "Fault address reported in protected mode\n"); + } +} + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) +{ + const int num_as = 16; + const int busfault_shift = MMU_PAGE_FAULT_FLAGS; + const int pf_shift = 0; + const unsigned long as_bit_mask = (1UL << num_as) - 1; + unsigned long flags; + u32 new_mask; + u32 tmp, bf_bits, pf_bits; + bool gpu_lost = false; + + dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", + __func__, irq_stat); + /* bus faults */ + bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; + /* page faults (note: Ignore ASes with both pf and bf) */ + pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; + + if (WARN_ON(kbdev == NULL)) + return; + + /* remember current mask */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + /* mask interrupts for now */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + while (bf_bits | pf_bits) { + struct kbase_as *as; + int as_no; + struct kbase_context *kctx; + struct kbase_fault *fault; + + /* + * the while logic ensures we have a bit set, no need to check + * for not-found here + */ + as_no = ffs(bf_bits | pf_bits) - 1; + as = &kbdev->as[as_no]; + + /* find the fault type */ + if (bf_bits & (1 << as_no)) + fault = &as->bf_data; + else + fault = &as->pf_data; + + /* + * Refcount the kctx ASAP - it shouldn't disappear anyway, since + * Bus/Page faults _should_ only occur whilst jobs are running, + * and a job causing the Bus/Page fault shouldn't complete until + * the MMU is updated + */ + kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); + + /* find faulting address */ + fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTADDRESS_HI)); + fault->addr <<= 32; + fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTADDRESS_LO)); + /* Mark the fault protected or not */ + fault->protected_mode = kbdev->protected_mode; + + if (kbdev->protected_mode && fault->addr) { + /* check if address reporting is allowed */ + validate_protected_page_fault(kbdev); + } + + /* report the fault to debugfs */ + kbase_as_fault_debugfs_new(kbdev, as_no); + + /* record the fault status */ + fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTSTATUS)); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { + fault->extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr <<= 32; + fault->extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + } + + /* check if we still have GPU */ + gpu_lost = kbase_is_gpu_lost(kbdev); + if (gpu_lost) { + if (kctx) + kbasep_js_runpool_release_ctx(kbdev, kctx); + return; + } + + if (kbase_as_has_bus_fault(as, fault)) { + /* Mark bus fault as handled. + * Note that a bus fault is processed first in case + * where both a bus fault and page fault occur. 
+ */ + bf_bits &= ~(1UL << as_no); + + /* remove the queued BF (and PF) from the mask */ + new_mask &= ~(MMU_BUS_ERROR(as_no) | + MMU_PAGE_FAULT(as_no)); + } else { + /* Mark page fault as handled */ + pf_bits &= ~(1UL << as_no); + + /* remove the queued PF from the mask */ + new_mask &= ~MMU_PAGE_FAULT(as_no); + } + + /* Process the interrupt for this address space */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_interrupt_process(kbdev, kctx, as, fault); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* reenable interrupts */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask |= tmp; + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", + __func__, irq_stat); +} + +int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ + dev_dbg(kctx->kbdev->dev, + "Switching to incremental rendering for region %p\n", + (void *)reg); + return kbase_job_slot_softstop_start_rp(kctx, reg); +} diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c new file mode 100644 index 000000000000..a919e0302a66 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c @@ -0,0 +1,2249 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_mmu.c + * Base kernel MMU management. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#define KBASE_MMU_PAGE_ENTRIES 512 + +/* MALI_SEC_INTEGRATION */ +#include + +/** + * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * + * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. + * + * If sync is not set then transactions still in flight when the flush is issued + * may use the old page tables and the data they write will not be written out + * to memory, this function returns after the flush has been issued but + * before all accesses which might effect the flushed region have completed. + * + * If sync is set then accesses in the flushed region will be drained + * before data is flush and invalidated through L1, L2 and into memory, + * after which point this function will return. 
+ */ +static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync); + +/** + * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches. + * @kbdev: Device pointer. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * @as_nr: GPU address space number for which flush + invalidate is required. + * + * This is used for MMU tables which do not belong to a user space context. + */ +static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, + u64 vpfn, size_t nr, bool sync, int as_nr); + +/** + * kbase_mmu_sync_pgd - sync page directory to memory + * @kbdev: Device pointer. + * @handle: Address of DMA region. + * @size: Size of the region to sync. + * + * This should be called after each page directory update. + */ + +static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, + dma_addr_t handle, size_t size) +{ + /* If page table is not coherent then ensure the gpu can read + * the pages from memory + */ + if (kbdev->system_coherency != COHERENCY_ACE) + dma_sync_single_for_device(kbdev->dev, handle, size, + DMA_TO_DEVICE); +} + +/* + * Definitions: + * - PGD: Page Directory. + * - PTE: Page Table Entry. A 64bit value pointing to the next + * level of translation + * - ATE: Address Translation Entry. A 64bit value pointing to + * a 4kB physical page. + */ + +static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id); + +/** + * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to + * a region on a GPU page fault + * + * @reg: The region that will be backed with more pages + * @fault_rel_pfn: PFN of the fault relative to the start of the region + * + * This calculates how much to increase the backing of a region by, based on + * where a GPU page fault occurred and the flags in the region. + * + * This can be more than the minimum number of pages that would reach + * @fault_rel_pfn, for example to reduce the overall rate of page fault + * interrupts on a region, or to ensure that the end address is aligned. + * + * Return: the number of backed pages to increase by + */ +static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, + struct kbase_va_region *reg, size_t fault_rel_pfn) +{ + size_t multiple = reg->extent; + size_t reg_current_size = kbase_reg_current_backed_size(reg); + size_t minimum_extra = fault_rel_pfn - reg_current_size + 1; + size_t remainder; + + if (!multiple) { + dev_warn(kbdev->dev, + "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); + return minimum_extra; + } + + /* Calculate the remainder to subtract from minimum_extra to make it + * the desired (rounded down) multiple of the extent. + * Depending on reg's flags, the base used for calculating multiples is + * different + */ + + /* multiple is based from the current backed size, even if the + * current backed size/pfn for end of committed memory are not + * themselves aligned to multiple + */ + remainder = minimum_extra % multiple; + + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* multiple is based from the top of the initial commit, which + * has been allocated in such a way that (start_pfn + + * initial_commit) is already aligned to multiple. 
Hence the + * pfn for the end of committed memory will also be aligned to + * multiple + */ + size_t initial_commit = reg->initial_commit; + + if (fault_rel_pfn < initial_commit) { + /* this case is just to catch in case it's been + * recommitted by userspace to be smaller than the + * initial commit + */ + minimum_extra = initial_commit - reg_current_size; + remainder = 0; + } else { + /* same as calculating + * (fault_rel_pfn - initial_commit + 1) + */ + size_t pages_after_initial = minimum_extra + + reg_current_size - initial_commit; + + remainder = pages_after_initial % multiple; + } + } + + if (remainder == 0) + return minimum_extra; + + return minimum_extra + multiple - remainder; +} + +#ifdef CONFIG_MALI_CINSTR_GWT +static void kbase_gpu_mmu_handle_write_faulting_as( + struct kbase_device *kbdev, + struct kbase_as *faulting_as, + u64 start_pfn, size_t nr, u32 op) +{ + mutex_lock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn, + nr, op, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_enable_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); +} + +static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, + struct kbase_as *faulting_as) +{ + struct kbasep_gwt_list_element *pos; + struct kbase_va_region *region; + struct kbase_device *kbdev; + struct kbase_fault *fault; + u64 fault_pfn, pfn_offset; + u32 op; + int ret; + int as_no; + + as_no = faulting_as->number; + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + fault = &faulting_as->pf_data; + fault_pfn = fault->addr >> PAGE_SHIFT; + + kbase_gpu_vm_lock(kctx); + + /* Find region and check if it should be writable. */ + region = kbase_region_tracker_find_region_enclosing_address(kctx, + fault->addr); + if (kbase_is_region_invalid_or_free(region)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory is not mapped on the GPU", + &faulting_as->pf_data); + return; + } + + if (!(region->flags & KBASE_REG_GPU_WR)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Region does not have write permissions", + &faulting_as->pf_data); + return; + } + + /* Capture addresses of faulting write location + * for job dumping if write tracking is enabled. + */ + if (kctx->gwt_enabled) { + u64 page_addr = fault->addr & PAGE_MASK; + bool found = false; + /* Check if this write was already handled. */ + list_for_each_entry(pos, &kctx->gwt_current_list, link) { + if (page_addr == pos->page_addr) { + found = true; + break; + } + } + + if (!found) { + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (pos) { + pos->region = region; + pos->page_addr = page_addr; + pos->num_pages = 1; + list_add(&pos->link, &kctx->gwt_current_list); + } else { + dev_warn(kbdev->dev, "kmalloc failure"); + } + } + } + + pfn_offset = fault_pfn - region->start_pfn; + /* Now make this faulting page writable to GPU. 
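+ * The region is already known to have KBASE_REG_GPU_WR set (checked
+ * above), so re-writing the single faulting ATE with the region's
+ * flags is enough to make the page writable again.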
*/ + ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + 1, region->flags, region->gpu_alloc->group_id); + + /* flush L2 and unlock the VA (resumes the MMU) */ + op = AS_COMMAND_FLUSH_PT; + + kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, + fault_pfn, 1, op); + + kbase_gpu_vm_unlock(kctx); +} + +static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, + struct kbase_as *faulting_as) +{ + struct kbase_fault *fault = &faulting_as->pf_data; + + switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: + case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: + kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); + break; + case AS_FAULTSTATUS_ACCESS_TYPE_EX: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Execute Permission fault", fault); + break; + case AS_FAULTSTATUS_ACCESS_TYPE_READ: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Read Permission fault", fault); + break; + default: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown Permission fault", fault); + break; + } +} +#endif + +#define MAX_POOL_LEVEL 2 + +/** + * page_fault_try_alloc - Try to allocate memory from a context pool + * @kctx: Context pointer + * @region: Region to grow + * @new_pages: Number of 4 kB pages to allocate + * @pages_to_grow: Pointer to variable to store number of outstanding pages on + * failure. This can be either 4 kB or 2 MB pages, depending on + * the number of pages requested. + * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true + * for 2 MB, false for 4 kB. + * @prealloc_sas: Pointer to kbase_sub_alloc structures + * + * This function will try to allocate as many pages as possible from the context + * pool, then if required will try to allocate the remaining pages from the + * device pool. + * + * This function will not allocate any new memory beyond that that is already + * present in the context or device pools. This is because it is intended to be + * called with the vm_lock held, which could cause recursive locking if the + * allocation caused the out-of-memory killer to run. + * + * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be + * a count of 2 MB pages, otherwise it will be a count of 4 kB pages. + * + * Return: true if successful, false on failure + */ +static bool page_fault_try_alloc(struct kbase_context *kctx, + struct kbase_va_region *region, size_t new_pages, + int *pages_to_grow, bool *grow_2mb_pool, + struct kbase_sub_alloc **prealloc_sas) +{ + struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL}; + struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL}; + size_t pages_alloced[MAX_POOL_LEVEL] = {0}; + struct kbase_mem_pool *pool, *root_pool; + int pool_level = 0; + bool alloc_failed = false; + size_t pages_still_required; + + if (WARN_ON(region->gpu_alloc->group_id >= + MEMORY_GROUP_MANAGER_NR_GROUPS)) { + /* Do not try to grow the memory pool */ + *pages_to_grow = 0; + return false; + } + +#ifdef CONFIG_MALI_2MB_ALLOC + if (new_pages >= (SZ_2M / SZ_4K)) { + root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; + *grow_2mb_pool = true; + } else { +#endif + root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id]; + *grow_2mb_pool = false; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + if (region->gpu_alloc != region->cpu_alloc) + new_pages *= 2; + + pages_still_required = new_pages; + + /* Determine how many pages are in the pools before trying to allocate. 
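+ * The check walks the context pool and then any linked device pools
+ * via the next_pool chain.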
+ * Don't attempt to allocate & free if the allocation can't succeed. + */ + for (pool = root_pool; pool != NULL; pool = pool->next_pool) { + size_t pool_size_4k; + + kbase_mem_pool_lock(pool); + + pool_size_4k = kbase_mem_pool_size(pool) << pool->order; + if (pool_size_4k >= pages_still_required) + pages_still_required = 0; + else + pages_still_required -= pool_size_4k; + + kbase_mem_pool_unlock(pool); + + if (!pages_still_required) + break; + } + + if (pages_still_required) { + /* Insufficient pages in pools. Don't try to allocate - just + * request a grow. + */ + *pages_to_grow = pages_still_required; + + return false; + } + + /* Since we've dropped the pool locks, the amount of memory in the pools + * may change between the above check and the actual allocation. + */ + pool = root_pool; + for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) { + size_t pool_size_4k; + size_t pages_to_alloc_4k; + size_t pages_to_alloc_4k_per_alloc; + + kbase_mem_pool_lock(pool); + + /* Allocate as much as possible from this pool*/ + pool_size_4k = kbase_mem_pool_size(pool) << pool->order; + pages_to_alloc_4k = MIN(new_pages, pool_size_4k); + if (region->gpu_alloc == region->cpu_alloc) + pages_to_alloc_4k_per_alloc = pages_to_alloc_4k; + else + pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1; + + pages_alloced[pool_level] = pages_to_alloc_4k; + if (pages_to_alloc_4k) { + gpu_pages[pool_level] = + kbase_alloc_phy_pages_helper_locked( + region->gpu_alloc, pool, + pages_to_alloc_4k_per_alloc, + &prealloc_sas[0]); + + if (!gpu_pages[pool_level]) { + alloc_failed = true; + } else if (region->gpu_alloc != region->cpu_alloc) { + cpu_pages[pool_level] = + kbase_alloc_phy_pages_helper_locked( + region->cpu_alloc, pool, + pages_to_alloc_4k_per_alloc, + &prealloc_sas[1]); + + if (!cpu_pages[pool_level]) + alloc_failed = true; + } + } + + kbase_mem_pool_unlock(pool); + + if (alloc_failed) { + WARN_ON(!new_pages); + WARN_ON(pages_to_alloc_4k >= new_pages); + WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages); + break; + } + + new_pages -= pages_to_alloc_4k; + + if (!new_pages) + break; + + pool = pool->next_pool; + if (!pool) + break; + } + + if (new_pages) { + /* Allocation was unsuccessful */ + int max_pool_level = pool_level; + + pool = root_pool; + + /* Free memory allocated so far */ + for (pool_level = 0; pool_level <= max_pool_level; + pool_level++) { + kbase_mem_pool_lock(pool); + + if (region->gpu_alloc != region->cpu_alloc) { + if (pages_alloced[pool_level] && + cpu_pages[pool_level]) + kbase_free_phy_pages_helper_locked( + region->cpu_alloc, + pool, cpu_pages[pool_level], + pages_alloced[pool_level]); + } + + if (pages_alloced[pool_level] && gpu_pages[pool_level]) + kbase_free_phy_pages_helper_locked( + region->gpu_alloc, + pool, gpu_pages[pool_level], + pages_alloced[pool_level]); + + kbase_mem_pool_unlock(pool); + + pool = pool->next_pool; + } + + /* + * If the allocation failed despite there being enough memory in + * the pool, then just fail. Otherwise, try to grow the memory + * pool. + */ + if (alloc_failed) + *pages_to_grow = 0; + else + *pages_to_grow = new_pages; + + return false; + } + + /* Allocation was successful. No pages to grow, return success. 
*/ + *pages_to_grow = 0; + + return true; +} + +/* Small wrapper function to factor out GPU-dependent context releasing */ +static void release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + kbasep_js_runpool_release_ctx(kbdev, kctx); +} + +void page_fault_worker(struct work_struct *data) +{ + u64 fault_pfn; + u32 fault_status; + size_t new_pages; + size_t fault_rel_pfn; + struct kbase_as *faulting_as; + int as_no; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_va_region *region; + struct kbase_fault *fault; + int err; + bool grown = false; + int pages_to_grow; + bool grow_2mb_pool; + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; + size_t current_backed_size; +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t pages_trimmed = 0; +#endif + + faulting_as = container_of(data, struct kbase_as, work_pagefault); + fault = &faulting_as->pf_data; + fault_pfn = fault->addr >> PAGE_SHIFT; + as_no = faulting_as->number; + + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + dev_dbg(kbdev->dev, + "Entering %s %p, fault_pfn %lld, as_no %d\n", + __func__, (void *)data, fault_pfn, as_no); + /* MALI_SEC_INTEGRATION */ + /* clear the type to mark we've arrived in the fault worker */ + //faulting_as->fault_type = KBASE_MMU_FAULT_TYPE_UNKNOWN; + + /* Grab the context that was already refcounted in kbase_mmu_interrupt() + * Therefore, it cannot be scheduled out of this AS until we explicitly + * release it + */ + kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); + if (!kctx) { + atomic_dec(&kbdev->faults_pending); + return; + } + + KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + mutex_lock(&kctx->jctx.lock); +#endif + + if (unlikely(fault->protected_mode)) { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Protected mode fault", fault); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + + goto fault_done; + } + + fault_status = fault->status; + switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { + + case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: + /* need to check against the region to handle this one */ + break; + + case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: +#ifdef CONFIG_MALI_CINSTR_GWT + /* If GWT was ever enabled then we need to handle + * write fault pages even if the feature was disabled later. 
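+ * kbase_gpu_mmu_handle_permission_fault() re-maps the faulting page
+ * as writable when the access was a write to a writable region, and
+ * reports the fault and kills the context otherwise.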
+ */ + if (kctx->gwt_was_enabled) { + kbase_gpu_mmu_handle_permission_fault(kctx, + faulting_as); + goto fault_done; + } +#endif + + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Permission failure", fault); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Translation table bus fault", fault); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: + /* nothing to do, but we don't expect this fault currently */ + dev_warn(kbdev->dev, "Access flag unexpectedly set"); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault", fault); + else + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown fault code", fault); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault", fault); + else + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown fault code", fault); + goto fault_done; + + default: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown fault code", fault); + goto fault_done; + } + +#ifdef CONFIG_MALI_2MB_ALLOC + /* Preallocate memory for the sub-allocation structs if necessary */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + if (!prealloc_sas[i]) { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Failed pre-allocating memory for sub-allocations' metadata", + fault); + goto fault_done; + } + } +#endif /* CONFIG_MALI_2MB_ALLOC */ + +page_fault_retry: + /* so we have a translation fault, + * let's see if it is for growable memory + */ + kbase_gpu_vm_lock(kctx); + + region = kbase_region_tracker_find_region_enclosing_address(kctx, + fault->addr); + if (kbase_is_region_invalid_or_free(region)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory is not mapped on the GPU", fault); + goto fault_done; + } + + if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "DMA-BUF is not mapped on the GPU", fault); + goto fault_done; + } + + if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Bad physical memory group ID", fault); + goto fault_done; + } + + if ((region->flags & GROWABLE_FLAGS_REQUIRED) + != GROWABLE_FLAGS_REQUIRED) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory is not growable", fault); + goto fault_done; + } + + if ((region->flags & KBASE_REG_DONT_NEED)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Don't need memory can't be grown", fault); + goto fault_done; + } + + /* find the size we need to grow it by + * we know the result fit in a size_t due to + * kbase_region_tracker_find_region_enclosing_address + * validating the fault_address to be within a size_t from the start_pfn + */ + fault_rel_pfn = fault_pfn - region->start_pfn; + + current_backed_size = kbase_reg_current_backed_size(region); + + if (fault_rel_pfn < current_backed_size) { + dev_dbg(kbdev->dev, + "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of 
growable TMEM: Ignoring", + fault->addr, region->start_pfn, + region->start_pfn + + current_backed_size); + + mutex_lock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + /* [1] in case another page fault occurred while we were + * handling the (duplicate) page fault we need to ensure we + * don't loose the other page fault as result of us clearing + * the MMU IRQ. Therefore, after we clear the MMU IRQ we send + * an UNLOCK command that will retry any stalled memory + * transaction (which should cause the other page fault to be + * raised again). + */ + kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, + AS_COMMAND_UNLOCK, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_enable_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_gpu_vm_unlock(kctx); + + goto fault_done; + } + + new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); + + /* cap to max vsize */ + new_pages = min(new_pages, region->nr_pages - current_backed_size); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", + new_pages); + + if (new_pages == 0) { + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Duplicate of a fault we've already handled, nothing to do */ + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + /* See comment [1] about UNLOCK usage */ + kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, + AS_COMMAND_UNLOCK, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_enable_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_gpu_vm_unlock(kctx); + goto fault_done; + } + + pages_to_grow = 0; + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) { + kbase_jit_request_phys_increase(kctx, new_pages); + pages_trimmed = new_pages; + } +#endif + + spin_lock(&kctx->mem_partials_lock); + grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, + &grow_2mb_pool, prealloc_sas); + spin_unlock(&kctx->mem_partials_lock); + + if (grown) { + u64 pfn_offset; + u32 op; + + /* alloc success */ + WARN_ON(kbase_reg_current_backed_size(region) > + region->nr_pages); + + /* set up the new pages */ + pfn_offset = kbase_reg_current_backed_size(region) - new_pages; + /* + * Note: + * Issuing an MMU operation will unlock the MMU and cause the + * translation to be replayed. If the page insertion fails then + * rather then trying to continue the context should be killed + * so the no_flush version of insert_pages is used which allows + * us to unlock the MMU as we see fit. + */ + err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, + region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + new_pages, region->flags, region->gpu_alloc->group_id); + if (err) { + kbase_free_phy_pages_helper(region->gpu_alloc, + new_pages); + if (region->gpu_alloc != region->cpu_alloc) + kbase_free_phy_pages_helper(region->cpu_alloc, + new_pages); + kbase_gpu_vm_unlock(kctx); + /* The locked VA region will be unlocked and the cache + * invalidated in here + */ + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Page table update failure", fault); + goto fault_done; + } + KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, + (u64)new_pages); + trace_mali_mmu_page_fault_grow(region, fault, new_pages); + +#if MALI_INCREMENTAL_RENDERING + /* Switch to incremental rendering if we have nearly run out of + * memory in a JIT memory allocation. 
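+ * The switch is requested through kbase_mmu_switch_to_ir(), which
+ * soft-stops the start-of-renderpass job for this region.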
+ */ + if (region->threshold_pages && + kbase_reg_current_backed_size(region) > + region->threshold_pages) { + + dev_dbg(kctx->kbdev->dev, + "%zu pages exceeded IR threshold %zu\n", + new_pages + current_backed_size, + region->threshold_pages); + + if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { + dev_dbg(kctx->kbdev->dev, + "Get region %p for IR\n", + (void *)region); + kbase_va_region_alloc_get(kctx, region); + } + } +#endif + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + /* flush L2 and unlock the VA (resumes the MMU) */ + op = AS_COMMAND_FLUSH_PT; + + /* clear MMU interrupt - this needs to be done after updating + * the page tables but before issuing a FLUSH command. The + * FLUSH cmd has a side effect that it restarts stalled memory + * transactions in other address spaces which may cause + * another fault to occur. If we didn't clear the interrupt at + * this stage a new IRQ might not be raised when the GPU finds + * a MMU IRQ is already pending. + */ + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + + kbase_mmu_hw_do_operation(kbdev, faulting_as, + fault->addr >> PAGE_SHIFT, + new_pages, op, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + /* reenable this in the mask */ + kbase_mmu_hw_enable_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + +#ifdef CONFIG_MALI_CINSTR_GWT + if (kctx->gwt_enabled) { + /* GWT also tracks growable regions. */ + struct kbasep_gwt_list_element *pos; + + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (pos) { + pos->region = region; + pos->page_addr = (region->start_pfn + + pfn_offset) << + PAGE_SHIFT; + pos->num_pages = new_pages; + list_add(&pos->link, + &kctx->gwt_current_list); + } else { + dev_warn(kbdev->dev, "kmalloc failure"); + } + } +#endif + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (pages_trimmed) { + kbase_jit_done_phys_increase(kctx, pages_trimmed); + pages_trimmed = 0; + } +#endif + kbase_gpu_vm_unlock(kctx); + } else { + int ret = -ENOMEM; + + kbase_gpu_vm_unlock(kctx); + + /* If the memory pool was insufficient then grow it and retry. + * Otherwise fail the allocation. 
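+ * The pool is grown only after kbase_gpu_vm_unlock() above, so that
+ * the allocation cannot recurse on the vm_lock if the kernel has to
+ * reclaim memory (see the note on page_fault_try_alloc()).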
+ */ + if (pages_to_grow > 0) { +#ifdef CONFIG_MALI_2MB_ALLOC + if (grow_2mb_pool) { + /* Round page requirement up to nearest 2 MB */ + struct kbase_mem_pool *const lp_mem_pool = + &kctx->mem_pools.large[ + region->gpu_alloc->group_id]; + + pages_to_grow = (pages_to_grow + + ((1 << lp_mem_pool->order) - 1)) + >> lp_mem_pool->order; + + ret = kbase_mem_pool_grow(lp_mem_pool, + pages_to_grow); + } else { +#endif + struct kbase_mem_pool *const mem_pool = + &kctx->mem_pools.small[ + region->gpu_alloc->group_id]; + + ret = kbase_mem_pool_grow(mem_pool, + pages_to_grow); +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + } + if (ret < 0) { + /* failed to extend, handle as a normal PF */ + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Page allocation failure", fault); + } else { + dev_dbg(kbdev->dev, "Try again after pool_grow\n"); + goto page_fault_retry; + } + } + +fault_done: +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (pages_trimmed) { + kbase_gpu_vm_lock(kctx); + kbase_jit_done_phys_increase(kctx, pages_trimmed); + kbase_gpu_vm_unlock(kctx); + } + mutex_unlock(&kctx->jctx.lock); +#endif + + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) + kfree(prealloc_sas[i]); + + /* + * By this point, the fault was handled in some way, + * so release the ctx refcount + */ + release_ctx(kbdev, kctx); + + atomic_dec(&kbdev->faults_pending); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data); +} + +static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut) +{ + u64 *page; + int i; + struct page *p; + + p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); + if (!p) + return 0; + + page = kmap(p); + if (page == NULL) + goto alloc_free; + + /* If the MMU tables belong to a context then account the memory usage + * to that context, otherwise the MMU tables are device wide and are + * only accounted to the device. + */ + if (mmut->kctx) { + int new_page_count; + + new_page_count = atomic_add_return(1, + &mmut->kctx->used_pages); + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + mmut->kctx->id, + (u64)new_page_count); + kbase_process_page_usage_inc(mmut->kctx, 1); + } + + atomic_add(1, &kbdev->memdev.used_pages); + + kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) + kbdev->mmu_mode->entry_invalidate(&page[i]); + + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); + + kunmap(p); + return page_to_phys(p); + +alloc_free: + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, + false); + + return 0; +} + +/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the + * new table from the pool if needed and possible + */ +static int mmu_get_next_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + phys_addr_t *pgd, u64 vpfn, int level) +{ + u64 *page; + phys_addr_t target_pgd; + struct page *p; + + KBASE_DEBUG_ASSERT(*pgd); + + lockdep_assert_held(&mmut->mmu_lock); + + /* + * Architecture spec defines level-0 as being the top-most. + * This is a bit unfortunate here, but we keep the same convention. 
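+ *
+ * Each level holds KBASE_MMU_PAGE_ENTRIES (512) entries, so the index
+ * at level N is bits [(3 - N) * 9 + 8 .. (3 - N) * 9] of the VPFN;
+ * e.g. for level 2 the shift below reduces to (vpfn >> 9) & 0x1FF.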
+ */ + vpfn >>= (3 - level) * 9; + vpfn &= 0x1FF; + + p = pfn_to_page(PFN_DOWN(*pgd)); + page = kmap(p); + if (page == NULL) { + dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + return -EINVAL; + } + + target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); + + if (!target_pgd) { + target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); + if (!target_pgd) { + dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", + __func__); + kunmap(p); + return -ENOMEM; + } + + kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); + + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); + /* Rely on the caller to update the address space flags. */ + } + + kunmap(p); + *pgd = target_pgd; + + return 0; +} + +/* + * Returns the PGD for the specified level of translation + */ +static int mmu_get_pgd_at_level(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + u64 vpfn, + int level, + phys_addr_t *out_pgd) +{ + phys_addr_t pgd; + int l; + + lockdep_assert_held(&mmut->mmu_lock); + pgd = mmut->pgd; + + for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { + int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); + /* Handle failure condition */ + if (err) { + dev_dbg(kbdev->dev, + "%s: mmu_get_next_pgd failure at level %d\n", + __func__, l); + return err; + } + } + + *out_pgd = pgd; + + return 0; +} + +static int mmu_get_bottom_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + u64 vpfn, + phys_addr_t *out_pgd) +{ + return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, + out_pgd); +} + +static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + u64 from_vpfn, u64 to_vpfn) +{ + phys_addr_t pgd; + u64 vpfn = from_vpfn; + struct kbase_mmu_mode const *mmu_mode; + + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn); + + lockdep_assert_held(&mmut->mmu_lock); + + mmu_mode = kbdev->mmu_mode; + + while (vpfn < to_vpfn) { + unsigned int i; + unsigned int idx = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; + unsigned int pcount = 0; + unsigned int left = to_vpfn - vpfn; + int level; + u64 *page; + + if (count > left) + count = left; + + /* need to check if this is a 2MB page or a 4kB */ + pgd = mmut->pgd; + + for (level = MIDGARD_MMU_TOPLEVEL; + level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { + idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; + page = kmap(phys_to_page(pgd)); + if (mmu_mode->ate_is_valid(page[idx], level)) + break; /* keep the mapping */ + kunmap(phys_to_page(pgd)); + pgd = mmu_mode->pte_to_phy_addr(page[idx]); + } + + switch (level) { + case MIDGARD_MMU_LEVEL(2): + /* remap to single entry to update */ + pcount = 1; + break; + case MIDGARD_MMU_BOTTOMLEVEL: + /* page count is the same as the logical count */ + pcount = count; + break; + default: + dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", + __func__, level); + goto next; + } + + /* Invalidate the entries we added */ + for (i = 0; i < pcount; i++) + mmu_mode->entry_invalidate(&page[idx + i]); + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(phys_to_page(pgd)) + 8 * idx, + 8 * pcount); + kunmap(phys_to_page(pgd)); + +next: + vpfn += count; + } +} + +/* + * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' + */ +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, + unsigned long flags, int const group_id) +{ + phys_addr_t pgd; + u64 *pgd_page; + /* In case the 
insert_single_page only partially completes + * we need to be able to recover + */ + bool recover_required = false; + u64 start_vpfn = vpfn; + size_t recover_count = 0; + size_t remain = nr; + int err; + struct kbase_device *kbdev; + + if (WARN_ON(kctx == NULL)) + return -EINVAL; + + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + + kbdev = kctx->kbdev; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + mutex_lock(&kctx->mmu.mmu_lock); + + while (remain) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > remain) + count = remain; + + /* + * Repeatedly calling mmu_get_bottom_pte() is clearly + * suboptimal. We don't have to re-parse the whole tree + * each time (just cache the l0-l2 sequence). + * On the other hand, it's only a gain when we map more than + * 256 pages at once (on average). Do we really care? + */ + do { + err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, + vpfn, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page walk to succeed + */ + mutex_unlock(&kctx->mmu.mmu_lock); + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[ + kctx->mmu.group_id], + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu.mmu_lock); + } while (!err); + if (err) { + dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed + */ + mmu_insert_pages_failure_recovery(kbdev, + &kctx->mmu, + start_vpfn, + start_vpfn + recover_count); + } + goto fail_unlock; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed + */ + mmu_insert_pages_failure_recovery(kbdev, + &kctx->mmu, + start_vpfn, + start_vpfn + recover_count); + } + err = -ENOMEM; + goto fail_unlock; + } + + for (i = 0; i < count; i++) { + unsigned int ofs = index + i; + + /* Fail if the current page is a valid ATE entry */ + KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); + + pgd_page[ofs] = kbase_mmu_create_ate(kbdev, + phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); + } + + vpfn += count; + remain -= count; + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); + + kunmap(p); + /* We have started modifying the page table. + * If further pages need inserting and fail we need to undo what + * has already taken place + */ + recover_required = true; + recover_count += count; + } + mutex_unlock(&kctx->mmu.mmu_lock); + kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); + return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu.mmu_lock); + kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); + return err; +} + +static inline void cleanup_empty_pte(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 *pte) +{ + phys_addr_t tmp_pgd; + struct page *tmp_p; + + tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte); + tmp_p = phys_to_page(tmp_pgd); + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], + tmp_p, false); + + /* If the MMU tables belong to a context then we accounted the memory + * usage to that context, so decrement here. 
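+ * This mirrors the accounting done when the page was allocated in
+ * kbase_mmu_alloc_pgd().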
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + atomic_sub(1, &kbdev->memdev.used_pages); + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, + struct tagged_addr const phy, unsigned long const flags, + int const level, int const group_id) +{ + u64 entry; + + kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level); + return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, + group_id, level, entry); +} + +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + const u64 start_vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, + int const group_id) +{ + phys_addr_t pgd; + u64 *pgd_page; + u64 insert_vpfn = start_vpfn; + size_t remain = nr; + int err; + struct kbase_mmu_mode const *mmu_mode; + + /* Note that 0 is a valid start_vpfn */ + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); + + mmu_mode = kbdev->mmu_mode; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + mutex_lock(&mmut->mmu_lock); + + while (remain) { + unsigned int i; + unsigned int vindex = insert_vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; + struct page *p; + int cur_level; + + if (count > remain) + count = remain; + + if (!vindex && is_huge_head(*phys)) + cur_level = MIDGARD_MMU_LEVEL(2); + else + cur_level = MIDGARD_MMU_BOTTOMLEVEL; + + /* + * Repeatedly calling mmu_get_pgd_at_level() is clearly + * suboptimal. We don't have to re-parse the whole tree + * each time (just cache the l0-l2 sequence). + * On the other hand, it's only a gain when we map more than + * 256 pages at once (on average). Do we really care? + */ + do { + err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, + cur_level, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page walk to succeed + */ + mutex_unlock(&mmut->mmu_lock); + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[mmut->group_id], + cur_level); + mutex_lock(&mmut->mmu_lock); + } while (!err); + + if (err) { + dev_warn(kbdev->dev, + "%s: mmu_get_bottom_pgd failure\n", __func__); + if (insert_vpfn != start_vpfn) { + /* Invalidate the pages we have partially + * completed + */ + mmu_insert_pages_failure_recovery(kbdev, + mmut, start_vpfn, insert_vpfn); + } + goto fail_unlock; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure\n", + __func__); + if (insert_vpfn != start_vpfn) { + /* Invalidate the pages we have partially + * completed + */ + mmu_insert_pages_failure_recovery(kbdev, + mmut, start_vpfn, insert_vpfn); + } + err = -ENOMEM; + goto fail_unlock; + } + + if (cur_level == MIDGARD_MMU_LEVEL(2)) { + int level_index = (insert_vpfn >> 9) & 0x1FF; + u64 *target = &pgd_page[level_index]; + + if (mmu_mode->pte_is_valid(*target, cur_level)) + cleanup_empty_pte(kbdev, mmut, target); + *target = kbase_mmu_create_ate(kbdev, *phys, flags, + cur_level, group_id); + } else { + for (i = 0; i < count; i++) { + unsigned int ofs = vindex + i; + u64 *target = &pgd_page[ofs]; + + /* Warn if the current page is a valid ATE + * entry. The page table shouldn't have anything + * in the place where we are trying to put a + * new entry. 
Modification to page table entries + * should be performed with + * kbase_mmu_update_pages() + */ + WARN_ON((*target & 1UL) != 0); + + *target = kbase_mmu_create_ate(kbdev, + phys[i], flags, cur_level, group_id); + } + } + + phys += count; + insert_vpfn += count; + remain -= count; + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (vindex * sizeof(u64)), + count * sizeof(u64)); + + kunmap(p); + } + + err = 0; + +fail_unlock: + mutex_unlock(&mmut->mmu_lock); + return err; +} + +/* + * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space + * number 'as_nr'. + */ +int kbase_mmu_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id) +{ + int err; + + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, + phys, nr, flags, group_id); + + if (mmut->kctx) + kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false); + else + kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, + as_nr); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); + +/** + * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches + * without retaining the kbase context. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * + * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any + * other locking. + */ +static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync) +{ + struct kbase_device *kbdev = kctx->kbdev; + int err; + u32 op; + + /* Early out if there is nothing to do */ + if (nr == 0) + return; + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + &kbdev->as[kctx->as_nr], + vpfn, nr, op, 0); + if (err) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to recover + */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } +} + +/* Perform a flush/invalidate on a particular address space + */ +static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, + struct kbase_as *as, + u64 vpfn, size_t nr, bool sync) +{ + int err; + u32 op; + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* GPU is off so there's no need to perform flush/invalidate */ + return; + } + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + as, vpfn, nr, op, 0); + + if (err) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover + */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. 
Issueing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + kbase_pm_context_idle(kbdev); +} + +static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, + u64 vpfn, size_t nr, bool sync, int as_nr) +{ + /* Skip if there is nothing to do */ + if (nr) { + kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn, + nr, sync); + } +} + +static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync) +{ + struct kbase_device *kbdev; + bool ctx_is_in_runpool; + + /* Early out if there is nothing to do */ + if (nr == 0) + return; + + /* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_RT_PM + if (!gpu_is_power_on()) + return; +#endif + + kbdev = kctx->kbdev; + mutex_lock(&kbdev->js_data.queue_mutex); + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); + mutex_unlock(&kbdev->js_data.queue_mutex); + + if (ctx_is_in_runpool) { + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], + vpfn, nr, sync); + + release_ctx(kbdev, kctx); + } +} + +void kbase_mmu_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + int as_nr) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); + + kbdev->mmu_mode->update(kbdev, mmut, as_nr); +} +KBASE_EXPORT_TEST_API(kbase_mmu_update); + +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + kbdev->mmu_mode->disable_as(kbdev, as_nr); +} + +void kbase_mmu_disable(struct kbase_context *kctx) +{ + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. + * + * as_nr won't change because the caller has the hwaccess_lock + */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + /* + * The address space is being disabled, drain all knowledge of it out + * from the caches as pages and page tables might be freed after this. + * + * The job scheduler code will already be holding the locks and context + * so just do the flush. + */ + kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); + + kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +} +KBASE_EXPORT_TEST_API(kbase_mmu_disable); + +/* + * We actually only discard the ATE, and not the page table + * pages. There is a potential DoS here, as we'll leak memory by + * having PTEs that are potentially unused. Will require physical + * page accounting, so MMU pages are part of the process allocation. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. 
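+ *
+ * Note that the backing pages of the page tables themselves are only
+ * freed when the whole table is destroyed by kbase_mmu_term().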
+ */ +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr) +{ + phys_addr_t pgd; + u64 start_vpfn = vpfn; + size_t requested_nr = nr; + struct kbase_mmu_mode const *mmu_mode; + int err = -EFAULT; + + if (nr == 0) { + /* early out if nothing to do */ + return 0; + } + + mutex_lock(&mmut->mmu_lock); + + mmu_mode = kbdev->mmu_mode; + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + unsigned int pcount; + int level; + u64 *page; + + if (count > nr) + count = nr; + + /* need to check if this is a 2MB or a 4kB page */ + pgd = mmut->pgd; + + for (level = MIDGARD_MMU_TOPLEVEL; + level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { + phys_addr_t next_pgd; + + index = (vpfn >> ((3 - level) * 9)) & 0x1FF; + page = kmap(phys_to_page(pgd)); + if (mmu_mode->ate_is_valid(page[index], level)) + break; /* keep the mapping */ + else if (!mmu_mode->pte_is_valid(page[index], level)) { + /* nothing here, advance */ + switch (level) { + case MIDGARD_MMU_LEVEL(0): + count = 134217728; + break; + case MIDGARD_MMU_LEVEL(1): + count = 262144; + break; + case MIDGARD_MMU_LEVEL(2): + count = 512; + break; + case MIDGARD_MMU_LEVEL(3): + count = 1; + break; + } + if (count > nr) + count = nr; + goto next; + } + next_pgd = mmu_mode->pte_to_phy_addr(page[index]); + kunmap(phys_to_page(pgd)); + pgd = next_pgd; + } + + switch (level) { + case MIDGARD_MMU_LEVEL(0): + case MIDGARD_MMU_LEVEL(1): + dev_warn(kbdev->dev, + "%s: No support for ATEs at level %d\n", + __func__, level); + kunmap(phys_to_page(pgd)); + goto out; + case MIDGARD_MMU_LEVEL(2): + /* can only teardown if count >= 512 */ + if (count >= 512) { + pcount = 1; + } else { + dev_warn(kbdev->dev, + "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n", + __func__, count); + pcount = 0; + } + break; + case MIDGARD_MMU_BOTTOMLEVEL: + /* page count is the same as the logical count */ + pcount = count; + break; + default: + dev_err(kbdev->dev, + "%s: found non-mapped memory, early out\n", + __func__); + vpfn += count; + nr -= count; + continue; + } + + /* Invalidate the entries we added */ + for (i = 0; i < pcount; i++) + mmu_mode->entry_invalidate(&page[index + i]); + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(phys_to_page(pgd)) + + 8 * index, 8*pcount); + +next: + kunmap(phys_to_page(pgd)); + vpfn += count; + nr -= count; + } + err = 0; +out: + mutex_unlock(&mmut->mmu_lock); + + if (mmut->kctx) + kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr, + true); + else + kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn, requested_nr, + true, as_nr); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); + +/** + * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU + * + * This will update page table entries that already exist on the GPU based on + * the new flags that are passed. It is used as a response to the changes of + * the memory attributes + * + * The caller is responsible for validating the memory attributes + * + * @kctx: Kbase context + * @vpfn: Virtual PFN (Page Frame Number) of the first page to update + * @phys: Tagged physical addresses of the physical pages to replace the + * current mappings + * @nr: Number of pages to update + * @flags: Flags + * @group_id: The physical memory group in which the page was allocated. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). 
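+ *
+ * Return: 0 on success, or a negative error code on failure.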
+ */ +static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id) +{ + phys_addr_t pgd; + u64 *pgd_page; + int err; + struct kbase_device *kbdev; + + if (WARN_ON(kctx == NULL)) + return -EINVAL; + + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + mutex_lock(&kctx->mmu.mmu_lock); + + kbdev = kctx->kbdev; + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + size_t count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > nr) + count = nr; + + do { + err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, + vpfn, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page walk to succeed + */ + mutex_unlock(&kctx->mmu.mmu_lock); + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[ + kctx->mmu.group_id], + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu.mmu_lock); + } while (!err); + if (err) { + dev_warn(kbdev->dev, + "mmu_get_bottom_pgd failure\n"); + goto fail_unlock; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kbdev->dev, "kmap failure\n"); + err = -ENOMEM; + goto fail_unlock; + } + + for (i = 0; i < count; i++) + pgd_page[index + i] = kbase_mmu_create_ate(kbdev, + phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, + group_id); + + phys += count; + vpfn += count; + nr -= count; + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + } + + mutex_unlock(&kctx->mmu.mmu_lock); + return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu.mmu_lock); + return err; +} + +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id) +{ + int err; + + err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, + group_id); + kbase_mmu_flush_invalidate(kctx, vpfn, nr, true); + return err; +} + +static void mmu_teardown_level(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, phys_addr_t pgd, + int level, u64 *pgd_page_buffer) +{ + phys_addr_t target_pgd; + struct page *p; + u64 *pgd_page; + int i; + struct kbase_mmu_mode const *mmu_mode; + + lockdep_assert_held(&mmut->mmu_lock); + + pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + /* kmap_atomic should NEVER fail. */ + if (WARN_ON(pgd_page == NULL)) + return; + /* Copy the page to our preallocated buffer so that we can minimize + * kmap_atomic usage + */ + memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + kunmap_atomic(pgd_page); + pgd_page = pgd_page_buffer; + + mmu_mode = kbdev->mmu_mode; + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); + + if (target_pgd) { + if (mmu_mode->pte_is_valid(pgd_page[i], level)) { + mmu_teardown_level(kbdev, mmut, + target_pgd, + level + 1, + pgd_page_buffer + + (PAGE_SIZE / sizeof(u64))); + } + } + } + + p = pfn_to_page(PFN_DOWN(pgd)); + + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], + p, true); + + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. 
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +int kbase_mmu_init(struct kbase_device *const kbdev, + struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, + int const group_id) +{ + if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || + WARN_ON(group_id < 0)) + return -EINVAL; + + mmut->group_id = group_id; + mutex_init(&mmut->mmu_lock); + mmut->kctx = kctx; + + /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ + mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + + if (mmut->mmu_teardown_pages == NULL) + return -ENOMEM; + + mmut->pgd = 0; + /* We allocate pages into the kbdev memory pool, then + * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to + * avoid allocations from the kernel happening with the lock held. + */ + while (!mmut->pgd) { + int err; + + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[mmut->group_id], + MIDGARD_MMU_BOTTOMLEVEL); + if (err) { + kbase_mmu_term(kbdev, mmut); + return -ENOMEM; + } + + mutex_lock(&mmut->mmu_lock); + mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); + mutex_unlock(&mmut->mmu_lock); + } + + return 0; +} + +void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ + if (mmut->pgd) { + mutex_lock(&mmut->mmu_lock); + mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL, + mmut->mmu_teardown_pages); + mutex_unlock(&mmut->mmu_lock); + + if (mmut->kctx) + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); + } + + kfree(mmut->mmu_teardown_pages); + mutex_destroy(&mmut->mmu_lock); +} + +static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, + int level, char ** const buffer, size_t *size_left) +{ + phys_addr_t target_pgd; + u64 *pgd_page; + int i; + size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); + size_t dump_size; + struct kbase_device *kbdev; + struct kbase_mmu_mode const *mmu_mode; + + if (WARN_ON(kctx == NULL)) + return 0; + lockdep_assert_held(&kctx->mmu.mmu_lock); + + kbdev = kctx->kbdev; + mmu_mode = kbdev->mmu_mode; + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + return 0; + } + + if (*size_left >= size) { + /* A modified physical address that contains + * the page table level + */ + u64 m_pgd = pgd | level; + + /* Put the modified physical address in the output buffer */ + memcpy(*buffer, &m_pgd, sizeof(m_pgd)); + *buffer += sizeof(m_pgd); + + /* Followed by the page table itself */ + memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); + *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; + + *size_left -= size; + } + + if (level < MIDGARD_MMU_BOTTOMLEVEL) { + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + if (mmu_mode->pte_is_valid(pgd_page[i], level)) { + target_pgd = mmu_mode->pte_to_phy_addr( + pgd_page[i]); + + dump_size = kbasep_mmu_dump_level(kctx, + target_pgd, level + 1, + buffer, size_left); + if (!dump_size) { + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return 0; + } + size += dump_size; + } + } + } + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + + return size; +} + +void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) +{ + void *kaddr; + size_t size_left; + + KBASE_DEBUG_ASSERT(kctx); + + if (nr_pages == 0) { + /* can't dump in a 0 sized buffer, early out */ + return NULL; + } + + size_left = nr_pages * PAGE_SIZE; + + if (WARN_ON(size_left == 0)) + 
return NULL; + kaddr = vmalloc_user(size_left); + + mutex_lock(&kctx->mmu.mmu_lock); + + if (kaddr) { + u64 end_marker = 0xFFULL; + char *buffer; + char *mmu_dump_buffer; + u64 config[3]; + size_t dump_size, size = 0; + struct kbase_mmu_setup as_setup; + + buffer = (char *)kaddr; + mmu_dump_buffer = buffer; + + kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, + &as_setup); + config[0] = as_setup.transtab; + config[1] = as_setup.memattr; + config[2] = as_setup.transcfg; + memcpy(buffer, &config, sizeof(config)); + mmu_dump_buffer += sizeof(config); + size_left -= sizeof(config); + size += sizeof(config); + + dump_size = kbasep_mmu_dump_level(kctx, + kctx->mmu.pgd, + MIDGARD_MMU_TOPLEVEL, + &mmu_dump_buffer, + &size_left); + + if (!dump_size) + goto fail_free; + + size += dump_size; + + /* Add on the size for the end marker */ + size += sizeof(u64); + + if (size > (nr_pages * PAGE_SIZE)) { + /* The buffer isn't big enough - free the memory and + * return failure + */ + goto fail_free; + } + + /* Add the end marker */ + memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); + } + + mutex_unlock(&kctx->mmu.mmu_lock); + return kaddr; + +fail_free: + vfree(kaddr); + mutex_unlock(&kctx->mmu.mmu_lock); + return NULL; +} +KBASE_EXPORT_TEST_API(kbase_mmu_dump); + +void bus_fault_worker(struct work_struct *data) +{ + struct kbase_as *faulting_as; + int as_no; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_fault *fault; + + faulting_as = container_of(data, struct kbase_as, work_busfault); + fault = &faulting_as->bf_data; + + /* Ensure that any pending page fault worker has completed */ + flush_work(&faulting_as->work_pagefault); + + as_no = faulting_as->number; + + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + + /* MALI_SEC_INTEGRATION */ + /* clear the type to mark we've arrived in the fault worker */ + //faulting_as->fault_type = KBASE_MMU_FAULT_TYPE_UNKNOWN; + /* Grab the context, already refcounted in kbase_mmu_interrupt() on + * flagging of the bus-fault. Therefore, it cannot be scheduled out of + * this AS until we explicitly release it + */ + kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); + if (!kctx) { + atomic_dec(&kbdev->faults_pending); + return; + } + + if (unlikely(fault->protected_mode)) { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Permission failure", fault); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + release_ctx(kbdev, kctx); + atomic_dec(&kbdev->faults_pending); + return; + + } + + /* NOTE: If GPU already powered off for suspend, + * we don't need to switch to unmapped + */ + if (!kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault); + kbase_pm_context_idle(kbdev); + } + + release_ctx(kbdev, kctx); + + atomic_dec(&kbdev->faults_pending); +} + +void kbase_flush_mmu_wqs(struct kbase_device *kbdev) +{ + int i; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbase_as *as = &kbdev->as[i]; + + flush_workqueue(as->pf_wq); + } +} diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h new file mode 100644 index 000000000000..c9e27b1255c5 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.h @@ -0,0 +1,118 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MMU_H_ +#define _KBASE_MMU_H_ + +/** + * kbase_mmu_init - Initialise an object representing GPU page tables + * + * The structure should be terminated using kbase_mmu_term() + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables to be initialized. + * @kctx: Optional kbase context, may be NULL if this set of MMU tables + * is not associated with a context. + * @group_id: The physical group ID from which to allocate GPU page tables. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: 0 if successful, otherwise a negative error code. + */ +int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct kbase_context *kctx, int group_id); + +/** + * kbase_mmu_interrupt - Process an MMU interrupt. + * + * Process the MMU interrupt that was reported by the &kbase_device. + * + * @kbdev: Pointer to the kbase device for which the interrupt happened. + * @irq_stat: Value of the MMU_IRQ_STATUS register. + */ +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +/** + * kbase_mmu_term - Terminate an object representing GPU page tables + * + * This will free any page tables that have been allocated + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables to be destroyed. + */ +void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); + +/** + * kbase_mmu_create_ate - Create an address translation entry + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @phy: Physical address of the page to be mapped for GPU access. + * @flags: Bitmask of attributes of the GPU memory region being mapped. + * @level: Page table level for which to build an address translation entry. + * @group_id: The physical memory group in which the page was allocated. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * This function creates an address translation entry to encode the physical + * address of a page to be mapped for access by the GPU, along with any extra + * attributes required for the GPU memory region. + * + * Return: An address translation entry, either in LPAE or AArch64 format + * (depending on the driver's configuration). 
+ */ +u64 kbase_mmu_create_ate(struct kbase_device *kbdev, + struct tagged_addr phy, unsigned long flags, int level, int group_id); + +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + const u64 start_vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id); +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, + unsigned long flags, int group_id); + +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + size_t nr, int as_nr); +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id); + +/** + * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. + * + * Process the bus fault interrupt that was reported for a particular GPU + * address space. + * + * @kbdev: Pointer to the kbase device for which bus fault was reported. + * @status: Value of the GPU_FAULTSTATUS register. + * @as_nr: GPU address space for which the bus fault occurred. + * + * Return: zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, + u32 as_nr); + +#endif /* _KBASE_MMU_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h new file mode 100644 index 000000000000..e6eef86d7ac0 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw.h @@ -0,0 +1,107 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * DOC: Interface file for accessing MMU hardware functionality + * + * This module provides an abstraction for accessing the functionality provided + * by the midgard MMU and thus allows all MMU HW access to be contained within + * one common place and allows for different backends (implementations) to + * be provided. + */ + +#ifndef _KBASE_MMU_HW_H_ +#define _KBASE_MMU_HW_H_ + +/* Forward declarations */ +struct kbase_device; +struct kbase_as; +struct kbase_context; + +/** + * enum kbase_mmu_fault_type - MMU fault type descriptor. + */ +enum kbase_mmu_fault_type { + KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, + KBASE_MMU_FAULT_TYPE_PAGE, + KBASE_MMU_FAULT_TYPE_BUS, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED +}; + +/** + * kbase_mmu_hw_configure - Configure an address space for use. + * @kbdev: kbase device to configure. + * @as: address space to configure. 
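As an illustration of the API declared in this header (a sketch, not part of the patch), the expected pairing is kbase_mmu_init() followed eventually by kbase_mmu_term(), with kbase_mmu_insert_pages()/kbase_mmu_teardown_pages() used in between. The function name and the choice of memory group 0 are arbitrary.

static int example_mmu_table_lifetime(struct kbase_device *kbdev,
				      struct kbase_mmu_table *mmut)
{
	int err;

	/* Build an empty set of GPU page tables in physical memory group 0,
	 * not associated with any kbase context (kctx may be NULL).
	 */
	err = kbase_mmu_init(kbdev, mmut, NULL, 0);
	if (err)
		return err;

	/*
	 * ... map pages with kbase_mmu_insert_pages() and unmap them with
	 * kbase_mmu_teardown_pages() while the table is live ...
	 */

	/* Free every page-table level that was allocated. */
	kbase_mmu_term(kbdev, mmut);

	return 0;
}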
+ * + * Configure the MMU using the address space details setup in the + * kbase_context structure. + */ +void kbase_mmu_hw_configure(struct kbase_device *kbdev, + struct kbase_as *as); + +/** + * kbase_mmu_hw_do_operation - Issue an operation to the MMU. + * @kbdev: kbase device to issue the MMU operation on. + * @as: address space to issue the MMU operation on. + * @vpfn: MMU Virtual Page Frame Number to start the operation on. + * @nr: Number of pages to work on. + * @type: Operation type (written to ASn_COMMAND). + * @handling_irq: Is this operation being called during the handling + * of an interrupt? + * + * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that + * is associated with the provided kbase_context over the specified range + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, + u64 vpfn, u32 nr, u32 type, + unsigned int handling_irq); + +/** + * kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by + * the MMU. + * @kbdev: kbase device to clear the fault from. + * @as: address space to clear the fault from. + * @type: The type of fault that needs to be cleared. + * + * Clear a bus error or page fault that has been reported by the MMU. + */ +void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, + enum kbase_mmu_fault_type type); + +/** + * kbase_mmu_hw_enable_fault - Enable fault that has been previously reported by + * the MMU. + * @kbdev: kbase device to again enable the fault from. + * @as: address space to again enable the fault from. + * @type: The type of fault that needs to be enabled again. + * + * After a page fault or bus error has been reported by the MMU these + * will be disabled. After these are handled this function needs to be + * called to enable the page fault or bus error fault again. + */ +void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + enum kbase_mmu_fault_type type); + +#endif /* _KBASE_MMU_HW_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw_direct.c new file mode 100644 index 000000000000..f22e73e07398 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_hw_direct.c @@ -0,0 +1,272 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/** + * lock_region() - Generate lockaddr to lock memory region in MMU + * @pfn: Starting page frame number of the region to lock + * @num_pages: Number of pages to lock. It must be greater than 0. 
+ * @lockaddr: Address and size of memory region to lock + * + * The lockaddr value is a combination of the starting address and + * the size of the region that encompasses all the memory pages to lock. + * + * The size is expressed as a logarithm: it is represented in a way + * that is compatible with the HW specification and it also determines + * how many of the lowest bits of the address are cleared. + * + * Return: 0 if success, or an error code on failure. + */ +static int lock_region(u64 pfn, u32 num_pages, u64 *lockaddr) +{ + const u64 lockaddr_base = pfn << PAGE_SHIFT; + u64 lockaddr_size_log2, region_frame_number_start, + region_frame_number_end; + + if (num_pages == 0) + return -EINVAL; + + /* The size is expressed as a logarithm and should take into account + * the possibility that some pages might spill into the next region. + */ + lockaddr_size_log2 = fls(num_pages) + PAGE_SHIFT - 1; + + /* Round up if the number of pages is not a power of 2. */ + if (num_pages != ((u32)1 << (lockaddr_size_log2 - PAGE_SHIFT))) + lockaddr_size_log2 += 1; + + /* Round up if some memory pages spill into the next region. */ + region_frame_number_start = pfn >> (lockaddr_size_log2 - PAGE_SHIFT); + region_frame_number_end = + (pfn + num_pages - 1) >> (lockaddr_size_log2 - PAGE_SHIFT); + + if (region_frame_number_start < region_frame_number_end) + lockaddr_size_log2 += 1; + + /* Represent the size according to the HW specification. */ + lockaddr_size_log2 = MAX(lockaddr_size_log2, + KBASE_LOCK_REGION_MIN_SIZE_LOG2); + + if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2) + return -EINVAL; + + /* The lowest bits are cleared and then set to size - 1 to represent + * the size in a way that is compatible with the HW specification. + */ + *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); + *lockaddr |= lockaddr_size_log2 - 1; + + return 0; +} + +static int wait_ready(struct kbase_device *kbdev, + unsigned int as_nr) +{ + unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); + + /* Wait for the MMU status to indicate there is no active command, in + * case one is pending. Do not log remaining register accesses. + */ + while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) + val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); + + if (max_loops == 0) { + dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); + return -1; + } + + /* If waiting in loop was performed, log last read value. 
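A worked example of the encoding performed by lock_region() may help (illustration only, assuming PAGE_SHIFT == 12 and that KBASE_LOCK_REGION_MIN_SIZE_LOG2 does not exceed 18):

  lock_region(pfn = 0x1000, num_pages = 40, &lockaddr)

    fls(40) = 6                          ->  lockaddr_size_log2 = 6 + 12 - 1 = 17
    40 != 1 << (17 - 12)                 ->  round up to 18 (a 256 KiB region)
    start frame = 0x1000 >> 6 = 64
    end frame   = (0x1000 + 39) >> 6 = 64 ->  no further rounding needed
    lockaddr_base = 0x1000 << 12 = 0x1000000 (already 256 KiB aligned)
    *lockaddr = 0x1000000 | (18 - 1) = 0x1000011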
*/ + if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) + kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); + + return 0; +} + +static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) +{ + int status; + + /* write AS_COMMAND when MMU is ready to accept another command */ + status = wait_ready(kbdev, as_nr); + if (status == 0) + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); + + return status; +} + +void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) +{ + struct kbase_mmu_setup *current_setup = &as->current_setup; + u64 transcfg = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { + transcfg = current_setup->transcfg; + + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK + * Clear PTW_MEMATTR bits + */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + /* Ensure page-tables reads use read-allocate cache-policy in + * the L2 + */ + transcfg |= AS_TRANSCFG_R_ALLOCATE; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) + * Clear PTW_SH bits + */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (transcfg >> 32) & 0xFFFFFFFFUL); + } else { + if (kbdev->system_coherency == COHERENCY_ACE) + current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), + current_setup->transtab & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), + (current_setup->transtab >> 32) & 0xFFFFFFFFUL); + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), + current_setup->memattr & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), + (current_setup->memattr >> 32) & 0xFFFFFFFFUL); + + KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as, + current_setup->transtab, + current_setup->memattr, + transcfg); + + write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); +} + +int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, + u64 vpfn, u32 nr, u32 op, + unsigned int handling_irq) +{ + int ret; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + if (op == AS_COMMAND_UNLOCK) { + /* Unlock doesn't require a lock first */ + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); + } else { + u64 lock_addr; + + ret = lock_region(vpfn, nr, &lock_addr); + + if (!ret) { + /* Lock the region that needs to be updated */ + kbase_reg_write(kbdev, + MMU_AS_REG(as->number, AS_LOCKADDR_LO), + lock_addr & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, + MMU_AS_REG(as->number, AS_LOCKADDR_HI), + (lock_addr >> 32) & 0xFFFFFFFFUL); + write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + + /* Run the MMU operation */ + write_cmd(kbdev, as->number, op); + + /* Wait for the flush to complete */ + ret = wait_ready(kbdev, as->number); + } + } + + return ret; +} + +void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, + enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 pf_bf_mask; + + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. 
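A hedged usage sketch for kbase_mmu_hw_do_operation() (not from the patch): flushing a small range on address space 0 while holding the MMU hardware mutex that the function asserts. AS_COMMAND_FLUSH_MEM is taken from the GPU register map added elsewhere in this patch; the range values and the function name are arbitrary.

static int example_flush_range(struct kbase_device *kbdev)
{
	int err;

	mutex_lock(&kbdev->mmu_hw_mutex);
	/* Lock, flush and wait for 512 pages starting at virtual PFN 0x2000 */
	err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[0], 0x2000, 512,
					AS_COMMAND_FLUSH_MEM, 0);
	mutex_unlock(&kbdev->mmu_hw_mutex);

	return err;
}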
+ */ + if (kbdev->irq_reset_flush) + goto unlock; + + /* Clear the page (and bus fault IRQ as well in case one occurred) */ + pf_bf_mask = MMU_PAGE_FAULT(as->number); + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + pf_bf_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 irq_mask; + + /* Enable the page fault IRQ + * (and bus fault IRQ as well in case one occurred) + */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | + MMU_PAGE_FAULT(as->number); + + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + irq_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_internal.h b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_internal.h new file mode 100644 index 000000000000..28bd341bf082 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_internal.h @@ -0,0 +1,63 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MMU_INTERNAL_H_ +#define _KBASE_MMU_INTERNAL_H_ + +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup); + +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault); + +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault); + +/** + * kbase_mmu_interrupt_process - Process a bus or page fault. + * @kbdev The kbase_device the fault happened on + * @kctx The kbase_context for the faulting address space if one was found. + * @as The address space that has the fault + * @fault Data relating to the fault + * + * This function will process a fault on a specific address space + */ +void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault); + +/** + * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible + * @kctx The kbase_context for the faulting address space. + * @reg Reference of a growable GPU memory region in the same context. 
+ * Takes ownership of the reference if successful. + * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region. + * + * Return 0 if successful, otherwise a negative error code. + */ +int kbase_mmu_switch_to_ir(struct kbase_context *kctx, + struct kbase_va_region *reg); + +#endif /* _KBASE_MMU_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c new file mode 100644 index 000000000000..02493e9b2621 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_aarch64.c @@ -0,0 +1,200 @@ +/* + * + * (C) COPYRIGHT 2010-2014, 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase.h" +#include +#include "mali_kbase_defs.h" +#include +#include + +#define ENTRY_TYPE_MASK 3ULL +/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0). + * Valid ATE entries at level 3 are flagged with the value 3. + * Valid ATE entries at level 0-2 are flagged with the value 1. + */ +#define ENTRY_IS_ATE_L3 3ULL +#define ENTRY_IS_ATE_L02 1ULL +#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_PTE 3ULL + +#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ +#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */ +#define ENTRY_ACCESS_RO (3ULL << 6) +#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ +#define ENTRY_ACCESS_BIT (1ULL << 10) +#define ENTRY_NX_BIT (1ULL << 54) + +/* Helper Function to perform assignment of page table entries, to + * ensure the use of strd, which is required on LPAE systems. 
+ */ +static inline void page_table_entry_set(u64 *pte, u64 phy) +{ +#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE + WRITE_ONCE(*pte, phy); +#else +#ifdef CONFIG_64BIT + barrier(); + *pte = phy; + barrier(); +#elif defined(CONFIG_ARM) + barrier(); + asm volatile("ldrd r0, [%1]\n\t" + "strd r0, %0\n\t" + : "=m" (*pte) + : "r" (&phy) + : "r0", "r1"); + barrier(); +#else +#error "64-bit atomic write must be implemented for your architecture" +#endif +#endif +} + +static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + int as_nr) +{ + struct kbase_as *as; + struct kbase_mmu_setup *current_setup; + + if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) + return; + + as = &kbdev->as[as_nr]; + current_setup = &as->current_setup; + + kbase_mmu_get_as_setup(mmut, current_setup); + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as); +} + +static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + struct kbase_as * const as = &kbdev->as[as_nr]; + struct kbase_mmu_setup * const current_setup = &as->current_setup; + + current_setup->transtab = 0ULL; + current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as); +} + +static phys_addr_t pte_to_phy_addr(u64 entry) +{ + if (!(entry & 1)) + return 0; + + return entry & ~0xFFF; +} + +static int ate_is_valid(u64 ate, int const level) +{ + if (level == MIDGARD_MMU_BOTTOMLEVEL) + return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3); + else + return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02); +} + +static int pte_is_valid(u64 pte, int const level) +{ + /* PTEs cannot exist at the bottom level */ + if (level == MIDGARD_MMU_BOTTOMLEVEL) + return false; + return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); +} + +/* + * Map KBASE_REG flags to MMU flags + */ +static u64 get_mmu_flags(unsigned long flags) +{ + u64 mmu_flags; + + /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ + mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + + /* Set access flags - note that AArch64 stage 1 does not support + * write-only access, so we use read/write instead + */ + if (flags & KBASE_REG_GPU_WR) + mmu_flags |= ENTRY_ACCESS_RW; + else if (flags & KBASE_REG_GPU_RD) + mmu_flags |= ENTRY_ACCESS_RO; + + /* nx if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; + + if (flags & KBASE_REG_SHARE_BOTH) { + /* inner and outer shareable */ + mmu_flags |= SHARE_BOTH_BITS; + } else if (flags & KBASE_REG_SHARE_IN) { + /* inner shareable coherency */ + mmu_flags |= SHARE_INNER_BITS; + } + + return mmu_flags; +} + +static void entry_set_ate(u64 *entry, + struct tagged_addr phy, + unsigned long flags, + int const level) +{ + if (level == MIDGARD_MMU_BOTTOMLEVEL) + page_table_entry_set(entry, as_phys_addr_t(phy) | + get_mmu_flags(flags) | + ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3); + else + page_table_entry_set(entry, as_phys_addr_t(phy) | + get_mmu_flags(flags) | + ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); +} + +static void entry_set_pte(u64 *entry, phys_addr_t phy) +{ + page_table_entry_set(entry, (phy & PAGE_MASK) | + ENTRY_ACCESS_BIT | ENTRY_IS_PTE); +} + +static void entry_invalidate(u64 *entry) +{ + page_table_entry_set(entry, ENTRY_IS_INVAL); +} + +static struct kbase_mmu_mode const aarch64_mode = { + .update = mmu_update, + .get_as_setup = kbase_mmu_get_as_setup, + .disable_as = mmu_disable_as, + .pte_to_phy_addr = pte_to_phy_addr, + .ate_is_valid = ate_is_valid, + .pte_is_valid = pte_is_valid, + .entry_set_ate = entry_set_ate, + .entry_set_pte = entry_set_pte, + .entry_invalidate = entry_invalidate, + .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE +}; + +struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) +{ + return &aarch64_mode; +} diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c new file mode 100644 index 000000000000..91a2d7ac4dcb --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu_mode_lpae.c @@ -0,0 +1,215 @@ +/* + * + * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +#include "mali_kbase.h" +#include +#include "mali_kbase_defs.h" + +#define ENTRY_TYPE_MASK 3ULL +#define ENTRY_IS_ATE 1ULL +#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_PTE 3ULL + +#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ +#define ENTRY_RD_BIT (1ULL << 6) +#define ENTRY_WR_BIT (1ULL << 7) +#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ +#define ENTRY_ACCESS_BIT (1ULL << 10) +#define ENTRY_NX_BIT (1ULL << 54) + +#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ + ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) + +/* Helper Function to perform assignment of page table entries, to + * ensure the use of strd, which is required on LPAE systems. 
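A worked example of the AArch64 address translation entry built by get_mmu_flags() and entry_set_ate() above (illustration only, assuming a memory-attribute index of 0, no shareability flags and a physical page at 0x80000000):

  flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR         (no GPU_NX, so executable)

    mmu_flags = (0 << 2) | ENTRY_ACCESS_RW            = 0x40
    entry     = 0x80000000 | mmu_flags
              | ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3    = 0x80000000 | 0x40 | 0x400 | 0x3
              = 0x80000443                             (bottom-level, level 3 ATE)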
+ */ +static inline void page_table_entry_set(u64 *pte, u64 phy) +{ +#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE + WRITE_ONCE(*pte, phy); +#else +#ifdef CONFIG_64BIT + barrier(); + *pte = phy; + barrier(); +#elif defined(CONFIG_ARM) + barrier(); + asm volatile("ldrd r0, [%1]\n\t" + "strd r0, %0\n\t" + : "=m" (*pte) + : "r" (&phy) + : "r0", "r1"); + barrier(); +#else +#error "64-bit atomic write must be implemented for your architecture" +#endif +#endif +} + +static void mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup) +{ + /* Set up the required caching policies at the correct indices + * in the memattr register. + */ + setup->memattr = + (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << + (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_LPAE_WRITE_ALLOC << + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << + (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (AS_MEMATTR_LPAE_OUTER_WA << + (AS_MEMATTR_INDEX_OUTER_WA * 8)) | + 0; /* The other indices are unused for now */ + + setup->transtab = ((u64)mmut->pgd & + ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | + AS_TRANSTAB_LPAE_ADRMODE_TABLE | + AS_TRANSTAB_LPAE_READ_INNER; + + setup->transcfg = 0; +} + +static void mmu_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + int as_nr) +{ + struct kbase_as *as; + struct kbase_mmu_setup *current_setup; + + if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) + return; + + as = &kbdev->as[as_nr]; + current_setup = &as->current_setup; + + mmu_get_as_setup(mmut, current_setup); + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as); +} + +static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + struct kbase_as * const as = &kbdev->as[as_nr]; + struct kbase_mmu_setup * const current_setup = &as->current_setup; + + current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as); +} + +static phys_addr_t pte_to_phy_addr(u64 entry) +{ + if (!(entry & 1)) + return 0; + + return entry & ~0xFFF; +} + +static int ate_is_valid(u64 ate, int const level) +{ + return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); +} + +static int pte_is_valid(u64 pte, int const level) +{ + return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); +} + +/* + * Map KBASE_REG flags to MMU flags + */ +static u64 get_mmu_flags(unsigned long flags) +{ + u64 mmu_flags; + unsigned long memattr_idx; + + memattr_idx = KBASE_REG_MEMATTR_VALUE(flags); + if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE, + "Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n")) + memattr_idx = AS_MEMATTR_INDEX_DEFAULT; + /* store mem_attr index as 4:2, noting that: + * - macro called above ensures 3 bits already + * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits + */ + mmu_flags = memattr_idx << 2; + + /* write perm if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0; + /* read perm if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; + /* nx if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; + + if (flags & KBASE_REG_SHARE_BOTH) { + /* inner and outer shareable */ + mmu_flags |= SHARE_BOTH_BITS; + } else if (flags & KBASE_REG_SHARE_IN) { + /* inner shareable coherency */ + mmu_flags |= SHARE_INNER_BITS; + } + + return mmu_flags; +} + +static void entry_set_ate(u64 *entry, + struct tagged_addr phy, + unsigned long flags, + int const level) +{ + page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | + ENTRY_IS_ATE); +} + +static void entry_set_pte(u64 *entry, phys_addr_t phy) +{ + page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); +} + +static void entry_invalidate(u64 *entry) +{ + page_table_entry_set(entry, ENTRY_IS_INVAL); +} + +static struct kbase_mmu_mode const lpae_mode = { + .update = mmu_update, + .get_as_setup = mmu_get_as_setup, + .disable_as = mmu_disable_as, + .pte_to_phy_addr = pte_to_phy_addr, + .ate_is_valid = ate_is_valid, + .pte_is_valid = pte_is_valid, + .entry_set_ate = entry_set_ate, + .entry_set_pte = entry_set_pte, + .entry_invalidate = entry_invalidate, + .flags = 0 +}; + +struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) +{ + return &lpae_mode; +} diff --git a/drivers/gpu/arm/b_r26p0/platform/Kbuild b/drivers/gpu/arm/b_r26p0/platform/Kbuild new file mode 100644 index 000000000000..558657bbced9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/Kbuild @@ -0,0 +1,21 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y) +# remove begin and end quotes from the Kconfig string type + platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) + obj-y += $(platform_name)/ +endif diff --git a/drivers/gpu/arm/b_r26p0/platform/Kconfig b/drivers/gpu/arm/b_r26p0/platform/Kconfig new file mode 100644 index 000000000000..c67433521976 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/Kconfig @@ -0,0 +1,31 @@ +# +# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + + + +# Add your platform specific Kconfig file here +# +# "drivers/gpu/arm/b_25p1/platform/xxx/Kconfig" +# +# Where xxx is the platform name is the name set in MALI_PLATFORM_NAME +# + +source "drivers/gpu/arm/b_r26p0/platform/exynos/Kconfig" diff --git a/drivers/gpu/arm/b_r26p0/platform/devicetree/Kbuild b/drivers/gpu/arm/b_r26p0/platform/devicetree/Kbuild new file mode 100644 index 000000000000..d8fb293cad61 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/devicetree/Kbuild @@ -0,0 +1,29 @@ +# +<<<<<<< HEAD +# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. +======= +# (C) COPYRIGHT 2012-2017, 2020 ARM Limited. All rights reserved. +>>>>>>> 73818f1... Mali Bifrost and Valhall Android DDK r26p0-01eac0 PREVIEW +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o diff --git a/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_clk_rate_trace.c b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_clk_rate_trace.c new file mode 100644 index 000000000000..11a8b77dca06 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_clk_rate_trace.c @@ -0,0 +1,68 @@ +/* + * + * (C) COPYRIGHT 2015, 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/b_r26p0/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name set in MALI_PLATFORM_NAME
+#
+
+source "drivers/gpu/arm/b_r26p0/platform/exynos/Kconfig"
diff --git a/drivers/gpu/arm/b_r26p0/platform/devicetree/Kbuild b/drivers/gpu/arm/b_r26p0/platform/devicetree/Kbuild
new file mode 100644
index 000000000000..d8fb293cad61
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/platform/devicetree/Kbuild
@@ -0,0 +1,25 @@
+#
+# (C) COPYRIGHT 2012-2017, 2020 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+	$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \
+	$(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o
diff --git a/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_clk_rate_trace.c b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_clk_rate_trace.c
new file mode 100644
index 000000000000..11a8b77dca06
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_clk_rate_trace.c
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2015, 2017-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include "mali_kbase_config_platform.h" + +static void *enumerate_gpu_clk(struct kbase_device *kbdev, + unsigned int index) +{ + if (index >= kbdev->nr_clocks) + return NULL; + + return kbdev->clocks[index]; +} + +static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, + void *gpu_clk_handle) +{ + return clk_get_rate((struct clk *)gpu_clk_handle); +} + +static int gpu_clk_notifier_register(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + compiletime_assert(offsetof(struct clk_notifier_data, clk) == + offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), + "mismatch in the offset of clk member"); + + compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == + sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), + "mismatch in the size of clk member"); + + return clk_notifier_register((struct clk *)gpu_clk_handle, nb); +} + +static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); +} + +struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { + .get_gpu_clk_rate = get_gpu_clk_rate, + .enumerate_gpu_clk = enumerate_gpu_clk, + .gpu_clk_notifier_register = gpu_clk_notifier_register, + .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, +}; diff --git a/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_config_devicetree.c new file mode 100644 index 000000000000..ccefddf882fd --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_config_devicetree.c @@ -0,0 +1,41 @@ +/* + * + * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +static struct kbase_platform_config dummy_platform_config; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &dummy_platform_config; +} + +#ifndef CONFIG_OF +int kbase_platform_register(void) +{ + return 0; +} + +void kbase_platform_unregister(void) +{ +} +#endif diff --git a/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_config_platform.h new file mode 100644 index 000000000000..2137b425c1ab --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_config_platform.h @@ -0,0 +1,49 @@ +/* + * + * (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved. 
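The compiletime_assert() calls above guarantee that struct kbase_gpu_clk_notifier_data can be overlaid on the struct clk_notifier_data delivered by the clock framework. As a sketch (not from the patch, callback name hypothetical), a listener registered through gpu_clk_notifier_register() can therefore read the old and new rates directly:

static int example_gpu_clk_listener(struct notifier_block *nb,
				    unsigned long event, void *data)
{
	struct kbase_gpu_clk_notifier_data *ndata = data;

	if (event == POST_RATE_CHANGE)
		pr_debug("GPU clock changed: %lu -> %lu Hz\n",
			 ndata->old_rate, ndata->new_rate);

	return NOTIFY_DONE;
}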
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) + +extern struct kbase_pm_callback_conf pm_callbacks; +extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; + +/** + * Autosuspend delay + * + * The delay time (in milliseconds) to be used for autosuspend + */ +#define AUTO_SUSPEND_DELAY (100) diff --git a/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_runtime_pm.c new file mode 100644 index 000000000000..8772edb56f73 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/devicetree/mali_kbase_runtime_pm.c @@ -0,0 +1,185 @@ +/* + * + * (C) COPYRIGHT 2015, 2017-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#include +#include "mali_kbase_config_platform.h" + +static void enable_gpu_power_control(struct kbase_device *kbdev) +{ + unsigned int i; + +#if defined(CONFIG_REGULATOR) + for (i = 0; i < kbdev->nr_regulators; i++) { + if (WARN_ON(kbdev->regulators[i] == NULL)) + ; + else if (!regulator_is_enabled(kbdev->regulators[i])) + WARN_ON(regulator_enable(kbdev->regulators[i])); + } +#endif + + for (i = 0; i < kbdev->nr_clocks; i++) { + if (WARN_ON(kbdev->clocks[i] == NULL)) + ; + else if (!__clk_is_enabled(kbdev->clocks[i])) + WARN_ON(clk_prepare_enable(kbdev->clocks[i])); + } +} + +static void disable_gpu_power_control(struct kbase_device *kbdev) +{ + unsigned int i; + + for (i = 0; i < kbdev->nr_clocks; i++) { + if (WARN_ON(kbdev->clocks[i] == NULL)) + ; + else if (__clk_is_enabled(kbdev->clocks[i])) { + clk_disable_unprepare(kbdev->clocks[i]); + WARN_ON(__clk_is_enabled(kbdev->clocks[i])); + } + + } + +#if defined(CONFIG_REGULATOR) + for (i = 0; i < kbdev->nr_regulators; i++) { + if (WARN_ON(kbdev->regulators[i] == NULL)) + ; + else if (regulator_is_enabled(kbdev->regulators[i])) + WARN_ON(regulator_disable(kbdev->regulators[i])); + } +#endif +} + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + int ret = 1; /* Assume GPU has been powered off */ + int error; + + dev_dbg(kbdev->dev, "pm_callback_power_on %p\n", + (void *)kbdev->dev->pm_domain); + + enable_gpu_power_control(kbdev); + + error = pm_runtime_get_sync(kbdev->dev); + if (error == 1) { + /* + * Let core know that the chip has not been + * powered off, so we can save on re-initialization. + */ + ret = 0; + } + + dev_dbg(kbdev->dev, "pm_runtime_get_sync returned %d\n", error); + + return ret; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "pm_callback_power_off\n"); + + pm_runtime_mark_last_busy(kbdev->dev); + pm_runtime_put_autosuspend(kbdev->dev); + +#ifndef KBASE_PM_RUNTIME + disable_gpu_power_control(kbdev); +#endif +} + +#ifdef KBASE_PM_RUNTIME +static int kbase_device_runtime_init(struct kbase_device *kbdev) +{ + int ret = 0; + + dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); + + pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY); + pm_runtime_use_autosuspend(kbdev->dev); + + pm_runtime_set_active(kbdev->dev); + pm_runtime_enable(kbdev->dev); + + if (!pm_runtime_enabled(kbdev->dev)) { + dev_warn(kbdev->dev, "pm_runtime not enabled"); + ret = -ENOSYS; + } + + return ret; +} + +static void kbase_device_runtime_disable(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n"); + pm_runtime_disable(kbdev->dev); +} +#endif + +static int pm_callback_runtime_on(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "pm_callback_runtime_on\n"); + + enable_gpu_power_control(kbdev); + return 0; +} + +static void pm_callback_runtime_off(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "pm_callback_runtime_off\n"); + + disable_gpu_power_control(kbdev); +} + +static void pm_callback_resume(struct kbase_device *kbdev) +{ + int ret = pm_callback_runtime_on(kbdev); + + WARN_ON(ret); +} + +static void pm_callback_suspend(struct kbase_device *kbdev) +{ + pm_callback_runtime_off(kbdev); +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = pm_callback_suspend, + .power_resume_callback = pm_callback_resume, +#ifdef 
KBASE_PM_RUNTIME + .power_runtime_init_callback = kbase_device_runtime_init, + .power_runtime_term_callback = kbase_device_runtime_disable, + .power_runtime_on_callback = pm_callback_runtime_on, + .power_runtime_off_callback = pm_callback_runtime_off, +#else /* KBASE_PM_RUNTIME */ + .power_runtime_init_callback = NULL, + .power_runtime_term_callback = NULL, + .power_runtime_on_callback = NULL, + .power_runtime_off_callback = NULL, +#endif /* KBASE_PM_RUNTIME */ +}; + + diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/Kbuild b/drivers/gpu/arm/b_r26p0/platform/exynos/Kbuild new file mode 100644 index 000000000000..88920c9dfb50 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/Kbuild @@ -0,0 +1,40 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + +MALI_CUSTOMER_RELEASE = 1 +DEFINES +=-DMALI_SEC_PROBE_TEST=0 + +# EXYNOS THERMAL +THERMAL_PATH = $(srctree)/drivers/thermal/samsung/ + +ccflags-y += $(DEFINES) -I$(THERMAL_PATH) +subdir-ccflags-y += $(DEFINES) -I$(THERMAL_PATH) + +soc_name := $(shell echo $(CONFIG_EXYNOS_SOC_NAME)) + +obj-y += gpu_integration_callbacks.o +obj-y += mali_kbase_platform.o +obj-y += gpu_notifier.o +obj-y += gpu_control.o +obj-y += gpu_pmqos.o +obj-y += gpu_utilization.o +obj-y += gpu_dvfs_handler.o +obj-y += gpu_dvfs_api.o +obj-y += gpu_dvfs_governor.o +obj-y += gpu_job_fence_debug.o +obj-y += mali_kbase_clk_rate_trace.o +obj-$(CONFIG_MALI_DEBUG_SYS) += gpu_custom_interface.o +obj-$(CONFIG_CPU_THERMAL_IPA) += gpu_ipa.o +obj-$(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) += gpu_protected_mode.o +obj-$(CONFIG_MALI_EXYNOS_SECURE_RENDERING_ARM) += gpu_protected_mode.o diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/Kconfig b/drivers/gpu/arm/b_r26p0/platform/exynos/Kconfig new file mode 100644 index 000000000000..6955769c0dfe --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/Kconfig @@ -0,0 +1,142 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + + +# Add your platform specific Kconfig file here +# +# "drivers/gpu/arm/.../platform/Kconfig" +# +# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME +# +config EXYNOS_SOC_NAME + depends on MALI_MIDGARD + string "Third party soc name" + help + soc name. + +config MALI_DVFS + bool "Enable EXYNOS DVFS" + default y + help + Choose this option to enable DVFS in the Mali Midgard DDK. + +config MALI_RT_PM + bool "Enable EXYNOS Runtime power management" + default y + help + Choose this option to enable runtime power management in the Mali Midgard DDK. 
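For reference only, a board defconfig fragment enabling the Exynos integration options declared in this Kconfig might look like the following; the option names come from this file, while the values (in particular the SoC name string) are examples, not taken from the patch.

CONFIG_MALI_MIDGARD=y
CONFIG_EXYNOS_SOC_NAME="exynos9830"
CONFIG_MALI_DVFS=y
CONFIG_MALI_RT_PM=y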
+ +config MALI_EXYNOS_TRACE + bool "Enable EXYNOS kbase tracing" + depends on MALI_MIDGARD + default y + help + Enables tracing in kbase. Trace log available through + the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled + +config MALI_DEBUG_SYS + bool "Enable sysfs for the Mali Midgard DDK " + depends on MALI_MIDGARD && SYSFS + default y + help + Enables sysfs for the Mali Midgard DDK. Set/Monitor the Mali Midgard DDK + +config MALI_SEC_CL_BOOST + bool "Enable EXYNOS cl booster" + default y + help + Enables open cl dvfs booster. + +config MALI_PM_QOS + bool "Enable DVFS with QoS" + default y + help + Choose this option to enable PM_QOS in the Mali tTRx DDK. + +config MALI_GPU_PM_QOS + bool "Enable GPU PM QoS" + default n + help + Enable GPU PM QoS to control GPU frequency. + +config MALI_DEBUG_KERNEL_SYSFS + bool "Support Kernel Group Debug SysFS" + depends on MALI_MIDGARD && MALI_DEBUG_SYS + default y + help + Support Support Kernel Group Debug SysFS on /sys/kernel/gpu + +config MALI_ASV_CALIBRATION_SUPPORT + bool "Enable GPU ASV AUTO CALIBRATION" + depends on MALI_MIDGARD && MALI_RT_PM && MALI_DVFS && VDD_AUTO_CAL + default n + help + Choose this option to enable AUTO_CALIBRATION_SUPPORT in the Mali tTRx DDK. + +config MALI_SEC_VK_BOOST + bool "Enable GPU VK JOB PMQOS BOOST" + default n + help + Choose this option to enable VK JOB PMQOS BOOST in the Mali tTRx DDK. + +config MALI_SEC_JOB_STATUS_CHECK + bool "Enable GPU JOB & FENCE STATUS DUMP" + depends on MALI_MIDGARD && MALI_RT_PM && MALI_DVFS && (SYNC || SYNC_FILE) + default n + help + Choose this option to enable job & fence status dump feature in the Mali tTRx DDK. + +config MALI_SUSTAINABLE_OPT + bool "Enable Sustainable optimize" + depends on MALI_MIDGARD && MALI_RT_PM && MALI_DVFS + default n + help + Choose this option to enable sustainable optimization in the Mali tTRx DDK. + +config MALI_CAMERA_EXT_BTS + bool "Enable sysfs node for camera ext bts scenario" + depends on MALI_MIDGARD && EXYNOS_BTS && EXYNOS9630_BTS + default n + help + Choose this option to enable sysfs node for camera ext bts scenario + +config MALI_FTRACE_FREQ + bool "Enable ftrace for gpu frequency" + depends on MALI_MIDGARD && MALI_DVFS + default n + help + Choose this option to enable ftrace for gpu frequency + +config MALI_EXYNOS_SECURE_RENDERING_LEGACY + bool "Legacy" + depends on MALI_MIDGARD && ION_EXYNOS && EXYNOS_CONTENT_PATH_PROTECTION + default n + help + Support Secure Rendering on Exynos SoC (Legacy) + +config MALI_EXYNOS_SECURE_RENDERING_ARM + bool "Modified ARM" + depends on MALI_MIDGARD && ION_EXYNOS && EXYNOS_CONTENT_PATH_PROTECTION + default n + help + Support Secure Rendering on Exynos SoC (Newer ARM method) + +config MALI_EXYNOS_SECURE_RENDERING_UNSUPPORTED + bool "Disabled" + default !(MALI_EXYNOS_SECURE_RENDERING_LEGACY) && !(MALI_EXYNOS_SECURE_RENDERING_ARM) + help + Does not support Secure Rendering on Exynos SoC. + Should be enabled when MALI_EXYNOS_SECURE_RENDERING_LEGACY and MALI_EXYNOS_SECURE_RENDERING_ARM are disabled. diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_control.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_control.c new file mode 100644 index 000000000000..706cd2b2c8e6 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_control.c @@ -0,0 +1,641 @@ +/* drivers/gpu/arm/.../platform/gpu_control.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. 
+ * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_control.c + * DVFS + */ + +#include + +#include +#include +#include +#include + +#include "mali_kbase_platform.h" +#include "gpu_dvfs_handler.h" +#include "gpu_control.h" + +#ifdef CONFIG_EXYNOS_PD +#include +#endif +#ifdef CONFIG_EXYNOS_PMU +#include +#endif +#ifdef CONFIG_CAL_IF +#include +#endif +#ifdef CONFIG_OF +#include +#endif + +#ifdef CONFIG_MALI_FTRACE_FREQ +#include +#endif + +extern struct regulator *g3d_m_regulator; +unsigned int gpu_pmu_status_reg_offset; +unsigned int gpu_pmu_status_local_pwr_mask; +#define EXYNOS_PMU_G3D_STATUS gpu_pmu_status_reg_offset +#define LOCAL_PWR_CFG gpu_pmu_status_local_pwr_mask + +#ifdef CONFIG_MALI_RT_PM +static struct exynos_pm_domain *gpu_get_pm_domain(char *g3d_genpd_name) +{ + struct platform_device *pdev = NULL; + struct device_node *np = NULL; + struct exynos_pm_domain *pd_temp, *pd = NULL; + + for_each_compatible_node(np, NULL, "samsung,exynos-pd") { + if (!of_device_is_available(np)) + continue; + + pdev = of_find_device_by_node(np); + pd_temp = (struct exynos_pm_domain *)platform_get_drvdata(pdev); + if (!strcmp(g3d_genpd_name, (const char *)(pd_temp->genpd.name))) { + pd = pd_temp; + break; + } + } + + if(pd == NULL) + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: g3d pm_domain is null\n", __func__); + + return pd; +} +#endif /* CONFIG_MALI_RT_PM */ + +int gpu_register_dump(void) +{ + return 0; +} + +int gpu_is_power_on(void) +{ + unsigned int val = 0; + +#ifdef CONFIG_EXYNOS_PMU + exynos_pmu_read(EXYNOS_PMU_G3D_STATUS, &val); +#else + val = 0xf; +#endif + return ((val & LOCAL_PWR_CFG) == LOCAL_PWR_CFG) ? 
1 : 0; +} + +int gpu_control_is_power_on(struct kbase_device *kbdev) +{ + int ret = 0; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is null\n", __func__); + return -ENODEV; + } + + mutex_lock(&platform->gpu_clock_lock); + ret = gpu_is_power_on(); + mutex_unlock(&platform->gpu_clock_lock); + + return ret; +} + +int gpu_get_cur_clock(struct exynos_context *platform) +{ + if (!platform) + return -ENODEV; +#ifdef CONFIG_CAL_IF + return cal_dfs_get_rate(platform->g3d_cmu_cal_id); +#else + return 0; +#endif +} + +#ifdef CONFIG_MALI_DVFS +static int gpu_set_dvfs_using_calapi(struct exynos_context *platform, int clk) +{ + int ret = 0; + +#ifdef CONFIG_MALI_RT_PM + if (platform->exynos_pm_domain) + mutex_lock(&platform->exynos_pm_domain->access_lock); + + if (!gpu_is_power_on()) { + ret = -1; + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: can't set clock in the power-off state!\n", __func__); + goto err; + } +#endif /* CONFIG_MALI_RT_PM */ + + if (clk == platform->cur_clock) { + ret = 0; + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "%s: skipped to set clock for %dMhz!\n", + __func__, platform->cur_clock); + +#ifdef CONFIG_MALI_RT_PM + if (platform->exynos_pm_domain) + mutex_unlock(&platform->exynos_pm_domain->access_lock); +#endif + return ret; + } + +#ifdef CONFIG_DEBUG_SNAPSHOT_FREQ + if (platform->gpu_dss_freq_id) +#ifdef CONFIG_SOC_EXYNOS9820 + dbg_snapshot_freq_misc(platform->gpu_dss_freq_id, platform->cur_clock, clk, DSS_FLAG_IN); +#else + dbg_snapshot_freq(platform->gpu_dss_freq_id, platform->cur_clock, clk, DSS_FLAG_IN); +#endif +#endif + + cal_dfs_set_rate(platform->g3d_cmu_cal_id, clk); + +#ifdef CONFIG_DEBUG_SNAPSHOT_FREQ + if (platform->gpu_dss_freq_id) +#ifdef CONFIG_SOC_EXYNOS9820 + dbg_snapshot_freq_misc(platform->gpu_dss_freq_id, platform->cur_clock, clk, DSS_FLAG_OUT); +#else + dbg_snapshot_freq(platform->gpu_dss_freq_id, platform->cur_clock, clk, DSS_FLAG_OUT); +#endif +#endif + + platform->cur_clock = cal_dfs_get_rate(platform->g3d_cmu_cal_id); + +#ifdef CONFIG_MALI_FTRACE_FREQ + trace_gpu_frequency_change(platform->cur_clock); +#endif + + GPU_LOG(DVFS_DEBUG, LSI_CLOCK_VALUE, clk, platform->cur_clock, + "[id: %x] clock set: %d, clock get: %d\n", + platform->g3d_cmu_cal_id, clk, platform->cur_clock); + +#ifdef CONFIG_MALI_RT_PM +err: + if (platform->exynos_pm_domain) + mutex_unlock(&platform->exynos_pm_domain->access_lock); +#endif /* CONFIG_MALI_RT_PM */ + return ret; +} + +int gpu_control_set_dvfs(struct kbase_device *kbdev, int clock) +{ + int ret = 0; + bool is_up = false; + static int prev_clock = -1; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is null\n", __func__); + return -ENODEV; + } + + if (platform->dvs_is_enabled || (platform->inter_frame_pm_status && !platform->inter_frame_pm_is_poweron)) { + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, + "%s: can't set clock in the dvs mode (requested clock %d)\n", __func__, clock); + return 0; + } +#ifdef CONFIG_MALI_DVFS + if (gpu_dvfs_get_level(clock) < 0) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: mismatch clock error (%d)\n", __func__, clock); + return -1; + } +#endif + + is_up = prev_clock < clock; + +#ifdef CONFIG_MALI_PM_QOS + if (is_up) + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_SET); +#endif /* CONFIG_MALI_PM_QOS */ + + if (platform->g3d_cmu_cal_id) + gpu_set_dvfs_using_calapi(platform, clock); + 
+#ifdef CONFIG_MALI_PM_QOS
+	if (!is_up) /* is_down */
+		gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_SET);
+#endif /* CONFIG_MALI_PM_QOS */
+
+	gpu_dvfs_update_time_in_state(prev_clock);
+
+	/*
+	 * We assume there is only one callback registered and that sufficient
+	 * locking is already in place (both here and in the receiving callback),
+	 * so the callback function is simply invoked directly.
+	 */
+	if (platform->nb_clock_change != NULL) {
+		struct kbase_gpu_clk_notifier_data ndata;
+		ndata.gpu_clk_handle = platform; /* only one clock, we use the platform struct as a fake clock handle */
+		ndata.old_rate = prev_clock;
+		ndata.new_rate = clock;
+		platform->nb_clock_change->notifier_call(platform->nb_clock_change, POST_RATE_CHANGE, &ndata);
+	}
+
+	prev_clock = clock;
+
+	return ret;
+}
+
+int gpu_control_set_clock(struct kbase_device *kbdev, int clock)
+{
+	int ret = 0;
+	bool is_up = false;
+	static int prev_clock = -1;
+	struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context;
+	if (!platform) {
+		GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is null\n", __func__);
+		return -ENODEV;
+	}
+
+	if (platform->dvs_is_enabled || (platform->inter_frame_pm_status && !platform->inter_frame_pm_is_poweron)) {
+		GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u,
+			"%s: can't set clock in the dvs mode (requested clock %d)\n", __func__, clock);
+		return 0;
+	}
+#ifdef CONFIG_MALI_DVFS
+	if (gpu_dvfs_get_level(clock) < 0) {
+		GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: mismatch clock error (%d)\n", __func__, clock);
+		return -1;
+	}
+#endif
+
+	is_up = prev_clock < clock;
+
+#ifdef CONFIG_MALI_PM_QOS
+	if (is_up)
+		gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_SET);
+#endif /* CONFIG_MALI_PM_QOS */
+
+#ifdef CONFIG_MALI_PM_QOS
+	if (is_up && ret)
+		gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_SET);
+	else if (!is_up && !ret)
+		gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_SET);
+#endif /* CONFIG_MALI_PM_QOS */
+
+	gpu_dvfs_update_time_in_state(prev_clock);
+	prev_clock = clock;
+
+	return ret;
+}
+
+int gpu_control_enable_clock(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context;
+	if (!platform) {
+		GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is null\n", __func__);
+		return -ENODEV;
+	}
+
+	gpu_dvfs_update_time_in_state(0);
+
+	return ret;
+}
+
+int gpu_control_disable_clock(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context;
+	if (!platform) {
+		GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is null\n", __func__);
+		return -ENODEV;
+	}
+
+	gpu_dvfs_update_time_in_state(platform->cur_clock);
+#ifdef CONFIG_MALI_PM_QOS
+	gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_RESET);
+#endif /* CONFIG_MALI_PM_QOS */
+
+	return ret;
+}
+
+#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT
+int gpu_control_power_policy_set(struct kbase_device *kbdev, const char *buf)
+{
+	int ret = 0;
+	struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context;
+	const struct kbase_pm_policy *const *policy_list;
+	static const struct kbase_pm_policy *prev_policy;
+	int policy_count;
+	int i;
+
+	if (!platform) {
+		GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is null\n", __func__);
+		return -ENODEV;
+	}
+
+	prev_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(kbdev, &policy_list);
+
+	GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: gpu dev_drv 
name = %s\n", __func__, kbdev->dev->driver->name); + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: gpu prev power policy = %s\n", __func__, prev_policy->name); + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: gpu power policy count= %d\n", __func__, policy_count); + + for (i = 0; i < policy_count; i++) { + if (sysfs_streq(policy_list[i]->name, buf)) { + kbase_pm_set_policy(kbdev, policy_list[i]); + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: gpu cur power policy = %s\n", __func__, policy_list[i]->name); + break; + } + } + + return ret; +} +#endif + + +#endif + +#ifdef CONFIG_REGULATOR +int gpu_enable_dvs(struct exynos_context *platform) +{ +#ifdef CONFIG_MALI_RT_PM + if (!platform->dvs_status) + return 0; + + if (!gpu_is_power_on()) { + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "%s: can't set dvs in the power-off state!\n", __func__); + return -1; + } + +#if defined(CONFIG_REGULATOR_S2MPS16) + /* Do not need to enable dvs during suspending */ + if (!pkbdev->pm.suspending) { + if (cal_dfs_ext_ctrl(dvfs_g3d, cal_dfs_dvs, 1) != 0) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: failed to enable dvs\n", __func__); + return -1; + } + } +#endif /* CONFIG_REGULATOR_S2MPS16 */ + + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "dvs is enabled (vol: %d)\n", gpu_get_cur_voltage(platform)); +#endif + return 0; +} + +int gpu_disable_dvs(struct exynos_context *platform) +{ + if (!platform->dvs_status) + return 0; + +#ifdef CONFIG_MALI_RT_PM +#if defined(CONFIG_REGULATOR_S2MPS16) + if (cal_dfs_ext_ctrl(dvfs_g3d, cal_dfs_dvs, 0) != 0) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: failed to disable dvs\n", __func__); + return -1; + } +#endif /* CONFIG_REGULATOR_S2MPS16 */ + + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "dvs is disabled (vol: %d)\n", gpu_get_cur_voltage(platform)); +#endif + return 0; +} + +int gpu_inter_frame_power_on(struct exynos_context *platform) +{ +#ifdef CONFIG_MALI_RT_PM + int status; + + if (!platform->inter_frame_pm_status) + return 0; + + mutex_lock(&platform->exynos_pm_domain->access_lock); + + status = cal_pd_status(platform->exynos_pm_domain->cal_pdid); + if (status) { + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, + "%s: status checking : Already gpu inter frame power on\n",__func__); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + return 0; + } + + if (cal_pd_control(platform->exynos_pm_domain->cal_pdid, 1) != 0) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: failed to gpu inter frame power on\n", __func__); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + return -1; + } + + status = cal_pd_status(platform->exynos_pm_domain->cal_pdid); + if (!status) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: status error : gpu inter frame power on\n", __func__); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + return -1; + } + + mutex_unlock(&platform->exynos_pm_domain->access_lock); + GPU_LOG(DVFS_DEBUG, LSI_IFPM_POWER_ON, 0u, 0u, "gpu inter frame power on\n"); +#endif + return 0; +} + +int gpu_inter_frame_power_off(struct exynos_context *platform) +{ +#ifdef CONFIG_MALI_RT_PM + int status; + + if (!platform->inter_frame_pm_status) + return 0; + + mutex_lock(&platform->exynos_pm_domain->access_lock); + + status = cal_pd_status(platform->exynos_pm_domain->cal_pdid); + if (!status) { + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, + "%s: status checking: Already gpu inter frame power off\n", __func__); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + return 0; + } + + if (cal_pd_control(platform->exynos_pm_domain->cal_pdid, 0) != 0) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: failed to gpu inter frame power off\n", 
__func__); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + return -1; + } + + status = cal_pd_status(platform->exynos_pm_domain->cal_pdid); + if (status) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: status error : gpu inter frame power off\n", __func__); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + return -1; + } + + mutex_unlock(&platform->exynos_pm_domain->access_lock); + GPU_LOG(DVFS_DEBUG, LSI_IFPM_POWER_OFF, 0u, 0u, "gpu inter frame power off\n"); +#endif + return 0; +} + + +int gpu_control_enable_customization(struct kbase_device *kbdev) +{ + int ret = 0; + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + if (!platform) + return -ENODEV; + +#ifdef CONFIG_REGULATOR +#if (defined(CONFIG_SCHED_EMS) || defined(CONFIG_SCHED_EHMP) || defined(CONFIG_SCHED_HMP)) + mutex_lock(&platform->gpu_sched_hmp_lock); + + if (platform->inter_frame_pm_feature == false) + platform->inter_frame_pm_status = false; + else if (platform->ctx_need_qos == true) + platform->inter_frame_pm_status = false; +#ifdef CONFIG_MALI_SEC_CL_BOOST + else if (kbdev->pm.backend.metrics.is_full_compute_util) + platform->inter_frame_pm_status = false; +#endif + else + platform->inter_frame_pm_status = true; + + mutex_unlock(&platform->gpu_sched_hmp_lock); +#endif + if (!platform->dvs_status && !platform->inter_frame_pm_status) + return 0; + + mutex_lock(&platform->gpu_clock_lock); + + if (platform->dvs_status) { + ret = gpu_enable_dvs(platform); + platform->dvs_is_enabled = true; + } else if (platform->inter_frame_pm_status) { + /* inter frame power off */ + if (platform->gpu_set_pmu_duration_reg && + platform->gpu_set_pmu_duration_val) + exynos_pmu_write(platform->gpu_set_pmu_duration_reg, platform->gpu_set_pmu_duration_val); + gpu_inter_frame_power_off(platform); + platform->inter_frame_pm_is_poweron = false; + } + mutex_unlock(&platform->gpu_clock_lock); +#endif /* CONFIG_REGULATOR */ + + return ret; +} + +int gpu_control_disable_customization(struct kbase_device *kbdev) +{ + int ret = 0; + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + if (!platform) + return -ENODEV; + +#ifdef CONFIG_REGULATOR + if (!platform->dvs_status && !platform->inter_frame_pm_status) + return 0; + + mutex_lock(&platform->gpu_clock_lock); + if (platform->dvs_status) { + ret = gpu_disable_dvs(platform); + platform->dvs_is_enabled = false; + } else if (platform->inter_frame_pm_status) { + /* inter frame power on */ + gpu_inter_frame_power_on(platform); + platform->inter_frame_pm_is_poweron = true; + } + + mutex_unlock(&platform->gpu_clock_lock); +#endif /* CONFIG_REGULATOR */ + + return ret; +} + +#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT +struct workqueue_struct *gpu_asv_cali_wq; +struct delayed_work gpu_asv_cali_stop_work; + +static void gpu_asv_calibration_stop_callback(struct work_struct *data) +{ + struct exynos_context *platform = (struct exynos_context *) pkbdev->platform_context; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is null\n", __func__); + return; + } + + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, ASV_CALI_LOCK, 0); + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, ASV_CALI_LOCK, 0); + gpu_control_power_policy_set(pkbdev, "demand"); + platform->gpu_auto_cali_status = false; +} + +int gpu_asv_calibration_start(void) +{ + struct exynos_context *platform = (struct exynos_context *) pkbdev->platform_context; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is null\n", __func__); + 
return -ENODEV; + } + + platform->gpu_auto_cali_status = true; + gpu_control_power_policy_set(pkbdev, "always_on"); + gpu_dvfs_clock_lock(GPU_DVFS_MAX_LOCK, ASV_CALI_LOCK, platform->gpu_asv_cali_lock_val); + gpu_dvfs_clock_lock(GPU_DVFS_MIN_LOCK, ASV_CALI_LOCK, platform->gpu_asv_cali_lock_val); + + if (gpu_asv_cali_wq == NULL) { + INIT_DELAYED_WORK(&gpu_asv_cali_stop_work, gpu_asv_calibration_stop_callback); + gpu_asv_cali_wq = create_workqueue("g3d_asv_cali"); + + queue_delayed_work_on(0, gpu_asv_cali_wq, + &gpu_asv_cali_stop_work, msecs_to_jiffies(15000)); /* 15 second */ +} + + return 0; +} +#endif + +#endif /* CONFIG_REGULATOR */ + +int gpu_get_cur_voltage(struct exynos_context *platform) +{ + return 0; +} +int *get_mif_table(int *size) +{ + return NULL; +} + +int gpu_control_module_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_OF + struct device_node *np; +#endif + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + if (!platform) + return -ENODEV; + +#ifdef CONFIG_MALI_RT_PM + platform->exynos_pm_domain = gpu_get_pm_domain(platform->g3d_genpd_name); +#endif /* CONFIG_MALI_RT_PM */ + +#ifdef CONFIG_OF + np = kbdev->dev->of_node; + if (np != NULL) { + gpu_update_config_data_int(np, "gpu_pmu_status_reg_offset", &gpu_pmu_status_reg_offset); + gpu_update_config_data_int(np, "gpu_pmu_status_local_pwr_mask", &gpu_pmu_status_local_pwr_mask); + } +#endif + + return 0; +} + +void gpu_control_module_term(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + if (!platform) + return; + +#ifdef CONFIG_MALI_RT_PM + platform->exynos_pm_domain = NULL; +#endif /* CONFIG_MALI_RT_PM */ +} diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_control.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_control.h new file mode 100644 index 000000000000..628c72774d17 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_control.h @@ -0,0 +1,78 @@ +/* drivers/gpu/arm/.../platform/gpu_control.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
+ */ + +/** + * @file gpu_control.h + * DVFS + */ + +#ifndef _GPU_CONTROL_H_ +#define _GPU_CONTROL_H_ + +struct gpu_control_ops { + int (*is_power_on)(void); + + int (*set_dvfs)(struct exynos_context *platform, int clk); + int (*set_voltage)(struct exynos_context *platform, int vol); + int (*set_voltage_pre)(struct exynos_context *platform, bool is_up); + int (*set_voltage_post)(struct exynos_context *platform, bool is_up); + + int (*set_clock)(struct exynos_context *platform, int clk); + int (*set_clock_pre)(struct exynos_context *platform, int clk, bool is_up); + int (*set_clock_post)(struct exynos_context *platform, int clk, bool is_up); + int (*set_clock_to_osc)(struct exynos_context *platform); + + int (*enable_clock)(struct exynos_context *platform); + int (*disable_clock)(struct exynos_context *platform); +}; + +int get_cpu_clock_speed(u32 *cpu_clock); +int gpu_control_set_voltage(struct kbase_device *kbdev, int voltage); +#if defined (CONFIG_SOC_EXYNOS8890) +int gpu_control_set_m_voltage(struct kbase_device *kbdev, int clk); +#endif +int gpu_control_set_dvfs(struct kbase_device *kbdev, int clock); +int gpu_control_set_clock(struct kbase_device *kbdev, int clock); +int gpu_control_enable_clock(struct kbase_device *kbdev); +int gpu_control_disable_clock(struct kbase_device *kbdev); +int gpu_control_is_power_on(struct kbase_device *kbdev); + +int gpu_is_power_on(void); +int gpu_power_init(struct kbase_device *kbdev); +int gpu_get_cur_voltage(struct exynos_context *platform); +int gpu_get_cur_clock(struct exynos_context *platform); +int gpu_is_clock_on(void); +int gpu_register_dump(void); +int gpu_clock_init(struct kbase_device *kbdev); +struct gpu_control_ops *gpu_get_control_ops(void); + +int gpu_control_enable_customization(struct kbase_device *kbdev); +int gpu_control_disable_customization(struct kbase_device *kbdev); + +int gpu_enable_dvs(struct exynos_context *platform); +int gpu_disable_dvs(struct exynos_context *platform); + +int gpu_inter_frame_power_on(struct exynos_context *platform); +int gpu_inter_frame_power_off(struct exynos_context *platform); + +#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT +int gpu_control_power_policy_set(struct kbase_device *kbdev, const char *buf); +int gpu_asv_calibration_start(void); +#endif + +int gpu_regulator_init(struct exynos_context *platform); + +int gpu_control_module_init(struct kbase_device *kbdev); +void gpu_control_module_term(struct kbase_device *kbdev); + +int gpu_device_specific_init(struct kbase_device *kbdev); +#endif /* _GPU_CONTROL_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_custom_interface.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_custom_interface.c new file mode 100644 index 000000000000..b98349dbd858 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_custom_interface.c @@ -0,0 +1,2224 @@ +/* drivers/gpu/arm/.../platform/gpu_custom_interface.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
+ */ + +/** + * @file gpu_custom_interface.c + * DVFS + */ + +#include + +#include + +#if defined(CONFIG_MALI_DVFS) && defined(CONFIG_EXYNOS_THERMAL) && defined(CONFIG_GPU_THERMAL) +#include "exynos_tmu.h" +#endif + +#include "mali_kbase_platform.h" +#include "gpu_dvfs_handler.h" +#include "gpu_dvfs_governor.h" +#include "gpu_control.h" +#ifdef CONFIG_CPU_THERMAL_IPA +#include "gpu_ipa.h" +#endif /* CONFIG_CPU_THERMAL_IPA */ +#include "gpu_custom_interface.h" + +#ifdef CONFIG_MALI_RT_PM +#include +#endif + +#ifdef CONFIG_MALI_CAMERA_EXT_BTS +#include +#endif + +extern struct kbase_device *pkbdev; + +int gpu_pmqos_dvfs_min_lock(int level) +{ +#ifdef CONFIG_MALI_DVFS + int clock; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context is not initialized\n", __func__); + return -ENODEV; + } + + clock = gpu_dvfs_get_clock(level); + if (clock < 0) + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, PMQOS_LOCK, 0); + else + gpu_dvfs_clock_lock(GPU_DVFS_MIN_LOCK, PMQOS_LOCK, clock); +#endif /* CONFIG_MALI_DVFS */ + return 0; +} + +static ssize_t show_clock(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + +#ifdef CONFIG_MALI_DVFS + int clock = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + +#ifdef CONFIG_MALI_RT_PM + if (platform->exynos_pm_domain) { + mutex_lock(&platform->exynos_pm_domain->access_lock); + if(!platform->dvs_is_enabled && gpu_is_power_on()) + clock = gpu_get_cur_clock(platform); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + } +#else + if (gpu_control_is_power_on(pkbdev) == 1) { + mutex_lock(&platform->gpu_clock_lock); + if (!platform->dvs_is_enabled) + clock = gpu_get_cur_clock(platform); + mutex_unlock(&platform->gpu_clock_lock); + } +#endif + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", clock); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } +#endif /* CONFIG_MALI_DVFS */ + + return ret; +} + +static ssize_t set_clock(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned int clk = 0; + int ret, i, policy_count; + static bool cur_state; + const struct kbase_pm_policy *const *policy_list; + static const struct kbase_pm_policy *prev_policy; + static bool prev_tmu_status = true; +#ifdef CONFIG_MALI_DVFS + static bool prev_dvfs_status = true; +#endif /* CONFIG_MALI_DVFS */ + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret = kstrtoint(buf, 0, &clk); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + if (!cur_state) { + prev_tmu_status = platform->tmu_status; +#ifdef CONFIG_MALI_DVFS + prev_dvfs_status = platform->dvfs_status; +#endif /* CONFIG_MALI_DVFS */ + prev_policy = kbase_pm_get_policy(pkbdev); + } + + if (clk == 0) { + kbase_pm_set_policy(pkbdev, prev_policy); + platform->tmu_status = prev_tmu_status; +#ifdef CONFIG_MALI_DVFS + if (!platform->dvfs_status) + gpu_dvfs_on_off(true); +#endif /* CONFIG_MALI_DVFS */ + cur_state = false; + } else { + policy_count = kbase_pm_list_policies(pkbdev, &policy_list); + for (i = 0; i < policy_count; i++) { + if (sysfs_streq(policy_list[i]->name, "always_on")) { + kbase_pm_set_policy(pkbdev, policy_list[i]); 
+ break; + } + } + platform->tmu_status = false; +#ifdef CONFIG_MALI_DVFS + if (platform->dvfs_status) + gpu_dvfs_on_off(false); +#endif /* CONFIG_MALI_DVFS */ + gpu_set_target_clk_vol(clk, false); + cur_state = true; + } + + return count; +} + +static ssize_t show_vol(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", gpu_get_cur_voltage(platform)); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t show_power_state(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", gpu_control_is_power_on(pkbdev)); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static int gpu_get_asv_table(struct exynos_context *platform, char *buf, size_t buf_size) +{ + int i, cnt = 0; + + if (!platform) + return -ENODEV; + + if (buf == NULL) + return 0; + + cnt += snprintf(buf+cnt, buf_size-cnt, "GPU, vol, min, max, down_stay, mif, cpu0, cpu1\n"); + + for (i = gpu_dvfs_get_level(platform->gpu_max_clock); i <= gpu_dvfs_get_level(platform->gpu_min_clock); i++) { + cnt += snprintf(buf+cnt, buf_size-cnt, "%d, %7d, %2d, %3d, %d, %7d, %7d, %7d\n", + platform->table[i].clock, platform->table[i].voltage, platform->table[i].min_threshold, + platform->table[i].max_threshold, platform->table[i].down_staycount, platform->table[i].mem_freq, + platform->table[i].cpu_little_min_freq, platform->table[i].cpu_middle_min_freq); + } + + return cnt; +} + +static ssize_t show_asv_table(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += gpu_get_asv_table(platform, buf+ret, (size_t)PAGE_SIZE-ret); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +#ifdef CONFIG_MALI_DVFS +static int gpu_get_dvfs_table(struct exynos_context *platform, char *buf, size_t buf_size) +{ + int i, cnt = 0; + + if (!platform) + return -ENODEV; + + if (buf == NULL) + return 0; + + for (i = gpu_dvfs_get_level(platform->gpu_max_clock); i <= gpu_dvfs_get_level(platform->gpu_min_clock); i++) + cnt += snprintf(buf+cnt, buf_size-cnt, " %d", platform->table[i].clock); + + cnt += snprintf(buf+cnt, buf_size-cnt, "\n"); + + return cnt; +} +#endif + +static ssize_t show_dvfs_table(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + +#ifdef CONFIG_MALI_DVFS + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += gpu_get_dvfs_table(platform, buf+ret, (size_t)PAGE_SIZE-ret); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } +#endif + + return ret; +} 
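+
+/*
+ * The show_*() handlers in this file share a common epilogue: accumulate
+ * output with snprintf() into the PAGE_SIZE sysfs buffer, then append a
+ * trailing newline when there is still room, or force-terminate the
+ * truncated output otherwise.  The epilogue is kept open-coded in each
+ * handler; a helper along the lines of this sketch (not part of the
+ * driver) would capture the idiom:
+ *
+ *	static ssize_t gpu_sysfs_end(char *buf, ssize_t ret)
+ *	{
+ *		if (ret < PAGE_SIZE - 1)
+ *			return ret + snprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ *		buf[PAGE_SIZE - 2] = '\n';
+ *		buf[PAGE_SIZE - 1] = '\0';
+ *		return PAGE_SIZE - 1;
+ *	}
+ */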
+ +static ssize_t show_time_in_state(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + +#ifdef CONFIG_MALI_DVFS + int i; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + gpu_dvfs_update_time_in_state(gpu_control_is_power_on(pkbdev) * platform->cur_clock); + + for (i = gpu_dvfs_get_level(platform->gpu_min_clock); i >= gpu_dvfs_get_level(platform->gpu_max_clock); i--) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d %llu\n", + platform->table[i].clock, + platform->table[i].time); + } + + if (ret >= PAGE_SIZE - 1) { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } +#endif + + return ret; +} + +static ssize_t set_time_in_state(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + gpu_dvfs_init_time_in_state(); + + return count; +} + +static ssize_t show_utilization(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", gpu_control_is_power_on(pkbdev) * platform->env_data.utilization); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t show_perf(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", gpu_control_is_power_on(pkbdev) * platform->env_data.perf); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +#ifdef CONFIG_MALI_DVFS +static ssize_t show_dvfs(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->dvfs_status); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_dvfs(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + if (sysfs_streq("0", buf)) + gpu_dvfs_on_off(false); + else if (sysfs_streq("1", buf)) + gpu_dvfs_on_off(true); + + return count; +} + +static ssize_t show_governor(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + gpu_dvfs_governor_info *governor_info; + int i; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + governor_info = (gpu_dvfs_governor_info *)gpu_dvfs_get_governor_info(); + + for (i = 0; i < G3D_MAX_GOVERNOR_NUM; i++) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%s\n", governor_info[i].name); + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "[Current Governor] %s", governor_info[platform->governor_type].name); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return 
ret; +} + +static ssize_t set_governor(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + int next_governor_type; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret = kstrtoint(buf, 0, &next_governor_type); + + if ((next_governor_type < 0) || (next_governor_type >= G3D_MAX_GOVERNOR_NUM)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + ret = gpu_dvfs_governor_change(next_governor_type); + + if (ret < 0) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, + "%s: fail to set the new governor (%d)\n", __func__, next_governor_type); + return -ENOENT; + } + + return count; +} + +static ssize_t show_max_lock_status(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int i; + int max_lock_status[NUMBER_LOCK]; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + for (i = 0; i < NUMBER_LOCK; i++) + max_lock_status[i] = platform->user_max_lock[i]; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + for (i = 0; i < NUMBER_LOCK; i++) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "[%d:%d]", i, max_lock_status[i]); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t show_min_lock_status(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int i; + int min_lock_status[NUMBER_LOCK]; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + for (i = 0; i < NUMBER_LOCK; i++) + min_lock_status[i] = platform->user_min_lock[i]; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + for (i = 0; i < NUMBER_LOCK; i++) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "[%d:%d]", i, min_lock_status[i]); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t show_max_lock_dvfs(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int locked_clock = -1; + int user_locked_clock = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + locked_clock = platform->max_lock; + user_locked_clock = platform->user_max_lock_input; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + if (locked_clock > 0) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d / %d", locked_clock, user_locked_clock); + else + ret += snprintf(buf+ret, PAGE_SIZE-ret, "-1"); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_max_lock_dvfs(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int ret, clock = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + 
return -ENODEV; + + if (sysfs_streq("0", buf)) { + platform->user_max_lock_input = 0; + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, SYSFS_LOCK, 0); + } else { + ret = kstrtoint(buf, 0, &clock); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + platform->user_max_lock_input = clock; + + clock = gpu_dvfs_get_level_clock(clock); + + ret = gpu_dvfs_get_level(clock); + if ((ret < gpu_dvfs_get_level(platform->gpu_max_clock)) || (ret > gpu_dvfs_get_level(platform->gpu_min_clock))) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid clock value (%d)\n", __func__, clock); + return -ENOENT; + } + + if (clock == platform->gpu_max_clock) + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, SYSFS_LOCK, 0); + else + gpu_dvfs_clock_lock(GPU_DVFS_MAX_LOCK, SYSFS_LOCK, clock); + } + + return count; +} + +static ssize_t show_min_lock_dvfs(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int locked_clock = -1; + int user_locked_clock = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + locked_clock = platform->min_lock; + user_locked_clock = platform->user_min_lock_input; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + if (locked_clock > 0) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d / %d", locked_clock, user_locked_clock); + else + ret += snprintf(buf+ret, PAGE_SIZE-ret, "-1"); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_min_lock_dvfs(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int ret, clock = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + if (sysfs_streq("0", buf)) { + platform->user_min_lock_input = 0; + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, SYSFS_LOCK, 0); + } else { + ret = kstrtoint(buf, 0, &clock); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + platform->user_min_lock_input = clock; + + clock = gpu_dvfs_get_level_clock(clock); + + ret = gpu_dvfs_get_level(clock); + if ((ret < gpu_dvfs_get_level(platform->gpu_max_clock)) || (ret > gpu_dvfs_get_level(platform->gpu_min_clock))) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid clock value (%d)\n", __func__, clock); + return -ENOENT; + } + + if (clock > platform->gpu_max_clock_limit) + clock = platform->gpu_max_clock_limit; + + if (clock == platform->gpu_min_clock) + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, SYSFS_LOCK, 0); + else + gpu_dvfs_clock_lock(GPU_DVFS_MIN_LOCK, SYSFS_LOCK, clock); + } + + return count; +} + +static ssize_t show_down_staycount(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int i = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + for (i = gpu_dvfs_get_level(platform->gpu_max_clock); i <= gpu_dvfs_get_level(platform->gpu_min_clock); i++) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "Clock %d - %d\n", + platform->table[i].clock, platform->table[i].down_staycount); + 
spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +#define MIN_DOWN_STAYCOUNT 1 +#define MAX_DOWN_STAYCOUNT 10 +static ssize_t set_down_staycount(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long flags; + char tmpbuf[32]; + char *sptr, *tok; + int ret = -1; + int clock = -1, level = -1, down_staycount = 0; + unsigned int len = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + len = (unsigned int)min(count, sizeof(tmpbuf) - 1); + memcpy(tmpbuf, buf, len); + tmpbuf[len] = '\0'; + sptr = tmpbuf; + + tok = strsep(&sptr, " ,"); + if (tok == NULL) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid input\n", __func__); + return -ENOENT; + } + + ret = kstrtoint(tok, 0, &clock); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid input %d\n", __func__, clock); + return -ENOENT; + } + + tok = strsep(&sptr, " ,"); + if (tok == NULL) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid input\n", __func__); + return -ENOENT; + } + + ret = kstrtoint(tok, 0, &down_staycount); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid input %d\n", __func__, down_staycount); + return -ENOENT; + } + + level = gpu_dvfs_get_level(clock); + if (level < 0) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid clock value (%d)\n", __func__, clock); + return -ENOENT; + } + + if ((down_staycount < MIN_DOWN_STAYCOUNT) || (down_staycount > MAX_DOWN_STAYCOUNT)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: down_staycount is out of range (%d, %d ~ %d)\n", + __func__, down_staycount, MIN_DOWN_STAYCOUNT, MAX_DOWN_STAYCOUNT); + return -ENOENT; + } + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + platform->table[level].down_staycount = down_staycount; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + return count; +} + +static ssize_t show_highspeed_clock(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int highspeed_clock = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + highspeed_clock = platform->interactive.highspeed_clock; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", highspeed_clock); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_highspeed_clock(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + ssize_t ret = 0; + unsigned long flags; + int highspeed_clock = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret = kstrtoint(buf, 0, &highspeed_clock); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + ret = gpu_dvfs_get_level(highspeed_clock); + if ((ret < gpu_dvfs_get_level(platform->gpu_max_clock)) || (ret > gpu_dvfs_get_level(platform->gpu_min_clock))) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid clock value (%d)\n", 
__func__, highspeed_clock); + return -ENOENT; + } + + if (highspeed_clock > platform->gpu_max_clock_limit) + highspeed_clock = platform->gpu_max_clock_limit; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + platform->interactive.highspeed_clock = highspeed_clock; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + return count; +} + +static ssize_t show_highspeed_load(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int highspeed_load = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + highspeed_load = platform->interactive.highspeed_load; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", highspeed_load); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_highspeed_load(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + ssize_t ret = 0; + unsigned long flags; + int highspeed_load = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret = kstrtoint(buf, 0, &highspeed_load); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + if ((highspeed_load < 0) || (highspeed_load > 100)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid load value (%d)\n", __func__, highspeed_load); + return -ENOENT; + } + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + platform->interactive.highspeed_load = highspeed_load; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + return count; +} + +static ssize_t show_highspeed_delay(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int highspeed_delay = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + highspeed_delay = platform->interactive.highspeed_delay; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", highspeed_delay); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_highspeed_delay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + ssize_t ret = 0; + unsigned long flags; + int highspeed_delay = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret = kstrtoint(buf, 0, &highspeed_delay); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + if ((highspeed_delay < 0) || (highspeed_delay > 5)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid load value (%d)\n", __func__, highspeed_delay); + return -ENOENT; + } + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + platform->interactive.highspeed_delay = highspeed_delay; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + return count; +} + +static ssize_t 
show_wakeup_lock(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->wakeup_lock); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_wakeup_lock(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + if (sysfs_streq("0", buf)) + platform->wakeup_lock = false; + else if (sysfs_streq("1", buf)) + platform->wakeup_lock = true; + else + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid val - only [0 or 1] is available\n", __func__); + + return count; +} + +static ssize_t show_polling_speed(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->polling_speed); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_polling_speed(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int ret, polling_speed; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret = kstrtoint(buf, 0, &polling_speed); + + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + if ((polling_speed < 100) || (polling_speed > 1000)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: out of range [100~1000] (%d)\n", __func__, polling_speed); + return -ENOENT; + } + + platform->polling_speed = polling_speed; + + return count; +} + +static ssize_t show_tmu(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->tmu_status); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_tmu_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + if (sysfs_streq("0", buf)) { + if (platform->voltage_margin != 0) { + platform->voltage_margin = 0; + gpu_set_target_clk_vol(platform->cur_clock, false); + } + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, TMU_LOCK, 0); + platform->tmu_status = false; + } else if (sysfs_streq("1", buf)) + platform->tmu_status = true; + else + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value - only [0 or 1] is available\n", __func__); + + return count; +} + +#ifdef CONFIG_CPU_THERMAL_IPA +static ssize_t show_norm_utilization(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; +#ifdef CONFIG_EXYNOS_THERMAL + + 
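+	/* Normalised utilisation as reported by the IPA (thermal) DVFS integration. */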
ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", gpu_ipa_dvfs_get_norm_utilisation(pkbdev)); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } +#else + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: EXYNOS THERMAL build config is disabled\n", __func__); +#endif /* CONFIG_EXYNOS_THERMAL */ + + return ret; +} + +static ssize_t show_utilization_stats(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; +#ifdef CONFIG_EXYNOS_THERMAL + struct mali_debug_utilisation_stats stats; + + gpu_ipa_dvfs_get_utilisation_stats(&stats); + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "util=%d norm_util=%d norm_freq=%d time_busy=%u time_idle=%u time_tick=%d", + stats.s.utilisation, stats.s.norm_utilisation, + stats.s.freq_for_norm, stats.time_busy, stats.time_idle, + stats.time_tick); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } +#else + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: EXYNOS THERMAL build config is disabled\n", __func__); +#endif /* CONFIG_EXYNOS_THERMAL */ + + return ret; +} +#endif /* CONFIG_CPU_THERMAL_IPA */ +#endif /* CONFIG_MALI_DVFS */ + +static ssize_t show_debug_level(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "[Current] %d (%d ~ %d)", + gpu_get_debug_level(), DVFS_DEBUG_START+1, DVFS_DEBUG_END-1); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_debug_level(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int debug_level, ret; + + ret = kstrtoint(buf, 0, &debug_level); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + if ((debug_level <= DVFS_DEBUG_START) || (debug_level >= DVFS_DEBUG_END)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid debug level (%d)\n", __func__, debug_level); + return -ENOENT; + } + + gpu_set_debug_level(debug_level); + + return count; +} + +#ifdef CONFIG_MALI_EXYNOS_TRACE +static ssize_t show_trace_level(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + int level; + + for (level = TRACE_NONE + 1; level < TRACE_END - 1; level++) + if (gpu_check_trace_level(level)) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "<%d> ", level); + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\nList: %d ~ %d\n(None: %d, All: %d)", + TRACE_NONE + 1, TRACE_ALL - 1, TRACE_NONE, TRACE_ALL); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_trace_level(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int trace_level, ret; + + ret = kstrtoint(buf, 0, &trace_level); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + if ((trace_level <= TRACE_START) || (trace_level >= TRACE_END)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid trace level (%d)\n", __func__, trace_level); + return -ENOENT; + } + + gpu_set_trace_level(trace_level); + + return count; +} + +extern void kbasep_ktrace_format_msg(struct 
kbase_ktrace_msg *trace_msg, char *buffer, int len); +static ssize_t show_trace_dump(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + u32 start, end; + + spin_lock_irqsave(&pkbdev->ktrace.lock, flags); + start = pkbdev->ktrace.first_out; + end = pkbdev->ktrace.next_in; + + while (start != end) { + char buffer[KBASE_KTRACE_SIZE]; + struct kbase_ktrace_msg *trace_msg = &pkbdev->ktrace.rbuf[start]; + + kbasep_ktrace_format_msg(trace_msg, buffer, KBASE_KTRACE_SIZE); + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%s\n", buffer); + if (ret >= PAGE_SIZE - 1) + break; + start = (start + 1) & KBASE_KTRACE_MASK; + } + + spin_unlock_irqrestore(&pkbdev->ktrace.lock, flags); + KBASE_KTRACE_CLEAR(pkbdev); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t init_trace_dump(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + KBASE_KTRACE_CLEAR(pkbdev); + + return count; +} +#endif /* CONFIG_MALI_EXYNOS_TRACE */ + +#ifdef DEBUG_FBDEV +static ssize_t show_fbdev(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + int i; + + for (i = 0; i < num_registered_fb; i++) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "fb[%d] xres=%d, yres=%d, addr=0x%lx\n", i, registered_fb[i]->var.xres, registered_fb[i]->var.yres, registered_fb[i]->fix.smem_start); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} +#endif + +static int gpu_get_status(struct exynos_context *platform, char *buf, size_t buf_size) +{ + int cnt = 0; + int i; + int mmu_fault_cnt = 0; + + if (!platform) + return -ENODEV; + + if (buf == NULL) + return 0; + + for (i = GPU_MMU_TRANSLATION_FAULT; i <= GPU_MMU_MEMORY_ATTRIBUTES_FAULT; i++) + mmu_fault_cnt += platform->gpu_exception_count[i]; + + cnt += snprintf(buf+cnt, buf_size-cnt, "reset count : %d\n", platform->gpu_exception_count[GPU_RESET]); + cnt += snprintf(buf+cnt, buf_size-cnt, "data invalid count : %d\n", platform->gpu_exception_count[GPU_DATA_INVALIDATE_FAULT]); + cnt += snprintf(buf+cnt, buf_size-cnt, "mmu fault count : %d\n", mmu_fault_cnt); + + for (i = 0; i < BMAX_RETRY_CNT; i++) + cnt += snprintf(buf+cnt, buf_size-cnt, "warmup retry count %d : %d\n", i+1, platform->balance_retry_count[i]); + + return cnt; +} + +static ssize_t show_gpu_status(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += gpu_get_status(platform, buf+ret, (size_t)PAGE_SIZE-ret); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +#ifdef CONFIG_MALI_SEC_VK_BOOST +static ssize_t show_vk_boost_status(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->ctx_vk_need_qos); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = 
'\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} +#endif + +#ifdef CONFIG_MALI_SUSTAINABLE_OPT +static ssize_t show_sustainable_status(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->sustainable.status); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} +#endif + +#ifdef CONFIG_MALI_SEC_CL_BOOST +static ssize_t set_cl_boost_disable(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned int cl_boost_disable = 0; + int ret; + + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret = kstrtoint(buf, 0, &cl_boost_disable); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + if (cl_boost_disable == 0) + platform->cl_boost_disable = false; + else + platform->cl_boost_disable = true; + + return count; +} + +static ssize_t show_cl_boost_disable(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->cl_boost_disable); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} +#endif + +#ifdef CONFIG_MALI_CAMERA_EXT_BTS +static ssize_t set_camera_ext_bts_scenario(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned int camera_ext_bts_scenario = 0; + int ret; + + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret = kstrtoint(buf, 0, &camera_ext_bts_scenario); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + if (camera_ext_bts_scenario == 0 && platform->is_set_bts_camera_ext) { + bts_del_scenario(platform->bts_camera_ext_idx); + platform->is_set_bts_camera_ext = 0; + } else if (camera_ext_bts_scenario != 0 && platform->is_set_bts_camera_ext == 0) { + bts_add_scenario(platform->bts_camera_ext_idx); + platform->is_set_bts_camera_ext = 1; + } + + return count; +} + +static ssize_t show_camera_ext_bts_scenario(struct device *dev, struct device_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->is_set_bts_camera_ext); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} +#endif + +/** The sysfs file @c clock, fbdev. 
+ * + * This is used for obtaining information about the mali t series operating clock & framebuffer address, + */ + +DEVICE_ATTR(clock, S_IRUGO|S_IWUSR, show_clock, set_clock); +DEVICE_ATTR(vol, S_IRUGO, show_vol, NULL); +DEVICE_ATTR(power_state, S_IRUGO, show_power_state, NULL); +DEVICE_ATTR(asv_table, S_IRUGO, show_asv_table, NULL); +DEVICE_ATTR(dvfs_table, S_IRUGO, show_dvfs_table, NULL); +DEVICE_ATTR(time_in_state, S_IRUGO|S_IWUSR, show_time_in_state, set_time_in_state); +DEVICE_ATTR(utilization, S_IRUGO, show_utilization, NULL); +DEVICE_ATTR(perf, S_IRUGO, show_perf, NULL); +#ifdef CONFIG_MALI_DVFS +DEVICE_ATTR(dvfs, S_IRUGO|S_IWUSR, show_dvfs, set_dvfs); +DEVICE_ATTR(dvfs_governor, S_IRUGO|S_IWUSR, show_governor, set_governor); +DEVICE_ATTR(dvfs_max_lock_status, S_IRUGO, show_max_lock_status, NULL); +DEVICE_ATTR(dvfs_min_lock_status, S_IRUGO, show_min_lock_status, NULL); +DEVICE_ATTR(dvfs_max_lock, S_IRUGO|S_IWUSR, show_max_lock_dvfs, set_max_lock_dvfs); +DEVICE_ATTR(dvfs_min_lock, S_IRUGO|S_IWUSR, show_min_lock_dvfs, set_min_lock_dvfs); +DEVICE_ATTR(down_staycount, S_IRUGO|S_IWUSR, show_down_staycount, set_down_staycount); +DEVICE_ATTR(highspeed_clock, S_IRUGO|S_IWUSR, show_highspeed_clock, set_highspeed_clock); +DEVICE_ATTR(highspeed_load, S_IRUGO|S_IWUSR, show_highspeed_load, set_highspeed_load); +DEVICE_ATTR(highspeed_delay, S_IRUGO|S_IWUSR, show_highspeed_delay, set_highspeed_delay); +DEVICE_ATTR(wakeup_lock, S_IRUGO|S_IWUSR, show_wakeup_lock, set_wakeup_lock); +DEVICE_ATTR(polling_speed, S_IRUGO|S_IWUSR, show_polling_speed, set_polling_speed); +DEVICE_ATTR(tmu, S_IRUGO|S_IWUSR, show_tmu, set_tmu_control); +#ifdef CONFIG_CPU_THERMAL_IPA +DEVICE_ATTR(norm_utilization, S_IRUGO, show_norm_utilization, NULL); +DEVICE_ATTR(utilization_stats, S_IRUGO, show_utilization_stats, NULL); +#endif /* CONFIG_CPU_THERMAL_IPA */ +#endif /* CONFIG_MALI_DVFS */ +DEVICE_ATTR(debug_level, S_IRUGO|S_IWUSR, show_debug_level, set_debug_level); +#ifdef CONFIG_MALI_EXYNOS_TRACE +DEVICE_ATTR(trace_level, S_IRUGO|S_IWUSR, show_trace_level, set_trace_level); +DEVICE_ATTR(trace_dump, S_IRUGO|S_IWUSR, show_trace_dump, init_trace_dump); +#endif /* CONFIG_MALI_EXYNOS_TRACE */ +#ifdef DEBUG_FBDEV +DEVICE_ATTR(fbdev, S_IRUGO, show_fbdev, NULL); +#endif +DEVICE_ATTR(gpu_status, S_IRUGO, show_gpu_status, NULL); +#ifdef CONFIG_MALI_SEC_VK_BOOST +DEVICE_ATTR(vk_boost_status, S_IRUGO, show_vk_boost_status, NULL); +#endif +#ifdef CONFIG_MALI_SUSTAINABLE_OPT +DEVICE_ATTR(sustainable_status, S_IRUGO, show_sustainable_status, NULL); +#endif +#ifdef CONFIG_MALI_SEC_CL_BOOST +DEVICE_ATTR(cl_boost_disable, S_IRUGO|S_IWUSR, show_cl_boost_disable, set_cl_boost_disable); +#endif +#ifdef CONFIG_MALI_CAMERA_EXT_BTS +DEVICE_ATTR(camera_ext_bts, S_IRUGO|S_IWUSR, show_camera_ext_bts_scenario, set_camera_ext_bts_scenario); +#endif + + +#ifdef CONFIG_MALI_DEBUG_KERNEL_SYSFS +#ifdef CONFIG_MALI_DVFS +#define BUF_SIZE 1000 +static ssize_t show_kernel_sysfs_gpu_info(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + ssize_t ret = 0; + + if (!platform) + return -ENODEV; + + if (buf == NULL) + return 0; + + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"SSTOP\":\"%d\",", platform->gpu_exception_count[GPU_SOFT_STOP]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"HSTOP\":\"%d\",", platform->gpu_exception_count[GPU_HARD_STOP]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"RESET\":\"%d\",", platform->gpu_exception_count[GPU_RESET]); + ret += 
snprintf(buf+ret, BUF_SIZE-ret, "\"DIFLT\":\"%d\",", platform->gpu_exception_count[GPU_DATA_INVALIDATE_FAULT]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"TRFLT\":\"%d\",", platform->gpu_exception_count[GPU_MMU_TRANSLATION_FAULT]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"PMFLT\":\"%d\",", platform->gpu_exception_count[GPU_MMU_PERMISSION_FAULT]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"BFLT\":\"%d\",", platform->gpu_exception_count[GPU_MMU_TRANSTAB_BUS_FAULT]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"ACCFG\":\"%d\",", platform->gpu_exception_count[GPU_MMU_ACCESS_FLAG_FAULT]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"ASFLT\":\"%d\",", platform->gpu_exception_count[GPU_MMU_ADDRESS_SIZE_FAULT]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"ATFLT\":\"%d\",", platform->gpu_exception_count[GPU_MMU_MEMORY_ATTRIBUTES_FAULT]); + ret += snprintf(buf+ret, BUF_SIZE-ret, "\"UNKN\":\"%d\"", platform->gpu_exception_count[GPU_UNKNOWN]); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t show_kernel_sysfs_max_lock_dvfs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int locked_clock = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + locked_clock = platform->max_lock; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + if (locked_clock > 0) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", locked_clock); + else + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->gpu_max_clock); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_kernel_sysfs_max_lock_dvfs(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret, clock = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + if (sysfs_streq("0", buf)) { + platform->user_max_lock_input = 0; + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, SYSFS_LOCK, 0); + } else { + ret = kstrtoint(buf, 0, &clock); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + platform->user_max_lock_input = clock; + + clock = gpu_dvfs_get_level_clock(clock); + + ret = gpu_dvfs_get_level(clock); + if ((ret < gpu_dvfs_get_level(platform->gpu_max_clock)) || (ret > gpu_dvfs_get_level(platform->gpu_min_clock))) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid clock value (%d)\n", __func__, clock); + return -ENOENT; + } + + if (clock == platform->gpu_max_clock) + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, SYSFS_LOCK, 0); + else + gpu_dvfs_clock_lock(GPU_DVFS_MAX_LOCK, SYSFS_LOCK, clock); + } + + return count; +} + +static ssize_t show_kernel_sysfs_available_governor(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + ssize_t ret = 0; + gpu_dvfs_governor_info *governor_info; + int i; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + governor_info = (gpu_dvfs_governor_info *)gpu_dvfs_get_governor_info(); + + for (i = 0; i < G3D_MAX_GOVERNOR_NUM; i++) + ret += snprintf(buf+ret, PAGE_SIZE-ret, 
"%s ", governor_info[i].name); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t show_kernel_sysfs_min_lock_dvfs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + ssize_t ret = 0; + unsigned long flags; + int locked_clock = -1; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + locked_clock = platform->min_lock; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + if (locked_clock > 0) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", locked_clock); + else + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", platform->gpu_min_clock); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_kernel_sysfs_min_lock_dvfs(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret, clock = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + if (sysfs_streq("0", buf)) { + platform->user_min_lock_input = 0; + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, SYSFS_LOCK, 0); + } else { + ret = kstrtoint(buf, 0, &clock); + if (ret) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + platform->user_min_lock_input = clock; + + clock = gpu_dvfs_get_level_clock(clock); + + ret = gpu_dvfs_get_level(clock); + if ((ret < gpu_dvfs_get_level(platform->gpu_max_clock)) || (ret > gpu_dvfs_get_level(platform->gpu_min_clock))) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid clock value (%d)\n", __func__, clock); + return -ENOENT; + } + + if (clock > platform->gpu_max_clock_limit) + clock = platform->gpu_max_clock_limit; + + if (clock == platform->gpu_min_clock) + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, SYSFS_LOCK, 0); + else + gpu_dvfs_clock_lock(GPU_DVFS_MIN_LOCK, SYSFS_LOCK, clock); + } + + return count; +} +#endif /* #ifdef CONFIG_MALI_DVFS */ + +static ssize_t show_kernel_sysfs_utilization(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + ssize_t ret = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%3d%%", platform->env_data.utilization); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t show_kernel_sysfs_clock(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + ssize_t ret = 0; + int clock = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + +#ifdef CONFIG_MALI_RT_PM + if (platform->exynos_pm_domain) { + mutex_lock(&platform->exynos_pm_domain->access_lock); + if (!platform->dvs_is_enabled && gpu_is_power_on()) + clock = gpu_get_cur_clock(platform); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + } +#else + if (gpu_control_is_power_on(pkbdev) == 1) { + mutex_lock(&platform->gpu_clock_lock); + if (!platform->dvs_is_enabled) + clock = gpu_get_cur_clock(platform); + 
mutex_unlock(&platform->gpu_clock_lock); + } +#endif + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d", clock); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t show_kernel_sysfs_freq_table(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + ssize_t ret = 0; + int i = 0; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + for (i = gpu_dvfs_get_level(platform->gpu_min_clock); i >= gpu_dvfs_get_level(platform->gpu_max_clock); i--) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d ", platform->table[i].clock); + } + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +#ifdef CONFIG_MALI_DVFS +static ssize_t show_kernel_sysfs_governor(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + ssize_t ret = 0; + gpu_dvfs_governor_info *governor_info = NULL; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + governor_info = (gpu_dvfs_governor_info *)gpu_dvfs_get_governor_info(); + + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%s", governor_info[platform->governor_type].name); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static ssize_t set_kernel_sysfs_governor(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + int i = 0; + int next_governor_type = -1; + size_t governor_name_size = 0; + gpu_dvfs_governor_info *governor_info = NULL; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; + + if (!platform) + return -ENODEV; + + governor_info = (gpu_dvfs_governor_info *)gpu_dvfs_get_governor_info(); + + for (i = 0; i < G3D_MAX_GOVERNOR_NUM; i++) { + governor_name_size = strlen(governor_info[i].name); + if (!strncmp(buf, governor_info[i].name, governor_name_size)) { + next_governor_type = i; + break; + } + } + + if ((next_governor_type < 0) || (next_governor_type >= G3D_MAX_GOVERNOR_NUM)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: invalid value\n", __func__); + return -ENOENT; + } + + ret = gpu_dvfs_governor_change(next_governor_type); + + if (ret < 0) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, + "%s: fail to set the new governor (%d)\n", __func__, next_governor_type); + return -ENOENT; + } + + return count; +} +#endif /* #ifdef CONFIG_MALI_DVFS */ + +static ssize_t show_kernel_sysfs_gpu_model(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + /* COPY from mali_kbase_core_linux.c : 2594 line, last updated: 20161017, r2p0-03rel0 */ + static const struct gpu_product_id_name { + unsigned id; + char *name; + } gpu_product_id_names[] = { + { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G71" }, + { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-THEx" }, + }; + const char *product_name = "(Unknown Mali GPU)"; + struct kbase_device *kbdev; + u32 gpu_id; + unsigned product_id, product_id_mask; + unsigned i; + + kbdev = pkbdev; + if (!kbdev) + return -ENODEV; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_id = gpu_id >> 
GPU_ID_VERSION_PRODUCT_ID_SHIFT; + product_id_mask = GPU_ID2_PRODUCT_MODEL >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { + const struct gpu_product_id_name *p = &gpu_product_id_names[i]; + + if ((p->id & product_id_mask) == + (product_id & product_id_mask)) { + product_name = p->name; + break; + } + } + + return scnprintf(buf, PAGE_SIZE, "%s\n", product_name); +} + +#if defined(CONFIG_MALI_DVFS) && defined(CONFIG_EXYNOS_THERMAL) && defined(CONFIG_GPU_THERMAL) + +extern struct exynos_tmu_data *gpu_thermal_data; + +static ssize_t show_kernel_sysfs_gpu_temp(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + ssize_t ret = 0; + int gpu_temp = 0; + int gpu_temp_int = 0; + int gpu_temp_point = 0; + + + if (!gpu_thermal_data) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[Kernel group SYSFS] thermal driver does not ready\n"); + return -ENODEV; + } + + mutex_lock(&gpu_thermal_data->lock); + + if (gpu_thermal_data->num_of_sensors) + gpu_temp = gpu_thermal_data->tmu_read(gpu_thermal_data) * MCELSIUS; + + mutex_unlock(&gpu_thermal_data->lock); + + gpu_temp_int = gpu_temp / 1000; + gpu_temp_point = gpu_temp % gpu_temp_int; + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%d.%d", gpu_temp_int, gpu_temp_point); + + if (ret < PAGE_SIZE - 1) { + ret += snprintf(buf+ret, PAGE_SIZE-ret, "\n"); + } else { + buf[PAGE_SIZE-2] = '\n'; + buf[PAGE_SIZE-1] = '\0'; + ret = PAGE_SIZE-1; + } + + return ret; +} + +static struct kobj_attribute gpu_temp_attribute = + __ATTR(gpu_tmu, S_IRUGO, show_kernel_sysfs_gpu_temp, NULL); +#endif + +#ifdef CONFIG_MALI_DVFS +static struct kobj_attribute gpu_info_attribute = + __ATTR(gpu_info, S_IRUGO, show_kernel_sysfs_gpu_info, NULL); + +static struct kobj_attribute gpu_max_lock_attribute = + __ATTR(gpu_max_clock, S_IRUGO|S_IWUSR, show_kernel_sysfs_max_lock_dvfs, set_kernel_sysfs_max_lock_dvfs); + +static struct kobj_attribute gpu_min_lock_attribute = + __ATTR(gpu_min_clock, S_IRUGO|S_IWUSR, show_kernel_sysfs_min_lock_dvfs, set_kernel_sysfs_min_lock_dvfs); +#endif /* #ifdef CONFIG_MALI_DVFS */ + +static struct kobj_attribute gpu_busy_attribute = + __ATTR(gpu_busy, S_IRUGO, show_kernel_sysfs_utilization, NULL); + +static struct kobj_attribute gpu_clock_attribute = + __ATTR(gpu_clock, S_IRUGO, show_kernel_sysfs_clock, NULL); + +static struct kobj_attribute gpu_freq_table_attribute = + __ATTR(gpu_freq_table, S_IRUGO, show_kernel_sysfs_freq_table, NULL); + +#ifdef CONFIG_MALI_DVFS +static struct kobj_attribute gpu_governor_attribute = + __ATTR(gpu_governor, S_IRUGO|S_IWUSR, show_kernel_sysfs_governor, set_kernel_sysfs_governor); + +static struct kobj_attribute gpu_available_governor_attribute = + __ATTR(gpu_available_governor, S_IRUGO, show_kernel_sysfs_available_governor, NULL); +#endif /* #ifdef CONFIG_MALI_DVFS */ + +static struct kobj_attribute gpu_model_attribute = + __ATTR(gpu_model, S_IRUGO, show_kernel_sysfs_gpu_model, NULL); + + +static struct attribute *attrs[] = { +#ifdef CONFIG_MALI_DVFS +#if defined(CONFIG_EXYNOS_THERMAL) && defined(CONFIG_GPU_THERMAL) + &gpu_temp_attribute.attr, +#endif + &gpu_info_attribute.attr, + &gpu_max_lock_attribute.attr, + &gpu_min_lock_attribute.attr, +#endif /* #ifdef CONFIG_MALI_DVFS */ + &gpu_busy_attribute.attr, + &gpu_clock_attribute.attr, + &gpu_freq_table_attribute.attr, +#ifdef CONFIG_MALI_DVFS + &gpu_governor_attribute.attr, + &gpu_available_governor_attribute.attr, +#endif /* #ifdef CONFIG_MALI_DVFS */ + &gpu_model_attribute.attr, + NULL, +}; + +static struct attribute_group 
attr_group = { + .attrs = attrs, +}; +static struct kobject *external_kobj; +#endif + +int gpu_create_sysfs_file(struct device *dev) +{ +#ifdef CONFIG_MALI_DEBUG_KERNEL_SYSFS + int retval = 0; +#endif + + if (device_create_file(dev, &dev_attr_clock)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [clock]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_vol)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [vol]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_power_state)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [power_state]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_asv_table)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [asv_table]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_dvfs_table)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [dvfs_table]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_time_in_state)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [time_in_state]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_utilization)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [utilization]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_perf)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [perf]\n"); + goto out; + } +#ifdef CONFIG_MALI_DVFS + if (device_create_file(dev, &dev_attr_dvfs)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [dvfs]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_dvfs_governor)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [dvfs_governor]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_dvfs_max_lock_status)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [dvfs_max_lock_status]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_dvfs_min_lock_status)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [dvfs_min_lock_status]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_dvfs_max_lock)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [dvfs_max_lock]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_dvfs_min_lock)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [dvfs_min_lock]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_down_staycount)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [down_staycount]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_highspeed_clock)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [highspeed_clock]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_highspeed_load)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [highspeed_load]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_highspeed_delay)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [highspeed_delay]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_wakeup_lock)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [wakeup_lock]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_polling_speed)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [polling_speed]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_tmu)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [tmu]\n"); + goto out; + } 
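gpu_create_sysfs_file() registers every attribute with its own device_create_file() call, and gpu_remove_sysfs_file() later mirrors the same list by hand. Purely as a sketch (the array and group names below are illustrative and not part of this patch), the per-device attributes could instead be collected into an attribute_group and registered in one call, which is the approach the patch itself already takes for the kernel_kobj "gpu" group further down:

/* Sketch only: group a few of the DEVICE_ATTR() objects declared above.
 * sysfs_create_group()/sysfs_remove_group() then replace the long chains
 * of device_create_file()/device_remove_file() calls.
 */
static struct attribute *gpu_platform_dev_attrs[] = {
        &dev_attr_clock.attr,
        &dev_attr_vol.attr,
        &dev_attr_power_state.attr,
        &dev_attr_asv_table.attr,
        NULL,                           /* sysfs requires a NULL terminator */
};

static const struct attribute_group gpu_platform_dev_attr_group = {
        .attrs = gpu_platform_dev_attrs,
};

/* In gpu_create_sysfs_file():
 *         if (sysfs_create_group(&dev->kobj, &gpu_platform_dev_attr_group))
 *                 goto out;
 * In gpu_remove_sysfs_file():
 *         sysfs_remove_group(&dev->kobj, &gpu_platform_dev_attr_group);
 */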
+#ifdef CONFIG_CPU_THERMAL_IPA + if (device_create_file(dev, &dev_attr_norm_utilization)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [norm_utilization]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_utilization_stats)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [utilization_stats]\n"); + goto out; + } +#endif /* CONFIG_CPU_THERMAL_IPA */ +#endif /* CONFIG_MALI_DVFS */ + if (device_create_file(dev, &dev_attr_debug_level)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [debug_level]\n"); + goto out; + } +#ifdef CONFIG_MALI_EXYNOS_TRACE + if (device_create_file(dev, &dev_attr_trace_level)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [trace_level]\n"); + goto out; + } + + if (device_create_file(dev, &dev_attr_trace_dump)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [trace_dump]\n"); + goto out; + } +#endif /* CONFIG_MALI_EXYNOS_TRACE */ +#ifdef DEBUG_FBDEV + if (device_create_file(dev, &dev_attr_fbdev)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [fbdev]\n"); + goto out; + } +#endif + + if (device_create_file(dev, &dev_attr_gpu_status)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [gpu_status]\n"); + goto out; + } + +#ifdef CONFIG_MALI_SEC_VK_BOOST + if (device_create_file(dev, &dev_attr_vk_boost_status)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [vk_boost_status]\n"); + goto out; + } +#endif + +#ifdef CONFIG_MALI_SUSTAINABLE_OPT + if (device_create_file(dev, &dev_attr_sustainable_status)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [sustainable_status]\n"); + goto out; + } +#endif + +#ifdef CONFIG_MALI_SEC_CL_BOOST + if (device_create_file(dev, &dev_attr_cl_boost_disable)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [cl_boost_disable]\n"); + goto out; + } +#endif + +#ifdef CONFIG_MALI_CAMERA_EXT_BTS + if (device_create_file(dev, &dev_attr_camera_ext_bts)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create sysfs file [camera_ext_bts]\n"); + goto out; + } +#endif + +#ifdef CONFIG_MALI_DEBUG_KERNEL_SYSFS + external_kobj = kobject_create_and_add("gpu", kernel_kobj); + if (!external_kobj) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't create Kobj for group [KERNEL - GPU]\n"); + goto out; + } + + retval = sysfs_create_group(external_kobj, &attr_group); + if (retval) { + kobject_put(external_kobj); + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "couldn't add sysfs group [KERNEL - GPU]\n"); + goto out; + } +#endif + + return 0; +out: + return -ENOENT; +} + +void gpu_remove_sysfs_file(struct device *dev) +{ + device_remove_file(dev, &dev_attr_clock); + device_remove_file(dev, &dev_attr_vol); + device_remove_file(dev, &dev_attr_power_state); + device_remove_file(dev, &dev_attr_asv_table); + device_remove_file(dev, &dev_attr_dvfs_table); + device_remove_file(dev, &dev_attr_time_in_state); + device_remove_file(dev, &dev_attr_utilization); + device_remove_file(dev, &dev_attr_perf); +#ifdef CONFIG_MALI_DVFS + device_remove_file(dev, &dev_attr_dvfs); + device_remove_file(dev, &dev_attr_dvfs_governor); + device_remove_file(dev, &dev_attr_dvfs_max_lock_status); + device_remove_file(dev, &dev_attr_dvfs_min_lock_status); + device_remove_file(dev, &dev_attr_dvfs_max_lock); + device_remove_file(dev, &dev_attr_dvfs_min_lock); + device_remove_file(dev, &dev_attr_down_staycount); + device_remove_file(dev, &dev_attr_highspeed_clock); + device_remove_file(dev, 
&dev_attr_highspeed_load); + device_remove_file(dev, &dev_attr_highspeed_delay); + device_remove_file(dev, &dev_attr_wakeup_lock); + device_remove_file(dev, &dev_attr_polling_speed); + device_remove_file(dev, &dev_attr_tmu); +#ifdef CONFIG_CPU_THERMAL_IPA + device_remove_file(dev, &dev_attr_norm_utilization); + device_remove_file(dev, &dev_attr_utilization_stats); +#endif /* CONFIG_CPU_THERMAL_IPA */ +#endif /* CONFIG_MALI_DVFS */ + device_remove_file(dev, &dev_attr_debug_level); +#ifdef CONFIG_MALI_EXYNOS_TRACE + device_remove_file(dev, &dev_attr_trace_level); + device_remove_file(dev, &dev_attr_trace_dump); +#endif /* CONFIG_MALI_EXYNOS_TRACE */ +#ifdef DEBUG_FBDEV + device_remove_file(dev, &dev_attr_fbdev); +#endif + device_remove_file(dev, &dev_attr_gpu_status); +#ifdef CONFIG_MALI_SEC_VK_BOOST + device_remove_file(dev, &dev_attr_vk_boost_status); +#endif +#ifdef CONFIG_MALI_SUSTAINABLE_OPT + device_remove_file(dev, &dev_attr_sustainable_status); +#endif +#ifdef CONFIG_MALI_SEC_CL_BOOST + device_remove_file(dev, &dev_attr_cl_boost_disable); +#endif +#ifdef CONFIG_MALI_CAMERA_EXT_BTS + device_remove_file(dev, &dev_attr_camera_ext_bts); +#endif +#ifdef CONFIG_MALI_DEBUG_KERNEL_SYSFS + kobject_put(external_kobj); +#endif +} diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_custom_interface.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_custom_interface.h new file mode 100644 index 000000000000..ed626a572c97 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_custom_interface.h @@ -0,0 +1,27 @@ +/* drivers/gpu/arm/.../platform/gpu_custom_interface.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_custom_interface.h + * DVFS + */ + +#ifndef _GPU_CUSTOM_INTERFACE_H_ +#define _GPU_CUSTOM_INTERFACE_H_ + +int gpu_pmqos_dvfs_min_lock(int level); +#ifdef CONFIG_MALI_DEBUG_SYS +int gpu_create_sysfs_file(struct device *dev); +void gpu_remove_sysfs_file(struct device *dev); +#endif /* CONFIG_MALI_DEBUG_SYS */ + +#endif /* _GPU_CUSTOM_INTERFACE_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_api.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_api.c new file mode 100644 index 000000000000..84b2f48a1c3e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_api.c @@ -0,0 +1,654 @@ +/* drivers/gpu/arm/.../platform/gpu_dvfs_api.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
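Every show_* handler in gpu_custom_interface.c above finishes with the same guard: append a trailing '\n' if it still fits inside PAGE_SIZE, otherwise overwrite the last two bytes with '\n' and '\0'. A minimal sketch of that idiom as a single helper is shown here for reference (the helper name is illustrative, not part of the patch); it uses scnprintf(), which returns the number of bytes actually written (at most size - 1), whereas snprintf() returns the length the output would have needed and so can let a running offset grow past the buffer.

/* Sketch only: the newline/termination idiom repeated by the show_*
 * handlers, written once. scnprintf() returns at most size - 1 bytes, so
 * the returned offset stays inside the buffer; the else branch clamps
 * output that earlier snprintf() calls may have over-reported.
 */
static ssize_t gpu_sysfs_end_buf(char *buf, ssize_t ret)
{
        if (ret < PAGE_SIZE - 1)
                return ret + scnprintf(buf + ret, PAGE_SIZE - ret, "\n");

        /* Buffer already full: force a terminating newline and NUL. */
        buf[PAGE_SIZE - 2] = '\n';
        buf[PAGE_SIZE - 1] = '\0';
        return PAGE_SIZE - 1;
}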
+ */ + +/** + * @file gpu_dvfs_api.c + * DVFS + */ + +#include +#include +#ifdef CONFIG_EXYNOS_ASV +#include +#endif +#include + +#include "mali_kbase_platform.h" +#include "gpu_control.h" +#include "gpu_dvfs_handler.h" +#include "gpu_dvfs_governor.h" + +#ifdef CONFIG_EXYNOS9630_BTS +#include +#endif + +extern struct kbase_device *pkbdev; + +static int gpu_check_target_clock(struct exynos_context *platform, int clock) +{ + int target_clock = clock; + + DVFS_ASSERT(platform); + + if (gpu_dvfs_get_level(target_clock) < 0) + return -1; + +#ifdef CONFIG_MALI_DVFS + if (!platform->dvfs_status) + return target_clock; + + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "clock: %d, min: %d, max: %d\n", clock, platform->min_lock, platform->max_lock); + + if ((platform->min_lock > 0) && (platform->power_status) && + ((target_clock < platform->min_lock) || (platform->cur_clock < platform->min_lock))) + target_clock = platform->min_lock; + + if ((platform->max_lock > 0) && (target_clock > platform->max_lock)) + target_clock = platform->max_lock; +#endif /* CONFIG_MALI_DVFS */ + + platform->step = gpu_dvfs_get_level(target_clock); + + return target_clock; +} + +#ifdef CONFIG_MALI_DVFS +static int gpu_update_cur_level(struct exynos_context *platform) +{ + unsigned long flags; + int level = 0; + + DVFS_ASSERT(platform); + + level = gpu_dvfs_get_level(platform->cur_clock); + if (level >= 0) { + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + if (platform->step != level) + platform->down_requirement = platform->table[level].down_staycount; + if (platform->step < level) + platform->interactive.delay_count = 0; + platform->step = level; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + } else { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: invalid dvfs level returned %d gpu power %d\n", __func__, platform->cur_clock, gpu_is_power_on()); + return -1; + } + return 0; +} +#else +#define gpu_update_cur_level(platform) (0) +#endif + +int gpu_set_target_clk_vol(int clk, bool pending_is_allowed) +{ + int ret = 0, target_clk = 0; + int prev_clk = 0; + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + if (!gpu_control_is_power_on(pkbdev)) { + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: can't set clock and voltage in the power-off state!\n", __func__); + return -1; + } + + mutex_lock(&platform->gpu_clock_lock); +#ifdef CONFIG_MALI_DVFS + if (pending_is_allowed && platform->dvs_is_enabled) { + if (!platform->dvfs_pending && clk < platform->cur_clock) { + platform->dvfs_pending = clk; + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "pending to change the clock [%d -> %d\n", platform->cur_clock, platform->dvfs_pending); + } else if (clk > platform->cur_clock) { + platform->dvfs_pending = 0; + } + mutex_unlock(&platform->gpu_clock_lock); + return 0; + } else { + platform->dvfs_pending = 0; + } + + if (platform->dvs_is_enabled || !platform->power_status) { + mutex_unlock(&platform->gpu_clock_lock); + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: can't control clock and voltage in dvs and power off %d %d\n", + __func__, + platform->dvs_is_enabled, + platform->power_status); + return 0; + } + +#endif /* CONFIG_MALI_DVFS */ + + target_clk = gpu_check_target_clock(platform, clk); + if (target_clk < 0) { + mutex_unlock(&platform->gpu_clock_lock); + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, + "%s: mismatch clock error (source %d, target %d)\n", __func__, clk, target_clk); + return -1; + } + +#ifdef CONFIG_MALI_RT_PM + if 
(platform->exynos_pm_domain) { + mutex_lock(&platform->exynos_pm_domain->access_lock); + if (!platform->dvs_is_enabled && gpu_is_power_on()) + prev_clk = gpu_get_cur_clock(platform); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + } +#endif + +#ifdef CONFIG_MALI_DVFS + gpu_control_set_dvfs(kbdev, target_clk); +#endif + ret = gpu_update_cur_level(platform); + +/* W/A for BS_G3D_PERFORMANCE misspelling on kernel version 4.4 */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)) +#define BS_G3D_PERFORMANCE BS_G3D_PEFORMANCE +#endif + + /* MALI_SEC_INTEGRATION : for EXYNOS_BTS */ + if (platform->gpu_bts_support) { +#ifdef CONFIG_EXYNOS9630_BTS + if (target_clk >= platform->mo_min_clock && !platform->is_set_bts) { + bts_add_scenario(platform->bts_scen_idx); + platform->is_set_bts = 1; + } else if (target_clk < platform->mo_min_clock && platform->is_set_bts) { + bts_del_scenario(platform->bts_scen_idx); + platform->is_set_bts = 0; + } +#else + if (target_clk >= platform->mo_min_clock) + bts_update_scen(BS_G3D_PERFORMANCE, 1); /* GPU IDQ : 0 (max token) */ + else + bts_update_scen(BS_G3D_PERFORMANCE, 0); /* GPU IDQ : 0x3 (default 12ea) */ +#endif + } + + mutex_unlock(&platform->gpu_clock_lock); + + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "clk[%d -> %d], vol[%d (margin : %d)]\n", + prev_clk, target_clk, gpu_get_cur_voltage(platform), platform->voltage_margin); + + return ret; +} + +#ifdef CONFIG_MALI_DVFS +int gpu_set_target_clk_vol_pending(int clk) +{ + int ret = 0, target_clk = 0; + int prev_clk = 0; + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + target_clk = gpu_check_target_clock(platform, clk); + if (target_clk < 0) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, + "%s: mismatch clock error (source %d, target %d)\n", __func__, clk, target_clk); + return -1; + } + +#ifdef CONFIG_MALI_RT_PM + if (platform->exynos_pm_domain) { + mutex_lock(&platform->exynos_pm_domain->access_lock); + if (!platform->dvs_is_enabled && gpu_is_power_on()) + prev_clk = gpu_get_cur_clock(platform); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + } +#endif + + gpu_control_set_dvfs(kbdev, target_clk); + ret = gpu_update_cur_level(platform); + + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "pending clk[%d -> %d], vol[%d (margin : %d)]\n", + prev_clk, target_clk, gpu_get_cur_voltage(platform), platform->voltage_margin); + + return ret; +} + +int gpu_dvfs_boost_lock(gpu_dvfs_boost_command boost_command) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + if (!platform->dvfs_status) + return 0; + + if ((boost_command < GPU_DVFS_BOOST_SET) || (boost_command > GPU_DVFS_BOOST_END)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid boost command is called (%d)\n", __func__, boost_command); + return -1; + } + + switch (boost_command) { + case GPU_DVFS_BOOST_SET: + platform->boost_is_enabled = true; + if (platform->boost_gpu_min_lock) + gpu_dvfs_clock_lock(GPU_DVFS_MIN_LOCK, BOOST_LOCK, platform->boost_gpu_min_lock); +#ifdef CONFIG_MALI_PM_QOS + if (platform->boost_egl_min_lock) + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_EGL_SET); +#endif /* CONFIG_MALI_PM_QOS */ + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: boost mode is enabled (CPU: %d, GPU %d)\n", + __func__, platform->boost_egl_min_lock, platform->boost_gpu_min_lock); + break; + case GPU_DVFS_BOOST_UNSET: + platform->boost_is_enabled = false; + if 
(platform->boost_gpu_min_lock) + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, BOOST_LOCK, 0); +#ifdef CONFIG_MALI_PM_QOS + if (platform->boost_egl_min_lock) + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_EGL_RESET); +#endif /* CONFIG_MALI_PM_QOS */ + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: boost mode is disabled (CPU: %d, GPU %d)\n", + __func__, platform->boost_egl_min_lock, platform->boost_gpu_min_lock); + break; + case GPU_DVFS_BOOST_GPU_UNSET: + if (platform->boost_gpu_min_lock) + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, BOOST_LOCK, 0); + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: boost mode is disabled (GPU %d)\n", + __func__, platform->boost_gpu_min_lock); + break; + default: + break; + } + + return 0; +} + +int gpu_dvfs_clock_lock(gpu_dvfs_lock_command lock_command, gpu_dvfs_lock_type lock_type, int clock) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + int i; + bool dirty = false; + unsigned long flags; + + DVFS_ASSERT(platform); + + if (!platform->dvfs_status) + return 0; + + if ((lock_type < TMU_LOCK) || (lock_type >= NUMBER_LOCK)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: invalid lock type is called (%d)\n", __func__, lock_type); + return -1; + } + + switch (lock_command) { + case GPU_DVFS_MAX_LOCK: + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + if (gpu_dvfs_get_level(clock) < 0) { + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "max lock error: invalid clock value %d\n", clock); + return -1; + } + + platform->user_max_lock[lock_type] = clock; + platform->max_lock = clock; + + if (platform->max_lock > 0) { + for (i = 0; i < NUMBER_LOCK; i++) { + if (platform->user_max_lock[i] > 0) + platform->max_lock = MIN(platform->max_lock, platform->user_max_lock[i]); + } + } else { + platform->max_lock = clock; + } + + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + if ((platform->max_lock > 0) && (platform->cur_clock >= platform->max_lock)) + gpu_set_target_clk_vol(platform->max_lock, false); + + GPU_LOG(DVFS_DEBUG, LSI_GPU_MAX_LOCK, lock_type, clock, + "lock max clk[%d], user lock[%d], current clk[%d]\n", + platform->max_lock, platform->user_max_lock[lock_type], platform->cur_clock); + break; + case GPU_DVFS_MIN_LOCK: + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + if (gpu_dvfs_get_level(clock) < 0) { + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "min lock error: invalid clock value %d\n", clock); + return -1; + } + + platform->user_min_lock[lock_type] = clock; + platform->min_lock = clock; + + if (platform->min_lock > 0) { + for (i = 0; i < NUMBER_LOCK; i++) { + if (platform->user_min_lock[i] > 0) + platform->min_lock = MAX(platform->min_lock, platform->user_min_lock[i]); + } + } else { + platform->min_lock = clock; + } + + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + if ((platform->min_lock > 0) && (platform->cur_clock < platform->min_lock) + && (platform->min_lock <= platform->max_lock)) + gpu_set_target_clk_vol(platform->min_lock, false); + + GPU_LOG(DVFS_DEBUG, LSI_GPU_MIN_LOCK, lock_type, clock, + "lock min clk[%d], user lock[%d], current clk[%d]\n", + platform->min_lock, platform->user_min_lock[lock_type], platform->cur_clock); + break; + case GPU_DVFS_MAX_UNLOCK: + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + + platform->user_max_lock[lock_type] = 0; + platform->max_lock = platform->gpu_max_clock; + + for (i = 0; i < 
NUMBER_LOCK; i++) { + if (platform->user_max_lock[i] > 0) { + dirty = true; + platform->max_lock = MIN(platform->user_max_lock[i], platform->max_lock); + } + } + + if (!dirty) + platform->max_lock = 0; + + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + GPU_LOG(DVFS_DEBUG, LSI_GPU_MAX_LOCK, lock_type, clock, "unlock max clk\n"); + break; + case GPU_DVFS_MIN_UNLOCK: + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + + platform->user_min_lock[lock_type] = 0; + platform->min_lock = platform->gpu_min_clock; + + for (i = 0; i < NUMBER_LOCK; i++) { + if (platform->user_min_lock[i] > 0) { + dirty = true; + platform->min_lock = MAX(platform->user_min_lock[i], platform->min_lock); + } + } + + if (!dirty) + platform->min_lock = 0; + + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + GPU_LOG(DVFS_DEBUG, LSI_GPU_MIN_LOCK, lock_type, clock, "unlock min clk\n"); + break; + default: + break; + } + + return 0; +} + +void gpu_dvfs_timer_control(bool enable) +{ + unsigned long flags; + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + if (!platform->dvfs_status) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: DVFS is disabled\n", __func__); + return; + } + if (kbdev->pm.backend.metrics.timer_active && !enable) { + cancel_delayed_work(platform->delayed_work); + flush_workqueue(platform->dvfs_wq); + } else if (!kbdev->pm.backend.metrics.timer_active && enable) { + queue_delayed_work_on(0, platform->dvfs_wq, + platform->delayed_work, msecs_to_jiffies(platform->polling_speed)); + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + platform->down_requirement = platform->table[platform->step].down_staycount; + platform->interactive.delay_count = 0; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + } + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = enable; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} + +int gpu_dvfs_on_off(bool enable) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + + DVFS_ASSERT(platform); + + if (enable && !platform->dvfs_status) { + mutex_lock(&platform->gpu_dvfs_handler_lock); + gpu_set_target_clk_vol(platform->cur_clock, false); + gpu_dvfs_handler_init(kbdev); + mutex_unlock(&platform->gpu_dvfs_handler_lock); + + gpu_dvfs_timer_control(true); + } else if (!enable && platform->dvfs_status) { + gpu_dvfs_timer_control(false); + + mutex_lock(&platform->gpu_dvfs_handler_lock); + gpu_dvfs_handler_deinit(kbdev); + gpu_set_target_clk_vol(platform->gpu_dvfs_config_clock, false); + mutex_unlock(&platform->gpu_dvfs_handler_lock); + } else { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: impossible state to change dvfs status (current: %d, request: %d)\n", + __func__, platform->dvfs_status, enable); + return -1; + } + + return 0; +} + +int gpu_dvfs_governor_change(int governor_type) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + mutex_lock(&platform->gpu_dvfs_handler_lock); + gpu_dvfs_governor_setting(platform, governor_type); + mutex_unlock(&platform->gpu_dvfs_handler_lock); + + return 0; +} +#endif /* CONFIG_MALI_DVFS */ + +int gpu_dvfs_init_time_in_state(void) +{ +#ifdef CONFIG_MALI_DEBUG_SYS + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct 
exynos_context *) kbdev->platform_context; + int i; + + DVFS_ASSERT(platform); + + for (i = gpu_dvfs_get_level(platform->gpu_max_clock); i <= gpu_dvfs_get_level(platform->gpu_min_clock); i++) + platform->table[i].time = 0; +#endif /* CONFIG_MALI_DEBUG_SYS */ + + return 0; +} + +int gpu_dvfs_update_time_in_state(int clock) +{ +#if defined(CONFIG_MALI_DEBUG_SYS) && defined(CONFIG_MALI_DVFS) + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + u64 current_time; + static u64 prev_time; + int level = gpu_dvfs_get_level(clock); + + DVFS_ASSERT(platform); + + if (prev_time == 0) + prev_time = get_jiffies_64(); + + current_time = get_jiffies_64(); + if ((level >= gpu_dvfs_get_level(platform->gpu_max_clock)) && (level <= gpu_dvfs_get_level(platform->gpu_min_clock))) + platform->table[level].time += current_time-prev_time; + + prev_time = current_time; +#endif /* CONFIG_MALI_DEBUG_SYS */ + + return 0; +} + +int gpu_dvfs_get_level(int clock) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + int i; + + DVFS_ASSERT(platform); + + if ((clock < platform->gpu_min_clock) || + (!platform->using_max_limit_clock && (clock > platform->gpu_max_clock)) || + (platform->using_max_limit_clock && (clock > platform->gpu_max_clock_limit))) + return -1; + + for (i = 0; i < platform->table_size; i++) { + if (platform->table[i].clock == clock) + return i; + } + + return -1; +} + +int gpu_dvfs_get_level_clock(int clock) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + int i, min, max; + + DVFS_ASSERT(platform); + + min = gpu_dvfs_get_level(platform->gpu_min_clock); + max = gpu_dvfs_get_level(platform->gpu_max_clock); + + for (i = max; i <= min; i++) + if (clock - (int)(platform->table[i].clock) >= 0) + return platform->table[i].clock; + + return -1; +} + +int gpu_dvfs_get_voltage(int clock) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + int i; + + DVFS_ASSERT(platform); + + for (i = 0; i < platform->table_size; i++) { + if (platform->table[i].clock == clock) + return platform->table[i].voltage; + } + + return -1; +} + +int gpu_dvfs_get_cur_asv_abb(void) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + if ((platform->step < 0) || (platform->step >= platform->table_size)) + return 0; + + return platform->table[platform->step].asv_abb; +} + +int gpu_dvfs_get_clock(int level) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + if ((level < 0) || (level >= platform->table_size)) + return -1; + + return platform->table[level].clock; +} + +int gpu_dvfs_get_step(void) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + return platform->table_size; +} + +int gpu_dvfs_get_cur_clock(void) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + int clock = 0; + + DVFS_ASSERT(platform); +#ifdef CONFIG_MALI_RT_PM + if (platform->exynos_pm_domain) { + mutex_lock(&platform->exynos_pm_domain->access_lock); + if 
(!platform->dvs_is_enabled && gpu_is_power_on()) + clock = gpu_get_cur_clock(platform); + mutex_unlock(&platform->exynos_pm_domain->access_lock); + } +#else + if (gpu_control_is_power_on(pkbdev) == 1) { + mutex_lock(&platform->gpu_clock_lock); + + if (platform->dvs_is_enabled || (platform->inter_frame_pm_status && !platform->inter_frame_pm_is_poweron)) { + mutex_unlock(&platform->gpu_clock_lock); + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, + "%s: can't get dvfs cur clock\n", __func__); + return 0; + } + clock = gpu_get_cur_clock(platform); + mutex_unlock(&platform->gpu_clock_lock); + } +#endif + + return clock; +} + +int gpu_dvfs_get_utilization(void) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + int util = 0; + + DVFS_ASSERT(platform); + + if (gpu_control_is_power_on(pkbdev) == 1) + util = platform->env_data.utilization; + + return util; +} + +int gpu_dvfs_get_max_freq(void) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + return platform->gpu_max_clock; +} diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_governor.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_governor.c new file mode 100644 index 000000000000..ffaf2fd91c2a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_governor.c @@ -0,0 +1,377 @@ +/* drivers/gpu/arm/.../platform/gpu_dvfs_governor.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
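gpu_dvfs_governor.c below keeps a table of governors; each entry pairs a name with a callback of type int (*)(struct exynos_context *, int utilization) that moves platform->step between the max-clock and min-clock levels, and gpu_dvfs_governor_setting() points gpu_dvfs_get_next_level at the selected entry. As a hedged sketch of that contract only (this governor is not in the patch), one extra entry could look like this:

/* Sketch only: a trivial governor following the same contract as
 * gpu_dvfs_governor_default() below: read the utilization, adjust
 * platform->step, keep it inside [level(gpu_max_clock), level(gpu_min_clock)],
 * refresh down_requirement, return 0. A lower step index means a higher clock.
 */
static int gpu_dvfs_governor_perf_sketch(struct exynos_context *platform,
                                         int utilization)
{
        int max_level = gpu_dvfs_get_level(platform->gpu_max_clock);
        int min_level = gpu_dvfs_get_level(platform->gpu_min_clock);

        if (utilization > 90)
                platform->step = max_level;   /* jump to the top level */
        else if (platform->step < min_level)
                platform->step++;             /* drift down one level per tick */

        platform->down_requirement = platform->table[platform->step].down_staycount;

        return 0;
}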
+ */ + +/** + * @file gpu_dvfs_governor.c + * DVFS + */ + +#include + +#include "mali_kbase_platform.h" +#include "gpu_dvfs_handler.h" +#include "gpu_dvfs_governor.h" +#ifdef CONFIG_CPU_THERMAL_IPA +#include "gpu_ipa.h" +#endif /* CONFIG_CPU_THERMAL_IPA */ + +#ifdef CONFIG_MALI_DVFS +typedef int (*GET_NEXT_LEVEL)(struct exynos_context *platform, int utilization); +GET_NEXT_LEVEL gpu_dvfs_get_next_level; + +static int gpu_dvfs_governor_default(struct exynos_context *platform, int utilization); +static int gpu_dvfs_governor_interactive(struct exynos_context *platform, int utilization); +static int gpu_dvfs_governor_static(struct exynos_context *platform, int utilization); +static int gpu_dvfs_governor_booster(struct exynos_context *platform, int utilization); +static int gpu_dvfs_governor_dynamic(struct exynos_context *platform, int utilization); + +static gpu_dvfs_governor_info governor_info[G3D_MAX_GOVERNOR_NUM] = { + { + G3D_DVFS_GOVERNOR_DEFAULT, + "Default", + gpu_dvfs_governor_default, + NULL + }, + { + G3D_DVFS_GOVERNOR_INTERACTIVE, + "Interactive", + gpu_dvfs_governor_interactive, + NULL + }, + { + G3D_DVFS_GOVERNOR_STATIC, + "Static", + gpu_dvfs_governor_static, + NULL + }, + { + G3D_DVFS_GOVERNOR_BOOSTER, + "Booster", + gpu_dvfs_governor_booster, + NULL + }, + { + G3D_DVFS_GOVERNOR_DYNAMIC, + "Dynamic", + gpu_dvfs_governor_dynamic, + NULL + }, +}; + +void gpu_dvfs_update_start_clk(int governor_type, int clk) +{ + governor_info[governor_type].start_clk = clk; +} + +void gpu_dvfs_update_table(int governor_type, gpu_dvfs_info *table) +{ + governor_info[governor_type].table = table; +} + +void gpu_dvfs_update_table_size(int governor_type, int size) +{ + governor_info[governor_type].table_size = size; +} + +void *gpu_dvfs_get_governor_info(void) +{ + return &governor_info; +} + +static int gpu_dvfs_governor_default(struct exynos_context *platform, int utilization) +{ + DVFS_ASSERT(platform); + + if ((platform->step > gpu_dvfs_get_level(platform->gpu_max_clock)) && + (utilization > platform->table[platform->step].max_threshold)) { + platform->step--; + if (platform->table[platform->step].clock > platform->gpu_max_clock_limit) + platform->step = gpu_dvfs_get_level(platform->gpu_max_clock_limit); + platform->down_requirement = platform->table[platform->step].down_staycount; + } else if ((platform->step < gpu_dvfs_get_level(platform->gpu_min_clock)) && (utilization < platform->table[platform->step].min_threshold)) { + platform->down_requirement--; + if (platform->down_requirement == 0) { + platform->step++; + platform->down_requirement = platform->table[platform->step].down_staycount; + } + } else { + platform->down_requirement = platform->table[platform->step].down_staycount; + } + DVFS_ASSERT((platform->step >= gpu_dvfs_get_level(platform->gpu_max_clock)) + && (platform->step <= gpu_dvfs_get_level(platform->gpu_min_clock))); + + return 0; +} + +static int gpu_dvfs_governor_interactive(struct exynos_context *platform, int utilization) +{ + DVFS_ASSERT(platform); + + if ((platform->step > gpu_dvfs_get_level(platform->gpu_max_clock) || + (platform->using_max_limit_clock && platform->step > gpu_dvfs_get_level(platform->gpu_max_clock_limit))) + && (utilization > platform->table[platform->step].max_threshold)) { + int highspeed_level = gpu_dvfs_get_level(platform->interactive.highspeed_clock); + if ((highspeed_level > 0) && (platform->step > highspeed_level) + && (utilization > platform->interactive.highspeed_load)) { + if (platform->interactive.delay_count == 
platform->interactive.highspeed_delay) { + platform->step = highspeed_level; + platform->interactive.delay_count = 0; + } else { + platform->interactive.delay_count++; + } + } else { + platform->step--; + platform->interactive.delay_count = 0; + } + if (platform->table[platform->step].clock > platform->gpu_max_clock_limit) + platform->step = gpu_dvfs_get_level(platform->gpu_max_clock_limit); + platform->down_requirement = platform->table[platform->step].down_staycount; + } else if ((platform->step < gpu_dvfs_get_level(platform->gpu_min_clock)) + && (utilization < platform->table[platform->step].min_threshold)) { + platform->interactive.delay_count = 0; + platform->down_requirement--; + if (platform->down_requirement == 0) { + platform->step++; + platform->down_requirement = platform->table[platform->step].down_staycount; + } + } else { + platform->interactive.delay_count = 0; + platform->down_requirement = platform->table[platform->step].down_staycount; + } + + DVFS_ASSERT(((platform->using_max_limit_clock && (platform->step >= gpu_dvfs_get_level(platform->gpu_max_clock_limit))) || + ((!platform->using_max_limit_clock && (platform->step >= gpu_dvfs_get_level(platform->gpu_max_clock))))) + && (platform->step <= gpu_dvfs_get_level(platform->gpu_min_clock))); + + return 0; +} + +#define G3D_GOVERNOR_STATIC_PERIOD 10 +static int gpu_dvfs_governor_static(struct exynos_context *platform, int utilization) +{ + static bool step_down = true; + static int count; + + DVFS_ASSERT(platform); + + if (count == G3D_GOVERNOR_STATIC_PERIOD) { + if (step_down) { + if (platform->step > gpu_dvfs_get_level(platform->gpu_max_clock)) + platform->step--; + if (((platform->max_lock > 0) && (platform->table[platform->step].clock == platform->max_lock)) + || (platform->step == gpu_dvfs_get_level(platform->gpu_max_clock))) + step_down = false; + } else { + if (platform->step < gpu_dvfs_get_level(platform->gpu_min_clock)) + platform->step++; + if (((platform->min_lock > 0) && (platform->table[platform->step].clock == platform->min_lock)) + || (platform->step == gpu_dvfs_get_level(platform->gpu_min_clock))) + step_down = true; + } + + count = 0; + } else { + count++; + } + + return 0; +} + +static int gpu_dvfs_governor_booster(struct exynos_context *platform, int utilization) +{ + static int weight; + int cur_weight, booster_threshold, dvfs_table_lock; + + DVFS_ASSERT(platform); + + cur_weight = platform->cur_clock*utilization; + /* booster_threshold = current clock * set the percentage of utilization */ + booster_threshold = platform->cur_clock * 50; + + dvfs_table_lock = gpu_dvfs_get_level(platform->gpu_max_clock); + + if ((platform->step >= dvfs_table_lock+2) && + ((cur_weight - weight) > booster_threshold)) { + platform->step -= 2; + platform->down_requirement = platform->table[platform->step].down_staycount; + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "Booster Governor: G3D level 2 step\n"); + } else if ((platform->step > gpu_dvfs_get_level(platform->gpu_max_clock)) && + (utilization > platform->table[platform->step].max_threshold)) { + platform->step--; + platform->down_requirement = platform->table[platform->step].down_staycount; + } else if ((platform->step < gpu_dvfs_get_level(platform->gpu_min_clock)) && + (utilization < platform->table[platform->step].min_threshold)) { + platform->down_requirement--; + if (platform->down_requirement == 0) { + platform->step++; + platform->down_requirement = platform->table[platform->step].down_staycount; + } + } else { + platform->down_requirement = 
platform->table[platform->step].down_staycount; + } + + DVFS_ASSERT((platform->step >= gpu_dvfs_get_level(platform->gpu_max_clock)) + && (platform->step <= gpu_dvfs_get_level(platform->gpu_min_clock))); + + weight = cur_weight; + + return 0; +} + +static int gpu_dvfs_governor_dynamic(struct exynos_context *platform, int utilization) +{ + int max_clock_lev = gpu_dvfs_get_level(platform->gpu_max_clock); + int min_clock_lev = gpu_dvfs_get_level(platform->gpu_min_clock); + + DVFS_ASSERT(platform); + + if ((platform->step > max_clock_lev) && (utilization > platform->table[platform->step].max_threshold)) { + if (platform->table[platform->step].clock * utilization > + platform->table[platform->step - 1].clock * platform->table[platform->step - 1].max_threshold) { + platform->step -= 2; + if (platform->step < max_clock_lev) { + platform->step = max_clock_lev; + } + } else { + platform->step--; + } + + if (platform->table[platform->step].clock > platform->gpu_max_clock_limit) + platform->step = gpu_dvfs_get_level(platform->gpu_max_clock_limit); + + platform->down_requirement = platform->table[platform->step].down_staycount; + } else if ((platform->step < min_clock_lev) && (utilization < platform->table[platform->step].min_threshold)) { + platform->down_requirement--; + if (platform->down_requirement == 0) + { + if (platform->table[platform->step].clock * utilization < + platform->table[platform->step + 1].clock * platform->table[platform->step + 1].min_threshold) { + platform->step += 2; + if (platform->step > min_clock_lev) { + platform->step = min_clock_lev; + } + } else { + platform->step++; + } + platform->down_requirement = platform->table[platform->step].down_staycount; + } + } else { + platform->down_requirement = platform->table[platform->step].down_staycount; + } + + DVFS_ASSERT(((platform->using_max_limit_clock && (platform->step >= gpu_dvfs_get_level(platform->gpu_max_clock_limit))) || + ((!platform->using_max_limit_clock && (platform->step >= gpu_dvfs_get_level(platform->gpu_max_clock))))) + && (platform->step <= gpu_dvfs_get_level(platform->gpu_min_clock))); + + return 0; +} + +static int gpu_dvfs_decide_next_governor(struct exynos_context *platform) +{ + return 0; +} + +void ipa_mali_dvfs_requested(unsigned int freq); +int gpu_dvfs_decide_next_freq(struct kbase_device *kbdev, int utilization) +{ + unsigned long flags; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + DVFS_ASSERT(platform); + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + gpu_dvfs_decide_next_governor(platform); + gpu_dvfs_get_next_level(platform, utilization); + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + +#ifdef CONFIG_MALI_SEC_CL_BOOST + if (kbdev->pm.backend.metrics.is_full_compute_util && platform->cl_boost_disable == false) + platform->step = gpu_dvfs_get_level(platform->gpu_max_clock); +#endif + +#ifdef CONFIG_CPU_THERMAL_IPA + ipa_mali_dvfs_requested(platform->table[platform->step].clock); +#endif /* CONFIG_CPU_THERMAL_IPA */ + + return platform->table[platform->step].clock; +} + +int gpu_dvfs_governor_setting(struct exynos_context *platform, int governor_type) +{ +#ifdef CONFIG_MALI_DVFS + int i; +#endif /* CONFIG_MALI_DVFS */ + unsigned long flags; + + DVFS_ASSERT(platform); + + if ((governor_type < 0) || (governor_type >= G3D_MAX_GOVERNOR_NUM)) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: invalid governor type (%d)\n", __func__, governor_type); + return -1; + } + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); +#ifdef 
CONFIG_MALI_DVFS + platform->table = governor_info[governor_type].table; + platform->table_size = governor_info[governor_type].table_size; + platform->step = gpu_dvfs_get_level(governor_info[governor_type].start_clk); + gpu_dvfs_get_next_level = (GET_NEXT_LEVEL)(governor_info[governor_type].governor); + + platform->env_data.utilization = 80; + platform->max_lock = 0; + platform->min_lock = 0; + + for (i = 0; i < NUMBER_LOCK; i++) { + platform->user_max_lock[i] = 0; + platform->user_min_lock[i] = 0; + } + + platform->down_requirement = 1; + platform->governor_type = governor_type; + + gpu_dvfs_init_time_in_state(); +#else /* CONFIG_MALI_DVFS */ + platform->table = (gpu_dvfs_info *)gpu_get_attrib_data(platform->attrib, GPU_GOVERNOR_TABLE_DEFAULT); + platform->table_size = (u32)gpu_get_attrib_data(platform->attrib, GPU_GOVERNOR_TABLE_SIZE_DEFAULT); + platform->step = gpu_dvfs_get_level(platform->gpu_dvfs_start_clock); +#endif /* CONFIG_MALI_DVFS */ + platform->cur_clock = platform->table[platform->step].clock; + + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + return 0; +} + +int gpu_dvfs_governor_init(struct kbase_device *kbdev) +{ + int governor_type = G3D_DVFS_GOVERNOR_DEFAULT; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + +#ifdef CONFIG_MALI_DVFS + governor_type = platform->governor_type; +#endif /* CONFIG_MALI_DVFS */ + if (gpu_dvfs_governor_setting(platform, governor_type) < 0) { + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "%s: fail to initialize governor\n", __func__); + return -1; + } + + /* share table_size among governors, as every single governor has same table_size. */ + platform->save_cpu_max_freq = kmalloc(sizeof(int) * platform->table_size, GFP_KERNEL); +#if defined(CONFIG_MALI_DVFS) && defined(CONFIG_CPU_THERMAL_IPA) + gpu_ipa_dvfs_calc_norm_utilisation(kbdev); +#endif /* CONFIG_MALI_DVFS && CONFIG_CPU_THERMAL_IPA */ + + return 0; +} + +#endif /* CONFIG_MALI_DVFS */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_governor.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_governor.h new file mode 100644 index 000000000000..d205b0978983 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_governor.h @@ -0,0 +1,38 @@ +/* drivers/gpu/arm/.../platform/gpu_dvfs_governor.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
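gpu_dvfs_governor_setting() above clears every per-source slot in user_max_lock[] and user_min_lock[]; those are the arrays that gpu_dvfs_clock_lock() in gpu_dvfs_api.c reduces with MIN()/MAX() to compute the effective max_lock and min_lock. The stand-alone illustration below mirrors the max-lock reduction with made-up request values (the helper is illustrative, not part of the patch):

/* Illustration only: resolve the effective max_lock from the per-source
 * requests, as gpu_dvfs_clock_lock() does. A slot of 0 means that source
 * holds no lock; a result of 0 means the max clock is unconstrained,
 * matching the GPU_DVFS_MAX_UNLOCK path.
 */
static int sketch_resolve_max_lock(const int user_max_lock[], int nr_locks)
{
        int i, effective = 0;

        for (i = 0; i < nr_locks; i++) {
                if (user_max_lock[i] <= 0)
                        continue;
                if (effective == 0 || user_max_lock[i] < effective)
                        effective = user_max_lock[i];
        }

        return effective;
}

/* Example (values made up): if TMU_LOCK requests 455 and SYSFS_LOCK requests
 * 650, the effective max_lock is 455; dropping the TMU request re-resolves to
 * 650, and dropping both leaves max_lock at 0 (no cap).
 */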
+ */ + +/** + * @file gpu_dvfs_governor.h + * DVFS + */ + +#ifndef _GPU_DVFS_GOVERNOR_H_ +#define _GPU_DVFS_GOVERNOR_H_ + +typedef enum { + G3D_DVFS_GOVERNOR_DEFAULT = 0, + G3D_DVFS_GOVERNOR_INTERACTIVE, + G3D_DVFS_GOVERNOR_STATIC, + G3D_DVFS_GOVERNOR_BOOSTER, + G3D_DVFS_GOVERNOR_DYNAMIC, + G3D_MAX_GOVERNOR_NUM, +} gpu_governor_type; + +void gpu_dvfs_update_start_clk(int governor_type, int clk); +void gpu_dvfs_update_table(int governor_type, gpu_dvfs_info *table); +void gpu_dvfs_update_table_size(int governor_type, int size); +void *gpu_dvfs_get_governor_info(void); +int gpu_dvfs_decide_next_freq(struct kbase_device *kbdev, int utilization); +int gpu_dvfs_governor_setting(struct exynos_context *platform, int governor_type); +int gpu_dvfs_governor_init(struct kbase_device *kbdev); + +#endif /* _GPU_DVFS_GOVERNOR_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_handler.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_handler.c new file mode 100644 index 000000000000..d8789baf39ab --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_handler.c @@ -0,0 +1,100 @@ +/* drivers/gpu/arm/.../platform/gpu_dvfs_handler.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_dvfs_handler.c + * DVFS + */ + +#include + +#include "mali_kbase_platform.h" +#include "gpu_control.h" +#include "gpu_dvfs_handler.h" +#include "gpu_dvfs_governor.h" + +extern struct kbase_device *pkbdev; + +#ifdef CONFIG_MALI_DVFS +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) +{ + struct exynos_context *platform; + char *env[2] = {"FEATURE=GPUI", NULL}; + + platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + if(platform->fault_count >= 5 && platform->bigdata_uevent_is_sent == false) + { + platform->bigdata_uevent_is_sent = true; + kobject_uevent_env(&kbdev->dev->kobj, KOBJ_CHANGE, env); + } + + mutex_lock(&platform->gpu_dvfs_handler_lock); + if (gpu_control_is_power_on(kbdev)) { + int clk = 0; + gpu_dvfs_calculate_env_data(kbdev); + clk = gpu_dvfs_decide_next_freq(kbdev, platform->env_data.utilization); + gpu_set_target_clk_vol(clk, true); + } + mutex_unlock(&platform->gpu_dvfs_handler_lock); + + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "dvfs hanlder is called\n"); + + return 0; +} + +int gpu_dvfs_handler_init(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + if (!platform->dvfs_status) + platform->dvfs_status = true; + + +#ifdef CONFIG_MALI_PM_QOS + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_INIT); +#endif /* CONFIG_MALI_PM_QOS */ + + gpu_set_target_clk_vol(platform->table[platform->step].clock, false); + + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "dvfs handler initialized\n"); + return 0; +} + +int gpu_dvfs_handler_deinit(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + if (platform->dvfs_status) + platform->dvfs_status = false; + +#ifdef CONFIG_MALI_PM_QOS + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_DEINIT); +#endif /* CONFIG_MALI_PM_QOS */ + + + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "dvfs handler 
de-initialized\n"); + return 0; +} +#else +#define gpu_dvfs_event_proc(q) do { } while (0) +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) +{ + return 0; +} +#endif /* CONFIG_MALI_DVFS */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_handler.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_handler.h new file mode 100644 index 000000000000..f8b24af75aed --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_dvfs_handler.h @@ -0,0 +1,87 @@ +/* drivers/gpu/arm/.../platform/gpu_dvfs_handler.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_dvfs_handler.h + * DVFS + */ + +#ifndef _GPU_DVFS_HANDLER_H_ +#define _GPU_DVFS_HANDLER_H_ + +#define DVFS_ASSERT(x) \ +do { if (x) break; \ + printk(KERN_EMERG "### ASSERTION FAILED %s: %s: %d: %s\n", __FILE__, __func__, __LINE__, #x); dump_stack(); \ +} while (0) + +typedef enum { + GPU_DVFS_MAX_LOCK = 0, + GPU_DVFS_MIN_LOCK, + GPU_DVFS_MAX_UNLOCK, + GPU_DVFS_MIN_UNLOCK, +} gpu_dvfs_lock_command; + +typedef enum { + GPU_DVFS_BOOST_SET = 0, + GPU_DVFS_BOOST_UNSET, + GPU_DVFS_BOOST_GPU_UNSET, + GPU_DVFS_BOOST_END, +} gpu_dvfs_boost_command; + +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); +int gpu_dvfs_handler_init(struct kbase_device *kbdev); +int gpu_dvfs_handler_deinit(struct kbase_device *kbdev); + +/* gpu_dvfs_api.c */ +int gpu_set_target_clk_vol(int clk, bool pending_is_allowed); +int gpu_set_target_clk_vol_pending(int clk); +int gpu_dvfs_boost_lock(gpu_dvfs_boost_command boost_command); +int gpu_dvfs_clock_lock(gpu_dvfs_lock_command lock_command, gpu_dvfs_lock_type lock_type, int clock); +void gpu_dvfs_timer_control(bool enable); +int gpu_dvfs_on_off(bool enable); +int gpu_dvfs_governor_change(int governor_type); +int gpu_dvfs_init_time_in_state(void); +int gpu_dvfs_update_time_in_state(int clock); +int gpu_dvfs_get_level(int clock); +int gpu_dvfs_get_level_clock(int clock); +int gpu_dvfs_get_voltage(int clock); +int gpu_dvfs_get_cur_asv_abb(void); +int gpu_dvfs_get_clock(int level); +int gpu_dvfs_get_step(void); +int gpu_dvfs_get_cur_clock(void); +int gpu_dvfs_get_utilization(void); +int gpu_dvfs_get_max_freq(void); + +int gpu_dvfs_decide_max_clock(struct exynos_context *platform); + +/* gpu_utilization */ +int gpu_dvfs_start_env_data_gathering(struct kbase_device *kbdev); +int gpu_dvfs_stop_env_data_gathering(struct kbase_device *kbdev); +int gpu_dvfs_reset_env_data(struct kbase_device *kbdev); +int gpu_dvfs_calculate_env_data(struct kbase_device *kbdev); +int gpu_dvfs_calculate_env_data_ppmu(struct kbase_device *kbdev); +int gpu_dvfs_utilization_init(struct kbase_device *kbdev); +int gpu_dvfs_utilization_deinit(struct kbase_device *kbdev); + +/* gpu_pmqos.c */ +typedef enum { + GPU_CONTROL_PM_QOS_INIT = 0, + GPU_CONTROL_PM_QOS_DEINIT, + GPU_CONTROL_PM_QOS_SET, + GPU_CONTROL_PM_QOS_RESET, + GPU_CONTROL_PM_QOS_EGL_SET, + GPU_CONTROL_PM_QOS_EGL_RESET, +} gpu_pmqos_state; + +int gpu_pm_qos_command(struct exynos_context *platform, gpu_pmqos_state state); +int gpu_mif_pmqos(struct exynos_context *platform, int mem_freq); +#endif /* _GPU_DVFS_HANDLER_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_callbacks.c 
b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_callbacks.c new file mode 100644 index 000000000000..2a60ac8c655a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_callbacks.c @@ -0,0 +1,716 @@ +/* drivers/gpu/arm/.../platform/gpu_integration_callbacks.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DDK porting layer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_integration_callbacks.c + * DDK porting layer. + */ + +#include +//#include +#include + +#include +#include + +#include +#include + +#if MALI_SEC_PROBE_TEST != 1 +#include +#endif + +#if defined(CONFIG_SCHED_EMS) +#include +#if defined(CONFIG_SCHED_EMS_TUNE) +static struct emstune_mode_request emstune_req; +#else +static struct gb_qos_request gb_req = { + .name = "ems_boost", +}; +#endif +#elif defined(CONFIG_SCHED_EHMP) +#include +static struct gb_qos_request gb_req = { + .name = "ehmp_boost", +}; +#elif defined(CONFIG_SCHED_HMP) +extern int set_hmp_boost(int enable); +#endif + +/* MALI_SEC_INTEGRATION */ +#include +#define KBASE_REG_CUSTOM_TMEM (1ul << 19) +#define KBASE_REG_CUSTOM_PMEM (1ul << 20) + +#define ENTRY_TYPE_MASK 3ULL +#define ENTRY_IS_ATE 1ULL +#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_PTE 3ULL + +#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ +#define ENTRY_RD_BIT (1ULL << 6) +#define ENTRY_WR_BIT (1ULL << 7) +#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ +#define ENTRY_ACCESS_BIT (1ULL << 10) +#define ENTRY_NX_BIT (1ULL << 54) + +#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ + ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) + +/* +* peak_flops: 100/85 +* sobel: 100/50 +*/ +#define COMPUTE_JOB_WEIGHT (10000/50) + +#ifdef CONFIG_SENSORS_SEC_THERMISTOR +extern int sec_therm_get_ap_temperature(void); +#endif + +#ifdef CONFIG_MALI_SEC_VK_BOOST +#include +extern struct pm_qos_request exynos5_g3d_mif_min_qos; +#endif + +extern int gpu_register_dump(void); + +void gpu_create_context(void *ctx) +{ +#if MALI_SEC_PROBE_TEST != 1 + struct kbase_context *kctx; + char current_name[sizeof(current->comm)]; + + kctx = (struct kbase_context *)ctx; + KBASE_DEBUG_ASSERT(kctx != NULL); + + kctx->ctx_status = CTX_UNINITIALIZED; + + get_task_comm(current_name, current); + strncpy((char *)(&kctx->name), current_name, CTX_NAME_SIZE); + + kctx->ctx_status = CTX_INITIALIZED; + + kctx->destroying_context = false; + + kctx->need_to_force_schedule_out = false; +#endif +} + +void gpu_destroy_context(void *ctx) +{ +#if MALI_SEC_PROBE_TEST != 1 + struct kbase_context *kctx; + struct kbase_device *kbdev; +#if (defined(CONFIG_SCHED_EMS) || defined(CONFIG_SCHED_EHMP) || defined(CONFIG_SCHED_HMP) || defined(CONFIG_MALI_SEC_VK_BOOST)) + struct exynos_context *platform; +#endif + + kctx = (struct kbase_context *)ctx; + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kctx->destroying_context = true; + + kctx->ctx_status = CTX_DESTROYED; + +#ifdef CONFIG_MALI_DVFS + gpu_dvfs_boost_lock(GPU_DVFS_BOOST_UNSET); +#endif +#if defined(CONFIG_SCHED_EMS) || defined(CONFIG_SCHED_EHMP) + platform = (struct exynos_context *) kbdev->platform_context; + mutex_lock(&platform->gpu_sched_hmp_lock); + if (platform->ctx_need_qos) + { + platform->ctx_need_qos = false; +#if 
defined(CONFIG_SCHED_EMS_TUNE) + emstune_boost(&emstune_req, 0); +#else + gb_qos_update_request(&gb_req, 0); +#endif + } + + mutex_unlock(&platform->gpu_sched_hmp_lock); +#elif defined(CONFIG_SCHED_HMP) + platform = (struct exynos_context *) kbdev->platform_context; + mutex_lock(&platform->gpu_sched_hmp_lock); + if (platform->ctx_need_qos) { + platform->ctx_need_qos = false; + set_hmp_boost(0); + set_hmp_aggressive_up_migration(false); + set_hmp_aggressive_yield(false); + } + mutex_unlock(&platform->gpu_sched_hmp_lock); +#endif +#ifdef CONFIG_MALI_SEC_VK_BOOST + platform = (struct exynos_context *) kbdev->platform_context; + mutex_lock(&platform->gpu_vk_boost_lock); + + if (kctx->ctx_vk_need_qos) { + pm_qos_update_request(&exynos5_g3d_mif_min_qos, platform->table[platform->step].mem_freq); + kctx->ctx_vk_need_qos = false; + platform->ctx_vk_need_qos = false; + } + + mutex_unlock(&platform->gpu_vk_boost_lock); +#endif +#ifdef CONFIG_MALI_SEC_CL_BOOST + platform->cl_boost_disable = false; +#endif +#endif /* MALI_SEC_PROBE_TEST */ +} + +int gpu_vendor_dispatch(struct kbase_context *kctx, u32 flags) +{ + struct kbase_device *kbdev; + + kbdev = kctx->kbdev; + + switch (flags) + { +#if MALI_SEC_PROBE_TEST != 1 + case KBASE_FUNC_STEP_UP_MAX_GPU_LIMIT: + { +#ifdef CONFIG_MALI_DVFS + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + + if (!platform->using_max_limit_clock) { + platform->using_max_limit_clock = true; + } +#endif + break; + } + case KBASE_FUNC_RESTORE_MAX_GPU_LIMIT: + { +#ifdef CONFIG_MALI_DVFS + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + + if (platform->using_max_limit_clock) { + platform->using_max_limit_clock = false; + } +#endif + break; + } + case KBASE_FUNC_SET_MIN_LOCK: + { +#if defined(CONFIG_MALI_PM_QOS) + struct exynos_context *platform; + platform = (struct exynos_context *) kbdev->platform_context; +#if (defined(CONFIG_SCHED_EMS) || defined(CONFIG_SCHED_EHMP)) + mutex_lock(&platform->gpu_sched_hmp_lock); + if (!platform->ctx_need_qos) { + platform->ctx_need_qos = true; + /* set hmp boost */ +#if defined(CONFIG_SCHED_EMS_TUNE) + emstune_boost(&emstune_req, 1); +#else + gb_qos_update_request(&gb_req, 100); +#endif + } + mutex_unlock(&platform->gpu_sched_hmp_lock); + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_EGL_SET); +#elif defined(CONFIG_SCHED_HMP) + mutex_lock(&platform->gpu_sched_hmp_lock); + if (!platform->ctx_need_qos) { + platform->ctx_need_qos = true; + /* set hmp boost */ + set_hmp_boost(1); + set_hmp_aggressive_up_migration(true); + set_hmp_aggressive_yield(true); + } + mutex_unlock(&platform->gpu_sched_hmp_lock); + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_EGL_SET); +#endif +#endif /* CONFIG_MALI_PM_QOS */ + break; + } + + case KBASE_FUNC_UNSET_MIN_LOCK: + { +#if defined(CONFIG_MALI_PM_QOS) + struct exynos_context *platform; + platform = (struct exynos_context *) kbdev->platform_context; +#if (defined(CONFIG_SCHED_EMS) || defined(CONFIG_SCHED_EHMP)) + mutex_lock(&platform->gpu_sched_hmp_lock); + if (platform->ctx_need_qos) { + platform->ctx_need_qos = false; + /* unset hmp boost */ +#if defined(CONFIG_SCHED_EMS_TUNE) + emstune_boost(&emstune_req, 0); +#else + gb_qos_update_request(&gb_req, 0); +#endif + } + mutex_unlock(&platform->gpu_sched_hmp_lock); + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_EGL_RESET); +#elif defined(CONFIG_SCHED_HMP) + mutex_lock(&platform->gpu_sched_hmp_lock); + if (platform->ctx_need_qos) { + platform->ctx_need_qos = false; + /* unset hmp boost 
*/ + set_hmp_boost(0); + set_hmp_aggressive_up_migration(false); + set_hmp_aggressive_yield(false); + } + mutex_unlock(&platform->gpu_sched_hmp_lock); + gpu_pm_qos_command(platform, GPU_CONTROL_PM_QOS_EGL_RESET); +#endif +#endif /* CONFIG_MALI_PM_QOS */ + break; + } + case KBASE_FUNC_SET_VK_BOOST_LOCK: + { +#if defined(CONFIG_MALI_PM_QOS) && defined(CONFIG_MALI_SEC_VK_BOOST) + struct exynos_context *platform; + platform = (struct exynos_context *) kbdev->platform_context; + + mutex_lock(&platform->gpu_vk_boost_lock); + + if (!kctx->ctx_vk_need_qos) { + kctx->ctx_vk_need_qos = true; + platform->ctx_vk_need_qos = true; + } + + if (platform->ctx_vk_need_qos == true && platform->max_lock == platform->gpu_vk_boost_max_clk_lock) { + pm_qos_update_request(&exynos5_g3d_mif_min_qos, platform->gpu_vk_boost_mif_min_clk_lock); + } + + mutex_unlock(&platform->gpu_vk_boost_lock); +#endif + break; + } + case KBASE_FUNC_UNSET_VK_BOOST_LOCK: + { +#if defined(CONFIG_MALI_PM_QOS) && defined(CONFIG_MALI_SEC_VK_BOOST) + struct exynos_context *platform; + platform = (struct exynos_context *) kbdev->platform_context; + + mutex_lock(&platform->gpu_vk_boost_lock); + + if (kctx->ctx_vk_need_qos) { + kctx->ctx_vk_need_qos = false; + platform->ctx_vk_need_qos = false; + pm_qos_update_request(&exynos5_g3d_mif_min_qos, platform->table[platform->step].mem_freq); + } + + mutex_unlock(&platform->gpu_vk_boost_lock); +#endif + break; + } +#endif /* MALI_SEC_PROBE_TEST */ + default: + break; + } + return 0; +} + +int gpu_memory_seq_show(struct seq_file *sfile, void *data) +{ + return 0; +} + +void gpu_update_status(void *dev, char *str, u32 val) +{ + struct kbase_device *kbdev; + struct exynos_context *platform; + int i; + + kbdev = (struct kbase_device *)dev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + platform = (struct exynos_context *) kbdev->platform_context; + if (strcmp(str, "completion_code") == 0) { + if (val == 0x40) + platform->gpu_exception_count[GPU_JOB_CONFIG_FAULT]++; + else if (val == 0x41) + platform->gpu_exception_count[GPU_JOB_POWER_FAULT]++; + else if (val == 0x42) + platform->gpu_exception_count[GPU_JOB_READ_FAULT]++; + else if (val == 0x43) + platform->gpu_exception_count[GPU_JOB_WRITE_FAULT]++; + else if (val == 0x44) + platform->gpu_exception_count[GPU_JOB_AFFINITY_FAULT]++; + else if (val == 0x48) + platform->gpu_exception_count[GPU_JOB_BUS_FAULT]++; + else if (val == 0x58) + platform->gpu_exception_count[GPU_DATA_INVALIDATE_FAULT]++; + else if (val == 0x59) + platform->gpu_exception_count[GPU_TILE_RANGE_FAULT]++; + else if (val == 0x60) + platform->gpu_exception_count[GPU_OUT_OF_MEMORY_FAULT]++; + /* GPU FAULT */ + else if (val == 0x80) + platform->gpu_exception_count[GPU_DELAYED_BUS_FAULT]++; + else if (val == 0x88) + platform->gpu_exception_count[GPU_SHAREABILITY_FAULT]++; + /* MMU FAULT */ + else if (val >= 0xC0 && val <= 0xC7) + platform->gpu_exception_count[GPU_MMU_TRANSLATION_FAULT]++; + else if (val >= 0xC8 && val <= 0xCF) + platform->gpu_exception_count[GPU_MMU_PERMISSION_FAULT]++; + else if (val >= 0xD0 && val <= 0xD7) + platform->gpu_exception_count[GPU_MMU_TRANSTAB_BUS_FAULT]++; + else if (val >= 0xD8 && val <= 0xDF) + platform->gpu_exception_count[GPU_MMU_ACCESS_FLAG_FAULT]++; + else if (val >= 0xE0 && val <= 0xE7) + platform->gpu_exception_count[GPU_MMU_ADDRESS_SIZE_FAULT]++; + else if (val >= 0xE8 && val <= 0xEF) + platform->gpu_exception_count[GPU_MMU_MEMORY_ATTRIBUTES_FAULT]++; + else + platform->gpu_exception_count[GPU_UNKNOWN]++; + } else if (strcmp(str, "soft_stop") == 0) + 
platform->gpu_exception_count[GPU_SOFT_STOP]++; + else if (strcmp(str, "hard_stop") == 0) + platform->gpu_exception_count[GPU_HARD_STOP]++; + else if (strcmp(str, "reset_count") == 0) + platform->gpu_exception_count[GPU_RESET]++; + + for (i = GPU_JOB_CONFIG_FAULT; i < GPU_EXCEPTION_LIST_END; i++) + platform->fault_count += platform->gpu_exception_count[i]; +} + +#define KBASE_MMU_PAGE_ENTRIES 512 + +static phys_addr_t mmu_pte_to_phy_addr(u64 entry) +{ + if (!(entry & 1)) + return 0; + + return entry & ~0xFFF; +} + +/* MALI_SEC_INTEGRATION */ +static void gpu_page_table_info_dp_level(struct kbase_context *kctx, u64 vaddr, phys_addr_t pgd, int level) +{ + u64 *pgd_page; + int i; + int index = (vaddr >> (12 + ((3 - level) * 9))) & 0x1FF; + int min_index = index - 3; + int max_index = index + 3; + + if (min_index < 0) + min_index = 0; + if (max_index >= KBASE_MMU_PAGE_ENTRIES) + max_index = KBASE_MMU_PAGE_ENTRIES - 1; + + /* Map and dump entire page */ + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + + dev_err(kctx->kbdev->dev, "Dumping level %d @ physical address 0x%016llX (matching index %d):\n", level, pgd, index); + + if (!pgd_page) { + dev_err(kctx->kbdev->dev, "kmap failure\n"); + return; + } + + for (i = min_index; i <= max_index; i++) { + if (i == index) { + dev_err(kctx->kbdev->dev, "[%03d]: 0x%016llX *\n", i, pgd_page[i]); + } else { + dev_err(kctx->kbdev->dev, "[%03d]: 0x%016llX\n", i, pgd_page[i]); + } + } + + /* parse next level (if any) */ + + if ((pgd_page[index] & 3) == ENTRY_IS_PTE) { + phys_addr_t target_pgd = mmu_pte_to_phy_addr(pgd_page[index]); + gpu_page_table_info_dp_level(kctx, vaddr, target_pgd, level + 1); + } else if ((pgd_page[index] & 3) == ENTRY_IS_ATE) { + dev_err(kctx->kbdev->dev, "Final physical address: 0x%016llX\n", pgd_page[index] & ~(0xFFF | ENTRY_FLAGS_MASK)); + } else { + dev_err(kctx->kbdev->dev, "Final physical address: INVALID!\n"); + } + + kunmap(pfn_to_page(PFN_DOWN(pgd))); +} + +void gpu_debug_pagetable_info(void *ctx, u64 vaddr) +{ + struct kbase_context *kctx; + + kctx = (struct kbase_context *)ctx; + KBASE_DEBUG_ASSERT(kctx != NULL); + + dev_err(kctx->kbdev->dev, "Looking up virtual GPU address: 0x%016llX\n", vaddr); + gpu_page_table_info_dp_level(kctx, vaddr, kctx->mmu.pgd, 0); +} + +#ifdef CONFIG_MALI_SEC_CL_BOOST +void gpu_cl_boost_init(void *dev) +{ + struct kbase_device *kbdev; + + kbdev = (struct kbase_device *)dev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_set(&kbdev->pm.backend.metrics.time_compute_jobs, 0); + atomic_set(&kbdev->pm.backend.metrics.time_vertex_jobs, 0); + atomic_set(&kbdev->pm.backend.metrics.time_fragment_jobs, 0); +} + +void gpu_cl_boost_update_utilization(void *dev, void *atom, u64 microseconds_spent) +{ + struct kbase_jd_atom *katom; + struct kbase_device *kbdev; + + kbdev = (struct kbase_device *)dev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + katom = (struct kbase_jd_atom *)atom; + KBASE_DEBUG_ASSERT(katom != NULL); + + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) + atomic_add((microseconds_spent >> KBASE_PM_TIME_SHIFT), &kbdev->pm.backend.metrics.time_compute_jobs); + else if (katom->core_req & BASE_JD_REQ_FS) + atomic_add((microseconds_spent >> KBASE_PM_TIME_SHIFT), &kbdev->pm.backend.metrics.time_fragment_jobs); + else if (katom->core_req & BASE_JD_REQ_CS) + atomic_add((microseconds_spent >> KBASE_PM_TIME_SHIFT), &kbdev->pm.backend.metrics.time_vertex_jobs); +} +#endif + +#ifdef CONFIG_MALI_DVFS +static void dvfs_callback(struct work_struct *data) +{ + unsigned long flags; + struct kbasep_pm_metrics_state 
*metrics; + struct kbase_device *kbdev; + struct exynos_context *platform; + + KBASE_DEBUG_ASSERT(data != NULL); + + metrics = container_of(data, struct kbasep_pm_metrics_state, work.work); + + kbdev = metrics->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + platform = (struct exynos_context *)kbdev->platform_context; + KBASE_DEBUG_ASSERT(platform != NULL); + + kbase_platform_dvfs_event(metrics->kbdev, 0); + + spin_lock_irqsave(&metrics->lock, flags); + +#ifdef CONFIG_MALI_RT_PM + if (metrics->timer_active) +#endif + queue_delayed_work_on(0, platform->dvfs_wq, + platform->delayed_work, msecs_to_jiffies(platform->polling_speed)); + + spin_unlock_irqrestore(&metrics->lock, flags); +} + +void gpu_pm_metrics_init(void *dev) +{ + struct kbase_device *kbdev; + struct exynos_context *platform; + + kbdev = (struct kbase_device *)dev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + platform = (struct exynos_context *)kbdev->platform_context; + KBASE_DEBUG_ASSERT(platform != NULL); + + INIT_DELAYED_WORK(&kbdev->pm.backend.metrics.work, dvfs_callback); + platform->dvfs_wq = create_workqueue("g3d_dvfs"); + platform->delayed_work = &kbdev->pm.backend.metrics.work; + + queue_delayed_work_on(0, platform->dvfs_wq, + platform->delayed_work, msecs_to_jiffies(platform->polling_speed)); +} + +void gpu_pm_metrics_term(void *dev) +{ + struct kbase_device *kbdev; + struct exynos_context *platform; + + kbdev = (struct kbase_device *)dev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + platform = (struct exynos_context *)kbdev->platform_context; + KBASE_DEBUG_ASSERT(platform != NULL); + + cancel_delayed_work(platform->delayed_work); + flush_workqueue(platform->dvfs_wq); + destroy_workqueue(platform->dvfs_wq); +} +#endif + +/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this function */ +#ifdef CONFIG_MALI_DVFS +int gpu_pm_get_dvfs_utilisation(struct kbase_device *kbdev, int *util_gl_share, int util_cl_share[2]) +{ + unsigned long flags; + int utilisation = 0; +#if !defined(CONFIG_MALI_SEC_CL_BOOST) + int busy; +#else + int compute_time = 0, vertex_time = 0, fragment_time = 0, total_time = 0, compute_time_rate = 0; +#endif + + ktime_t now = ktime_get(); + ktime_t diff; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + + if (kbdev->pm.backend.metrics.gpu_active) { + u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + kbdev->pm.backend.metrics.values.time_busy += ns_time; + kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time * kbdev->pm.backend.metrics.active_cl_ctx[0]; + kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time * kbdev->pm.backend.metrics.active_cl_ctx[1]; + kbdev->pm.backend.metrics.time_period_start = now; + } else { + kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + kbdev->pm.backend.metrics.time_period_start = now; + } + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + if (kbdev->pm.backend.metrics.values.time_idle + kbdev->pm.backend.metrics.values.time_busy == 0) { + /* No data - so we return NOP */ + utilisation = -1; +#if !defined(CONFIG_MALI_SEC_CL_BOOST) + if (util_gl_share) + *util_gl_share = -1; + if (util_cl_share) { + util_cl_share[0] = -1; + util_cl_share[1] = -1; + } +#endif + goto out; + } + + utilisation = (100 * kbdev->pm.backend.metrics.values.time_busy) / + (kbdev->pm.backend.metrics.values.time_idle + + kbdev->pm.backend.metrics.values.time_busy); + +#if 
!defined(CONFIG_MALI_SEC_CL_BOOST) + busy = kbdev->pm.backend.metrics.values.busy_gl + + kbdev->pm.backend.metrics.values.busy_cl[0] + + kbdev->pm.backend.metrics.values.busy_cl[1]; + + if (busy != 0) { + if (util_gl_share) + *util_gl_share = + (100 * kbdev->pm.backend.metrics.values.busy_gl) / busy; + if (util_cl_share) { + util_cl_share[0] = + (100 * kbdev->pm.backend.metrics.values.busy_cl[0]) / busy; + util_cl_share[1] = + (100 * kbdev->pm.backend.metrics.values.busy_cl[1]) / busy; + } + } else { + if (util_gl_share) + *util_gl_share = -1; + if (util_cl_share) { + util_cl_share[0] = -1; + util_cl_share[1] = -1; + } + } +#endif + +#ifdef CONFIG_MALI_SEC_CL_BOOST + compute_time = atomic_read(&kbdev->pm.backend.metrics.time_compute_jobs); + vertex_time = atomic_read(&kbdev->pm.backend.metrics.time_vertex_jobs); + fragment_time = atomic_read(&kbdev->pm.backend.metrics.time_fragment_jobs); + total_time = compute_time + vertex_time + fragment_time; + + if (compute_time > 0 && total_time > 0) { + compute_time_rate = (100 * compute_time) / total_time; + if (compute_time_rate == 100) + kbdev->pm.backend.metrics.is_full_compute_util = true; + else + kbdev->pm.backend.metrics.is_full_compute_util = false; + } else + kbdev->pm.backend.metrics.is_full_compute_util = false; +#endif + out: + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.values.time_idle = 0; + kbdev->pm.backend.metrics.values.time_busy = 0; +#ifdef CONFIG_MALI_SEC_CL_BOOST + atomic_set(&kbdev->pm.backend.metrics.time_compute_jobs, 0); + atomic_set(&kbdev->pm.backend.metrics.time_vertex_jobs, 0); + atomic_set(&kbdev->pm.backend.metrics.time_fragment_jobs, 0); +#else + kbdev->pm.backend.metrics.values.busy_cl[0] = 0; + kbdev->pm.backend.metrics.values.busy_cl[1] = 0; + kbdev->pm.backend.metrics.values.busy_gl = 0; +#endif + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + return utilisation; +} +#endif /* CONFIG_MALI_DVFS */ + +/* MALI_SEC_INTEGRATION */ +static bool gpu_mem_profile_check_kctx(void *ctx) +{ + bool found_element = false; +#if MALI_SEC_PROBE_TEST != 1 + struct kbase_device *kbdev; + struct kbase_context *kctx, *tmp; + + kbdev = gpu_get_device_structure(); + + list_for_each_entry_safe(kctx, tmp, &kbdev->kctx_list, kctx_list_link) { + if (kctx == (struct kbase_context *)ctx) + if (kctx->destroying_context == false) { + found_element = true; + break; + } + } +#endif + return found_element; +} + +struct kbase_vendor_callbacks exynos_callbacks = { + .create_context = gpu_create_context, + .destroy_context = gpu_destroy_context, +#ifdef CONFIG_MALI_SEC_CL_BOOST + .cl_boost_init = gpu_cl_boost_init, + .cl_boost_update_utilization = gpu_cl_boost_update_utilization, +#else + .cl_boost_init = NULL, + .cl_boost_update_utilization = NULL, +#endif +#if defined(CONFIG_SOC_EXYNOS7420) || defined(CONFIG_SOC_EXYNOS7890) + .init_hw = exynos_gpu_init_hw, +#else + .init_hw = NULL, +#endif +#ifdef CONFIG_MALI_DVFS + .pm_metrics_init = gpu_pm_metrics_init, + .pm_metrics_term = gpu_pm_metrics_term, +#else + .pm_metrics_init = NULL, + .pm_metrics_term = NULL, +#endif + .debug_pagetable_info = gpu_debug_pagetable_info, + .mem_profile_check_kctx = gpu_mem_profile_check_kctx, +#if MALI_SEC_PROBE_TEST != 1 + .register_dump = gpu_register_dump, + .update_status = gpu_update_status, +#else + .register_dump = NULL, + .update_status = NULL, +#endif +}; + +uintptr_t gpu_get_callbacks(void) +{ + return (uintptr_t)&exynos_callbacks; +} + diff --git 
a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_defs.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_defs.h new file mode 100644 index 000000000000..d0c2a24a8d8d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_integration_defs.h @@ -0,0 +1,62 @@ +/* drivers/gpu/arm/.../platform/gpu_integration_defs.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DDK porting layer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_integration_defs.h + * DDK porting layer. + */ + +#ifndef _SEC_INTEGRATION_H_ +#define _SEC_INTEGRATION_H_ + +#include +#include +#include "mali_kbase_platform.h" +#include "gpu_dvfs_handler.h" + +/* kctx initialized with zero from vzalloc, so initialized value required only */ +#define CTX_UNINITIALIZED 0x0 +#define CTX_INITIALIZED 0x1 +#define CTX_DESTROYED 0x2 +#define CTX_NAME_SIZE 32 + +/* MALI_SEC_INTEGRATION */ +#define KBASE_PM_TIME_SHIFT 8 + +/* MALI_SEC_INTEGRATION */ +#define MEM_FREE_LIMITS 16384 +#define MEM_FREE_DEFAULT 16384 + +uintptr_t gpu_get_callbacks(void); +int gpu_vendor_dispatch(struct kbase_context *kctx, u32 flags); +void gpu_cacheclean(struct kbase_device *kbdev); +void kbase_mem_free_list_cleanup(struct kbase_context *kctx); +void kbase_mem_set_max_size(struct kbase_context *kctx); +int gpu_memory_seq_show(struct seq_file *sfile, void *data); + +struct kbase_vendor_callbacks { + void (*create_context)(void *ctx); + void (*destroy_context)(void *ctx); + void (*pm_metrics_init)(void *dev); + void (*pm_metrics_term)(void *dev); + void (*cl_boost_init)(void *dev); + void (*cl_boost_update_utilization)(void *dev, void *atom, u64 microseconds_spent); + int (*get_core_mask)(void *dev); + int (*init_hw)(void *dev); + void (*debug_pagetable_info)(void *ctx, u64 vaddr); + void (*jd_done_worker)(void *dev); + void (*update_status)(void *dev, char *str, u32 val); + bool (*mem_profile_check_kctx)(void *ctx); + int (*register_dump)(void); +}; + +#endif /* _SEC_INTEGRATION_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_ipa.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_ipa.c new file mode 100644 index 000000000000..afbb6af15c9d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_ipa.c @@ -0,0 +1,271 @@ +/* drivers/gpu/arm/.../platform/gpu_ipa.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
+ */ + +/** + * @file gpu_ipa.c + * DVFS + */ + +#include +#include "mali_kbase_platform.h" + +#include "gpu_ipa.h" +#include "gpu_control.h" +#include "gpu_dvfs_handler.h" + +#define CREATE_TRACE_POINTS +#include "mali_power.h" +#undef CREATE_TRACE_POINTS + +extern struct kbase_device *pkbdev; + +#ifdef CONFIG_MALI_DVFS +static void gpu_ipa_trace_utilisation(struct kbase_device *kbdev) +{ + int utilisation; + int norm_utilisation; + int freq_for_norm; + + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return ; + } + + /* Can expand this to only trace when utilisation changed, to avoid too + * much trace output and losing the part we're interested in */ + + utilisation = platform->env_data.utilization; + norm_utilisation = platform->norm_utilisation; + freq_for_norm = platform->freq_for_normalisation; + + trace_mali_utilization_stats(utilisation, norm_utilisation, freq_for_norm); +} + +static unsigned int gpu_ipa_dvfs_max_allowed_freq(struct kbase_device *kbdev) +{ + gpu_dvfs_info *dvfs_max_info; + int max_thermal_step = -1; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + int max_step; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return 0xffffffff; + } + + max_step = gpu_dvfs_get_level(platform->gpu_max_clock); + + /* Account for Throttling Lock */ +#ifdef CONFIG_EXYNOS_THERMAL + max_thermal_step = gpu_dvfs_get_level(platform->gpu_max_clock); +#endif /* CONFIG_EXYNOS_THERMAL */ + if (max_thermal_step <= gpu_dvfs_get_level(platform->gpu_min_clock) && max_thermal_step > max_step) + max_step = max_thermal_step; + + /* NOTE: This is the absolute maximum, not taking into account any tmu + * throttling */ + dvfs_max_info = &(platform->table[max_step]); + return dvfs_max_info->clock; +} + +void gpu_ipa_dvfs_calc_norm_utilisation(struct kbase_device *kbdev) +{ + int cur_freq; + unsigned int cur_vol; + int max_freq; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + int cur_utilisation; + gpu_dvfs_info *dvfs_cur_info; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return ; + } + + /* TODO: + * - Other callers of kbase_platform_dvfs_set_level() + */ + + /* Get Current Op point */ + /* This is before mali_dvfs_event_proc queued, so the dvfs 'step' is taken before we change frequency */ + cur_utilisation = platform->env_data.utilization; + dvfs_cur_info = &(platform->table[platform->step]); /* dvfs_status under spinlock */ + cur_freq = (int)dvfs_cur_info->clock; + + cur_vol = dvfs_cur_info->voltage/10000; + /* Get Max Op point */ + max_freq = gpu_ipa_dvfs_max_allowed_freq(kbdev); + + /* Calculate */ + platform->norm_utilisation = (cur_utilisation * cur_freq)/max_freq; + /* Store what frequency was used for normalization */ + platform->freq_for_normalisation = cur_freq; + platform->power = div_u64((u64)platform->ipa_power_coeff_gpu * cur_freq * cur_vol * cur_vol, 1000000); + /* adding an extra 0 for division in order to compensate for GPU coefficient unit change */ + + gpu_ipa_trace_utilisation(kbdev); +} + +int gpu_ipa_dvfs_get_norm_utilisation(struct kbase_device *kbdev) +{ + unsigned long flags; + int norm_utilisation = 0; + struct exynos_context *platform = (struct exynos_context *) 
kbdev->platform_context; + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return -1; + } + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + norm_utilisation = platform->norm_utilisation; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + return norm_utilisation; +} +#endif /* CONFIG_MALI_DVFS */ + +int kbase_platform_dvfs_freq_to_power(int freq) +{ +#ifdef CONFIG_MALI_DVFS + int level; + unsigned int vol; + unsigned long flags; + unsigned long long power; + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return -1; + } + + if (0 == freq) { + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + power = platform->power; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + } else { + for (level = gpu_dvfs_get_level(platform->gpu_max_clock); level <= gpu_dvfs_get_level(platform->gpu_min_clock); level++) + if (platform->table[level].clock == freq) + break; + + if (level <= gpu_dvfs_get_level(platform->gpu_min_clock)) { + vol = platform->table[level].voltage / 10000; + power = div_u64((u64)platform->ipa_power_coeff_gpu * freq * vol * vol, 1000000); + } else { + power = 0; + } + } + + return (int)power; +#else + return 0; +#endif /* CONFIG_MALI_DVFS */ +} + +int kbase_platform_dvfs_power_to_freq(int power) +{ +#ifdef CONFIG_MALI_DVFS + int level, freq; + unsigned int vol; + u64 _power; + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return -1; + } + + for (level = gpu_dvfs_get_level(platform->gpu_min_clock); level >= gpu_dvfs_get_level(platform->gpu_max_clock); level--) { + vol = platform->table[level].voltage / 10000; + freq = platform->table[level].clock; + _power = div_u64((u64)platform->ipa_power_coeff_gpu * freq * vol * vol, 1000000); + if ((int)_power >= power) + break; + } + + return platform->table[level].clock; +#else + return 0; +#endif /* CONFIG_MALI_DVFS */ +} + +/** + * Get a number of statsistics under the same lock, so they are all 'in sync' + */ +void gpu_ipa_dvfs_get_utilisation_stats(struct mali_debug_utilisation_stats *stats) +{ +#ifdef CONFIG_MALI_DVFS + unsigned long flags; + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return ; + } + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + stats->s.utilisation = platform->env_data.utilization; + stats->s.norm_utilisation = platform->norm_utilisation; + stats->s.freq_for_norm = platform->freq_for_normalisation; + stats->time_busy = platform->time_busy; + stats->time_idle = platform->time_idle; + stats->time_tick = platform->time_tick; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); +#endif /* CONFIG_MALI_DVFS */ +} + +int gpu_ipa_dvfs_max_lock(int clock) +{ +#ifdef CONFIG_MALI_DVFS + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: 
platform context (0x%p) is not initialized\n", __func__, platform); + return -ENODEV; + } + + gpu_dvfs_clock_lock(GPU_DVFS_MAX_LOCK, IPA_LOCK, clock); +#endif /* CONFIG_MALI_DVFS */ + return 0; +} + +int gpu_ipa_dvfs_max_unlock(void) +{ +#ifdef CONFIG_MALI_DVFS + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return -ENODEV; + } + + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, IPA_LOCK, 0); +#endif /* CONFIG_MALI_DVFS */ + return 0; +} + +int get_ipa_dvfs_max_freq(void) +{ + struct kbase_device *kbdev = pkbdev; + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + + if (!platform) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: platform context (0x%p) is not initialized\n", __func__, platform); + return -ENODEV; + } + + return platform->gpu_max_clock; +} diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_ipa.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_ipa.h new file mode 100644 index 000000000000..1d747c83b057 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_ipa.h @@ -0,0 +1,41 @@ +/* drivers/gpu/arm/.../platform/gpu_ipa.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_ipa.h + * DVFS + */ + +#ifndef _GPU_IPA_H_ +#define _GPU_IPA_H_ + +struct mali_utilisation_stats { + int utilisation; + int norm_utilisation; + int freq_for_norm; +}; + +struct mali_debug_utilisation_stats { + struct mali_utilisation_stats s; + u32 time_busy; + u32 time_idle; + int time_tick; +}; + +int gpu_ipa_dvfs_get_norm_utilisation(struct kbase_device *kbdev); +void gpu_ipa_dvfs_get_utilisation_stats(struct mali_debug_utilisation_stats *stats); +void gpu_ipa_dvfs_calc_norm_utilisation(struct kbase_device *kbdev); +int gpu_ipa_dvfs_max_lock(int clock); +int gpu_ipa_dvfs_max_unlock(void); +int get_ipa_dvfs_max_freq(void); + +#endif /* _GPU_IPA_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_job_fence_debug.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_job_fence_debug.c new file mode 100644 index 000000000000..ed693789c7c2 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_job_fence_debug.c @@ -0,0 +1,344 @@ +/* drivers/gpu/arm/.../platform/exynos/gpu_job_fence_debug.c + * + * Copyright 2018 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series JOB & FENCE debug driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
+ */ + +/** + * @file gpu_job_fence_debug.c + * JOB FENCE DEBUG + */ + +#include + +#ifdef CONFIG_MALI_SEC_JOB_STATUS_CHECK + +#include "backend/gpu/mali_kbase_jm_rb.h" + +#if defined(CONFIG_SYNC) +int gpu_job_fence_status_dump(struct sync_fence *timeout_fence); +static char *gpu_fence_status_to_string(int status) +{ + if (status == 0) + return "signaled"; + else if (status > 0) + return "active"; + else + return "error"; +} + +void gpu_fence_debug_check_dependency_atom(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct device *dev = kctx->kbdev->dev; + int i; + + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep; + + list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { + if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || + dep->status == KBASE_JD_ATOM_STATE_COMPLETED) + continue; + + if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_TRIGGER || + (dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) { + struct sync_fence *fence = dep->fence; + int status = atomic_read(&fence->status); + + /* Found blocked fence. */ + dev_warn(dev, + "\t\t\t\t--- Atom %d fence [%p] %s: %s, fence type = 0x%x\n", + kbase_jd_atom_id(kctx, dep), + fence, fence->name, + gpu_fence_status_to_string(status), + dep->core_req); + } else { + dev_warn(dev, + "\t\t\t\t--- Atom %d\n", kbase_jd_atom_id(kctx, dep)); + } + + /* gpu_fence_debug_check_dependency_atom(dep); */ + } + } +} + +int gpu_job_fence_status_dump(struct sync_fence *timeout_fence) +{ + struct device *dev; + struct list_head *entry; + const struct list_head *kbdev_list; + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + struct sync_fence *fence; + unsigned long lflags; + int i; + int cnt[5] = {0,}; + + /* dev_warn(dev,"GPU JOB STATUS DUMP\n"); */ + + kbdev_list = kbase_dev_list_get(); + + if (kbdev_list == NULL) { + kbase_dev_list_put(kbdev_list); + return -ENODEV; + } + + list_for_each(entry, kbdev_list) { + kbdev = list_entry(entry, struct kbase_device, entry); + + if (kbdev == NULL) { + kbase_dev_list_put(kbdev_list); + return -ENODEV; + } + + dev = kbdev->dev; + dev_warn(dev, "[%p] kbdev dev name : %s\n", kbdev, kbdev->devname); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + mutex_lock(&kctx->jctx.lock); + dev_warn(dev, "\t[%p] kctx(%d_%d_%d)_jobs_nr(%d)\n", kctx, kctx->pid, kctx->tgid, kctx->id, kctx->jctx.job_nr); + if (kctx->jctx.job_nr > 0) { + for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) { + if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) { + cnt[0]++; + } else if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_QUEUED) { + cnt[1]++; + dev_warn(dev, "\t\t- [%p] Atom %d STATE_QUEUED\n", &kctx->jctx.atoms[i], i); + /* dev_warn(dev, " -- Atom %d slot_nr 0x%x coreref_state 0x%x core_req 0x%x event_code 0x%x gpu_rb_state 0x%x\n", + i, kctx->jctx.atoms[i].slot_nr, kctx->jctx.atoms[i].coreref_state, kctx->jctx.atoms[i].core_req, kctx->jctx.atoms[i].event_code, kctx->jctx.atoms[i].gpu_rb_state); */ + } else if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_IN_JS) { + cnt[2]++; + dev_warn(dev, "\t\t- [%p] Atom %d STATE_IN_JS\n", &kctx->jctx.atoms[i], i); + dev_warn(dev, "\t\t\t-- Atom %d slot_nr 0x%x coreref_state 0x%x core_req 0x%x event_code 0x%x gpu_rb_state 0x%x\n", + i, kctx->jctx.atoms[i].slot_nr, kctx->jctx.atoms[i].coreref_state, kctx->jctx.atoms[i].core_req, kctx->jctx.atoms[i].event_code, kctx->jctx.atoms[i].gpu_rb_state); + } else if (kctx->jctx.atoms[i].status == 
KBASE_JD_ATOM_STATE_HW_COMPLETED) { + cnt[3]++; + } else if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_COMPLETED) { + cnt[4]++; + } + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + /* Print fence infomation */ + fence = kctx->jctx.atoms[i].fence; + if (fence != NULL) { + dev_warn(dev, "\t\t\t-- Atom %d Fence Info [%p] %s: %s, fence type = 0x%x, %s\n", + i, fence, fence->name, gpu_fence_status_to_string(atomic_read(&fence->status)), kctx->jctx.atoms[i].core_req, (fence == timeout_fence) ? "***" : " "); + } + /* Print dependency atom infomation */ + if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_QUEUED || kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_IN_JS) { + dev_warn(dev, "\t\t\t-- Dependency Atom List\n"); + gpu_fence_debug_check_dependency_atom(&kctx->jctx.atoms[i]); + } + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + } + dev_warn(dev, "\t\t: ATOM STATE INFO : UNUSED(%d)_QUEUED(%d)_IN_JS(%d)_HW_COMPLETED(%d)_COMPLETED(%d)\n", cnt[0], cnt[1], cnt[2], cnt[3], cnt[4]); + cnt[0] = cnt[1] = cnt[2] = cnt[3] = cnt[4] = 0; + } + mutex_unlock(&kctx->jctx.lock); + } + mutex_unlock(&kbdev->kctx_list_lock); + /* katom list in backed slot rb */ + kbase_gpu_dump_slots(kbdev); + } + + if (timeout_fence != NULL) + dev_warn(dev, "Timeout Fence *** [%p] %s: %s\n", timeout_fence, timeout_fence->name, gpu_fence_status_to_string(atomic_read(&timeout_fence->status))); + + kbase_dev_list_put(kbdev_list); + + return 0; +} /* #if defined(CONFIG_SYNC) */ + +#elif defined(CONFIG_SYNC_FILE) + +#include "mali_kbase_sync.h" +/* If MALI_SEC_DEPENDENCY_CHECK is enabled, there could be deadlock. So, it could be enabled for debugging only */ +/* #define MALI_SEC_DEPENDENCY_CHECK */ + +#ifdef MALI_SEC_DEPENDENCY_CHECK +void gpu_fence_debug_check_dependency_atom(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct device *dev = kctx->kbdev->dev; + int i; + + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep; + + list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { + if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || + dep->status == KBASE_JD_ATOM_STATE_COMPLETED) + continue; + + /* Found defendency fence & job */ + if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { + struct kbase_sync_fence_info info; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; /* trigger fence */ +#else + struct dma_fence *fence; +#endif + if (!kbase_sync_fence_out_info_get(dep, &info)) { + fence = info.fence; + dev_warn(dev, + "\t\t\t\t--- Atom %d fence_out [%p] %s: fence type = 0x%x, fence ctx = %llu, fence seqno = %u\n", + kbase_jd_atom_id(kctx, dep), + info.fence, fence->ops->get_driver_name(fence), + dep->core_req, + fence->context, + fence->seqno); + } + } else if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) { + struct kbase_sync_fence_info info; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; /* wait fence */ +#else + struct dma_fence *fence; +#endif + if (!kbase_sync_fence_in_info_get(dep, &info)) { + fence = info.fence; + dev_warn(dev, + "\t\t\t\t--- Atom %d fence_in [%p] %s: fence type = 0x%x, fence ctx = %llu, fence seqno = %u\n", + kbase_jd_atom_id(kctx, dep), + info.fence, fence->ops->get_driver_name(fence), + dep->core_req, + fence->context, + fence->seqno); + } + } else { + dev_warn(dev, + "\t\t\t\t--- Atom %d\n", kbase_jd_atom_id(kctx, dep)); + } + + /* gpu_fence_debug_check_dependency_atom(dep); */ + } + } +} +#endif + 
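+/*
+ * Overview of the CONFIG_SYNC_FILE dump path below (summarised from the
+ * code in this file): gpu_job_fence_status_dump() walks every
+ * kbase_context on pkbdev under kctx_list_lock, logs each atom that is
+ * QUEUED or IN_JS, and for queued soft fence trigger/wait atoms prints the
+ * fence reported by kbase_sync_fence_out_info_get() /
+ * kbase_sync_fence_in_info_get(). A fence matching
+ * timeout_sync_file->fence is marked with "***", and the backend slot
+ * ringbuffers are dumped last via kbase_gpu_dump_slots().
+ */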
+extern struct kbase_device *pkbdev; +int gpu_job_fence_status_dump(struct sync_file *timeout_sync_file) +{ + struct device *dev; + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + struct kbase_sync_fence_info info_in; + struct kbase_sync_fence_info info_out; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence_in; + struct fence *fence_out; +#else + struct dma_fence *fence_in; + struct dma_fence *fence_out; +#endif + int i; + int cnt[5] = {0,}; + bool check_fence; + + /* dev_warn(dev,"GPU JOB STATUS DUMP\n"); */ + + + kbdev = pkbdev; + + if (kbdev == NULL) + return -ENODEV; + + dev = kbdev->dev; + dev_warn(dev, "[%p] kbdev dev name : %s\n", kbdev, kbdev->devname); + + dev = kbdev->dev; + dev_warn(dev, "[%p] kbdev dev name : %s\n", kbdev, kbdev->devname); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + mutex_lock(&kctx->jctx.lock); + dev_warn(dev, "\t[%p] kctx(%d_%d_%d)_jobs_nr(%d)\n", kctx, kctx->pid, kctx->tgid, kctx->id, kctx->jctx.job_nr); + if (kctx->jctx.job_nr > 0) { + for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) { + if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) { + cnt[0]++; + } else if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_QUEUED) { + cnt[1]++; + dev_warn(dev, "\t\t- [%p] Atom %d STATE_QUEUED\n", &kctx->jctx.atoms[i], i); + /* dev_warn(dev, " -- Atom %d slot_nr 0x%x coreref_state 0x%x core_req 0x%x event_code 0x%x gpu_rb_state 0x%x\n", + i, kctx->jctx.atoms[i].slot_nr, kctx->jctx.atoms[i].coreref_state, kctx->jctx.atoms[i].core_req, kctx->jctx.atoms[i].event_code, kctx->jctx.atoms[i].gpu_rb_state); */ + } else if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_IN_JS) { + cnt[2]++; + dev_warn(dev, "\t\t- [%p] Atom %d STATE_IN_JS\n", &kctx->jctx.atoms[i], i); + dev_warn(dev, "\t\t\t-- Atom %d slot_nr 0x%x core_req 0x%x event_code 0x%x gpu_rb_state 0x%x\n", + i, kctx->jctx.atoms[i].slot_nr, kctx->jctx.atoms[i].core_req, kctx->jctx.atoms[i].event_code, kctx->jctx.atoms[i].gpu_rb_state); } else if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_HW_COMPLETED) { + cnt[3]++; + } else if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_COMPLETED) { + cnt[4]++; + } + + /* Print fence infomation */ + if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_QUEUED) { + if ((kctx->jctx.atoms[i].core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { + if (!kbase_sync_fence_out_info_get(&kctx->jctx.atoms[i], &info_out)) { + fence_out = info_out.fence; + if (timeout_sync_file != NULL && timeout_sync_file->fence != NULL) { + if (fence_out == timeout_sync_file->fence) + check_fence = true; + else + check_fence = false; + } else + check_fence = false; + + dev_warn(dev, "\t\t\t-- Atom %d Fence_out Info [%p] %s: fence type = 0x%x, fence ctx = %llu, fence seqno = %u, %s\n", + i, info_out.fence, fence_out->ops->get_driver_name(fence_out), kctx->jctx.atoms[i].core_req, fence_out->context, fence_out->seqno, (check_fence == true) ? 
"***" : " "); + } + } + if ((kctx->jctx.atoms[i].core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) { + if (!kbase_sync_fence_in_info_get(&kctx->jctx.atoms[i], &info_in)) { + fence_in = info_in.fence; + if (timeout_sync_file != NULL && timeout_sync_file->fence != NULL) { + if (fence_in == timeout_sync_file->fence) + check_fence = true; + else + check_fence = false; + } else + check_fence = false; + + dev_warn(dev, "\t\t\t-- Atom %d Fence_in Info [%p] %s: fence type = 0x%x, fence ctx = %llu, fence seqno = %u, %s\n", + i, info_in.fence, fence_in->ops->get_driver_name(fence_in), kctx->jctx.atoms[i].core_req, fence_in->context, fence_in->seqno, (check_fence == true) ? "***" : " "); + } + } + } +#ifdef MALI_SEC_DEPENDENCY_CHECK + /* Print dependency atom infomation */ + if (kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_QUEUED || kctx->jctx.atoms[i].status == KBASE_JD_ATOM_STATE_IN_JS) { + dev_warn(dev, "\t\t\t-- Dependency Atom List\n"); + gpu_fence_debug_check_dependency_atom(&kctx->jctx.atoms[i]); + } +#endif + } + dev_warn(dev, "\t\t: ATOM STATE INFO : UNUSED(%d)_QUEUED(%d)_IN_JS(%d)_HW_COMPLETED(%d)_COMPLETED(%d)\n", cnt[0], cnt[1], cnt[2], cnt[3], cnt[4]); + cnt[0] = cnt[1] = cnt[2] = cnt[3] = cnt[4] = 0; + } + mutex_unlock(&kctx->jctx.lock); + } + mutex_unlock(&kbdev->kctx_list_lock); + /* katom list in backed slot rb */ + kbase_gpu_dump_slots(kbdev); + + if (timeout_sync_file != NULL) { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + dev_warn(dev, "Timeout Sync_file [%p] Sync_file name %s\n", timeout_sync_file, timeout_sync_file->name); +#else + dev_warn(dev, "Timeout Sync_file [%p] Sync_file name %s\n", timeout_sync_file, timeout_sync_file->user_name); +#endif + dev_warn(dev, "Timeout Fence *** [%p] \n", timeout_sync_file->fence); + } + + return 0; +} +#endif /* #if defined(CONFIG_SYNC_FILE) */ +#endif /* #if CONFIG_MALI_SEC_JOB_STATUS_CHECK */ + diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_notifier.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_notifier.c new file mode 100644 index 000000000000..1cfb7054f748 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_notifier.c @@ -0,0 +1,511 @@ +/* drivers/gpu/arm/.../platform/gpu_notifier.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series platform-dependent codes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
+ */ + +/** + * @file gpu_notifier.c + */ + +#include + +#include +#include + +#include "mali_kbase_platform.h" +#include "gpu_dvfs_handler.h" +#include "gpu_notifier.h" +#include "gpu_control.h" + +#ifdef CONFIG_EXYNOS_THERMAL +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +#include +#else +#include +#endif +#endif /* CONFIG_EXYNOS_THERMAL */ + +#ifdef CONFIG_EXYNOS_BUSMONITOR +#include +#endif + +#if defined(CONFIG_MALI_PM_QOS) && defined(CONFIG_MALI_GPU_PM_QOS) +#include +#endif + +#include + +#if !defined(CONFIG_MALI_EXYNOS_SECURE_RENDERING_UNSUPPORTED) && defined(CONFIG_SOC_EXYNOS9630) +#include +#endif + +extern struct kbase_device *pkbdev; + +#if defined(CONFIG_MALI_PM_QOS) && defined(CONFIG_MALI_GPU_PM_QOS) +static int gpu_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *v) +{ + int pm_qos_class = *((int *)v); + + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: update GPU PM Qos class %d to %ld kHz\n", __func__, pm_qos_class, val); + + if (pm_qos_class == PM_QOS_GPU_THROUGHPUT_MAX) { + /* TO DO FOR MAX LOCK */ + if ( val > 0) + gpu_dvfs_clock_lock(GPU_DVFS_MAX_LOCK, PMQOS_LOCK, val); + else + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, PMQOS_LOCK, -1); + } else if (pm_qos_class == PM_QOS_GPU_THROUGHPUT_MIN) { + /* TO DO FOR MIN LOCK */ + if ( val > 0) + gpu_dvfs_clock_lock(GPU_DVFS_MIN_LOCK, PMQOS_LOCK, val); + else + gpu_dvfs_clock_lock(GPU_DVFS_MIN_UNLOCK, PMQOS_LOCK, -1); + } else { + /* invalid PM QoS class */ + return -EINVAL; + } + + return NOTIFY_OK; +} + +static struct notifier_block gpu_min_qos_notifier = { + .notifier_call = gpu_pm_qos_notifier, + .priority = INT_MAX, +}; + +static struct notifier_block gpu_max_qos_notifier = { + .notifier_call = gpu_pm_qos_notifier, + .priority = INT_MAX, +}; +#endif + +#if defined (CONFIG_EXYNOS_THERMAL) && defined(CONFIG_GPU_THERMAL) +static void gpu_tmu_normal_work(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DVFS + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + if (!platform) + return; + + gpu_dvfs_clock_lock(GPU_DVFS_MAX_UNLOCK, TMU_LOCK, 0); +#endif /* CONFIG_MALI_DVFS */ +} + +static int gpu_tmu_notifier(struct notifier_block *notifier, + unsigned long event, void *v) +{ + int frequency; + struct exynos_context *platform = (struct exynos_context *)pkbdev->platform_context; +#if defined(CONFIG_DEBUG_SNAPSHOT_THERMAL) || defined(CONFIG_EXYNOS_SNAPSHOT_THERMAL) + char *cooling_device_name = "GPU"; +#endif + + if (!platform) + return -ENODEV; + + if (!platform->tmu_status) + return NOTIFY_OK; + + platform->voltage_margin = 0; + frequency = *(int *)v; + + if (event == GPU_COLD) { + platform->voltage_margin = platform->gpu_default_vol_margin; + } else if (event == GPU_NORMAL) { + gpu_tmu_normal_work(pkbdev); + } else if (event == GPU_THROTTLING || event == GPU_TRIPPING) { +#ifdef CONFIG_MALI_DVFS + gpu_dvfs_clock_lock(GPU_DVFS_MAX_LOCK, TMU_LOCK, frequency); +#endif +#if defined(CONFIG_EXYNOS_SNAPSHOT_THERMAL) + exynos_ss_thermal(NULL, 0, cooling_device_name, frequency); +#elif defined(CONFIG_DEBUG_SNAPSHOT_THERMAL) + dbg_snapshot_thermal(NULL, 0, cooling_device_name, frequency); +#endif + } + + GPU_LOG(DVFS_DEBUG, LSI_TMU_VALUE, 0u, event, "tmu event %lu, frequency %d\n", event, frequency); + + gpu_set_target_clk_vol(platform->cur_clock, false); + + return NOTIFY_OK; +} + +static struct notifier_block gpu_tmu_nb = { + .notifier_call = gpu_tmu_notifier, +}; +#endif /* CONFIG_EXYNOS_THERMAL */ + + +static int gpu_power_on(struct kbase_device *kbdev) +{ + int ret = 0; + struct 
exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + if (!platform) + return -ENODEV; + +#ifdef CONFIG_MALI_RT_PM + if (!platform->inter_frame_pm_status) + gpu_control_disable_customization(kbdev); + + ret = pm_runtime_get_sync(kbdev->dev); + + if (platform->inter_frame_pm_status) + gpu_control_disable_customization(kbdev); +#else + ret = 0; +#endif + + + GPU_LOG(DVFS_INFO, LSI_GPU_RPM_RESUME_API, 0u, ret, "power on\n"); + + if (ret > 0) { +#ifdef CONFIG_MALI_DVFS + if (platform->early_clk_gating_status) { + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "already power on\n"); + gpu_control_enable_clock(kbdev); + } +#endif + platform->power_runtime_resume_ret = ret; + return 0; + } else if (ret == 0) { + platform->power_runtime_resume_ret = ret; + return 1; + } else { + platform->power_runtime_resume_ret = ret; + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "runtime pm returned %d\n", ret); + return 0; + } +} + +static void gpu_power_off(struct kbase_device *kbdev) +{ + int ret = 0; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + if (!platform) + return; + + GPU_LOG(DVFS_DEBUG, DUMMY, 0u, 0u, "power off\n"); +#ifdef CONFIG_MALI_RT_PM + gpu_control_enable_customization(kbdev); + + pm_runtime_mark_last_busy(kbdev->dev); + ret = pm_runtime_put_autosuspend(kbdev->dev); + +#ifdef CONFIG_MALI_DVFS + if (platform->early_clk_gating_status) + gpu_control_disable_clock(kbdev); +#endif +#endif + platform->power_runtime_suspend_ret = ret; + GPU_LOG(DVFS_INFO, LSI_GPU_RPM_SUSPEND_API, 0u, ret, "power off\n"); +} + +static void gpu_power_suspend(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + int ret = 0; + + if (!platform) + return; + + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "power suspend\n"); + if (platform->dvs_status) + gpu_control_enable_customization(kbdev); + + ret = pm_runtime_suspend(kbdev->dev); + +#ifdef CONFIG_MALI_DVFS + if (platform->early_clk_gating_status) + gpu_control_disable_clock(kbdev); +#endif + + platform->power_runtime_suspend_ret = ret; + GPU_LOG(DVFS_INFO, LSI_SUSPEND_CALLBACK, 0u, ret, "power suspend\n"); +} + +#ifdef CONFIG_MALI_RT_PM +static int gpu_pm_notifier(struct notifier_block *nb, unsigned long event, void *cmd) +{ + int err = NOTIFY_OK; + struct kbase_device *kbdev = pkbdev; + struct kbasep_js_device_data *js_devdata = NULL; + struct exynos_context *platform = NULL; + + if (kbdev) { + js_devdata = &kbdev->js_data; + platform = (struct exynos_context *)kbdev->platform_context; + } + + if (!kbdev || !js_devdata || !platform) { + GPU_LOG(DVFS_ERROR, DUMMY, event, 0u, "[G3D] error control of variable : event[%lu]\n", event); + GPU_LOG(DVFS_ERROR, DUMMY, event, 0u, " kbdev [%p]\n", kbdev); + GPU_LOG(DVFS_ERROR, DUMMY, event, 0u, " js_devdata [%p]\n", js_devdata); + GPU_LOG(DVFS_ERROR, DUMMY, event, 0u, " platform [%p]\n", platform); + } + + switch (event) { + case PM_SUSPEND_PREPARE: + break; + case PM_POST_SUSPEND: + break; + default: + break; + } + return err; +} + +static struct notifier_block gpu_pm_nb = { + .notifier_call = gpu_pm_notifier +}; + +static int gpu_device_runtime_init(struct kbase_device *kbdev) +{ + int ret = 0; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + if (!platform) { + dev_warn(kbdev->dev, "kbase_device_runtime_init failed %p\n", platform); + ret = -ENOSYS; + return ret; + } + + platform->power_runtime_resume_ret = 0; + platform->power_runtime_suspend_ret = 0; + + dev_dbg(kbdev->dev, 
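/*
 * Illustrative sketch, not part of this patch: the generic runtime-PM
 * autosuspend idiom that gpu_power_on()/gpu_power_off() above follow, shown
 * for a hypothetical device. Only standard <linux/pm_runtime.h> calls are
 * used; the delay mirrors platform->runtime_pm_delay_time (in ms) that
 * gpu_device_runtime_init() programs below.
 */
#include <linux/pm_runtime.h>

static int example_do_gpu_job(struct device *dev)
{
	int ret = pm_runtime_get_sync(dev);	/* >0: already on, 0: just resumed */

	if (ret < 0) {
		pm_runtime_put_noidle(dev);	/* balance the usage count on error */
		return ret;
	}

	/* ... submit work to the powered device ... */

	pm_runtime_mark_last_busy(dev);		/* restart the idle timer */
	return pm_runtime_put_autosuspend(dev);	/* suspend after the delay */
}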
"kbase_device_runtime_init\n"); + + pm_runtime_set_autosuspend_delay(kbdev->dev, platform->runtime_pm_delay_time); + pm_runtime_use_autosuspend(kbdev->dev); + + pm_runtime_set_active(kbdev->dev); + pm_runtime_enable(kbdev->dev); + + if (!pm_runtime_enabled(kbdev->dev)) { + dev_warn(kbdev->dev, "pm_runtime not enabled"); + ret = -ENOSYS; + } + + return ret; +} + +static void gpu_device_runtime_disable(struct kbase_device *kbdev) +{ + pm_runtime_disable(kbdev->dev); +} + +#if MALI_SEC_PROBE_TEST != 1 +static int pm_callback_dvfs_on(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DVFS + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + gpu_dvfs_timer_control(true); + + if (platform->dvfs_pending) + platform->dvfs_pending = 0; +#endif + + return 0; +} +#endif + +static int pm_callback_runtime_on(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + if (!platform) + return -ENODEV; + + GPU_LOG(DVFS_DEBUG, LSI_GPU_ON, 0u, 0u, "runtime on callback\n"); + +#ifdef CONFIG_MALI_DVFS + gpu_control_enable_clock(kbdev); +#endif + gpu_dvfs_start_env_data_gathering(kbdev); + platform->power_status = true; + +#if !defined(CONFIG_MALI_EXYNOS_SECURE_RENDERING_UNSUPPORTED) && defined(CONFIG_SOC_EXYNOS9630) + exynos_smc(SMC_DRM_G3D_PPCFW_RESTORE, 0, 0, 0); +#endif + +#if 0 +#ifdef CONFIG_MALI_DVFS +#ifdef CONFIG_MALI_SEC_CL_BOOST + if (platform->dvfs_status && platform->wakeup_lock && !kbdev->pm.backend.metrics.is_full_compute_util) +#else + if (platform->dvfs_status && platform->wakeup_lock) +#endif /* CONFIG_MALI_SEC_CL_BOOST */ + gpu_set_target_clk_vol(platform->gpu_dvfs_start_clock, false); + else + gpu_set_target_clk_vol(platform->cur_clock, false); +#endif /* CONFIG_MALI_DVFS */ +#endif + return 0; +} +extern void preload_balance_setup(struct kbase_device *kbdev); +static void pm_callback_runtime_off(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + if (!platform) + return; + + GPU_LOG(DVFS_DEBUG, LSI_GPU_OFF, 0u, 0u, "runtime off callback\n"); + + platform->power_status = false; + + gpu_control_disable_customization(kbdev); + + gpu_dvfs_stop_env_data_gathering(kbdev); +#ifdef CONFIG_MALI_DVFS + gpu_dvfs_timer_control(false); + if (platform->dvfs_pending) + platform->dvfs_pending = 0; + if (!platform->early_clk_gating_status) + gpu_control_disable_clock(kbdev); +#endif /* CONFIG_MALI_DVFS */ + + +#if !defined(CONFIG_MALI_EXYNOS_SECURE_RENDERING_UNSUPPORTED) && defined(CONFIG_SOC_EXYNOS9630) + exynos_smc(SMC_DRM_G3D_POWER_OFF, 0, 0, 0); +#endif + +#if defined(CONFIG_SOC_EXYNOS7420) || defined(CONFIG_SOC_EXYNOS7890) + preload_balance_setup(kbdev); +#endif +} +#endif /* CONFIG_MALI_RT_PM */ + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = gpu_power_on, + .power_off_callback = gpu_power_off, + .power_suspend_callback = gpu_power_suspend, +#ifdef CONFIG_MALI_RT_PM + .power_runtime_init_callback = gpu_device_runtime_init, + .power_runtime_term_callback = gpu_device_runtime_disable, + .power_runtime_on_callback = pm_callback_runtime_on, + .power_runtime_off_callback = pm_callback_runtime_off, +#if MALI_SEC_PROBE_TEST != 1 + .power_dvfs_on_callback = pm_callback_dvfs_on, +#endif +#else /* CONFIG_MALI_RT_PM */ + .power_runtime_init_callback = NULL, + .power_runtime_term_callback = NULL, + .power_runtime_on_callback = NULL, + .power_runtime_off_callback = NULL, +#if MALI_SEC_PROBE_TEST != 1 + .power_dvfs_on_callback 
= NULL, +#endif +#endif /* CONFIG_MALI_RT_PM */ +}; + +#ifdef CONFIG_EXYNOS_BUSMONITOR +static int gpu_noc_notifier(struct notifier_block *nb, unsigned long event, void *cmd) +{ + if (strstr((char *)cmd, "G3D")) { + GPU_LOG(DVFS_ERROR, LSI_RESUME, 0u, 0u, "%s: gpu_noc_notifier\n", __func__); + gpu_register_dump(); + } + return 0; +} +#endif + +#ifdef CONFIG_EXYNOS_BUSMONITOR +static struct notifier_block gpu_noc_nb = { + .notifier_call = gpu_noc_notifier +}; +#endif + +#if ((LINUX_VERSION_CODE > KERNEL_VERSION(4, 5, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))) +static int gpu_oomdebug_notifier(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + struct list_head *entry; + const struct list_head *kbdev_list; + + kbdev_list = kbase_dev_list_get(); + list_for_each(entry, kbdev_list) { + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + + kbdev = list_entry(entry, struct kbase_device, entry); + /* output the total memory usage and cap for this device */ + pr_info("%-16s %10u\n", + kbdev->devname, + atomic_read(&(kbdev->memdev.used_pages))); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* output the memory usage and cap for each kctx + * opened on this device */ + pr_info(" %s-0x%p %10u\n", + "kctx", + kctx, + atomic_read(&(kctx->used_pages))); + } + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_dev_list_put(kbdev_list); + return NOTIFY_OK; +} + +static struct notifier_block gpu_oomdebug_nb = { + .notifier_call = gpu_oomdebug_notifier, +}; +#endif + +int gpu_notifier_init(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *)kbdev->platform_context; + if (!platform) + return -ENODEV; + + platform->voltage_margin = platform->gpu_default_vol_margin; +#if defined (CONFIG_EXYNOS_THERMAL) && defined(CONFIG_GPU_THERMAL) + exynos_gpu_add_notifier(&gpu_tmu_nb); +#endif /* CONFIG_EXYNOS_THERMAL */ + +#ifdef CONFIG_MALI_RT_PM + if (register_pm_notifier(&gpu_pm_nb)) + return -1; +#endif /* CONFIG_MALI_RT_PM */ + +#if defined(CONFIG_MALI_PM_QOS) && defined(CONFIG_MALI_GPU_PM_QOS) + pm_qos_add_notifier(PM_QOS_GPU_THROUGHPUT_MAX, &gpu_max_qos_notifier); + pm_qos_add_notifier(PM_QOS_GPU_THROUGHPUT_MIN, &gpu_min_qos_notifier); +#endif + +#ifdef CONFIG_EXYNOS_BUSMONITOR + busmon_notifier_chain_register(&gpu_noc_nb); +#endif + + platform->power_status = true; + + /* Cannot find following API in 4.14 kernel */ +#if ((LINUX_VERSION_CODE > KERNEL_VERSION(4, 5, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))) + if (register_oomdebug_notifier(&gpu_oomdebug_nb) < 0) + pr_err("%s: failed to register oom debug notifier\n", __func__); +#endif + + return 0; +} + +void gpu_notifier_term(void) +{ +#ifdef CONFIG_MALI_RT_PM + unregister_pm_notifier(&gpu_pm_nb); +#endif /* CONFIG_MALI_RT_PM */ +#if defined(CONFIG_MALI_PM_QOS) && defined(CONFIG_MALI_GPU_PM_QOS) + pm_qos_remove_notifier(PM_QOS_GPU_THROUGHPUT_MAX, &gpu_max_qos_notifier); + pm_qos_remove_notifier(PM_QOS_GPU_THROUGHPUT_MIN, &gpu_min_qos_notifier); +#endif + return; +} diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_notifier.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_notifier.h new file mode 100644 index 000000000000..1c5e7d398895 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_notifier.h @@ -0,0 +1,23 @@ +/* drivers/gpu/arm/.../platform/gpu_notifier.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. 
+ * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series platform-dependent codes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_notifier.h + */ + +#ifndef _GPU_NOTIFIER_H_ +#define _GPU_NOTIFIER_H_ + +int gpu_notifier_init(struct kbase_device *kbdev); +void gpu_notifier_term(void); + +#endif /* _GPU_NOTIFIER_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_pmqos.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_pmqos.c new file mode 100644 index 000000000000..c91bda23f77c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_pmqos.c @@ -0,0 +1,206 @@ +/* drivers/gpu/arm/.../platform/gpu_pmqos.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_pmqos.c + * DVFS + */ + +#include + +#include + +#include "mali_kbase_platform.h" +#include "gpu_dvfs_handler.h" + +#if defined(PM_QOS_CLUSTER2_FREQ_MAX_DEFAULT_VALUE) +#define PM_QOS_CPU_CLUSTER_NUM 3 +#else +#define PM_QOS_CPU_CLUSTER_NUM 2 +#ifndef PM_QOS_CLUSTER1_FREQ_MAX_DEFAULT_VALUE +#define PM_QOS_CLUSTER1_FREQ_MAX_DEFAULT_VALUE INT_MAX +#endif +#endif + +struct pm_qos_request exynos5_g3d_mif_min_qos; +struct pm_qos_request exynos5_g3d_mif_max_qos; +struct pm_qos_request exynos5_g3d_cpu_cluster0_min_qos; +struct pm_qos_request exynos5_g3d_cpu_cluster1_max_qos; +struct pm_qos_request exynos5_g3d_cpu_cluster1_min_qos; +#if PM_QOS_CPU_CLUSTER_NUM == 3 +struct pm_qos_request exynos5_g3d_cpu_cluster2_max_qos; +struct pm_qos_request exynos5_g3d_cpu_cluster2_min_qos; +#endif + +#ifdef CONFIG_MALI_SUSTAINABLE_OPT +struct pm_qos_request exynos5_g3d_cpu_cluster0_max_qos; +#endif + +extern struct kbase_device *pkbdev; + +#ifdef CONFIG_MALI_PM_QOS +int gpu_pm_qos_command(struct exynos_context *platform, gpu_pmqos_state state) +{ + int idx; + + DVFS_ASSERT(platform); + +#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT + if (platform->gpu_auto_cali_status) + return 0; +#endif + + switch (state) { + case GPU_CONTROL_PM_QOS_INIT: + pm_qos_add_request(&exynos5_g3d_mif_min_qos, PM_QOS_BUS_THROUGHPUT, 0); + if (platform->pmqos_mif_max_clock) + pm_qos_add_request(&exynos5_g3d_mif_max_qos, PM_QOS_BUS_THROUGHPUT_MAX, PM_QOS_BUS_THROUGHPUT_MAX_DEFAULT_VALUE); + pm_qos_add_request(&exynos5_g3d_cpu_cluster0_min_qos, PM_QOS_CLUSTER0_FREQ_MIN, 0); + pm_qos_add_request(&exynos5_g3d_cpu_cluster1_max_qos, PM_QOS_CLUSTER1_FREQ_MAX, PM_QOS_CLUSTER1_FREQ_MAX_DEFAULT_VALUE); +#if PM_QOS_CPU_CLUSTER_NUM == 2 + if (platform->boost_egl_min_lock) + pm_qos_add_request(&exynos5_g3d_cpu_cluster1_min_qos, PM_QOS_CLUSTER1_FREQ_MIN, 0); +#endif +#if PM_QOS_CPU_CLUSTER_NUM == 3 + pm_qos_add_request(&exynos5_g3d_cpu_cluster1_min_qos, PM_QOS_CLUSTER1_FREQ_MIN, 0); + pm_qos_add_request(&exynos5_g3d_cpu_cluster2_max_qos, PM_QOS_CLUSTER2_FREQ_MAX, PM_QOS_CLUSTER2_FREQ_MAX_DEFAULT_VALUE); + if (platform->boost_egl_min_lock) + pm_qos_add_request(&exynos5_g3d_cpu_cluster2_min_qos, PM_QOS_CLUSTER2_FREQ_MIN, 0); +#ifdef CONFIG_MALI_SUSTAINABLE_OPT + pm_qos_add_request(&exynos5_g3d_cpu_cluster0_max_qos, PM_QOS_CLUSTER0_FREQ_MAX, 
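/*
 * Note on GPU_CONTROL_PM_QOS_INIT: once all requests are registered, the loop
 * below snapshots each DVFS step's cpu_big_max_freq into save_cpu_max_freq[],
 * so that GPU_CONTROL_PM_QOS_EGL_RESET can restore the original big-cluster
 * caps after an EGL boost (which temporarily overwrites them) has expired.
 */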
PM_QOS_CLUSTER0_FREQ_MAX_DEFAULT_VALUE); +#endif +#endif + for (idx = 0; idx < platform->table_size; idx++) + platform->save_cpu_max_freq[idx] = platform->table[idx].cpu_big_max_freq; + platform->is_pm_qos_init = true; + break; + case GPU_CONTROL_PM_QOS_DEINIT: + pm_qos_remove_request(&exynos5_g3d_mif_min_qos); + if (platform->pmqos_mif_max_clock) + pm_qos_remove_request(&exynos5_g3d_mif_max_qos); + pm_qos_remove_request(&exynos5_g3d_cpu_cluster0_min_qos); + pm_qos_remove_request(&exynos5_g3d_cpu_cluster1_max_qos); +#if PM_QOS_CPU_CLUSTER_NUM == 2 + if (platform->boost_egl_min_lock) + pm_qos_remove_request(&exynos5_g3d_cpu_cluster1_min_qos); +#endif +#if PM_QOS_CPU_CLUSTER_NUM == 3 + pm_qos_remove_request(&exynos5_g3d_cpu_cluster1_min_qos); + pm_qos_remove_request(&exynos5_g3d_cpu_cluster2_max_qos); + if (platform->boost_egl_min_lock) + pm_qos_remove_request(&exynos5_g3d_cpu_cluster2_min_qos); +#ifdef CONFIG_MALI_SUSTAINABLE_OPT + pm_qos_remove_request(&exynos5_g3d_cpu_cluster0_max_qos); +#endif +#endif + platform->is_pm_qos_init = false; + break; + case GPU_CONTROL_PM_QOS_SET: + if (!platform->is_pm_qos_init) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: PM QOS ERROR : pm_qos deinit -> set\n", __func__); + return -ENOENT; + } + KBASE_DEBUG_ASSERT(platform->step >= 0); + pm_qos_update_request(&exynos5_g3d_mif_min_qos, platform->table[platform->step].mem_freq); + if (platform->pmqos_mif_max_clock && + (platform->table[platform->step].clock >= platform->pmqos_mif_max_clock_base)) + pm_qos_update_request(&exynos5_g3d_mif_max_qos, platform->pmqos_mif_max_clock); +#ifdef CONFIG_MALI_SEC_VK_BOOST /* VK JOB Boost */ + mutex_lock(&platform->gpu_vk_boost_lock); + if (platform->ctx_vk_need_qos && platform->max_lock == platform->gpu_vk_boost_max_clk_lock) { + pm_qos_update_request(&exynos5_g3d_mif_min_qos, platform->gpu_vk_boost_mif_min_clk_lock); + } + mutex_unlock(&platform->gpu_vk_boost_lock); +#endif + pm_qos_update_request(&exynos5_g3d_cpu_cluster0_min_qos, platform->table[platform->step].cpu_little_min_freq); + + if (!platform->boost_is_enabled) + pm_qos_update_request(&exynos5_g3d_cpu_cluster1_max_qos, platform->table[platform->step].cpu_big_max_freq); +#if PM_QOS_CPU_CLUSTER_NUM == 3 + pm_qos_update_request(&exynos5_g3d_cpu_cluster1_min_qos, platform->table[platform->step].cpu_middle_min_freq); + if (!platform->boost_is_enabled) + pm_qos_update_request(&exynos5_g3d_cpu_cluster2_max_qos, platform->table[platform->step].cpu_big_max_freq); +#ifdef CONFIG_MALI_SUSTAINABLE_OPT + if (platform->sustainable.info_array[0] > 0) { + if (((platform->cur_clock == platform->sustainable.info_array[0]) + || (platform->max_lock == platform->sustainable.info_array[0])) + && platform->env_data.utilization > platform->sustainable.info_array[1]) { + platform->sustainable.status = true; + pm_qos_update_request(&exynos5_g3d_cpu_cluster0_max_qos, platform->sustainable.info_array[2]); + pm_qos_update_request(&exynos5_g3d_cpu_cluster1_max_qos, platform->sustainable.info_array[3]); + pm_qos_update_request(&exynos5_g3d_cpu_cluster2_max_qos, platform->sustainable.info_array[4]); + } else { + platform->sustainable.status = false; + pm_qos_update_request(&exynos5_g3d_cpu_cluster0_max_qos, PM_QOS_CLUSTER0_FREQ_MAX_DEFAULT_VALUE); + pm_qos_update_request(&exynos5_g3d_cpu_cluster1_max_qos, PM_QOS_CLUSTER1_FREQ_MAX_DEFAULT_VALUE); + pm_qos_update_request(&exynos5_g3d_cpu_cluster2_max_qos, platform->table[platform->step].cpu_big_max_freq); + } + } +#endif +#ifdef CONFIG_MALI_SEC_CL_BOOST + if 
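/*
 * CL boost: when the PM metrics report a fully compute-bound (OpenCL)
 * workload and CL boost has not been disabled, the big-cluster frequency cap
 * is lifted back to its default so the CPU does not throttle job submission.
 * Further down, GPU_CONTROL_PM_QOS_EGL_SET raises the CPU cluster floors with
 * pm_qos_update_request_timeout(); its timeout argument is in microseconds,
 * so 30000 corresponds to a 30 ms boost window.
 */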
(pkbdev->pm.backend.metrics.is_full_compute_util && platform->cl_boost_disable == false) + pm_qos_update_request(&exynos5_g3d_cpu_cluster2_max_qos, PM_QOS_CLUSTER2_FREQ_MAX_DEFAULT_VALUE); +#endif +#endif + + break; + case GPU_CONTROL_PM_QOS_RESET: + if (!platform->is_pm_qos_init) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: PM QOS ERROR : pm_qos deinit -> reset\n", __func__); + return -ENOENT; + } + pm_qos_update_request(&exynos5_g3d_mif_min_qos, 0); + if (platform->pmqos_mif_max_clock) + pm_qos_update_request(&exynos5_g3d_mif_max_qos, PM_QOS_BUS_THROUGHPUT_MAX_DEFAULT_VALUE); + pm_qos_update_request(&exynos5_g3d_cpu_cluster0_min_qos, 0); + pm_qos_update_request(&exynos5_g3d_cpu_cluster1_max_qos, PM_QOS_CLUSTER1_FREQ_MAX_DEFAULT_VALUE); +#if PM_QOS_CPU_CLUSTER_NUM == 3 + pm_qos_update_request(&exynos5_g3d_cpu_cluster1_min_qos, 0); + pm_qos_update_request(&exynos5_g3d_cpu_cluster2_max_qos, PM_QOS_CLUSTER2_FREQ_MAX_DEFAULT_VALUE); +#ifdef CONFIG_MALI_SUSTAINABLE_OPT + pm_qos_update_request(&exynos5_g3d_cpu_cluster0_max_qos, PM_QOS_CLUSTER0_FREQ_MAX_DEFAULT_VALUE); + pm_qos_update_request(&exynos5_g3d_cpu_cluster1_max_qos, PM_QOS_CLUSTER1_FREQ_MAX_DEFAULT_VALUE); + pm_qos_update_request(&exynos5_g3d_cpu_cluster2_max_qos, PM_QOS_CLUSTER2_FREQ_MAX_DEFAULT_VALUE); +#endif +#endif + break; + case GPU_CONTROL_PM_QOS_EGL_SET: + if (!platform->is_pm_qos_init) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: PM QOS ERROR : pm_qos deinit -> egl_set\n", __func__); + return -ENOENT; + } + pm_qos_update_request_timeout(&exynos5_g3d_cpu_cluster1_min_qos, platform->boost_egl_min_lock, 30000); + for (idx = 0; idx < platform->table_size; idx++) { + platform->table[idx].cpu_big_max_freq = PM_QOS_CLUSTER1_FREQ_MAX_DEFAULT_VALUE; + } +#if PM_QOS_CPU_CLUSTER_NUM == 3 + pm_qos_update_request_timeout(&exynos5_g3d_cpu_cluster2_min_qos, platform->boost_egl_min_lock, 30000); + for (idx = 0; idx < platform->table_size; idx++) { + platform->table[idx].cpu_big_max_freq = PM_QOS_CLUSTER2_FREQ_MAX_DEFAULT_VALUE; + } +#endif + break; + case GPU_CONTROL_PM_QOS_EGL_RESET: + if (!platform->is_pm_qos_init) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: PM QOS ERROR : pm_qos deinit -> egl_reset\n", __func__); + return -ENOENT; + } + for (idx = 0; idx < platform->table_size; idx++) + platform->table[idx].cpu_big_max_freq = platform->save_cpu_max_freq[idx]; + break; + default: + break; + } + + return 0; +} +#endif diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_protected_mode.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_protected_mode.c new file mode 100644 index 000000000000..216ba4a22ddf --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_protected_mode.c @@ -0,0 +1,251 @@ +/* + * + * (C) COPYRIGHT 2020 Samsung Electronics Inc. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +/*#include + +/* SMC CALL return value for Successfully works */ +#define GPU_SMC_TZPC_OK 0 + +int exynos_secure_mode_enable(struct protected_mode_device *pdev) +{ + /* enable secure mode : TZPC */ + struct kbase_device *kbdev = pdev->data; + struct exynos_context *platform; + unsigned long flags; + int ret = 0; + + if (!kbdev) { + ret = -EINVAL; + goto secure_out; + } + + platform = (struct exynos_context *) kbdev->platform_context; + + spin_lock_irqsave(&platform->exynos_smc_lock, flags); + if (platform->exynos_smc_enabled) { + spin_unlock_irqrestore(&platform->exynos_smc_lock, flags); + goto secure_out; + } + + ret = exynos_smc(SMC_PROTECTION_SET, 0, PROT_G3D, SMC_PROTECTION_ENABLE); + + if (ret == GPU_SMC_TZPC_OK) { + platform->exynos_smc_enabled = true; + } + spin_unlock_irqrestore(&platform->exynos_smc_lock, flags); + + GPU_LOG(DVFS_INFO, LSI_SECURE_WORLD_ENTER, 0u, 0u, "LSI_SECURE_WORLD_ENTER\n"); + + if (ret == GPU_SMC_TZPC_OK) { + ret = 0; + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: Enter Secure World by GPU\n", __func__); + } else { + GPU_LOG(DVFS_ERROR, LSI_GPU_SECURE, 0u, 0u, "%s: failed exynos_smc() ret : %d\n", __func__, ret); + } + +secure_out: + return ret; +} + +int exynos_secure_mode_disable(struct protected_mode_device *pdev) +{ + /* Turn off secure mode and reset GPU : TZPC */ + struct kbase_device *kbdev = pdev->data; + struct exynos_context *platform; + unsigned long flags; + int ret = 0; + + if (!kbdev) { + ret = -EINVAL; + goto secure_out; + } + + platform = (struct exynos_context *) kbdev->platform_context; + + spin_lock_irqsave(&platform->exynos_smc_lock, flags); + if (!platform->exynos_smc_enabled) { + spin_unlock_irqrestore(&platform->exynos_smc_lock, flags); + goto secure_out; + } + + ret = exynos_smc(SMC_PROTECTION_SET, 0, PROT_G3D, SMC_PROTECTION_DISABLE); + + if (ret == GPU_SMC_TZPC_OK) { + platform->exynos_smc_enabled = false; + } + spin_unlock_irqrestore(&platform->exynos_smc_lock, flags); + + GPU_LOG(DVFS_INFO, LSI_SECURE_WORLD_EXIT, 0u, 0u, "LSI_SECURE_WORLD_EXIT\n"); + + if (ret == GPU_SMC_TZPC_OK) { + ret = 0; + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: Exit Secure World by GPU\n", __func__); + } else { + GPU_LOG(DVFS_ERROR, LSI_GPU_SECURE, 0u, 0u, "%s: failed exynos_smc() ret : %d\n", __func__, ret); + } + +secure_out: + return ret; +} + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) +struct protected_mode_ops exynos_protected_ops = { + .protected_mode_enable = exynos_secure_mode_enable, + .protected_mode_disable = exynos_secure_mode_disable +}; + +static void kbasep_js_cacheclean(struct kbase_device *kbdev) +{ + /* Limit the number of loops to avoid a hang if the interrupt is missed */ + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + + GPU_LOG(DVFS_INFO, LSI_SECURE_CACHE, 0u, 0u, "GPU CACHE WORKING for Secure Rendering\n"); + /* use GPU_COMMAND completion solution */ + /* clean the caches */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAN_CACHES); + + /* wait for cache flush to complete before continuing */ + while (--max_loops && (kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & CLEAN_CACHES_COMPLETED) == 0) + ; + + /* clear the CLEAN_CACHES_COMPLETED irq */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED); + GPU_LOG(DVFS_INFO, LSI_SECURE_CACHE_END, 0u, 0u, "GPU CACHE WORKING for Secure Rendering\n"); +} + +int kbase_jm_enter_protected_mode_exynos(struct kbase_device *kbdev, + struct 
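/*
 * Summary of the helpers above: protected (secure) rendering is toggled via
 * exynos_smc(SMC_PROTECTION_SET, 0, PROT_G3D, SMC_PROTECTION_ENABLE/DISABLE);
 * the exynos_smc_lock spinlock and the exynos_smc_enabled flag make the calls
 * idempotent, and GPU_SMC_TZPC_OK (0) is the only return treated as success.
 * The job-manager hooks below flush the GPU caches first and return -EAGAIN
 * while any atoms are still submitted, so the mode switch is retried later.
 */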
kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + CSTD_UNUSED(katom); + CSTD_UNUSED(idx); + CSTD_UNUSED(js); + + if (kbase_gpu_atoms_submitted_any(kbdev)) + return -EAGAIN; + + if (kbdev->protected_ops) { + /* Switch GPU to protected mode */ + kbasep_js_cacheclean(kbdev); + err = exynos_secure_mode_enable(kbdev->protected_dev); + + if (err) + dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", err); + else + kbdev->protected_mode = true; + } + + return 0; +} + +int kbase_jm_exit_protected_mode_exynos(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + CSTD_UNUSED(katom); + CSTD_UNUSED(idx); + CSTD_UNUSED(js); + + if (kbase_gpu_atoms_submitted_any(kbdev)) + return -EAGAIN; + + if (kbdev->protected_ops) { + /* Switch GPU out of protected mode */ + kbasep_js_cacheclean(kbdev); + err = exynos_secure_mode_disable(kbdev->protected_dev); + + if (err) + dev_warn(kbdev->dev, "Failed to disable protected mode: %d\n", + err); + else + kbdev->protected_mode = false; + } + + return 0; +} + +int kbase_protected_mode_disable_exynos(struct kbase_device *kbdev) +{ + int err = 0; + + if (!kbdev) + return -EINVAL; + + if (kbdev->protected_mode == true) { + + /* Switch GPU to non-secure mode */ + err = exynos_secure_mode_disable(kbdev->protected_dev); + if (err) + dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err); + else + kbdev->protected_mode = false; + } + + return err; +} +#elif IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_ARM) +/* + * Call back functions for PROTECTED_CALLBACKS + */ +static int exynos_secure_mode_enable_arm(struct protected_mode_device *pdev) +{ + int ret = 0; + struct kbase_device *kbdev = pdev->data; + + if (!kbdev) + return 0; + + ret = kbase_pm_protected_mode_enable(kbdev); + if (ret != 0) + return ret; + + return exynos_secure_mode_enable(pdev); +} + +static int exynos_secure_mode_disable_arm(struct protected_mode_device *pdev) +{ + int ret = 0; + struct kbase_device *kbdev = pdev->data; + + if (!kbdev) + return 0; + + ret = kbase_pm_protected_mode_disable(kbdev); + if (ret != 0) + return ret; + + return exynos_secure_mode_disable(pdev); +} + +struct protected_mode_ops exynos_protected_ops_arm = { + .protected_mode_enable = exynos_secure_mode_enable_arm, + .protected_mode_disable = exynos_secure_mode_disable_arm +}; +#endif /* CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_protected_mode.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_protected_mode.h new file mode 100644 index 000000000000..08ea59982c55 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_protected_mode.h @@ -0,0 +1,42 @@ +/* + * + * (C) COPYRIGHT 2020 Samsung Electronics Inc. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _GPU_PROTECTED_MODE_H_ +#define _GPU_PROTECTED_MODE_H_ + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) +#include + +int exynos_secure_mode_enable(struct protected_mode_device *pdev); + +int exynos_secure_mode_disable(struct protected_mode_device *pdev); + +int kbase_jm_enter_protected_mode_exynos(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js); + +int kbase_jm_exit_protected_mode_exynos(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js); + +int kbase_protected_mode_disable_exynos(struct kbase_device *kbdev); +#endif /* CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY */ + +#endif /* _GPU_PROTECTED_MODE_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_trace_defs.h b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_trace_defs.h new file mode 100644 index 000000000000..7655b478d243 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_trace_defs.h @@ -0,0 +1,73 @@ +/* drivers/gpu/arm/.../platform/gpu_treace_defs.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DDK porting layer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_trace_defs.h + * DDK porting layer. + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + + /* MALI_SEC_INTEGRATION */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_JM_IRQ), + KBASE_KTRACE_CODE_MAKE_CODE(LSI_JM_IRQ_E), + KBASE_KTRACE_CODE_MAKE_CODE(LSI_MMU_IRQ), + KBASE_KTRACE_CODE_MAKE_CODE(LSI_MMU_IRQ_E), + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_IRQ), + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_IRQ_E), + + KBASE_KTRACE_CODE_MAKE_CODE(KBASE_DEVICE_SUSPEND), + KBASE_KTRACE_CODE_MAKE_CODE(KBASE_DEVICE_RESUME), + KBASE_KTRACE_CODE_MAKE_CODE(KBASE_DEVICE_PM_SUSPEND), + KBASE_KTRACE_CODE_MAKE_CODE(KBASE_DEVICE_PM_RESUME), + KBASE_KTRACE_CODE_MAKE_CODE(KBASE_DEVICE_PM_WAIT_WQ_RUN), + KBASE_KTRACE_CODE_MAKE_CODE(KBASE_DEVICE_PM_WAIT_WQ_QUEUE_WORK), + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_RPM_RESUME_API), /* gpu on */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_RPM_SUSPEND_API), /* gpu off */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_SUSPEND_CALLBACK), /* suspend */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_ON), /* gpu on */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_OFF), /* gpu off */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_DVS_ON), /* gpu dvs on */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_DVS_OFF), /* gpu dvs off */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_RESUME), /* resume */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_CLOCK_VALUE), /* clock */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_TMU_VALUE), /* TMU LOCK info */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_REGISTER_DUMP), /* CMU & PMU info */ + + + KBASE_KTRACE_CODE_MAKE_CODE(LSI_SECURE_WORLD_ENTER), /* SECURE RENDERING START */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_SECURE_WORLD_EXIT), /* SECURE RENDERING END */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_SECURE_CACHE), /* SECURE RENDERING CACHE FLUSH */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_SECURE_CACHE_END), /* SECURE RENDERING CACHE FLUSH END */ + + KBASE_KTRACE_CODE_MAKE_CODE(LSI_KBASE_PM_INIT_HW), /* SECURE RENDERING END */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_IFPM_POWER_ON), /* IFPM Power on */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_IFPM_POWER_OFF), /* IFPM Power off */ + + KBASE_KTRACE_CODE_MAKE_CODE(LSI_RESUME_CHECK), /* is 
resume check */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_RESUME_FREQ), /* resume freq check */ + + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_MAX_LOCK), /* GPU MAX CLOCK LOCK */ + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_MIN_LOCK), /* GPU MIN CLOCK LOCK */ + + KBASE_KTRACE_CODE_MAKE_CODE(LSI_GPU_SECURE), /* GPU Secure Rendering */ + + KBASE_KTRACE_CODE_MAKE_CODE(LSI_RESET_GPU_EARLY_DUPE), + KBASE_KTRACE_CODE_MAKE_CODE(LSI_RESET_RACE_DETECTED_EARLY_OUT), + KBASE_KTRACE_CODE_MAKE_CODE(LSI_PM_SUSPEND), + + KBASE_KTRACE_CODE_MAKE_CODE(LSI_KATOM_REMOVED), +#if 0 +}; +#endif diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_utilization.c b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_utilization.c new file mode 100644 index 000000000000..a1effd878508 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/gpu_utilization.c @@ -0,0 +1,144 @@ +/* drivers/gpu/arm/.../platform/gpu_utilization.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series DVFS driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file gpu_utilization.c + * DVFS + */ + +#include + +#include "mali_kbase_platform.h" +#include "gpu_control.h" +#include "gpu_dvfs_handler.h" +#include "gpu_ipa.h" + +extern struct kbase_device *pkbdev; + +/* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_DVFS +extern int gpu_pm_get_dvfs_utilisation(struct kbase_device *kbdev, int *, int *); +static void gpu_dvfs_update_utilization(struct kbase_device *kbdev) +{ + unsigned long flags; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + +#if defined(CONFIG_MALI_DVFS) && defined(CONFIG_CPU_THERMAL_IPA) + if (platform->time_tick < platform->gpu_dvfs_time_interval) { + platform->time_tick++; + platform->time_busy += kbdev->pm.backend.metrics.values.time_busy; + platform->time_idle += kbdev->pm.backend.metrics.values.time_idle; + } else { + platform->time_busy = kbdev->pm.backend.metrics.values.time_busy; + platform->time_idle = kbdev->pm.backend.metrics.values.time_idle; + platform->time_tick = 0; + } +#endif /* CONFIG_MALI_DVFS && CONFIG_CPU_THERMAL_IPA */ + + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + + platform->env_data.utilization = gpu_pm_get_dvfs_utilisation(kbdev, 0, 0); + + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + +#if defined(CONFIG_MALI_DVFS) && defined(CONFIG_CPU_THERMAL_IPA) + gpu_ipa_dvfs_calc_norm_utilisation(kbdev); +#endif /* CONFIG_MALI_DVFS && CONFIG_CPU_THERMAL_IPA */ +} +#endif /* CONFIG_MALI_DVFS */ + +int gpu_dvfs_start_env_data_gathering(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + return 0; +} + +int gpu_dvfs_stop_env_data_gathering(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + return 0; +} + +#ifdef CONFIG_MALI_DVFS +int gpu_dvfs_reset_env_data(struct kbase_device *kbdev) +{ + unsigned long flags; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + /* reset gpu utilization value */ + spin_lock_irqsave(&platform->gpu_dvfs_spinlock, flags); + kbdev->pm.backend.metrics.values.time_idle = 
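/*
 * Reset of the utilisation window: accumulated busy time is folded into idle
 * time and busy is zeroed, so the next gpu_dvfs_update_utilization() pass
 * (utilisation roughly busy / (busy + idle)) starts from 0% busy without
 * losing the total elapsed time.
 */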
kbdev->pm.backend.metrics.values.time_idle + kbdev->pm.backend.metrics.values.time_busy; + kbdev->pm.backend.metrics.values.time_busy = 0; + spin_unlock_irqrestore(&platform->gpu_dvfs_spinlock, flags); + + return 0; +} + +int gpu_dvfs_calculate_env_data(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + static int polling_period; + + DVFS_ASSERT(platform); + + gpu_dvfs_update_utilization(kbdev); + + polling_period -= platform->polling_speed; + if (polling_period > 0) + return 0; + + if (platform->dvs_is_enabled == true) + return 0; + + return 0; +} +#endif + +int gpu_dvfs_calculate_env_data_ppmu(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + return 0; +} + +int gpu_dvfs_utilization_init(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "utilization module initialized\n"); + + return 0; +} + +int gpu_dvfs_utilization_deinit(struct kbase_device *kbdev) +{ + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + DVFS_ASSERT(platform); + + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "utilization module de-initialized\n"); + + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_clk_rate_trace.c b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_clk_rate_trace.c new file mode 100644 index 000000000000..09e3bf8beff7 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_clk_rate_trace.c @@ -0,0 +1,92 @@ +/* + * + * (C) COPYRIGHT 2015, 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include "mali_kbase_config_platform.h" +#include "gpu_control.h" + +/* + * Exynos is not using standarized clock framework to handle GPU clock, + * so a custom implementation to handle GPU clock change notifications + * are needed. + * + * Exynos platform only uses one clock, and there is no suitable + * clock struct to be used as "gpu_clk_handle". + * We therefor use the platform_context as the one and only clock handle + * + * A short-cut is taken with the registration of callbacks, as we only + * allow one callback to be installed at any time. This should be enough + * for now at least. 
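 *
 * Illustrative note: kbase obtains the handle by calling
 * enumerate_gpu_clk(kbdev, 0) below (index 1 and above return NULL, which
 * ends the enumeration) and then polls get_gpu_clk_rate() on it. A client
 * registered through gpu_clk_notifier_register() would receive a
 * struct kbase_gpu_clk_notifier_data whose gpu_clk_handle is this same
 * platform_context pointer; assuming that struct mirrors clk_notifier_data
 * (old_rate/new_rate), a callback could look like:
 *
 *   static int g3d_rate_cb(struct notifier_block *nb, unsigned long ev, void *d)
 *   {
 *           struct kbase_gpu_clk_notifier_data *nd = d;
 *
 *           pr_debug("g3d clock: %lu -> %lu\n", nd->old_rate, nd->new_rate);
 *           return NOTIFY_DONE;
 *   }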
+ */ +static void *enumerate_gpu_clk(struct kbase_device *kbdev, + unsigned int index) +{ + void *ret = NULL; + + if (index == 0) + ret = kbdev->platform_context; + + return ret; +} + +static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, + void *gpu_clk_handle) +{ + struct exynos_context *platform = (struct exynos_context *)gpu_clk_handle; + return gpu_get_cur_clock(platform); +} + +static int gpu_clk_notifier_register(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + struct exynos_context *platform = (struct exynos_context *)gpu_clk_handle; + + compiletime_assert(offsetof(struct clk_notifier_data, clk) == + offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), + "mismatch in the offset of clk member"); + + compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == + sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), + "mismatch in the size of clk member"); + + if (platform->nb_clock_change != NULL) + return -EEXIST; /* callback already installed, do currently not support multiple */ + + platform->nb_clock_change = nb; + return 0; +} + +static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + struct exynos_context *platform = (struct exynos_context *)gpu_clk_handle; + platform->nb_clock_change = NULL; +} + +struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { + .get_gpu_clk_rate = get_gpu_clk_rate, + .enumerate_gpu_clk = enumerate_gpu_clk, + .gpu_clk_notifier_register = gpu_clk_notifier_register, + .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, +}; diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_config_platform.h b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_config_platform.h new file mode 100644 index 000000000000..c9828967439a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_config_platform.h @@ -0,0 +1,97 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * Maximum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MAX (5000) +/** + * Minimum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MIN (5000) + +/** + * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock + * + * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_cpu_clk_speed_func. + * Default Value: NA + */ +#define CPU_SPEED_FUNC (NULL) + +/** + * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock + * + * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_gpu_clk_speed_func. 
+ * Default Value: NA + */ +#define GPU_SPEED_FUNC (NULL) + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +/* MALI_SEC_INTEGRATION */ +#define PLATFORM_FUNCS (&platform_funcs) + +/** Power model for IPA + * + * Attached value: pointer to @ref mali_pa_model_ops + */ +#define POWER_MODEL_CALLBACKS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; +extern struct kbase_platform_funcs_conf platform_funcs; + +/** + * Secure mode switch + * + * Attached value: pointer to @ref kbase_secure_ops + */ +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) +#define PROTECTED_CALLBACKS (&exynos_protected_ops) +extern struct protected_mode_ops exynos_protected_ops; +#elif IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_ARM) +#define PROTECTED_CALLBACKS (&exynos_protected_ops_arm) +extern struct protected_mode_ops exynos_protected_ops_arm; +#endif + +#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) +extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_platform.c b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_platform.c new file mode 100644 index 000000000000..e79a992cd08a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_platform.c @@ -0,0 +1,703 @@ +/* drivers/gpu/arm/.../platform/mali_kbase_platform.c + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series platform-dependent codes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file mali_kbase_platform.c + * Platform-dependent init. 
+ */ + +#include + +#include "mali_kbase_platform.h" +#include "gpu_custom_interface.h" +#include "gpu_dvfs_handler.h" +#include "gpu_notifier.h" +#include "gpu_dvfs_governor.h" +#include "gpu_control.h" + +#ifdef CONFIG_OF +#include +#endif +#ifdef CONFIG_MALI_DVFS +#ifdef CONFIG_CAL_IF +#include +#endif +static gpu_dvfs_info gpu_dvfs_table_default[DVFS_TABLE_ROW_MAX]; +#endif + +#include + +#ifdef CONFIG_EXYNOS9630_BTS +#include +#endif + +struct kbase_device *pkbdev; +static int gpu_debug_level; +static int gpu_trace_level; + +struct kbase_device *gpu_get_device_structure(void) +{ + return pkbdev; +} + +void gpu_set_debug_level(int level) +{ + gpu_debug_level = level; +} + +int gpu_get_debug_level(void) +{ + return gpu_debug_level; +} + +#ifdef CONFIG_MALI_EXYNOS_TRACE +struct kbase_ktrace_msg exynos_trace_rbuf[KBASE_KTRACE_SIZE]; +extern const struct file_operations kbasep_ktrace_debugfs_fops; +static int gpu_trace_init(struct kbase_device *kbdev) +{ + kbdev->ktrace.rbuf = exynos_trace_rbuf; + + spin_lock_init(&kbdev->ktrace.lock); + +/* below work : register entry from making debugfs create file to trace_dentry + * is same work as kbasep_trace_debugfs_init */ +#ifdef MALI_SEC_INTEGRATION + kbdev->trace_dentry = debugfs_create_file("mali_trace", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_ktrace_debugfs_fops); +#endif /* MALI_SEC_INTEGRATION */ + return 0; +} + +void gpu_set_trace_level(int level) +{ + int i; + + if (level == TRACE_ALL) { + for (i = TRACE_NONE + 1; i < TRACE_ALL; i++) + gpu_trace_level |= (1U << i); + } else if (level == TRACE_NONE) { + gpu_trace_level = TRACE_NONE; + } else { + gpu_trace_level |= (1U << level); + } +} + +bool gpu_check_trace_level(int level) +{ + if (gpu_trace_level & (1U << level)) + return true; + return false; +} + +bool gpu_check_trace_code(int code) +{ + int level; + switch (code) { + case KBASE_KTRACE_CODE(DUMMY): + return false; + case KBASE_KTRACE_CODE(LSI_CLOCK_VALUE): + case KBASE_KTRACE_CODE(LSI_GPU_MAX_LOCK): + case KBASE_KTRACE_CODE(LSI_GPU_MIN_LOCK): + case KBASE_KTRACE_CODE(LSI_SECURE_WORLD_ENTER): + case KBASE_KTRACE_CODE(LSI_SECURE_WORLD_EXIT): + case KBASE_KTRACE_CODE(LSI_SECURE_CACHE): + case KBASE_KTRACE_CODE(LSI_SECURE_CACHE_END): + case KBASE_KTRACE_CODE(LSI_KBASE_PM_INIT_HW): + case KBASE_KTRACE_CODE(LSI_RESUME_CHECK): + case KBASE_KTRACE_CODE(LSI_RESUME_FREQ): + case KBASE_KTRACE_CODE(LSI_IFPM_POWER_ON): + case KBASE_KTRACE_CODE(LSI_IFPM_POWER_OFF): + level = TRACE_CLK; + break; + level = TRACE_VOL; + break; + case KBASE_KTRACE_CODE(LSI_GPU_ON): + case KBASE_KTRACE_CODE(LSI_GPU_OFF): + case KBASE_KTRACE_CODE(LSI_RESET_GPU_EARLY_DUPE): + case KBASE_KTRACE_CODE(LSI_RESET_RACE_DETECTED_EARLY_OUT): + case KBASE_KTRACE_CODE(LSI_PM_SUSPEND): + case KBASE_KTRACE_CODE(LSI_RESUME): + case KBASE_KTRACE_CODE(LSI_GPU_RPM_RESUME_API): + case KBASE_KTRACE_CODE(LSI_GPU_RPM_SUSPEND_API): + case KBASE_KTRACE_CODE(LSI_SUSPEND_CALLBACK): + case KBASE_KTRACE_CODE(KBASE_DEVICE_SUSPEND): + case KBASE_KTRACE_CODE(KBASE_DEVICE_RESUME): + case KBASE_KTRACE_CODE(KBASE_DEVICE_PM_WAIT_WQ_RUN): + case KBASE_KTRACE_CODE(KBASE_DEVICE_PM_WAIT_WQ_QUEUE_WORK): + case KBASE_KTRACE_CODE(LSI_TMU_VALUE): + level = TRACE_NOTIFIER; + break; + case KBASE_KTRACE_CODE(LSI_REGISTER_DUMP): + level = TRACE_DUMP; + break; + default: + level = TRACE_DEFAULT; + break; + } + return gpu_check_trace_level(level); +} +#endif /* CONFIG_MALI_EXYNOS_TRACE */ + +void gpu_update_config_data_bool(struct device_node *np, const char *of_string, bool *of_data) +{ + int 
of_data_int; + + if (!of_string || !of_data) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "NULL: failed to get item from dt\n"); + return; + } + + if (of_property_read_u32(np, of_string, &of_data_int)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: failed to get item from dt. Data will be set to 0.\n", of_string); + of_data_int = 0; + } else { + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: %d\n", of_string, of_data_int); + } + + *of_data = (bool)of_data_int; + + return; +} + +void gpu_update_config_data_int(struct device_node *np, const char *of_string, int *of_data) +{ + if (!of_string || !of_data) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "NULL: failed to get item from dt\n"); + return; + } + + if (of_property_read_u32(np, of_string, of_data)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: failed to get item from dt. Data will be set to 0.\n", of_string); + *of_data = 0; + } else { + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: %d\n", of_string, *of_data); + } +} + +void gpu_update_config_data_string(struct device_node *np, const char *of_string, const char **of_data) +{ + if (!of_string || !of_data) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "NULL: failed to get item from dt\n"); + return; + } + + if (of_property_read_string(np, of_string, of_data)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: failed to get item from dt. Data will be set to NULL.\n", of_string); + *of_data = NULL; + } else { + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%s: %s\n", of_string, *of_data); + } +} + +void gpu_update_config_data_int_array(struct device_node *np, const char *of_string, int *of_data, int sz) +{ + int i; + + if (!of_string || !of_data) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "NULL: failed to get item from dt\n"); + return; + } + + if (sz > OF_DATA_NUM_MAX) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "size overflow(%d): failed to get item from dt\n", sz); + return; + } + + if (of_property_read_u32_array(np, of_string, of_data, sz)) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "%s: failed to get item from dt\n", of_string); + } else { + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "[%s]", of_string); + for (i = 0; i < sz; i++) { + if (i % 7 == 0) + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "\n"); + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "%d\t\n", of_data[i]); + } + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "\n"); + } +} +static int gpu_dvfs_update_config_data_from_dt(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DVFS + int i; + int of_data_int_array[OF_DATA_NUM_MAX]; + int of_data_int; + const char *of_string; +#endif + struct device_node *np = kbdev->dev->of_node; + struct exynos_context *platform = (struct exynos_context *) kbdev->platform_context; + + gpu_update_config_data_int(np, "gpu_debug_level", &gpu_debug_level); + +#ifdef CONFIG_MALI_EXYNOS_TRACE + gpu_update_config_data_int(np, "gpu_trace_level", &gpu_trace_level); + gpu_set_trace_level(gpu_trace_level); +#endif + +#ifdef CONFIG_MALI_DVFS + gpu_update_config_data_int(np, "g3d_cmu_cal_id", &platform->g3d_cmu_cal_id); + gpu_update_config_data_string(np, "governor", &of_string); + if (!strncmp("interactive", of_string, strlen("interactive"))) { + platform->governor_type = G3D_DVFS_GOVERNOR_INTERACTIVE; + gpu_update_config_data_int_array(np, "interactive_info", of_data_int_array, 3); + platform->interactive.highspeed_clock = of_data_int_array[0] == 0 ? 500 : (u32) of_data_int_array[0]; + platform->interactive.highspeed_load = of_data_int_array[1] == 0 ? 100 : (u32) of_data_int_array[1]; + platform->interactive.highspeed_delay = of_data_int_array[2] == 0 ? 
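/*
 * interactive_info in DT is the triple {highspeed_clock, highspeed_load,
 * highspeed_delay} used by the interactive governor elsewhere in the driver:
 * jump to highspeed_clock once utilisation stays above highspeed_load for
 * highspeed_delay polling periods. Zero entries fall back to the built-in
 * defaults applied here (500 / 100 / 0).
 */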
0 : (u32) of_data_int_array[2]; + } else if (!strncmp("static", of_string, strlen("static"))) { + platform->governor_type = G3D_DVFS_GOVERNOR_STATIC; + } else if (!strncmp("booster", of_string, strlen("booster"))) { + platform->governor_type = G3D_DVFS_GOVERNOR_BOOSTER; + } else if (!strncmp("dynamic", of_string, strlen("dynamic"))) { + platform->governor_type = G3D_DVFS_GOVERNOR_DYNAMIC; + } else { + platform->governor_type = G3D_DVFS_GOVERNOR_DEFAULT; + } + +#ifdef CONFIG_CAL_IF + platform->gpu_dvfs_start_clock = cal_dfs_get_boot_freq(platform->g3d_cmu_cal_id); + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "get g3d start clock from ect : %d\n", platform->gpu_dvfs_start_clock); +#else + gpu_update_config_data_int(np, "gpu_dvfs_start_clock", &platform->gpu_dvfs_start_clock); +#endif + gpu_update_config_data_int_array(np, "gpu_dvfs_table_size", of_data_int_array, 2); + for (i = 0; i < G3D_MAX_GOVERNOR_NUM; i++) { + gpu_dvfs_update_start_clk(i, platform->gpu_dvfs_start_clock); + gpu_dvfs_update_table(i, gpu_dvfs_table_default); + gpu_dvfs_update_table_size(i, of_data_int_array[0]); + } + + gpu_update_config_data_int(np, "gpu_pmqos_cpu_cluster_num", &platform->gpu_pmqos_cpu_cluster_num); + gpu_update_config_data_int(np, "gpu_max_clock", &platform->gpu_max_clock); +#ifdef CONFIG_CAL_IF + platform->gpu_max_clock_limit = (int)cal_dfs_get_max_freq(platform->g3d_cmu_cal_id); +#else + gpu_update_config_data_int(np, "gpu_max_clock_limit", &platform->gpu_max_clock_limit); +#endif + gpu_update_config_data_int(np, "gpu_min_clock", &platform->gpu_min_clock); + gpu_update_config_data_int(np, "gpu_dvfs_bl_config_clock", &platform->gpu_dvfs_config_clock); + gpu_update_config_data_int(np, "gpu_default_voltage", &platform->gpu_default_vol); + gpu_update_config_data_int(np, "gpu_cold_minimum_vol", &platform->cold_min_vol); + gpu_update_config_data_int(np, "gpu_voltage_offset_margin", &platform->gpu_default_vol_margin); + gpu_update_config_data_bool(np, "gpu_tmu_control", &platform->tmu_status); + gpu_update_config_data_int(np, "gpu_temp_throttling_level_num", &of_data_int); + if (of_data_int == TMU_LOCK_CLK_END) + gpu_update_config_data_int_array(np, "gpu_temp_throttling", platform->tmu_lock_clk, TMU_LOCK_CLK_END); + else + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "mismatch tmu lock table size: %d, %d\n", + of_data_int, TMU_LOCK_CLK_END); +#ifdef CONFIG_CPU_THERMAL_IPA + gpu_update_config_data_int(np, "gpu_power_coeff", &platform->ipa_power_coeff_gpu); + gpu_update_config_data_int(np, "gpu_dvfs_time_interval", &platform->gpu_dvfs_time_interval); +#endif /* CONFIG_CPU_THERMAL_IPA */ + gpu_update_config_data_bool(np, "gpu_default_wakeup_lock", &platform->wakeup_lock); + gpu_update_config_data_bool(np, "gpu_dynamic_abb", &platform->dynamic_abb_status); + gpu_update_config_data_int(np, "gpu_dvfs_polling_time", &platform->polling_speed); + gpu_update_config_data_bool(np, "gpu_pmqos_int_disable", &platform->pmqos_int_disable); + gpu_update_config_data_int(np, "gpu_pmqos_mif_max_clock", &platform->pmqos_mif_max_clock); + gpu_update_config_data_int(np, "gpu_pmqos_mif_max_clock_base", &platform->pmqos_mif_max_clock_base); + gpu_update_config_data_int(np, "gpu_cl_dvfs_start_base", &platform->cl_dvfs_start_base); +#endif /* CONFIG_MALI_DVFS */ + gpu_update_config_data_bool(np, "gpu_early_clk_gating", &platform->early_clk_gating_status); +#ifdef CONFIG_MALI_RT_PM + gpu_update_config_data_bool(np, "gpu_dvs", &platform->dvs_status); + gpu_update_config_data_bool(np, "gpu_inter_frame_pm", &platform->inter_frame_pm_feature); +#else + 
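/*
 * Without CONFIG_MALI_RT_PM there is no runtime power gating, so DVS and
 * inter-frame power management are forced off instead of being read from DT.
 */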
platform->dvs_status = 0; + platform->inter_frame_pm_feature = 0; +#endif + gpu_update_config_data_int(np, "gpu_runtime_pm_delay_time", &platform->runtime_pm_delay_time); + +#ifdef CONFIG_EXYNOS_BTS + gpu_update_config_data_int(np, "gpu_mo_min_clock", &platform->mo_min_clock); +#ifdef CONFIG_EXYNOS9630_BTS + platform->bts_scen_idx = bts_get_scenindex("g3d_performance"); +#endif +#ifdef CONFIG_MALI_CAMERA_EXT_BTS + platform->bts_camera_ext_idx= bts_get_scenindex("camera_ext"); + platform->is_set_bts_camera_ext= 0; +#endif +#endif + gpu_update_config_data_int(np, "gpu_boost_gpu_min_lock", &platform->boost_gpu_min_lock); + gpu_update_config_data_int(np, "gpu_boost_egl_min_lock", &platform->boost_egl_min_lock); +#ifdef CONFIG_MALI_SEC_VK_BOOST + gpu_update_config_data_int(np, "gpu_vk_boost_max_lock", &platform->gpu_vk_boost_max_clk_lock); + gpu_update_config_data_int(np, "gpu_vk_boost_mif_min_lock", &platform->gpu_vk_boost_mif_min_clk_lock); +#endif + +#ifdef CONFIG_MALI_SUSTAINABLE_OPT + gpu_update_config_data_int_array(np, "gpu_sustainable_info", of_data_int_array, 5); + for (i = 0; i < 5; i++) { + platform->sustainable.info_array[i] = of_data_int_array[i] == 0 ? 0 : (u32) of_data_int_array[i]; + } +#endif + gpu_update_config_data_bool(np, "gpu_bts_support", &platform->gpu_bts_support); + gpu_update_config_data_int(np, "gpu_set_pmu_duration_reg", &platform->gpu_set_pmu_duration_reg); + gpu_update_config_data_int(np, "gpu_set_pmu_duration_val", &platform->gpu_set_pmu_duration_val); + +#ifdef CONFIG_MALI_DVFS + gpu_update_config_data_string(np, "g3d_genpd_name", &of_string); + if (of_string) + strncpy(platform->g3d_genpd_name, of_string, sizeof(platform->g3d_genpd_name)); +#endif + platform->gpu_dss_freq_id = 0; + gpu_update_config_data_int(np, "gpu_ess_id_type", &platform->gpu_dss_freq_id); + + return 0; +} + +#ifdef CONFIG_MALI_DVFS +static int gpu_dvfs_update_asv_table(struct kbase_device *kbdev) +{ + struct exynos_context *platform = kbdev->platform_context; + gpu_dvfs_info *dvfs_table; + struct dvfs_rate_volt g3d_rate_volt[48]; + int cal_get_dvfs_lv_num; + int cal_table_size; + int of_data_int_array[OF_DATA_NUM_MAX]; + int dvfs_table_row_num = 0, dvfs_table_col_num = 0; + int dvfs_table_size = 0; + int table_idx; + struct device_node *np; + int i, j, cal_freq, cal_vol; + + np = kbdev->dev->of_node; + gpu_update_config_data_int_array(np, "gpu_dvfs_table_size", of_data_int_array, 2); + + dvfs_table_row_num = of_data_int_array[0]; + dvfs_table_col_num = of_data_int_array[1]; + dvfs_table_size = dvfs_table_row_num * dvfs_table_col_num; + + if (dvfs_table_size > OF_DATA_NUM_MAX) { + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "dvfs_table size is not enough\n"); + return -1; + } + dvfs_table = gpu_dvfs_table_default; + + cal_get_dvfs_lv_num = cal_dfs_get_lv_num(platform->g3d_cmu_cal_id); + cal_table_size = cal_dfs_get_rate_asv_table(platform->g3d_cmu_cal_id, g3d_rate_volt); + if (!cal_table_size) + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "Failed to get G3D ASV table\n"); + + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "ECT table(%d) and gpu driver(%d)\n", + cal_get_dvfs_lv_num, dvfs_table_row_num); + + gpu_update_config_data_int_array(np, "gpu_dvfs_table", of_data_int_array, dvfs_table_size); + + for (i = 0; i < cal_get_dvfs_lv_num; i++) { + cal_freq = g3d_rate_volt[i].rate; + cal_vol = g3d_rate_volt[i].volt; + if (cal_freq <= platform->gpu_max_clock && cal_freq >= platform->gpu_min_clock) { + for (j = 0; j < dvfs_table_row_num; j++) { + table_idx = j * dvfs_table_col_num; + // Compare cal_freq with DVFS table freq 
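/*
 * Each gpu_dvfs_table row in DT is laid out as:
 *   clock, min_threshold, max_threshold, down_staycount, mem_freq,
 *   cpu_little_min_freq, [cpu_middle_min_freq,] cpu_big_max_freq
 * (the middle-cluster column exists only when gpu_pmqos_cpu_cluster_num is 3).
 * Rows are matched against the ASV/ECT rate table by clock, and only entries
 * inside [gpu_min_clock, gpu_max_clock] are filled in.
 */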
+ if (cal_freq == of_data_int_array[table_idx]) { + dvfs_table[j].clock = cal_freq; + dvfs_table[j].voltage = cal_vol; + dvfs_table[j].min_threshold = of_data_int_array[table_idx+1]; + dvfs_table[j].max_threshold = of_data_int_array[table_idx+2]; + dvfs_table[j].down_staycount = of_data_int_array[table_idx+3]; + dvfs_table[j].mem_freq = of_data_int_array[table_idx+4]; + dvfs_table[j].cpu_little_min_freq = of_data_int_array[table_idx+5]; + GPU_LOG(DVFS_WARNING, DUMMY, 0u, 0u, "G3D %7dKhz ASV is %duV\n", cal_freq, cal_vol); + if (platform->gpu_pmqos_cpu_cluster_num == 3) { + dvfs_table[j].cpu_middle_min_freq = of_data_int_array[table_idx+6]; + dvfs_table[j].cpu_big_max_freq = (of_data_int_array[table_idx+7] ? of_data_int_array[table_idx+7]:CPU_MAX); + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "up [%d] down [%d] staycnt [%d] mif [%d] lit [%d] mid [%d] big [%d]\n", + dvfs_table[j].max_threshold, dvfs_table[j].min_threshold, dvfs_table[j].down_staycount, + dvfs_table[j].mem_freq, dvfs_table[j].cpu_little_min_freq, dvfs_table[j].cpu_middle_min_freq, + dvfs_table[j].cpu_big_max_freq); + } else { + //Assuming cpu cluster number is 2 + dvfs_table[j].cpu_big_max_freq = (of_data_int_array[table_idx+6] ? of_data_int_array[table_idx+6]:CPU_MAX); + GPU_LOG(DVFS_INFO, DUMMY, 0u, 0u, "up [%d] down [%d] staycnt [%d] mif [%d] lit [%d] big [%d]\n", + dvfs_table[j].max_threshold, dvfs_table[j].min_threshold, dvfs_table[j].down_staycount, + dvfs_table[j].mem_freq, dvfs_table[j].cpu_little_min_freq, dvfs_table[j].cpu_big_max_freq); + } + } + } + } + } + return 0; +} +#endif + +static int gpu_context_init(struct kbase_device *kbdev) +{ + struct exynos_context *platform; + struct mali_base_gpu_core_props *core_props; + + platform = kmalloc(sizeof(struct exynos_context), GFP_KERNEL); + + if (platform == NULL) + return -1; + + memset(platform, 0, sizeof(struct exynos_context)); + kbdev->platform_context = (void *) platform; + pkbdev = kbdev; + + mutex_init(&platform->gpu_clock_lock); + mutex_init(&platform->gpu_dvfs_handler_lock); + spin_lock_init(&platform->gpu_dvfs_spinlock); + +#if (defined(CONFIG_SCHED_EMS) || defined(CONFIG_SCHED_EHMP) || defined(CONFIG_SCHED_HMP)) + mutex_init(&platform->gpu_sched_hmp_lock); + platform->ctx_need_qos = false; +#endif + +#ifdef CONFIG_MALI_SEC_VK_BOOST + mutex_init(&platform->gpu_vk_boost_lock); + platform->ctx_vk_need_qos = false; +#endif + + gpu_dvfs_update_config_data_from_dt(kbdev); +#ifdef CONFIG_MALI_DVFS + gpu_dvfs_update_asv_table(kbdev); +#endif + + core_props = &(kbdev->gpu_props.props.core_props); + core_props->gpu_freq_khz_max = platform->gpu_max_clock * 1000; + +#if MALI_SEC_PROBE_TEST != 1 + kbdev->vendor_callbacks = (struct kbase_vendor_callbacks *)gpu_get_callbacks(); +#endif + +#ifdef CONFIG_MALI_EXYNOS_TRACE + if (gpu_trace_init(kbdev) != 0) + return -1; +#endif + +#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT + platform->gpu_auto_cali_status = false; +#endif + + platform->inter_frame_pm_status = platform->inter_frame_pm_feature; + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) || IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_ARM) + spin_lock_init(&platform->exynos_smc_lock); +#endif + + return 0; +} + +#ifdef CONFIG_MALI_GPU_CORE_MASK_SELECTION +static void gpu_core_mask_set(struct kbase_device *kbdev) +{ + u64 default_core_mask = 0x0; + void __iomem *core_fused_reg; + u64 temp, core_info; + u64 val; + u64 core_stack[8] = {0, }; + int i = 0; + void __iomem *lotid_fused_reg; + u64 lotid_val, lotid_info; + + lotid_fused_reg = ioremap(0x10000004, SZ_8K); + 
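/*
 * Decode overview (editorial summary of the code below, hedged rather than
 * authoritative): the lot-ID fuse selects one of two dead-core fuse layouts.
 * In both cases the fuse value is inverted to obtain the alive-core bits,
 * the bits are split into per-stack groups (stacks 3 and 7 are unused),
 * any group that is not a contiguous run starting at its first core is
 * collapsed or cleared, and the surviving bits are packed into the
 * 0x17771777-style mask written to kbdev->pm.debug_core_mask_info.
 * A fuse value of zero leaves the full default mask 0x17771777.
 */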
lotid_val = __raw_readl(lotid_fused_reg); + lotid_info = lotid_val & 0xFFFFF; + + if (lotid_info == 0x3A8D3) { /* core mask code for KC first lot */ + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] first lot!!!\n"); + core_fused_reg = ioremap(0x1000903c, SZ_8K); /* GPU DEAD CORE Info */ + val = __raw_readl(core_fused_reg); + + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] core fused reg info, Addr[0x%llx], Data[0x%llx]\n", (unsigned long long)core_fused_reg, val); + core_info = (val >> 8) & 0xFFFFF; + + if (core_info) { /* has dead core more 1-core */ + temp = (~core_info) & 0xFFFFF; + + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] core last info = 0x%llx\n", temp); + core_stack[0] = temp & 0xF; /* core 0, 1, 2, 3 */ + core_stack[1] = (temp & 0x70) >> 4; /* core 4, 5, 6 */ + core_stack[2] = (temp & 0x380) >> 7; /* core 7, 8, 9 */ + core_stack[4] = (temp & 0x3C00) >> 10; /* core 10, 11, 12, 13 */ + core_stack[5] = (temp & 0x1C000) >> 14; /* core 14, 15, 16 */ + core_stack[6] = (temp & 0xE0000) >> 17; /* core 17, 18, 19 */ + + for (i = 0; i < 8; i++) { + if (i == 3 || i == 7) + continue; + + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] before core stack[%d] = 0x%llx\n", i, core_stack[i]); + if (core_stack[i] == 0xb) + core_stack[i] = 3; /* 0b1011 */ + if (core_stack[i] == 0xd) + core_stack[i] = 1; /* 0b1101 */ + if (core_stack[i] == 0x9) + core_stack[i] = 1; /* 0b1001 */ + if (core_stack[i] == 0x5) + core_stack[i] = 1; /* 0b101 */ + if (!(core_stack[i] == 0x1 || core_stack[i] == 0x3 || core_stack[i] == 0x7 || core_stack[i] == 0xf)) + core_stack[i] = 0; + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] after core stack[%d] = 0x%llx\n", i, core_stack[i]); + + if (i < 4) { + default_core_mask |= (((core_stack[i] >> 0) & 0x1) << (0 + i)); + default_core_mask |= (((core_stack[i] >> 1) & 0x1) << (4 + i)); + default_core_mask |= (((core_stack[i] >> 2) & 0x1) << (8 + i)); + default_core_mask |= (((core_stack[i] >> 3) & 0x1) << (12 + i)); + } else { + default_core_mask |= (((core_stack[i] >> 0) & 0x1) << (16 + i - 4)); + default_core_mask |= (((core_stack[i] >> 1) & 0x1) << (20 + i - 4)); + default_core_mask |= (((core_stack[i] >> 2) & 0x1) << (24 + i - 4)); + default_core_mask |= (((core_stack[i] >> 3) & 0x1) << (28 + i - 4)); + } + } + kbdev->pm.debug_core_mask_info = default_core_mask; + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] has dead core!, normal core mask = 0x%llx\n", default_core_mask); + } else { + kbdev->pm.debug_core_mask_info = 0x17771777; + } + } else { /* Have to use this code since 'KC second lot' release */ + core_fused_reg = ioremap(0x1000A024, SZ_1K); /* GPU DEAD CORE Info */ + val = __raw_readl(core_fused_reg); + + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] core fused reg info, Addr[0x%llx], Data[0x%llx]\n", (unsigned long long)core_fused_reg, val); + core_info = val; + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] core shift info = 0x%llx\n", core_info); + + if (core_info) { /* has dead core more 1-core */ + temp = (~core_info) & 0x17771777; + + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] core last info = 0x%llx\n", temp); + core_stack[0] = temp & 0x1111; /* core 0, 1, 2, 3 */ + core_stack[1] = (temp & 0x222); /* core 4, 5, 6 */ + core_stack[2] = (temp & 0x444); /* core 7, 8, 9 */ + core_stack[4] = (temp & 0x11110000) >> 16; /* core 10, 11, 12, 13 */ + core_stack[5] = (temp & 0x2220000) >> 16; /* core 14, 15, 16 */ + core_stack[6] = (temp & 0x4440000) >> 16; /* core 17, 18, 19 */ + + for (i = 0; i < 8; i++) { + if (i == 3 || i == 7) + continue; + + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] before core 
stack[%d] = 0x%llx\n", i, core_stack[i]); + if(i==1 || i==5) core_stack[i] = core_stack[i] >> 1; + if(i==2 || i==6) core_stack[i] = core_stack[i] >> 2; + if (core_stack[i] == 0x1011) + core_stack[i] = 0x0011; /* 0b1011 */ + if (core_stack[i] == 0x1101) + core_stack[i] = 0x0001; /* 0b1101 */ + if (core_stack[i] == 0x1001) + core_stack[i] = 0x0001; /* 0b1001 */ + if (core_stack[i] == 0x101) + core_stack[i] = 0x0001; /* 0b101 */ + if (!(core_stack[i] == 0x1 || core_stack[i] == 0x11 || core_stack[i] == 0x111 || core_stack[i] == 0x1111)) + core_stack[i] = 0; + if (i == 1 || i == 5) + core_stack[i] = core_stack[i] << 1; + if (i == 2 || i == 6) + core_stack[i] = core_stack[i] << 2; + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] after core stack[%d] = 0x%llx\n", i, core_stack[i]); + + if (i < 4) { + default_core_mask |= core_stack[i]; + } else { + default_core_mask |= (core_stack[i]<<16); + } + } + kbdev->pm.debug_core_mask_info = default_core_mask; + GPU_LOG(DVFS_ERROR, DUMMY, 0u, 0u, "[GPU] has dead core!, normal core mask = 0x%llx\n", default_core_mask); + } else { + kbdev->pm.debug_core_mask_info = 0x17771777; + } + } + iounmap(core_fused_reg); + iounmap(lotid_fused_reg); +} +#endif + +/** + ** Exynos5 hardware specific initialization + **/ +static int kbase_platform_exynos5_init(struct kbase_device *kbdev) +{ + /* gpu context init */ + if (gpu_context_init(kbdev) < 0) + goto init_fail; + +#if defined(CONFIG_SOC_EXYNOS7420) || defined(CONFIG_SOC_EXYNOS7890) + if (gpu_device_specific_init(kbdev) < 0) + goto init_fail; +#endif + /* gpu control module init */ + if (gpu_control_module_init(kbdev) < 0) + goto init_fail; + + /* gpu notifier init */ + if (gpu_notifier_init(kbdev) < 0) + goto init_fail; + +#ifdef CONFIG_MALI_DVFS + /* gpu utilization moduel init */ + gpu_dvfs_utilization_init(kbdev); + + /* dvfs governor init */ + gpu_dvfs_governor_init(kbdev); + + /* dvfs handler init */ + gpu_dvfs_handler_init(kbdev); +#endif /* CONFIG_MALI_DVFS */ + +#ifdef CONFIG_MALI_DEBUG_SYS + /* gpu sysfs file init */ + if (gpu_create_sysfs_file(kbdev->dev) < 0) + goto init_fail; +#endif /* CONFIG_MALI_DEBUG_SYS */ + /* MALI_SEC_INTEGRATION */ +#ifdef CONFIG_MALI_GPU_CORE_MASK_SELECTION + gpu_core_mask_set(kbdev); +#endif + + return 0; + +init_fail: + kfree(kbdev->platform_context); + + return -1; +} + +/** + ** Exynos5 hardware specific termination + **/ +static void kbase_platform_exynos5_term(struct kbase_device *kbdev) +{ + struct exynos_context *platform; + platform = (struct exynos_context *) kbdev->platform_context; + + gpu_notifier_term(); + +#ifdef CONFIG_MALI_DVFS + gpu_dvfs_handler_deinit(kbdev); +#endif /* CONFIG_MALI_DVFS */ + + gpu_dvfs_utilization_deinit(kbdev); + + gpu_control_module_term(kbdev); + + kfree(kbdev->platform_context); + kbdev->platform_context = 0; + +#ifdef CONFIG_MALI_DEBUG_SYS + gpu_remove_sysfs_file(kbdev->dev); +#endif /* CONFIG_MALI_DEBUG_SYS */ +} + +struct kbase_platform_funcs_conf platform_funcs = { + .platform_init_func = &kbase_platform_exynos5_init, + .platform_term_func = &kbase_platform_exynos5_term, +}; + +int kbase_platform_early_init(void) +{ + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_platform.h b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_platform.h new file mode 100644 index 000000000000..d93e2e70e79d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_kbase_platform.h @@ -0,0 +1,366 @@ +/* drivers/gpu/arm/.../platform/mali_kbase_platform.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. 
+ * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series platform-dependent codes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. + */ + +/** + * @file mali_kbase_platform.h + * Platform-dependent init + */ + +#ifndef _GPU_PLATFORM_H_ +#define _GPU_PLATFORM_H_ + +#include + +#ifdef CONFIG_MALI_EXYNOS_TRACE +#define GPU_LOG(level, code, gpu_addr, info_val, msg, args...) \ +do { \ + if (level >= gpu_get_debug_level()) { \ + printk(KERN_INFO "[G3D] "msg, ## args); \ + } \ + if (gpu_check_trace_code(KBASE_KTRACE_CODE(code))) { \ + KBASE_KTRACE_ADD_EXYNOS(gpu_get_device_structure(), code, NULL, info_val); \ + } \ +} while (0) +#else /* CONFIG_MALI_EXYNOS_TRACE */ +#define GPU_LOG(level, code, gpu_addr, info_val, msg, args...) \ +do { \ + if (level >= gpu_get_debug_level()) { \ + printk(KERN_INFO msg, ## args); \ + } \ +} while (0) +#endif /* CONFIG_MALI_EXYNOS_TRACE */ + +#define GPU_DVFS_TABLE_LIST_SIZE(X) ARRAY_SIZE(X) + +#define BMAX_RETRY_CNT 10 + +#define CPU_MAX INT_MAX +#define DVFS_TABLE_COL_NUM 8 +#define DVFS_TABLE_ROW_MAX 20 +#define OF_DATA_NUM_MAX 160 + +typedef enum { + DVFS_DEBUG_START = 0, + DVFS_DEBUG, + DVFS_INFO, + DVFS_WARNING, + DVFS_ERROR, + DVFS_DEBUG_END, +} gpu_dvfs_debug_level; + +typedef enum { + GPU_L0, + GPU_L1, + GPU_L2, + GPU_L3, + GPU_L4, + GPU_L5, + GPU_L6, + GPU_L7, + GPU_MAX_LEVEL, +} gpu_clock_level; + +typedef enum { + TRACE_START = 0, + TRACE_NONE, + TRACE_DEFAULT, + TRACE_CLK, + TRACE_VOL, + TRACE_NOTIFIER, + TRACE_DVFS, + TRACE_DUMP, + TRACE_ALL, + TRACE_END, +} gpu_dvfs_trace_level; + +typedef enum { + TMU_LOCK = 0, + SYSFS_LOCK, +#ifdef CONFIG_CPU_THERMAL_IPA + IPA_LOCK, +#endif /* CONFIG_CPU_THERMAL_IPA */ + BOOST_LOCK, + PMQOS_LOCK, +#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT + ASV_CALI_LOCK, +#endif + NUMBER_LOCK +} gpu_dvfs_lock_type; + +typedef enum { + THROTTLING1 = 0, + THROTTLING2, + THROTTLING3, + THROTTLING4, + THROTTLING5, + TRIPPING, + TMU_LOCK_CLK_END, +} tmu_lock_clk; + +typedef enum { + GPU_JOB_CONFIG_FAULT, + GPU_JOB_POWER_FAULT, + GPU_JOB_READ_FAULT, + GPU_JOB_WRITE_FAULT, + GPU_JOB_AFFINITY_FAULT, + GPU_JOB_BUS_FAULT, + GPU_DATA_INVALIDATE_FAULT, + GPU_TILE_RANGE_FAULT, + GPU_OUT_OF_MEMORY_FAULT, + GPU_DELAYED_BUS_FAULT, + GPU_SHAREABILITY_FAULT, + GPU_MMU_TRANSLATION_FAULT, + GPU_MMU_PERMISSION_FAULT, + GPU_MMU_TRANSTAB_BUS_FAULT, + GPU_MMU_ACCESS_FLAG_FAULT, + GPU_MMU_ADDRESS_SIZE_FAULT, + GPU_MMU_MEMORY_ATTRIBUTES_FAULT, + GPU_UNKNOWN, + GPU_SOFT_STOP, + GPU_HARD_STOP, + GPU_RESET, + GPU_EXCEPTION_LIST_END, +} gpu_excention_type; + +typedef struct _gpu_attribute { + int id; + uintptr_t data; +} gpu_attribute; + +typedef struct _gpu_dvfs_info { + unsigned int clock; + unsigned int voltage; + int asv_abb; + int min_threshold; + int max_threshold; + int down_staycount; + unsigned long long time; + int mem_freq; + int int_freq; + int cpu_little_min_freq; + int cpu_middle_min_freq; + int cpu_big_max_freq; + int g3dm_voltage; +} gpu_dvfs_info; + +typedef struct _gpu_dvfs_governor_info { + int id; + char *name; + void *governor; + gpu_dvfs_info *table; + int table_size; + int start_clk; +} gpu_dvfs_governor_info; + +typedef struct _gpu_dvfs_env_data { + int utilization; + int perf; + int hwcnt; +} gpu_dvfs_env_data; + +struct exynos_context { + /* lock variables */ + struct mutex gpu_clock_lock; + struct mutex gpu_dvfs_handler_lock; + spinlock_t gpu_dvfs_spinlock; 
+#if (defined(CONFIG_SCHED_EMS) || defined(CONFIG_SCHED_EHMP) || defined(CONFIG_SCHED_HMP)) + struct mutex gpu_sched_hmp_lock; +#endif + /* clock & voltage related variables */ + int clk_g3d_status; +#ifdef CONFIG_MALI_RT_PM + struct exynos_pm_domain *exynos_pm_domain; +#endif /* CONFIG_MALI_RT_PM */ + + /* dvfs related variables */ + gpu_dvfs_info *table; + int table_size; + int step; + gpu_dvfs_env_data env_data; + struct workqueue_struct *dvfs_wq; + struct delayed_work *delayed_work; +#if defined(SET_MINLOCK) + int custom_cpu_max_lock; +#endif +#ifdef CONFIG_MALI_DVFS + bool dvfs_status; + int utilization; + int max_lock; + int min_lock; + int user_max_lock[NUMBER_LOCK]; + int user_min_lock[NUMBER_LOCK]; + int down_requirement; + int governor_type; + bool wakeup_lock; + int dvfs_pending; + + /* For the interactive governor */ + struct { + int highspeed_clock; + int highspeed_load; + int highspeed_delay; + int delay_count; + } interactive; +#ifdef CONFIG_CPU_THERMAL_IPA + int norm_utilisation; + int freq_for_normalisation; + unsigned long long power; + int time_tick; + u32 time_busy; + u32 time_idle; + + int ipa_power_coeff_gpu; + int gpu_dvfs_time_interval; +#endif /* CONFIG_CPU_THERMAL_IPA */ +#endif /* CONFIG_MALI_DVFS */ + + /* status */ + int cur_clock; + int cur_voltage; + int voltage_margin; + + /* gpu configuration */ + bool using_max_limit_clock; + int gpu_max_clock; + int gpu_max_clock_limit; + int gpu_min_clock; + int gpu_dvfs_start_clock; + int gpu_dvfs_config_clock; + int user_max_lock_input; + int user_min_lock_input; + + /* gpu boost lock */ + int boost_gpu_min_lock; + int boost_egl_min_lock; + bool boost_is_enabled; + bool tmu_status; + int tmu_lock_clk[TMU_LOCK_CLK_END]; + int cold_min_vol; + int gpu_default_vol; + int gpu_default_vol_margin; + + bool dynamic_abb_status; + bool early_clk_gating_status; + bool dvs_status; + bool dvs_is_enabled; + bool inter_frame_pm_feature; + bool inter_frame_pm_status; + bool inter_frame_pm_is_poweron; + + bool power_status; + int power_runtime_suspend_ret; + int power_runtime_resume_ret; + + + int polling_speed; + int runtime_pm_delay_time; + bool pmqos_int_disable; + + int pmqos_mif_max_clock; + int pmqos_mif_max_clock_base; + + int cl_dvfs_start_base; + + int debug_level; + int trace_level; + + int fault_count; + bool bigdata_uevent_is_sent; + int gpu_exception_count[GPU_EXCEPTION_LIST_END]; + int balance_retry_count[BMAX_RETRY_CNT]; + gpu_attribute *attrib; +#ifdef CONFIG_EXYNOS_BTS + int mo_min_clock; +#ifdef CONFIG_EXYNOS9630_BTS + unsigned int bts_scen_idx; + unsigned int is_set_bts; // Check the pair of bts scenario. 
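/*
 * Editorial gloss (assumption based on the "pair" wording above and the
 * bts_get_scenindex("g3d_performance") lookup in the init path): is_set_bts
 * appears to record whether that BTS scenario is currently applied, so that
 * enable and disable requests stay paired.
 */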
+#endif +#ifdef CONFIG_MALI_CAMERA_EXT_BTS + unsigned int bts_camera_ext_idx; + unsigned int is_set_bts_camera_ext; +#endif +#endif + int *save_cpu_max_freq; + + unsigned int g3d_cmu_cal_id; +#ifdef CONFIG_MALI_PM_QOS + bool is_pm_qos_init; +#endif /* CONFIG_MALI_PM_QOS */ + const struct kbase_pm_policy *cur_policy; + +#ifdef CONFIG_MALI_ASV_CALIBRATION_SUPPORT + bool gpu_auto_cali_status; +#endif + +#if (defined(CONFIG_SCHED_EMS) || defined(CONFIG_SCHED_EHMP) || defined(CONFIG_SCHED_HMP)) + bool ctx_need_qos; +#endif + +#ifdef CONFIG_MALI_SEC_VK_BOOST + bool ctx_vk_need_qos; + struct mutex gpu_vk_boost_lock; + int gpu_vk_boost_max_clk_lock; + int gpu_vk_boost_mif_min_clk_lock; +#endif + + int gpu_pmqos_cpu_cluster_num; + +#ifdef CONFIG_MALI_SUSTAINABLE_OPT + struct { + bool status; + int info_array[5]; + } sustainable; +#endif + +#ifdef CONFIG_MALI_SEC_CL_BOOST + bool cl_boost_disable; +#endif + + int gpu_set_pmu_duration_reg; + int gpu_set_pmu_duration_val; + bool gpu_bts_support; + char g3d_genpd_name[30]; + int gpu_dss_freq_id; + +#if IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_LEGACY) || IS_ENABLED(CONFIG_MALI_EXYNOS_SECURE_RENDERING_ARM) + bool exynos_smc_enabled; + spinlock_t exynos_smc_lock; +#endif + + /* Callback to call when for GPU clock changes */ + struct notifier_block *nb_clock_change; +}; + +struct kbase_device *gpu_get_device_structure(void); +void gpu_set_debug_level(int level); +int gpu_get_debug_level(void); +void gpu_set_trace_level(int level); +bool gpu_check_trace_level(int level); +bool gpu_check_trace_code(int code); +void *gpu_get_config_attributes(void); +uintptr_t gpu_get_attrib_data(gpu_attribute *attrib, int id); +int gpu_platform_context_init(struct exynos_context *platform); + +int gpu_set_rate_for_pm_resume(struct kbase_device *kbdev, int clk); +void gpu_clock_disable(struct kbase_device *kbdev); + +bool balance_init(struct kbase_device *kbdev); +int exynos_gpu_init_hw(void *dev); + +#ifdef CONFIG_OF +void gpu_update_config_data_bool(struct device_node *np, const char *of_string, bool *of_data); +void gpu_update_config_data_int(struct device_node *np, const char *of_string, int *of_data); +void gpu_update_config_data_string(struct device_node *np, const char *of_string, const char **of_data); +void gpu_update_config_data_int_array(struct device_node *np, const char *of_string, int *of_data, int sz); +#endif + +#endif /* _GPU_PLATFORM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/platform/exynos/mali_power.h b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_power.h new file mode 100644 index 000000000000..e29468088ebf --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/exynos/mali_power.h @@ -0,0 +1,61 @@ +/* drivers/gpu/arm/.../platform/mali_power.h + * + * Copyright 2011 by S.LSI. Samsung Electronics Inc. + * San#24, Nongseo-Dong, Giheung-Gu, Yongin, Korea + * + * Samsung SoC Mali-T Series platform-dependent codes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software FoundatIon. 
+ */ + +/** + * @file mali_power.h + * DVFS + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mali_power + +#if !defined(_MALI_POWER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _MALI_POWER_H + +#include + +TRACE_EVENT(mali_utilization_stats, + + TP_PROTO(int util, + int norm_util, + int norm_freq), + + TP_ARGS(util, + norm_util, + norm_freq), + + TP_STRUCT__entry( + __field(int, util) + __field(int, norm_util) + __field(int, norm_freq) + ), + + TP_fast_assign( + __entry->util = util; + __entry->norm_util = norm_util; + __entry->norm_freq = norm_freq; + ), + + TP_printk("util=%d norm_util=%d norm_freq=%d", + __entry->util, + __entry->norm_util, + __entry->norm_freq) +); + + +#endif /* _MALI_POWER_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/arm/b_r26p0/platform + +/* This part must be outside protection */ +#include diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress/Kbuild b/drivers/gpu/arm/b_r26p0/platform/vexpress/Kbuild new file mode 100644 index 000000000000..6780e4c9433b --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress/Kbuild @@ -0,0 +1,24 @@ +# +# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/b_r26p0/platform/vexpress/mali_kbase_config_platform.h new file mode 100644 index 000000000000..fac3cd52182f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress/mali_kbase_config_platform.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/b_r26p0/platform/vexpress/mali_kbase_config_vexpress.c new file mode 100644 index 000000000000..d165ce262814 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress/mali_kbase_config_vexpress.c @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#include +#include +#include +#include "mali_kbase_config_platform.h" + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 68, + .mmu_irq_number = 69, + .gpu_irq_number = 70, + .io_memory_region = { + .start = 0xFC010000, + .end = 0xFC010000 + (4096 * 4) - 1 + } +}; +#endif /* CONFIG_OF */ + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/Kbuild new file mode 100644 index 000000000000..51b408efd48a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/Kbuild @@ -0,0 +1,24 @@ +# +# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h new file mode 100644 index 000000000000..fac3cd52182f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c new file mode 100644 index 000000000000..efca0a5b3493 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c @@ -0,0 +1,65 @@ +/* + * + * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 68, + .mmu_irq_number = 69, + .gpu_irq_number = 70, + .io_memory_region = { + .start = 0x2f010000, + .end = 0x2f010000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/Kbuild new file mode 100644 index 000000000000..e07709c9b1a5 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/Kbuild @@ -0,0 +1,25 @@ +# +# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h new file mode 100644 index 000000000000..fac3cd52182f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c new file mode 100644 index 000000000000..b6714b95b776 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#include +#include +#include + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 75, + .mmu_irq_number = 76, + .gpu_irq_number = 77, + .io_memory_region = { + .start = 0x2F000000, + .end = 0x2F000000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} diff --git a/drivers/gpu/arm/b_r26p0/protected_mode_switcher.h b/drivers/gpu/arm/b_r26p0/protected_mode_switcher.h new file mode 100644 index 000000000000..8778d812aea0 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/protected_mode_switcher.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _PROTECTED_MODE_SWITCH_H_ +#define _PROTECTED_MODE_SWITCH_H_ + +struct protected_mode_device; + +/** + * struct protected_mode_ops - Callbacks for protected mode switch operations + * + * @protected_mode_enable: Callback to enable protected mode for device + * @protected_mode_disable: Callback to disable protected mode for device + */ +struct protected_mode_ops { + /** + * protected_mode_enable() - Enable protected mode on device + * @dev: The struct device + * + * Return: 0 on success, non-zero on error + */ + int (*protected_mode_enable)( + struct protected_mode_device *protected_dev); + + /** + * protected_mode_disable() - Disable protected mode on device, and + * reset device + * @dev: The struct device + * + * Return: 0 on success, non-zero on error + */ + int (*protected_mode_disable)( + struct protected_mode_device *protected_dev); +}; + +/** + * struct protected_mode_device - Device structure for protected mode devices + * + * @ops - Callbacks associated with this device + * @data - Pointer to device private data + * + * This structure should be registered with the platform device using + * platform_set_drvdata(). + */ +struct protected_mode_device { + struct protected_mode_ops ops; + void *data; +}; + +#endif /* _PROTECTED_MODE_SWITCH_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/tests/Kbuild b/drivers/gpu/arm/b_r26p0/tests/Kbuild new file mode 100644 index 000000000000..c26bef780781 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/Kbuild @@ -0,0 +1,24 @@ +# +# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +obj-$(CONFIG_MALI_KUTF) += kutf/ +obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ +obj-$(CONFIG_MALI_CLK_RATE_TRACE_PORTAL) += mali_kutf_clk_rate_trace/kernel/ diff --git a/drivers/gpu/arm/b_r26p0/tests/Kconfig b/drivers/gpu/arm/b_r26p0/tests/Kconfig new file mode 100644 index 000000000000..89ac0699525e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/Kconfig @@ -0,0 +1,24 @@ +# +# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +source "drivers/gpu/arm/b_r26p0/tests/kutf/Kconfig" +source "drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kconfig" +source "drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kconfig" diff --git a/drivers/gpu/arm/b_r26p0/tests/Mconfig b/drivers/gpu/arm/b_r26p0/tests/Mconfig new file mode 100644 index 000000000000..be3fedb216ad --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/Mconfig @@ -0,0 +1,38 @@ +# +# (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# + +config UNIT_TEST_KERNEL_MODULES + bool + default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES + default n + +config BUILD_IPA_TESTS + bool + default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ + default n + +config BUILD_IPA_UNIT_TESTS + bool + default y if NO_MALI && BUILD_IPA_TESTS + default n + +config BUILD_CSF_TESTS + bool + default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF + default n + +config BUILD_ARBIF_TESTS + bool + default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT + default n + diff --git a/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_helpers.h b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_helpers.h new file mode 100644 index 000000000000..858b9c38b49a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_helpers.h @@ -0,0 +1,85 @@ +/* + * + * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_HELPERS_H_ +#define _KERNEL_UTF_HELPERS_H_ + +/* kutf_helpers.h + * Test helper functions for the kernel UTF test infrastructure. + * + * These functions provide methods for enqueuing/dequeuing lines of text sent + * by user space. They are used to implement the transfer of "userdata" from + * user space to kernel. 
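 *
 * Illustrative sketch (editorial addition, assuming a test body that already
 * has a valid struct kutf_context) of consuming one line of userdata with the
 * helpers declared below:
 *
 *   size_t sz;
 *   char *line = kutf_helper_input_dequeue(context, &sz);
 *
 *   if (IS_ERR(line))
 *           return;         // wait was interrupted
 *   if (!line)
 *           return;         // user space signalled end of data
 *   // 'line' now points at sz bytes of user-supplied text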
+ */ + +#include + +/** + * kutf_helper_pending_input() - Check any pending lines sent by user space + * @context: KUTF context + * + * Return: true if there are pending lines, otherwise false + */ +bool kutf_helper_pending_input(struct kutf_context *context); + +/** + * kutf_helper_input_dequeue() - Dequeue a line sent by user space + * @context: KUTF context + * @str_size: Pointer to an integer to receive the size of the string + * + * If no line is available then this function will wait (interruptibly) until + * a line is available. + * + * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end + * of data. + */ +char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size); + +/** + * kutf_helper_input_enqueue() - Enqueue a line sent by user space + * @context: KUTF context + * @str: The user space address of the line + * @size: The length in bytes of the string + * + * This function will use copy_from_user to copy the string out of user space. + * The string need not be NULL-terminated (@size should not include the NULL + * termination). + * + * As a special case @str==NULL and @size==0 is valid to mark the end of input, + * but callers should use kutf_helper_input_enqueue_end_of_data() instead. + * + * Return: 0 on success, -EFAULT if the line cannot be copied from user space, + * -ENOMEM if out of memory. + */ +int kutf_helper_input_enqueue(struct kutf_context *context, + const char __user *str, size_t size); + +/** + * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent + * @context: KUTF context + * + * After this function has been called, kutf_helper_input_dequeue() will always + * return NULL. + */ +void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); + +#endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_helpers_user.h b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_helpers_user.h new file mode 100644 index 000000000000..3b1300e1ce6f --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_helpers_user.h @@ -0,0 +1,179 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_HELPERS_USER_H_ +#define _KERNEL_UTF_HELPERS_USER_H_ + +/* kutf_helpers.h + * Test helper functions for the kernel UTF test infrastructure, whose + * implementation mirrors that of similar functions for kutf-userside + */ + +#include +#include + + +#define KUTF_HELPER_MAX_VAL_NAME_LEN 255 + +enum kutf_helper_valtype { + KUTF_HELPER_VALTYPE_INVALID, + KUTF_HELPER_VALTYPE_U64, + KUTF_HELPER_VALTYPE_STR, + + KUTF_HELPER_VALTYPE_COUNT /* Must be last */ +}; + +struct kutf_helper_named_val { + enum kutf_helper_valtype type; + char *val_name; + union { + u64 val_u64; + char *val_str; + } u; +}; + +/* Extra error values for certain helpers when we want to distinguish between + * Linux's own error values too. + * + * These can only be used on certain functions returning an int type that are + * documented as returning one of these potential values, they cannot be used + * from functions return a ptr type, since we can't decode it with PTR_ERR + * + * No negative values are used - Linux error codes should be used instead, and + * indicate a problem in accessing the data file itself (are generally + * unrecoverable) + * + * Positive values indicate correct access but invalid parsing (can be + * recovered from assuming data in the future is correct) */ +enum kutf_helper_err { + /* No error - must be zero */ + KUTF_HELPER_ERR_NONE = 0, + /* Named value parsing encountered an invalid name */ + KUTF_HELPER_ERR_INVALID_NAME, + /* Named value parsing of string or u64 type encountered extra + * characters after the value (after the last digit for a u64 type or + * after the string end delimiter for string type) */ + KUTF_HELPER_ERR_CHARS_AFTER_VAL, + /* Named value parsing of string type couldn't find the string end + * delimiter. + * + * This cannot be encountered when the NAME="value" message exceeds the + * textbuf's maximum line length, because such messages are not checked + * for an end string delimiter */ + KUTF_HELPER_ERR_NO_END_DELIMITER, + /* Named value didn't parse as any of the known types */ + KUTF_HELPER_ERR_INVALID_VALUE, +}; + + +/* Send named NAME=value pair, u64 value + * + * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long + * + * Any failure will be logged on the suite's current test fixture + * + * Returns 0 on success, non-zero on failure + */ +int kutf_helper_send_named_u64(struct kutf_context *context, + const char *val_name, u64 val); + +/* Get the maximum length of a string that can be represented as a particular + * NAME="value" pair without string-value truncation in the kernel's buffer + * + * Given val_name and the kernel buffer's size, this can be used to determine + * the maximum length of a string that can be sent as val_name="value" pair + * without having the string value truncated. Any string longer than this will + * be truncated at some point during communication to this size. + * + * It is assumed that val_name is a valid name for + * kutf_helper_send_named_str(), and no checking will be made to + * ensure this. + * + * Returns the maximum string length that can be represented, or a negative + * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz + */ +int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz); + +/* Send named NAME="str" pair + * + * no escaping allowed in str. 
Any of the following characters will terminate + * the string: '"' '\\' '\n' + * + * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long + * + * Any failure will be logged on the suite's current test fixture + * + * Returns 0 on success, non-zero on failure */ +int kutf_helper_send_named_str(struct kutf_context *context, + const char *val_name, const char *val_str); + +/* Receive named NAME=value pair + * + * This can receive u64 and string values - check named_val->type + * + * If you are not planning on dynamic handling of the named value's name and + * type, then kutf_helper_receive_check_val() is more useful as a + * convenience function. + * + * String members of named_val will come from memory allocated on the fixture's mempool + * + * Returns 0 on success. Negative value on failure to receive from the 'run' + * file, positive value indicates an enum kutf_helper_err value for correct + * reception of data but invalid parsing */ +int kutf_helper_receive_named_val( + struct kutf_context *context, + struct kutf_helper_named_val *named_val); + +/* Receive and validate NAME=value pair + * + * As with kutf_helper_receive_named_val, but validate that the + * name and type are as expected, as a convenience for a common pattern found + * in tests. + * + * NOTE: this only returns an error value if there was actually a problem + * receiving data. + * + * NOTE: If the underlying data was received correctly, but: + * - isn't of the expected name + * - isn't the expected type + * - isn't correctly parsed for the type + * then the following happens: + * - failure result is recorded + * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID + * - named_val->u will contain some default value that should be relatively + * harmless for the test, including being writable in the case of string + * values + * - return value will be 0 to indicate success + * + * The rationale behind this is that we'd prefer to continue the rest of the + * test with failures propagated, rather than hitting a timeout */ +int kutf_helper_receive_check_val( + struct kutf_helper_named_val *named_val, + struct kutf_context *context, + const char *expect_val_name, + enum kutf_helper_valtype expect_val_type); + +/* Output a named value to kmsg */ +void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val); + + +#endif /* _KERNEL_UTF_HELPERS_USER_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_mem.h b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_mem.h new file mode 100644 index 000000000000..988559de1edf --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_mem.h @@ -0,0 +1,73 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_MEM_H_ +#define _KERNEL_UTF_MEM_H_ + +/* kutf_mem.h + * Functions for management of memory pools in the kernel. + * + * This module implements a memory pool allocator, allowing a test + * implementation to allocate linked allocations which can then be freed by a + * single free which releases all of the resources held by the entire pool. + * + * Note that it is not possible to free single resources within the pool once + * allocated. + */ + +#include +#include + +/** + * struct kutf_mempool - the memory pool context management structure + * @head: list head on which the allocations in this context are added to + * @lock: mutex for concurrent allocation from multiple threads + * + */ +struct kutf_mempool { + struct list_head head; + struct mutex lock; +}; + +/** + * kutf_mempool_init() - Initialize a memory pool. + * @pool: Memory pool structure to initialize, provided by the user + * + * Return: zero on success + */ +int kutf_mempool_init(struct kutf_mempool *pool); + +/** + * kutf_mempool_alloc() - Allocate memory from a pool + * @pool: Memory pool to allocate from + * @size: Size of memory wanted in number of bytes + * + * Return: Pointer to memory on success, NULL on failure. + */ +void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size); + +/** + * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it. + * @pool: The memory pool to free + */ +void kutf_mempool_destroy(struct kutf_mempool *pool); +#endif /* _KERNEL_UTF_MEM_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_resultset.h b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_resultset.h new file mode 100644 index 000000000000..49ebeb4ec546 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_resultset.h @@ -0,0 +1,181 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_RESULTSET_H_ +#define _KERNEL_UTF_RESULTSET_H_ + +/* kutf_resultset.h + * Functions and structures for handling test results and result sets. + * + * This section of the kernel UTF contains structures and functions used for the + * management of Results and Result Sets. + */ + +/** + * enum kutf_result_status - Status values for a single Test error. + * @KUTF_RESULT_BENCHMARK: Result is a meta-result containing benchmark + * results. + * @KUTF_RESULT_SKIP: The test was skipped. + * @KUTF_RESULT_UNKNOWN: The test has an unknown result. + * @KUTF_RESULT_PASS: The test result passed. + * @KUTF_RESULT_DEBUG: The test result passed, but raised a debug + * message. + * @KUTF_RESULT_INFO: The test result passed, but raised + * an informative message. + * @KUTF_RESULT_WARN: The test result passed, but raised a warning + * message. 
+ * @KUTF_RESULT_FAIL: The test result failed with a non-fatal error. + * @KUTF_RESULT_FATAL: The test result failed with a fatal error. + * @KUTF_RESULT_ABORT: The test result failed due to a non-UTF + * assertion failure. + * @KUTF_RESULT_USERDATA: User data is ready to be read, + * this is not seen outside the kernel + * @KUTF_RESULT_USERDATA_WAIT: Waiting for user data to be sent, + * this is not seen outside the kernel + * @KUTF_RESULT_TEST_FINISHED: The test has finished, no more results will + * be produced. This is not seen outside kutf + */ +enum kutf_result_status { + KUTF_RESULT_BENCHMARK = -3, + KUTF_RESULT_SKIP = -2, + KUTF_RESULT_UNKNOWN = -1, + + KUTF_RESULT_PASS = 0, + KUTF_RESULT_DEBUG = 1, + KUTF_RESULT_INFO = 2, + KUTF_RESULT_WARN = 3, + KUTF_RESULT_FAIL = 4, + KUTF_RESULT_FATAL = 5, + KUTF_RESULT_ABORT = 6, + + KUTF_RESULT_USERDATA = 7, + KUTF_RESULT_USERDATA_WAIT = 8, + KUTF_RESULT_TEST_FINISHED = 9 +}; + +/* The maximum size of a kutf_result_status result when + * converted to a string + */ +#define KUTF_ERROR_MAX_NAME_SIZE 21 + +#ifdef __KERNEL__ + +#include +#include + +struct kutf_context; + +/** + * struct kutf_result - Represents a single test result. + * @node: Next result in the list of results. + * @status: The status summary (pass / warn / fail / etc). + * @message: A more verbose status message. + */ +struct kutf_result { + struct list_head node; + enum kutf_result_status status; + const char *message; +}; + +/** + * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data + * + * This flag is set within a struct kutf_result_set whenever the test is blocked + * waiting for user data. Attempts to dequeue results when this flag is set + * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. This + * is used to output a warning message and end of file. + */ +#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1 + +/** + * struct kutf_result_set - Represents a set of results. + * @results: List head of a struct kutf_result list for storing the results + * @waitq: Wait queue signalled whenever new results are added. + * @flags: Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT + */ +struct kutf_result_set { + struct list_head results; + wait_queue_head_t waitq; + int flags; +}; + +/** + * kutf_create_result_set() - Create a new result set + * to which results can be added. + * + * Return: The created result set. + */ +struct kutf_result_set *kutf_create_result_set(void); + +/** + * kutf_add_result() - Add a result to the end of an existing result set. + * + * @context: The kutf context + * @status: The result status to add. + * @message: The result message to add. + * + * Return: 0 if the result is successfully added. -ENOMEM if allocation fails. + */ +int kutf_add_result(struct kutf_context *context, + enum kutf_result_status status, const char *message); + +/** + * kutf_remove_result() - Remove a result from the head of a result set. + * @set: The result set. + * + * This function will block until there is a result to read. The wait is + * interruptible, so this function will return with an ERR_PTR if interrupted. + * + * Return: result or ERR_PTR if interrupted + */ +struct kutf_result *kutf_remove_result( + struct kutf_result_set *set); + +/** + * kutf_destroy_result_set() - Free a previously created result set. + * + * @results: The result set whose resources to free. 
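 *
 * Lifecycle sketch (editorial illustration, not part of the original
 * kernel-doc; error handling trimmed): results are produced against a
 * context and drained from its result set before the set is destroyed:
 *
 *   struct kutf_result_set *set = kutf_create_result_set();
 *   ...
 *   kutf_add_result(context, KUTF_RESULT_INFO, "checkpoint reached");
 *   ...
 *   struct kutf_result *res = kutf_remove_result(set);
 *   if (!IS_ERR(res) && res->status == KUTF_RESULT_TEST_FINISHED)
 *           kutf_destroy_result_set(set);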
+ */ +void kutf_destroy_result_set(struct kutf_result_set *results); + +/** + * kutf_set_waiting_for_input() - The test is waiting for userdata + * + * @set: The result set to update + * + * Causes the result set to always have results and return a fake + * %KUTF_RESULT_USERDATA_WAIT result. + */ +void kutf_set_waiting_for_input(struct kutf_result_set *set); + +/** + * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata + * + * @set: The result set to update + * + * Cancels the effect of kutf_set_waiting_for_input() + */ +void kutf_clear_waiting_for_input(struct kutf_result_set *set); + +#endif /* __KERNEL__ */ + +#endif /* _KERNEL_UTF_RESULTSET_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_suite.h b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_suite.h new file mode 100644 index 000000000000..8d75f506f9eb --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_suite.h @@ -0,0 +1,569 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_SUITE_H_ +#define _KERNEL_UTF_SUITE_H_ + +/* kutf_suite.h + * Functions for management of test suites. + * + * This collection of data structures, macros, and functions are used to + * create Test Suites, Tests within those Test Suites, and Fixture variants + * of each test. + */ + +#include +#include +#include + +#include +#include + +/* Arbitrary maximum size to prevent user space allocating too much kernel + * memory + */ +#define KUTF_MAX_LINE_LENGTH (1024u) + +/** + * Pseudo-flag indicating an absence of any specified test class. Note that + * tests should not be annotated with this constant as it is simply a zero + * value; tests without a more specific class must be marked with the flag + * KUTF_F_TEST_GENERIC. + */ +#define KUTF_F_TEST_NONE ((unsigned int)(0)) + +/** + * Class indicating this test is a smoke test. + * A given set of smoke tests should be quick to run, enabling rapid turn-around + * of "regress-on-commit" test runs. + */ +#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) + +/** + * Class indicating this test is a performance test. + * These tests typically produce a performance metric, such as "time to run" or + * "frames per second", + */ +#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) + +/** + * Class indicating that this test is a deprecated test. + * These tests have typically been replaced by an alternative test which is + * more efficient, or has better coverage. + */ +#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) + +/** + * Class indicating that this test is a known failure. + * These tests have typically been run and failed, but marking them as a known + * failure means it is easier to triage results. 
+ * + * It is typically more convenient to triage known failures using the + * results database and web UI, as this means there is no need to modify the + * test code. + */ +#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) + +/** + * Class indicating that this test is a generic test, which is not a member of + * a more specific test class. Tests which are not created with a specific set + * of filter flags by the user are assigned this test class by default. + */ +#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) + +/** + * Class indicating this test is a resource allocation failure test. + * A resource allocation failure test will test that an error code is + * correctly propagated when an allocation fails. + */ +#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) + +/** + * Additional flag indicating that this test is an expected failure when + * run in resource failure mode. These tests are never run when running + * the low resource mode. + */ +#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7)) + +/** + * Flag reserved for user-defined filter zero. + */ +#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24)) + +/** + * Flag reserved for user-defined filter one. + */ +#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25)) + +/** + * Flag reserved for user-defined filter two. + */ +#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26)) + +/** + * Flag reserved for user-defined filter three. + */ +#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27)) + +/** + * Flag reserved for user-defined filter four. + */ +#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28)) + +/** + * Flag reserved for user-defined filter five. + */ +#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29)) + +/** + * Flag reserved for user-defined filter six. + */ +#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30)) + +/** + * Flag reserved for user-defined filter seven. + */ +#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31)) + +/** + * Pseudo-flag indicating that all test classes should be executed. + */ +#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) + +/** + * union kutf_callback_data - Union used to store test callback data + * @ptr_value: pointer to the location where test callback data + * are stored + * @u32_value: a number which represents test callback data + */ +union kutf_callback_data { + void *ptr_value; + u32 u32_value; +}; + +/** + * struct kutf_userdata_line - A line of user data to be returned to the user + * @node: struct list_head to link this into a list + * @str: The line of user data to return to user space + * @size: The number of bytes within @str + */ +struct kutf_userdata_line { + struct list_head node; + char *str; + size_t size; +}; + +/** + * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output + * + * If user space reads the "run" file while the test is waiting for user data, + * then the framework will output a warning message and set this flag within + * struct kutf_userdata. A subsequent read will then simply return an end of + * file condition rather than outputting the warning again. The upshot of this + * is that simply running 'cat' on a test which requires user data will produce + * the warning followed by 'cat' exiting due to EOF - which is much more user + * friendly than blocking indefinitely waiting for user data. 
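+ *
+ * A simplified sketch of how a reader is expected to consume this flag
+ * (illustrative only; the real handling lives in the framework's "run"
+ * read path):
+ *
+ *   if (userdata->flags & KUTF_USERDATA_WARNING_OUTPUT)
+ *           return 0;                            // EOF, already warned
+ *   userdata->flags |= KUTF_USERDATA_WARNING_OUTPUT;
+ *   // ...copy the warning message to the reader...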
+ */
+#define KUTF_USERDATA_WARNING_OUTPUT 1
+
+/**
+ * struct kutf_userdata - Structure holding user data
+ * @flags:       See %KUTF_USERDATA_WARNING_OUTPUT
+ * @input_head:  List of struct kutf_userdata_line containing user data
+ *               to be read by the kernel space test.
+ * @input_waitq: Wait queue signalled when there is new user data to be
+ *               read by the kernel space test.
+ */
+struct kutf_userdata {
+	unsigned long flags;
+	struct list_head input_head;
+	wait_queue_head_t input_waitq;
+};
+
+/**
+ * struct kutf_context - Structure representing a kernel test context
+ * @kref:            Refcount for number of users of this context
+ * @suite:           Convenience pointer to the suite this context
+ *                   is running
+ * @test_fix:        The fixture that is being run in this context
+ * @fixture_pool:    The memory pool used for the duration of
+ *                   the fixture/test context.
+ * @fixture:         The user provided fixture structure.
+ * @fixture_index:   The index (id) of the current fixture.
+ * @fixture_name:    The name of the current fixture (or NULL if unnamed).
+ * @test_data:       Any user private data associated with this test
+ * @result_set:      All the results logged by this test context
+ * @status:          The status of the currently running fixture.
+ * @expected_status: The expected status on exit of the currently
+ *                   running fixture.
+ * @work:            Work item to enqueue onto the work queue to run the test
+ * @userdata:        Structure containing the user data for the test to read
+ */
+struct kutf_context {
+	struct kref kref;
+	struct kutf_suite *suite;
+	struct kutf_test_fixture *test_fix;
+	struct kutf_mempool fixture_pool;
+	void *fixture;
+	unsigned int fixture_index;
+	const char *fixture_name;
+	union kutf_callback_data test_data;
+	struct kutf_result_set *result_set;
+	enum kutf_result_status status;
+	enum kutf_result_status expected_status;
+
+	struct work_struct work;
+	struct kutf_userdata userdata;
+};
+
+/**
+ * struct kutf_suite - Structure representing a kernel test suite
+ * @app:                 The application this suite belongs to.
+ * @name:                The name of this suite.
+ * @suite_data:          Any user private data associated with this
+ *                       suite.
+ * @create_fixture:      Function used to create a new fixture instance
+ * @remove_fixture:      Function used to destroy a previously created
+ *                       fixture instance
+ * @fixture_variants:    The number of variants (must be at least 1).
+ * @suite_default_flags: Suite global filter flags which are set on
+ *                       all tests.
+ * @node:                List node for suite_list
+ * @dir:                 The debugfs directory for this suite
+ * @test_list:           List head to store all the tests which are
+ *                       part of this suite
+ */
+struct kutf_suite {
+	struct kutf_application *app;
+	const char *name;
+	union kutf_callback_data suite_data;
+	void *(*create_fixture)(struct kutf_context *context);
+	void (*remove_fixture)(struct kutf_context *context);
+	unsigned int fixture_variants;
+	unsigned int suite_default_flags;
+	struct list_head node;
+	struct dentry *dir;
+	struct list_head test_list;
+};
+
+/* ============================================================================
+	Application functions
+============================================================================ */
+
+/**
+ * kutf_create_application() - Create an in-kernel test application.
+ * @name: The name of the test application.
+ *
+ * Return: pointer to the kutf_application on success or NULL
+ * on failure
+ */
+struct kutf_application *kutf_create_application(const char *name);
+
+/**
+ * kutf_destroy_application() - Destroy an in-kernel test application.
+ *
+ * @app: The test application to destroy.
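+ *
+ * Typically paired with kutf_create_application() in a test module's init
+ * and exit hooks; a hedged sketch (the names below are illustrative only):
+ *
+ *   static struct kutf_application *my_app;
+ *
+ *   static int __init my_kutf_test_init(void)
+ *   {
+ *           my_app = kutf_create_application("my_kutf_test");
+ *           return my_app ? 0 : -ENOMEM;
+ *   }
+ *
+ *   static void __exit my_kutf_test_exit(void)
+ *   {
+ *           kutf_destroy_application(my_app);
+ *   }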
+ */ +void kutf_destroy_application(struct kutf_application *app); + +/* ============================================================================ + Suite functions +============================================================================ */ + +/** + * kutf_create_suite() - Create a kernel test suite. + * @app: The test application to create the suite in. + * @name: The name of the suite. + * @fixture_count: The number of fixtures to run over the test + * functions in this suite + * @create_fixture: Callback used to create a fixture. The returned value + * is stored in the fixture pointer in the context for + * use in the test functions. + * @remove_fixture: Callback used to remove a previously created fixture. + * + * Suite names must be unique. Should two suites with the same name be + * registered with the same application then this function will fail, if they + * are registered with different applications then the function will not detect + * this and the call will succeed. + * + * Return: pointer to the created kutf_suite on success or NULL + * on failure + */ +struct kutf_suite *kutf_create_suite( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context)); + +/** + * kutf_create_suite_with_filters() - Create a kernel test suite with user + * defined default filters. + * @app: The test application to create the suite in. + * @name: The name of the suite. + * @fixture_count: The number of fixtures to run over the test + * functions in this suite + * @create_fixture: Callback used to create a fixture. The returned value + * is stored in the fixture pointer in the context for + * use in the test functions. + * @remove_fixture: Callback used to remove a previously created fixture. + * @filters: Filters to apply to a test if it doesn't provide its own + * + * Suite names must be unique. Should two suites with the same name be + * registered with the same application then this function will fail, if they + * are registered with different applications then the function will not detect + * this and the call will succeed. + * + * Return: pointer to the created kutf_suite on success or NULL on failure + */ +struct kutf_suite *kutf_create_suite_with_filters( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), + unsigned int filters); + +/** + * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with + * user defined default filters. + * @app: The test application to create the suite in. + * @name: The name of the suite. + * @fixture_count: The number of fixtures to run over the test + * functions in this suite + * @create_fixture: Callback used to create a fixture. The returned value + * is stored in the fixture pointer in the context for + * use in the test functions. + * @remove_fixture: Callback used to remove a previously created fixture. 
+ * @filters: Filters to apply to a test if it doesn't provide its own + * @suite_data: Suite specific callback data, provided during the + * running of the test in the kutf_context + * + * Return: pointer to the created kutf_suite on success or NULL + * on failure + */ +struct kutf_suite *kutf_create_suite_with_filters_and_data( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), + unsigned int filters, + union kutf_callback_data suite_data); + +/** + * kutf_add_test() - Add a test to a kernel test suite. + * @suite: The suite to add the test to. + * @id: The ID of the test. + * @name: The name of the test. + * @execute: Callback to the test function to run. + * + * Note: As no filters are provided the test will use the suite filters instead + */ +void kutf_add_test(struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context)); + +/** + * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters + * @suite: The suite to add the test to. + * @id: The ID of the test. + * @name: The name of the test. + * @execute: Callback to the test function to run. + * @filters: A set of filtering flags, assigning test categories. + */ +void kutf_add_test_with_filters(struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters); + +/** + * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite + * with filters. + * @suite: The suite to add the test to. + * @id: The ID of the test. + * @name: The name of the test. + * @execute: Callback to the test function to run. + * @filters: A set of filtering flags, assigning test categories. + * @test_data: Test specific callback data, provided during the + * running of the test in the kutf_context + */ +void kutf_add_test_with_filters_and_data( + struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters, + union kutf_callback_data test_data); + + +/* ============================================================================ + Test functions +============================================================================ */ +/** + * kutf_test_log_result_external() - Log a result which has been created + * externally into a in a standard form + * recognized by the log parser. + * @context: The test context the test is running in + * @message: The message for this result + * @new_status: The result status of this log message + */ +void kutf_test_log_result_external( + struct kutf_context *context, + const char *message, + enum kutf_result_status new_status); + +/** + * kutf_test_expect_abort() - Tell the kernel that you expect the current + * fixture to produce an abort. + * @context: The test context this test is running in. + */ +void kutf_test_expect_abort(struct kutf_context *context); + +/** + * kutf_test_expect_fatal() - Tell the kernel that you expect the current + * fixture to produce a fatal error. + * @context: The test context this test is running in. + */ +void kutf_test_expect_fatal(struct kutf_context *context); + +/** + * kutf_test_expect_fail() - Tell the kernel that you expect the current + * fixture to fail. + * @context: The test context this test is running in. 
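+ *
+ * Illustrative sketch of a negative test (the test body is hypothetical):
+ *
+ *   static void my_error_path_test(struct kutf_context *context)
+ *   {
+ *           kutf_test_expect_fail(context);
+ *           // ...drive the error path; a kutf_test_fail() raised here is
+ *           // then reported by the framework as an expected failure...
+ *   }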
+ */
+void kutf_test_expect_fail(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_warn() - Tell the kernel that you expect the current
+ * fixture to produce a warning.
+ * @context: The test context this test is running in.
+ */
+void kutf_test_expect_warn(struct kutf_context *context);
+
+/**
+ * kutf_test_expect_pass() - Tell the kernel that you expect the current
+ * fixture to pass.
+ * @context: The test context this test is running in.
+ */
+void kutf_test_expect_pass(struct kutf_context *context);
+
+/**
+ * kutf_test_skip() - Tell the kernel that the test should be skipped.
+ * @context: The test context this test is running in.
+ */
+void kutf_test_skip(struct kutf_context *context);
+
+/**
+ * kutf_test_skip_msg() - Tell the kernel that this test has been skipped,
+ * supplying a reason string.
+ * @context: The test context this test is running in.
+ * @message: A message string containing the reason for the skip.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer whose lifetime is the same as the test run.
+ */
+void kutf_test_skip_msg(struct kutf_context *context, const char *message);
+
+/**
+ * kutf_test_pass() - Tell the kernel that this test has passed.
+ * @context: The test context this test is running in.
+ * @message: A message string containing the reason for the pass.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer whose lifetime is the same as the test run.
+ */
+void kutf_test_pass(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_debug() - Send a debug message
+ * @context: The test context this test is running in.
+ * @message: A message string containing the debug information.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer whose lifetime is the same as the test run.
+ */
+void kutf_test_debug(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_info() - Send an information message
+ * @context: The test context this test is running in.
+ * @message: A message string containing the information message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer whose lifetime is the same as the test run.
+ */
+void kutf_test_info(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_warn() - Send a warning message
+ * @context: The test context this test is running in.
+ * @message: A message string containing the warning message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer whose lifetime is the same as the test run.
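+ *
+ * A hedged example of the dynamic-string case described above (the value
+ * name and variable are illustrative):
+ *
+ *   kutf_test_warn(context,
+ *           kutf_dsprintf(&context->fixture_pool,
+ *                   "unexpected register value 0x%x", reg_val));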
+ */
+void kutf_test_warn(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fail() - Tell the kernel that a test has failed
+ * @context: The test context this test is running in.
+ * @message: A message string containing the failure message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer whose lifetime is the same as the test run.
+ */
+void kutf_test_fail(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error
+ * @context: The test context this test is running in.
+ * @message: A message string containing the fatal error message.
+ *
+ * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string
+ * is required it must be created with kutf_dsprintf which will store
+ * the resultant string in a buffer whose lifetime is the same as the test run.
+ */
+void kutf_test_fatal(struct kutf_context *context, char const *message);
+
+/**
+ * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test
+ *
+ * @context: The test context this test is running in.
+ */
+void kutf_test_abort(struct kutf_context *context);
+
+#endif /* _KERNEL_UTF_SUITE_H_ */
diff --git a/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_utils.h b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_utils.h
new file mode 100644
index 000000000000..25b8285500d7
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/tests/include/kutf/kutf_utils.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KERNEL_UTF_UTILS_H_
+#define _KERNEL_UTF_UTILS_H_
+
+/* kutf_utils.h
+ * Utilities for the kernel UTF test infrastructure.
+ *
+ * This collection of library functions is provided for use by kernel UTF
+ * and users of kernel UTF, covering functionality which doesn't directly
+ * fit within the other code modules.
+ */
+
+#include <kutf/kutf_mem.h>
+
+/**
+ * Maximum size of the message strings within kernel UTF; messages longer than
+ * this will be truncated.
+ */
+#define KUTF_MAX_DSPRINTF_LEN 1024
+
+/**
+ * kutf_dsprintf() - dynamic sprintf
+ * @pool: memory pool to allocate from
+ * @fmt:  The format string describing the string to produce.
+ * @...:  The parameters to feed into the format string.
+ *
+ * This function implements sprintf which dynamically allocates memory to store
+ * the string. The library will free the memory containing the string when the
+ * result set is cleared or destroyed.
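+ *
+ * Because the returned buffer is allocated from @pool, the result can be
+ * handed straight to the reporting helpers; a short sketch (the format
+ * string and variables are illustrative):
+ *
+ *   const char *msg = kutf_dsprintf(&context->fixture_pool,
+ *           "expected %d fixtures, saw %d", expected, seen);
+ *   kutf_test_fail(context, msg);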
+ * + * Note The returned string may be truncated to fit an internal temporary + * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length. + * + * Return: Returns pointer to allocated string, or NULL on error. + */ +const char *kutf_dsprintf(struct kutf_mempool *pool, + const char *fmt, ...); + +#endif /* _KERNEL_UTF_UTILS_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/Kbuild b/drivers/gpu/arm/b_r26p0/tests/kutf/Kbuild new file mode 100644 index 000000000000..2531d41ca28d --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/kutf/Kbuild @@ -0,0 +1,26 @@ +# +# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ccflags-y += -I$(src)/../include + +obj-$(CONFIG_MALI_KUTF) += kutf.o + +kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/Kconfig b/drivers/gpu/arm/b_r26p0/tests/kutf/Kconfig new file mode 100644 index 000000000000..0cdb474c06a3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/kutf/Kconfig @@ -0,0 +1,28 @@ +# +# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +config MALI_KUTF + tristate "Mali Kernel Unit Test Framework" + default m + help + Enables MALI testing framework. To compile it as a module, + choose M here - this will generate a single module called kutf. diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/Makefile b/drivers/gpu/arm/b_r26p0/tests/kutf/Makefile new file mode 100644 index 000000000000..d848e8774bd0 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/kutf/Makefile @@ -0,0 +1,35 @@ +# +# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/build.bp b/drivers/gpu/arm/b_r26p0/tests/kutf/build.bp
new file mode 100644
index 000000000000..32eab143e669
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/tests/kutf/build.bp
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+bob_kernel_module {
+    name: "kutf",
+    defaults: [
+        "kernel_defaults",
+        "kutf_includes",
+    ],
+    srcs: [
+        "Kbuild",
+        "kutf_helpers.c",
+        "kutf_helpers_user.c",
+        "kutf_mem.c",
+        "kutf_resultset.c",
+        "kutf_suite.c",
+        "kutf_utils.c",
+    ],
+    kbuild_options: ["CONFIG_MALI_KUTF=m"],
+    enabled: false,
+    base_build_kutf: {
+        enabled: true,
+    },
+}
diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_helpers.c b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_helpers.c
new file mode 100644
index 000000000000..4463b04792f5
--- /dev/null
+++ b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_helpers.c
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF test helpers */ +#include + +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(kutf_input_lock); + +bool kutf_helper_pending_input(struct kutf_context *context) +{ + bool input_pending; + + spin_lock(&kutf_input_lock); + + input_pending = !list_empty(&context->userdata.input_head); + + spin_unlock(&kutf_input_lock); + + return input_pending; +} +EXPORT_SYMBOL(kutf_helper_pending_input); + +char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) +{ + struct kutf_userdata_line *line; + + spin_lock(&kutf_input_lock); + + while (list_empty(&context->userdata.input_head)) { + int err; + + kutf_set_waiting_for_input(context->result_set); + + spin_unlock(&kutf_input_lock); + + err = wait_event_interruptible(context->userdata.input_waitq, + kutf_helper_pending_input(context)); + + if (err) + return ERR_PTR(-EINTR); + + spin_lock(&kutf_input_lock); + } + + line = list_first_entry(&context->userdata.input_head, + struct kutf_userdata_line, node); + if (line->str) { + /* + * Unless it is the end-of-input marker, + * remove it from the list + */ + list_del(&line->node); + } + + spin_unlock(&kutf_input_lock); + + if (str_size) + *str_size = line->size; + return line->str; +} + +int kutf_helper_input_enqueue(struct kutf_context *context, + const char __user *str, size_t size) +{ + struct kutf_userdata_line *line; + + line = kutf_mempool_alloc(&context->fixture_pool, + sizeof(*line) + size + 1); + if (!line) + return -ENOMEM; + if (str) { + unsigned long bytes_not_copied; + + line->size = size; + line->str = (void *)(line + 1); + bytes_not_copied = copy_from_user(line->str, str, size); + if (bytes_not_copied != 0) + return -EFAULT; + /* Zero terminate the string */ + line->str[size] = '\0'; + } else { + /* This is used to mark the end of input */ + WARN_ON(size); + line->size = 0; + line->str = NULL; + } + + spin_lock(&kutf_input_lock); + + list_add_tail(&line->node, &context->userdata.input_head); + + kutf_clear_waiting_for_input(context->result_set); + + spin_unlock(&kutf_input_lock); + + wake_up(&context->userdata.input_waitq); + + return 0; +} + +void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) +{ + kutf_helper_input_enqueue(context, NULL, 0); +} diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_helpers_user.c b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_helpers_user.c new file mode 100644 index 000000000000..108fa82d9b21 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_helpers_user.c @@ -0,0 +1,468 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF test helpers that mirror those for kutf-userside */ +#include +#include +#include + +#include +#include +#include + +const char *valtype_names[] = { + "INVALID", + "U64", + "STR", +}; + +static const char *get_val_type_name(enum kutf_helper_valtype valtype) +{ + /* enums can be signed or unsigned (implementation dependant), so + * enforce it to prevent: + * a) "<0 comparison on unsigned type" warning - if we did both upper + * and lower bound check + * b) incorrect range checking if it was a signed type - if we did + * upper bound check only */ + unsigned int type_idx = (unsigned int)valtype; + + if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT) + type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID; + + return valtype_names[type_idx]; +} + +/* Check up to str_len chars of val_str to see if it's a valid value name: + * + * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator + * - And, each char is in the character set [A-Z0-9_] */ +static int validate_val_name(const char *val_str, int str_len) +{ + int i = 0; + + for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) { + char val_chr = val_str[i]; + + if (val_chr >= 'A' && val_chr <= 'Z') + continue; + if (val_chr >= '0' && val_chr <= '9') + continue; + if (val_chr == '_') + continue; + + /* Character not in the set [A-Z0-9_] - report error */ + return 1; + } + + /* Names of 0 length are not valid */ + if (i == 0) + return 1; + /* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed */ + if (i > KUTF_HELPER_MAX_VAL_NAME_LEN || (i == KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0')) + return 1; + + return 0; +} + +/* Find the length of the valid part of the string when it will be in quotes + * e.g. "str" + * + * That is, before any '\\', '\n' or '"' characters. 
This is so we don't have + * to escape the string */ +static int find_quoted_string_valid_len(const char *str) +{ + char *ptr; + const char *check_chars = "\\\n\""; + + ptr = strpbrk(str, check_chars); + if (ptr) + return (int)(ptr-str); + + return (int)strlen(str); +} + +static int kutf_helper_userdata_enqueue(struct kutf_context *context, + const char *str) +{ + char *str_copy; + size_t len; + int err; + + len = strlen(str)+1; + + str_copy = kutf_mempool_alloc(&context->fixture_pool, len); + if (!str_copy) + return -ENOMEM; + + strcpy(str_copy, str); + + err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy); + + return err; +} + +#define MAX_U64_HEX_LEN 16 +/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */ +#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1) + +int kutf_helper_send_named_u64(struct kutf_context *context, + const char *val_name, u64 val) +{ + int ret = 1; + char msgbuf[NAMED_U64_VAL_BUF_SZ]; + const char *errmsg = NULL; + + if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send u64 value named '%s': Invalid value name", val_name); + goto out_err; + } + + ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val); + if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d", + val_name, NAMED_U64_VAL_BUF_SZ, ret); + goto out_err; + } + + ret = kutf_helper_userdata_enqueue(context, msgbuf); + if (ret) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send u64 value named '%s': send returned %d", + val_name, ret); + goto out_err; + } + + return ret; +out_err: + kutf_test_fail(context, errmsg); + return ret; +} +EXPORT_SYMBOL(kutf_helper_send_named_u64); + +#define NAMED_VALUE_SEP "=" +#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\"" +#define NAMED_STR_END_DELIM "\"" + +int kutf_helper_max_str_len_for_kern(const char *val_name, + int kern_buf_sz) +{ + const int val_name_len = strlen(val_name); + const int start_delim_len = strlen(NAMED_STR_START_DELIM); + const int end_delim_len = strlen(NAMED_STR_END_DELIM); + int max_msg_len = kern_buf_sz; + int max_str_len; + + max_str_len = max_msg_len - val_name_len - start_delim_len - + end_delim_len; + + return max_str_len; +} +EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern); + +int kutf_helper_send_named_str(struct kutf_context *context, + const char *val_name, + const char *val_str) +{ + int val_str_len; + int str_buf_sz; + char *str_buf = NULL; + int ret = 1; + char *copy_ptr; + int val_name_len; + int start_delim_len = strlen(NAMED_STR_START_DELIM); + int end_delim_len = strlen(NAMED_STR_END_DELIM); + const char *errmsg = NULL; + + if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send u64 value named '%s': Invalid value name", val_name); + goto out_err; + } + val_name_len = strlen(val_name); + + val_str_len = find_quoted_string_valid_len(val_str); + + /* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */ + str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1; + + /* Using kmalloc() here instead of mempool since we know we need to free + * before we return */ + str_buf = kmalloc(str_buf_sz, GFP_KERNEL); + if (!str_buf) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send str value named '%s': 
kmalloc failed, str_buf_sz=%d", + val_name, str_buf_sz); + goto out_err; + } + copy_ptr = str_buf; + + /* Manually copy each string component instead of snprintf because + * val_str may need to end early, and less error path handling */ + + /* name */ + memcpy(copy_ptr, val_name, val_name_len); + copy_ptr += val_name_len; + + /* str start delimiter */ + memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len); + copy_ptr += start_delim_len; + + /* str value */ + memcpy(copy_ptr, val_str, val_str_len); + copy_ptr += val_str_len; + + /* str end delimiter */ + memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len); + copy_ptr += end_delim_len; + + /* Terminator */ + *copy_ptr = '\0'; + + ret = kutf_helper_userdata_enqueue(context, str_buf); + + if (ret) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send str value named '%s': send returned %d", + val_name, ret); + goto out_err; + } + + kfree(str_buf); + return ret; + +out_err: + kutf_test_fail(context, errmsg); + kfree(str_buf); + return ret; +} +EXPORT_SYMBOL(kutf_helper_send_named_str); + +int kutf_helper_receive_named_val( + struct kutf_context *context, + struct kutf_helper_named_val *named_val) +{ + size_t recv_sz; + char *recv_str; + char *search_ptr; + char *name_str = NULL; + int name_len; + int strval_len; + enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID; + char *strval = NULL; + u64 u64val = 0; + int err = KUTF_HELPER_ERR_INVALID_VALUE; + + recv_str = kutf_helper_input_dequeue(context, &recv_sz); + if (!recv_str) + return -EBUSY; + else if (IS_ERR(recv_str)) + return PTR_ERR(recv_str); + + /* Find the '=', grab the name and validate it */ + search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]); + if (search_ptr) { + name_len = search_ptr - recv_str; + if (!validate_val_name(recv_str, name_len)) { + /* no need to reallocate - just modify string in place */ + name_str = recv_str; + name_str[name_len] = '\0'; + + /* Move until after the '=' */ + recv_str += (name_len + 1); + recv_sz -= (name_len + 1); + } + } + if (!name_str) { + pr_err("Invalid name part for received string '%s'\n", + recv_str); + return KUTF_HELPER_ERR_INVALID_NAME; + } + + /* detect value type */ + if (*recv_str == NAMED_STR_START_DELIM[1]) { + /* string delimiter start*/ + ++recv_str; + --recv_sz; + + /* Find end of string */ + search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]); + if (search_ptr) { + strval_len = search_ptr - recv_str; + /* Validate the string to ensure it contains no quotes */ + if (strval_len == find_quoted_string_valid_len(recv_str)) { + /* no need to reallocate - just modify string in place */ + strval = recv_str; + strval[strval_len] = '\0'; + + /* Move until after the end delimiter */ + recv_str += (strval_len + 1); + recv_sz -= (strval_len + 1); + type = KUTF_HELPER_VALTYPE_STR; + } else { + pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str); + err = KUTF_HELPER_ERR_CHARS_AFTER_VAL; + } + } else { + pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str); + err = KUTF_HELPER_ERR_NO_END_DELIMITER; + } + } else { + /* possibly a number value - strtoull will parse it */ + err = kstrtoull(recv_str, 0, &u64val); + /* unlike userspace can't get an end ptr, but if kstrtoull() + * reads characters after the number it'll report -EINVAL */ + if (!err) { + int len_remain = strnlen(recv_str, recv_sz); + + type = KUTF_HELPER_VALTYPE_U64; + recv_str += len_remain; + recv_sz -= len_remain; + } else { + /* special case: not a number, 
report as such */ + pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str); + } + } + + if (type == KUTF_HELPER_VALTYPE_INVALID) + return err; + + /* Any remaining characters - error */ + if (strnlen(recv_str, recv_sz) != 0) { + pr_err("Characters remain after value of type %s: '%s'\n", + get_val_type_name(type), recv_str); + return KUTF_HELPER_ERR_CHARS_AFTER_VAL; + } + + /* Success - write into the output structure */ + switch (type) { + case KUTF_HELPER_VALTYPE_U64: + named_val->u.val_u64 = u64val; + break; + case KUTF_HELPER_VALTYPE_STR: + named_val->u.val_str = strval; + break; + default: + pr_err("Unreachable, fix kutf_helper_receive_named_val\n"); + /* Coding error, report as though 'run' file failed */ + return -EINVAL; + } + + named_val->val_name = name_str; + named_val->type = type; + + return KUTF_HELPER_ERR_NONE; +} +EXPORT_SYMBOL(kutf_helper_receive_named_val); + +#define DUMMY_MSG "" +int kutf_helper_receive_check_val( + struct kutf_helper_named_val *named_val, + struct kutf_context *context, + const char *expect_val_name, + enum kutf_helper_valtype expect_val_type) +{ + int err; + + err = kutf_helper_receive_named_val(context, named_val); + if (err < 0) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Failed to receive value named '%s'", + expect_val_name); + kutf_test_fail(context, msg); + return err; + } else if (err > 0) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Named-value parse error when expecting value named '%s'", + expect_val_name); + kutf_test_fail(context, msg); + goto out_fail_and_fixup; + } + + if (strcmp(named_val->val_name, expect_val_name) != 0) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Expecting to receive value named '%s' but got '%s'", + expect_val_name, named_val->val_name); + kutf_test_fail(context, msg); + goto out_fail_and_fixup; + } + + + if (named_val->type != expect_val_type) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Expecting value named '%s' to be of type %s but got %s", + expect_val_name, get_val_type_name(expect_val_type), + get_val_type_name(named_val->type)); + kutf_test_fail(context, msg); + goto out_fail_and_fixup; + } + + return err; + +out_fail_and_fixup: + /* Produce a valid but incorrect value */ + switch (expect_val_type) { + case KUTF_HELPER_VALTYPE_U64: + named_val->u.val_u64 = 0ull; + break; + case KUTF_HELPER_VALTYPE_STR: + { + char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG)); + + if (!str) + return -1; + + strcpy(str, DUMMY_MSG); + named_val->u.val_str = str; + break; + } + default: + break; + } + + /* Indicate that this is invalid */ + named_val->type = KUTF_HELPER_VALTYPE_INVALID; + + /* But at least allow the caller to continue in the test with failures */ + return 0; +} +EXPORT_SYMBOL(kutf_helper_receive_check_val); + +void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val) +{ + switch (named_val->type) { + case KUTF_HELPER_VALTYPE_U64: + pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64); + break; + case KUTF_HELPER_VALTYPE_STR: + pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str); + break; + case KUTF_HELPER_VALTYPE_INVALID: + pr_warn("%s is invalid\n", named_val->val_name); + break; + default: + pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type); + break; + } +} +EXPORT_SYMBOL(kutf_helper_output_named_val); diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_mem.c 
b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_mem.c new file mode 100644 index 000000000000..fd98beaeb84a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_mem.c @@ -0,0 +1,108 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF memory management functions */ + +#include +#include +#include + +#include + + +/** + * struct kutf_alloc_entry - Structure representing an allocation. + * @node: List node for use with kutf_mempool. + * @data: Data area of the allocation + */ +struct kutf_alloc_entry { + struct list_head node; + u8 data[0]; +}; + +int kutf_mempool_init(struct kutf_mempool *pool) +{ + if (!pool) { + pr_err("NULL pointer passed to %s\n", __func__); + return -1; + } + + INIT_LIST_HEAD(&pool->head); + mutex_init(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(kutf_mempool_init); + +void kutf_mempool_destroy(struct kutf_mempool *pool) +{ + struct list_head *remove; + struct list_head *tmp; + + if (!pool) { + pr_err("NULL pointer passed to %s\n", __func__); + return; + } + + mutex_lock(&pool->lock); + list_for_each_safe(remove, tmp, &pool->head) { + struct kutf_alloc_entry *remove_alloc; + + remove_alloc = list_entry(remove, struct kutf_alloc_entry, node); + list_del(&remove_alloc->node); + kfree(remove_alloc); + } + mutex_unlock(&pool->lock); + +} +EXPORT_SYMBOL(kutf_mempool_destroy); + +void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size) +{ + struct kutf_alloc_entry *ret; + + if (!pool) { + pr_err("NULL pointer passed to %s\n", __func__); + goto fail_pool; + } + + mutex_lock(&pool->lock); + + ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL); + if (!ret) { + pr_err("Failed to allocate memory\n"); + goto fail_alloc; + } + + INIT_LIST_HEAD(&ret->node); + list_add(&ret->node, &pool->head); + + mutex_unlock(&pool->lock); + + return &ret->data[0]; + +fail_alloc: + mutex_unlock(&pool->lock); +fail_pool: + return NULL; +} +EXPORT_SYMBOL(kutf_mempool_alloc); diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_resultset.c b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_resultset.c new file mode 100644 index 000000000000..94ecfa4421e1 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_resultset.c @@ -0,0 +1,164 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF result management functions */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Lock to protect all result structures */ +static DEFINE_SPINLOCK(kutf_result_lock); + +struct kutf_result_set *kutf_create_result_set(void) +{ + struct kutf_result_set *set; + + set = kmalloc(sizeof(*set), GFP_KERNEL); + if (!set) { + pr_err("Failed to allocate resultset"); + goto fail_alloc; + } + + INIT_LIST_HEAD(&set->results); + init_waitqueue_head(&set->waitq); + set->flags = 0; + + return set; + +fail_alloc: + return NULL; +} + +int kutf_add_result(struct kutf_context *context, + enum kutf_result_status status, + const char *message) +{ + struct kutf_mempool *mempool = &context->fixture_pool; + struct kutf_result_set *set = context->result_set; + /* Create the new result */ + struct kutf_result *new_result; + + BUG_ON(set == NULL); + + new_result = kutf_mempool_alloc(mempool, sizeof(*new_result)); + if (!new_result) { + pr_err("Result allocation failed\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&new_result->node); + new_result->status = status; + new_result->message = message; + + spin_lock(&kutf_result_lock); + + list_add_tail(&new_result->node, &set->results); + + spin_unlock(&kutf_result_lock); + + wake_up(&set->waitq); + + return 0; +} + +void kutf_destroy_result_set(struct kutf_result_set *set) +{ + if (!list_empty(&set->results)) + pr_err("kutf_destroy_result_set: Unread results from test\n"); + + kfree(set); +} + +static bool kutf_has_result(struct kutf_result_set *set) +{ + bool has_result; + + spin_lock(&kutf_result_lock); + if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) + /* Pretend there are results if waiting for input */ + has_result = true; + else + has_result = !list_empty(&set->results); + spin_unlock(&kutf_result_lock); + + return has_result; +} + +struct kutf_result *kutf_remove_result(struct kutf_result_set *set) +{ + struct kutf_result *result = NULL; + int ret; + + do { + ret = wait_event_interruptible(set->waitq, + kutf_has_result(set)); + + if (ret) + return ERR_PTR(ret); + + spin_lock(&kutf_result_lock); + + if (!list_empty(&set->results)) { + result = list_first_entry(&set->results, + struct kutf_result, + node); + list_del(&result->node); + } else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) { + /* Return a fake result */ + static struct kutf_result waiting = { + .status = KUTF_RESULT_USERDATA_WAIT + }; + result = &waiting; + } + /* If result == NULL then there was a race with the event + * being removed between the check in kutf_has_result and + * the lock being obtained. 
In this case we retry + */ + + spin_unlock(&kutf_result_lock); + } while (result == NULL); + + return result; +} + +void kutf_set_waiting_for_input(struct kutf_result_set *set) +{ + spin_lock(&kutf_result_lock); + set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT; + spin_unlock(&kutf_result_lock); + + wake_up(&set->waitq); +} + +void kutf_clear_waiting_for_input(struct kutf_result_set *set) +{ + spin_lock(&kutf_result_lock); + set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT; + spin_unlock(&kutf_result_lock); +} diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_suite.c b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_suite.c new file mode 100644 index 000000000000..3f15669a2a0a --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_suite.c @@ -0,0 +1,1219 @@ +/* + * + * (C) COPYRIGHT 2014, 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF suite, test and fixture management including user to kernel + * interaction */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +/** + * struct kutf_application - Structure which represents kutf application + * @name: The name of this test application. 
+ * @dir: The debugfs directory for this test + * @suite_list: List head to store all the suites which are part of this + * application + */ +struct kutf_application { + const char *name; + struct dentry *dir; + struct list_head suite_list; +}; + +/** + * struct kutf_test_function - Structure which represents kutf test function + * @suite: Back reference to the suite this test function + * belongs to + * @filters: Filters that apply to this test function + * @test_id: Test ID + * @execute: Function to run for this test + * @test_data: Static data for this test + * @node: List node for test_list + * @variant_list: List head to store all the variants which can run on + * this function + * @dir: debugfs directory for this test function + */ +struct kutf_test_function { + struct kutf_suite *suite; + unsigned int filters; + unsigned int test_id; + void (*execute)(struct kutf_context *context); + union kutf_callback_data test_data; + struct list_head node; + struct list_head variant_list; + struct dentry *dir; +}; + +/** + * struct kutf_test_fixture - Structure which holds information on the kutf + * test fixture + * @test_func: Test function this fixture belongs to + * @fixture_index: Index of this fixture + * @node: List node for variant_list + * @dir: debugfs directory for this test fixture + */ +struct kutf_test_fixture { + struct kutf_test_function *test_func; + unsigned int fixture_index; + struct list_head node; + struct dentry *dir; +}; + +static struct dentry *base_dir; +static struct workqueue_struct *kutf_workq; + +/** + * struct kutf_convert_table - Structure which keeps test results + * @result_name: Status of the test result + * @result: Status value for a single test + */ +struct kutf_convert_table { + char result_name[50]; + enum kutf_result_status result; +}; + +struct kutf_convert_table kutf_convert[] = { +#define ADD_UTF_RESULT(_name) \ +{ \ + #_name, \ + _name, \ +}, +ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK) +ADD_UTF_RESULT(KUTF_RESULT_SKIP) +ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN) +ADD_UTF_RESULT(KUTF_RESULT_PASS) +ADD_UTF_RESULT(KUTF_RESULT_DEBUG) +ADD_UTF_RESULT(KUTF_RESULT_INFO) +ADD_UTF_RESULT(KUTF_RESULT_WARN) +ADD_UTF_RESULT(KUTF_RESULT_FAIL) +ADD_UTF_RESULT(KUTF_RESULT_FATAL) +ADD_UTF_RESULT(KUTF_RESULT_ABORT) +}; + +#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert)) + +/** + * kutf_create_context() - Create a test context in which a specific fixture + * of an application will be run and its results + * reported back to the user + * @test_fix: Test fixture to be run. + * + * The context's refcount will be initialized to 1. 
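+ *
+ * Illustrative pairing with the refcount helpers declared below, mirroring
+ * the "run" open path later in this file:
+ *
+ *   context = kutf_create_context(test_fix);   // refcount starts at 1
+ *   kutf_context_get(context);                 // extra reference for the worker
+ *   queue_work(kutf_workq, &context->work);    // worker drops it via
+ *                                              // kutf_context_put()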
+ * + * Return: Returns the created test context on success or NULL on failure + */ +static struct kutf_context *kutf_create_context( + struct kutf_test_fixture *test_fix); + +/** + * kutf_destroy_context() - Destroy a previously created test context, only + * once its refcount has become zero + * @kref: pointer to kref member within the context + * + * This should only be used via a kref_put() call on the context's kref member + */ +static void kutf_destroy_context(struct kref *kref); + +/** + * kutf_context_get() - increment refcount on a context + * @context: the kutf context + * + * This must be used when the lifetime of the context might exceed that of the + * thread creating @context + */ +static void kutf_context_get(struct kutf_context *context); + +/** + * kutf_context_put() - decrement refcount on a context, destroying it when it + * reached zero + * @context: the kutf context + * + * This must be used only after a corresponding kutf_context_get() call on + * @context, and the caller no longer needs access to @context. + */ +static void kutf_context_put(struct kutf_context *context); + +/** + * kutf_set_result() - Set the test result against the specified test context + * @context: Test context + * @status: Result status + */ +static void kutf_set_result(struct kutf_context *context, + enum kutf_result_status status); + +/** + * kutf_set_expected_result() - Set the expected test result for the specified + * test context + * @context: Test context + * @expected_status: Expected result status + */ +static void kutf_set_expected_result(struct kutf_context *context, + enum kutf_result_status expected_status); + +/** + * kutf_result_to_string() - Converts a KUTF result into a string + * @result_str: Output result string + * @result: Result status to convert + * + * Return: 1 if test result was successfully converted to string, 0 otherwise + */ +static int kutf_result_to_string(char **result_str, + enum kutf_result_status result) +{ + int i; + int ret = 0; + + for (i = 0; i < UTF_CONVERT_SIZE; i++) { + if (result == kutf_convert[i].result) { + *result_str = kutf_convert[i].result_name; + ret = 1; + } + } + return ret; +} + +/** + * kutf_debugfs_const_string_read() - Simple debugfs read callback which + * returns a constant string + * @file: Opened file to read from + * @buf: User buffer to write the data into + * @len: Amount of data to read + * @ppos: Offset into file to read from + * + * Return: On success, the number of bytes read and offset @ppos advanced by + * this number; on error, negative value + */ +static ssize_t kutf_debugfs_const_string_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + char *str = file->private_data; + + return simple_read_from_buffer(buf, len, ppos, str, strlen(str)); +} + +static const struct file_operations kutf_debugfs_const_string_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = kutf_debugfs_const_string_read, + .llseek = default_llseek, +}; + +/** + * kutf_add_explicit_result() - Check if an explicit result needs to be added + * @context: KUTF test context + */ +static void kutf_add_explicit_result(struct kutf_context *context) +{ + switch (context->expected_status) { + case KUTF_RESULT_UNKNOWN: + break; + + case KUTF_RESULT_WARN: + if (context->status == KUTF_RESULT_WARN) + kutf_test_pass(context, + "Pass (expected warn occurred)"); + else if (context->status != KUTF_RESULT_SKIP) + kutf_test_fail(context, + "Fail (expected warn missing)"); + break; + + case KUTF_RESULT_FAIL: + if (context->status == 
KUTF_RESULT_FAIL) + kutf_test_pass(context, + "Pass (expected fail occurred)"); + else if (context->status != KUTF_RESULT_SKIP) { + /* Force the expected status so the fail gets logged */ + context->expected_status = KUTF_RESULT_PASS; + kutf_test_fail(context, + "Fail (expected fail missing)"); + } + break; + + case KUTF_RESULT_FATAL: + if (context->status == KUTF_RESULT_FATAL) + kutf_test_pass(context, + "Pass (expected fatal occurred)"); + else if (context->status != KUTF_RESULT_SKIP) + kutf_test_fail(context, + "Fail (expected fatal missing)"); + break; + + case KUTF_RESULT_ABORT: + if (context->status == KUTF_RESULT_ABORT) + kutf_test_pass(context, + "Pass (expected abort occurred)"); + else if (context->status != KUTF_RESULT_SKIP) + kutf_test_fail(context, + "Fail (expected abort missing)"); + break; + default: + break; + } +} + +static void kutf_run_test(struct work_struct *data) +{ + struct kutf_context *test_context = container_of(data, + struct kutf_context, work); + struct kutf_suite *suite = test_context->suite; + struct kutf_test_function *test_func; + + test_func = test_context->test_fix->test_func; + + /* + * Call the create fixture function if required before the + * fixture is run + */ + if (suite->create_fixture) + test_context->fixture = suite->create_fixture(test_context); + + /* Only run the test if the fixture was created (if required) */ + if ((suite->create_fixture && test_context->fixture) || + (!suite->create_fixture)) { + /* Run this fixture */ + test_func->execute(test_context); + + if (suite->remove_fixture) + suite->remove_fixture(test_context); + + kutf_add_explicit_result(test_context); + } + + kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL); + + kutf_context_put(test_context); +} + +/** + * kutf_debugfs_run_open() Debugfs open callback for the "run" entry. + * @inode: inode of the opened file + * @file: Opened file to read from + * + * This function creates a KUTF context and queues it onto a workqueue to be + * run asynchronously. The resulting file descriptor can be used to communicate + * userdata to the test and to read back the results of the test execution. + * + * Return: 0 on success + */ +static int kutf_debugfs_run_open(struct inode *inode, struct file *file) +{ + struct kutf_test_fixture *test_fix = inode->i_private; + struct kutf_context *test_context; + int err = 0; + + test_context = kutf_create_context(test_fix); + if (!test_context) { + err = -ENOMEM; + goto finish; + } + + file->private_data = test_context; + + /* This reference is release by the kutf_run_test */ + kutf_context_get(test_context); + + queue_work(kutf_workq, &test_context->work); + +finish: + return err; +} + +#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n" + +/** + * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry. + * @file: Opened file to read from + * @buf: User buffer to write the data into + * @len: Amount of data to read + * @ppos: Offset into file to read from + * + * This function emits the results of the test, blocking until they are + * available. + * + * If the test involves user data then this will also return user data records + * to user space. If the test is waiting for user data then this function will + * output a message (to make the likes of 'cat' display it), followed by + * returning 0 to mark the end of file. + * + * Results will be emitted one at a time, once all the results have been read + * 0 will be returned to indicate there is no more data. + * + * Return: Number of bytes read. 
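+ *
+ * Each result is returned as a single "<status>:<message>" line terminated
+ * by a newline. For illustration only (the exact text depends on the test),
+ * a passing check might be read back as:
+ *
+ *   KUTF_RESULT_PASS:Test passed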
+ */ +static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct kutf_context *test_context = file->private_data; + struct kutf_result *res; + unsigned long bytes_not_copied; + ssize_t bytes_copied = 0; + char *kutf_str_ptr = NULL; + size_t kutf_str_len = 0; + size_t message_len = 0; + char separator = ':'; + char terminator = '\n'; + + res = kutf_remove_result(test_context->result_set); + + if (IS_ERR(res)) + return PTR_ERR(res); + + /* + * Handle 'fake' results - these results are converted to another + * form before being returned from the kernel + */ + switch (res->status) { + case KUTF_RESULT_TEST_FINISHED: + return 0; + case KUTF_RESULT_USERDATA_WAIT: + if (test_context->userdata.flags & + KUTF_USERDATA_WARNING_OUTPUT) { + /* + * Warning message already output, + * signal end-of-file + */ + return 0; + } + + message_len = sizeof(USERDATA_WARNING_MESSAGE)-1; + if (message_len > len) + message_len = len; + + bytes_not_copied = copy_to_user(buf, + USERDATA_WARNING_MESSAGE, + message_len); + if (bytes_not_copied != 0) + return -EFAULT; + test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT; + return message_len; + case KUTF_RESULT_USERDATA: + message_len = strlen(res->message); + if (message_len > len-1) { + message_len = len-1; + pr_warn("User data truncated, read not long enough\n"); + } + bytes_not_copied = copy_to_user(buf, res->message, + message_len); + if (bytes_not_copied != 0) { + pr_warn("Failed to copy data to user space buffer\n"); + return -EFAULT; + } + /* Finally the terminator */ + bytes_not_copied = copy_to_user(&buf[message_len], + &terminator, 1); + if (bytes_not_copied != 0) { + pr_warn("Failed to copy data to user space buffer\n"); + return -EFAULT; + } + return message_len+1; + default: + /* Fall through - this is a test result */ + break; + } + + /* Note: This code assumes a result is read completely */ + kutf_result_to_string(&kutf_str_ptr, res->status); + if (kutf_str_ptr) + kutf_str_len = strlen(kutf_str_ptr); + + if (res->message) + message_len = strlen(res->message); + + if ((kutf_str_len + 1 + message_len + 1) > len) { + pr_err("Not enough space in user buffer for a single result"); + return 0; + } + + /* First copy the result string */ + if (kutf_str_ptr) { + bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr, + kutf_str_len); + bytes_copied += kutf_str_len - bytes_not_copied; + if (bytes_not_copied) + goto exit; + } + + /* Then the separator */ + bytes_not_copied = copy_to_user(&buf[bytes_copied], + &separator, 1); + bytes_copied += 1 - bytes_not_copied; + if (bytes_not_copied) + goto exit; + + /* Finally Next copy the result string */ + if (res->message) { + bytes_not_copied = copy_to_user(&buf[bytes_copied], + res->message, message_len); + bytes_copied += message_len - bytes_not_copied; + if (bytes_not_copied) + goto exit; + } + + /* Finally the terminator */ + bytes_not_copied = copy_to_user(&buf[bytes_copied], + &terminator, 1); + bytes_copied += 1 - bytes_not_copied; + +exit: + return bytes_copied; +} + +/** + * kutf_debugfs_run_write() Debugfs write callback for the "run" entry. + * @file: Opened file to write to + * @buf: User buffer to read the data from + * @len: Amount of data to write + * @ppos: Offset into file to write to + * + * This function allows user and kernel to exchange extra data necessary for + * the test fixture. 
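+ *
+ * A user-side runner typically writes one "NAME=value" line per call, for
+ * example (the debugfs path and value name below are illustrative only):
+ *
+ *   echo "GPU_FREQ=1000000" > /sys/kernel/debug/kutf_tests/<app>/<suite>/<test>/<n>/run
+ *
+ * Writes longer than KUTF_MAX_LINE_LENGTH are rejected with -EINVAL.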
+ *
+ * The data is added to the first struct kutf_context running the fixture.
+ *
+ * Return: Number of bytes written
+ */
+static ssize_t kutf_debugfs_run_write(struct file *file,
+		const char __user *buf, size_t len, loff_t *ppos)
+{
+	int ret = 0;
+	struct kutf_context *test_context = file->private_data;
+
+	if (len > KUTF_MAX_LINE_LENGTH)
+		return -EINVAL;
+
+	ret = kutf_helper_input_enqueue(test_context, buf, len);
+	if (ret < 0)
+		return ret;
+
+	return len;
+}
+
+/**
+ * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry.
+ * @inode:	File entry representation
+ * @file:	A specific opening of the file
+ *
+ * Release any resources that were created during the opening of the file.
+ *
+ * Note that resources may not be released immediately, that might only happen
+ * later when other users of the kutf_context release their refcount.
+ *
+ * Return: 0 on success
+ */
+static int kutf_debugfs_run_release(struct inode *inode, struct file *file)
+{
+	struct kutf_context *test_context = file->private_data;
+
+	kutf_helper_input_enqueue_end_of_data(test_context);
+
+	kutf_context_put(test_context);
+	return 0;
+}
+
+static const struct file_operations kutf_debugfs_run_ops = {
+	.owner = THIS_MODULE,
+	.open = kutf_debugfs_run_open,
+	.read = kutf_debugfs_run_read,
+	.write = kutf_debugfs_run_write,
+	.release = kutf_debugfs_run_release,
+	.llseek = default_llseek,
+};
+
+/**
+ * create_fixture_variant() - Creates a fixture variant for the specified
+ *                            test function and index, and the debugfs entries
+ *                            that represent it.
+ * @test_func:		Test function
+ * @fixture_index:	Fixture index
+ *
+ * Return: 0 on success, negative value corresponding to error code in failure
+ */
+static int create_fixture_variant(struct kutf_test_function *test_func,
+		unsigned int fixture_index)
+{
+	struct kutf_test_fixture *test_fix;
+	char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */
+	struct dentry *tmp;
+	int err;
+
+	test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL);
+	if (!test_fix) {
+		pr_err("Failed to allocate memory when adding fixture\n");
+		err = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	test_fix->test_func = test_func;
+	test_fix->fixture_index = fixture_index;
+
+	snprintf(name, sizeof(name), "%d", fixture_index);
+	test_fix->dir = debugfs_create_dir(name, test_func->dir);
+	if (!test_fix->dir) {
+		pr_err("Failed to create debugfs directory when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+	tmp = debugfs_create_file_unsafe(
+#else
+	tmp = debugfs_create_file(
+#endif
+			"run", 0600, test_fix->dir,
+			test_fix,
+			&kutf_debugfs_run_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"run\" when adding fixture\n");
+		/* Might not be the right error, we don't get it passed back to us */
+		err = -EEXIST;
+		goto fail_file;
+	}
+
+	list_add(&test_fix->node, &test_func->variant_list);
+	return 0;
+
+fail_file:
+	debugfs_remove_recursive(test_fix->dir);
+fail_dir:
+	kfree(test_fix);
+fail_alloc:
+	return err;
+}
+
+/**
+ * kutf_remove_test_variant() - Destroy a previously created fixture variant.
+ * @test_fix:	Test fixture
+ */
+static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix)
+{
+	debugfs_remove_recursive(test_fix->dir);
+	kfree(test_fix);
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+/* Adapting to the upstream debugfs_create_x32() change */
+static int kutfp_u32_get(void *data, u64 *val)
+{
+	*val = *(u32 *)data;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(kutfp_fops_x32_ro, kutfp_u32_get, NULL, "0x%08llx\n");
+#endif
+
+void kutf_add_test_with_filters_and_data(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data test_data)
+{
+	struct kutf_test_function *test_func;
+	struct dentry *tmp;
+	unsigned int i;
+
+	test_func = kmalloc(sizeof(*test_func), GFP_KERNEL);
+	if (!test_func) {
+		pr_err("Failed to allocate memory when adding test %s\n", name);
+		goto fail_alloc;
+	}
+
+	INIT_LIST_HEAD(&test_func->variant_list);
+
+	test_func->dir = debugfs_create_dir(name, suite->dir);
+	if (!test_func->dir) {
+		pr_err("Failed to create debugfs directory when adding test %s\n", name);
+		goto fail_dir;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n",
+				  &kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->filters = filters;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
+	tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir,
+					 &test_func->filters, &kutfp_fops_x32_ro);
+#else
+	tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
+				 &test_func->filters);
+#endif
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	test_func->test_id = id;
+	tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir,
+				 &test_func->test_id);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name);
+		goto fail_file;
+	}
+
+	for (i = 0; i < suite->fixture_variants; i++) {
+		if (create_fixture_variant(test_func, i)) {
+			pr_err("Failed to create fixture %d when adding test %s\n", i, name);
+			goto fail_file;
+		}
+	}
+
+	test_func->suite = suite;
+	test_func->execute = execute;
+	test_func->test_data = test_data;
+
+	list_add(&test_func->node, &suite->test_list);
+	return;
+
+fail_file:
+	debugfs_remove_recursive(test_func->dir);
+fail_dir:
+	kfree(test_func);
+fail_alloc:
+	return;
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters_and_data);
+
+void kutf_add_test_with_filters(
+		struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    filters,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test_with_filters);
+
+void kutf_add_test(struct kutf_suite *suite,
+		unsigned int id,
+		const char *name,
+		void (*execute)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+
+	kutf_add_test_with_filters_and_data(suite,
+					    id,
+					    name,
+					    execute,
+					    suite->suite_default_flags,
+					    data);
+}
+EXPORT_SYMBOL(kutf_add_test);
+
+/**
+ * kutf_remove_test() - Remove a previously added test function.
+ * @test_func: Test function + */ +static void kutf_remove_test(struct kutf_test_function *test_func) +{ + struct list_head *pos; + struct list_head *tmp; + + list_for_each_safe(pos, tmp, &test_func->variant_list) { + struct kutf_test_fixture *test_fix; + + test_fix = list_entry(pos, struct kutf_test_fixture, node); + kutf_remove_test_variant(test_fix); + } + + list_del(&test_func->node); + debugfs_remove_recursive(test_func->dir); + kfree(test_func); +} + +struct kutf_suite *kutf_create_suite_with_filters_and_data( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), + unsigned int filters, + union kutf_callback_data suite_data) +{ + struct kutf_suite *suite; + struct dentry *tmp; + + suite = kmalloc(sizeof(*suite), GFP_KERNEL); + if (!suite) { + pr_err("Failed to allocate memory when creating suite %s\n", name); + goto fail_kmalloc; + } + + suite->dir = debugfs_create_dir(name, app->dir); + if (!suite->dir) { + pr_err("Failed to create debugfs directory when adding test %s\n", name); + goto fail_debugfs; + } + + tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n", + &kutf_debugfs_const_string_ops); + if (!tmp) { + pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); + goto fail_file; + } + + INIT_LIST_HEAD(&suite->test_list); + suite->app = app; + suite->name = name; + suite->fixture_variants = fixture_count; + suite->create_fixture = create_fixture; + suite->remove_fixture = remove_fixture; + suite->suite_default_flags = filters; + suite->suite_data = suite_data; + + list_add(&suite->node, &app->suite_list); + + return suite; + +fail_file: + debugfs_remove_recursive(suite->dir); +fail_debugfs: + kfree(suite); +fail_kmalloc: + return NULL; +} +EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data); + +struct kutf_suite *kutf_create_suite_with_filters( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), + unsigned int filters) +{ + union kutf_callback_data data; + + data.ptr_value = NULL; + return kutf_create_suite_with_filters_and_data(app, + name, + fixture_count, + create_fixture, + remove_fixture, + filters, + data); +} +EXPORT_SYMBOL(kutf_create_suite_with_filters); + +struct kutf_suite *kutf_create_suite( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context)) +{ + union kutf_callback_data data; + + data.ptr_value = NULL; + return kutf_create_suite_with_filters_and_data(app, + name, + fixture_count, + create_fixture, + remove_fixture, + KUTF_F_TEST_GENERIC, + data); +} +EXPORT_SYMBOL(kutf_create_suite); + +/** + * kutf_destroy_suite() - Destroy a previously added test suite. 
+ * @suite: Test suite + */ +static void kutf_destroy_suite(struct kutf_suite *suite) +{ + struct list_head *pos; + struct list_head *tmp; + + list_for_each_safe(pos, tmp, &suite->test_list) { + struct kutf_test_function *test_func; + + test_func = list_entry(pos, struct kutf_test_function, node); + kutf_remove_test(test_func); + } + + list_del(&suite->node); + debugfs_remove_recursive(suite->dir); + kfree(suite); +} + +struct kutf_application *kutf_create_application(const char *name) +{ + struct kutf_application *app; + struct dentry *tmp; + + app = kmalloc(sizeof(*app), GFP_KERNEL); + if (!app) { + pr_err("Failed to create allocate memory when creating application %s\n", name); + goto fail_kmalloc; + } + + app->dir = debugfs_create_dir(name, base_dir); + if (!app->dir) { + pr_err("Failed to create debugfs direcotry when creating application %s\n", name); + goto fail_debugfs; + } + + tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n", + &kutf_debugfs_const_string_ops); + if (!tmp) { + pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name); + goto fail_file; + } + + INIT_LIST_HEAD(&app->suite_list); + app->name = name; + + return app; + +fail_file: + debugfs_remove_recursive(app->dir); +fail_debugfs: + kfree(app); +fail_kmalloc: + return NULL; +} +EXPORT_SYMBOL(kutf_create_application); + +void kutf_destroy_application(struct kutf_application *app) +{ + struct list_head *pos; + struct list_head *tmp; + + list_for_each_safe(pos, tmp, &app->suite_list) { + struct kutf_suite *suite; + + suite = list_entry(pos, struct kutf_suite, node); + kutf_destroy_suite(suite); + } + + debugfs_remove_recursive(app->dir); + kfree(app); +} +EXPORT_SYMBOL(kutf_destroy_application); + +static struct kutf_context *kutf_create_context( + struct kutf_test_fixture *test_fix) +{ + struct kutf_context *new_context; + + new_context = kmalloc(sizeof(*new_context), GFP_KERNEL); + if (!new_context) { + pr_err("Failed to allocate test context"); + goto fail_alloc; + } + + new_context->result_set = kutf_create_result_set(); + if (!new_context->result_set) { + pr_err("Failed to create result set"); + goto fail_result_set; + } + + new_context->test_fix = test_fix; + /* Save the pointer to the suite as the callbacks will require it */ + new_context->suite = test_fix->test_func->suite; + new_context->status = KUTF_RESULT_UNKNOWN; + new_context->expected_status = KUTF_RESULT_UNKNOWN; + + kutf_mempool_init(&new_context->fixture_pool); + new_context->fixture = NULL; + new_context->fixture_index = test_fix->fixture_index; + new_context->fixture_name = NULL; + new_context->test_data = test_fix->test_func->test_data; + + new_context->userdata.flags = 0; + INIT_LIST_HEAD(&new_context->userdata.input_head); + init_waitqueue_head(&new_context->userdata.input_waitq); + + INIT_WORK(&new_context->work, kutf_run_test); + + kref_init(&new_context->kref); + + return new_context; + +fail_result_set: + kfree(new_context); +fail_alloc: + return NULL; +} + +static void kutf_destroy_context(struct kref *kref) +{ + struct kutf_context *context; + + context = container_of(kref, struct kutf_context, kref); + kutf_destroy_result_set(context->result_set); + kutf_mempool_destroy(&context->fixture_pool); + kfree(context); +} + +static void kutf_context_get(struct kutf_context *context) +{ + kref_get(&context->kref); +} + +static void kutf_context_put(struct kutf_context *context) +{ + kref_put(&context->kref, kutf_destroy_context); +} + + +static void kutf_set_result(struct kutf_context *context, + enum 
kutf_result_status status) +{ + context->status = status; +} + +static void kutf_set_expected_result(struct kutf_context *context, + enum kutf_result_status expected_status) +{ + context->expected_status = expected_status; +} + +/** + * kutf_test_log_result() - Log a result for the specified test context + * @context: Test context + * @message: Result string + * @new_status: Result status + */ +static void kutf_test_log_result( + struct kutf_context *context, + const char *message, + enum kutf_result_status new_status) +{ + if (context->status < new_status) + context->status = new_status; + + if (context->expected_status != new_status) + kutf_add_result(context, new_status, message); +} + +void kutf_test_log_result_external( + struct kutf_context *context, + const char *message, + enum kutf_result_status new_status) +{ + kutf_test_log_result(context, message, new_status); +} +EXPORT_SYMBOL(kutf_test_log_result_external); + +void kutf_test_expect_abort(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_ABORT); +} +EXPORT_SYMBOL(kutf_test_expect_abort); + +void kutf_test_expect_fatal(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_FATAL); +} +EXPORT_SYMBOL(kutf_test_expect_fatal); + +void kutf_test_expect_fail(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_FAIL); +} +EXPORT_SYMBOL(kutf_test_expect_fail); + +void kutf_test_expect_warn(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_WARN); +} +EXPORT_SYMBOL(kutf_test_expect_warn); + +void kutf_test_expect_pass(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_PASS); +} +EXPORT_SYMBOL(kutf_test_expect_pass); + +void kutf_test_skip(struct kutf_context *context) +{ + kutf_set_result(context, KUTF_RESULT_SKIP); + kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); + + kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP); +} +EXPORT_SYMBOL(kutf_test_skip); + +void kutf_test_skip_msg(struct kutf_context *context, const char *message) +{ + kutf_set_result(context, KUTF_RESULT_SKIP); + kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); + + kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool, + "Test skipped: %s", message), KUTF_RESULT_SKIP); + kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP); +} +EXPORT_SYMBOL(kutf_test_skip_msg); + +void kutf_test_debug(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_DEBUG); +} +EXPORT_SYMBOL(kutf_test_debug); + +void kutf_test_pass(struct kutf_context *context, char const *message) +{ + static const char explicit_message[] = "(explicit pass)"; + + if (!message) + message = explicit_message; + + kutf_test_log_result(context, message, KUTF_RESULT_PASS); +} +EXPORT_SYMBOL(kutf_test_pass); + +void kutf_test_info(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_INFO); +} +EXPORT_SYMBOL(kutf_test_info); + +void kutf_test_warn(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_WARN); +} +EXPORT_SYMBOL(kutf_test_warn); + +void kutf_test_fail(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_FAIL); +} +EXPORT_SYMBOL(kutf_test_fail); + +void kutf_test_fatal(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_FATAL); +} 
+EXPORT_SYMBOL(kutf_test_fatal); + +void kutf_test_abort(struct kutf_context *context) +{ + kutf_test_log_result(context, "", KUTF_RESULT_ABORT); +} +EXPORT_SYMBOL(kutf_test_abort); + +#ifdef CONFIG_DEBUG_FS + +/** + * init_kutf_core() - Module entry point. + * + * Create the base entry point in debugfs. + */ +static int __init init_kutf_core(void) +{ + kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1); + if (!kutf_workq) + return -ENOMEM; + + base_dir = debugfs_create_dir("kutf_tests", NULL); + if (!base_dir) { + destroy_workqueue(kutf_workq); + kutf_workq = NULL; + return -ENOMEM; + } + + return 0; +} + +/** + * exit_kutf_core() - Module exit point. + * + * Remove the base entry point in debugfs. + */ +static void __exit exit_kutf_core(void) +{ + debugfs_remove_recursive(base_dir); + + if (kutf_workq) + destroy_workqueue(kutf_workq); +} + +#else /* CONFIG_DEBUG_FS */ + +/** + * init_kutf_core() - Module entry point. + * + * Stub for when build against a kernel without debugfs support + */ +static int __init init_kutf_core(void) +{ + pr_debug("KUTF requires a kernel with debug fs support"); + + return -ENODEV; +} + +/** + * exit_kutf_core() - Module exit point. + * + * Stub for when build against a kernel without debugfs support + */ +static void __exit exit_kutf_core(void) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +MODULE_LICENSE("GPL"); + +module_init(init_kutf_core); +module_exit(exit_kutf_core); diff --git a/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_utils.c b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_utils.c new file mode 100644 index 000000000000..7f5ac517fdb4 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/kutf/kutf_utils.c @@ -0,0 +1,76 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF utility functions */ + +#include +#include +#include +#include + +#include +#include + +static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; + +DEFINE_MUTEX(buffer_lock); + +const char *kutf_dsprintf(struct kutf_mempool *pool, + const char *fmt, ...) 
+{ + va_list args; + int len; + int size; + void *buffer; + + mutex_lock(&buffer_lock); + va_start(args, fmt); + len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args); + va_end(args); + + if (len < 0) { + pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt); + goto fail_format; + } + + if (len >= sizeof(tmp_buffer)) { + pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt); + size = sizeof(tmp_buffer); + } else { + size = len + 1; + } + + buffer = kutf_mempool_alloc(pool, size); + if (!buffer) + goto fail_alloc; + + memcpy(buffer, tmp_buffer, size); + mutex_unlock(&buffer_lock); + + return buffer; + +fail_alloc: +fail_format: + mutex_unlock(&buffer_lock); + return NULL; +} +EXPORT_SYMBOL(kutf_dsprintf); diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kbuild new file mode 100644 index 000000000000..f5565d30f9cf --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kbuild @@ -0,0 +1,26 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android + +obj-$(CONFIG_MALI_CLK_RATE_TRACE_PORTAL) += mali_kutf_clk_rate_trace_test_portal.o + +mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kconfig b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kconfig new file mode 100644 index 000000000000..04b44cfff854 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Kconfig @@ -0,0 +1,30 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + +config CONFIG_MALI_CLK_RATE_TRACE_PORTAL + tristate "Mali GPU Clock Trace Test portal" + depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF + default m + help + This option will build a test module mali_kutf_clk_rate_trace_test_portal + that can test the clocks integration into the platform and exercise some + basic trace test in the system. Choosing M here will generate a single + module called mali_kutf_clk_rate_trace_test_portal. diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Makefile b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Makefile new file mode 100644 index 000000000000..71c78b84830c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/Makefile @@ -0,0 +1,57 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ifneq ($(KERNELRELEASE),) + +ccflags-y := \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -I$(src)/../../include \ + -I$(src)/../../../../../../../include \ + -I$(src)/../../../../ \ + -I$(src)/../../../ \ + -I$(src)/../../../backend/gpu \ + -I$(src)/../../../debug \ + -I$(src)/../../../debug/backend \ + -I$(src)/ \ + -I$(srctree)/drivers/staging/android \ + -I$(srctree)/include/linux + +obj-m := mali_kutf_clk_rate_trace_test_portal.o +mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o + +else +# linux build system bootstrap for out-of-tree module + +# default to building for the host +ARCH ?= $(shell uname -m) + +ifeq ($(KDIR),) +$(error Must specify KDIR to point to the kernel to target)) +endif + +all: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../../kutf/Module.symvers $(CURDIR)/../../../Module.symvers" modules + +clean: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean + +endif diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/build.bp new file mode 100644 index 000000000000..0cc2904db542 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/build.bp @@ -0,0 +1,34 @@ +/* + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +bob_kernel_module { + name: "mali_kutf_clk_rate_trace_test_portal", + defaults: [ + "mali_kbase_shared_config_defaults", + "kernel_test_includes", + ], + srcs: [ + "../mali_kutf_clk_rate_trace_test.h", + "Makefile", + "mali_kutf_clk_rate_trace_test.c", + ], + extra_symbols: [ + "mali_kbase", + "kutf", + ], + enabled: false, + base_build_kutf: { + enabled: true, + kbuild_options: ["CONFIG_MALI_CLK_RATE_TRACE_PORTAL=m"], + }, +} diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c new file mode 100644 index 000000000000..d46666122df3 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c @@ -0,0 +1,886 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +#include +#include +#include +#include +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) +#include +#else +#include +#endif +#include "mali_kbase.h" +#include "mali_kbase_irq_internal.h" +#include "mali_kbase_pm_internal.h" +#include "mali_kbase_clk_rate_trace_mgr.h" + +#include +#include +#include +#include + +#include "../mali_kutf_clk_rate_trace_test.h" + +#define MINOR_FOR_FIRST_KBASE_DEV (-1) + +/* KUTF test application pointer for this test */ +struct kutf_application *kutf_app; + +enum portal_server_state { + PORTAL_STATE_NO_CLK, + PORTAL_STATE_LIVE, + PORTAL_STATE_CLOSING, +}; + +/** + * struct clk_trace_snapshot - Trace info data on a clock. + * @previous_rate: Snapshot start point clock rate. + * @current_rate: End point clock rate. It becomes the start rate of the + * next trace snapshot. + * @rate_up_cnt: Count in the snapshot duration when the clock trace + * write is a rate of higher value than the last. + * @rate_down_cnt: Count in the snapshot duration when the clock trace write + * is a rate of lower value than the last. + */ +struct clk_trace_snapshot { + unsigned long previous_rate; + unsigned long current_rate; + u32 rate_up_cnt; + u32 rate_down_cnt; +}; + +/** + * struct kutf_clk_rate_trace_fixture_data - Fixture data for the test. + * @kbdev: kbase device for the GPU. + * @listener: Clock rate change listener structure. + * @invoke_notify: When true, invoke notify command is being executed. + * @snapshot: Clock trace update snapshot data array. A snapshot + * for each clock contains info accumulated beteen two + * GET_TRACE_SNAPSHOT requests. + * @nclks: Number of clocks visible to the trace portal. + * @pm_ctx_cnt: Net count of PM (Power Management) context INC/DEC + * PM_CTX_CNT requests made to the portal. On change from + * 0 to 1 (INC), or, 1 to 0 (DEC), a PM context action is + * triggered. 
+ * @total_update_cnt: Total number of received trace write callbacks. + * @server_state: Portal server operational state. + * @result_msg: Message for the test result. + * @test_status: Portal test reslt status. + */ +struct kutf_clk_rate_trace_fixture_data { + struct kbase_device *kbdev; + struct kbase_clk_rate_listener listener; + bool invoke_notify; + struct clk_trace_snapshot snapshot[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nclks; + unsigned int pm_ctx_cnt; + unsigned int total_update_cnt; + enum portal_server_state server_state; + char const *result_msg; + enum kutf_result_status test_status; +}; + +struct clk_trace_portal_input { + struct kutf_helper_named_val cmd_input; + enum kbasep_clk_rate_trace_req portal_cmd; + int named_val_err; +}; + +struct kbasep_cmd_name_pair { + enum kbasep_clk_rate_trace_req cmd; + const char *name; +}; + +struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { + {PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR}, + {PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE}, + {PORTAL_CMD_GET_TRACE_SNAPSHOT, GET_TRACE_SNAPSHOT}, + {PORTAL_CMD_INC_PM_CTX_CNT, INC_PM_CTX_CNT}, + {PORTAL_CMD_DEC_PM_CTX_CNT, DEC_PM_CTX_CNT}, + {PORTAL_CMD_CLOSE_PORTAL, CLOSE_PORTAL}, + {PORTAL_CMD_INVOKE_NOTIFY_42KHZ, INVOKE_NOTIFY_42KHZ}, + }; + +/* Global pointer for the kutf_portal_trace_write() to use. When + * this pointer is engaged, new requests for create fixture will fail + * hence limiting the use of the portal at any time to a singleton. + */ +struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; + +#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) +static char portal_msg_buf[PORTAL_MSG_LEN]; + +static void kutf_portal_trace_write( + struct kbase_clk_rate_listener *listener, + u32 index, u32 new_rate) +{ + struct clk_trace_snapshot *snapshot; + struct kutf_clk_rate_trace_fixture_data *data = container_of( + listener, struct kutf_clk_rate_trace_fixture_data, listener); + + lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock); + + if (WARN_ON(g_ptr_portal_data == NULL)) + return; + if (WARN_ON(index >= g_ptr_portal_data->nclks)) + return; + + /* This callback is triggered by invoke notify command, skipping */ + if (data->invoke_notify) + return; + + snapshot = &g_ptr_portal_data->snapshot[index]; + if (new_rate > snapshot->current_rate) + snapshot->rate_up_cnt++; + else + snapshot->rate_down_cnt++; + snapshot->current_rate = new_rate; + g_ptr_portal_data->total_update_cnt++; +} + +static void kutf_set_pm_ctx_active(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (WARN_ON(data->pm_ctx_cnt != 1)) + return; + + kbase_pm_context_active(data->kbdev); + kbase_pm_wait_for_desired_state(data->kbdev); + kbase_pm_request_gpu_cycle_counter(data->kbdev); +} + +static void kutf_set_pm_ctx_idle(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (WARN_ON(data->pm_ctx_cnt > 0)) + return; + + kbase_pm_context_idle(data->kbdev); + kbase_pm_release_gpu_cycle_counter(data->kbdev); +} + +static char const *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + const unsigned int cnt = data->pm_ctx_cnt; + const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd; + char const *errmsg = NULL; + + WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && + req != PORTAL_CMD_DEC_PM_CTX_CNT); + + if (req 
== PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) { + data->pm_ctx_cnt++; + if (data->pm_ctx_cnt == 1) + kutf_set_pm_ctx_active(context); + } + + if (req == PORTAL_CMD_DEC_PM_CTX_CNT && cnt > 0) { + data->pm_ctx_cnt--; + if (data->pm_ctx_cnt == 0) + kutf_set_pm_ctx_idle(context); + } + + /* Skip the length check, no chance of overflow for two ints */ + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for adjusting pm_ctx_cnt"); + } + + return errmsg; +} + +static char const *kutf_clk_trace_do_get_rate(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + unsigned long rate; + bool idle; + int ret; + int i; + char const *errmsg = NULL; + + WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) && + (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE)); + + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, RATE:[", seq); + + for (i = 0; i < data->nclks; i++) { + spin_lock(&kbdev->pm.clk_rtm.lock); + if (cmd->portal_cmd == PORTAL_CMD_GET_CLK_RATE_MGR) + rate = kbdev->pm.clk_rtm.clks[i]->clock_val; + else + rate = data->snapshot[i].current_rate; + idle = kbdev->pm.clk_rtm.gpu_idle; + spin_unlock(&kbdev->pm.clk_rtm.lock); + + if ((i + 1) == data->nclks) + ret += snprintf(portal_msg_buf + ret, + PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}", + rate, idle); + else + ret += snprintf(portal_msg_buf + ret, + PORTAL_MSG_LEN - ret, "0x%lx, ", rate); + + if (ret >= PORTAL_MSG_LEN) { + pr_warn("Message buf overflow with rate array data\n"); + return kutf_dsprintf(&context->fixture_pool, + "Message buf overflow with rate array data"); + } + } + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending back rate array\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending rate array"); + } + + return errmsg; +} + +/** + * kutf_clk_trace_do_get_snapshot() - Send back the current snapshot + * @context: KUTF context + * @cmd: The decoded portal input request + * + * The accumulated clock rate trace information is kept inside as an snapshot + * record. A user request of getting the snapshot marks the closure of the + * current snapshot record, and the start of the next one. The response + * message contains the current snapshot record, with each clock's + * data sequentially placed inside (array marker) [ ]. 
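+ *
+ * Each clock contributes a tuple (previous_rate, current_rate, rate_up_cnt,
+ * rate_down_cnt). As an illustration only, a reply payload for two clocks
+ * could look like:
+ *
+ *   {SEQ:3, SNAPSHOT_ARRAY:[(0x1dcd6500, 0x2faf0800, 1, 0), (0x0, 0x0, 0, 0)]}
+ *
+ * and is returned to user space as the value of a named "ACK" string.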
+ */ +static char const *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct clk_trace_snapshot snapshot; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + int ret; + int i; + char const *fmt; + char const *errmsg = NULL; + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_TRACE_SNAPSHOT); + + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, SNAPSHOT_ARRAY:[", seq); + + for (i = 0; i < data->nclks; i++) { + spin_lock(&data->kbdev->pm.clk_rtm.lock); + /* copy out the snapshot of the clock */ + snapshot = data->snapshot[i]; + /* Set the next snapshot start condition */ + data->snapshot[i].previous_rate = snapshot.current_rate; + data->snapshot[i].rate_up_cnt = 0; + data->snapshot[i].rate_down_cnt = 0; + spin_unlock(&data->kbdev->pm.clk_rtm.lock); + + /* Check i corresponding to the last clock */ + if ((i + 1) == data->nclks) + fmt = "(0x%lx, 0x%lx, %u, %u)]}"; + else + fmt = "(0x%lx, 0x%lx, %u, %u), "; + ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, + fmt, snapshot.previous_rate, snapshot.current_rate, + snapshot.rate_up_cnt, snapshot.rate_down_cnt); + if (ret >= PORTAL_MSG_LEN) { + pr_warn("Message buf overflow with snapshot data\n"); + return kutf_dsprintf(&context->fixture_pool, + "Message buf overflow with snapshot data"); + } + } + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending back snapshot array\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending snapshot array"); + } + + return errmsg; +} + +/** + * kutf_clk_trace_do_invoke_notify_42k() - Invokes the stored notification callback + * @context: KUTF context + * @cmd: The decoded portal input request + * + * Invokes frequency change notification callbacks with a fake + * GPU frequency 42 kHz for the top clock domain. 
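+ *
+ * The acknowledgement payload follows the "{SEQ:%d, HZ:%lu}" format used
+ * below, e.g. (sequence number illustrative): {SEQ:7, HZ:42000}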
+ */ +static char const *kutf_clk_trace_do_invoke_notify_42k( + struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + const unsigned long new_rate_hz = 42000; + int ret; + char const *errmsg = NULL; + struct kbase_clk_rate_trace_manager *clk_rtm = &data->kbdev->pm.clk_rtm; + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVOKE_NOTIFY_42KHZ); + + spin_lock(&clk_rtm->lock); + + data->invoke_notify = true; + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, 0, new_rate_hz); + data->invoke_notify = false; + + spin_unlock(&clk_rtm->lock); + + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, HZ:%lu}", seq, new_rate_hz); + + if (ret >= PORTAL_MSG_LEN) { + pr_warn("Message buf overflow with invoked data\n"); + return kutf_dsprintf(&context->fixture_pool, + "Message buf overflow with invoked data"); + } + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ "request\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for " INVOKE_NOTIFY_42KHZ "request"); + } + + return errmsg; +} + +static char const *kutf_clk_trace_do_close_portal(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + char const *errmsg = NULL; + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_CLOSE_PORTAL); + + data->server_state = PORTAL_STATE_CLOSING; + + /* Skip the length check, no chance of overflow for two ints */ + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for " CLOSE_PORTAL "reuquest"); + } + + return errmsg; +} + +static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + int i; + int err = kutf_helper_receive_named_val(context, &cmd->cmd_input); + + cmd->named_val_err = err; + if (err == KUTF_HELPER_ERR_NONE && + cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { + /* All portal request commands are of format (named u64): + * CMD_NAME=1234 + * where, 1234 is a (variable) sequence number tag. 
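+		 * For example (tag value illustrative, command text assumed to
+		 * match the name macros in the shared test header), an input of
+		 *   GET_TRACE_SNAPSHOT=12
+		 * decodes to PORTAL_CMD_GET_TRACE_SNAPSHOT with sequence tag 12.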
+ */ + for (i = 0; i < PORTAL_TOTAL_CMDS; i++) { + if (strcmp(cmd->cmd_input.val_name, + kbasep_portal_cmd_name_map[i].name)) + continue; + + cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd; + return true; + } + } + + cmd->portal_cmd = PORTAL_CMD_INVALID; + return false; +} + +static void kutf_clk_trace_flag_result(struct kutf_context *context, + enum kutf_result_status result, char const *msg) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (result > data->test_status) { + data->test_status = result; + if (msg) + data->result_msg = msg; + if (data->server_state == PORTAL_STATE_LIVE && + result > KUTF_RESULT_WARN) { + data->server_state = PORTAL_STATE_CLOSING; + } + } +} + +static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + char const *errmsg = NULL; + + BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) != + PORTAL_TOTAL_CMDS); + WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID); + + switch (cmd->portal_cmd) { + case PORTAL_CMD_GET_CLK_RATE_MGR: + /* Fall through */ + case PORTAL_CMD_GET_CLK_RATE_TRACE: + errmsg = kutf_clk_trace_do_get_rate(context, cmd); + break; + case PORTAL_CMD_GET_TRACE_SNAPSHOT: + errmsg = kutf_clk_trace_do_get_snapshot(context, cmd); + break; + case PORTAL_CMD_INC_PM_CTX_CNT: + /* Fall through */ + case PORTAL_CMD_DEC_PM_CTX_CNT: + errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd); + break; + case PORTAL_CMD_CLOSE_PORTAL: + errmsg = kutf_clk_trace_do_close_portal(context, cmd); + break; + case PORTAL_CMD_INVOKE_NOTIFY_42KHZ: + errmsg = kutf_clk_trace_do_invoke_notify_42k(context, cmd); + break; + default: + pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n", + cmd->portal_cmd); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Don't know how to handle portal_cmd: %d", + cmd->portal_cmd); + break; + } + + if (errmsg) + kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); + + return (errmsg == NULL); +} + +/** + * kutf_clk_trace_do_nack_response() - respond a NACK to erroneous input + * @context: KUTF context + * @cmd: The erroneous input request + * + * This function deal with an erroneous input request, and respond with + * a proper 'NACK' message. + */ +static int kutf_clk_trace_do_nack_response(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + int seq; + int err; + char const *errmsg = NULL; + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID); + + if (cmd->named_val_err == KUTF_HELPER_ERR_NONE && + cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { + /* Keep seq number as % 256 */ + seq = cmd->cmd_input.u.val_u64 & 255; + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, MSG: Unknown command '%s'.}", seq, + cmd->cmd_input.val_name); + err = kutf_helper_send_named_str(context, "NACK", + portal_msg_buf); + } else + err = kutf_helper_send_named_str(context, "NACK", + "Wrong portal cmd format (Ref example: CMD_NAME=0X16)"); + + if (err) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send portal NACK response"); + kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); + } + + return err; +} + +/** + * kutf_clk_trace_barebone_check() - Sanity test on the clock tracing + * @context: KUTF context + * + * This function carries out some basic test on the tracing operation: + * 1). GPU idle on test start, trace rate should be 0 (low power state) + * 2). Make sure GPU is powered up, the trace rate should match + * that from the clcok manager's internal recorded rate + * 3). 
If the GPU active transition occurs following 2), there + * must be rate change event from tracing. + */ +void kutf_clk_trace_barebone_check(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + bool fail = false; + bool idle[2] = { false }; + char const *msg = NULL; + int i; + + /* Check consistency if gpu happens to be idle */ + spin_lock(&kbdev->pm.clk_rtm.lock); + idle[0] = kbdev->pm.clk_rtm.gpu_idle; + if (kbdev->pm.clk_rtm.gpu_idle) { + for (i = 0; i < data->nclks; i++) { + if (data->snapshot[i].current_rate) { + /* Idle should have a rate 0 */ + fail = true; + break; + } + } + } + spin_unlock(&kbdev->pm.clk_rtm.lock); + if (fail) { + msg = kutf_dsprintf(&context->fixture_pool, + "GPU Idle not yielding 0-rate"); + pr_err("Trace did not see idle rate\n"); + } else { + /* Make local PM active if not done so yet */ + if (data->pm_ctx_cnt == 0) { + /* Ensure the GPU is powered */ + data->pm_ctx_cnt++; + kutf_set_pm_ctx_active(context); + } + /* Checking the rate is consistent */ + spin_lock(&kbdev->pm.clk_rtm.lock); + idle[1] = kbdev->pm.clk_rtm.gpu_idle; + for (i = 0; i < data->nclks; i++) { + /* Rate match between the manager and the trace */ + if (kbdev->pm.clk_rtm.clks[i]->clock_val != + data->snapshot[i].current_rate) { + fail = true; + break; + } + } + spin_unlock(&kbdev->pm.clk_rtm.lock); + + if (idle[1]) { + msg = kutf_dsprintf(&context->fixture_pool, + "GPU still idle after set_pm_ctx_active"); + pr_err("GPU still idle after set_pm_ctx_active\n"); + } + + if (!msg && fail) { + msg = kutf_dsprintf(&context->fixture_pool, + "Trace rate not matching Clk manager's read"); + pr_err("Trace rate not matching Clk manager's read\n"); + } + } + + if (!msg && idle[0] && !idle[1] && !data->total_update_cnt) { + msg = kutf_dsprintf(&context->fixture_pool, + "Trace update did not occur"); + pr_err("Trace update did not occur\n"); + } + if (msg) + kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, msg); + else if (!data->total_update_cnt) { + msg = kutf_dsprintf(&context->fixture_pool, + "No trace update seen during the test!"); + kutf_clk_trace_flag_result(context, KUTF_RESULT_WARN, msg); + } +} + +static bool kutf_clk_trace_end_of_stream(struct clk_trace_portal_input *cmd) +{ + return (cmd->named_val_err == -EBUSY); +} + +void kutf_clk_trace_no_clks_dummy(struct kutf_context *context) +{ + struct clk_trace_portal_input cmd; + unsigned long timeout = jiffies + HZ * 2; + bool has_cmd; + + while (time_before(jiffies, timeout)) { + if (kutf_helper_pending_input(context)) { + has_cmd = kutf_clk_trace_dequeue_portal_cmd(context, + &cmd); + if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd)) + break; + + kutf_helper_send_named_str(context, "NACK", + "Fatal! No clocks visible, aborting"); + } + msleep(20); + } + + kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL, + "No clocks visble to the portal"); +} + +/** + * mali_kutf_clk_rate_trace_test_portal() - Service portal input + * @context: KUTF context + * + * The test portal operates on input requests. If the input request is one + * of the recognized portal commands, it handles it accordingly. Otherwise + * a negative response 'NACK' is returned. The portal service terminates + * when a 'CLOSE_PORTAL' request is received, or due to an internal error. + * Both case would result in the server_state transitioned to CLOSING. 
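+ *
+ * A typical session, with illustrative sequence numbers and assuming the
+ * command strings defined in the shared test header, might look like:
+ *
+ *   user:  INC_PM_CTX_CNT=1       kernel: ACK {SEQ:1, PM_CTX_CNT:1}
+ *   user:  GET_TRACE_SNAPSHOT=2   kernel: ACK {SEQ:2, SNAPSHOT_ARRAY:[...]}
+ *   user:  CLOSE_PORTAL=3         kernel: ACK {SEQ:3, PM_CTX_CNT:1}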
+ * + * If the portal is closed on request, a sanity test on the clock rate + * trace operation is undertaken via function: + * kutf_clk_trace_barebone_check(); + */ +static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct clk_trace_portal_input new_cmd; + + pr_debug("Test portal service start\n"); + + while (data->server_state == PORTAL_STATE_LIVE) { + if (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd)) + kutf_clk_trace_process_portal_cmd(context, &new_cmd); + else if (kutf_clk_trace_end_of_stream(&new_cmd)) + /* Dequeue on portal input, end of stream */ + data->server_state = PORTAL_STATE_CLOSING; + else + kutf_clk_trace_do_nack_response(context, &new_cmd); + } + + /* Closing, exhausting all the pending inputs with NACKs. */ + if (data->server_state == PORTAL_STATE_CLOSING) { + while (kutf_helper_pending_input(context) && + (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) || + !kutf_clk_trace_end_of_stream(&new_cmd))) { + kutf_helper_send_named_str(context, "NACK", + "Portal closing down"); + } + } + + /* If no portal error, do a barebone test here irrespective + * whatever the portal live session has been testing, which + * is entirely driven by the user-side via portal requests. + */ + if (data->test_status <= KUTF_RESULT_WARN) { + if (data->server_state != PORTAL_STATE_NO_CLK) + kutf_clk_trace_barebone_check(context); + else { + /* No clocks case, NACK 2-sec for the fatal situation */ + kutf_clk_trace_no_clks_dummy(context); + } + } + + /* If we have changed pm_ctx count, drop it back */ + if (data->pm_ctx_cnt) { + /* Although we count on portal requests, it only has material + * impact when from 0 -> 1. So the reverse is a simple one off. + */ + data->pm_ctx_cnt = 0; + kutf_set_pm_ctx_idle(context); + } + + /* Finally log the test result line */ + if (data->test_status < KUTF_RESULT_WARN) + kutf_test_pass(context, data->result_msg); + else if (data->test_status == KUTF_RESULT_WARN) + kutf_test_warn(context, data->result_msg); + else if (data->test_status == KUTF_RESULT_FATAL) + kutf_test_fatal(context, data->result_msg); + else + kutf_test_fail(context, data->result_msg); + + pr_debug("Test end\n"); +} + +/** + * mali_kutf_clk_rate_trace_create_fixture() - Creates the fixture data + * required for mali_kutf_clk_rate_trace_test_portal. + * @context: KUTF context. 
+ * + * Return: Fixture data created on success or NULL on failure + */ +static void *mali_kutf_clk_rate_trace_create_fixture( + struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data; + struct kbase_device *kbdev; + unsigned long rate; + int i; + + /* Acquire the kbase device */ + pr_debug("Finding device\n"); + kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV); + if (kbdev == NULL) { + kutf_test_fail(context, "Failed to find kbase device"); + return NULL; + } + + pr_debug("Creating fixture\n"); + data = kutf_mempool_alloc(&context->fixture_pool, + sizeof(struct kutf_clk_rate_trace_fixture_data)); + if (!data) + return NULL; + + *data = (const struct kutf_clk_rate_trace_fixture_data) { 0 }; + pr_debug("Hooking up the test portal to kbdev clk rate trace\n"); + spin_lock(&kbdev->pm.clk_rtm.lock); + + if (g_ptr_portal_data != NULL) { + pr_warn("Test portal is already in use, run aborted\n"); + kutf_test_fail(context, "Portal allows single session only"); + spin_unlock(&kbdev->pm.clk_rtm.lock); + return NULL; + } + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->pm.clk_rtm.clks[i]) { + data->nclks++; + if (kbdev->pm.clk_rtm.gpu_idle) + rate = 0; + else + rate = kbdev->pm.clk_rtm.clks[i]->clock_val; + data->snapshot[i].previous_rate = rate; + data->snapshot[i].current_rate = rate; + } + } + + spin_unlock(&kbdev->pm.clk_rtm.lock); + + if (data->nclks) { + /* Subscribe this test server portal */ + data->listener.notify = kutf_portal_trace_write; + data->invoke_notify = false; + + kbase_clk_rate_trace_manager_subscribe( + &kbdev->pm.clk_rtm, &data->listener); + /* Update the kutf_server_portal fixture_data pointer */ + g_ptr_portal_data = data; + } + + data->kbdev = kbdev; + data->result_msg = NULL; + data->test_status = KUTF_RESULT_PASS; + + if (data->nclks == 0) { + data->server_state = PORTAL_STATE_NO_CLK; + pr_debug("Kbdev has no clocks for rate trace"); + } else + data->server_state = PORTAL_STATE_LIVE; + + pr_debug("Created fixture\n"); + + return data; +} + +/** + * Destroy fixture data previously created by + * mali_kutf_clk_rate_trace_create_fixture. + * + * @context: KUTF context. + */ +static void mali_kutf_clk_rate_trace_remove_fixture( + struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + + if (data->nclks) { + /* Clean up the portal trace write arrangement */ + g_ptr_portal_data = NULL; + + kbase_clk_rate_trace_manager_unsubscribe( + &kbdev->pm.clk_rtm, &data->listener); + } + pr_debug("Destroying fixture\n"); + kbase_release_device(kbdev); + pr_debug("Destroyed fixture\n"); +} + +/** + * mali_kutf_clk_rate_trace_test_module_init() - Entry point for test mdoule. 
+ */ +int mali_kutf_clk_rate_trace_test_module_init(void) +{ + struct kutf_suite *suite; + unsigned int filters; + union kutf_callback_data suite_data = { 0 }; + + pr_debug("Creating app\n"); + + g_ptr_portal_data = NULL; + kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME); + + if (!kutf_app) { + pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME + " failed!\n"); + return -ENOMEM; + } + + pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME); + suite = kutf_create_suite_with_filters_and_data( + kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1, + mali_kutf_clk_rate_trace_create_fixture, + mali_kutf_clk_rate_trace_remove_fixture, + KUTF_F_TEST_GENERIC, + suite_data); + + if (!suite) { + pr_warn("Creation of suite %s failed!\n", + CLK_RATE_TRACE_SUITE_NAME); + kutf_destroy_application(kutf_app); + return -ENOMEM; + } + + filters = suite->suite_default_flags; + kutf_add_test_with_filters( + suite, 0x0, CLK_RATE_TRACE_PORTAL, + mali_kutf_clk_rate_trace_test_portal, + filters); + + pr_debug("Init complete\n"); + return 0; +} + +/** + * mali_kutf_clk_rate_trace_test_module_exit() - Module exit point for this + * test. + */ +void mali_kutf_clk_rate_trace_test_module_exit(void) +{ + pr_debug("Exit start\n"); + kutf_destroy_application(kutf_app); + pr_debug("Exit complete\n"); +} + + +module_init(mali_kutf_clk_rate_trace_test_module_init); +module_exit(mali_kutf_clk_rate_trace_test_module_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h new file mode 100644 index 000000000000..f46afd5086bd --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h @@ -0,0 +1,148 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KUTF_CLK_RATE_TRACE_TEST_H_ +#define _KUTF_CLK_RATE_TRACE_TEST_H_ + +#define CLK_RATE_TRACE_APP_NAME "clk_rate_trace" +#define CLK_RATE_TRACE_SUITE_NAME "rate_trace" +#define CLK_RATE_TRACE_PORTAL "portal" + +/** + * enum kbasep_clk_rate_trace_req - request command to the clock rate trace + * service portal. + * + * @PORTAL_CMD_GET_CLK_RATE_MGR: Request the clock trace manager internal + * data record. On a positive acknowledgement + * the prevailing clock rates and the GPU idle + * condition flag are returned. + * @PORTAL_CMD_GET_CLK_RATE_TRACE: Request the clock trace portal to return its + * data record. On a positive acknowledgement + * the last trace recorded clock rates and the + * GPU idle condition flag are returned. + * @PORTAL_CMD_GET_TRACE_SNAPSHOT: Request the clock trace portal to return its + * current snapshot data record. 
On a positive + * acknowledgement the snapshot array matching + * the number of clocks are returned. It also + * starts a fresh snapshot inside the clock + * trace portal. + * @PORTAL_CMD_INC_PM_CTX_CNT: Request the clock trace portal to increase + * its internal PM_CTX_COUNT. If this increase + * yielded a count of 0 -> 1 change, the portal + * will initiate a PM_CTX_ACTIVE call to the + * Kbase power management. Futher increase + * requests will limit to only affect the + * portal internal count value. + * @PORTAL_CMD_DEC_PM_CTX_CNT: Request the clock trace portal to decrease + * its internal PM_CTX_COUNT. If this decrease + * yielded a count of 1 -> 0 change, the portal + * will initiate a PM_CTX_IDLE call to the + * Kbase power management. + * @PORTAL_CMD_CLOSE_PORTAL: Inform the clock trace portal service the + * client has completed its session. The portal + * will start the close down action. If no + * error has occurred during the dynamic + * interactive session, an inherent basic test + * carrying out some sanity check on the clock + * trace is undertaken. + * @PORTAL_CMD_INVOKE_NOTIFY_42KHZ: Invokes all clock rate trace manager callbacks + * for the top clock domain with a new GPU frequency + * set to 42 kHZ. + * @PORTAL_CMD_INVALID: Valid commands termination marker. Must be + * the highest enumeration value, as it + * represents valid command array size. + * @PORTAL_TOTAL_CMDS: Alias of PORTAL_CMD_INVALID. + */ +/* PORTAL_CMD_INVALID must be the last one, serving the size */ +enum kbasep_clk_rate_trace_req { + PORTAL_CMD_GET_CLK_RATE_MGR, + PORTAL_CMD_GET_CLK_RATE_TRACE, + PORTAL_CMD_GET_TRACE_SNAPSHOT, + PORTAL_CMD_INC_PM_CTX_CNT, + PORTAL_CMD_DEC_PM_CTX_CNT, + PORTAL_CMD_CLOSE_PORTAL, + PORTAL_CMD_INVOKE_NOTIFY_42KHZ, + PORTAL_CMD_INVALID, + PORTAL_TOTAL_CMDS = PORTAL_CMD_INVALID, +}; + +/** + * Portal service request command names. The portal request consists of a kutf + * named u64-value. For those above enumerated PORTAL_CMD, the names defined + * here are used to mark the name and then followed with a sequence number + * value. Example (manual script here for illustration): + * exec 5<>run # open the portal kutf run as fd-5 + * echo GET_CLK_RATE_MGR=1 >&5 # send the cmd and sequence number 1 + * head -n 1 <&5 # read back the 1-line server reseponse + * ACK="{SEQ:1, RATE:[0x1ad27480], GPU_IDLE:1}" # response string + * echo GET_TRACE_SNAPSHOT=1 >&5 # send the cmd and sequence number 1 + * head -n 1 <&5 # read back the 1-line server reseponse + * ACK="{SEQ:1, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" + * echo CLOSE_PORTAL=1 >&5 # close the portal + * cat <&5 # read back all the response lines + * ACK="{SEQ:1, PM_CTX_CNT:0}" # response to close command + * KUTF_RESULT_PASS:(explicit pass) # internal sanity test passed. + * exec 5>&- # close the service portal fd. + * + * Expected request command return format: + * GET_CLK_RATE_MGR: ACK="{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}" + * Note, the above contains 2-clock with rates in [], GPU idle + * GET_CLK_RATE_TRACE: ACK="{SEQ:6, RATE:[0x1ad27480], GPU_IDLE:0}" + * Note, 1-clock with rate in [], GPU not idle + * GET_TRACE_SNAPSHOT: ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" + * Note, 1-clock, (start_rate : 0, last_rate : 0x1ad27480, + * trace_rate_up_count: 1, trace_rate_down_count : 0) + * For the specific sample case here, there is a single rate_trace event + * that yielded a rate increase change. No rate drop event recorded in the + * reporting snapshot duration. 
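A client consuming the ACK strings shown above only needs lightweight string parsing. The sketch below pulls SEQ and GPU_IDLE out of a GET_CLK_RATE_MGR-style reply; the helper name and the fixed sample string are illustrative only and not part of the driver.

#include <stdio.h>
#include <string.h>

/* Extract SEQ and GPU_IDLE from a reply such as
 * "{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}".
 * Returns 0 on success, -1 if either field is missing.
 */
static int parse_rate_ack(const char *ack, int *seq, int *gpu_idle)
{
        const char *p;

        p = strstr(ack, "SEQ:");
        if (!p || sscanf(p, "SEQ:%d", seq) != 1)
                return -1;

        p = strstr(ack, "GPU_IDLE:");
        if (!p || sscanf(p, "GPU_IDLE:%d", gpu_idle) != 1)
                return -1;

        return 0;
}

int main(void)
{
        int seq, idle;
        const char *ack = "{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}";

        if (!parse_rate_ack(ack, &seq, &idle))
                printf("seq=%d gpu_idle=%d\n", seq, idle);
        return 0;
}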
+ * INC_PM_CTX_CNT: ACK="{SEQ:1, PM_CTX_CNT:1}" + * Note, after the increment, M_CTX_CNT is 1. (i.e. 0 -> 1) + * DEC_PM_CTX_CNT: ACK="{SEQ:3, PM_CTX_CNT:0}" + * Note, after the decrement, PM_CTX_CNT is 0. (i.e. 1 -> 0) + * CLOSE_PORTAL: ACK="{SEQ:1, PM_CTX_CNT:1}" + * Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be + * dropped down to 0 as part of the portal close clean up. + */ +#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR" +#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE" +#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT" +#define INC_PM_CTX_CNT "INC_PM_CTX_CNT" +#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT" +#define CLOSE_PORTAL "CLOSE_PORTAL" +#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ" + +/** + * Portal service response tag names. The response consists of a kutf + * named string-value. In case of a 'NACK' (negative acknowledgement), it + * can be one of the two formats: + * 1. NACK="{SEQ:2, MSG:xyzed}" # NACK on command with sequence tag-2. + * Note, the portal has received a valid name and valid sequence number + * but can't carry-out the request, reason in the MSG field. + * 2. NACK="Failing-message" + * Note, unable to parse a valid name or valid sequence number, + * or some internal error condition. Reason in the quoted string. + */ +#define ACK "ACK" +#define NACK "NACK" +#define MAX_REPLY_NAME_LEN 32 + +#endif /* _KUTF_CLK_RATE_TRACE_TEST_H_ */ diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kbuild b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kbuild new file mode 100644 index 000000000000..ca8c51273b4c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kbuild @@ -0,0 +1,26 @@ +# +# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android + +obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o + +mali_kutf_irq_test-y := mali_kutf_irq_test_main.o diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kconfig b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kconfig new file mode 100644 index 000000000000..4a3863afc9bf --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Kconfig @@ -0,0 +1,29 @@ +# +# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +config MALI_IRQ_LATENCY + tristate "Mali GPU IRQ latency measurement" + depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF + default m + help + This option will build a test module mali_kutf_irq_test that + can determine the latency of the Mali GPU IRQ on your system. + Choosing M here will generate a single module called mali_kutf_irq_test. diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Makefile b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Makefile new file mode 100644 index 000000000000..bc4d654a90ca --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/Makefile @@ -0,0 +1,51 @@ +# +# (C) COPYRIGHT 2015, 2017-2018, 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +# linux build system bootstrap for out-of-tree module + +# default to building for the host +ARCH ?= $(shell uname -m) + +ifeq ($(KDIR),) +$(error Must specify KDIR to point to the kernel to target)) +endif + +TEST_CCFLAGS := \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -DMALI_USE_CSF=$(MALI_USE_CSF) \ + $(SCONS_CFLAGS) \ + -I$(CURDIR)/../include \ + -I$(CURDIR)/../../../../../../include \ + -I$(CURDIR)/../../../ \ + -I$(CURDIR)/../../ \ + -I$(CURDIR)/../../backend/gpu \ + -I$(CURDIR)/../../debug \ + -I$(CURDIR)/../../debug/backend \ + -I$(CURDIR)/ \ + -I$(srctree)/drivers/staging/android \ + -I$(srctree)/include/linux + +all: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules + +clean: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/build.bp new file mode 100644 index 000000000000..90efdcf9ad9c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/build.bp @@ -0,0 +1,35 @@ +/* + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + +bob_kernel_module { + name: "mali_kutf_irq_test", + defaults: [ + "mali_kbase_shared_config_defaults", + "kernel_test_includes", + ], + srcs: [ + "Kbuild", + "mali_kutf_irq_test_main.c", + ], + extra_symbols: [ + "mali_kbase", + "kutf", + ], + enabled: false, + base_build_kutf: { + enabled: true, + kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"], + }, +} diff --git a/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c new file mode 100644 index 000000000000..57ac590e4a76 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c @@ -0,0 +1,278 @@ +/* + * + * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include "mali_kbase.h" +#include +#include + +#include +#include + +/* + * This file contains the code which is used for measuring interrupt latency + * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is + * used with this purpose and it is called within KUTF framework - a kernel + * unit test framework. The measured latency provided by this test should + * be representative for the latency of the Mali JOB/MMU IRQs as well. + */ + +/* KUTF test application pointer for this test */ +struct kutf_application *irq_app; + +/** + * struct kutf_irq_fixture data - test fixture used by the test functions. + * @kbdev: kbase device for the GPU. + * + */ +struct kutf_irq_fixture_data { + struct kbase_device *kbdev; +}; + +#define SEC_TO_NANO(s) ((s)*1000000000LL) + +/* ID for the GPU IRQ */ +#define GPU_IRQ_HANDLER 2 + +#define NR_TEST_IRQS ((u32)1000000) + +/* IRQ for the test to trigger. 
Currently POWER_CHANGED_SINGLE as it is + * otherwise unused in the DDK + */ +#define TEST_IRQ POWER_CHANGED_SINGLE + +#define IRQ_TIMEOUT HZ + +/* Kernel API for setting irq throttle hook callback and irq time in us*/ +extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + irq_handler_t custom_handler, + int irq_type); +extern irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); + +static DECLARE_WAIT_QUEUE_HEAD(wait); +static bool triggered; +static u64 irq_time; + +static void *kbase_untag(void *ptr) +{ + return (void *)(((uintptr_t) ptr) & ~3); +} + +/** + * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler + * @irq: IRQ number + * @data: Data associated with this IRQ + * + * Return: state of the IRQ + */ +static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) +{ + struct kbase_device *kbdev = kbase_untag(data); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); + irqreturn_t result; + u64 tval; + bool has_test_irq = val & TEST_IRQ; + + if (has_test_irq) { + tval = ktime_get_real_ns(); + /* Clear the test source only here */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), + TEST_IRQ); + /* Remove the test IRQ status bit */ + val = val ^ TEST_IRQ; + } + + result = kbase_gpu_irq_test_handler(irq, data, val); + + if (has_test_irq) { + irq_time = tval; + triggered = true; + wake_up(&wait); + result = IRQ_HANDLED; + } + + return result; +} + +/** + * mali_kutf_irq_default_create_fixture() - Creates the fixture data required + * for all the tests in the irq suite. + * @context: KUTF context. + * + * Return: Fixture data created on success or NULL on failure + */ +static void *mali_kutf_irq_default_create_fixture( + struct kutf_context *context) +{ + struct kutf_irq_fixture_data *data; + + data = kutf_mempool_alloc(&context->fixture_pool, + sizeof(struct kutf_irq_fixture_data)); + + if (!data) + goto fail; + + /* Acquire the kbase device */ + data->kbdev = kbase_find_device(-1); + if (data->kbdev == NULL) { + kutf_test_fail(context, "Failed to find kbase device"); + goto fail; + } + + return data; + +fail: + return NULL; +} + +/** + * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously + * created by mali_kutf_irq_default_create_fixture. + * + * @context: KUTF context. + */ +static void mali_kutf_irq_default_remove_fixture( + struct kutf_context *context) +{ + struct kutf_irq_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + + kbase_release_device(kbdev); +} + +/** + * mali_kutf_irq_latency() - measure GPU IRQ latency + * @context: kutf context within which to perform the test + * + * The test triggers IRQs manually, and measures the + * time between triggering the IRQ and the IRQ handler being executed. + * + * This is not a traditional test, in that the pass/fail status has little + * meaning (other than indicating that the IRQ handler executed at all). Instead + * the results are in the latencies provided with the test result. There is no + * meaningful pass/fail result that can be obtained here, instead the latencies + * are provided for manual analysis only. 
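The numbers the test reports are plain min/max/average bookkeeping over per-IRQ deltas. A user-space analogue of that accumulation, with back-to-back clock_gettime() reads standing in for the in-kernel trigger/handler timestamps, might look like this (sample count and helper names are illustrative).

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NR_SAMPLES 1000

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

int main(void)
{
        uint64_t min = UINT64_MAX, max = 0, total = 0;
        int i;

        for (i = 0; i < NR_SAMPLES; i++) {
                uint64_t start = now_ns();
                /* Stand-in for "trigger IRQ and wait for the handler". */
                uint64_t end = now_ns();
                uint64_t delta = end - start;

                if (delta < min)
                        min = delta;
                if (delta > max)
                        max = delta;
                total += delta;
        }

        printf("Min = %" PRIu64 "ns, Max = %" PRIu64 "ns, Average = %" PRIu64 "ns\n",
               min, max, total / NR_SAMPLES);
        return 0;
}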
+ */ +static void mali_kutf_irq_latency(struct kutf_context *context) +{ + struct kutf_irq_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + u64 min_time = U64_MAX, max_time = 0, average_time = 0; + u32 i; + const char *results; + + /* Force GPU to be powered */ + kbase_pm_context_active(kbdev); + kbase_pm_wait_for_desired_state(kbdev); + + kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, + GPU_IRQ_HANDLER); + + for (i = 1; i <= NR_TEST_IRQS; i++) { + u64 start_time = ktime_get_real_ns(); + + triggered = false; + + /* Trigger fake IRQ */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + TEST_IRQ); + + if (wait_event_timeout(wait, triggered, IRQ_TIMEOUT) == 0) { + /* Wait extra time to see if it would come */ + wait_event_timeout(wait, triggered, 10 * IRQ_TIMEOUT); + break; + } + + if ((irq_time - start_time) < min_time) + min_time = irq_time - start_time; + if ((irq_time - start_time) > max_time) + max_time = irq_time - start_time; + average_time += irq_time - start_time; + + udelay(10); + } + + /* Go back to default handler */ + kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER); + + kbase_pm_context_idle(kbdev); + + if (i > NR_TEST_IRQS) { + do_div(average_time, NR_TEST_IRQS); + results = kutf_dsprintf(&context->fixture_pool, + "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", + min_time, max_time, average_time); + kutf_test_pass(context, results); + } else { + results = kutf_dsprintf(&context->fixture_pool, + "Timed out for the %u-th IRQ (loop_limit: %u), triggered late: %d\n", + i, NR_TEST_IRQS, triggered); + kutf_test_fail(context, results); + } +} + +/** + * Module entry point for this test. + */ +int mali_kutf_irq_test_main_init(void) +{ + struct kutf_suite *suite; + + irq_app = kutf_create_application("irq"); + + if (NULL == irq_app) { + pr_warn("Creation of test application failed!\n"); + return -ENOMEM; + } + + suite = kutf_create_suite(irq_app, "irq_default", + 1, mali_kutf_irq_default_create_fixture, + mali_kutf_irq_default_remove_fixture); + + if (NULL == suite) { + pr_warn("Creation of test suite failed!\n"); + kutf_destroy_application(irq_app); + return -ENOMEM; + } + + kutf_add_test(suite, 0x0, "irq_latency", + mali_kutf_irq_latency); + return 0; +} + +/** + * Module exit point for this test. + */ +void mali_kutf_irq_test_main_exit(void) +{ + kutf_destroy_application(irq_app); +} + +module_init(mali_kutf_irq_test_main_init); +module_exit(mali_kutf_irq_test_main_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("ARM Ltd."); +MODULE_VERSION("1.0"); diff --git a/drivers/gpu/arm/b_r26p0/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/b_r26p0/thirdparty/mali_kbase_mmap.c new file mode 100644 index 000000000000..f266d8e6f5de --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/thirdparty/mali_kbase_mmap.c @@ -0,0 +1,366 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "linux/mman.h" +#include "../mali_kbase.h" + +/* mali_kbase_mmap.c + * + * This file contains Linux specific implementation of + * kbase_context_get_unmapped_area() interface. + */ + + +/** + * align_and_check() - Align the specified pointer to the provided alignment and + * check that it is still in range. + * @gap_end: Highest possible start address for allocation (end of gap in + * address space) + * @gap_start: Start address of current memory area / gap in address space + * @info: vm_unmapped_area_info structure passed to caller, containing + * alignment, length and limits for the allocation + * @is_shader_code: True if the allocation is for shader code (which has + * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk + * + * Return: true if gap_end is now aligned correctly and is still in range, + * false otherwise + */ +static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, + struct vm_unmapped_area_info *info, bool is_shader_code, + bool is_same_4gb_page) +{ + /* Compute highest gap address at the desired alignment */ + (*gap_end) -= info->length; + (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; + + if (is_shader_code) { + /* Check for 4GB boundary */ + if (0 == (*gap_end & BASE_MEM_MASK_4GB)) + (*gap_end) -= (info->align_offset ? info->align_offset : + info->length); + if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) + (*gap_end) -= (info->align_offset ? info->align_offset : + info->length); + + if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + + info->length) & BASE_MEM_MASK_4GB)) + return false; + } else if (is_same_4gb_page) { + unsigned long start = *gap_end; + unsigned long end = *gap_end + info->length; + unsigned long mask = ~((unsigned long)U32_MAX); + + /* Check if 4GB boundary is straddled */ + if ((start & mask) != ((end - 1) & mask)) { + unsigned long offset = end - (end & mask); + /* This is to ensure that alignment doesn't get + * disturbed in an attempt to prevent straddling at + * 4GB boundary. The GPU VA is aligned to 2MB when the + * allocation size is > 2MB and there is enough CPU & + * GPU virtual space. + */ + unsigned long rounded_offset = + ALIGN(offset, info->align_mask + 1); + + start -= rounded_offset; + end -= rounded_offset; + + *gap_end = start; + + /* The preceding 4GB boundary shall not get straddled, + * even after accounting for the alignment, as the + * size of allocation is limited to 4GB and the initial + * start location was already aligned. + */ + WARN_ON((start & mask) != ((end - 1) & mask)); + } + } + + + if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) + return false; + + + return true; +} + +/** + * kbase_unmapped_area_topdown() - allocates new areas top-down from + * below the stack limit. 
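The is_same_4gb_page handling above can be reproduced in isolation. The following user-space sketch applies the same "shift the range down by a rounded offset" correction when a candidate range would straddle a 4GB boundary; the function and macro names here are illustrative and not part of the driver.

#include <stdint.h>
#include <stdio.h>

/* Round x up to a multiple of 'align' (align must be a power of two). */
#define ALIGN_UP(x, align) (((x) + ((align) - 1)) & ~((uint64_t)(align) - 1))

/* Shift [start, start + len) down so it does not straddle a 4GB boundary,
 * preserving 'align'-byte alignment of the start address.
 */
static uint64_t avoid_4gb_straddle(uint64_t start, uint64_t len, uint64_t align)
{
        const uint64_t mask = ~(uint64_t)UINT32_MAX;    /* top 32 bits */
        uint64_t end = start + len;

        if ((start & mask) != ((end - 1) & mask)) {
                /* Distance from the boundary to the end of the range,
                 * rounded up so the corrected start stays aligned.
                 */
                uint64_t offset = end - (end & mask);

                start -= ALIGN_UP(offset, align);
        }
        return start;
}

int main(void)
{
        /* 8MB range that crosses the 4GB mark at 0x100000000. */
        uint64_t fixed = avoid_4gb_straddle(0xFFC00000ULL, 8ULL << 20,
                                            2ULL << 20);

        printf("new start: 0x%llx\n", (unsigned long long)fixed);
        return 0;
}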
+ * @info: Information about the memory area to allocate. + * @is_shader_code: Boolean which denotes whether the allocated area is + * intended for the use by shader core in which case a + * special alignment requirements apply. + * @is_same_4gb_page: Boolean which indicates whether the allocated area needs + * to reside completely within a 4GB chunk. + * + * The unmapped_area_topdown() function in the Linux kernel is not exported + * using EXPORT_SYMBOL_GPL macro. To allow us to call this function from a + * module and also make use of the fact that some of the requirements for + * the unmapped area are known in advance, we implemented an extended version + * of this function and prefixed it with 'kbase_'. + * + * The difference in the call parameter list comes from the fact that + * kbase_unmapped_area_topdown() is called with additional parameters which + * are provided to indicate whether the allocation is for a shader core memory, + * which has additional alignment requirements, and whether the allocation can + * straddle a 4GB boundary. + * + * The modification of the original Linux function lies in how the computation + * of the highest gap address at the desired alignment is performed once the + * gap with desirable properties is found. For this purpose a special function + * is introduced (@ref align_and_check()) which beside computing the gap end + * at the desired alignment also performs additional alignment checks for the + * case when the memory is executable shader core memory, for which it is + * ensured that the gap does not end on a 4GB boundary, and for the case when + * memory needs to be confined within a 4GB chunk. + * + * Return: address of the found gap end (high limit) if area is found; + * -ENOMEM if search is unsuccessful +*/ + +static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info + *info, bool is_shader_code, bool is_same_4gb_page) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long length, low_limit, high_limit, gap_start, gap_end; + + /* Adjust search length to account for worst case alignment overhead */ + length = info->length + info->align_mask; + if (length < info->length) + return -ENOMEM; + + /* + * Adjust search limits by the desired length. + * See implementation comment at top of unmapped_area(). + */ + gap_end = info->high_limit; + if (gap_end < length) + return -ENOMEM; + high_limit = gap_end - length; + + if (info->low_limit > high_limit) + return -ENOMEM; + low_limit = info->low_limit + length; + + /* Check highest gap, which does not precede any rbtree node */ + gap_start = mm->highest_vm_end; + if (gap_start <= high_limit) { + if (align_and_check(&gap_end, gap_start, info, + is_shader_code, is_same_4gb_page)) + return gap_end; + } + + /* Check if rbtree root looks promising */ + if (RB_EMPTY_ROOT(&mm->mm_rb)) + return -ENOMEM; + vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); + if (vma->rb_subtree_gap < length) + return -ENOMEM; + + while (true) { + /* Visit right subtree if it looks promising */ + gap_start = vma->vm_prev ? 
vma->vm_prev->vm_end : 0; + if (gap_start <= high_limit && vma->vm_rb.rb_right) { + struct vm_area_struct *right = + rb_entry(vma->vm_rb.rb_right, + struct vm_area_struct, vm_rb); + if (right->rb_subtree_gap >= length) { + vma = right; + continue; + } + } + +check_current: + /* Check if current node has a suitable gap */ + gap_end = vma->vm_start; + if (gap_end < low_limit) + return -ENOMEM; + if (gap_start <= high_limit && gap_end - gap_start >= length) { + /* We found a suitable gap. Clip it with the original + * high_limit. */ + if (gap_end > info->high_limit) + gap_end = info->high_limit; + + if (align_and_check(&gap_end, gap_start, info, + is_shader_code, is_same_4gb_page)) + return gap_end; + } + + /* Visit left subtree if it looks promising */ + if (vma->vm_rb.rb_left) { + struct vm_area_struct *left = + rb_entry(vma->vm_rb.rb_left, + struct vm_area_struct, vm_rb); + if (left->rb_subtree_gap >= length) { + vma = left; + continue; + } + } + + /* Go back up the rbtree to find next candidate node */ + while (true) { + struct rb_node *prev = &vma->vm_rb; + + if (!rb_parent(prev)) + return -ENOMEM; + vma = rb_entry(rb_parent(prev), + struct vm_area_struct, vm_rb); + if (prev == vma->vm_rb.rb_right) { + gap_start = vma->vm_prev ? + vma->vm_prev->vm_end : 0; + goto check_current; + } + } + } + + return -ENOMEM; +} + + +/* This function is based on Linux kernel's arch_get_unmapped_area, but + * simplified slightly. Modifications come from the fact that some values + * about the memory area are known in advance. + */ +unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, + const unsigned long addr, const unsigned long len, + const unsigned long pgoff, const unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_unmapped_area_info info; + unsigned long align_offset = 0; + unsigned long align_mask = 0; + unsigned long high_limit = mm->mmap_base; + unsigned long low_limit = PAGE_SIZE; + int cpu_va_bits = BITS_PER_LONG; + int gpu_pc_bits = + kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + bool is_shader_code = false; + bool is_same_4gb_page = false; + unsigned long ret; + + /* err on fixed address */ + if ((flags & MAP_FIXED) || addr) + return -EINVAL; + +#ifdef CONFIG_64BIT + /* too big? */ + if (len > TASK_SIZE - SZ_2M) + return -ENOMEM; + + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + + high_limit = min_t(unsigned long, mm->mmap_base, + (kctx->same_va_end << PAGE_SHIFT)); + + /* If there's enough (> 33 bits) of GPU VA space, align + * to 2MB boundaries. 
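The align_offset/align_mask pair chosen here is later consumed by align_and_check(), whose first two statements turn a gap's end address into the highest suitably aligned start address. A stand-alone sketch of that arithmetic, using the same offset/mask convention, is shown below for illustration.

#include <stdint.h>
#include <stdio.h>

/* Highest 'length'-byte start address at or below 'gap_end' for which
 * (start - align_offset) is a multiple of (align_mask + 1), mirroring the
 * first two statements of align_and_check().
 */
static uint64_t topdown_aligned_start(uint64_t gap_end, uint64_t length,
                                      uint64_t align_offset,
                                      uint64_t align_mask)
{
        gap_end -= length;
        gap_end -= (gap_end - align_offset) & align_mask;
        return gap_end;
}

int main(void)
{
        /* 4MB allocation below 0x7f5a123456, aligned to 2MB (offset 0). */
        uint64_t start = topdown_aligned_start(0x7f5a123456ULL, 4ULL << 20,
                                               0, (2ULL << 20) - 1);

        printf("candidate start: 0x%llx\n", (unsigned long long)start);
        return 0;
}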
+ */ + if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { + if (len >= SZ_2M) { + align_offset = SZ_2M; + align_mask = SZ_2M - 1; + } + } + + low_limit = SZ_2M; + } else { + cpu_va_bits = 32; + } +#endif /* CONFIG_64BIT */ + if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && + (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { + int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + struct kbase_va_region *reg; + + /* Need to hold gpu vm lock when using reg */ + kbase_gpu_vm_lock(kctx); + reg = kctx->pending_regions[cookie]; + if (!reg) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + if (!(reg->flags & KBASE_REG_GPU_NX)) { + if (cpu_va_bits > gpu_pc_bits) { + align_offset = 1ULL << gpu_pc_bits; + align_mask = align_offset - 1; + is_shader_code = true; + } + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + unsigned long extent_bytes = + (unsigned long)(reg->extent << PAGE_SHIFT); + /* kbase_check_alloc_sizes() already satisfies + * these checks, but they're here to avoid + * maintenance hazards due to the assumptions + * involved */ + WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT)); + WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT)); + WARN_ON(!is_power_of_2(extent_bytes)); + align_mask = extent_bytes - 1; + align_offset = + extent_bytes - (reg->initial_commit << PAGE_SHIFT); + } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { + is_same_4gb_page = true; + } + kbase_gpu_vm_unlock(kctx); +#ifndef CONFIG_64BIT + } else { + return current->mm->get_unmapped_area( + kctx->filp, addr, len, pgoff, flags); +#endif + } + + info.flags = 0; + info.length = len; + info.low_limit = low_limit; + info.high_limit = high_limit; + info.align_offset = align_offset; + info.align_mask = align_mask; + + ret = kbase_unmapped_area_topdown(&info, is_shader_code, + is_same_4gb_page); + + if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && + high_limit < (kctx->same_va_end << PAGE_SHIFT)) { + /* Retry above mmap_base */ + info.low_limit = mm->mmap_base; + info.high_limit = min_t(u64, TASK_SIZE, + (kctx->same_va_end << PAGE_SHIFT)); + + ret = kbase_unmapped_area_topdown(&info, is_shader_code, + is_same_4gb_page); + } + + return ret; +} diff --git a/drivers/gpu/arm/b_r26p0/tl/backend/mali_kbase_timeline_jm.c b/drivers/gpu/arm/b_r26p0/tl/backend/mali_kbase_timeline_jm.c new file mode 100644 index 000000000000..c368ac7288da --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/backend/mali_kbase_timeline_jm.c @@ -0,0 +1,97 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "../mali_kbase_tracepoints.h" +#include "../mali_kbase_timeline.h" +#include "../mali_kbase_timeline_priv.h" + +#include + +void kbase_create_timeline_objects(struct kbase_device *kbdev) +{ + unsigned int lpu_id; + unsigned int as_nr; + struct kbase_context *kctx; + struct kbase_timeline *timeline = kbdev->timeline; + struct kbase_tlstream *summary = + &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; + + /* Summarize the LPU objects. */ + for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { + u32 *lpu = + &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + __kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu); + } + + /* Summarize the Address Space objects. */ + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); + + /* Create GPU object and make it retain all LPUs and address spaces. */ + __kbase_tlstream_tl_new_gpu(summary, + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, + kbdev->gpu_props.num_cores); + + for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { + void *lpu = + &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + __kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev); + } + + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + __kbase_tlstream_tl_lifelink_as_gpu(summary, + &kbdev->as[as_nr], + kbdev); + + /* Lock the context list, to ensure no changes to the list are made + * while we're summarizing the contexts and their contents. + */ + mutex_lock(&kbdev->kctx_list_lock); + + /* For each context in the device... */ + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* Summarize the context itself */ + __kbase_tlstream_tl_new_ctx(summary, + kctx, + kctx->id, + (u32)(kctx->tgid)); + }; + + /* Reset body stream buffers while holding the kctx lock. + * This ensures we can't fire both summary and normal tracepoints for + * the same objects. + * If we weren't holding the lock, it's possible that the summarized + * objects could have been created, destroyed, or used after we + * constructed the summary stream tracepoints, but before we reset + * the body stream, resulting in losing those object event tracepoints. + */ + kbase_timeline_streams_body_reset(timeline); + + mutex_unlock(&kbdev->kctx_list_lock); + + /* Static object are placed into summary packet that needs to be + * transmitted first. Flush all streams to make it available to + * user space. + */ + kbase_timeline_streams_flush(timeline); +} \ No newline at end of file diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.c b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.c new file mode 100644 index 000000000000..88fba83840af --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.c @@ -0,0 +1,274 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_timeline.h" +#include "mali_kbase_timeline_priv.h" +#include "mali_kbase_tracepoints.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* The period of autoflush checker execution in milliseconds. */ +#define AUTOFLUSH_INTERVAL 1000 /* ms */ + +/*****************************************************************************/ + +/* These values are used in mali_kbase_tracepoints.h + * to retrieve the streams from a kbase_timeline instance. + */ +const size_t __obj_stream_offset = + offsetof(struct kbase_timeline, streams) + + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_OBJ; + +const size_t __aux_stream_offset = + offsetof(struct kbase_timeline, streams) + + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_AUX; + +/** + * kbasep_timeline_autoflush_timer_callback - autoflush timer callback + * @timer: Timer list + * + * Timer is executed periodically to check if any of the stream contains + * buffer ready to be submitted to user space. + */ +static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer) +{ + enum tl_stream_type stype; + int rcode; + struct kbase_timeline *timeline = + container_of(timer, struct kbase_timeline, autoflush_timer); + + CSTD_UNUSED(timer); + + for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; + stype++) { + struct kbase_tlstream *stream = &timeline->streams[stype]; + + int af_cnt = atomic_read(&stream->autoflush_counter); + + /* Check if stream contain unflushed data. */ + if (af_cnt < 0) + continue; + + /* Check if stream should be flushed now. */ + if (af_cnt != atomic_cmpxchg( + &stream->autoflush_counter, + af_cnt, + af_cnt + 1)) + continue; + if (!af_cnt) + continue; + + /* Autoflush this stream. */ + kbase_tlstream_flush_stream(stream); + } + + if (atomic_read(&timeline->autoflush_timer_active)) + rcode = mod_timer( + &timeline->autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); +} + + + +/*****************************************************************************/ + +int kbase_timeline_init(struct kbase_timeline **timeline, + atomic_t *timeline_flags) +{ + enum tl_stream_type i; + struct kbase_timeline *result; + + if (!timeline || !timeline_flags) + return -EINVAL; + + result = kzalloc(sizeof(*result), GFP_KERNEL); + if (!result) + return -ENOMEM; + + mutex_init(&result->reader_lock); + init_waitqueue_head(&result->event_queue); + + /* Prepare stream structures. */ + for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) + kbase_tlstream_init(&result->streams[i], i, + &result->event_queue); + + /* Initialize autoflush timer. 
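The autoflush counter protocol used by the timer callback above (negative means nothing pending, 0 means fresh data, positive means data that has survived a full interval) can be modelled with C11 atomics. The sketch below is an illustration of that pattern only, not driver code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* -1: nothing to flush, 0: fresh data, >0: data older than one tick. */
static atomic_int autoflush_counter = -1;

/* Called from the writer whenever new data is buffered. */
static void on_data_written(void)
{
        atomic_store(&autoflush_counter, 0);
}

/* Called from the periodic timer: returns true when the stream should be
 * flushed, i.e. the same data has now survived a full interval.
 */
static bool autoflush_tick(void)
{
        int cnt = atomic_load(&autoflush_counter);

        if (cnt < 0)
                return false;           /* nothing pending */

        /* Only advance the counter if no writer touched it meanwhile. */
        if (!atomic_compare_exchange_strong(&autoflush_counter, &cnt, cnt + 1))
                return false;

        return cnt != 0;                /* flush only on the second tick */
}

int main(void)
{
        on_data_written();
        printf("tick 1: flush=%d\n", autoflush_tick()); /* 0: data is fresh */
        printf("tick 2: flush=%d\n", autoflush_tick()); /* 1: flush now */
        return 0;
}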
*/ + atomic_set(&result->autoflush_timer_active, 0); + kbase_timer_setup(&result->autoflush_timer, + kbasep_timeline_autoflush_timer_callback); + result->timeline_flags = timeline_flags; + + + *timeline = result; + return 0; +} + +void kbase_timeline_term(struct kbase_timeline *timeline) +{ + enum tl_stream_type i; + + if (!timeline) + return; + + + for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++) + kbase_tlstream_term(&timeline->streams[i]); + + kfree(timeline); +} + +#ifdef CONFIG_MALI_DEVFREQ +static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) +{ + struct devfreq *devfreq = kbdev->devfreq; + + /* Devfreq initialization failure isn't a fatal error, so devfreq might + * be null. + */ + if (devfreq) { + unsigned long cur_freq = 0; + + mutex_lock(&devfreq->lock); +#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE + cur_freq = kbdev->current_nominal_freq; +#else + cur_freq = devfreq->last_status.current_frequency; +#endif + KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq); + mutex_unlock(&devfreq->lock); + } +} +#endif /* CONFIG_MALI_DEVFREQ */ + +int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) +{ + int ret; + u32 timeline_flags = TLSTREAM_ENABLED | flags; + struct kbase_timeline *timeline = kbdev->timeline; + + if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { + int rcode; + + ret = anon_inode_getfd( + "[mali_tlstream]", + &kbasep_tlstream_fops, + timeline, + O_RDONLY | O_CLOEXEC); + if (ret < 0) { + atomic_set(timeline->timeline_flags, 0); + return ret; + } + + /* Reset and initialize header streams. */ + kbase_tlstream_reset( + &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); + + timeline->obj_header_btc = obj_desc_header_size; + timeline->aux_header_btc = aux_desc_header_size; + + /* Start autoflush timer. */ + atomic_set(&timeline->autoflush_timer_active, 1); + rcode = mod_timer( + &timeline->autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); + + /* If job dumping is enabled, readjust the software event's + * timeout as the default value of 3 seconds is often + * insufficient. + */ + if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { + dev_info(kbdev->dev, + "Job dumping is enabled, readjusting the software event's timeout\n"); + atomic_set(&kbdev->js_data.soft_job_timeout_ms, + 1800000); + } + + /* Summary stream was cleared during acquire. + * Create static timeline objects that will be + * read by client. + */ + kbase_create_timeline_objects(kbdev); + +#ifdef CONFIG_MALI_DEVFREQ + /* Devfreq target tracepoints are only fired when the target + * changes, so we won't know the current target unless we + * send it now. 
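The single-client guarantee in kbase_timeline_io_acquire() rests on one compare-and-exchange against the shared flags word: the first caller claims it, later callers see -EBUSY until release zeroes it again. A user-space model of that claim/release pattern is sketched below; the flag value is an arbitrary placeholder.

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

#define TLSTREAM_ENABLED (1u << 31)     /* illustrative flag value */

static atomic_uint timeline_flags;      /* 0 means "no client attached" */

/* Claim the timeline for one client. Returns 0 on success, -EBUSY if a
 * client is already attached.
 */
static int timeline_acquire(unsigned int flags)
{
        unsigned int expected = 0;

        if (!atomic_compare_exchange_strong(&timeline_flags, &expected,
                                            TLSTREAM_ENABLED | flags))
                return -EBUSY;
        return 0;
}

static void timeline_release(void)
{
        atomic_store(&timeline_flags, 0);
}

int main(void)
{
        printf("first acquire:  %d\n", timeline_acquire(0)); /* 0 */
        printf("second acquire: %d\n", timeline_acquire(0)); /* -EBUSY */
        timeline_release();
        printf("after release:  %d\n", timeline_acquire(0)); /* 0 */
        return 0;
}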
+ */ + kbase_tlstream_current_devfreq_target(kbdev); +#endif /* CONFIG_MALI_DEVFREQ */ + + } else { + ret = -EBUSY; + } + + return ret; +} + +void kbase_timeline_streams_flush(struct kbase_timeline *timeline) +{ + enum tl_stream_type stype; + + for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) + kbase_tlstream_flush_stream(&timeline->streams[stype]); +} + +void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) +{ + kbase_tlstream_reset( + &timeline->streams[TL_STREAM_TYPE_OBJ]); + kbase_tlstream_reset( + &timeline->streams[TL_STREAM_TYPE_AUX]); +} + +#if MALI_UNIT_TEST +void kbase_timeline_stats(struct kbase_timeline *timeline, + u32 *bytes_collected, u32 *bytes_generated) +{ + enum tl_stream_type stype; + + KBASE_DEBUG_ASSERT(bytes_collected); + + /* Accumulate bytes generated per stream */ + *bytes_generated = 0; + for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; + stype++) + *bytes_generated += atomic_read( + &timeline->streams[stype].bytes_generated); + + *bytes_collected = atomic_read(&timeline->bytes_collected); +} +#endif /* MALI_UNIT_TEST */ diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.h b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.h new file mode 100644 index 000000000000..cd48411b45cf --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline.h @@ -0,0 +1,121 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_TIMELINE_H) +#define _KBASE_TIMELINE_H + +#include + +/*****************************************************************************/ + +struct kbase_timeline; + +/** + * kbase_timeline_init - initialize timeline infrastructure in kernel + * @timeline: Newly created instance of kbase_timeline will be stored in + * this pointer. + * @timeline_flags: Timeline status will be written to this variable when a + * client is attached/detached. The variable must be valid + * while timeline instance is valid. + * Return: zero on success, negative number on error + */ +int kbase_timeline_init(struct kbase_timeline **timeline, + atomic_t *timeline_flags); + +/** + * kbase_timeline_term - terminate timeline infrastructure in kernel + * + * @timeline: Timeline instance to be terminated. It must be previously created + * with kbase_timeline_init(). + */ +void kbase_timeline_term(struct kbase_timeline *timeline); + +/** + * kbase_timeline_io_acquire - acquire timeline stream file descriptor + * @kbdev: Kbase device + * @flags: Timeline stream flags + * + * This descriptor is meant to be used by userspace timeline to gain access to + * kernel timeline stream. This stream is later broadcasted by user space to the + * timeline client. + * Only one entity can own the descriptor at any given time. Descriptor shall be + * closed if unused. 
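From the client's point of view, the descriptor returned by kbase_timeline_io_acquire() behaves like any pollable, readable file. A minimal consumer loop could look like the sketch below, assuming the descriptor has already been obtained from the driver and that the read buffer is at least one stream packet in size (the 4KB figure is an assumption).

#include <poll.h>
#include <stdio.h>
#include <unistd.h>

/* Drain one readable chunk from an already-acquired timeline stream fd. */
static ssize_t drain_timeline(int tl_fd, int timeout_ms)
{
        char buf[4096];
        struct pollfd pfd = { .fd = tl_fd, .events = POLLIN };
        int ready = poll(&pfd, 1, timeout_ms);

        if (ready <= 0)
                return ready;                   /* timeout or poll error */

        return read(tl_fd, buf, sizeof(buf));   /* bytes copied by driver */
}

int main(void)
{
        /* Assume fd 3 was handed over by the process that acquired the
         * timeline descriptor from the driver.
         */
        ssize_t n = drain_timeline(3, 1000);

        printf("read %zd bytes of timeline data\n", n);
        return 0;
}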
If descriptor cannot be obtained (i.e. when it is already + * being used) return will be a negative value. + * + * Return: file descriptor on success, negative number on error + */ +int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags); + +/** + * kbase_timeline_streams_flush - flush timeline streams. + * @timeline: Timeline instance + * + * Function will flush pending data in all timeline streams. + */ +void kbase_timeline_streams_flush(struct kbase_timeline *timeline); + +/** + * kbase_timeline_streams_body_reset - reset timeline body streams. + * + * Function will discard pending data in all timeline body streams. + * @timeline: Timeline instance + */ +void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline); + +#if MALI_UNIT_TEST +/** + * kbase_timeline_test - start timeline stream data generator + * @kbdev: Kernel common context + * @tpw_count: Number of trace point writers in each context + * @msg_delay: Time delay in milliseconds between trace points written by one + * writer + * @msg_count: Number of trace points written by one writer + * @aux_msg: If non-zero aux messages will be included + * + * This test starts a requested number of asynchronous writers in both IRQ and + * thread context. Each writer will generate required number of test + * tracepoints (tracepoints with embedded information about writer that + * should be verified by user space reader). Tracepoints will be emitted in + * all timeline body streams. If aux_msg is non-zero writer will also + * generate not testable tracepoints (tracepoints without information about + * writer). These tracepoints are used to check correctness of remaining + * timeline message generating functions. Writer will wait requested time + * between generating another set of messages. This call blocks until all + * writers finish. + */ +void kbase_timeline_test( + struct kbase_device *kbdev, + unsigned int tpw_count, + unsigned int msg_delay, + unsigned int msg_count, + int aux_msg); + +/** + * kbase_timeline_stats - read timeline stream statistics + * @timeline: Timeline instance + * @bytes_collected: Will hold number of bytes read by the user + * @bytes_generated: Will hold number of bytes generated by trace points + */ +void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); +#endif /* MALI_UNIT_TEST */ + +#endif /* _KBASE_TIMELINE_H */ diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_io.c b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_io.c new file mode 100644 index 000000000000..cdde928bbab9 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_io.c @@ -0,0 +1,329 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_timeline_priv.h" +#include "mali_kbase_tlstream.h" +#include "mali_kbase_tracepoints.h" + +#include + +/* The timeline stream file operations functions. */ +static ssize_t kbasep_timeline_io_read( + struct file *filp, + char __user *buffer, + size_t size, + loff_t *f_pos); +static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait); +static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); + +/* The timeline stream file operations structure. */ +const struct file_operations kbasep_tlstream_fops = { + .owner = THIS_MODULE, + .release = kbasep_timeline_io_release, + .read = kbasep_timeline_io_read, + .poll = kbasep_timeline_io_poll, +}; + +/** + * kbasep_timeline_io_packet_pending - check timeline streams for pending packets + * @timeline: Timeline instance + * @ready_stream: Pointer to variable where stream will be placed + * @rb_idx_raw: Pointer to variable where read buffer index will be placed + * + * Function checks all streams for pending packets. It will stop as soon as + * packet ready to be submitted to user space is detected. Variables under + * pointers, passed as the parameters to this function will be updated with + * values pointing to right stream and buffer. + * + * Return: non-zero if any of timeline streams has at last one packet ready + */ +static int kbasep_timeline_io_packet_pending( + struct kbase_timeline *timeline, + struct kbase_tlstream **ready_stream, + unsigned int *rb_idx_raw) +{ + enum tl_stream_type i; + + KBASE_DEBUG_ASSERT(ready_stream); + KBASE_DEBUG_ASSERT(rb_idx_raw); + + for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; ++i) { + struct kbase_tlstream *stream = &timeline->streams[i]; + *rb_idx_raw = atomic_read(&stream->rbi); + /* Read buffer index may be updated by writer in case of + * overflow. Read and write buffer indexes must be + * loaded in correct order. + */ + smp_rmb(); + if (atomic_read(&stream->wbi) != *rb_idx_raw) { + *ready_stream = stream; + return 1; + } + + } + + return 0; +} + +/** + * copy_stream_header() - copy timeline stream header. + * + * @buffer: Pointer to the buffer provided by user. + * @size: Maximum amount of data that can be stored in the buffer. + * @copy_len: Pointer to amount of bytes that has been copied already + * within the read system call. + * @hdr: Pointer to the stream header. + * @hdr_size: Header size. + * @hdr_btc: Pointer to the remaining number of bytes to copy. + * + * Returns: 0 if success, -1 otherwise. + */ +static inline int copy_stream_header( + char __user *buffer, size_t size, ssize_t *copy_len, + const char *hdr, + size_t hdr_size, + size_t *hdr_btc) +{ + const size_t offset = hdr_size - *hdr_btc; + const size_t copy_size = MIN(size - *copy_len, *hdr_btc); + + if (!*hdr_btc) + return 0; + + if (WARN_ON(*hdr_btc > hdr_size)) + return -1; + + if (copy_to_user(&buffer[*copy_len], &hdr[offset], copy_size)) + return -1; + + *hdr_btc -= copy_size; + *copy_len += copy_size; + + return 0; +} + +/** + * kbasep_timeline_copy_header - copy timeline headers to the user + * @timeline: Timeline instance + * @buffer: Pointer to the buffer provided by user + * @size: Maximum amount of data that can be stored in the buffer + * @copy_len: Pointer to amount of bytes that has been copied already + * within the read system call. + * + * This helper function checks if timeline headers have not been sent + * to the user, and if so, sends them. copy_len is respectively + * updated. 
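copy_stream_header() spreads one header across as many read() calls as the user's buffer requires by tracking a bytes-to-copy counter. The same bookkeeping, reduced to plain memcpy() in user space, is shown below purely for illustration.

#include <stdio.h>
#include <string.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Copy as much of a header as fits into the caller's buffer, remembering
 * how many bytes are still to be copied (*hdr_btc) so the next call can
 * resume where this one stopped.
 */
static size_t copy_header_chunk(char *dst, size_t dst_size, size_t *copied,
                                const char *hdr, size_t hdr_size,
                                size_t *hdr_btc)
{
        size_t offset = hdr_size - *hdr_btc;
        size_t chunk = MIN(dst_size - *copied, *hdr_btc);

        memcpy(dst + *copied, hdr + offset, chunk);
        *hdr_btc -= chunk;
        *copied += chunk;
        return chunk;
}

int main(void)
{
        const char hdr[] = "TLSTREAM-HEADER";   /* stand-in descriptor header */
        size_t btc = sizeof(hdr) - 1;           /* bytes still to copy */
        char out[8];
        size_t copied;

        while (btc) {
                copied = 0;                     /* fresh 8-byte "read" */
                copy_header_chunk(out, sizeof(out), &copied, hdr,
                                  sizeof(hdr) - 1, &btc);
                printf("delivered %zu bytes, %zu left\n", copied, btc);
        }
        return 0;
}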
+ * + * Returns: 0 if success, -1 if copy_to_user has failed. + */ +static inline int kbasep_timeline_copy_headers( + struct kbase_timeline *timeline, + char __user *buffer, + size_t size, + ssize_t *copy_len) +{ + if (copy_stream_header(buffer, size, copy_len, + obj_desc_header, + obj_desc_header_size, + &timeline->obj_header_btc)) + return -1; + + if (copy_stream_header(buffer, size, copy_len, + aux_desc_header, + aux_desc_header_size, + &timeline->aux_header_btc)) + return -1; + return 0; +} + + +/** + * kbasep_timeline_io_read - copy data from streams to buffer provided by user + * @filp: Pointer to file structure + * @buffer: Pointer to the buffer provided by user + * @size: Maximum amount of data that can be stored in the buffer + * @f_pos: Pointer to file offset (unused) + * + * Return: number of bytes stored in the buffer + */ +static ssize_t kbasep_timeline_io_read( + struct file *filp, + char __user *buffer, + size_t size, + loff_t *f_pos) +{ + ssize_t copy_len = 0; + struct kbase_timeline *timeline; + + KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(f_pos); + + if (WARN_ON(!filp->private_data)) + return -EFAULT; + + timeline = (struct kbase_timeline *) filp->private_data; + + if (!buffer) + return -EINVAL; + + if ((*f_pos < 0) || (size < PACKET_SIZE)) + return -EINVAL; + + mutex_lock(&timeline->reader_lock); + + while (copy_len < size) { + struct kbase_tlstream *stream = NULL; + unsigned int rb_idx_raw = 0; + unsigned int wb_idx_raw; + unsigned int rb_idx; + size_t rb_size; + + if (kbasep_timeline_copy_headers( + timeline, buffer, size, ©_len)) { + copy_len = -EFAULT; + break; + } + + /* If we already read some packets and there is no + * packet pending then return back to user. + * If we don't have any data yet, wait for packet to be + * submitted. + */ + if (copy_len > 0) { + if (!kbasep_timeline_io_packet_pending( + timeline, + &stream, + &rb_idx_raw)) + break; + } else { + if (wait_event_interruptible( + timeline->event_queue, + kbasep_timeline_io_packet_pending( + timeline, + &stream, + &rb_idx_raw))) { + copy_len = -ERESTARTSYS; + break; + } + } + + if (WARN_ON(!stream)) { + copy_len = -EFAULT; + break; + } + + /* Check if this packet fits into the user buffer. + * If so copy its content. + */ + rb_idx = rb_idx_raw % PACKET_COUNT; + rb_size = atomic_read(&stream->buffer[rb_idx].size); + if (rb_size > size - copy_len) + break; + if (copy_to_user( + &buffer[copy_len], + stream->buffer[rb_idx].data, + rb_size)) { + copy_len = -EFAULT; + break; + } + + /* If the distance between read buffer index and write + * buffer index became more than PACKET_COUNT, then overflow + * happened and we need to ignore the last portion of bytes + * that we have just sent to user. 
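+ * For example (illustrative), with PACKET_COUNT == 32 a reader holding
+ * rb_idx_raw == 5 that then observes wb_idx_raw == 40 has lost packets:
+ * 40 - 5 >= 32, so the bytes just copied are dropped and the read index
+ * is advanced to 40 - 32 + 1 == 9, the oldest packet that is still
+ * valid.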
+ */ + smp_rmb(); + wb_idx_raw = atomic_read(&stream->wbi); + + if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) { + copy_len += rb_size; + atomic_inc(&stream->rbi); +#if MALI_UNIT_TEST + atomic_add(rb_size, &timeline->bytes_collected); +#endif /* MALI_UNIT_TEST */ + + } else { + const unsigned int new_rb_idx_raw = + wb_idx_raw - PACKET_COUNT + 1; + /* Adjust read buffer index to the next valid buffer */ + atomic_set(&stream->rbi, new_rb_idx_raw); + } + } + + mutex_unlock(&timeline->reader_lock); + + return copy_len; +} + +/** + * kbasep_timeline_io_poll - poll timeline stream for packets + * @filp: Pointer to file structure + * @wait: Pointer to poll table + * Return: POLLIN if data can be read without blocking, otherwise zero + */ +static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) +{ + struct kbase_tlstream *stream; + unsigned int rb_idx; + struct kbase_timeline *timeline; + + KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(wait); + + if (WARN_ON(!filp->private_data)) + return -EFAULT; + + timeline = (struct kbase_timeline *) filp->private_data; + + poll_wait(filp, &timeline->event_queue, wait); + if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx)) + return POLLIN; + return 0; +} + +/** + * kbasep_timeline_io_release - release timeline stream descriptor + * @inode: Pointer to inode structure + * @filp: Pointer to file structure + * + * Return always return zero + */ +static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) +{ + struct kbase_timeline *timeline; + + KBASE_DEBUG_ASSERT(inode); + KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(filp->private_data); + + CSTD_UNUSED(inode); + + timeline = (struct kbase_timeline *) filp->private_data; + + + /* Stop autoflush timer before releasing access to streams. */ + atomic_set(&timeline->autoflush_timer_active, 0); + del_timer_sync(&timeline->autoflush_timer); + + atomic_set(timeline->timeline_flags, 0); + return 0; +} diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_priv.h b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_priv.h new file mode 100644 index 000000000000..3596584d85c6 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_timeline_priv.h @@ -0,0 +1,66 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_TIMELINE_PRIV_H) +#define _KBASE_TIMELINE_PRIV_H + +#include +#include "mali_kbase_tlstream.h" + + +#include +#include +#include + +/** + * struct kbase_timeline - timeline state structure + * @streams: The timeline streams generated by kernel + * @autoflush_timer: Autoflush timer + * @autoflush_timer_active: If non-zero autoflush timer is active + * @reader_lock: Reader lock. Only one reader is allowed to + * have access to the timeline streams at any given time. 
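+ *               The lock is held for the whole duration of a read()
+ *               call; see kbasep_timeline_io_read().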
+ * @event_queue: Timeline stream event queue + * @bytes_collected: Number of bytes read by user + * @timeline_flags: Zero, if timeline is disabled. Timeline stream flags + * otherwise. See kbase_timeline_io_acquire(). + * @obj_header_btc: Remaining bytes to copy for the object stream header + * @aux_header_btc: Remaining bytes to copy for the aux stream header + */ +struct kbase_timeline { + struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT]; + struct timer_list autoflush_timer; + atomic_t autoflush_timer_active; + struct mutex reader_lock; + wait_queue_head_t event_queue; +#if MALI_UNIT_TEST + atomic_t bytes_collected; +#endif /* MALI_UNIT_TEST */ + atomic_t *timeline_flags; + size_t obj_header_btc; + size_t aux_header_btc; +}; + +extern const struct file_operations kbasep_tlstream_fops; + +void kbase_create_timeline_objects(struct kbase_device *kbdev); + +#endif /* _KBASE_TIMELINE_PRIV_H */ diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tl_serialize.h b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tl_serialize.h new file mode 100644 index 000000000000..3e378279cf2c --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tl_serialize.h @@ -0,0 +1,125 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_TL_SERIALIZE_H) +#define _KBASE_TL_SERIALIZE_H + +#include + +#include + +/* The number of nanoseconds in a second. */ +#define NSECS_IN_SEC 1000000000ull /* ns */ + +/** + * kbasep_serialize_bytes - serialize bytes to the message buffer + * + * Serialize bytes as is using memcpy() + * + * @buffer: Message buffer + * @pos: Message buffer offset + * @bytes: Bytes to serialize + * @len: Length of bytes array + * + * Return: updated position in the buffer + */ +static inline size_t kbasep_serialize_bytes( + char *buffer, + size_t pos, + const void *bytes, + size_t len) +{ + KBASE_DEBUG_ASSERT(buffer); + KBASE_DEBUG_ASSERT(bytes); + + memcpy(&buffer[pos], bytes, len); + + return pos + len; +} + +/** + * kbasep_serialize_string - serialize string to the message buffer + * + * String is serialized as 4 bytes for string size, + * then string content and then null terminator. + * + * @buffer: Message buffer + * @pos: Message buffer offset + * @string: String to serialize + * @max_write_size: Number of bytes that can be stored in buffer + * + * Return: updated position in the buffer + */ +static inline size_t kbasep_serialize_string( + char *buffer, + size_t pos, + const char *string, + size_t max_write_size) +{ + u32 string_len; + + KBASE_DEBUG_ASSERT(buffer); + KBASE_DEBUG_ASSERT(string); + /* Timeline string consists of at least string length and nul + * terminator. 
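+ * For example (illustrative), serializing the string "gpu" at pos 0
+ * writes a 32-bit length of 4 followed by the bytes 'g', 'p', 'u',
+ * '\0' and returns the new position 8.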
+ */ + KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char)); + max_write_size -= sizeof(string_len); + + string_len = strlcpy( + &buffer[pos + sizeof(string_len)], + string, + max_write_size); + string_len += sizeof(char); + + /* Make sure that the source string fit into the buffer. */ + KBASE_DEBUG_ASSERT(string_len <= max_write_size); + + /* Update string length. */ + memcpy(&buffer[pos], &string_len, sizeof(string_len)); + + return pos + sizeof(string_len) + string_len; +} + +/** + * kbasep_serialize_timestamp - serialize timestamp to the message buffer + * + * Get current timestamp using kbasep_get_timestamp() + * and serialize it as 64 bit unsigned integer. + * + * @buffer: Message buffer + * @pos: Message buffer offset + * + * Return: updated position in the buffer + */ +static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos) +{ + u64 timestamp; + + timestamp = ktime_get_raw_ns(); + + return kbasep_serialize_bytes( + buffer, pos, + ×tamp, sizeof(timestamp)); +} +#endif /* _KBASE_TL_SERIALIZE_H */ + diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tlstream.c b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tlstream.c new file mode 100644 index 000000000000..bec4be71570e --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tlstream.c @@ -0,0 +1,298 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_tlstream.h" +#include "mali_kbase_tl_serialize.h" +#include "mali_kbase_mipe_proto.h" + +/** + * kbasep_packet_header_setup - setup the packet header + * @buffer: pointer to the buffer + * @pkt_family: packet's family + * @pkt_type: packet's type + * @pkt_class: packet's class + * @stream_id: stream id + * @numbered: non-zero if this stream is numbered + * + * Function sets up immutable part of packet header in the given buffer. + */ +static void kbasep_packet_header_setup( + char *buffer, + enum tl_packet_family pkt_family, + enum tl_packet_class pkt_class, + enum tl_packet_type pkt_type, + unsigned int stream_id, + int numbered) +{ + u32 words[2] = { + MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id), + MIPE_PACKET_HEADER_W1(0, !!numbered), + }; + memcpy(buffer, words, sizeof(words)); +} + +/** + * kbasep_packet_header_update - update the packet header + * @buffer: pointer to the buffer + * @data_size: amount of data carried in this packet + * @numbered: non-zero if the stream is numbered + * + * Function updates mutable part of packet header in the given buffer. + * Note that value of data_size must not including size of the header. 
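+ * For example, a completely filled buffer of PACKET_SIZE bytes is
+ * submitted with data_size == PACKET_SIZE - PACKET_HEADER_SIZE
+ * (see kbasep_tlstream_msgbuf_submit()).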
+ */ +static void kbasep_packet_header_update( + char *buffer, + size_t data_size, + int numbered) +{ + u32 word0; + u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); + + KBASE_DEBUG_ASSERT(buffer); + CSTD_UNUSED(word0); + + memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); +} + +/** + * kbasep_packet_number_update - update the packet number + * @buffer: pointer to the buffer + * @counter: value of packet counter for this packet's stream + * + * Function updates packet number embedded within the packet placed in the + * given buffer. + */ +static void kbasep_packet_number_update(char *buffer, u32 counter) +{ + KBASE_DEBUG_ASSERT(buffer); + + memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter)); +} + +void kbase_tlstream_reset(struct kbase_tlstream *stream) +{ + unsigned int i; + + for (i = 0; i < PACKET_COUNT; i++) { + if (stream->numbered) + atomic_set( + &stream->buffer[i].size, + PACKET_HEADER_SIZE + + PACKET_NUMBER_SIZE); + else + atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); + } + + atomic_set(&stream->wbi, 0); + atomic_set(&stream->rbi, 0); +} + +/* Configuration of timeline streams generated by kernel. */ +static const struct { + enum tl_packet_family pkt_family; + enum tl_packet_class pkt_class; + enum tl_packet_type pkt_type; + enum tl_stream_id stream_id; +} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_SUMMARY, + TL_STREAM_ID_KERNEL, + }, + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_AUX, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, +}; + +void kbase_tlstream_init( + struct kbase_tlstream *stream, + enum tl_stream_type stream_type, + wait_queue_head_t *ready_read) +{ + unsigned int i; + + KBASE_DEBUG_ASSERT(stream); + KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); + + spin_lock_init(&stream->lock); + + /* All packets carrying tracepoints shall be numbered. */ + if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type) + stream->numbered = 1; + else + stream->numbered = 0; + + for (i = 0; i < PACKET_COUNT; i++) + kbasep_packet_header_setup( + stream->buffer[i].data, + tl_stream_cfg[stream_type].pkt_family, + tl_stream_cfg[stream_type].pkt_class, + tl_stream_cfg[stream_type].pkt_type, + tl_stream_cfg[stream_type].stream_id, + stream->numbered); + +#if MALI_UNIT_TEST + atomic_set(&stream->bytes_generated, 0); +#endif + stream->ready_read = ready_read; + + kbase_tlstream_reset(stream); +} + +void kbase_tlstream_term(struct kbase_tlstream *stream) +{ + KBASE_DEBUG_ASSERT(stream); +} + +/** + * kbase_tlstream_msgbuf_submit - submit packet to user space + * @stream: Pointer to the stream structure + * @wb_idx_raw: Write buffer index + * @wb_size: Length of data stored in the current buffer + * + * Updates currently written buffer with the packet header. + * Then write index is incremented and the buffer is handed to user space. + * Parameters of the new buffer are returned using provided arguments. + * + * Return: length of data in the new buffer + * + * Warning: the user must update the stream structure with returned value. + */ +static size_t kbasep_tlstream_msgbuf_submit( + struct kbase_tlstream *stream, + unsigned int wb_idx_raw, + unsigned int wb_size) +{ + unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; + + /* Set stream as flushed. 
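+	 * A negative autoflush counter marks the stream as having no data
+	 * pending for flush; it is reset to zero the next time a writer
+	 * releases a message buffer (see kbase_tlstream_msgbuf_release()).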
*/ + atomic_set(&stream->autoflush_counter, -1); + + kbasep_packet_header_update( + stream->buffer[wb_idx].data, + wb_size - PACKET_HEADER_SIZE, + stream->numbered); + + if (stream->numbered) + kbasep_packet_number_update( + stream->buffer[wb_idx].data, + wb_idx_raw); + + /* Increasing write buffer index will expose this packet to the reader. + * As stream->lock is not taken on reader side we must make sure memory + * is updated correctly before this will happen. */ + smp_wmb(); + atomic_inc(&stream->wbi); + + /* Inform user that packets are ready for reading. */ + wake_up_interruptible(stream->ready_read); + + wb_size = PACKET_HEADER_SIZE; + if (stream->numbered) + wb_size += PACKET_NUMBER_SIZE; + + return wb_size; +} + +char *kbase_tlstream_msgbuf_acquire( + struct kbase_tlstream *stream, + size_t msg_size, + unsigned long *flags) __acquires(&stream->lock) +{ + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; + + KBASE_DEBUG_ASSERT( + PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= + msg_size); + + spin_lock_irqsave(&stream->lock, *flags); + + wb_idx_raw = atomic_read(&stream->wbi); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); + + /* Select next buffer if data will not fit into current one. */ + if (PACKET_SIZE < wb_size + msg_size) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + } + + /* Reserve space in selected buffer. */ + atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); + +#if MALI_UNIT_TEST + atomic_add(msg_size, &stream->bytes_generated); +#endif /* MALI_UNIT_TEST */ + + return &stream->buffer[wb_idx].data[wb_size]; +} + +void kbase_tlstream_msgbuf_release( + struct kbase_tlstream *stream, + unsigned long flags) __releases(&stream->lock) +{ + /* Mark stream as containing unflushed data. */ + atomic_set(&stream->autoflush_counter, 0); + + spin_unlock_irqrestore(&stream->lock, flags); +} + +void kbase_tlstream_flush_stream( + struct kbase_tlstream *stream) +{ + unsigned long flags; + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; + size_t min_size = PACKET_HEADER_SIZE; + + if (stream->numbered) + min_size += PACKET_NUMBER_SIZE; + + spin_lock_irqsave(&stream->lock, flags); + + wb_idx_raw = atomic_read(&stream->wbi); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); + + if (wb_size > min_size) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, wb_size); + } + spin_unlock_irqrestore(&stream->lock, flags); +} + diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tlstream.h b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tlstream.h new file mode 100644 index 000000000000..427bb0969540 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tlstream.h @@ -0,0 +1,166 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_TLSTREAM_H) +#define _KBASE_TLSTREAM_H + +#include +#include +#include + +/* The maximum size of a single packet used by timeline. */ +#define PACKET_SIZE 4096 /* bytes */ + +/* The number of packets used by one timeline stream. */ +#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP) + #define PACKET_COUNT 64 +#else + #define PACKET_COUNT 32 +#endif + +/* The maximum expected length of string in tracepoint descriptor. */ +#define STRLEN_MAX 64 /* bytes */ + +/** + * struct kbase_tlstream - timeline stream structure + * @lock: Message order lock + * @buffer: Array of buffers + * @wbi: Write buffer index + * @rbi: Read buffer index + * @numbered: If non-zero stream's packets are sequentially numbered + * @autoflush_counter: Counter tracking stream's autoflush state + * @ready_read: Pointer to a wait queue, which is signaled when + * timeline messages are ready for collection. + * @bytes_generated: Number of bytes generated by tracepoint messages + * + * This structure holds information needed to construct proper packets in the + * timeline stream. + * + * Each message in the sequence must bear a timestamp that is + * greater than the previous message in the same stream. For this reason + * a lock is held throughout the process of message creation. + * + * Each stream contains a set of buffers. Each buffer will hold one MIPE + * packet. In case there is no free space required to store the incoming + * message the oldest buffer is discarded. Each packet in timeline body + * stream has a sequence number embedded, this value must increment + * monotonically and is used by the packets receiver to discover these + * buffer overflows. + * + * The autoflush counter is set to a negative number when there is no data + * pending for flush and it is set to zero on every update of the buffer. The + * autoflush timer will increment the counter by one on every expiry. If there + * is no activity on the buffer for two consecutive timer expiries, the stream + * buffer will be flushed. + */ +struct kbase_tlstream { + spinlock_t lock; + + struct { + atomic_t size; /* number of bytes in buffer */ + char data[PACKET_SIZE]; /* buffer's data */ + } buffer[PACKET_COUNT]; + + atomic_t wbi; + atomic_t rbi; + + int numbered; + atomic_t autoflush_counter; + wait_queue_head_t *ready_read; +#if MALI_UNIT_TEST + atomic_t bytes_generated; +#endif +}; + +/* Types of streams generated by timeline. */ +enum tl_stream_type { + TL_STREAM_TYPE_FIRST, + TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST, + TL_STREAM_TYPE_OBJ, + TL_STREAM_TYPE_AUX, + TL_STREAM_TYPE_COUNT +}; + +/** + * kbase_tlstream_init - initialize timeline stream + * @stream: Pointer to the stream structure + * @stream_type: Stream type + * @ready_read: Pointer to a wait queue to signal when + * timeline messages are ready for collection. 
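+ *
+ * A minimal initialization sketch (illustrative only; the local
+ * variable names are assumptions, not part of the driver):
+ *
+ *   wait_queue_head_t ready;
+ *   struct kbase_tlstream stream;
+ *
+ *   init_waitqueue_head(&ready);
+ *   kbase_tlstream_init(&stream, TL_STREAM_TYPE_OBJ, &ready);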
+ */ +void kbase_tlstream_init(struct kbase_tlstream *stream, + enum tl_stream_type stream_type, + wait_queue_head_t *ready_read); + +/** + * kbase_tlstream_term - terminate timeline stream + * @stream: Pointer to the stream structure + */ +void kbase_tlstream_term(struct kbase_tlstream *stream); + +/** + * kbase_tlstream_reset - reset stream + * @stream: Pointer to the stream structure + * + * Function discards all pending messages and resets packet counters. + */ +void kbase_tlstream_reset(struct kbase_tlstream *stream); + +/** + * kbase_tlstream_msgbuf_acquire - lock selected stream and reserve a buffer + * @stream: Pointer to the stream structure + * @msg_size: Message size + * @flags: Pointer to store flags passed back on stream release + * + * Lock the stream and reserve the number of bytes requested + * in msg_size for the user. + * + * Return: pointer to the buffer where a message can be stored + * + * Warning: The stream must be released with kbase_tlstream_msgbuf_release(). + * Only atomic operations are allowed while the stream is locked + * (i.e. do not use any operation that may sleep). + */ +char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream, + size_t msg_size, unsigned long *flags) __acquires(&stream->lock); + +/** + * kbase_tlstream_msgbuf_release - unlock selected stream + * @stream: Pointer to the stream structure + * @flags: Value obtained during stream acquire + * + * Release the stream that has been previously + * locked with a call to kbase_tlstream_msgbuf_acquire(). + */ +void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, + unsigned long flags) __releases(&stream->lock); + +/** + * kbase_tlstream_flush_stream - flush stream + * @stream: Pointer to the stream structure + * + * Flush pending data in the timeline stream. + */ +void kbase_tlstream_flush_stream(struct kbase_tlstream *stream); + +#endif /* _KBASE_TLSTREAM_H */ + diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.c b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.c new file mode 100644 index 000000000000..e445a3a3d683 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.c @@ -0,0 +1,3194 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +#include "mali_kbase_tracepoints.h" +#include "mali_kbase_tlstream.h" +#include "mali_kbase_tl_serialize.h" + +/* clang-format off */ + +/* Message ids of trace events that are recorded in the timeline stream. 
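+ * Each identifier is written as the first field of the corresponding
+ * tracepoint message (see the __kbase_tlstream_* helpers below), so
+ * this list must stay in sync with the autogenerated OBJ_TP_LIST
+ * descriptors.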
*/ +enum tl_msg_id_obj { + KBASE_TL_NEW_CTX, + KBASE_TL_NEW_GPU, + KBASE_TL_NEW_LPU, + KBASE_TL_NEW_ATOM, + KBASE_TL_NEW_AS, + KBASE_TL_DEL_CTX, + KBASE_TL_DEL_ATOM, + KBASE_TL_LIFELINK_LPU_GPU, + KBASE_TL_LIFELINK_AS_GPU, + KBASE_TL_RET_CTX_LPU, + KBASE_TL_RET_ATOM_CTX, + KBASE_TL_RET_ATOM_LPU, + KBASE_TL_NRET_CTX_LPU, + KBASE_TL_NRET_ATOM_CTX, + KBASE_TL_NRET_ATOM_LPU, + KBASE_TL_RET_AS_CTX, + KBASE_TL_NRET_AS_CTX, + KBASE_TL_RET_ATOM_AS, + KBASE_TL_NRET_ATOM_AS, + KBASE_TL_ATTRIB_ATOM_CONFIG, + KBASE_TL_ATTRIB_ATOM_PRIORITY, + KBASE_TL_ATTRIB_ATOM_STATE, + KBASE_TL_ATTRIB_ATOM_PRIORITIZED, + KBASE_TL_ATTRIB_ATOM_JIT, + KBASE_TL_JIT_USEDPAGES, + KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, + KBASE_TL_ATTRIB_ATOM_JITFREEINFO, + KBASE_TL_ATTRIB_AS_CONFIG, + KBASE_TL_EVENT_LPU_SOFTSTOP, + KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, + KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, + KBASE_TL_EVENT_ATOM_SOFTJOB_START, + KBASE_TL_EVENT_ATOM_SOFTJOB_END, + KBASE_TL_EVENT_ARB_GRANTED, + KBASE_TL_EVENT_ARB_STARTED, + KBASE_TL_EVENT_ARB_STOP_REQUESTED, + KBASE_TL_EVENT_ARB_STOPPED, + KBASE_JD_GPU_SOFT_RESET, + KBASE_TL_KBASE_NEW_DEVICE, + KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, + KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, + KBASE_TL_KBASE_NEW_CTX, + KBASE_TL_KBASE_DEL_CTX, + KBASE_TL_KBASE_CTX_ASSIGN_AS, + KBASE_TL_KBASE_CTX_UNASSIGN_AS, + KBASE_TL_KBASE_NEW_KCPUQUEUE, + KBASE_TL_KBASE_DEL_KCPUQUEUE, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, + 
KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, + KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, + KBASE_OBJ_MSG_COUNT, +}; + +/* Message ids of trace events that are recorded in the auxiliary stream. */ +enum tl_msg_id_aux { + KBASE_AUX_PM_STATE, + KBASE_AUX_PAGEFAULT, + KBASE_AUX_PAGESALLOC, + KBASE_AUX_DEVFREQ_TARGET, + KBASE_AUX_PROTECTED_ENTER_START, + KBASE_AUX_PROTECTED_ENTER_END, + KBASE_AUX_PROTECTED_LEAVE_START, + KBASE_AUX_PROTECTED_LEAVE_END, + KBASE_AUX_JIT_STATS, + KBASE_AUX_EVENT_JOB_SLOT, + KBASE_AUX_MSG_COUNT, +}; + +#define OBJ_TP_LIST \ + TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \ + "object ctx is created", \ + "@pII", \ + "ctx,ctx_nr,tgid") \ + TRACEPOINT_DESC(KBASE_TL_NEW_GPU, \ + "object gpu is created", \ + "@pII", \ + "gpu,gpu_id,core_count") \ + TRACEPOINT_DESC(KBASE_TL_NEW_LPU, \ + "object lpu is created", \ + "@pII", \ + "lpu,lpu_nr,lpu_fn") \ + TRACEPOINT_DESC(KBASE_TL_NEW_ATOM, \ + "object atom is created", \ + "@pI", \ + "atom,atom_nr") \ + TRACEPOINT_DESC(KBASE_TL_NEW_AS, \ + "address space object is created", \ + "@pI", \ + "address_space,as_nr") \ + TRACEPOINT_DESC(KBASE_TL_DEL_CTX, \ + "context is destroyed", \ + "@p", \ + "ctx") \ + TRACEPOINT_DESC(KBASE_TL_DEL_ATOM, \ + "atom is destroyed", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_LIFELINK_LPU_GPU, \ + "lpu is deleted with gpu", \ + "@pp", \ + "lpu,gpu") \ + TRACEPOINT_DESC(KBASE_TL_LIFELINK_AS_GPU, \ + "address space is deleted with gpu", \ + "@pp", \ + "address_space,gpu") \ + TRACEPOINT_DESC(KBASE_TL_RET_CTX_LPU, \ + "context is retained by lpu", \ + "@pp", \ + "ctx,lpu") \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_CTX, \ + "atom is retained by context", \ + "@pp", \ + "atom,ctx") \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_LPU, \ + "atom is retained by lpu", \ + "@pps", \ + "atom,lpu,attrib_match_list") \ + TRACEPOINT_DESC(KBASE_TL_NRET_CTX_LPU, \ + "context is released by lpu", \ + "@pp", \ + "ctx,lpu") \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_CTX, \ + "atom is released by context", \ + "@pp", \ + "atom,ctx") \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_LPU, \ + "atom is released by lpu", \ + "@pp", \ + "atom,lpu") \ + TRACEPOINT_DESC(KBASE_TL_RET_AS_CTX, \ + "address space is retained by context", \ + "@pp", \ + "address_space,ctx") \ + TRACEPOINT_DESC(KBASE_TL_NRET_AS_CTX, \ + "address space is released by context", \ + "@pp", \ + "address_space,ctx") \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_AS, \ + "atom is retained by address space", \ + "@pp", \ + "atom,address_space") \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_AS, \ + "atom is released by address space", \ + "@pp", \ + "atom,address_space") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \ + "atom job slot attributes", \ + "@pLLI", \ + "atom,descriptor,affinity,config") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \ + "atom priority", \ + "@pI", \ + "atom,prio") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \ + "atom state", \ + "@pI", \ + "atom,state") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \ + "atom caused priority change", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \ + "jit done for atom", \ + "@pLLILILLL", \ + "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \ + TRACEPOINT_DESC(KBASE_TL_JIT_USEDPAGES, \ + "used pages for jit", \ + "@LI", \ + "used_pages,j_id") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \ + 
"Information about JIT allocations", \ + "@pLLLIIIII", \ + "atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \ + "Information about JIT frees", \ + "@pI", \ + "atom,j_id") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \ + "address space attributes", \ + "@pLLL", \ + "address_space,transtab,memattr,transcfg") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \ + "softstop event on given lpu", \ + "@p", \ + "lpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \ + "atom softstopped", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \ + "atom softstop issued", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \ + "atom soft job has started", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \ + "atom soft job has completed", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_GRANTED, \ + "Arbiter has granted gpu access", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STARTED, \ + "Driver is running again and able to process jobs", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOP_REQUESTED, \ + "Arbiter has requested driver to stop using gpu", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOPPED, \ + "Driver has stopped using gpu", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ + "gpu soft reset", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ + "New KBase Device", \ + "@IIII", \ + "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ + "CSG is programmed to a slot", \ + "@III", \ + "kbase_device_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ + "CSG is deprogrammed from a slot", \ + "@II", \ + "kbase_device_id,kbase_device_csg_slot_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \ + "New KBase Context", \ + "@II", \ + "kernel_ctx_id,kbase_device_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_CTX, \ + "Delete KBase Context", \ + "@I", \ + "kernel_ctx_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_ASSIGN_AS, \ + "Address Space is assigned to a KBase context", \ + "@II", \ + "kernel_ctx_id,kbase_device_as_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_UNASSIGN_AS, \ + "Address Space is unassigned from a KBase context", \ + "@I", \ + "kernel_ctx_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \ + "New KCPU Queue", \ + "@pII", \ + "kcpu_queue,kernel_ctx_id,kcpuq_num_pending_cmds") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \ + "Delete KCPU Queue", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \ + "KCPU Queue enqueues Signal on Fence", \ + "@pp", \ + "kcpu_queue,fence") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \ + "KCPU Queue enqueues Wait on Fence", \ + "@pp", \ + "kcpu_queue,fence") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + "Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + "Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ + "@pLI", \ + "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + "End array of KCPU Queue enqueues Wait on Cross Queue 
Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \ + "Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \ + "Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \ + "@pL", \ + "kcpu_queue,cqs_obj_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \ + "End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + "Begin array of KCPU Queue enqueues Debug Copy", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + "Array item of KCPU Queue enqueues Debug Copy", \ + "@pL", \ + "kcpu_queue,debugcopy_dst_size") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + "End array of KCPU Queue enqueues Debug Copy", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ + "KCPU Queue enqueues Map Import", \ + "@pL", \ + "kcpu_queue,map_import_buf_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \ + "KCPU Queue enqueues Unmap Import", \ + "@pL", \ + "kcpu_queue,map_import_buf_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \ + "KCPU Queue enqueues Unmap Import ignoring reference count", \ + "@pL", \ + "kcpu_queue,map_import_buf_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + "Begin array of KCPU Queue enqueues JIT Alloc", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + "Array item of KCPU Queue enqueues JIT Alloc", \ + "@pLLLLIIIII", \ + "kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + "End array of KCPU Queue enqueues JIT Alloc", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + "Begin array of KCPU Queue enqueues JIT Free", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + "Array item of KCPU Queue enqueues JIT Free", \ + "@pI", \ + "kcpu_queue,jit_alloc_jit_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + "End array of KCPU Queue enqueues JIT Free", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ + "KCPU Queue starts a Signal on Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ + "KCPU Queue ends a Signal on Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ + "KCPU Queue starts a Wait on Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ + "KCPU Queue ends a Wait on Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ + "KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ + "KCPU Queue ends a Wait on an array of Cross Queue 
Sync Objects", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ + "KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \ + "KCPU Queue starts an array of Debug Copys", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \ + "KCPU Queue ends an array of Debug Copys", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ + "KCPU Queue starts a Map Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ + "KCPU Queue ends a Map Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ + "KCPU Queue starts an Unmap Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ + "KCPU Queue ends an Unmap Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ + "KCPU Queue starts an Unmap Import ignoring reference count", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ + "KCPU Queue ends an Unmap Import ignoring reference count", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ + "KCPU Queue starts an array of JIT Allocs", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + "Begin array of KCPU Queue ends an array of JIT Allocs", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + "Array item of KCPU Queue ends an array of JIT Allocs", \ + "@pLL", \ + "kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + "End array of KCPU Queue ends an array of JIT Allocs", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \ + "KCPU Queue starts an array of JIT Frees", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + "Begin array of KCPU Queue ends an array of JIT Frees", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + "Array item of KCPU Queue ends an array of JIT Frees", \ + "@pL", \ + "kcpu_queue,jit_free_pages_used") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + "End array of KCPU Queue ends an array of JIT Frees", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \ + "KCPU Queue executes an Error Barrier", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ + "An overflow has happened with the CSFFW Timeline stream", \ + "@LL", \ + "csffw_timestamp,csffw_cycle") \ + +#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header +#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ +#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST +#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT + +#include "mali_kbase_mipe_gen_header.h" + +const char *obj_desc_header = (const char *) &__obj_desc_header; +const size_t obj_desc_header_size = sizeof(__obj_desc_header); + +#define AUX_TP_LIST \ + 
TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \ + "PM state", \ + "@IL", \ + "core_type,core_state_bitset") \ + TRACEPOINT_DESC(KBASE_AUX_PAGEFAULT, \ + "Page fault", \ + "@IIL", \ + "ctx_nr,as_nr,page_cnt_change") \ + TRACEPOINT_DESC(KBASE_AUX_PAGESALLOC, \ + "Total alloc pages change", \ + "@IL", \ + "ctx_nr,page_cnt") \ + TRACEPOINT_DESC(KBASE_AUX_DEVFREQ_TARGET, \ + "New device frequency target", \ + "@L", \ + "target_freq") \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \ + "enter protected mode start", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \ + "enter protected mode end", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \ + "leave protected mode start", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \ + "leave protected mode end", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_AUX_JIT_STATS, \ + "per-bin JIT statistics", \ + "@IIIIII", \ + "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \ + TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ + "event on a given job slot", \ + "@pIII", \ + "ctx,slot_nr,atom_nr,event") \ + +#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header +#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX +#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST +#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT + +#include "mali_kbase_mipe_gen_header.h" + +const char *aux_desc_header = (const char *) &__aux_desc_header; +const size_t aux_desc_header_size = sizeof(__aux_desc_header); + +void __kbase_tlstream_tl_new_ctx( + struct kbase_tlstream *stream, + const void *ctx, + u32 ctx_nr, + u32 tgid) +{ + const u32 msg_id = KBASE_TL_NEW_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + + sizeof(ctx_nr) + + sizeof(tgid) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &tgid, sizeof(tgid)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_new_gpu( + struct kbase_tlstream *stream, + const void *gpu, + u32 gpu_id, + u32 core_count) +{ + const u32 msg_id = KBASE_TL_NEW_GPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + + sizeof(gpu_id) + + sizeof(core_count) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu_id, sizeof(gpu_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &core_count, sizeof(core_count)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_new_lpu( + struct kbase_tlstream *stream, + const void *lpu, + u32 lpu_nr, + u32 lpu_fn) +{ + const u32 msg_id = KBASE_TL_NEW_LPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(lpu) + + sizeof(lpu_nr) + + sizeof(lpu_fn) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, 
&acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu_nr, sizeof(lpu_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu_fn, sizeof(lpu_fn)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_new_atom( + struct kbase_tlstream *stream, + const void *atom, + u32 atom_nr) +{ + const u32 msg_id = KBASE_TL_NEW_ATOM; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(atom_nr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &atom_nr, sizeof(atom_nr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_new_as( + struct kbase_tlstream *stream, + const void *address_space, + u32 as_nr) +{ + const u32 msg_id = KBASE_TL_NEW_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(as_nr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &as_nr, sizeof(as_nr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_del_ctx( + struct kbase_tlstream *stream, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_DEL_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_del_atom( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_DEL_ATOM; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_lifelink_lpu_gpu( + struct kbase_tlstream *stream, + const void *lpu, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(lpu) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, 
+ pos, &lpu, sizeof(lpu)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_lifelink_as_gpu( + struct kbase_tlstream *stream, + const void *address_space, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu) +{ + const u32 msg_id = KBASE_TL_RET_CTX_LPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + + sizeof(lpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_RET_ATOM_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu, + const char *attrib_match_list) +{ + const u32 msg_id = KBASE_TL_RET_ATOM_LPU; + const size_t s0 = sizeof(u32) + sizeof(char) + + strnlen(attrib_match_list, STRLEN_MAX); + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(lpu) + + s0 + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + pos = kbasep_serialize_string(buffer, + pos, attrib_match_list, s0); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu) +{ + const u32 msg_id = KBASE_TL_NRET_CTX_LPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + + sizeof(lpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, 
msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(lpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_RET_AS_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_NRET_AS_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space) +{ + const u32 msg_id = KBASE_TL_RET_ATOM_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(address_space) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = 
kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(address_space) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_config( + struct kbase_tlstream *stream, + const void *atom, + u64 descriptor, + u64 affinity, + u32 config) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(descriptor) + + sizeof(affinity) + + sizeof(config) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &descriptor, sizeof(descriptor)); + pos = kbasep_serialize_bytes(buffer, + pos, &affinity, sizeof(affinity)); + pos = kbasep_serialize_bytes(buffer, + pos, &config, sizeof(config)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_priority( + struct kbase_tlstream *stream, + const void *atom, + u32 prio) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(prio) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &prio, sizeof(prio)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_state( + struct kbase_tlstream *stream, + const void *atom, + u32 state) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(state) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &state, sizeof(state)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void 
__kbase_tlstream_tl_attrib_atom_prioritized( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_jit( + struct kbase_tlstream *stream, + const void *atom, + u64 edit_addr, + u64 new_addr, + u32 jit_flags, + u64 mem_flags, + u32 j_id, + u64 com_pgs, + u64 extent, + u64 va_pgs) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(edit_addr) + + sizeof(new_addr) + + sizeof(jit_flags) + + sizeof(mem_flags) + + sizeof(j_id) + + sizeof(com_pgs) + + sizeof(extent) + + sizeof(va_pgs) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &edit_addr, sizeof(edit_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &new_addr, sizeof(new_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_flags, sizeof(jit_flags)); + pos = kbasep_serialize_bytes(buffer, + pos, &mem_flags, sizeof(mem_flags)); + pos = kbasep_serialize_bytes(buffer, + pos, &j_id, sizeof(j_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &com_pgs, sizeof(com_pgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &extent, sizeof(extent)); + pos = kbasep_serialize_bytes(buffer, + pos, &va_pgs, sizeof(va_pgs)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_jit_usedpages( + struct kbase_tlstream *stream, + u64 used_pages, + u32 j_id) +{ + const u32 msg_id = KBASE_TL_JIT_USEDPAGES; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(used_pages) + + sizeof(j_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &used_pages, sizeof(used_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &j_id, sizeof(j_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_jitallocinfo( + struct kbase_tlstream *stream, + const void *atom, + u64 va_pgs, + u64 com_pgs, + u64 extent, + u32 j_id, + u32 bin_id, + u32 max_allocs, + u32 jit_flags, + u32 usg_id) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(va_pgs) + + sizeof(com_pgs) + + sizeof(extent) + + sizeof(j_id) + + sizeof(bin_id) + + sizeof(max_allocs) + + sizeof(jit_flags) + + sizeof(usg_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = 
kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &va_pgs, sizeof(va_pgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &com_pgs, sizeof(com_pgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &extent, sizeof(extent)); + pos = kbasep_serialize_bytes(buffer, + pos, &j_id, sizeof(j_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &bin_id, sizeof(bin_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &max_allocs, sizeof(max_allocs)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_flags, sizeof(jit_flags)); + pos = kbasep_serialize_bytes(buffer, + pos, &usg_id, sizeof(usg_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( + struct kbase_tlstream *stream, + const void *atom, + u32 j_id) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(j_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &j_id, sizeof(j_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_as_config( + struct kbase_tlstream *stream, + const void *address_space, + u64 transtab, + u64 memattr, + u64 transcfg) +{ + const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(transtab) + + sizeof(memattr) + + sizeof(transcfg) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &transtab, sizeof(transtab)); + pos = kbasep_serialize_bytes(buffer, + pos, &memattr, sizeof(memattr)); + pos = kbasep_serialize_bytes(buffer, + pos, &transcfg, sizeof(transcfg)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_lpu_softstop( + struct kbase_tlstream *stream, + const void *lpu) +{ + const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(lpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_atom_softstop_ex( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + 
pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_atom_softstop_issue( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_atom_softjob_start( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_atom_softjob_end( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_granted( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_GRANTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_started( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STARTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_stop_requested( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STOP_REQUESTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, 
msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_stopped( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STOPPED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_jd_gpu_soft_reset( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_pm_state( + struct kbase_tlstream *stream, + u32 core_type, + u64 core_state_bitset) +{ + const u32 msg_id = KBASE_AUX_PM_STATE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(core_type) + + sizeof(core_state_bitset) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &core_type, sizeof(core_type)); + pos = kbasep_serialize_bytes(buffer, + pos, &core_state_bitset, sizeof(core_state_bitset)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_pagefault( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 as_nr, + u64 page_cnt_change) +{ + const u32 msg_id = KBASE_AUX_PAGEFAULT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + + sizeof(as_nr) + + sizeof(page_cnt_change) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &as_nr, sizeof(as_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &page_cnt_change, sizeof(page_cnt_change)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_pagesalloc( + struct kbase_tlstream *stream, + u32 ctx_nr, + u64 page_cnt) +{ + const u32 msg_id = KBASE_AUX_PAGESALLOC; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + + sizeof(page_cnt) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = 
kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &page_cnt, sizeof(page_cnt)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_devfreq_target( + struct kbase_tlstream *stream, + u64 target_freq) +{ + const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(target_freq) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &target_freq, sizeof(target_freq)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_protected_enter_start( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_protected_enter_end( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_protected_leave_start( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_protected_leave_end( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_jit_stats( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 bid, + u32 max_allocs, + u32 allocs, + u32 va_pages, + u32 ph_pages) +{ + const u32 msg_id = KBASE_AUX_JIT_STATS; + const size_t msg_size 
= sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + + sizeof(bid) + + sizeof(max_allocs) + + sizeof(allocs) + + sizeof(va_pages) + + sizeof(ph_pages) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &bid, sizeof(bid)); + pos = kbasep_serialize_bytes(buffer, + pos, &max_allocs, sizeof(max_allocs)); + pos = kbasep_serialize_bytes(buffer, + pos, &allocs, sizeof(allocs)); + pos = kbasep_serialize_bytes(buffer, + pos, &va_pages, sizeof(va_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &ph_pages, sizeof(ph_pages)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_event_job_slot( + struct kbase_tlstream *stream, + const void *ctx, + u32 slot_nr, + u32 atom_nr, + u32 event) +{ + const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + + sizeof(slot_nr) + + sizeof(atom_nr) + + sizeof(event) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + pos = kbasep_serialize_bytes(buffer, + pos, &slot_nr, sizeof(slot_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &atom_nr, sizeof(atom_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &event, sizeof(event)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_new_device( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_gpu_core_count, + u32 kbase_device_max_num_csgs, + u32 kbase_device_as_count) +{ + const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(kbase_device_gpu_core_count) + + sizeof(kbase_device_max_num_csgs) + + sizeof(kbase_device_as_count) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_gpu_core_count, sizeof(kbase_device_gpu_core_count)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_max_num_csgs, sizeof(kbase_device_max_num_csgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_as_count, sizeof(kbase_device_as_count)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_device_program_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 gpu_cmdq_grp_handle, + u32 kbase_device_csg_slot_index) +{ + const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(gpu_cmdq_grp_handle) + + sizeof(kbase_device_csg_slot_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = 
kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_device_deprogram_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index) +{ + const u32 msg_id = KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(kbase_device_csg_slot_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_new_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_id) +{ + const u32 msg_id = KBASE_TL_KBASE_NEW_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(kbase_device_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_del_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id) +{ + const u32 msg_id = KBASE_TL_KBASE_DEL_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_ctx_assign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_as_index) +{ + const u32 msg_id = KBASE_TL_KBASE_CTX_ASSIGN_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(kbase_device_as_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_as_index, sizeof(kbase_device_as_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void 
__kbase_tlstream_tl_kbase_ctx_unassign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id) +{ + const u32 msg_id = KBASE_TL_KBASE_CTX_UNASSIGN_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_new_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 kernel_ctx_id, + u32 kcpuq_num_pending_cmds) +{ + const u32 msg_id = KBASE_TL_KBASE_NEW_KCPUQUEUE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(kernel_ctx_id) + + sizeof(kcpuq_num_pending_cmds) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpuq_num_pending_cmds, sizeof(kcpuq_num_pending_cmds)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_del_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_DEL_KCPUQUEUE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(fence) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &fence, sizeof(fence)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(fence) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = 
kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &fence, sizeof(fence)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, + u32 cqs_obj_compare_value) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(cqs_obj_gpu_addr) + + sizeof(cqs_obj_compare_value) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET; + const size_t msg_size 
= sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(cqs_obj_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 debugcopy_dst_size) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(debugcopy_dst_size) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &debugcopy_dst_size, sizeof(debugcopy_dst_size)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( + struct kbase_tlstream 
*stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr_dest, + u64 jit_alloc_va_pages, + u64 jit_alloc_commit_pages, + u64 jit_alloc_extent, + u32 jit_alloc_jit_id, + u32 jit_alloc_bin_id, + u32 jit_alloc_max_allocations, + u32 jit_alloc_flags, + u32 jit_alloc_usage_id) +{ + const u32 msg_id = 
KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(jit_alloc_gpu_alloc_addr_dest) + + sizeof(jit_alloc_va_pages) + + sizeof(jit_alloc_commit_pages) + + sizeof(jit_alloc_extent) + + sizeof(jit_alloc_jit_id) + + sizeof(jit_alloc_bin_id) + + sizeof(jit_alloc_max_allocations) + + sizeof(jit_alloc_flags) + + sizeof(jit_alloc_usage_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_gpu_alloc_addr_dest, sizeof(jit_alloc_gpu_alloc_addr_dest)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_va_pages, sizeof(jit_alloc_va_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_commit_pages, sizeof(jit_alloc_commit_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_extent, sizeof(jit_alloc_extent)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_bin_id, sizeof(jit_alloc_bin_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_max_allocations, sizeof(jit_alloc_max_allocations)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_flags, sizeof(jit_alloc_flags)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_usage_id, sizeof(jit_alloc_usage_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 jit_alloc_jit_id) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(jit_alloc_jit_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, 
&acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, 
&msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, 
acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( + struct kbase_tlstream *stream, + const void 
*kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr, + u64 jit_alloc_mmu_flags) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(jit_alloc_gpu_alloc_addr) + + sizeof(jit_alloc_mmu_flags) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_mmu_flags, sizeof(jit_alloc_mmu_flags)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = 
kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_free_pages_used) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(jit_free_pages_used) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_free_pages_used, sizeof(jit_free_pages_used)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = 
kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_timestamp) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_timestamp, sizeof(csffw_timestamp)); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* clang-format on */ diff --git a/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.h b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.h new file mode 100644 index 000000000000..7ea8ba8d0200 --- /dev/null +++ b/drivers/gpu/arm/b_r26p0/tl/mali_kbase_tracepoints.h @@ -0,0 +1,2381 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +#if !defined(_KBASE_TRACEPOINTS_H) +#define _KBASE_TRACEPOINTS_H + +/* Tracepoints are abstract callbacks notifying that some important + * software or hardware event has happened. + * + * In this particular implementation, it results into a MIPE + * timeline event and, in some cases, it also fires an ftrace event + * (a.k.a. Gator events, see details below). + */ + +#include "mali_kbase.h" +#include "mali_kbase_gator.h" + +#include <linux/types.h> +#include <linux/atomic.h> + +/* clang-format off */ + +struct kbase_tlstream; + +extern const size_t __obj_stream_offset; +extern const size_t __aux_stream_offset; + +/* This macro dispatches a kbase_tlstream from + * a kbase_device instance. Only AUX or OBJ + * streams can be dispatched. It is aware of + * kbase_timeline binary representation and + * relies on offset variables: + * __obj_stream_offset and __aux_stream_offset. + */ +#define __TL_DISPATCH_STREAM(kbdev, stype) \ + ((struct kbase_tlstream *) \ + ((u8 *)kbdev->timeline + __ ## stype ## _stream_offset)) + +struct tp_desc; + +/* Descriptors of timeline messages transmitted in object events stream. */ +extern const char *obj_desc_header; +extern const size_t obj_desc_header_size; +/* Descriptors of timeline messages transmitted in auxiliary events stream. 
*/ +extern const char *aux_desc_header; +extern const size_t aux_desc_header_size; + +#define TL_ATOM_STATE_IDLE 0 +#define TL_ATOM_STATE_READY 1 +#define TL_ATOM_STATE_DONE 2 +#define TL_ATOM_STATE_POSTED 3 + +#define TL_JS_EVENT_START GATOR_JOB_SLOT_START +#define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP +#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED + +#define TLSTREAM_ENABLED (1 << 31) + +void __kbase_tlstream_tl_new_ctx( + struct kbase_tlstream *stream, + const void *ctx, + u32 ctx_nr, + u32 tgid); +void __kbase_tlstream_tl_new_gpu( + struct kbase_tlstream *stream, + const void *gpu, + u32 gpu_id, + u32 core_count); +void __kbase_tlstream_tl_new_lpu( + struct kbase_tlstream *stream, + const void *lpu, + u32 lpu_nr, + u32 lpu_fn); +void __kbase_tlstream_tl_new_atom( + struct kbase_tlstream *stream, + const void *atom, + u32 atom_nr); +void __kbase_tlstream_tl_new_as( + struct kbase_tlstream *stream, + const void *address_space, + u32 as_nr); +void __kbase_tlstream_tl_del_ctx( + struct kbase_tlstream *stream, + const void *ctx); +void __kbase_tlstream_tl_del_atom( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_lifelink_lpu_gpu( + struct kbase_tlstream *stream, + const void *lpu, + const void *gpu); +void __kbase_tlstream_tl_lifelink_as_gpu( + struct kbase_tlstream *stream, + const void *address_space, + const void *gpu); +void __kbase_tlstream_tl_ret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu); +void __kbase_tlstream_tl_ret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx); +void __kbase_tlstream_tl_ret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu, + const char *attrib_match_list); +void __kbase_tlstream_tl_nret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu); +void __kbase_tlstream_tl_nret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx); +void __kbase_tlstream_tl_nret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu); +void __kbase_tlstream_tl_ret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx); +void __kbase_tlstream_tl_nret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx); +void __kbase_tlstream_tl_ret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space); +void __kbase_tlstream_tl_nret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space); +void __kbase_tlstream_tl_attrib_atom_config( + struct kbase_tlstream *stream, + const void *atom, + u64 descriptor, + u64 affinity, + u32 config); +void __kbase_tlstream_tl_attrib_atom_priority( + struct kbase_tlstream *stream, + const void *atom, + u32 prio); +void __kbase_tlstream_tl_attrib_atom_state( + struct kbase_tlstream *stream, + const void *atom, + u32 state); +void __kbase_tlstream_tl_attrib_atom_prioritized( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_attrib_atom_jit( + struct kbase_tlstream *stream, + const void *atom, + u64 edit_addr, + u64 new_addr, + u32 jit_flags, + u64 mem_flags, + u32 j_id, + u64 com_pgs, + u64 extent, + u64 va_pgs); +void __kbase_tlstream_tl_jit_usedpages( + struct kbase_tlstream *stream, + u64 used_pages, + u32 j_id); +void __kbase_tlstream_tl_attrib_atom_jitallocinfo( + struct kbase_tlstream *stream, + const void *atom, + u64 va_pgs, + u64 com_pgs, + u64 extent, + u32 j_id, + u32 
bin_id, + u32 max_allocs, + u32 jit_flags, + u32 usg_id); +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( + struct kbase_tlstream *stream, + const void *atom, + u32 j_id); +void __kbase_tlstream_tl_attrib_as_config( + struct kbase_tlstream *stream, + const void *address_space, + u64 transtab, + u64 memattr, + u64 transcfg); +void __kbase_tlstream_tl_event_lpu_softstop( + struct kbase_tlstream *stream, + const void *lpu); +void __kbase_tlstream_tl_event_atom_softstop_ex( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softstop_issue( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softjob_start( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softjob_end( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_arb_granted( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_started( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_stop_requested( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_stopped( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_jd_gpu_soft_reset( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_pm_state( + struct kbase_tlstream *stream, + u32 core_type, + u64 core_state_bitset); +void __kbase_tlstream_aux_pagefault( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 as_nr, + u64 page_cnt_change); +void __kbase_tlstream_aux_pagesalloc( + struct kbase_tlstream *stream, + u32 ctx_nr, + u64 page_cnt); +void __kbase_tlstream_aux_devfreq_target( + struct kbase_tlstream *stream, + u64 target_freq); +void __kbase_tlstream_aux_protected_enter_start( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_enter_end( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_leave_start( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_leave_end( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_jit_stats( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 bid, + u32 max_allocs, + u32 allocs, + u32 va_pages, + u32 ph_pages); +void __kbase_tlstream_aux_event_job_slot( + struct kbase_tlstream *stream, + const void *ctx, + u32 slot_nr, + u32 atom_nr, + u32 event); +void __kbase_tlstream_tl_kbase_new_device( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_gpu_core_count, + u32 kbase_device_max_num_csgs, + u32 kbase_device_as_count); +void __kbase_tlstream_tl_kbase_device_program_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 gpu_cmdq_grp_handle, + u32 kbase_device_csg_slot_index); +void __kbase_tlstream_tl_kbase_device_deprogram_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index); +void __kbase_tlstream_tl_kbase_new_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_id); +void __kbase_tlstream_tl_kbase_del_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id); +void __kbase_tlstream_tl_kbase_ctx_assign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_as_index); +void __kbase_tlstream_tl_kbase_ctx_unassign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id); +void __kbase_tlstream_tl_kbase_new_kcpuqueue( + struct kbase_tlstream *stream, + const void 
*kcpu_queue, + u32 kernel_ctx_id, + u32 kcpuq_num_pending_cmds); +void __kbase_tlstream_tl_kbase_del_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, + u32 cqs_obj_compare_value); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 debugcopy_dst_size); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr_dest, + u64 jit_alloc_va_pages, + u64 jit_alloc_commit_pages, + u64 jit_alloc_extent, + u32 jit_alloc_jit_id, + u32 jit_alloc_bin_id, + u32 jit_alloc_max_allocations, + u32 jit_alloc_flags, + u32 jit_alloc_usage_id); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 jit_alloc_jit_id); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr, + u64 jit_alloc_mmu_flags); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_free_pages_used); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle); + +struct kbase_tlstream; + +/** + * KBASE_TLSTREAM_TL_NEW_CTX - + * object ctx is created + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @ctx_nr: Kernel context number + * @tgid: Thread Group Id + */ +#define KBASE_TLSTREAM_TL_NEW_CTX( \ + kbdev, \ + ctx, \ + ctx_nr, \ + tgid \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, ctx_nr, 
tgid); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_GPU - + * object gpu is created + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + * @gpu_id: Name of the GPU object + * @core_count: Number of cores this GPU hosts + */ +#define KBASE_TLSTREAM_TL_NEW_GPU( \ + kbdev, \ + gpu, \ + gpu_id, \ + core_count \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu, gpu_id, core_count); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_LPU - + * object lpu is created + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + * @lpu_nr: Sequential number assigned to the newly created LPU + * @lpu_fn: Property describing functional abilities of this LPU + */ +#define KBASE_TLSTREAM_TL_NEW_LPU( \ + kbdev, \ + lpu, \ + lpu_nr, \ + lpu_fn \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu, lpu_nr, lpu_fn); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_ATOM - + * object atom is created + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @atom_nr: Sequential number of an atom + */ +#define KBASE_TLSTREAM_TL_NEW_ATOM( \ + kbdev, \ + atom, \ + atom_nr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_atom( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, atom_nr); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_AS - + * address space object is created + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @as_nr: Address space number + */ +#define KBASE_TLSTREAM_TL_NEW_AS( \ + kbdev, \ + address_space, \ + as_nr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, as_nr); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_DEL_CTX - + * context is destroyed + * + * @kbdev: Kbase device + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_DEL_CTX( \ + kbdev, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_del_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_DEL_ATOM - + * atom is destroyed + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_DEL_ATOM( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_del_atom( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU - + * lpu is deleted with gpu + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU( \ + kbdev, \ + lpu, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_lifelink_lpu_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu, gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU - + * address space is deleted with gpu + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @gpu: Name of the GPU object + */ +#define 
KBASE_TLSTREAM_TL_LIFELINK_AS_GPU( \ + kbdev, \ + address_space, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_lifelink_as_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_CTX_LPU - + * context is retained by lpu + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_RET_CTX_LPU( \ + kbdev, \ + ctx, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_ctx_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_CTX - + * atom is retained by context + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_CTX( \ + kbdev, \ + atom, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_LPU - + * atom is retained by lpu + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @lpu: Name of the Logical Processing Unit object + * @attrib_match_list: List containing match operator attributes + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_LPU( \ + kbdev, \ + atom, \ + lpu, \ + attrib_match_list \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, lpu, attrib_match_list); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_CTX_LPU - + * context is released by lpu + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_NRET_CTX_LPU( \ + kbdev, \ + ctx, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_ctx_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - + * atom is released by context + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX( \ + kbdev, \ + atom, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - + * atom is released by lpu + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU( \ + kbdev, \ + atom, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_AS_CTX - + * address space is retained by context + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_RET_AS_CTX( \ + kbdev, \ + address_space, \ + ctx \ + ) \ + do { \ + int enabled = 
atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_as_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_AS_CTX - + * address space is released by context + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_NRET_AS_CTX( \ + kbdev, \ + address_space, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_as_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_AS - + * atom is retained by address space + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @address_space: Name of the address space object + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_AS( \ + kbdev, \ + atom, \ + address_space \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, address_space); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_AS - + * atom is released by address space + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @address_space: Name of the address space object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_AS( \ + kbdev, \ + atom, \ + address_space \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, address_space); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - + * atom job slot attributes + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @descriptor: Job descriptor address + * @affinity: Job affinity + * @config: Job config + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG( \ + kbdev, \ + atom, \ + descriptor, \ + affinity, \ + config \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_config( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, descriptor, affinity, config); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - + * atom priority + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @prio: Atom priority + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY( \ + kbdev, \ + atom, \ + prio \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_priority( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, prio); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - + * atom state + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @state: Atom state + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( \ + kbdev, \ + atom, \ + state \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_state( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, state); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - + * atom caused priority change + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & 
BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_prioritized( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - + * jit done for atom + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @edit_addr: Address edited by jit + * @new_addr: Address placed into the edited location + * @jit_flags: Flags specifying the special requirements for + * the JIT allocation. + * @mem_flags: Flags defining the properties of a memory region + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * @com_pgs: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @va_pgs: The minimum number of virtual pages required + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( \ + kbdev, \ + atom, \ + edit_addr, \ + new_addr, \ + jit_flags, \ + mem_flags, \ + j_id, \ + com_pgs, \ + extent, \ + va_pgs \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jit( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, edit_addr, new_addr, jit_flags, mem_flags, j_id, com_pgs, extent, va_pgs); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_JIT_USEDPAGES - + * used pages for jit + * + * @kbdev: Kbase device + * @used_pages: Number of pages used for jit + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + */ +#define KBASE_TLSTREAM_TL_JIT_USEDPAGES( \ + kbdev, \ + used_pages, \ + j_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_jit_usedpages( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + used_pages, j_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO - + * Information about JIT allocations + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @va_pgs: The minimum number of virtual pages required + * @com_pgs: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * @bin_id: The JIT allocation bin, used in conjunction with + * max_allocations to limit the number of each + * type of JIT allocation. + * @max_allocs: Maximum allocations allowed in this bin. + * @jit_flags: Flags specifying the special requirements for + * the JIT allocation. + * @usg_id: A hint about which allocation should be reused. + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( \ + kbdev, \ + atom, \ + va_pgs, \ + com_pgs, \ + extent, \ + j_id, \ + bin_id, \ + max_allocs, \ + jit_flags, \ + usg_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jitallocinfo( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, va_pgs, com_pgs, extent, j_id, bin_id, max_allocs, jit_flags, usg_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO - + * Information about JIT frees + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. 
+ */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO( \ + kbdev, \ + atom, \ + j_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jitfreeinfo( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, j_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - + * address space attributes + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @transtab: Configuration of the TRANSTAB register + * @memattr: Configuration of the MEMATTR register + * @transcfg: Configuration of the TRANSCFG register (or zero if not present) + */ +#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG( \ + kbdev, \ + address_space, \ + transtab, \ + memattr, \ + transcfg \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_as_config( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, transtab, memattr, transcfg); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - + * softstop event on given lpu + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( \ + kbdev, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_lpu_softstop( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - + * atom softstopped + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softstop_ex( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - + * atom softstop issued + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softstop_issue( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - + * atom soft job has started + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softjob_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - + * atom soft job has completed + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softjob_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED - + * Arbiter has granted gpu access + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + 
__kbase_tlstream_tl_event_arb_granted( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STARTED - + * Driver is running again and able to process jobs + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STARTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_started( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED - + * Arbiter has requested driver to stop using gpu + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_stop_requested( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED - + * Driver has stopped using gpu + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_stopped( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - + * gpu soft reset + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_jd_gpu_soft_reset( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PM_STATE - + * PM state + * + * @kbdev: Kbase device + * @core_type: Core type (shader, tiler, l2 cache, l3 cache) + * @core_state_bitset: 64bits bitmask reporting power state of the cores + * (1-ON, 0-OFF) + */ +#define KBASE_TLSTREAM_AUX_PM_STATE( \ + kbdev, \ + core_type, \ + core_state_bitset \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pm_state( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + core_type, core_state_bitset); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PAGEFAULT - + * Page fault + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @as_nr: Address space number + * @page_cnt_change: Number of pages to be added + */ +#define KBASE_TLSTREAM_AUX_PAGEFAULT( \ + kbdev, \ + ctx_nr, \ + as_nr, \ + page_cnt_change \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagefault( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, as_nr, page_cnt_change); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PAGESALLOC - + * Total alloc pages change + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @page_cnt: Number of pages used by the context + */ +#define KBASE_TLSTREAM_AUX_PAGESALLOC( \ + kbdev, \ + ctx_nr, \ + page_cnt \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagesalloc( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, page_cnt); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - + * New device frequency target + * + * @kbdev: Kbase device + * @target_freq: 
New target frequency + */ +#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET( \ + kbdev, \ + target_freq \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_devfreq_target( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + target_freq); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - + * enter protected mode start + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_enter_start( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - + * enter protected mode end + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_enter_end( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - + * leave protected mode start + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_leave_start( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - + * leave protected mode end + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_leave_end( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_JIT_STATS - + * per-bin JIT statistics + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @bid: JIT bin id + * @max_allocs: Maximum allocations allowed in this bin. + * @allocs: Number of active allocations in this bin + * @va_pages: Number of virtual pages allocated in this bin + * @ph_pages: Number of physical pages allocated in this bin + */ +#define KBASE_TLSTREAM_AUX_JIT_STATS( \ + kbdev, \ + ctx_nr, \ + bid, \ + max_allocs, \ + allocs, \ + va_pages, \ + ph_pages \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_jit_stats( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - + * event on a given job slot + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @slot_nr: Job slot number + * @atom_nr: Sequential number of an atom + * @event: Event type. 
One of TL_JS_EVENT values + */ +#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( \ + kbdev, \ + ctx, \ + slot_nr, \ + atom_nr, \ + event \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_event_job_slot( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx, slot_nr, atom_nr, event); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE - + * New KBase Device + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @kbase_device_gpu_core_count: The number of gpu cores in the physical hardware + * @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports + * @kbase_device_as_count: The number of address spaces the physical hardware has available + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ + kbdev, \ + kbase_device_id, \ + kbase_device_gpu_core_count, \ + kbase_device_max_num_csgs, \ + kbase_device_as_count \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - + * CSG is programmed to a slot + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace + * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + */ +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + gpu_cmdq_grp_handle, \ + kbase_device_csg_slot_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG - + * CSG is deprogrammed from a slot + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + */ +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_CTX - + * New KBase Context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @kbase_device_id: The id of the physical hardware + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEL_CTX - + * Delete KBase Context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + */ +#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS - + * Address Space is assigned to a KBase context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @kbase_device_as_index: The index of the device address space being assigned + */ +#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_as_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS - + * Address Space is unassigned from a KBase context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + */ +#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE - + * New KCPU Queue + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @kernel_ctx_id: Unique ID for the KBase Context + * @kcpuq_num_pending_cmds: Number of commands already enqueued + * in the KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ + kbdev, \ + kcpu_queue, \ + 
kernel_ctx_id, \ + kcpuq_num_pending_cmds \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE - + * Delete KCPU Queue + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - + * KCPU Queue enqueues Signal on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @fence: Fence object handle + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT - + * KCPU Queue enqueues Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @fence: Fence object handle + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU ptr + * @cqs_obj_compare_value: Semaphore value that should be exceeded + * for the WAIT to pass + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + cqs_obj_compare_value \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * End array of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET - + * Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET - + * Array item of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET - + * End array of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * Begin array of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * 
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * Array item of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @debugcopy_dst_size: Debug Copy destination size + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue, \ + debugcopy_dst_size \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * End array of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - + * KCPU Queue enqueues Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - + * KCPU Queue enqueues Unmap Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - + * KCPU Queue enqueues Unmap Import ignoring reference count + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * Begin array of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * Array item of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_alloc_gpu_alloc_addr_dest: The GPU virtual address to write + * the JIT allocated GPU virtual address to + * @jit_alloc_va_pages: The minimum number of virtual pages required + * @jit_alloc_commit_pages: The minimum number of physical pages which + * should back the allocation + * @jit_alloc_extent: Granularity of physical pages to grow the allocation + * by during a fault + * @jit_alloc_jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. Zero is not a valid value + * @jit_alloc_bin_id: The JIT allocation bin, used in conjunction with + * max_allocations to limit the number of each type of JIT allocation + * @jit_alloc_max_allocations: The maximum number of allocations + * allowed within the bin specified by bin_id. Should be the same for all + * JIT allocations within the same bin. + * @jit_alloc_flags: Flags specifying the special requirements for the + * JIT allocation + * @jit_alloc_usage_id: A hint about which allocation should be + * reused. 
The kernel should attempt to use a previous allocation with the same + * usage_id + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_gpu_alloc_addr_dest, \ + jit_alloc_va_pages, \ + jit_alloc_commit_pages, \ + jit_alloc_extent, \ + jit_alloc_jit_id, \ + jit_alloc_bin_id, \ + jit_alloc_max_allocations, \ + jit_alloc_flags, \ + jit_alloc_usage_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * End array of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - + * Begin array of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - + * Array item of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_alloc_jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. Zero is not a valid value + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_jit_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - + * End array of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - + * KCPU Queue starts a Signal on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - + * KCPU Queue ends a Signal on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - + * KCPU Queue starts a Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - + * KCPU Queue ends a Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - + * KCPU Queue starts a Wait on an array of Cross Queue Sync Objects + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - + * KCPU Queue ends a Wait on an array of Cross Queue Sync Objects + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define 
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET -
+ * KCPU Queue executes a Set on an array of Cross Queue Sync Objects
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START -
+ * KCPU Queue starts an array of Debug Copies
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END -
+ * KCPU Queue ends an array of Debug Copies
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START -
+ * KCPU Queue starts a Map Import
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END -
+ * KCPU Queue ends a Map Import
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START -
+ * KCPU Queue starts an Unmap Import
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END -
+ * KCPU Queue ends an Unmap Import
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START -
+ * KCPU Queue starts an Unmap Import ignoring reference count
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END -
+ * KCPU Queue ends an Unmap Import ignoring reference count
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START -
+ * KCPU Queue starts an array of JIT Allocs
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ * Begin array of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ * Array item of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address
+ * @jit_alloc_mmu_flags: The MMU flags for the JIT allocation
+ */
+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \
+	kbdev, \
+	kcpu_queue, \
+	jit_alloc_gpu_alloc_addr, \
+	jit_alloc_mmu_flags \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ * End array of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START -
+ * KCPU Queue starts an array of JIT Frees
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ * Begin array of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ * Array item of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @jit_free_pages_used: The actual number of pages used by the JIT
+ * allocation
+ */
+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
+	kbdev, \
+	kcpu_queue, \
+	jit_free_pages_used \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ * End array of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER -
+ * KCPU Queue executes an Error Barrier
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER( \
+	kbdev, \
+	kcpu_queue \
+	) \
+	do { } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW -
+ * An overflow has happened with the CSFFW Timeline stream
+ *
+ * @kbdev: Kbase device
+ * @csffw_timestamp: Timestamp of a CSFFW event
+ * @csffw_cycle: Cycle number of a CSFFW event
+ */
+#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \
+	kbdev, \
+	csffw_timestamp, \
+	csffw_cycle \
+	) \
+	do { } while (0)
+
+
+/* Gator tracepoints are hooked into the TLSTREAM interface.
+ * When the following tracepoints are called, the corresponding
+ * Gator tracepoint will be called as well.
+ */
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+/* `event` is one of TL_JS_EVENT values here.
+ * The values of TL_JS_EVENT are guaranteed to match
+ * with corresponding GATOR_JOB_SLOT values.
+ */
+#undef KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT
+#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, \
+	context, slot_nr, atom_nr, event) \
+	do { \
+		int enabled = atomic_read(&kbdev->timeline_flags); \
+		kbase_trace_mali_job_slots_event(kbdev->id, \
+			GATOR_MAKE_EVENT(event, slot_nr), \
+			context, (u8) atom_nr); \
+		if (enabled & TLSTREAM_ENABLED) \
+			__kbase_tlstream_aux_event_job_slot( \
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				context, slot_nr, atom_nr, event); \
+	} while (0)
+
+#undef KBASE_TLSTREAM_AUX_PM_STATE
+#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state) \
+	do { \
+		int enabled = atomic_read(&kbdev->timeline_flags); \
+		kbase_trace_mali_pm_status(kbdev->id, \
+			core_type, state); \
+		if (enabled & TLSTREAM_ENABLED) \
+			__kbase_tlstream_aux_pm_state( \
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				core_type, state); \
+	} while (0)
+
+#undef KBASE_TLSTREAM_AUX_PAGEFAULT
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, \
+	ctx_nr, as_nr, page_cnt_change) \
+	do { \
+		int enabled = atomic_read(&kbdev->timeline_flags); \
+		kbase_trace_mali_page_fault_insert_pages(kbdev->id, \
+			as_nr, \
+			page_cnt_change); \
+		if (enabled & TLSTREAM_ENABLED) \
+			__kbase_tlstream_aux_pagefault( \
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx_nr, as_nr, page_cnt_change); \
+	} while (0)
+
+/* kbase_trace_mali_total_alloc_pages_change is handled differently here.
+ * We stream the total number of pages allocated for `kbdev` rather
+ * than `page_count`, which is per-context.
+ */
+#undef KBASE_TLSTREAM_AUX_PAGESALLOC
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_cnt) \
+	do { \
+		int enabled = atomic_read(&kbdev->timeline_flags); \
+		u32 global_pages_count = \
+			atomic_read(&kbdev->memdev.used_pages); \
+		\
+		kbase_trace_mali_total_alloc_pages_change(kbdev->id, \
+			global_pages_count); \
+		if (enabled & TLSTREAM_ENABLED) \
+			__kbase_tlstream_aux_pagesalloc( \
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx_nr, page_cnt); \
+	} while (0)
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+/* clang-format on */
+#endif
-- 
2.20.1
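Reviewer note (not part of the patch): the KCPU-queue macros above are the stub
variants, so every call site compiles to nothing. A minimal sketch of how a
caller would typically use one start/end pair is shown below; the function and
variable names are hypothetical, and it assumes the kbase headers that define
struct kbase_device and these tracepoint macros are in scope.

/* Illustrative only. With the stub definitions above, both macro
 * invocations expand to do { } while (0) and emit no code; in a
 * timeline-enabled build they would write start/end events for the
 * fence-signal command to the timeline stream instead.
 */
static void example_trace_fence_signal(struct kbase_device *kbdev,
				       const void *kcpu_queue)
{
	KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(
		kbdev, kcpu_queue);

	/* ... perform the actual fence signal here ... */

	KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(
		kbdev, kcpu_queue);
}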