From: Jiyu Yang
Date: Mon, 23 May 2016 08:16:30 +0000 (+0800)
Subject: PD#125571 midgard r11p0 rel for t82x and t83x
X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=c2f86ae4b55efb410a81957f974e384892c7e1d3;p=GitHub%2FLineageOS%2FG12%2Fandroid_hardware_amlogic_kernel-modules_mali-driver.git

PD#125571 midgard r11p0 rel for t82x and t83x

commit d28e5a582d83c19eafe00bc5e27f378ad67d82d0
Author: Jiyu Yang
Date:   Tue May 24 16:03:58 2016 +0800

    PD#125571 update t82x t83x lib for Android L

    Change-Id: If605b9f9e98dfb571698363fe1c306771b3536a7

commit a835f407444207c4110c40bb3d9c0be2192bc1e2
Author: Jiyu Yang
Date:   Mon May 23 20:56:27 2016 +0800

    PD#125571 add HAL_PIXEL_FORMAT_YCrCb_420_SP

    This will be removed in Android N.

    Change-Id: If23ba5d863b8f9d207b2923e82753949cbd0af55

commit 398ec136c3f1af79f6aa9c2c00912738cd66aed5
Author: Jiyu Yang
Date:   Mon May 23 13:00:51 2016 +0800

    PD#125571 update library for t82x and t83x

    Change-Id: Id16041676497fa91072a9af0586d968420c16536

commit b61cc518957e374975abe11ce743c792261b770a
Author: Jiyu Yang
Date:   Fri May 20 14:43:04 2016 +0800

    TX041-SW-99002-r11p0-00rel0

    Change-Id: I6d18ef50fad81b939cf3bff21b108456258e63de

commit 210fd8146e5b64a83ba674abdfb8edbd53b22097
Author: Jiyu Yang
Date:   Fri May 20 14:39:56 2016 +0800

    TX041-SW-99002-r10p0-00rel0

    Change-Id: I9d6aad092a3e69236c38f078ab6633d029a07997

commit 017572cb09550913ecd52e43ff2eb0754c5115c3
Author: Jiyu Yang
Date:   Fri May 20 14:37:31 2016 +0800

    TX041-SW-99002-r9p0-05rel0

    Change-Id: Iab5b27d200621612c36deec6d1fef049af65db19

commit 4e9d6a0f22046d717c7f2599d5c89816e37d35d9
Author: Jiyu Yang
Date:   Fri May 20 14:36:15 2016 +0800

    TX041-SW-99002-r8p0-02rel0

    Change-Id: Ic59759da9c59a5055595d96f9826ec1f98bdf8ce

Change-Id: I95c443fdc26dd1143ee90b2debdb40f94905e61c
---
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-l.so b/lib/t82x_ion/libGLES_mali_default_8a_32-l.so
index f48cc73..6285562 100644
Binary files a/lib/t82x_ion/libGLES_mali_default_8a_32-l.so and b/lib/t82x_ion/libGLES_mali_default_8a_32-l.so differ
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-m.so b/lib/t82x_ion/libGLES_mali_default_8a_32-m.so
deleted file mode 120000
index 54f3789..0000000
--- a/lib/t82x_ion/libGLES_mali_default_8a_32-m.so
+++ /dev/null
@@ -1 +0,0 @@
-libGLES_mali_default_8a_32-l.so
\ No newline at end of file
diff --git a/lib/t82x_ion/libGLES_mali_default_8a_32-m.so b/lib/t82x_ion/libGLES_mali_default_8a_32-m.so
new file mode 100644
index 0000000..f0b72d0
Binary files /dev/null and b/lib/t82x_ion/libGLES_mali_default_8a_32-m.so differ
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32.so b/lib/t83x_ion/libGLES_mali_default_7a_32.so
deleted file mode 120000
index 4c92be9..0000000
--- a/lib/t83x_ion/libGLES_mali_default_7a_32.so
+++ /dev/null
@@ -1 +0,0 @@
-libGLES_mali_default_8a_32.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-l.so b/lib/t83x_ion/libGLES_mali_default_8a_32-l.so
index 2f92f8f..e0446d6 100644
Binary files a/lib/t83x_ion/libGLES_mali_default_8a_32-l.so and b/lib/t83x_ion/libGLES_mali_default_8a_32-l.so differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-m.so b/lib/t83x_ion/libGLES_mali_default_8a_32-m.so
deleted file mode 120000
index 54f3789..0000000
--- a/lib/t83x_ion/libGLES_mali_default_8a_32-m.so
+++ /dev/null
@@ -1 +0,0 @@
-libGLES_mali_default_8a_32-l.so
\ No newline at end of file
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_32-m.so b/lib/t83x_ion/libGLES_mali_default_8a_32-m.so
new file mode 100644
index 0000000..fd9c84d
Binary files /dev/null and
b/lib/t83x_ion/libGLES_mali_default_8a_32-m.so differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-m.so b/lib/t83x_ion/libGLES_mali_default_8a_64-m.so
deleted file mode 120000
index 8de2766..0000000
--- a/lib/t83x_ion/libGLES_mali_default_8a_64-m.so
+++ /dev/null
@@ -1 +0,0 @@
-libGLES_mali_default_8a_64-l.so
\ No newline at end of file
diff --git a/t83x/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/t83x/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
index acd2c6d..46b704b 100755
--- a/t83x/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
+++ b/t83x/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -42,6 +42,22 @@ for details.
   simultaneously, 0 otherwise.
 - Value between 0 and 63 (including). If job throttle is enabled, this is one
   less than the number of cores that can be started simultaneously.
+- power_model : Sets power model parameters. Note that this model was designed for the Juno
+  platform, and may not be suitable for other platforms. A structure containing :
+  - compatible: Should be arm,mali-simple-power-model
+  - voltage: Voltage at reference point. Specified in mV.
+  - frequency: Frequency at reference point. Specified in MHz.
+  - dynamic-power: Dynamic power at reference frequency and voltage. Specified in mW.
+  - static-power: Static power at reference frequency. Specified in mW.
+  - ts: An array containing coefficients for the temperature scaling factor.
+    Used as : tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], where T = temperature
+  - thermal-zone: A string identifying the thermal zone used for the GPU
+- system-coherency : Sets the coherency protocol to be used for coherent
+  accesses made from the GPU.
+  If not set then no coherency is used.
+  - 0 : ACE-Lite
+  - 1 : ACE
+  - 31 : No coherency
 
 Example for a Mali-T602:
 
@@ -64,4 +80,13 @@ gpu@0xfc010000 {
 	    160000 925000,
 	    100000 912500,
 	>;
+	power_model {
+		compatible = "arm,mali-simple-power-model";
+		voltage = <800>;
+		frequency = <500>;
+		static-power = <500>;
+		dynamic-power = <1500>;
+		ts = <20000 2000 (-20) 2>;
+		thermal-zone = "gpu";
+	};
 };
diff --git a/t83x/kernel/drivers/base/dma_buf_lock/src/sconscript b/t83x/kernel/drivers/base/dma_buf_lock/src/sconscript
index 251c9a6..b8724f1 100755
--- a/t83x/kernel/drivers/base/dma_buf_lock/src/sconscript
+++ b/t83x/kernel/drivers/base/dma_buf_lock/src/sconscript
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -17,20 +17,17 @@ import os import re Import('env') -if env['v'] != '1': - env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}' - src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')] if env.GetOption('clean') : # Clean module env.Execute(Action("make clean", '[CLEAN] dma_buf_lock')) cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, []) - env.ProgTarget('dma_buf_lock', cmd) + env.KernelObjTarget('dma_buf_lock', cmd) else: # Build module makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR') cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction]) - env.ProgTarget('dma_buf_lock', cmd) + env.KernelObjTarget('dma_buf_lock', cmd) diff --git a/t83x/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/t83x/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c index 852c550..6270a52 100755 --- a/t83x/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c +++ b/t83x/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c @@ -382,7 +382,22 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, } /* alloc ready, let's export it */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)) + { + struct dma_buf_export_info export_info = { + .exp_name = "dma_buf_te", +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)) + .owner = THIS_MODULE, +#endif + .ops = &dma_buf_te_ops, + .size = alloc->nr_pages << PAGE_SHIFT, + .flags = O_CLOEXEC | O_RDWR, + .priv = alloc, + }; + + dma_buf = dma_buf_export(&export_info); + } +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) dma_buf = dma_buf_export(alloc, &dma_buf_te_ops, alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL); #else diff --git a/t83x/kernel/drivers/base/dma_buf_test_exporter/sconscript b/t83x/kernel/drivers/base/dma_buf_test_exporter/sconscript index bfb8a99..09fe7f3 100755 --- a/t83x/kernel/drivers/base/dma_buf_test_exporter/sconscript +++ b/t83x/kernel/drivers/base/dma_buf_test_exporter/sconscript @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -17,9 +17,6 @@ import os Import('env') -if env['v'] != '1': - env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}' - src = [Glob('#kernel/drivers/base/dma_buf_test_exporter/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/dma_buf_test_exporter/K*')] env.Append( CPPPATH = '#kernel/include' ) @@ -27,10 +24,10 @@ env.Append( CPPPATH = '#kernel/include' ) if env.GetOption('clean') : env.Execute(Action("make clean", '[CLEAN] dma-buf-test-exporter')) cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, []) - env.ProgTarget('dma-buf-test-exporter', cmd) + env.KernelObjTarget('dma-buf-test-exporter', cmd) else: makeAction=Action("cd ${SOURCE.dir} && make && ( ( [ -f dma-buf-test-exporter.ko ] && cp dma-buf-test-exporter.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/dma-buf-test-exporter.ko)", '$MAKECOMSTR') cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [makeAction]) - env.ProgTarget('dma-buf-test-exporter', cmd) + env.KernelObjTarget('dma-buf-test-exporter', cmd) diff --git a/t83x/kernel/drivers/base/kds/sconscript b/t83x/kernel/drivers/base/kds/sconscript index 7fc1bc4..91a79fd 100755 --- a/t83x/kernel/drivers/base/kds/sconscript +++ b/t83x/kernel/drivers/base/kds/sconscript @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -33,9 +33,6 @@ for line in open(linux_config_file, 'r'): # still allow for building kds_test module. build_kds = 0 -if env['v'] != '1': - env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}' - src = [Glob('#kernel/drivers/base/kds/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/kds/K*')] env.Append( CPPPATH = '#kernel/include' ) @@ -48,19 +45,19 @@ if env.GetOption('clean') : if build_kds or (int(env['unit']) == 1): env.Execute(Action("make clean", '[CLEAN] kds')) cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, []) - env.ProgTarget('kds', cmd) + env.KernelObjTarget('kds', cmd) else: # Build KDS module if build_kds: makeAction=Action("cd ${SOURCE.dir} && make kds && cp kds.ko $STATIC_LIB_PATH/", '$MAKECOMSTR') cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [makeAction]) - env.ProgTarget('kds', cmd) + env.KernelObjTarget('kds', cmd) # Build KDS test module if int(env['unit']) == 1: makeActionTest=Action("cd ${SOURCE.dir} && make kds_test && cp kds_test.ko $STATIC_LIB_PATH/", '$MAKECOMSTR') cmdTest = env.Command('$STATIC_LIB_PATH/kds_test.ko', src, [makeActionTest]) - env.ProgTarget('kds', cmdTest) + env.KernelObjTarget('kds', cmdTest) if build_kds: - Depends(cmdTest, cmd) + env.Depends(cmdTest, cmd) diff --git a/t83x/kernel/drivers/base/ump/src/imports/ion/sconscript b/t83x/kernel/drivers/base/ump/src/imports/ion/sconscript index 749bb1f..cff24c8 100755 --- a/t83x/kernel/drivers/base/ump/src/imports/ion/sconscript +++ b/t83x/kernel/drivers/base/ump/src/imports/ion/sconscript @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -23,9 +23,6 @@ env_ion = env.Clone() if env_ion['ump_ion'] != '1': Return() -if env_ion['v'] != '1': - env_ion['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}' - # Source files required for UMP. ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')] @@ -49,4 +46,4 @@ for p in patterns: Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p)) env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko') -env_ion.ProgTarget('ump', cmd) +env_ion.KernelObjTarget('ump', cmd) diff --git a/t83x/kernel/drivers/base/ump/src/sconscript b/t83x/kernel/drivers/base/ump/src/sconscript index 9cec770..d706e1e 100755 --- a/t83x/kernel/drivers/base/ump/src/sconscript +++ b/t83x/kernel/drivers/base/ump/src/sconscript @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,9 +21,6 @@ Import('env') # Clone the environment so changes don't affect other build files env_ump = env.Clone() -if env_ump['v'] != '1': - env_ump['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}' - # Source files required for UMP. ump_src = [Glob('#kernel/drivers/base/ump/src/linux/*.c'), Glob('#kernel/drivers/base/ump/src/common/*.c'), Glob('#kernel/drivers/base/ump/src/imports/*/*.c')] @@ -58,7 +55,7 @@ if env['os'] != 'android': if not kds_in_kernel: env.Depends('$STATIC_LIB_PATH/ump.ko', '$STATIC_LIB_PATH/kds.ko') -env_ump.ProgTarget('ump', cmd) +env_ump.KernelObjTarget('ump', cmd) SConscript( 'imports/sconscript' ) diff --git a/t83x/kernel/drivers/gpu/arm/midgard/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/Kbuild index 899b9ef..687a512 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/Kbuild +++ b/t83x/kernel/drivers/gpu/arm/midgard/Kbuild @@ -15,7 +15,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r7p0-02rel0" +MALI_RELEASE_NAME ?= "r11p0-00rel0" # Paths required for build KBASE_PATH = $(src) @@ -88,7 +88,6 @@ SRC := \ mali_kbase_context.c \ mali_kbase_pm.c \ mali_kbase_config.c \ - mali_kbase_security.c \ mali_kbase_instr.c \ mali_kbase_vinstr.c \ mali_kbase_softjobs.c \ @@ -111,13 +110,15 @@ SRC := \ mali_kbase_debug_job_fault.c \ mali_kbase_smc.c \ mali_kbase_mem_pool.c \ - mali_kbase_mem_pool_debugfs.c + mali_kbase_mem_pool_debugfs.c \ + mali_kbase_tlstream.c -ifeq ($(CONFIG_MALI_MIPE_ENABLED),y) - SRC += mali_kbase_tlstream.c - ifeq ($(MALI_UNIT_TEST),1) - SRC += mali_kbase_tlstream_test.c - endif +ifeq ($(MALI_UNIT_TEST),1) + SRC += mali_kbase_tlstream_test.c +endif + +ifeq ($(MALI_CUSTOMER_RELEASE),0) + SRC += mali_kbase_regs_dump_debugfs.c endif # Job Scheduler Policy: Completely Fair Scheduler @@ -192,13 +193,6 @@ endif endif ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y) - SRC += platform/devicetree/mali_kbase_runtime_pm.c - SRC += platform/devicetree/mali_kbase_config_devicetree.c - SRC += platform/devicetree/mali_clock.c - SRC += platform/devicetree/mpgpu.c - SRC += platform/devicetree/meson_main2.c - SRC += platform/devicetree/platform_gx.c - SRC += platform/devicetree/scaling.c ccflags-y += -I$(src)/platform/devicetree endif @@ -232,3 +226,19 @@ mali_kbase-y += $(BACKEND:.c=.o) ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) 
subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) + +# Default to devicetree platform if neither a fake platform or a thirdparty +# platform is configured. +ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),) +CONFIG_MALI_PLATFORM_DEVICETREE := y +endif + +mali_kbase-$(CONFIG_MALI_PLATFORM_DEVICETREE) += \ + platform/devicetree/mali_clock.o \ + platform/devicetree/mpgpu.o \ + platform/devicetree/meson_main2.o \ + platform/devicetree/platform_gx.o \ + platform/devicetree/scaling.o \ + platform/devicetree/mali_kbase_runtime_pm.o \ + platform/devicetree/mali_kbase_config_devicetree.o +ccflags-$(CONFIG_MALI_PLATFORM_DEVICETREE) += -I$(src)/platform/devicetree diff --git a/t83x/kernel/drivers/gpu/arm/midgard/Kconfig b/t83x/kernel/drivers/gpu/arm/midgard/Kconfig index 1543043..8a33841 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/Kconfig +++ b/t83x/kernel/drivers/gpu/arm/midgard/Kconfig @@ -16,6 +16,7 @@ menuconfig MALI_MIDGARD tristate "Mali Midgard series support" + select GPU_TRACEPOINTS if ANDROID default n help Enable this option to build support for a ARM Mali Midgard GPU. @@ -23,53 +24,22 @@ menuconfig MALI_MIDGARD To compile this driver as a module, choose M here: this will generate a single module, called mali_kbase. -choice - prompt "Streamline support" - depends on MALI_MIDGARD - default MALI_TIMELINE_DISABLED - help - Select streamline support configuration. - -config MALI_TIMELINE_DISABLED - bool "Streamline support disabled" - help - Disable support for ARM Streamline Performance Analyzer. - - Timeline support will not be included in - kernel code. - Debug stream will not be generated. - config MALI_GATOR_SUPPORT bool "Streamline support via Gator" + depends on MALI_MIDGARD + default n help Adds diagnostic support for use with the ARM Streamline Performance Analyzer. You will need the Gator device driver already loaded before loading this driver when enabling Streamline debug support. - -config MALI_MIPE_ENABLED - bool "Streamline support via MIPE" - help - Adds diagnostic support for use with the ARM Streamline Performance Analyzer. - - Stream will be transmitted directly to Mali GPU library. - Compatible version of the library is required to read debug stream generated by kernel. - -endchoice + This is a legacy interface required by older versions of Streamline. config MALI_MIDGARD_DVFS - bool "Enable DVFS" - depends on MALI_MIDGARD + bool "Enable legacy DVFS" + depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE default n help - Choose this option to enable DVFS in the Mali Midgard DDK. - -config MALI_MIDGARD_RT_PM - bool "Enable Runtime power management" - depends on MALI_MIDGARD - depends on PM_RUNTIME - default n - help - Choose this option to enable runtime power management in the Mali Midgard DDK. + Choose this option to enable legacy DVFS in the Mali Midgard DDK. config MALI_MIDGARD_ENABLE_TRACE bool "Enable kbase tracing" @@ -79,13 +49,6 @@ config MALI_MIDGARD_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled -config MALI_MIDGARD_DEBUG_SYS - bool "Enable sysfs for the Mali Midgard DDK " - depends on MALI_MIDGARD && SYSFS - default n - help - Enables sysfs for the Mali Midgard DDK. 
Set/Monitor the Mali Midgard DDK - config MALI_DEVFREQ bool "devfreq support for Mali" depends on MALI_MIDGARD && PM_DEVFREQ @@ -107,9 +70,22 @@ menuconfig MALI_EXPERT Enabling this option and modifying the default settings may produce a driver with performance or other limitations. +config MALI_PRFCNT_SET_SECONDARY + bool "Use secondary set of performance counters" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option to use secondary set of performance counters. Kernel + features that depend on an access to the primary set of counters may + become unavailable. Enabling this option will prevent power management + from working optimally and may cause instrumentation tools to return + bogus results. + + If unsure, say N. + config MALI_DEBUG_SHADER_SPLIT_FS bool "Allow mapping of shader cores via sysfs" - depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT + depends on MALI_MIDGARD && MALI_EXPERT default n help Select this option to provide a sysfs entry for runtime configuration of shader @@ -128,10 +104,24 @@ config MALI_PLATFORM_FAKE choice prompt "Platform configuration" depends on MALI_MIDGARD && MALI_EXPERT - default MALI_PLATFORM_VEXPRESS + default MALI_PLATFORM_DEVICETREE help Select the SOC platform that contains a Mali Midgard GPU +config MALI_PLATFORM_DEVICETREE + bool "Device Tree platform" + depends on OF + help + Select this option to use Device Tree with the Mali driver. + + When using this option the Mali driver will get the details of the + GPU hardware from the Device Tree. This means that the same driver + binary can run on multiple platforms as long as all the GPU hardware + details are described in the device tree. + + Device Tree is the recommended method for the Mali driver platform + integration. + config MALI_PLATFORM_VEXPRESS depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) bool "Versatile Express" @@ -199,11 +189,4 @@ config MALI_SYSTEM_TRACE minimal overhead when not in use. Enable only if you know what you are doing. -config MALI_GPU_TRACEPOINTS - bool "Enable GPU tracepoints" - depends on MALI_MIDGARD && ANDROID - select GPU_TRACEPOINTS - help - Enables GPU tracepoints using Android trace event definitions. - source "drivers/gpu/arm/midgard/platform/Kconfig" diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild index cadd7d5..e38120a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -57,3 +57,7 @@ ifeq ($(CONFIG_MALI_NO_MALI),y) # HW error simulation BACKEND += backend/gpu/mali_kbase_model_error_generator.c endif + +ifeq ($(CONFIG_DEVFREQ_THERMAL),y) + BACKEND += backend/gpu/mali_kbase_power_model_simple.c +endif diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c index 92a14fa..2f3c41a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,4 +19,10 @@ #include #include +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) + kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); +} diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h index 42069fc..fe98691 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,5 +22,13 @@ #include "mali_kbase.h" #include "mali_base_kernel.h" +/** + * kbase_cache_set_coherency_mode() - Sets the system coherency mode + * in the GPU. + * @kbdev: Device pointer + * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE + */ +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode); #endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c index db97637..d25f84e 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,6 +18,9 @@ #include #include #include +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#endif #include #include @@ -234,7 +237,9 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) stat->private_data = NULL; #ifdef CONFIG_DEVFREQ_THERMAL - memcpy(&kbdev->devfreq_cooling->last_status, stat, sizeof(*stat)); + if (kbdev->devfreq_cooling) + memcpy(&kbdev->devfreq_cooling->last_status, stat, + sizeof(*stat)); #endif return 0; @@ -296,14 +301,9 @@ static void kbase_devfreq_exit(struct device *dev) int kbase_devfreq_init(struct kbase_device *kbdev) { -#ifdef CONFIG_DEVFREQ_THERMAL - struct devfreq_cooling_ops *callbacks = POWER_MODEL_CALLBACKS; -#endif struct devfreq_dev_profile *dp; int err; - dev_dbg(kbdev->dev, "Init Mali devfreq\n"); - if (!kbdev->clock) return -ENODEV; @@ -337,12 +337,20 @@ int kbase_devfreq_init(struct kbase_device *kbdev) } #ifdef CONFIG_DEVFREQ_THERMAL - if (callbacks) { - + err = kbase_power_model_simple_init(kbdev); + if (err && err != -ENODEV && err != -EPROBE_DEFER) { + dev_err(kbdev->dev, + "Failed to initialize simple power model (%d)\n", + err); + goto cooling_failed; + } + if (err == -EPROBE_DEFER) + goto cooling_failed; + if (err != -ENODEV) { kbdev->devfreq_cooling = of_devfreq_cooling_register_power( kbdev->dev->of_node, kbdev->devfreq, - callbacks); + &power_model_simple_ops); if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { err = PTR_ERR(kbdev->devfreq_cooling); dev_err(kbdev->dev, @@ -350,6 +358,8 @@ int kbase_devfreq_init(struct kbase_device *kbdev) err); goto cooling_failed; } + } else { + err = 0; } #endif @@ -360,8 +370,7 @@ cooling_failed: devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); #endif /* CONFIG_DEVFREQ_THERMAL */ opp_notifier_failed: - err = devfreq_remove_device(kbdev->devfreq); - if (err) + if (devfreq_remove_device(kbdev->devfreq)) dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); else kbdev->devfreq = NULL; @@ -376,7 +385,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Term Mali devfreq\n"); #ifdef CONFIG_DEVFREQ_THERMAL - devfreq_cooling_unregister(kbdev->devfreq_cooling); + if (kbdev->devfreq_cooling) + devfreq_cooling_unregister(kbdev->devfreq_cooling); #endif devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c index 591c013..d410cd2 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -83,3 +83,23 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, GPU_CONTROL_REG(L2_PRESENT_HI), NULL); } +void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + regdump->coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); + + /* We're done accessing the GPU registers for now. */ + kbase_pm_register_access_disable(kbdev); + } else { + /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ + regdump->coherency_features = + COHERENCY_FEATURE_BIT(COHERENCY_NONE) | + COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + } +} + diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c index 2c98707..4e70b34 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,7 @@ */ #include +#include #include #include #include @@ -78,6 +79,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, u32 irq_mask; int ret; u64 shader_cores_needed; + u32 prfcnt_config; KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx); @@ -151,9 +153,22 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, kbase_pm_request_l2_caches(kbdev); /* Configure */ + prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + { + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) + >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); + + if (arch_v6) + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; + } +#endif + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) - | PRFCNT_CONFIG_MODE_OFF, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), setup->dump_buffer & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -174,8 +189,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, setup->tiler_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_MANUAL, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h index 57c64f7..8ccc440 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -63,6 +63,9 @@ struct slot_rb { * @scheduling_timer: The timer tick used for rescheduling jobs * 
@timer_running: Is the timer running? The runpool_mutex must be * held whilst modifying this. + * @suspend_timer: Is the timer suspended? Set when a suspend + * occurs and cleared on resume. The runpool_mutex + * must be held whilst modifying this. * @reset_gpu: Set to a KBASE_RESET_xxx value (see comments) * @reset_workq: Work queue for performing the reset * @reset_work: Work item for performing the reset @@ -80,6 +83,7 @@ struct kbase_backend_data { struct hrtimer scheduling_timer; bool timer_running; + bool suspend_timer; atomic_t reset_gpu; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index a4b0c26..ddaae34 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,8 +15,6 @@ - - /* * Base kernel job manager APIs */ @@ -27,9 +25,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include #include @@ -41,12 +37,6 @@ #define beenthere(kctx, f, a...) \ dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS -u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; -u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; -u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; -#endif - #if KBASE_GPU_RESET_EN static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); static void kbasep_reset_timeout_worker(struct work_struct *data); @@ -84,37 +74,16 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), jc_head >> 32, kctx); -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS - { - u64 mask; - u32 value; - - if (0 == js) - mask = mali_js0_affinity_mask; - else if (1 == js) - mask = mali_js1_affinity_mask; - else - mask = mali_js2_affinity_mask; - - value = katom->affinity & (mask & 0xFFFFFFFF); - - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), - value, kctx); - - value = (katom->affinity >> 32) & ((mask>>32) & 0xFFFFFFFF); - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), - value, kctx); - } -#else kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), katom->affinity & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), katom->affinity >> 32, kctx); -#endif /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start */ cfg = kctx->as_nr; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; #ifndef CONFIG_MALI_COH_GPU cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; @@ -124,6 +93,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, cfg |= JS_CONFIG_START_MMU; cfg |= JS_CONFIG_THREAD_PRI(8); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && + (katom->atom_flags & KBASE_KATOM_FLAG_SECURE)) + cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { @@ -140,6 +113,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, 
JS_CONFIG_NEXT), cfg, kctx); + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), + katom->flush_id, kctx); /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. @@ -159,23 +135,26 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), kctx, kbase_jd_atom_id(kctx, katom)); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_atom_config(katom, jc_head, katom->affinity, cfg); + kbase_tlstream_tl_ret_ctx_lpu( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); kbase_tlstream_tl_ret_atom_as(katom, &kbdev->as[kctx->as_nr]); kbase_tlstream_tl_ret_atom_lpu( katom, - &kbdev->gpu_props.props.raw_props.js_features[js]); -#endif + &kbdev->gpu_props.props.raw_props.js_features[js], + "ctx_nr,atom_nr"); #ifdef CONFIG_GPU_TRACEPOINTS - if (kbase_backend_nr_atoms_submitted(kbdev, js) == 1) { + if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { /* If this is the only job on the slot, trace it as starting */ char js_string[16]; trace_gpu_sched_switch( kbasep_make_job_slot_string(js, js_string), ktime_to_ns(katom->start_timestamp), - (u32)katom->kctx, 0, katom->work_id); + (u32)katom->kctx->id, 0, katom->work_id); kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; } #endif @@ -228,6 +207,27 @@ static void kbasep_job_slot_update_head_start_timestamp( } } +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) +/** + * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint + * @kbdev: kbase device + * @i: job slot + * + * Get kbase atom by calling kbase_gpu_inspect for given job slot. + * Then use obtained katom and name of slot associated with the given + * job slot number in tracepoint call to the instrumentation module + * informing that given atom is no longer executed on given lpu (job slot). 
+ */ +static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i) +{ + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0); + + kbase_tlstream_tl_nret_atom_lpu(katom, + &kbdev->gpu_props.props.raw_props.js_features[i]); +} +#endif + void kbase_job_done(struct kbase_device *kbdev, u32 done) { unsigned long flags; @@ -288,9 +288,15 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) GATOR_JOB_SLOT_SOFT_STOPPED, i), NULL, 0); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_job_softstop(i); + +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) + kbasep_trace_tl_nret_atom_lpu( + kbdev, i); #endif + /* Soft-stopped job - read the value of * JS_TAIL so that the job chain can * be resumed */ @@ -437,6 +443,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) failed = done >> 16; finished = (done & 0xFFFF) | failed; + if (done) + end_timestamp = ktime_get(); } while (finished & (1 << i)); kbasep_job_slot_update_head_start_timestamp(kbdev, i, @@ -459,12 +467,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_EXPORT_TEST_API(kbase_job_done); static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, - u16 core_reqs) + struct kbase_jd_atom *katom) { bool soft_stops_allowed = true; - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { - if ((core_reqs & BASE_JD_REQ_T) != 0) + if (kbase_jd_katom_is_secure(katom)) { + soft_stops_allowed = false; + } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { + if ((katom->core_req & BASE_JD_REQ_T) != 0) soft_stops_allowed = false; } return soft_stops_allowed; @@ -508,12 +518,13 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, if (action == JS_COMMAND_SOFT_STOP) { bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, - core_reqs); + target_katom); if (!soft_stop_allowed) { #ifdef CONFIG_MALI_DEBUG - dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); + dev_dbg(kbdev->dev, + "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); #endif /* CONFIG_MALI_DEBUG */ return; } @@ -521,9 +532,51 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, /* We are about to issue a soft stop, so mark the atom as having * been soft stopped */ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; - } - if (action == JS_COMMAND_HARD_STOP) { + /* Mark the point where we issue the soft-stop command */ + kbase_tlstream_aux_issue_job_softstop(target_katom); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { + int i; + + for (i = 0; + i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + + KBASE_DEBUG_ASSERT(katom); + + /* For HW_ISSUE_8316, only 'bad' jobs attacking + * the system can cause this issue: normally, + * all memory should be allocated in multiples + * of 4 pages, and growable memory should be + * changed size in multiples of 4 pages. + * + * Whilst such 'bad' jobs can be cleared by a + * GPU reset, the locking up of a uTLB entry + * caused by the bad job could also stall other + * ASs, meaning that other ASs' jobs don't + * complete in the 'grace' period before the + * reset. 
We don't want to lose other ASs' jobs + * when they would normally complete fine, so we + * must 'poke' the MMU regularly to help other + * ASs complete */ + kbase_as_poking_timer_retain_atom( + kbdev, katom->kctx, katom); + } + } + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + } + } else if (action == JS_COMMAND_HARD_STOP) { bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, core_reqs); @@ -547,55 +600,21 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, * hard-stop fails, so it is safe to just return and * ignore the hard-stop request. */ - dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); + dev_warn(kbdev->dev, + "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); return; } target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; - } - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) && - action == JS_COMMAND_SOFT_STOP) { - int i; - - for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js); - i++) { - struct kbase_jd_atom *katom; - - katom = kbase_gpu_inspect(kbdev, js, i); - KBASE_DEBUG_ASSERT(katom); - - /* For HW_ISSUE_8316, only 'bad' jobs attacking the - * system can cause this issue: normally, all memory - * should be allocated in multiples of 4 pages, and - * growable memory should be changed size in multiples - * of 4 pages. - * - * Whilst such 'bad' jobs can be cleared by a GPU reset, - * the locking up of a uTLB entry caused by the bad job - * could also stall other ASs, meaning that other ASs' - * jobs don't complete in the 'grace' period before the - * reset. We don't want to lose other ASs' jobs when - * they would normally complete fine, so we must 'poke' - * the MMU regularly to help other ASs complete */ - kbase_as_poking_timer_retain_atom(kbdev, katom->kctx, - katom); - } - } - - if (kbase_hw_has_feature(kbdev, + if (kbase_hw_has_feature( + kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { - if (action == JS_COMMAND_SOFT_STOP) action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_SOFT_STOP_1 : - JS_COMMAND_SOFT_STOP_0; - else - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_HARD_STOP_1 : - JS_COMMAND_HARD_STOP_0; + KBASE_KATOM_FLAGS_JOBCHAIN) ? 
+ JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); @@ -843,6 +862,21 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) mutex_unlock(&kctx->jctx.lock); } +u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) +{ + u32 flush_id = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { + mutex_lock(&kbdev->pm.lock); + if (kbdev->pm.backend.gpu_powered) + flush_id = kbase_reg_read(kbdev, + GPU_CONTROL_REG(LATEST_FLUSH), NULL); + mutex_unlock(&kbdev->pm.lock); + } + + return flush_id; +} + int kbase_job_slot_init(struct kbase_device *kbdev) { #if KBASE_GPU_RESET_EN @@ -1058,7 +1092,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, /* For soft-stop, don't enter if soft-stop not allowed, or isn't * causing disjoint */ if (hw_action == JS_COMMAND_SOFT_STOP && - !(kbasep_soft_stop_allowed(kbdev, core_reqs) && + !(kbasep_soft_stop_allowed(kbdev, target_katom) && (action & JS_COMMAND_SW_CAUSES_DISJOINT))) return; @@ -1145,7 +1179,7 @@ static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev, static void kbasep_reset_timeout_worker(struct work_struct *data) { - unsigned long flags; + unsigned long flags, mmu_flags; struct kbase_device *kbdev; int i; ktime_t end_timestamp = ktime_get(); @@ -1155,8 +1189,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) bool try_schedule = false; bool restore_hwc = false; - u32 mmu_irq_mask; - KBASE_DEBUG_ASSERT(data); kbdev = container_of(data, struct kbase_device, @@ -1183,6 +1215,30 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) return; } + KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false); + + spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags); + /* We're about to flush out the IRQs and their bottom half's */ + kbdev->irq_reset_flush = true; + + /* Disable IRQ to avoid IRQ handlers to kick in after releasing the + * spinlock; this also clears any outstanding interrupts */ + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + kbase_pm_disable_interrupts(kbdev); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags); + + /* Ensure that any IRQ handlers have finished + * Must be done without any locks IRQ handlers will take */ + kbase_synchronize_irqs(kbdev); + + /* Flush out any in-flight work items */ + kbase_flush_mmu_wqs(kbdev); + + /* The flush has completed so reset the active indicator */ + kbdev->irq_reset_flush = false; + mutex_lock(&kbdev->pm.lock); /* We hold the pm lock, so there ought to be a current policy */ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); @@ -1224,22 +1280,11 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING; kbdev->hwcnt.backend.triggered = 0; - mmu_irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); - /* Disable IRQ to avoid IRQ handlers to kick in after releasing the - * spinlock; this also clears any outstanding interrupts */ - kbase_pm_disable_interrupts(kbdev); spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - /* Ensure that any IRQ handlers have finished - * Must be done without any locks IRQ handlers will take */ - kbase_synchronize_irqs(kbdev); - /* Reset the GPU */ kbase_pm_init_hw(kbdev, 0); - /* Re-enabled IRQs */ - kbase_pm_enable_interrupts_mmu_mask(kbdev, mmu_irq_mask); - /* Complete any jobs that were still on the GPU */ 
spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbase_backend_reset(kbdev, &end_timestamp); @@ -1267,6 +1312,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) mutex_unlock(&as->transaction_mutex); } + kbase_pm_enable_interrupts(kbdev); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); @@ -1284,10 +1331,21 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* Restore the HW counters setup */ if (restore_hwc) { struct kbase_context *kctx = kbdev->hwcnt.kctx; + u32 prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) + >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); + + if (arch_v6) + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; +#endif kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_OFF, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -1309,8 +1367,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) hwcnt_setup.tiler_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | - PRFCNT_CONFIG_MODE_MANUAL, kctx); + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, + kctx); /* If HW has PRLAM-8186 we can now re-enable the tiler HW * counters dump */ @@ -1362,6 +1420,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) break; } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Resume the vinstr core */ + kbase_vinstr_hwc_resume(kbdev->vinstr_ctx); + /* Note: counter dumping may now resume */ mutex_lock(&kbdev->pm.lock); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index 8601718..af6cddc 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -739,6 +739,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + + if (katom[idx]->will_fail_event_code) { + kbase_gpu_mark_atom_for_return(kbdev, + katom[idx]); + /* Set EVENT_DONE so this atom will be + completed, not unpulled. */ + katom[idx]->event_code = + BASE_JD_EVENT_DONE; + /* Only return if head atom or previous + * atom already removed - as atoms must + * be returned in order. 
*/ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + break; + } + + cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, katom[idx]); @@ -770,6 +790,13 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: + /* Only submit if head atom or previous atom + * already submitted */ + if (idx == 1 && + (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) { int err = 0; @@ -813,11 +840,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev), "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)", kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev)); - KBASE_DEBUG_ASSERT_MSG( - (kbase_jd_katom_is_secure(katom[idx]) && js == 0) || - !kbase_jd_katom_is_secure(katom[idx]), - "Secure atom on JS%d not supported", js); - katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; @@ -1060,7 +1082,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string), ktime_to_ns(*end_timestamp), - (u32)next_katom->kctx, 0, + (u32)next_katom->kctx->id, 0, next_katom->work_id); kbdev->hwaccess.backend.slot_rb[js].last_context = next_katom->kctx; @@ -1100,24 +1122,10 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) js, 0); if (katom) { - enum kbase_atom_gpu_rb_state gpu_rb_state = - katom->gpu_rb_state; - kbase_gpu_release_atom(kbdev, katom, NULL); kbase_gpu_dequeue_atom(kbdev, js, NULL); - - if (gpu_rb_state == - KBASE_ATOM_GPU_RB_SUBMITTED) { - katom->event_code = - BASE_JD_EVENT_JOB_CANCELLED; - kbase_jm_complete(kbdev, katom, - end_timestamp); - } else { - katom->event_code = - BASE_JD_EVENT_STOPPED; - kbase_jm_return_atom_to_js(kbdev, - katom); - } + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, end_timestamp); } } } @@ -1167,6 +1175,12 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) return -1; } +static void kbase_job_evicted(struct kbase_jd_atom *katom) +{ + kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom, + katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); +} + bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, struct kbase_context *kctx, int js, @@ -1265,6 +1279,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, != 0) { /* idx1 removed successfully, * will be handled in IRQ */ + kbase_job_evicted(katom_idx1); kbase_gpu_remove_atom(kbdev, katom_idx1, action, true); @@ -1336,6 +1351,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, JS_HEAD_NEXT_HI), NULL) != 0) { /* idx1 removed successfully, will be * handled in IRQ once idx0 completes */ + kbase_job_evicted(katom_idx1); kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c index 89b8085..6a49669 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -122,7 +122,8 @@ bool 
kbase_js_choose_affinity(u64 * const affinity, if (1 == kbdev->gpu_props.num_cores) { /* trivial case only one core, nothing to do */ - *affinity = core_availability_mask; + *affinity = core_availability_mask & + kbdev->pm.debug_core_mask[js]; } else { if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { @@ -131,7 +132,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, * the first core group */ *affinity = kbdev->gpu_props.props.coherency_info.group[0].core_mask - & core_availability_mask; + & core_availability_mask & + kbdev->pm.debug_core_mask[js]; } else { /* js[1], js[2] use core groups 0, 1 for * dual-core-group systems */ @@ -141,7 +143,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, num_core_groups); *affinity = kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask - & core_availability_mask; + & core_availability_mask & + kbdev->pm.debug_core_mask[js]; /* If the job is specifically targeting core * group 1 and the core availability policy is @@ -155,7 +158,8 @@ bool kbase_js_choose_affinity(u64 * const affinity, } else { /* All cores are available when no core split is * required */ - *affinity = core_availability_mask; + *affinity = core_availability_mask & + kbdev->pm.debug_core_mask[js]; } } diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c index 04bfa51..1e9a7e4 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -38,10 +38,15 @@ */ static inline bool timer_callback_should_run(struct kbase_device *kbdev) { + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; s8 nr_running_ctxs; lockdep_assert_held(&kbdev->js_data.runpool_mutex); + /* Timer must stop if we are suspending */ + if (backend->suspend_timer) + return false; + /* nr_contexts_pullable is updated with the runpool_mutex. However, the * locking in the caller gives us a barrier that ensures * nr_contexts_pullable is up-to-date for reading */ @@ -270,7 +275,6 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); backend->timer_running = false; spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - /* From now on, return value of timer_callback_should_run() will * also cause the timer to not requeue itself. 
Its return value * cannot change, because it depends on variables updated with @@ -284,7 +288,6 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); backend->timer_running = true; spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - hrtimer_start(&backend->scheduling_timer, HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), HRTIMER_MODE_REL); @@ -314,3 +317,21 @@ void kbase_backend_timer_term(struct kbase_device *kbdev) hrtimer_cancel(&backend->scheduling_timer); } +void kbase_backend_timer_suspend(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = true; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timer_resume(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = false; + + kbase_backend_ctx_count_changed(kbdev); +} + diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h index 3c101e4..3f53779 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h @@ -41,4 +41,29 @@ int kbase_backend_timer_init(struct kbase_device *kbdev); */ void kbase_backend_timer_term(struct kbase_device *kbdev); +/** + * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling + * timer + * @kbdev: Device pointer + * + * This function should be called on suspend, after the active count has reached + * zero. This is required as the timer may have been started on job submission + * to the job scheduler, but before jobs are submitted to the GPU. + * + * Caller must hold runpool_mutex. + */ +void kbase_backend_timer_suspend(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS + * scheduling timer + * @kbdev: Device pointer + * + * This function should be called on resume. Note that is is not guaranteed to + * re-start the timer, only evalute whether it should be re-started. + * + * Caller must hold runpool_mutex. + */ +void kbase_backend_timer_resume(struct kbase_device *kbdev); + #endif /* _KBASE_JS_BACKEND_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index 1b613a1..c6c7b89 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,9 +20,7 @@ #include #include #include -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include @@ -165,6 +163,15 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) KBASE_MMU_FAULT_TYPE_BUS : KBASE_MMU_FAULT_TYPE_PAGE; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + as->fault_extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), + kctx); + as->fault_extra_addr <<= 32; + as->fault_extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), + kctx); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ if (kbase_as_has_bus_fault(as)) { /* Mark bus fault as handled. @@ -203,13 +210,36 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, struct kbase_context *kctx) { struct kbase_mmu_setup *current_setup = &as->current_setup; -#if defined(CONFIG_MALI_MIPE_ENABLED) || \ - (defined(MALI_INCLUDE_TMIX) && \ - defined(CONFIG_MALI_COH_PAGES) && \ - defined(CONFIG_MALI_GPU_MMU_AARCH64)) u32 transcfg = 0; -#endif +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + transcfg = current_setup->transcfg & 0xFFFFFFFFUL; + + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ + /* Clear PTW_MEMATTR bits */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ + /* Clear PTW_SH bits */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx); + +#else /* CONFIG_MALI_GPU_MMU_AARCH64 */ + + if (kbdev->system_coherency == COHERENCY_ACE) + current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), current_setup->transtab & 0xFFFFFFFFUL, kctx); @@ -221,12 +251,10 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_as_config(as, current_setup->transtab, current_setup->memattr, transcfg); -#endif write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); } @@ -282,8 +310,18 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, struct kbase_context *kctx, enum kbase_mmu_fault_type type) { + unsigned long flags; u32 pf_bf_mask; + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. 
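This guard pairs with the GPU reset path, which is not part of this hunk. Presumably the reset code raises irq_reset_flush before flushing the MMU/IRQ fault handling and lowers it once the reset owns the registers, roughly as in the sketch below; flush_mmu_irq_work() is an invented name and the exact locking of the real reset worker may differ.

    /* Hedged sketch of the reset-side counterpart; not from this patch. */
    spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
    kbdev->irq_reset_flush = true;
    spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);

    kbase_pm_disable_interrupts(kbdev);  /* mask and clear IRQ sources      */
    flush_mmu_irq_work(kbdev);           /* invented: drain fault handling  */

    kbdev->irq_reset_flush = false;      /* handlers are quiescent again    */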
+ */ + if (kbdev->irq_reset_flush) + goto unlock; + /* Clear the page (and bus fault IRQ as well in case one occurred) */ pf_bf_mask = MMU_PAGE_FAULT(as->number); if (type == KBASE_MMU_FAULT_TYPE_BUS || @@ -291,6 +329,9 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, pf_bf_mask |= MMU_BUS_ERROR(as->number); kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, @@ -303,6 +344,13 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, * occurred) */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | MMU_PAGE_FAULT(as->number); @@ -312,5 +360,6 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); +unlock: spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index 9ff7baa..5805efe 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,15 +29,13 @@ #include #include +#include #include void kbase_pm_register_access_enable(struct kbase_device *kbdev) { struct kbase_pm_callback_conf *callbacks; -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_enable(kbdev->dev); -#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; if (callbacks) @@ -56,9 +54,6 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev) callbacks->power_off_callback(kbdev); kbdev->pm.backend.gpu_powered = false; -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_disable(kbdev->dev); -#endif } int kbase_hwaccess_pm_init(struct kbase_device *kbdev) @@ -96,6 +91,8 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) callbacks->power_runtime_on_callback; kbdev->pm.backend.callback_power_runtime_off = callbacks->power_runtime_off_callback; + kbdev->pm.backend.callback_power_runtime_idle = + callbacks->power_runtime_idle_callback; } else { kbdev->pm.backend.callback_power_on = NULL; kbdev->pm.backend.callback_power_off = NULL; @@ -105,6 +102,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.callback_power_runtime_term = NULL; kbdev->pm.backend.callback_power_runtime_on = NULL; kbdev->pm.backend.callback_power_runtime_off = NULL; + kbdev->pm.backend.callback_power_runtime_idle = NULL; } /* Initialise the metrics subsystem */ @@ -227,7 +225,9 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, kbasep_pm_read_present_cores(kbdev); - kbdev->pm.debug_core_mask = + kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = + kbdev->pm.debug_core_mask[1] = + kbdev->pm.debug_core_mask[2] = 
kbdev->gpu_props.props.raw_props.shader_present; /* Pretend the GPU is active to prevent a power policy turning the GPU @@ -321,9 +321,15 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) } } -void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2) { - kbdev->pm.debug_core_mask = new_core_mask; + kbdev->pm.debug_core_mask[0] = new_core_mask_js0; + kbdev->pm.debug_core_mask[1] = new_core_mask_js1; + kbdev->pm.debug_core_mask[2] = new_core_mask_js2; + kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | + new_core_mask_js2; kbase_pm_update_cores_state_nolock(kbdev); } @@ -358,6 +364,8 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); } + kbase_backend_timer_suspend(kbdev); + mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); } @@ -368,8 +376,12 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); + kbdev->pm.suspending = false; kbase_pm_do_poweron(kbdev, true); + + kbase_backend_timer_resume(kbdev); + mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); } diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c index 60b4758..4eada33 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -137,14 +137,14 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) /* All cores must be enabled when instrumentation is in use */ if (kbdev->pm.backend.instr_enabled) return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask; + kbdev->pm.debug_core_mask_all; if (kbdev->pm.backend.ca_current_policy == NULL) return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask; + kbdev->pm.debug_core_mask_all; return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & - kbdev->pm.debug_core_mask; + kbdev->pm.debug_core_mask_all; } KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index 893c271..3eaf1a7 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -200,8 +200,13 @@ union kbase_pm_ca_policy_data { * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq * @shader_poweroff_pending: Bit mask of shaders to be powered off on next * timer callback - * @poweroff_timer_needed: true if the poweroff timer is currently running, + * @poweroff_timer_needed: true if the poweroff timer is currently required, * false otherwise + * @poweroff_timer_running: true if the poweroff timer is currently running, + * false otherwise + * power_change_lock should be held when accessing, + * unless there is no way the timer can be running (eg + * hrtimer_cancel() was called immediately before) * @callback_power_on: Callback when the GPU needs to be turned on. See * &struct kbase_pm_callback_conf * @callback_power_off: Callback when the GPU may be turned off. See @@ -214,9 +219,8 @@ union kbase_pm_ca_policy_data { * &struct kbase_pm_callback_conf * @callback_power_runtime_off: Callback when the GPU may be turned off. See * &struct kbase_pm_callback_conf - * @callback_cci_snoop_ctrl: Callback when the GPU L2 power may transition. - * If enable is set then snoops should be enabled - * otherwise snoops should be disabled + * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See + * &struct kbase_pm_callback_conf * * Note: * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the @@ -274,6 +278,7 @@ struct kbase_pm_backend_data { u64 shader_poweroff_pending; bool poweroff_timer_needed; + bool poweroff_timer_running; int (*callback_power_on)(struct kbase_device *kbdev); void (*callback_power_off)(struct kbase_device *kbdev); @@ -281,7 +286,7 @@ struct kbase_pm_backend_data { void (*callback_power_resume)(struct kbase_device *kbdev); int (*callback_power_runtime_on)(struct kbase_device *kbdev); void (*callback_power_runtime_off)(struct kbase_device *kbdev); - + int (*callback_power_runtime_idle)(struct kbase_device *kbdev); }; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index bcaf20c..7675c91 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,9 +27,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include #include @@ -99,6 +97,39 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type, return (u32)core_type + (u32)action; } +#ifdef CONFIG_ARM64 +static void mali_cci_flush_l2(struct kbase_device *kbdev) +{ + const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; + u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + u32 raw; + + /* + * Note that we don't take the cache flush mutex here since + * we expect to be the last user of the L2, all other L2 users + * would have dropped their references, to initiate L2 power + * down, L2 power down being the only valid place for this + * to be called from. 
+ */ + + kbase_reg_write(kbdev, + GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, + NULL); + + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + + /* Wait for cache flush to complete before continuing, exit on + * gpu resets or loop expiry. */ + while (((raw & mask) == 0) && --loops) { + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + NULL); + } +} +#endif /** * kbase_pm_invoke - Invokes an action on a core set @@ -134,7 +165,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, kbase_trace_mali_pm_power_off(core_type, cores); } #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + if (cores) { u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); @@ -144,7 +175,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, state &= ~cores; kbase_tlstream_aux_pm_state(core_type, state); } -#endif + /* Tracing */ if (cores) { if (action == ACTION_PWRON) @@ -177,6 +208,8 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, case KBASE_PM_CORE_L2: KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, 0u, lo); + /* disable snoops before L2 is turned off */ + kbase_pm_cache_snoop_disable(kbdev); break; default: break; @@ -404,6 +437,12 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, /* All are ready, none will be turned off, and none are * transitioning */ kbdev->pm.backend.l2_powered = 1; + /* + * Ensure snoops are enabled after L2 is powered up, + * note that kbase keeps track of the snoop state, so + * safe to repeatedly call. + */ + kbase_pm_cache_snoop_enable(kbdev); if (kbdev->l2_users_count > 0) { /* Notify any registered l2 cache users * (optimized out when no users waiting) */ @@ -665,7 +704,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER)); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pm_state( KBASE_PM_CORE_L2, kbase_pm_get_ready_cores( @@ -679,7 +718,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_TILER)); -#endif KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, kbdev->pm.backend.gpu_in_desired_state, @@ -840,30 +878,6 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); -void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask) -{ - unsigned long flags; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - /* - * Clear all interrupts, - * and unmask them all. 
- */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, - NULL); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, - NULL); - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, - NULL); - kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); - - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, NULL); -} - void kbase_pm_disable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -921,6 +935,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) if (is_resume && kbdev->pm.backend.callback_power_resume) { kbdev->pm.backend.callback_power_resume(kbdev); + return; } else if (kbdev->pm.backend.callback_power_on) { kbdev->pm.backend.callback_power_on(kbdev); /* If your platform properly keeps the GPU state you may use the @@ -999,6 +1014,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) return false; } + kbase_pm_cache_snoop_disable(kbdev); /* The GPU power may be turned off from this point */ kbdev->pm.backend.gpu_powered = false; @@ -1081,14 +1097,23 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY /* Enable alternative hardware counter selection if configured. */ - if (DEFAULT_ALTERNATIVE_HWC) + if (!GPU_ID_IS_NEW_FORMAT(prod_id)) kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; +#endif /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; + if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { + if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */ + kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; + else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */ + kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; + } + kbdev->hw_quirks_tiler = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL); @@ -1110,6 +1135,12 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Allow memory configuration disparity to be ignored, we + * optimize the use of shared memory and thus we expect + * some disparity in the memory configuration */ + kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; + } /* Only for T86x/T88x-based products after r2p0 */ if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { @@ -1174,6 +1205,33 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) } +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE) && + !kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_enable_smc != 0) + kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); + kbdev->cci_snoop_enabled = true; + } +} + +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE) && + kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_disable_smc != 0) { + mali_cci_flush_l2(kbdev); + 
kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); + } +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); + kbdev->cci_snoop_enabled = false; + } +} int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) { @@ -1202,6 +1260,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) /* Ensure interrupts are off to begin with, this also clears any * outstanding interrupts */ kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. */ + kbase_pm_cache_snoop_disable(kbdev); /* Prepare for the soft-reset */ kbdev->pm.backend.reset_done = false; @@ -1329,10 +1389,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) RESET_COMPLETED) { /* The interrupt is set in the RAWSTAT; this suggests that the * interrupts are not getting to the CPU */ - dev_warn(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); /* If interrupts aren't working we can't continue. */ destroy_hrtimer_on_stack(&rtdata.timer); - goto out; + return -EINVAL; } /* The GPU doesn't seem to be responding to the reset so try a hard @@ -1374,6 +1434,15 @@ out: kbase_pm_hw_issues_apply(kbdev); + kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + u32 gpu_status = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS), NULL); + + kbdev->secure_mode = (gpu_status & + GPU_STATUS_PROTECTED_MODE_ACTIVE) != 0; + } /* If cycle counter was in use re-enable it, enable_irqs will only be * false when called from kbase_pm_powerup */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index bcca37d..aa51b8c 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -154,17 +154,6 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); */ void kbase_pm_enable_interrupts(struct kbase_device *kbdev); -/** - * kbase_pm_enable_interrupts_mmu_mask - Enable interrupts on the device, using - * the provided mask to set MMU_IRQ_MASK. - * - * Interrupts are also enabled after a call to kbase_pm_clock_on(). - * - * @kbdev: The kbase device structure for the device (must be a valid pointer) - * @mask: The mask to use for MMU_IRQ_MASK - */ -void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask); - /** * kbase_pm_disable_interrupts - Disable interrupts on the device. * @@ -512,5 +501,23 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); +/** + * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called after L2 power up. 
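Taken together with the pm_driver.c changes above, the intended contract is that CCI snoops are only enabled while the L2 is powered. A condensed ordering sketch, not a real kbase call path:

    /* L2 has just reported ready (see kbase_pm_transition_core_type) */
    kbase_pm_cache_snoop_enable(kbdev);   /* no-op unless COHERENCY_ACE */

    /* ... jobs run with CCI snoops active ... */

    /* About to power the L2 down, reset the GPU, or power it off */
    kbase_pm_cache_snoop_disable(kbdev);  /* flushes the L2, then issues
                                             the snoop-disable SMC      */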
+ */ + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called before L2 power off. + */ +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index e3c4829..343436f 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -183,10 +183,13 @@ static enum hrtimer_restart kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) { struct kbase_device *kbdev; + unsigned long flags; kbdev = container_of(timer, struct kbase_device, pm.backend.gpu_poweroff_timer); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + /* It is safe for this call to do nothing if the work item is already * queued. The worker function will read the must up-to-date state of * kbdev->pm.backend.gpu_poweroff_pending under lock. @@ -200,30 +203,27 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) &kbdev->pm.backend.gpu_poweroff_work); if (kbdev->pm.backend.shader_poweroff_pending) { - unsigned long flags; - - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - - if (kbdev->pm.backend.shader_poweroff_pending) { - kbdev->pm.backend.shader_poweroff_pending_time--; + kbdev->pm.backend.shader_poweroff_pending_time--; - KBASE_DEBUG_ASSERT( + KBASE_DEBUG_ASSERT( kbdev->pm.backend.shader_poweroff_pending_time >= 0); - if (!kbdev->pm.backend.shader_poweroff_pending_time) - kbasep_pm_do_poweroff_cores(kbdev); - } - - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + if (!kbdev->pm.backend.shader_poweroff_pending_time) + kbasep_pm_do_poweroff_cores(kbdev); } if (kbdev->pm.backend.poweroff_timer_needed) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time); return HRTIMER_RESTART; } + kbdev->pm.backend.poweroff_timer_running = false; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + return HRTIMER_NORESTART; } @@ -263,10 +263,13 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data) if (do_poweroff) { kbdev->pm.backend.poweroff_timer_needed = false; hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); + kbdev->pm.backend.poweroff_timer_running = false; + /* Power off the GPU */ if (!kbase_pm_do_poweroff(kbdev, false)) { /* GPU can not be powered off at present */ kbdev->pm.backend.poweroff_timer_needed = true; + kbdev->pm.backend.poweroff_timer_running = true; hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, kbdev->pm.gpu_poweroff_time, HRTIMER_MODE_REL); @@ -316,13 +319,13 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.poweroff_timer_needed = false; hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.backend.poweroff_timer_running = false; /* If wq is already running but is held off by pm.lock, make sure it has * no effect */ kbdev->pm.backend.gpu_poweroff_pending = 0; - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - kbdev->pm.backend.shader_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending_time = 0; @@ -331,87 +334,106 @@ void 
kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) void kbase_pm_update_active(struct kbase_device *kbdev) { + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; bool active; - lockdep_assert_held(&kbdev->pm.lock); + lockdep_assert_held(&pm->lock); /* pm_current_policy will never be NULL while pm.lock is held */ - KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); + KBASE_DEBUG_ASSERT(backend->pm_current_policy); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + spin_lock_irqsave(&pm->power_change_lock, flags); - active = kbdev->pm.backend.pm_current_policy->get_core_active(kbdev); + active = backend->pm_current_policy->get_core_active(kbdev); if (active) { - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - - if (kbdev->pm.backend.gpu_poweroff_pending) { + if (backend->gpu_poweroff_pending) { /* Cancel any pending power off request */ - kbdev->pm.backend.gpu_poweroff_pending = 0; + backend->gpu_poweroff_pending = 0; /* If a request was pending then the GPU was still * powered, so no need to continue */ - if (!kbdev->poweroff_pending) + if (!kbdev->poweroff_pending) { + spin_unlock_irqrestore(&pm->power_change_lock, + flags); return; + } } - if (!kbdev->pm.backend.poweroff_timer_needed && - !kbdev->pm.backend.gpu_powered && - (kbdev->pm.poweroff_gpu_ticks || - kbdev->pm.poweroff_shader_ticks)) { - kbdev->pm.backend.poweroff_timer_needed = true; - hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, + if (!backend->poweroff_timer_running && !backend->gpu_powered && + (pm->poweroff_gpu_ticks || + pm->poweroff_shader_ticks)) { + backend->poweroff_timer_needed = true; + backend->poweroff_timer_running = true; + hrtimer_start(&backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, HRTIMER_MODE_REL); } + spin_unlock_irqrestore(&pm->power_change_lock, flags); + /* Power on the GPU and any cores requested by the policy */ kbase_pm_do_poweron(kbdev, false); } else { /* It is an error for the power policy to power off the GPU * when there are contexts active */ - KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + KBASE_DEBUG_ASSERT(pm->active_count == 0); - if (kbdev->pm.backend.shader_poweroff_pending) { - kbdev->pm.backend.shader_poweroff_pending = 0; - kbdev->pm.backend.shader_poweroff_pending_time = 0; + if (backend->shader_poweroff_pending) { + backend->shader_poweroff_pending = 0; + backend->shader_poweroff_pending_time = 0; } - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - - /* Request power off */ - if (kbdev->pm.backend.gpu_powered) { - if (kbdev->pm.poweroff_gpu_ticks) { - kbdev->pm.backend.gpu_poweroff_pending = - kbdev->pm.poweroff_gpu_ticks; - if (!kbdev->pm.backend.poweroff_timer_needed) { + if (pm->backend.gpu_powered) { + if (pm->poweroff_gpu_ticks) { + backend->gpu_poweroff_pending = + pm->poweroff_gpu_ticks; + backend->poweroff_timer_needed = true; + if (!backend->poweroff_timer_running) { /* Start timer if not running (eg if * power policy has been changed from * always_on to something else). 
This * will ensure the GPU is actually * powered off */ - kbdev->pm.backend.poweroff_timer_needed + backend->poweroff_timer_running = true; hrtimer_start( - &kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, + &backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, HRTIMER_MODE_REL); } + spin_unlock_irqrestore(&pm->power_change_lock, + flags); } else { + spin_unlock_irqrestore(&pm->power_change_lock, + flags); + /* Power off the GPU immediately */ if (!kbase_pm_do_poweroff(kbdev, false)) { /* GPU can not be powered off at present */ - kbdev->pm.backend.poweroff_timer_needed - = true; - hrtimer_start( - &kbdev->pm.backend.gpu_poweroff_timer, - kbdev->pm.gpu_poweroff_time, - HRTIMER_MODE_REL); + spin_lock_irqsave( + &pm->power_change_lock, + flags); + backend->poweroff_timer_needed = true; + if (!backend->poweroff_timer_running) { + backend->poweroff_timer_running + = true; + hrtimer_start( + &backend->gpu_poweroff_timer, + pm->gpu_poweroff_time, + HRTIMER_MODE_REL); + } + spin_unlock_irqrestore( + &pm->power_change_lock, + flags); } } + } else { + spin_unlock_irqrestore(&pm->power_change_lock, flags); } } } @@ -478,7 +500,6 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) * Only reset poweroff_timer_needed if we're not in the middle * of the power off callback */ kbdev->pm.backend.poweroff_timer_needed = false; - hrtimer_try_to_cancel(&kbdev->pm.backend.gpu_poweroff_timer); } /* Ensure timer does not power off wanted cores and make sure to power diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c new file mode 100644 index 0000000..cd4f0a2 --- /dev/null +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c @@ -0,0 +1,160 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include +#include +#include +#include +#include +#include + +/* + * This model is primarily designed for the Juno platform. It may not be + * suitable for other platforms. + */ + +#define FALLBACK_STATIC_TEMPERATURE 55000 + +static u32 dynamic_coefficient; +static u32 static_coefficient; +static s32 ts[4]; +static struct thermal_zone_device *gpu_tz; + +static unsigned long model_static_power(unsigned long voltage) +{ + unsigned long temperature, temp; + unsigned long temp_squared, temp_cubed, temp_scaling_factor; + const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; + + if (gpu_tz) { + int ret; + + ret = gpu_tz->ops->get_temp(gpu_tz, &temperature); + if (ret) { + pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", + ret); + temperature = FALLBACK_STATIC_TEMPERATURE; + } + } else { + temperature = FALLBACK_STATIC_TEMPERATURE; + } + + /* Calculate the temperature scaling factor. To be applied to the + * voltage scaled power. 
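The computation that follows applies a cubic-in-temperature scaling factor to a cubic-in-voltage term. As a stand-alone arithmetic sketch of the same fixed-point formula, with made-up coefficients purely to show the scaling (the real values come from the power_model device-tree node, and the real ts[] entries are signed):

    /* Stand-alone arithmetic sketch; every number below is invented. */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long static_coefficient = 500;
            long long ts[4] = { 20000, 2000, 30, 1 };    /* ts[0]..ts[3]    */
            unsigned long long voltage = 900;            /* mV              */
            unsigned long long temperature = 65000;      /* millidegrees C  */

            unsigned long long v3 = (voltage * voltage * voltage) >> 10;
            unsigned long long t = temperature / 1000;
            unsigned long long tsf = ts[3]*t*t*t + ts[2]*t*t + ts[1]*t + ts[0];

            unsigned long long p_static_mw =
                    (((static_coefficient * v3) >> 20) * tsf) / 1000000;

            printf("estimated static power: %llu mW\n", p_static_mw);
            return 0;
    }

With these invented inputs the sketch prints roughly 186 mW; the point is only the order of operations and the >>10 / >>20 scaling, which match the driver code.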
+ */ + temp = temperature / 1000; + temp_squared = temp * temp; + temp_cubed = temp_squared * temp; + temp_scaling_factor = + (ts[3] * temp_cubed) + + (ts[2] * temp_squared) + + (ts[1] * temp) + + ts[0]; + + return (((static_coefficient * voltage_cubed) >> 20) + * temp_scaling_factor) + / 1000000; +} + +static unsigned long model_dynamic_power(unsigned long freq, + unsigned long voltage) +{ + /* The inputs: freq (f) is in Hz, and voltage (v) in mV. + * The coefficient (c) is in mW/(MHz mV mV). + * + * This function calculates the dynamic power after this formula: + * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) + */ + const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ + const unsigned long f_mhz = freq / 1000000; /* MHz */ + + return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */ +} + +struct devfreq_cooling_ops power_model_simple_ops = { + .get_static_power = model_static_power, + .get_dynamic_power = model_dynamic_power, +}; + +int kbase_power_model_simple_init(struct kbase_device *kbdev) +{ + struct device_node *power_model_node; + const char *tz_name; + u32 static_power, dynamic_power; + u32 voltage, voltage_squared, voltage_cubed, frequency; + + power_model_node = of_get_child_by_name(kbdev->dev->of_node, + "power_model"); + if (!power_model_node) { + dev_err(kbdev->dev, "could not find power_model node\n"); + return -ENODEV; + } + if (!of_device_is_compatible(power_model_node, + "arm,mali-simple-power-model")) { + dev_err(kbdev->dev, "power_model incompatible with simple power model\n"); + return -ENODEV; + } + + if (of_property_read_string(power_model_node, "thermal-zone", + &tz_name)) { + dev_err(kbdev->dev, "ts in power_model not available\n"); + return -EINVAL; + } + + gpu_tz = thermal_zone_get_zone_by_name(tz_name); + if (IS_ERR(gpu_tz)) { + pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n", + PTR_ERR(gpu_tz)); + gpu_tz = NULL; + + return -EPROBE_DEFER; + } + + if (of_property_read_u32(power_model_node, "static-power", + &static_power)) { + dev_err(kbdev->dev, "static-power in power_model not available\n"); + return -EINVAL; + } + if (of_property_read_u32(power_model_node, "dynamic-power", + &dynamic_power)) { + dev_err(kbdev->dev, "dynamic-power in power_model not available\n"); + return -EINVAL; + } + if (of_property_read_u32(power_model_node, "voltage", + &voltage)) { + dev_err(kbdev->dev, "voltage in power_model not available\n"); + return -EINVAL; + } + if (of_property_read_u32(power_model_node, "frequency", + &frequency)) { + dev_err(kbdev->dev, "frequency in power_model not available\n"); + return -EINVAL; + } + voltage_squared = (voltage * voltage) / 1000; + voltage_cubed = voltage * voltage * voltage; + static_coefficient = (static_power << 20) / (voltage_cubed >> 10); + dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared) + * 1000) / frequency; + + if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) { + dev_err(kbdev->dev, "ts in power_model not available\n"); + return -EINVAL; + } + + return 0; +} + diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h new file mode 100644 index 0000000..d20de1e --- /dev/null +++ b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h @@ -0,0 +1,43 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _BASE_POWER_MODEL_SIMPLE_H_ +#define _BASE_POWER_MODEL_SIMPLE_H_ + +/** + * kbase_power_model_simple_init - Initialise the simple power model + * @kbdev: Device pointer + * + * The simple power model estimates power based on current voltage, temperature, + * and coefficients read from device tree. It does not take utilization into + * account. + * + * The power model requires coefficients from the power_model node in device + * tree. The absence of this node will prevent the model from functioning, but + * should not prevent the rest of the driver from running. + * + * Return: 0 on success + * -ENOSYS if the power_model node is not present in device tree + * -EPROBE_DEFER if the thermal zone specified in device tree is not + * currently available + * Any other negative value on failure + */ +int kbase_power_model_simple_init(struct kbase_device *kbdev); + +extern struct devfreq_cooling_ops power_model_simple_ops; + +#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index 5a15230..29c78c2 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,9 +45,10 @@ enum base_hw_feature { BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, BASE_HW_FEATURE_BRNDOUT_KILL, BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_V4, + BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_END }; @@ -161,5 +162,28 @@ static const enum base_hw_feature base_hw_features_t82x[] = { BASE_HW_FEATURE_END }; +static const enum base_hw_feature base_hw_features_tMIx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_END +}; #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 9fae0f6..e111b07 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -57,7 +57,6 @@ enum base_hw_issue { BASE_HW_ISSUE_8986, BASE_HW_ISSUE_8987, BASE_HW_ISSUE_9010, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9418, BASE_HW_ISSUE_9423, BASE_HW_ISSUE_9435, @@ -90,6 +89,7 @@ enum base_hw_issue { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -100,7 +100,15 @@ enum base_hw_issue { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -140,7 +148,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_8986, BASE_HW_ISSUE_8987, BASE_HW_ISSUE_9010, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9418, BASE_HW_ISSUE_9423, BASE_HW_ISSUE_9435, @@ -165,7 +172,10 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_3964, + GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -180,7 +190,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_8778, BASE_HW_ISSUE_8975, BASE_HW_ISSUE_9010, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9418, BASE_HW_ISSUE_9423, BASE_HW_ISSUE_9435, @@ -201,7 +210,9 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -216,7 +227,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_8778, BASE_HW_ISSUE_8975, BASE_HW_ISSUE_9010, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_9510, BASE_HW_ISSUE_10410, @@ -234,8 +244,10 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -264,6 +276,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -284,8 +297,10 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -302,6 +317,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_10959, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -315,6 +331,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -325,7 +342,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, 
BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -337,6 +356,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -347,7 +367,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -357,6 +379,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -367,7 +390,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -379,6 +404,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -389,7 +415,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -399,6 +427,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -409,7 +438,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -419,6 +450,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -426,7 +458,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -440,8 +474,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -455,8 +491,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -470,36 +508,43 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; static const enum base_hw_issue base_hw_issues_model_t72x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_6402, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, 
BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, + GPUCORE_1619, BASE_HW_ISSUE_END }; static const enum base_hw_issue base_hw_issues_model_t76x[] = { BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_TMIX_7891, + GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -507,15 +552,17 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_6402, BASE_HW_ISSUE_8778, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10472, BASE_HW_ISSUE_10931, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, + GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -529,8 +576,11 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3964, + GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -539,6 +589,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -546,7 +597,9 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -555,13 +608,16 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -570,12 +626,14 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -584,23 +642,28 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; static const enum base_hw_issue base_hw_issues_model_tFRx[] = { BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_TMIX_7891, + GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -609,13 +672,16 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, 
BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -624,12 +690,14 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -638,23 +706,27 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; static const enum base_hw_issue base_hw_issues_model_t86x[] = { BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_TMIX_7891, + GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -663,6 +735,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -670,6 +743,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -678,24 +752,29 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; static const enum base_hw_issue base_hw_issues_model_t83x[] = { BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_TMIX_7891, + GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -704,6 +783,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -711,6 +791,8 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3964, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -719,6 +801,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -726,6 +809,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -734,27 +818,70 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, + 
BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9275, BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_TMIX_7891, + GPUCORE_1619, BASE_HW_ISSUE_END }; +static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_END +}; +static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tMIx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + GPUCORE_1619, + BASE_HW_ISSUE_END +}; #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h index 5662130..c59e8b2 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,10 +42,16 @@ /* Support UK9 IOCTLS */ #define BASE_LEGACY_UK9_SUPPORT 1 -typedef u64 base_mem_handle; +typedef struct base_mem_handle { + struct { + u64 handle; + } basep; +} base_mem_handle; #include "mali_base_mem_priv.h" #include "mali_kbase_profiling_gator_api.h" +#include "mali_midg_coherency.h" +#include "mali_kbase_gpu_id.h" /* * Dependency stuff, keep it private for now. May want to expose it if @@ -60,6 +66,10 @@ typedef u64 base_mem_handle; #define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1))) #define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT) +/* Set/reset values for a software event */ +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 #define BASE_MAX_COHERENT_GROUPS 16 @@ -160,7 +170,9 @@ enum { /* IN */ BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence Outer shareable, required. 
*/ - BASE_MEM_SECURE = (1U << 16) /**< Secure memory */ + BASE_MEM_SECURE = (1U << 16), /**< Secure memory */ + BASE_MEM_DONT_NEED = (1U << 17), /**< Not needed physical + memory */ }; @@ -169,7 +181,7 @@ enum { * * Must be kept in sync with the ::base_mem_alloc_flags flags */ -#define BASE_MEM_FLAGS_NR_BITS 17 +#define BASE_MEM_FLAGS_NR_BITS 18 /** * A mask for all output bits, excluding IN/OUT bits. @@ -182,9 +194,22 @@ enum { #define BASE_MEM_FLAGS_INPUT_MASK \ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) +/** + * A mask for all the flags which are modifiable via the base_mem_set_flags + * interface. + */ +#define BASE_MEM_FLAGS_MODIFIABLE \ + (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ + BASE_MEM_COHERENT_LOCAL) /** - * @brief Memory types supported by @a base_mem_import + * enum base_mem_import_type - Memory types supported by @a base_mem_import + * + * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type + * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id. + * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) + * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a + * base_mem_import_user_buffer * * Each type defines what the supported handle type is. * @@ -196,21 +221,52 @@ enum { */ typedef enum base_mem_import_type { BASE_MEM_IMPORT_TYPE_INVALID = 0, - /** UMP import. Handle type is ump_secure_id. */ BASE_MEM_IMPORT_TYPE_UMP = 1, - /** UMM import. Handle type is a file descriptor (int) */ - BASE_MEM_IMPORT_TYPE_UMM = 2 + BASE_MEM_IMPORT_TYPE_UMM = 2, + BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 } base_mem_import_type; /** - * @brief Invalid memory handle type. - * Return value from functions returning @a base_mem_handle on error. + * struct base_mem_import_user_buffer - Handle of an imported user buffer + * + * @ptr: kbase_pointer to imported user buffer + * @length: length of imported user buffer in bytes + * + * This structure is used to represent a handle of an imported user buffer. + */ + +struct base_mem_import_user_buffer { + kbase_pointer ptr; + u64 length; +}; + +/** + * @brief Invalid memory handle. + * + * Return value from functions returning @ref base_mem_handle on error. + * + * @warning @ref base_mem_handle_new_invalid must be used instead of this macro + * in C++ code or other situations where compound literals cannot be used. */ -#define BASE_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) + +/** + * @brief Special write-alloc memory handle. + * + * A special handle is used to represent a region where a special page is mapped + * with a write-alloc cache setup, typically used when the write result of the + * GPU isn't needed, but the GPU must write anyway. + * + * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro + * in C++ code or other situations where compound literals cannot be used. 
+ */ +#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) #define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) #define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) #define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) /* reserved handles ..-64< for future special handles */ #define BASE_MEM_COOKIE_BASE (64ul << 12) #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ @@ -219,6 +275,7 @@ typedef enum base_mem_import_type { /* Mask to detect 4GB boundary alignment */ #define BASE_MEM_MASK_4GB 0xfffff000UL + /* Bit mask of cookies used for for memory allocation setup */ #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ @@ -334,6 +391,28 @@ struct base_mem_aliasing_info { u64 length; }; +/** + * struct base_jit_alloc_info - Structure which describes a JIT allocation + * request. + * @gpu_alloc_addr: The GPU virtual address to write the JIT + * allocated GPU virtual address to. + * @va_pages: The minimum number of virtual pages required. + * @commit_pages: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * Zero is not a valid value. + */ +struct base_jit_alloc_info { + u64 gpu_alloc_addr; + u64 va_pages; + u64 commit_pages; + u64 extent; + u8 id; +}; + /** * @brief Job dependency type. * @@ -386,6 +465,14 @@ typedef u16 base_jd_core_req; /* Requires fragment job with AFBC encoding */ #define BASE_JD_REQ_FS_AFBC (1U << 13) +/** + * SW-only requirement: coalesce completion events. + * If this bit is set then completion of this atom will not cause an event to + * be sent to userspace, whether successful or not; completion events will be + * deferred until an atom completes which does not have this bit set. + */ +#define BASE_JD_REQ_EVENT_COALESCE (1U << 5) + /** * SW Only requirement: the job chain requires a coherent core group. We don't * mind which coherent core group is used. @@ -455,6 +542,66 @@ typedef u16 base_jd_core_req; * - Priority is inherited from the replay job. */ #define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4) +/** + * SW only requirement: event wait/trigger job. + * + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the + * other waiting jobs. It completes immediately. + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it + * possible for other jobs to wait upon. It completes immediately. + */ +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/** + * SW only requirement: Just In Time allocation + * + * This job requests a JIT allocation based on the request in the + * @base_jit_alloc_info structure which is passed via the jc element of + * the atom. + * + * It should be noted that the id entry in @base_jit_alloc_info must not + * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. 
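[Editorial illustration] The JIT request structure introduced above (struct base_jit_alloc_info) is passed to the driver via the jc element of an atom, as the surrounding comment explains and as the remainder of this comment block details for BASE_JD_REQ_SOFT_JIT_ALLOC / BASE_JD_REQ_SOFT_JIT_FREE. Below is a hedged, userspace-style sketch of filling such a request. The structure layout and field meanings are copied from this patch; the u64/u8 stand-in typedefs, the example page counts, and fill_jit_request() itself are illustrative assumptions, not part of the driver.

/* Hedged sketch: populate a base_jit_alloc_info request as documented
 * above. Submission of the atom itself is outside this hunk and is only
 * indicated in comments. */
#include <stdint.h>
#include <string.h>

typedef uint64_t u64;   /* stand-ins for the kernel types */
typedef uint8_t u8;

struct base_jit_alloc_info {
	u64 gpu_alloc_addr;
	u64 va_pages;
	u64 commit_pages;
	u64 extent;
	u8 id;
};

static void fill_jit_request(struct base_jit_alloc_info *info,
			     u64 result_gpu_addr)
{
	memset(info, 0, sizeof(*info));
	info->gpu_alloc_addr = result_gpu_addr; /* where the new GPU VA is written back */
	info->va_pages = 16;     /* minimum virtual pages required (example value) */
	info->commit_pages = 4;  /* minimum physically backed pages (example value) */
	info->extent = 4;        /* grow granularity on fault (example value) */
	info->id = 1;            /* non-zero, unique; pairs the later JIT free */
	/* The address of *info is then carried in the atom's jc element for a
	 * BASE_JD_REQ_SOFT_JIT_ALLOC soft job; the same id is later passed via
	 * jc to a BASE_JD_REQ_SOFT_JIT_FREE soft job to release the allocation. */
}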
+ * + * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE + * soft job to free the JIT allocation is still made. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) +/** + * SW only requirement: Just In Time free + * + * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC + * to be freed. The ID of the JIT allocation is passed via the jc element of + * the atom. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) + +/** + * SW only requirement: Map external resource + * + * This job requests external resource(s) are mapped once the dependencies + * of the job have been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * @base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) +/** + * SW only requirement: Unmap external resource + * + * This job requests external resource(s) are unmapped once the dependencies + * of the job has been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * @base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) /** * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) @@ -463,9 +610,6 @@ typedef u16 base_jd_core_req; * * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. - * - * @note This is a more flexible variant of the @ref BASE_CONTEXT_HINT_ONLY_COMPUTE flag, - * allowing specific jobs to be marked as 'Only Compute' instead of the entire context */ #define BASE_JD_REQ_ONLY_COMPUTE (1U << 10) @@ -495,26 +639,21 @@ typedef u16 base_jd_core_req; #define BASEP_JD_REQ_EVENT_NEVER (1U << 14) /** -* These requirement bits are currently unused in base_jd_core_req (currently a u16) -*/ - -#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5) -#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15) - -/** -* Mask of all the currently unused requirement bits in base_jd_core_req. -*/ + * These requirement bits are currently unused in base_jd_core_req (currently a u16) + */ -#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \ - BASEP_JD_REQ_RESERVED_BIT15) +#define BASEP_JD_REQ_RESERVED (1U << 15) /** * Mask of all bits in base_jd_core_req that control the type of the atom. * * This allows dependency only atoms to have flags set */ -#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\ - BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER)) +#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED |\ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\ + BASE_JD_REQ_EXTERNAL_RESOURCES |\ + BASEP_JD_REQ_EVENT_NEVER |\ + BASE_JD_REQ_EVENT_COALESCE)) /** * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which @@ -656,6 +795,30 @@ typedef struct base_external_resource { u64 ext_resource; } base_external_resource; + +/** + * The maximum number of external resources which can be mapped/unmapped + * in a single request. + */ +#define BASE_EXT_RES_COUNT_MAX 10 + +/** + * struct base_external_resource_list - Structure which describes a list of + * external resources. + * @count: The number of resources. + * @ext_res: Array of external resources which is + * sized at allocation time. 
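[Editorial illustration] For the external-resource map/unmap soft jobs above, jc points at a base_external_resource_list whose trailing ext_res array is sized at allocation time (the structure itself is defined just below). The following is a hedged sketch of that sizing arithmetic; the structure layout and BASE_EXT_RES_COUNT_MAX are taken from this patch, while the calloc-based allocator, the u64 stand-in typedef, and the exact sizing expression are illustrative assumptions.

/* Hedged sketch: allocate a base_external_resource_list large enough for
 * n entries, following the ext_res[1] trailing-array layout. */
#include <stdlib.h>
#include <stdint.h>

typedef uint64_t u64;   /* stand-in for the kernel type */

struct base_external_resource {
	u64 ext_resource;
};

struct base_external_resource_list {
	u64 count;
	struct base_external_resource ext_res[1];
};

#define BASE_EXT_RES_COUNT_MAX 10

static struct base_external_resource_list *alloc_ext_res_list(u64 n)
{
	struct base_external_resource_list *list;
	size_t size;

	if (n == 0 || n > BASE_EXT_RES_COUNT_MAX)
		return NULL;

	/* One entry is already part of the struct, so add room for n - 1 more. */
	size = sizeof(*list) + (size_t)(n - 1) * sizeof(list->ext_res[0]);
	list = calloc(1, size);
	if (list)
		list->count = n;
	return list;
}

A list built this way would then be referenced from the jc element of a BASE_JD_REQ_SOFT_EXT_RES_MAP or BASE_JD_REQ_SOFT_EXT_RES_UNMAP atom, as described above.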
+ */ +struct base_external_resource_list { + u64 count; + struct base_external_resource ext_res[1]; +}; + +struct base_jd_debug_copy_buffer { + u64 address; + u64 size; +}; + /** * @brief Setter for a dependency structure * @@ -754,11 +917,12 @@ static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, str /** * @brief External resource info initialization. * - * Sets up a external resource object to reference + * Sets up an external resource object to reference * a memory allocation and the type of access requested. * * @param[in] res The resource object to initialize - * @param handle The handle to the imported memory object + * @param handle The handle to the imported memory object, must be + * obtained by calling @ref base_mem_as_import_handle(). * @param access The type of access requested */ static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access) @@ -1365,8 +1529,7 @@ struct gpu_raw_gpu_props { u64 shader_present; u64 tiler_present; u64 l2_present; - u32 coherency_enabled; - u32 unused_1; /* keep for backward compatibility */ + u64 unused_1; /* keep for backward compatibility */ u32 l2_features; u32 suspend_size; /* API 8.2+ */ @@ -1387,7 +1550,11 @@ struct gpu_raw_gpu_props { u32 thread_max_barrier_size; u32 thread_features; - u32 coherency_features; + /* + * Note: This is the _selected_ coherency mode rather than the + * available modes as exposed in the coherency_features register. + */ + u32 coherency_mode; }; /** @@ -1441,28 +1608,7 @@ enum base_context_create_flags { /** Base context is a 'System Monitor' context for Hardware counters. * * One important side effect of this is that job submission is disabled. */ - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1), - - /** Base context flag indicating a 'hint' that this context uses Compute - * Jobs only. - * - * Specifially, this means that it only sends atoms that do not - * contain the following @ref base_jd_core_req : - * - BASE_JD_REQ_FS - * - BASE_JD_REQ_T - * - * Violation of these requirements will cause the Job-Chains to be rejected. - * - * In addition, it is inadvisable for the atom's Job-Chains to contain Jobs - * of the following @ref gpu_job_type (whilst it may work now, it may not - * work in future) : - * - @ref GPU_JOB_VERTEX - * - @ref GPU_JOB_GEOMETRY - * - * @note An alternative to using this is to specify the BASE_JD_REQ_ONLY_COMPUTE - * requirement in atoms. 
- */ - BASE_CONTEXT_HINT_ONLY_COMPUTE = (1u << 2) + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1) }; /** @@ -1470,15 +1616,13 @@ enum base_context_create_flags { */ #define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \ - ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \ - ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE)) + ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)) /** * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel */ #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ - (((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \ - ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE)) + ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) /** * Private flags used on the base context diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase.h index 8840d60..3764061 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,7 +51,6 @@ #include "mali_kbase_trace_timeline.h" #include "mali_kbase_js.h" #include "mali_kbase_mem.h" -#include "mali_kbase_security.h" #include "mali_kbase_utility.h" #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" @@ -82,7 +81,7 @@ struct kbase_device *kbase_device_alloc(void); */ /* -* API to acquire device list semaphone and return pointer +* API to acquire device list semaphore and return pointer * to the device list head */ const struct list_head *kbase_dev_list_get(void); @@ -140,7 +139,6 @@ void kbase_jd_done_worker(struct work_struct *data); void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, kbasep_js_atom_done_code done_code); void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom); void kbase_jd_zap_context(struct kbase_context *kctx); bool jd_done_nolock(struct kbase_jd_atom *katom, struct list_head *completed_jobs_ctx); @@ -192,12 +190,21 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom); void kbase_finish_soft_job(struct kbase_jd_atom *katom); void kbase_cancel_soft_job(struct kbase_jd_atom *katom); void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); +void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom); bool kbase_replay_process(struct kbase_jd_atom *katom); +enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer); +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); +int kbasep_read_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char *status); +int kbasep_write_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char new_status); + /* api used internally for register access. 
Contains validation and tracing */ void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value); -void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size); +int kbase_device_trace_buffer_install( + struct kbase_context *kctx, u32 *tb, size_t size); void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx); /* api to be ported per OS, only need to do the raw register access */ @@ -346,6 +353,10 @@ void kbase_disjoint_state_down(struct kbase_device *kbdev); */ #define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 +#if !defined(UINT64_MAX) + #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#endif + #if KBASE_TRACE_ENABLE void kbasep_trace_debugfs_init(struct kbase_device *kbdev); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c index 2fb5e3e..c67b3e9 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,6 +43,11 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages) void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { +/* Check if kernel is using coherency with GPU */ +#ifdef CONFIG_MALI_COH_KERN + if (kbdev->system_coherency == COHERENCY_ACE) + return; +#endif /* CONFIG_MALI_COH_KERN */ dma_sync_single_for_device(kbdev->dev, handle, size, dir); } @@ -50,5 +55,10 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { +/* Check if kernel is using coherency with GPU */ +#ifdef CONFIG_MALI_COH_KERN + if (kbdev->system_coherency == COHERENCY_ACE) + return; +#endif /* CONFIG_MALI_COH_KERN */ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); } diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h index 0285e25..356d52b 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,13 +45,6 @@ * @{ */ -#if !MALI_CUSTOMER_RELEASE -/* This flag is set for internal builds so we can run tests without credentials. */ -#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1 -#else -#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0 -#endif - #include /* Forward declaration of struct kbase_device */ @@ -105,7 +98,7 @@ struct kbase_pm_callback_conf { * The system integrator can decide whether to either do nothing, just switch off * the clocks to the GPU, or to completely power down the GPU. * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. 
It is the - * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). */ void (*power_off_callback)(struct kbase_device *kbdev); @@ -115,7 +108,7 @@ struct kbase_pm_callback_conf { * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. * If the GPU state has been lost then this function must return 1, otherwise it should return 0. * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). * * The return value of the first call to this function is ignored. * @@ -160,7 +153,7 @@ struct kbase_pm_callback_conf { * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. * - * @return 0 on success, else int erro code. + * @return 0 on success, else int error code. */ int (*power_runtime_init_callback)(struct kbase_device *kbdev); @@ -187,6 +180,32 @@ struct kbase_pm_callback_conf { * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. */ int (*power_runtime_on_callback)(struct kbase_device *kbdev); + + /* + * Optional callback for checking if GPU can be suspended when idle + * + * This callback will be called by the runtime power management core + * when the reference count goes to 0 to provide notification that the + * GPU now seems idle. + * + * If this callback finds that the GPU can't be powered off, or handles + * suspend by powering off directly or queueing up a power off, a + * non-zero value must be returned to prevent the runtime PM core from + * also triggering a suspend. + * + * Returning 0 will cause the runtime PM core to conduct a regular + * autosuspend. + * + * This callback is optional and if not provided regular autosuspend + * will be triggered. + * + * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use + * this feature. + * + * Return 0 if GPU can be suspended, positive value if it can not be + * suspeneded by runtime PM, else OS error code + */ + int (*power_runtime_idle_callback)(struct kbase_device *kbdev); }; /** diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h index ce5d070..ee7c96d 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -148,11 +148,6 @@ enum { */ #define DEFAULT_AWID_LIMIT KBASE_AID_32 -/** - * Default setting for using alternative hardware counters. - */ -#define DEFAULT_ALTERNATIVE_HWC false - /** * Default UMP device mapping. A UMP_DEVICE__SHIFT value which * defines which UMP device this GPU should be mapped to. @@ -219,6 +214,12 @@ enum { */ #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ +/* + * Default timeout for software event jobs, after which these jobs will be + * cancelled. 
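[Editorial illustration] The timeout defined next applies to the software event jobs introduced earlier in this patch (BASE_JD_REQ_SOFT_EVENT_WAIT/SET/RESET). As a reminder of the one-byte state those jobs act on, here is a minimal, self-contained sketch: the two status values are copied from this patch, while the helper and the main() driver are hypothetical and exist only to show the blocking rule.

/* Minimal sketch of the soft event states. A SOFT_EVENT_WAIT atom stays
 * blocked while the status byte is RESET and completes once it is SET
 * (for example by a SOFT_EVENT_SET atom). */
#include <stdio.h>

#define BASE_JD_SOFT_EVENT_SET   ((unsigned char)1)
#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)

static int soft_event_wait_would_block(unsigned char status)
{
	return status == BASE_JD_SOFT_EVENT_RESET;
}

int main(void)
{
	unsigned char evt = BASE_JD_SOFT_EVENT_RESET;

	printf("blocked: %d\n", soft_event_wait_would_block(evt)); /* prints 1 */
	evt = BASE_JD_SOFT_EVENT_SET;  /* effect of a SOFT_EVENT_SET atom */
	printf("blocked: %d\n", soft_event_wait_would_block(evt)); /* prints 0 */
	return 0;
}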
+ */ +#define DEFAULT_JS_SOFT_EVENT_TIMEOUT ((u32)3000) /* 3s */ + /* * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c index 3a6e9f8..d53f728 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ #include #include #include - +#include /** * kbase_create_context() - Create a kernel base context. @@ -72,11 +72,15 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_kctx; + err = kbase_mem_evictable_init(kctx); + if (err) + goto free_pool; + atomic_set(&kctx->used_pages, 0); err = kbase_jd_init(kctx); if (err) - goto free_pool; + goto deinit_evictable; err = kbasep_js_kctx_init(kctx); if (err) @@ -89,6 +93,7 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) mutex_init(&kctx->reg_lock); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); + spin_lock_init(&kctx->waiting_soft_jobs_lock); #ifdef CONFIG_KDS INIT_LIST_HEAD(&kctx->waiting_kds_resource); #endif @@ -115,6 +120,14 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) err = kbase_region_tracker_init(kctx); if (err) goto no_region_tracker; + + err = kbase_sticky_resource_init(kctx); + if (err) + goto no_sticky; + + err = kbase_jit_init(kctx); + if (err) + goto no_jit; #ifdef CONFIG_GPU_TRACEPOINTS atomic_set(&kctx->jctx.work_id, 0); #endif @@ -126,8 +139,18 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) mutex_init(&kctx->vinstr_cli_lock); + hrtimer_init(&kctx->soft_event_timeout, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kctx->soft_event_timeout.function = &kbasep_soft_event_timeout_worker; + return kctx; +no_jit: + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + kbase_gpu_vm_unlock(kctx); +no_sticky: + kbase_region_tracker_term(kctx); no_region_tracker: kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); no_sink_page: @@ -143,6 +166,8 @@ free_jd: /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ kbasep_js_kctx_term(kctx); kbase_jd_exit(kctx); +deinit_evictable: + kbase_mem_evictable_deinit(kctx); free_pool: kbase_mem_pool_term(&kctx->mem_pool); free_kctx: @@ -188,8 +213,18 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_jd_zap_context(kctx); kbase_event_cleanup(kctx); + /* + * JIT must be terminated before the code below as it must be called + * without the region lock being held. + * The code above ensures no new JIT allocations can be made by + * by the time we get to this point of context tear down. 
+ */ + kbase_jit_term(kctx); + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + /* MMU is disabled as part of scheduling out the context */ kbase_mmu_free_pgd(kctx); @@ -225,6 +260,7 @@ void kbase_destroy_context(struct kbase_context *kctx) if (pages != 0) dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + kbase_mem_evictable_deinit(kctx); kbase_mem_pool_term(&kctx->mem_pool); WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); @@ -262,9 +298,6 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED); - if ((flags & BASE_CONTEXT_HINT_ONLY_COMPUTE) != 0) - js_kctx_info->ctx.flags |= (u32) KBASE_CTX_FLAG_HINT_ONLY_COMPUTE; - /* Latch the initial attributes into the Job Scheduler */ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index adf484d..429fea2 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -1,7 +1,6 @@ - /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,6 +34,9 @@ #include "mali_kbase_debug_mem_view.h" #include "mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" +#if !MALI_CUSTOMER_RELEASE +#include "mali_kbase_regs_dump_debugfs.h" +#endif /* !MALI_CUSTOMER_RELEASE */ #include #include #include @@ -61,7 +63,9 @@ #include #include #include /* is_compat_task */ +#include #include +#include #ifdef CONFIG_MALI_PLATFORM_DEVICETREE #include #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ @@ -91,9 +95,7 @@ #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif /* GPU IRQ Tags */ #define JOB_IRQ_TAG 0 @@ -318,8 +320,10 @@ static int kbase_external_buffer_lock(struct kbase_context *kctx, resource_list_data.kds_resources, KDS_WAIT_BLOCKING); - if (IS_ERR_OR_NULL(lock)) { + if (!lock) { ret = -EINVAL; + } else if (IS_ERR(lock)) { + ret = PTR_ERR(lock); } else { ret = 0; fdata->lock = lock; @@ -349,7 +353,6 @@ out: } #endif /* CONFIG_KDS */ -#ifdef CONFIG_MALI_MIPE_ENABLED static void kbase_create_timeline_objects(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -389,7 +392,8 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) list_for_each_entry(element, &kbdev->kctx_list, link) { kbase_tlstream_tl_summary_new_ctx( element->kctx, - (u32)(element->kctx->id)); + (u32)(element->kctx->id), + (u32)(element->kctx->tgid)); } /* Before releasing the lock, reset body stream buffers. * This will prevent context creation message to be directed to both @@ -401,7 +405,6 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) * user space. 
*/ kbase_tlstream_flush_streams(); } -#endif static void kbase_api_handshake(struct uku_version_check_args *version) { @@ -468,6 +471,34 @@ enum mali_error { MALI_ERROR_FUNCTION_FAILED, }; +enum { + inited_mem = (1u << 0), + inited_js = (1u << 1), + inited_pm_runtime_init = (1u << 2), +#ifdef CONFIG_MALI_DEVFREQ + inited_devfreq = (1u << 3), +#endif /* CONFIG_MALI_DEVFREQ */ + inited_tlstream = (1u << 4), + inited_backend_early = (1u << 5), + inited_backend_late = (1u << 6), + inited_device = (1u << 7), + inited_vinstr = (1u << 8), +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + inited_ipa = (1u << 9), +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ + inited_job_fault = (1u << 10), + inited_misc_register = (1u << 11), + inited_get_device = (1u << 12), + inited_sysfs_group = (1u << 13), + inited_dev_list = (1u << 14), + inited_debugfs = (1u << 15), + inited_gpu_device = (1u << 16), + inited_registers_map = (1u << 17), + inited_power_control = (1u << 19), + inited_buslogger = (1u << 20) +}; + + #ifdef CONFIG_MALI_DEBUG #define INACTIVE_WAIT_MS (5000) @@ -488,6 +519,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg struct kbase_device *kbdev; union uk_header *ukh = args; u32 id; + int ret = 0; KBASE_DEBUG_ASSERT(ukh != NULL); @@ -552,6 +584,18 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg /* setup complete, perform normal operation */ switch (id) { + case KBASE_FUNC_MEM_JIT_INIT: + { + struct kbase_uk_mem_jit_init *jit_init = args; + + if (sizeof(*jit_init) != args_size) + goto bad_size; + + if (kbase_region_tracker_init_jit(kctx, + jit_init->va_pages)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } case KBASE_FUNC_MEM_ALLOC: { struct kbase_uk_mem_alloc *mem = args; @@ -560,6 +604,13 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg if (sizeof(*mem) != args_size) goto bad_size; +#if defined(CONFIG_64BIT) + if (!kctx->is_compat) { + /* force SAME_VA if a 64-bit client */ + mem->flags |= BASE_MEM_SAME_VA; + } +#endif + reg = kbase_mem_alloc(kctx, mem->va_pages, mem->commit_pages, mem->extent, &mem->flags, &mem->gpu_va, @@ -568,11 +619,9 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; } - case KBASE_FUNC_MEM_IMPORT: - { + case KBASE_FUNC_MEM_IMPORT: { struct kbase_uk_mem_import *mem_import = args; - int __user *phandle; - int handle; + void __user *phandle; if (sizeof(*mem_import) != args_size) goto bad_size; @@ -583,26 +632,20 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg #endif phandle = mem_import->phandle.value; - switch (mem_import->type) { - case BASE_MEM_IMPORT_TYPE_UMP: - get_user(handle, phandle); - break; - case BASE_MEM_IMPORT_TYPE_UMM: - get_user(handle, phandle); - break; - default: - mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; + if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; } - if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID || - kbase_mem_import(kctx, mem_import->type, - handle, &mem_import->gpu_va, - &mem_import->va_pages, - &mem_import->flags)) + if (kbase_mem_import(kctx, mem_import->type, phandle, + &mem_import->gpu_va, + &mem_import->va_pages, + &mem_import->flags)) { + mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } break; - } + } case KBASE_FUNC_MEM_ALIAS: { struct kbase_uk_mem_alias *alias = args; struct base_mem_aliasing_info __user 
*user_ai; @@ -778,7 +821,7 @@ copy_failed: if (sizeof(*sn) != args_size) goto bad_size; - if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) { + if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) { dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid"); ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; @@ -812,18 +855,10 @@ copy_failed: case KBASE_FUNC_HWCNT_SETUP: { struct kbase_uk_hwcnt_setup *setup = args; - bool access_allowed; if (sizeof(*setup) != args_size) goto bad_size; - access_allowed = kbase_security_has_capability( - kctx, - KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, - KBASE_SEC_FLAG_NOAUDIT); - if (!access_allowed) - goto out_bad; - mutex_lock(&kctx->vinstr_cli_lock); if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx, &kctx->vinstr_cli, setup) != 0) @@ -856,18 +891,10 @@ copy_failed: case KBASE_FUNC_HWCNT_READER_SETUP: { struct kbase_uk_hwcnt_reader_setup *setup = args; - bool access_allowed; if (sizeof(*setup) != args_size) goto bad_size; - access_allowed = kbase_security_has_capability( - kctx, - KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, - KBASE_SEC_FLAG_NOAUDIT); - if (!access_allowed) - goto out_bad; - mutex_lock(&kctx->vinstr_cli_lock); if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx, setup) != 0) @@ -971,9 +998,10 @@ copy_failed: case KBASE_FUNC_EXT_BUFFER_LOCK: { #ifdef CONFIG_KDS - switch (kbase_external_buffer_lock(kctx, + ret = kbase_external_buffer_lock(kctx, (struct kbase_uk_ext_buff_kds_data *)args, - args_size)) { + args_size); + switch (ret) { case 0: ukh->ret = MALI_ERROR_NONE; break; @@ -1088,7 +1116,7 @@ copy_failed: goto bad_size; if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { - dev_err(kbdev->dev, "buffer too big"); + dev_err(kbdev->dev, "buffer too big\n"); goto out_bad; } @@ -1108,12 +1136,30 @@ copy_failed: kfree(buf); goto out_bad; } - kbasep_mem_profile_debugfs_insert(kctx, buf, - add_data->len); + + if (kbasep_mem_profile_debugfs_insert(kctx, buf, + add_data->len)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + kfree(buf); + goto out_bad; + } break; } -#ifdef CONFIG_MALI_MIPE_ENABLED + +#ifdef CONFIG_MALI_NO_MALI + case KBASE_FUNC_SET_PRFCNT_VALUES: + { + + struct kbase_uk_prfcnt_values *params = + ((struct kbase_uk_prfcnt_values *)args); + gpu_model_set_dummy_prfcnt_sample(params->data, + params->size); + + break; + } +#endif /* CONFIG_MALI_NO_MALI */ + case KBASE_FUNC_TLSTREAM_ACQUIRE: { struct kbase_uk_tlstream_acquire *tlstream_acquire = @@ -1173,7 +1219,6 @@ copy_failed: break; } #endif /* MALI_UNIT_TEST */ -#endif /* CONFIG_MALI_MIPE_ENABLED */ case KBASE_FUNC_GET_CONTEXT_ID: { @@ -1183,12 +1228,38 @@ copy_failed: break; } + case KBASE_FUNC_SOFT_EVENT_UPDATE: + { + struct kbase_uk_soft_event_update *update = args; + + if (sizeof(*update) != args_size) + goto bad_size; + + if (((update->new_status != BASE_JD_SOFT_EVENT_SET) && + (update->new_status != BASE_JD_SOFT_EVENT_RESET)) || + (update->flags != 0)) + goto out_bad; + + if (kbasep_write_soft_event_status( + kctx, update->evt, + update->new_status) != 0) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (update->new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events( + kctx, update->evt); + + break; + } + default: - dev_err(kbdev->dev, "unknown ioctl %u", id); + dev_err(kbdev->dev, "unknown ioctl %u\n", id); goto out_bad; } - return 0; + return ret; bad_size: dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id); @@ -1201,6 +1272,47 @@ static struct kbase_device 
*to_kbase_device(struct device *dev) return dev_get_drvdata(dev); } +static int assign_irqs(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + int i; + + if (!kbdev) + return -ENODEV; + + /* 3 IRQ resources */ + for (i = 0; i < 3; i++) { + struct resource *irq_res; + int irqtag; + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) { + dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); + return -ENOENT; + } + +#ifdef CONFIG_OF + if (!strcmp(irq_res->name, "JOB")) { + irqtag = JOB_IRQ_TAG; + } else if (!strcmp(irq_res->name, "MMU")) { + irqtag = MMU_IRQ_TAG; + } else if (!strcmp(irq_res->name, "GPU")) { + irqtag = GPU_IRQ_TAG; + } else { + dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", + irq_res->name); + return -EINVAL; + } +#else + irqtag = i; +#endif /* CONFIG_OF */ + kbdev->irqs[irqtag].irq = irq_res->start; + kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK; + } + + return 0; +} + /* * API to acquire device list mutex and * return pointer to the device list head @@ -1270,6 +1382,7 @@ static int kbase_open(struct inode *inode, struct file *filp) init_waitqueue_head(&kctx->event_queue); filp->private_data = kctx; + kctx->filp = filp; kctx->infinite_cache_active = kbdev->infinite_cache_active_default; @@ -1292,7 +1405,8 @@ static int kbase_open(struct inode *inode, struct file *filp) debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry, &kctx->infinite_cache_active); #endif /* CONFIG_MALI_COH_USER */ - kbasep_mem_profile_debugfs_add(kctx); + + mutex_init(&kctx->mem_profile_lock); kbasep_jd_debugfs_ctx_add(kctx); kbase_debug_mem_view_init(filp); @@ -1301,6 +1415,7 @@ static int kbase_open(struct inode *inode, struct file *filp) kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool); + kbase_jit_debugfs_add(kctx); #endif /* CONFIG_DEBUGFS */ dev_dbg(kbdev->dev, "created base context\n"); @@ -1313,11 +1428,10 @@ static int kbase_open(struct inode *inode, struct file *filp) mutex_lock(&kbdev->kctx_list_lock); element->kctx = kctx; list_add(&element->link, &kbdev->kctx_list); -#ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_tl_new_ctx( element->kctx, - (u32)(element->kctx->id)); -#endif + (u32)(element->kctx->id), + (u32)(element->kctx->tgid)); mutex_unlock(&kbdev->kctx_list_lock); } else { /* we don't treat this as a fail - just warn about it */ @@ -1338,14 +1452,12 @@ static int kbase_release(struct inode *inode, struct file *filp) struct kbasep_kctx_list_element *element, *tmp; bool found_element = false; -#ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_tl_del_ctx(kctx); -#endif #ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(kctx->kctx_dentry); kbasep_mem_profile_debugfs_remove(kctx); - kbase_debug_job_fault_context_exit(kctx); + kbase_debug_job_fault_context_term(kctx); #endif mutex_lock(&kbdev->kctx_list_lock); @@ -1477,99 +1589,155 @@ static int kbase_check_flags(int flags) return 0; } -static unsigned long kbase_get_unmapped_area(struct file *filp, - const unsigned long addr, const unsigned long len, - const unsigned long pgoff, const unsigned long flags) -{ #ifdef CONFIG_64BIT - /* based on get_unmapped_area, but simplified slightly due to that some - * values are known in advance */ - struct kbase_context *kctx = filp->private_data; +/* The following function is taken from the kernel and just + * renamed. As it's not exported to modules we must copy-paste it here. 
+ */ + +static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info + *info) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long length, low_limit, high_limit, gap_start, gap_end; - if (!kctx->is_compat && !addr && - kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long low_limit, high_limit, gap_start, gap_end; + /* Adjust search length to account for worst case alignment overhead */ + length = info->length + info->align_mask; + if (length < info->length) + return -ENOMEM; - /* Hardware has smaller VA than userspace, ensure the page - * comes from a VA which can be used on the GPU */ + /* + * Adjust search limits by the desired length. + * See implementation comment at top of unmapped_area(). + */ + gap_end = info->high_limit; + if (gap_end < length) + return -ENOMEM; + high_limit = gap_end - length; - gap_end = (1UL<<33); - if (gap_end < len) - return -ENOMEM; - high_limit = gap_end - len; - low_limit = PAGE_SIZE + len; + if (info->low_limit > high_limit) + return -ENOMEM; + low_limit = info->low_limit + length; - gap_start = mm->highest_vm_end; - if (gap_start <= high_limit) - goto found_highest; + /* Check highest gap, which does not precede any rbtree node */ + gap_start = mm->highest_vm_end; + if (gap_start <= high_limit) + goto found_highest; - if (RB_EMPTY_ROOT(&mm->mm_rb)) - return -ENOMEM; - vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); - if (vma->rb_subtree_gap < len) - return -ENOMEM; + /* Check if rbtree root looks promising */ + if (RB_EMPTY_ROOT(&mm->mm_rb)) + return -ENOMEM; + vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); + if (vma->rb_subtree_gap < length) + return -ENOMEM; - while (true) { - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; - if (gap_start <= high_limit && vma->vm_rb.rb_right) { - struct vm_area_struct *right = - rb_entry(vma->vm_rb.rb_right, - struct vm_area_struct, vm_rb); - if (right->rb_subtree_gap >= len) { - vma = right; - continue; - } + while (true) { + /* Visit right subtree if it looks promising */ + gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; + if (gap_start <= high_limit && vma->vm_rb.rb_right) { + struct vm_area_struct *right = + rb_entry(vma->vm_rb.rb_right, + struct vm_area_struct, vm_rb); + if (right->rb_subtree_gap >= length) { + vma = right; + continue; } + } + check_current: - gap_end = vma->vm_start; - if (gap_end < low_limit) - return -ENOMEM; - if (gap_start <= high_limit && - gap_end - gap_start >= len) - goto found; - - if (vma->vm_rb.rb_left) { - struct vm_area_struct *left = - rb_entry(vma->vm_rb.rb_left, - struct vm_area_struct, vm_rb); - - if (left->rb_subtree_gap >= len) { - vma = left; - continue; - } + /* Check if current node has a suitable gap */ + gap_end = vma->vm_start; + if (gap_end < low_limit) + return -ENOMEM; + if (gap_start <= high_limit && gap_end - gap_start >= length) + goto found; + + /* Visit left subtree if it looks promising */ + if (vma->vm_rb.rb_left) { + struct vm_area_struct *left = + rb_entry(vma->vm_rb.rb_left, + struct vm_area_struct, vm_rb); + if (left->rb_subtree_gap >= length) { + vma = left; + continue; } - while (true) { - struct rb_node *prev = &vma->vm_rb; - - if (!rb_parent(prev)) - return -ENOMEM; - vma = rb_entry(rb_parent(prev), - struct vm_area_struct, vm_rb); - if (prev == vma->vm_rb.rb_right) { - gap_start = vma->vm_prev ? 
- vma->vm_prev->vm_end : 0; - goto check_current; - } + } + + /* Go back up the rbtree to find next candidate node */ + while (true) { + struct rb_node *prev = &vma->vm_rb; + if (!rb_parent(prev)) + return -ENOMEM; + vma = rb_entry(rb_parent(prev), + struct vm_area_struct, vm_rb); + if (prev == vma->vm_rb.rb_right) { + gap_start = vma->vm_prev ? + vma->vm_prev->vm_end : 0; + goto check_current; } } + } found: - if (gap_end > (1UL<<33)) - gap_end = (1UL<<33); + /* We found a suitable gap. Clip it with the original high_limit. */ + if (gap_end > info->high_limit) + gap_end = info->high_limit; found_highest: - gap_end -= len; + /* Compute highest gap address at the desired alignment */ + gap_end -= info->length; + gap_end -= (gap_end - info->align_offset) & info->align_mask; + + VM_BUG_ON(gap_end < info->low_limit); + VM_BUG_ON(gap_end < gap_start); + return gap_end; +} + + +static unsigned long kbase_get_unmapped_area(struct file *filp, + const unsigned long addr, const unsigned long len, + const unsigned long pgoff, const unsigned long flags) +{ + /* based on get_unmapped_area, but simplified slightly due to that some + * values are known in advance */ + struct kbase_context *kctx = filp->private_data; + struct mm_struct *mm = current->mm; + struct vm_unmapped_area_info info; + + /* err on fixed address */ + if ((flags & MAP_FIXED) || addr) + return -EINVAL; - VM_BUG_ON(gap_end < PAGE_SIZE); - VM_BUG_ON(gap_end < gap_start); - return gap_end; + /* too big? */ + if (len > TASK_SIZE - SZ_2M) + return -ENOMEM; + + if (kctx->is_compat) + return current->mm->get_unmapped_area(filp, addr, len, pgoff, + flags); + + if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) { + info.high_limit = kctx->same_va_end << PAGE_SHIFT; + info.align_mask = 0; + info.align_offset = 0; + } else { + info.high_limit = min_t(unsigned long, mm->mmap_base, + (kctx->same_va_end << PAGE_SHIFT)); + if (len >= SZ_2M) { + info.align_offset = SZ_2M; + info.align_mask = SZ_2M - 1; + } else { + info.align_mask = 0; + info.align_offset = 0; + } } -#endif - /* No special requirements - fallback to the default version */ - return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); + + info.flags = 0; + info.length = len; + info.low_limit = SZ_2M; + return kbase_unmapped_area_topdown(&info); } +#endif static const struct file_operations kbase_fops = { .owner = THIS_MODULE, @@ -1581,7 +1749,9 @@ static const struct file_operations kbase_fops = { .compat_ioctl = kbase_ioctl, .mmap = kbase_mmap, .check_flags = kbase_check_flags, +#ifdef CONFIG_64BIT .get_unmapped_area = kbase_get_unmapped_area, +#endif }; #ifndef CONFIG_MALI_NO_MALI @@ -1822,7 +1992,15 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, if (!kbdev) return -ENODEV; - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS0) : 0x%llX\n", + kbdev->pm.debug_core_mask[0]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS1) : 0x%llX\n", + kbdev->pm.debug_core_mask[1]); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS2) : 0x%llX\n", + kbdev->pm.debug_core_mask[2]); ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->gpu_props.props.raw_props.shader_present); @@ -1844,36 +2022,63 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, static ssize_t set_core_mask(struct device *dev, struct 
device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; - u64 new_core_mask; - int rc; + u64 new_core_mask[3]; + int items; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - rc = kstrtoull(buf, 16, &new_core_mask); - if (rc) - return rc; + items = sscanf(buf, "%llx %llx %llx", + &new_core_mask[0], &new_core_mask[1], + &new_core_mask[2]); + + if (items == 1) + new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; + + if (items == 1 || items == 3) { + u64 shader_present = + kbdev->gpu_props.props.raw_props.shader_present; + u64 group0_core_mask = + kbdev->gpu_props.props.coherency_info.group[0]. + core_mask; + + if ((new_core_mask[0] & shader_present) != new_core_mask[0] || + !(new_core_mask[0] & group0_core_mask) || + (new_core_mask[1] & shader_present) != + new_core_mask[1] || + !(new_core_mask[1] & group0_core_mask) || + (new_core_mask[2] & shader_present) != + new_core_mask[2] || + !(new_core_mask[2] & group0_core_mask)) { + dev_err(dev, "power_policy: invalid core specification\n"); + return -EINVAL; + } - if ((new_core_mask & kbdev->gpu_props.props.raw_props.shader_present) - != new_core_mask || - !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) { - dev_err(dev, "power_policy: invalid core specification\n"); - return -EINVAL; - } + if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || + kbdev->pm.debug_core_mask[1] != + new_core_mask[1] || + kbdev->pm.debug_core_mask[2] != + new_core_mask[2]) { + unsigned long flags; - if (kbdev->pm.debug_core_mask != new_core_mask) { - unsigned long flags; + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); - spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], + new_core_mask[1], new_core_mask[2]); - kbase_pm_set_debug_core_mask(kbdev, new_core_mask); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, + flags); + } - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + return count; } - return count; + dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n" + "Use format \n" + "or \n"); + return -EINVAL; } /** The sysfs file @c core_mask. @@ -1884,121 +2089,69 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, */ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS /** - * struct sc_split_config - * @tag: Short name - * @human_readable: Long name - * @js0_mask: Mask for job slot 0 - * @js1_mask: Mask for job slot 1 - * @js2_mask: Mask for job slot 2 + * set_soft_event_timeout() - Store callback for the soft_event_timeout sysfs + * file. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. * - * Structure containing a single shader affinity split configuration. + * This allows setting the timeout for software event jobs. Waiting jobs will + * be cancelled after this period expires. This is expressed in milliseconds. + * + * Return: count if the function succeeded. An error code on failure. 
*/ -struct sc_split_config { - char const *tag; - char const *human_readable; - u64 js0_mask; - u64 js1_mask; - u64 js2_mask; -}; +static ssize_t set_soft_event_timeout(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int soft_event_timeout_ms; -/* - * Array of available shader affinity split configurations. - */ -static struct sc_split_config const sc_split_configs[] = { - /* All must be the first config (default). */ - { - "all", "All cores", - 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL - }, - { - "mp1", "MP1 shader core", - 0x1, 0x1, 0x1 - }, - { - "mp2", "MP2 shader core", - 0x3, 0x3, 0x3 - }, - { - "mp4", "MP4 shader core", - 0xF, 0xF, 0xF - }, - { - "mp1_vf", "MP1 vertex + MP1 fragment shader core", - 0x2, 0x1, 0xFFFFFFFFFFFFFFFFULL - }, - { - "mp2_vf", "MP2 vertex + MP2 fragment shader core", - 0xA, 0x5, 0xFFFFFFFFFFFFFFFFULL - }, - /* This must be the last config. */ - { - NULL, NULL, - 0x0, 0x0, 0x0 - }, -}; + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; -/* Pointer to the currently active shader split configuration. */ -static struct sc_split_config const *current_sc_split_config = &sc_split_configs[0]; + if ((kstrtoint(buf, 0, &soft_event_timeout_ms) != 0) || + (soft_event_timeout_ms <= 0)) + return -EINVAL; -/** Show callback for the @c sc_split sysfs file - * - * Returns the current shader core affinity policy. - */ -static ssize_t show_split(struct device *dev, struct device_attribute *attr, char * const buf) -{ - ssize_t ret; - /* We know we are given a buffer which is PAGE_SIZE long. Our strings are all guaranteed - * to be shorter than that at this time so no length check needed. */ - ret = scnprintf(buf, PAGE_SIZE, "Current sc_split: '%s'\n", current_sc_split_config->tag); - return ret; + atomic_set(&kbdev->js_data.soft_event_timeout_ms, + soft_event_timeout_ms); + + return count; } -/** Store callback for the @c sc_split sysfs file. +/** + * show_soft_event_timeout() - Show callback for the soft_event_timeout sysfs + * file. * - * This function is called when the @c sc_split sysfs file is written to - * It modifies the system shader core affinity configuration to allow - * system profiling with different hardware configurations. + * This will return the timeout for the software event jobs. * - * @param dev The device with sysfs file is for - * @param attr The attributes of the sysfs file - * @param buf The value written to the sysfs file - * @param count The number of bytes written to the sysfs file + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents. * - * @return @c count if the function succeeded. An error code on failure. + * Return: The number of bytes output to buf. 
*/ -static ssize_t set_split(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +static ssize_t show_soft_event_timeout(struct device *dev, + struct device_attribute *attr, + char * const buf) { - struct sc_split_config const *config = &sc_split_configs[0]; - - /* Try to match: loop until we hit the last "NULL" entry */ - while (config->tag) { - if (sysfs_streq(config->tag, buf)) { - current_sc_split_config = config; - mali_js0_affinity_mask = config->js0_mask; - mali_js1_affinity_mask = config->js1_mask; - mali_js2_affinity_mask = config->js2_mask; - dev_dbg(dev, "Setting sc_split: '%s'\n", config->tag); - return count; - } - config++; - } + struct kbase_device *kbdev; - /* No match found in config list */ - dev_err(dev, "sc_split: invalid value\n"); - dev_err(dev, " Possible settings: mp[1|2|4], mp[1|2]_vf\n"); - return -ENOENT; -} + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; -/** The sysfs file @c sc_split - * - * This is used for configuring/querying the current shader core work affinity - * configuration. - */ -static DEVICE_ATTR(sc_split, S_IRUGO|S_IWUSR, show_split, set_split); -#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + return scnprintf(buf, PAGE_SIZE, "%i\n", + atomic_read(&kbdev->js_data.soft_event_timeout_ms)); +} +static DEVICE_ATTR(soft_event_timeout, S_IRUGO | S_IWUSR, + show_soft_event_timeout, set_soft_event_timeout); /** Store callback for the @c js_timeouts sysfs file. * @@ -2731,12 +2884,15 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, + { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TMIx" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; u32 gpu_id; - unsigned product_id; + unsigned product_id, product_id_mask; unsigned i; + bool is_new_format; kbdev = to_kbase_device(dev); if (!kbdev) @@ -2744,10 +2900,20 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + is_new_format = GPU_ID_IS_NEW_FORMAT(product_id); + product_id_mask = + (is_new_format ? 
+ GPU_ID2_PRODUCT_MODEL : + GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { - if (gpu_product_id_names[i].id == product_id) { - product_name = gpu_product_id_names[i].name; + const struct gpu_product_id_name *p = &gpu_product_id_names[i]; + + if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) && + (p->id & product_id_mask) == + (product_id & product_id_mask)) { + product_name = p->name; break; } } @@ -3047,26 +3213,53 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, set_mem_pool_max_size); +static int kbasep_secure_mode_enable(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SET_PROTECTED_MODE, NULL); + return 0; +} -static int kbasep_secure_mode_init(struct kbase_device *kbdev) +static int kbasep_secure_mode_disable(struct kbase_device *kbdev) { + if (!kbase_prepare_to_reset_gpu_locked(kbdev)) + return -EBUSY; + + kbase_reset_gpu_locked(kbdev); + + return 0; +} + +static struct kbase_secure_ops kbasep_secure_ops = { + .secure_mode_enable = kbasep_secure_mode_enable, + .secure_mode_disable = kbasep_secure_mode_disable, +}; +static void kbasep_secure_mode_init(struct kbase_device *kbdev) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { + /* Use native secure ops */ + kbdev->secure_ops = &kbasep_secure_ops; + kbdev->secure_mode_support = true; + } #ifdef SECURE_CALLBACKS - kbdev->secure_ops = SECURE_CALLBACKS; - kbdev->secure_mode_support = false; + else { + kbdev->secure_ops = SECURE_CALLBACKS; + kbdev->secure_mode_support = false; - if (kbdev->secure_ops) { - int err; + if (kbdev->secure_ops) { + int err; - /* Make sure secure mode is disabled on startup */ - err = kbdev->secure_ops->secure_mode_disable(kbdev); + /* Make sure secure mode is disabled on startup */ + err = kbdev->secure_ops->secure_mode_disable(kbdev); - /* secure_mode_disable() returns -EINVAL if not supported */ - kbdev->secure_mode_support = (err != -EINVAL); + /* secure_mode_disable() returns -EINVAL if not + * supported + */ + kbdev->secure_mode_support = (err != -EINVAL); + } } #endif - - return 0; } #ifdef CONFIG_MALI_NO_MALI @@ -3105,13 +3298,147 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) static void kbase_common_reg_unmap(struct kbase_device * const kbdev) { - iounmap(kbdev->reg); - release_mem_region(kbdev->reg_start, kbdev->reg_size); + if (kbdev->reg) { + iounmap(kbdev->reg); + release_mem_region(kbdev->reg_start, kbdev->reg_size); + kbdev->reg = NULL; + kbdev->reg_start = 0; + kbdev->reg_size = 0; + } } #endif /* CONFIG_MALI_NO_MALI */ +static int registers_map(struct kbase_device * const kbdev) +{ + /* the first memory resource is the physical address of the GPU + * registers */ + struct platform_device *pdev = to_platform_device(kbdev->dev); + struct resource *reg_res; + int err; -#ifdef CONFIG_DEBUG_FS + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + return -ENOENT; + } + + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); + + err = kbase_common_reg_map(kbdev); + if (err) { + dev_err(kbdev->dev, "Failed to map registers\n"); + return err; + } + + return 0; +} + +static void registers_unmap(struct kbase_device *kbdev) +{ + kbase_common_reg_unmap(kbdev); +} + +static int power_control_init(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + int err = 0; + + 
if (!kbdev) + return -ENODEV; + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); + if (IS_ERR_OR_NULL(kbdev->regulator)) { + err = PTR_ERR(kbdev->regulator); + kbdev->regulator = NULL; + if (err == -EPROBE_DEFER) { + dev_err(&pdev->dev, "Failed to get regulator\n"); + return err; + } + dev_info(kbdev->dev, + "Continuing without Mali regulator control\n"); + /* Allow probe to continue without regulator */ + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + + kbdev->clock = clk_get(kbdev->dev, "clk_mali"); + if (IS_ERR_OR_NULL(kbdev->clock)) { + err = PTR_ERR(kbdev->clock); + kbdev->clock = NULL; + if (err == -EPROBE_DEFER) { + dev_err(&pdev->dev, "Failed to get clock\n"); + goto fail; + } + dev_info(kbdev->dev, "Continuing without Mali clock control\n"); + /* Allow probe to continue without clock. */ + } else { + err = clk_prepare_enable(kbdev->clock); + if (err) { + dev_err(kbdev->dev, + "Failed to prepare and enable clock (%d)\n", + err); + goto fail; + } + } + +#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP) + /* Register the OPPs if they are available in device tree */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + err = dev_pm_opp_of_add_table(kbdev->dev); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + err = of_init_opp_table(kbdev->dev); +#else + err = 0; +#endif /* LINUX_VERSION_CODE */ + if (err) + dev_dbg(kbdev->dev, "OPP table not found\n"); +#endif /* CONFIG_OF && CONFIG_PM_OPP */ + + return 0; + +fail: + +if (kbdev->clock != NULL) { + clk_put(kbdev->clock); + kbdev->clock = NULL; +} + +#ifdef CONFIG_REGULATOR + if (NULL != kbdev->regulator) { + regulator_put(kbdev->regulator); + kbdev->regulator = NULL; + } +#endif + + return err; +} + +static void power_control_term(struct kbase_device *kbdev) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + dev_pm_opp_of_remove_table(kbdev->dev); +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) + of_free_opp_table(kbdev->dev); +#endif + + if (kbdev->clock) { + clk_disable_unprepare(kbdev->clock); + clk_put(kbdev->clock); + kbdev->clock = NULL; + } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + if (kbdev->regulator) { + regulator_put(kbdev->regulator); + kbdev->regulator = NULL; + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +} + +#ifdef CONFIG_DEBUG_FS #if KBASE_GPU_RESET_EN #include @@ -3202,7 +3529,12 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) err = -ENOMEM; goto out; } - kbase_debug_job_fault_dev_init(kbdev); + +#if !MALI_CUSTOMER_RELEASE + kbasep_regs_dump_debugfs_add(kbdev); +#endif /* !MALI_CUSTOMER_RELEASE */ + + kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); #if KBASE_GPU_RESET_EN debugfs_create_file("quirks_sc", 0644, @@ -3261,21 +3593,19 @@ static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id) { - u32 selected_coherency = COHERENCY_NONE; - /* COHERENCY_NONE is always supported */ - u32 supported_coherency_bitmap = COHERENCY_FEATURE_BIT(COHERENCY_NONE); - #ifdef CONFIG_OF + u32 supported_coherency_bitmap = + kbdev->gpu_props.props.raw_props.coherency_mode; const void *coherency_override_dts; u32 override_coherency; #endif /* CONFIG_OF */ - kbdev->system_coherency = selected_coherency; + kbdev->system_coherency = COHERENCY_NONE; /* device tree may 
override the coherency */ #ifdef CONFIG_OF coherency_override_dts = of_get_property(kbdev->dev->of_node, - "override-coherency", + "system-coherency", NULL); if (coherency_override_dts) { @@ -3288,17 +3618,17 @@ static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id) kbdev->system_coherency = override_coherency; dev_info(kbdev->dev, - "Using coherency override, mode %u set from dtb", + "Using coherency mode %u set from dtb", override_coherency); } else dev_warn(kbdev->dev, - "Ignoring invalid coherency override, mode %u set from dtb", + "Ignoring unsupported coherency mode %u set from dtb", override_coherency); } #endif /* CONFIG_OF */ - kbdev->gpu_props.props.raw_props.coherency_features = + kbdev->gpu_props.props.raw_props.coherency_mode = kbdev->system_coherency; } @@ -3319,515 +3649,406 @@ static void kbase_logging_started_cb(void *data) #endif -static int kbase_common_device_init(struct kbase_device *kbdev) -{ - int err; - struct mali_base_gpu_core_props *core_props; - enum { - inited_mem = (1u << 0), - inited_js = (1u << 1), - inited_pm_runtime_init = (1u << 6), -#ifdef CONFIG_MALI_DEVFREQ - inited_devfreq = (1u << 9), -#endif /* CONFIG_MALI_DEVFREQ */ -#ifdef CONFIG_MALI_MIPE_ENABLED - inited_tlstream = (1u << 10), -#endif /* CONFIG_MALI_MIPE_ENABLED */ - inited_backend_early = (1u << 11), - inited_backend_late = (1u << 12), - inited_device = (1u << 13), - inited_vinstr = (1u << 19), - inited_ipa = (1u << 20) - }; - - int inited = 0; - u32 gpu_id; -#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) - u32 ve_logic_tile = 0; -#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ - - dev_set_drvdata(kbdev->dev, kbdev); - - err = kbase_backend_early_init(kbdev); - if (err) - goto out_partial; - inited |= inited_backend_early; +static struct attribute *kbase_attrs[] = { +#ifdef CONFIG_MALI_DEBUG + &dev_attr_debug_command.attr, + &dev_attr_js_softstop_always.attr, +#endif +#if !MALI_CUSTOMER_RELEASE + &dev_attr_force_replay.attr, +#endif + &dev_attr_js_timeouts.attr, + &dev_attr_soft_event_timeout.attr, + &dev_attr_gpuinfo.attr, + &dev_attr_dvfs_period.attr, + &dev_attr_pm_poweroff.attr, + &dev_attr_reset_timeout.attr, + &dev_attr_js_scheduling_period.attr, + &dev_attr_power_policy.attr, + &dev_attr_core_availability_policy.attr, + &dev_attr_core_mask.attr, + &dev_attr_mem_pool_size.attr, + &dev_attr_mem_pool_max_size.attr, + NULL +}; - scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, - kbase_dev_nr++); +static const struct attribute_group kbase_attr_group = { + .attrs = kbase_attrs, +}; - kbase_disjoint_init(kbdev); +static int kbase_platform_device_remove(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + const struct list_head *dev_list; - /* obtain min/max configured gpu frequencies */ - core_props = &(kbdev->gpu_props.props.core_props); + if (!kbdev) + return -ENODEV; - /* For versatile express platforms, min and max values of GPU frequency - * depend on the type of the logic tile; these values may not be known - * at the build time so in some cases a platform config file with wrong - * GPU freguency values may be included; to ensure the correct value of - * min and max GPU frequency is obtained, the type of the logic tile is - * read from the corresponding register on the platform and frequency - * values assigned accordingly.*/ -#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) - ve_logic_tile = kbase_get_platform_logic_tile_type(); - - switch (ve_logic_tile) { - case 0x217: - /* Virtex 6, HBI0217 */ - 
core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN; - core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX; - break; - case 0x247: - /* Virtex 7, HBI0247 */ - core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN; - core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX; - break; - default: - /* all other logic tiles, i.e., Virtex 5 HBI0192 - * or unsuccessful reading from the platform - - * fall back to the config_platform default */ - core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; - break; +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + if (kbdev->inited_subsys & inited_buslogger) { + bl_core_client_unregister(kbdev->buslogger); + kbdev->inited_subsys &= ~inited_buslogger; } -#else - core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; -#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ - - kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US; +#endif - err = kbase_device_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Can't initialize device (%d)\n", err); - goto out_partial; + if (kbdev->inited_subsys & inited_sysfs_group) { + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + kbdev->inited_subsys &= ~inited_sysfs_group; } - inited |= inited_device; - - kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); - if (!kbdev->vinstr_ctx) { - dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_dev_list) { + dev_list = kbase_dev_list_get(); + list_del(&kbdev->entry); + kbase_dev_list_put(dev_list); + kbdev->inited_subsys &= ~inited_dev_list; } - inited |= inited_vinstr; - - kbdev->ipa_ctx = kbase_ipa_init(kbdev); - if (!kbdev->ipa_ctx) { - dev_err(kbdev->dev, "Can't initialize IPA\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_misc_register) { + misc_deregister(&kbdev->mdev); + kbdev->inited_subsys &= ~inited_misc_register; } - inited |= inited_ipa; - - if (kbdev->pm.callback_power_runtime_init) { - err = kbdev->pm.callback_power_runtime_init(kbdev); - if (err) - goto out_partial; - - inited |= inited_pm_runtime_init; + if (kbdev->inited_subsys & inited_get_device) { + put_device(kbdev->dev); + kbdev->inited_subsys &= ~inited_get_device; } - err = kbase_mem_init(kbdev); - if (err) - goto out_partial; - - inited |= inited_mem; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - kbase_device_coherency_init(kbdev, gpu_id); - - err = kbasep_secure_mode_init(kbdev); - if (err) - goto out_partial; - - err = kbasep_js_devdata_init(kbdev); - if (err) - goto out_partial; - - inited |= inited_js; - -#ifdef CONFIG_MALI_MIPE_ENABLED - err = kbase_tlstream_init(); - if (err) { - dev_err(kbdev->dev, "Couldn't initialize timeline stream\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_debugfs) { + kbase_device_debugfs_term(kbdev); + kbdev->inited_subsys &= ~inited_debugfs; } - inited |= inited_tlstream; -#endif /* CONFIG_MALI_MIPE_ENABLED */ - err = kbase_backend_late_init(kbdev); - if (err) - goto out_partial; - inited |= inited_backend_late; - -#ifdef CONFIG_MALI_DEVFREQ - err = kbase_devfreq_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Couldn't initialize devfreq\n"); - goto out_partial; + if (kbdev->inited_subsys & inited_job_fault) { + kbase_debug_job_fault_dev_term(kbdev); + kbdev->inited_subsys &= ~inited_job_fault; } - inited |= inited_devfreq; -#endif /* CONFIG_MALI_DEVFREQ */ - err = 
kbase_device_debugfs_init(kbdev); - if (err) - goto out_partial; - - /* intialise the kctx list */ - mutex_init(&kbdev->kctx_list_lock); - INIT_LIST_HEAD(&kbdev->kctx_list); - - kbdev->mdev.minor = MISC_DYNAMIC_MINOR; - kbdev->mdev.name = kbdev->devname; - kbdev->mdev.fops = &kbase_fops; - kbdev->mdev.parent = get_device(kbdev->dev); - - err = misc_register(&kbdev->mdev); - if (err) { - dev_err(kbdev->dev, "Couldn't register misc dev %s\n", kbdev->devname); - goto out_misc; +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + if (kbdev->inited_subsys & inited_ipa) { + kbase_ipa_term(kbdev->ipa_ctx); + kbdev->inited_subsys &= ~inited_ipa; } +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ - { - const struct list_head *dev_list = kbase_dev_list_get(); - - list_add(&kbdev->entry, &kbase_dev_list); - kbase_dev_list_put(dev_list); + if (kbdev->inited_subsys & inited_vinstr) { + kbase_vinstr_term(kbdev->vinstr_ctx); + kbdev->inited_subsys &= ~inited_vinstr; } - dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); - - return 0; - -out_misc: - put_device(kbdev->dev); - kbase_device_debugfs_term(kbdev); -out_partial: - if (inited & inited_ipa) - kbase_ipa_term(kbdev->ipa_ctx); - if (inited & inited_vinstr) - kbase_vinstr_term(kbdev->vinstr_ctx); #ifdef CONFIG_MALI_DEVFREQ - if (inited & inited_devfreq) + if (kbdev->inited_subsys & inited_devfreq) { kbase_devfreq_term(kbdev); -#endif /* CONFIG_MALI_DEVFREQ */ - if (inited & inited_backend_late) + kbdev->inited_subsys &= ~inited_devfreq; + } +#endif + + if (kbdev->inited_subsys & inited_backend_late) { kbase_backend_late_term(kbdev); -#ifdef CONFIG_MALI_MIPE_ENABLED - if (inited & inited_tlstream) + kbdev->inited_subsys &= ~inited_backend_late; + } + + if (kbdev->inited_subsys & inited_tlstream) { kbase_tlstream_term(); -#endif /* CONFIG_MALI_MIPE_ENABLED */ + kbdev->inited_subsys &= ~inited_tlstream; + } + + /* Bring job and mem sys to a halt before we continue termination */ - if (inited & inited_js) + if (kbdev->inited_subsys & inited_js) kbasep_js_devdata_halt(kbdev); - if (inited & inited_mem) + if (kbdev->inited_subsys & inited_mem) kbase_mem_halt(kbdev); - if (inited & inited_js) + if (kbdev->inited_subsys & inited_js) { kbasep_js_devdata_term(kbdev); + kbdev->inited_subsys &= ~inited_js; + } - if (inited & inited_mem) + if (kbdev->inited_subsys & inited_mem) { kbase_mem_term(kbdev); + kbdev->inited_subsys &= ~inited_mem; + } - if (inited & inited_pm_runtime_init) { - if (kbdev->pm.callback_power_runtime_term) - kbdev->pm.callback_power_runtime_term(kbdev); + if (kbdev->inited_subsys & inited_pm_runtime_init) { + kbdev->pm.callback_power_runtime_term(kbdev); + kbdev->inited_subsys &= ~inited_pm_runtime_init; } - if (inited & inited_device) + if (kbdev->inited_subsys & inited_device) { kbase_device_term(kbdev); + kbdev->inited_subsys &= ~inited_device; + } - if (inited & inited_backend_early) + if (kbdev->inited_subsys & inited_backend_early) { kbase_backend_early_term(kbdev); + kbdev->inited_subsys &= ~inited_backend_early; + } - return err; -} + if (kbdev->inited_subsys & inited_power_control) { + power_control_term(kbdev); + kbdev->inited_subsys &= ~inited_power_control; + } -static struct attribute *kbase_attrs[] = { -#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS - &dev_attr_sc_split.attr, -#endif -#ifdef CONFIG_MALI_DEBUG - &dev_attr_debug_command.attr, - &dev_attr_js_softstop_always.attr, -#endif -#if !MALI_CUSTOMER_RELEASE - &dev_attr_force_replay.attr, -#endif - &dev_attr_js_timeouts.attr, - &dev_attr_gpuinfo.attr, - 
&dev_attr_dvfs_period.attr, - &dev_attr_pm_poweroff.attr, - &dev_attr_reset_timeout.attr, - &dev_attr_js_scheduling_period.attr, - &dev_attr_power_policy.attr, - &dev_attr_core_availability_policy.attr, - &dev_attr_core_mask.attr, - &dev_attr_mem_pool_size.attr, - &dev_attr_mem_pool_max_size.attr, - NULL -}; + if (kbdev->inited_subsys & inited_registers_map) { + registers_unmap(kbdev); + kbdev->inited_subsys &= ~inited_registers_map; + } -static const struct attribute_group kbase_attr_group = { - .attrs = kbase_attrs, -}; +#ifdef CONFIG_MALI_NO_MALI + if (kbdev->inited_subsys & inited_gpu_device) { + gpu_device_destroy(kbdev); + kbdev->inited_subsys &= ~inited_gpu_device; + } +#endif /* CONFIG_MALI_NO_MALI */ + + if (kbdev->inited_subsys != 0) + dev_err(kbdev->dev, "Missing sub system termination\n"); -static int kbase_common_device_remove(struct kbase_device *kbdev); + kbase_device_free(kbdev); + + return 0; +} static int kbase_platform_device_probe(struct platform_device *pdev) { struct kbase_device *kbdev; - struct resource *reg_res; + struct mali_base_gpu_core_props *core_props; + u32 gpu_id; + const struct list_head *dev_list; int err = 0; - int i; #ifdef CONFIG_OF err = kbase_platform_early_init(); if (err) { dev_err(&pdev->dev, "Early platform initialization failed\n"); + kbase_platform_device_remove(pdev); return err; } #endif kbdev = kbase_device_alloc(); if (!kbdev) { - dev_err(&pdev->dev, "Can't allocate device\n"); - err = -ENOMEM; - goto out; + dev_err(&pdev->dev, "Allocate device failed\n"); + kbase_platform_device_remove(pdev); + return -ENOMEM; } + + kbdev->dev = &pdev->dev; + dev_set_drvdata(kbdev->dev, kbdev); + #ifdef CONFIG_MALI_NO_MALI err = gpu_device_create(kbdev); if (err) { - dev_err(&pdev->dev, "Can't initialize dummy model\n"); - goto out_midg; + dev_err(&pdev->dev, "Dummy model initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_gpu_device; #endif /* CONFIG_MALI_NO_MALI */ - kbdev->dev = &pdev->dev; - /* 3 IRQ resources */ - for (i = 0; i < 3; i++) { - struct resource *irq_res; - int irqtag; + err = assign_irqs(pdev); + if (err) { + dev_err(&pdev->dev, "IRQ search failed\n"); + kbase_platform_device_remove(pdev); + return err; + } - irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!irq_res) { - dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); - err = -ENOENT; - goto out_platform_irq; - } + err = registers_map(kbdev); + if (err) { + dev_err(&pdev->dev, "Register map failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_registers_map; -#ifdef CONFIG_OF - if (!strcmp(irq_res->name, "JOB")) { - irqtag = JOB_IRQ_TAG; - } else if (!strcmp(irq_res->name, "MMU")) { - irqtag = MMU_IRQ_TAG; - } else if (!strcmp(irq_res->name, "GPU")) { - irqtag = GPU_IRQ_TAG; - } else { - dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", - irq_res->name); - err = -EINVAL; - goto out_irq_name; - } -#else - irqtag = i; -#endif /* CONFIG_OF */ - kbdev->irqs[irqtag].irq = irq_res->start; - kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK); + err = power_control_init(pdev); + if (err) { + dev_err(&pdev->dev, "Power control initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } - /* the first memory resource is the physical address of the GPU - * registers */ - reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!reg_res) { - dev_err(kbdev->dev, "Invalid register resource\n"); - err = -ENOENT; - goto 
out_platform_mem; - } + kbdev->inited_subsys |= inited_power_control; - kbdev->reg_start = reg_res->start; - kbdev->reg_size = resource_size(reg_res); - err = kbase_common_reg_map(kbdev); - if (err) - goto out_reg_map; + err = kbase_backend_early_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Early backend initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_backend_early; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); - if (IS_ERR_OR_NULL(kbdev->regulator)) { - dev_info(kbdev->dev, "Continuing without Mali regulator control\n"); - kbdev->regulator = NULL; - /* Allow probe to continue without regulator */ + scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, + kbase_dev_nr); + + kbase_disjoint_init(kbdev); + + /* obtain min/max configured gpu frequencies */ + core_props = &(kbdev->gpu_props.props.core_props); + core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; + core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; + + kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US; + + err = kbase_device_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Device initialization failed (%d)\n", err); + kbase_platform_device_remove(pdev); + return err; } -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + kbdev->inited_subsys |= inited_device; -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_enable(kbdev->dev); -#endif - kbdev->clock = clk_get(kbdev->dev, "clk_mali"); - if (IS_ERR_OR_NULL(kbdev->clock)) { - dev_info(kbdev->dev, "Continuing without Mali clock control\n"); - kbdev->clock = NULL; - /* Allow probe to continue without clock. */ - } else { - err = clk_prepare_enable(kbdev->clock); + if (kbdev->pm.callback_power_runtime_init) { + err = kbdev->pm.callback_power_runtime_init(kbdev); if (err) { dev_err(kbdev->dev, - "Failed to prepare and enable clock (%d)\n", err); - goto out_clock_prepare; + "Runtime PM initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_pm_runtime_init; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_PM_OPP) - /* Register the OPPs if they are available in device tree */ - if (of_init_opp_table(kbdev->dev) < 0) - dev_dbg(kbdev->dev, "OPP table not found\n"); -#endif + err = kbase_mem_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Memory subsystem initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_mem; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= GPU_ID_VERSION_PRODUCT_ID; + gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + kbase_device_coherency_init(kbdev, gpu_id); + kbasep_secure_mode_init(kbdev); - err = kbase_common_device_init(kbdev); + err = kbasep_js_devdata_init(kbdev); if (err) { - dev_err(kbdev->dev, "Failed kbase_common_device_init\n"); - goto out_common_init; + dev_err(kbdev->dev, "Job JS devdata initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_js; - err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + err = kbase_tlstream_init(); if (err) { - dev_err(&pdev->dev, "Failed to create sysfs entries\n"); - goto out_sysfs; + dev_err(kbdev->dev, "Timeline stream initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_tlstream; 
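
The rewritten probe path above replaces the old "inited" bitmask of local flags with a persistent kbdev->inited_subsys field: each subsystem that comes up sets its bit, and any failure simply calls kbase_platform_device_remove(), which tears down exactly the subsystems whose bits are set and doubles as the normal driver remove path. A minimal, self-contained sketch of that pattern is shown below; struct my_dev, the foo/bar subsystems and their stub init/term functions are hypothetical stand-ins, not kbase APIs.

/* Hypothetical illustration of the inited_subsys unwind pattern. */
enum {
	inited_foo = (1u << 0),
	inited_bar = (1u << 1),
};

struct my_dev {
	unsigned int inited_subsys;
};

static int foo_init(struct my_dev *dev) { return 0; }
static void foo_term(struct my_dev *dev) { }
static int bar_init(struct my_dev *dev) { return -1; }	/* pretend this one fails */
static void bar_term(struct my_dev *dev) { }

/* Serves as both the error path and the normal remove path: only the
 * subsystems whose bits are set get terminated, in reverse init order. */
static void example_remove(struct my_dev *dev)
{
	if (dev->inited_subsys & inited_bar) {
		bar_term(dev);
		dev->inited_subsys &= ~inited_bar;
	}
	if (dev->inited_subsys & inited_foo) {
		foo_term(dev);
		dev->inited_subsys &= ~inited_foo;
	}
}

static int example_probe(struct my_dev *dev)
{
	int err;

	err = foo_init(dev);
	if (err) {
		example_remove(dev);	/* no bits set yet, nothing to undo */
		return err;
	}
	dev->inited_subsys |= inited_foo;

	err = bar_init(dev);
	if (err) {
		example_remove(dev);	/* unwinds foo only */
		return err;
	}
	dev->inited_subsys |= inited_bar;

	return 0;
}

In the real kbase_platform_device_probe() every error branch funnels through kbase_platform_device_remove(pdev) in exactly this way, and the remove path finishes by warning ("Missing sub system termination") if any inited_subsys bit is still set before kbase_device_free() is called.
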
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); + err = kbase_backend_late_init(kbdev); if (err) { - dev_err(kbdev->dev, "Couldn't register bus log client\n"); - goto out_bl_core_register; + dev_err(kbdev->dev, "Late backend initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; } + kbdev->inited_subsys |= inited_backend_late; - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); -#endif - return 0; - -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER -out_bl_core_register: - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); -#endif - -out_sysfs: - kbase_common_device_remove(kbdev); -out_common_init: -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) - of_free_opp_table(kbdev->dev); -#endif - clk_disable_unprepare(kbdev->clock); -out_clock_prepare: - clk_put(kbdev->clock); -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_disable(kbdev->dev); -#endif -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - regulator_put(kbdev->regulator); -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ - kbase_common_reg_unmap(kbdev); -out_reg_map: -out_platform_mem: -#ifdef CONFIG_OF -out_irq_name: -#endif -out_platform_irq: -#ifdef CONFIG_MALI_NO_MALI - gpu_device_destroy(kbdev); -out_midg: -#endif /* CONFIG_MALI_NO_MALI */ - kbase_device_free(kbdev); -out: - return err; -} +#ifdef CONFIG_MALI_DEVFREQ + err = kbase_devfreq_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Fevfreq initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_devfreq; +#endif /* CONFIG_MALI_DEVFREQ */ -static int kbase_common_device_remove(struct kbase_device *kbdev) -{ - kbase_ipa_term(kbdev->ipa_ctx); - kbase_vinstr_term(kbdev->vinstr_ctx); - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); + if (!kbdev->vinstr_ctx) { + dev_err(kbdev->dev, + "Virtual instrumentation initialization failed\n"); + kbase_platform_device_remove(pdev); + return -EINVAL; + } + kbdev->inited_subsys |= inited_vinstr; -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - if (kbdev->buslogger) - bl_core_client_unregister(kbdev->buslogger); -#endif +#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY + kbdev->ipa_ctx = kbase_ipa_init(kbdev); + if (!kbdev->ipa_ctx) { + dev_err(kbdev->dev, "IPA initialization failed\n"); + kbase_platform_device_remove(pdev); + return -EINVAL; + } -#ifdef CONFIG_DEBUG_FS - debugfs_remove_recursive(kbdev->mali_debugfs_directory); -#endif -#ifdef CONFIG_MALI_DEVFREQ - kbase_devfreq_term(kbdev); -#endif + kbdev->inited_subsys |= inited_ipa; +#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ - kbase_backend_late_term(kbdev); + err = kbase_debug_job_fault_dev_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Job fault debug initialization failed\n"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_job_fault; - if (kbdev->pm.callback_power_runtime_term) - kbdev->pm.callback_power_runtime_term(kbdev); -#ifdef CONFIG_MALI_PLATFORM_DEVICETREE - pm_runtime_disable(kbdev->dev); -#endif + err = kbase_device_debugfs_init(kbdev); + if (err) { + dev_err(kbdev->dev, "DebugFS initialization failed"); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_debugfs; -#ifdef CONFIG_MALI_MIPE_ENABLED - kbase_tlstream_term(); -#endif /* CONFIG_MALI_MIPE_ENABLED */ + /* initialize the kctx list */ + 
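	/* The kctx_list below tracks every kbase_context created against this
	 * device (contexts add and remove themselves under kctx_list_lock);
	 * the gpu_memory debugfs reporting walks this list, so it is set up
	 * before the misc device is registered and user space can start
	 * opening contexts.
	 */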
mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); - kbasep_js_devdata_halt(kbdev); - kbase_mem_halt(kbdev); + kbdev->mdev.minor = MISC_DYNAMIC_MINOR; + kbdev->mdev.name = kbdev->devname; + kbdev->mdev.fops = &kbase_fops; + kbdev->mdev.parent = get_device(kbdev->dev); + kbdev->inited_subsys |= inited_get_device; - kbasep_js_devdata_term(kbdev); - kbase_mem_term(kbdev); - kbase_backend_early_term(kbdev); + err = misc_register(&kbdev->mdev); + if (err) { + dev_err(kbdev->dev, "Misc device registration failed for %s\n", + kbdev->devname); + kbase_platform_device_remove(pdev); + return err; + } + kbdev->inited_subsys |= inited_misc_register; - { - const struct list_head *dev_list = kbase_dev_list_get(); + dev_list = kbase_dev_list_get(); + list_add(&kbdev->entry, &kbase_dev_list); + kbase_dev_list_put(dev_list); + kbdev->inited_subsys |= inited_dev_list; - list_del(&kbdev->entry); - kbase_dev_list_put(dev_list); - } - misc_deregister(&kbdev->mdev); - put_device(kbdev->dev); - kbase_common_reg_unmap(kbdev); - kbase_device_term(kbdev); - if (kbdev->clock) { - clk_disable_unprepare(kbdev->clock); - clk_put(kbdev->clock); - kbdev->clock = NULL; + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + if (err) { + dev_err(&pdev->dev, "SysFS group creation failed\n"); + kbase_platform_device_remove(pdev); + return err; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - regulator_put(kbdev->regulator); -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ -#ifdef CONFIG_MALI_NO_MALI - gpu_device_destroy(kbdev); -#endif /* CONFIG_MALI_NO_MALI */ - kbase_device_free(kbdev); + kbdev->inited_subsys |= inited_sysfs_group; - return 0; -} +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + err = bl_core_client_register(kbdev->devname, + kbase_logging_started_cb, + kbdev, &kbdev->buslogger, + THIS_MODULE, NULL); + if (err == 0) { + kbdev->inited_subsys |= inited_buslogger; + bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); + } else { + dev_warn(kbdev->dev, "Bus log client registration failed\n"); + err = 0; + } +#endif -static int kbase_platform_device_remove(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + dev_info(kbdev->dev, + "Probed as %s\n", dev_name(kbdev->mdev.this_device)); - if (!kbdev) - return -ENODEV; + kbase_dev_nr++; - return kbase_common_device_remove(kbdev); + return err; } /** Suspend callback from the OS. @@ -3927,7 +4148,7 @@ static int kbase_device_runtime_suspend(struct device *dev) */ #ifdef KBASE_PM_RUNTIME -int kbase_device_runtime_resume(struct device *dev) +static int kbase_device_runtime_resume(struct device *dev) { int ret = 0; struct kbase_device *kbdev = to_kbase_device(dev); @@ -3952,21 +4173,30 @@ int kbase_device_runtime_resume(struct device *dev) } #endif /* KBASE_PM_RUNTIME */ -/** Runtime idle callback from the OS. - * - * This is called by Linux when the device appears to be inactive and it might be - * placed into a low power state + +#ifdef KBASE_PM_RUNTIME +/** + * kbase_device_runtime_idle - Runtime idle callback from the OS. + * @dev: The device to suspend * - * @param dev The device to suspend + * This is called by Linux when the device appears to be inactive and it might + * be placed into a low power state. 
* - * @return A standard Linux error code + * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, + * otherwise a standard Linux error code */ - -#ifdef KBASE_PM_RUNTIME static int kbase_device_runtime_idle(struct device *dev) { - /* Avoid pm_runtime_suspend being called */ - return 1; + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + /* Use platform specific implementation if it exists. */ + if (kbdev->pm.backend.callback_power_runtime_idle) + return kbdev->pm.backend.callback_power_runtime_idle(kbdev); + + return 0; } #endif /* KBASE_PM_RUNTIME */ #ifndef CONFIG_MALI_DEVFREQ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c index 41ce051..f3e426f 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -386,7 +386,18 @@ static const struct file_operations kbasep_debug_job_fault_fops = { .release = debug_job_fault_release, }; -static int kbase_job_fault_event_init(struct kbase_device *kbdev) +/* + * Initialize debugfs entry for job fault dump + */ +void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("job_fault", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_debug_job_fault_fops); +} + + +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) { INIT_LIST_HEAD(&kbdev->job_fault_event_list); @@ -396,24 +407,23 @@ static int kbase_job_fault_event_init(struct kbase_device *kbdev) kbdev->job_fault_resume_workq = alloc_workqueue( "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); + if (!kbdev->job_fault_resume_workq) + return -ENOMEM; + + kbdev->job_fault_debug = false; return 0; } /* - * Initialize debugfs entry for job fault dump + * Release the relevant resource per device */ -void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) { - debugfs_create_file("job_fault", S_IRUGO, - kbdev->mali_debugfs_directory, kbdev, - &kbasep_debug_job_fault_fops); - - kbase_job_fault_event_init(kbdev); - kbdev->job_fault_debug = false; - + destroy_workqueue(kbdev->job_fault_resume_workq); } + /* * Initialize the relevant data structure per context */ @@ -423,12 +433,12 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx) /* We need allocate double size register range * Because this memory will keep the register address and value */ - kctx->reg_dump = kmalloc(0x4000 * 2, GFP_KERNEL); + kctx->reg_dump = vmalloc(0x4000 * 2); if (kctx->reg_dump == NULL) return; if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { - kfree(kctx->reg_dump); + vfree(kctx->reg_dump); kctx->reg_dump = NULL; } INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); @@ -439,9 +449,22 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx) /* * release the relevant resource per context */ -void kbase_debug_job_fault_context_exit(struct kbase_context *kctx) +void kbase_debug_job_fault_context_term(struct kbase_context *kctx) +{ + vfree(kctx->reg_dump); +} + +#else /* CONFIG_DEBUG_FS */ + +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +{ + kbdev->job_fault_debug = false; + + return 0; +} + +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) { - kfree(kctx->reg_dump); } -#endif +#endif /* CONFIG_DEBUG_FS */ diff --git 
a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h index 3734046..0930f90 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h @@ -25,11 +25,26 @@ #define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF /** - * kbase_debug_job_fault_dev_init - Initialize job fault debug sysfs - * and create the fault event wait queue per device + * kbase_debug_job_fault_dev_init - Create the fault event wait queue + * per device and initialize the required lists. + * @kbdev: Device pointer + * + * Return: Zero on success or a negative error code. + */ +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs + * @kbdev: Device pointer + */ +void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_dev_term - Clean up resources created in + * kbase_debug_job_fault_dev_init. * @kbdev: Device pointer */ -void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); /** * kbase_debug_job_fault_context_init - Initialize the relevant @@ -39,11 +54,11 @@ void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); void kbase_debug_job_fault_context_init(struct kbase_context *kctx); /** - * kbase_debug_job_fault_context_exit - Release the relevant + * kbase_debug_job_fault_context_term - Release the relevant * resource per context * @kctx: KBase context pointer */ -void kbase_debug_job_fault_context_exit(struct kbase_context *kctx); +void kbase_debug_job_fault_context_term(struct kbase_context *kctx); /** * kbase_debug_job_fault_process - Process the failed job. diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c index 1a3198e..42d1d83 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,7 @@ #include #include -#if CONFIG_DEBUG_FS +#ifdef CONFIG_DEBUG_FS struct debug_mem_mapping { struct list_head node; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h index 86fc9e4..c4af0c3 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef CONFIG_MALI_FPGA_BUS_LOGGER #include @@ -144,6 +145,8 @@ #define MIDGARD_MMU_TOPLEVEL 1 #endif +#define MIDGARD_MMU_BOTTOMLEVEL 3 + #define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) /** setting in kbase_context::as_nr that indicates it's invalid */ @@ -185,6 +188,8 @@ #define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) /* Atom requires GPU to be in secure mode */ #define KBASE_KATOM_FLAG_SECURE (1<<11) +/* Atom has been stored in linked list */ +#define KBASE_KATOM_FLAG_JSCTX_IN_LL (1<<12) /* SW related flags about types of JS_COMMAND action * NOTE: These must be masked off by JS_COMMAND_MASK */ @@ -386,11 +391,22 @@ struct kbase_jd_atom { /* Pointer to atom that has cross-slot dependency on this atom */ struct kbase_jd_atom *x_post_dep; + /* The GPU's flush count recorded at the time of submission, used for + * the cache flush optimisation */ + u32 flush_id; struct kbase_jd_atom_backend backend; #ifdef CONFIG_DEBUG_FS struct base_job_fault_event fault_event; #endif + + struct list_head queue; + + struct kbase_va_region *jit_addr_reg; + + /* If non-zero, this indicates that the atom will fail with the set + * event_code when the atom is processed. */ + enum base_jd_event_code will_fail_event_code; }; static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom) @@ -471,6 +487,7 @@ typedef u32 kbase_as_poke_state; struct kbase_mmu_setup { u64 transtab; u64 memattr; + u64 transcfg; }; /** @@ -489,6 +506,7 @@ struct kbase_as { enum kbase_mmu_fault_type fault_type; u32 fault_status; u64 fault_addr; + u64 fault_extra_addr; struct mutex transaction_mutex; struct kbase_mmu_setup current_setup; @@ -664,10 +682,11 @@ struct kbase_pm_device_data { wait_queue_head_t zero_active_count_wait; /** - * A bit mask identifying the available shader cores that are specified - * via sysfs + * Bit masks identifying the available shader cores that are specified + * via sysfs. One mask per job slot. */ - u64 debug_core_mask; + u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; + u64 debug_core_mask_all; /** * Lock protecting the power state of the device. @@ -784,9 +803,7 @@ struct kbase_device { int irq; int flags; } irqs[3]; -#ifdef CONFIG_HAVE_CLK struct clk *clock; -#endif #ifdef CONFIG_REGULATOR struct regulator *regulator; #endif @@ -872,7 +889,7 @@ struct kbase_device { s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ /* Structure used for instrumentation and HW counters dumping */ - struct { + struct kbase_hwcnt { /* The lock should be used when accessing any of the following members */ spinlock_t lock; @@ -932,10 +949,6 @@ struct kbase_device { struct list_head kctx_list; struct mutex kctx_list_lock; -#ifdef CONFIG_MALI_MIDGARD_RT_PM - struct delayed_work runtime_pm_workqueue; -#endif - #ifdef CONFIG_PM_DEVFREQ struct devfreq_dev_profile devfreq_profile; struct devfreq *devfreq; @@ -952,6 +965,12 @@ struct kbase_device { struct kbase_trace_kbdev_timeline timeline; #endif + /* + * Control for enabling job dump on failure, set when control debugfs + * is opened. 
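	 * The resulting fault events are queued on job_fault_event_list and
	 * consumed through the "job_fault" debugfs entry created by
	 * kbase_debug_job_fault_debugfs_init().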
+ */ + bool job_fault_debug; + #ifdef CONFIG_DEBUG_FS /* directory for debugfs entries */ struct dentry *mali_debugfs_directory; @@ -959,13 +978,19 @@ struct kbase_device { struct dentry *debugfs_ctx_directory; /* failed job dump, used for separate debug process */ - bool job_fault_debug; wait_queue_head_t job_fault_wq; wait_queue_head_t job_fault_resume_wq; struct workqueue_struct *job_fault_resume_workq; struct list_head job_fault_event_list; struct kbase_context *kctx_fault; +#if !MALI_CUSTOMER_RELEASE + /* Per-device data for register dumping interface */ + struct { + u16 reg_offset; /* Offset of a GPU_CONTROL register to be + dumped upon request */ + } regs_dump_debugfs_data; +#endif /* !MALI_CUSTOMER_RELEASE */ #endif /* CONFIG_DEBUG_FS */ /* fbdump profiling controls set by gator */ @@ -1002,11 +1027,23 @@ struct kbase_device { /* defaults for new context created for this device */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active_default; +#else u32 infinite_cache_active_default; +#endif size_t mem_pool_max_size_default; /* system coherency mode */ u32 system_coherency; + /* Flag to track when cci snoops have been enabled on the interface */ + bool cci_snoop_enabled; + + /* SMC function IDs to call into Trusted firmware to enable/disable + * cache snooping. Value of 0 indicates that they are not used + */ + u32 snoop_enable_smc; + u32 snoop_disable_smc; /* Secure operations */ struct kbase_secure_ops *secure_ops; @@ -1033,14 +1070,33 @@ struct kbase_device { */ struct bus_logger_client *buslogger; #endif + /* Boolean indicating if an IRQ flush during reset is in progress. */ + bool irq_reset_flush; + + /* list of inited sub systems. Used during terminate/error recovery */ + u32 inited_subsys; }; -/* JSCTX ringbuffer size must always be a power of 2 */ -#define JSCTX_RB_SIZE 256 -#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1) +/* JSCTX ringbuffer size will always be a power of 2. The idx shift must be: + - >=2 (buffer size -> 4) + - <= 9 (buffer size 2^(9-1)=256) (technically, 10 works for the ringbuffer + but this is unnecessary as max atoms is 256) + */ +#define JSCTX_RB_IDX_SHIFT (8U) +#if ((JSCTX_RB_IDX_SHIFT < 2) || ((3 * JSCTX_RB_IDX_SHIFT) >= 32)) +#error "Invalid ring buffer size for 32bit atomic." +#endif +#define JSCTX_RB_SIZE (1U << (JSCTX_RB_IDX_SHIFT - 1U)) /* 1 bit for overflow */ +#define JSCTX_RB_SIZE_STORE (1U << JSCTX_RB_IDX_SHIFT) +#define JSCTX_RB_MASK (JSCTX_RB_SIZE - 1U) +#define JSCTX_RB_MASK_STORE (JSCTX_RB_SIZE_STORE - 1U) + +#define JSCTX_WR_OFFSET (0U) +#define JSCTX_RN_OFFSET (JSCTX_WR_OFFSET + JSCTX_RB_IDX_SHIFT) +#define JSCTX_RD_OFFSET (JSCTX_RN_OFFSET + JSCTX_RB_IDX_SHIFT) /** - * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer + * struct jsctx_rb_entry - Ringbuffer entry in &struct jsctx_queue. * @atom_id: Atom ID */ struct jsctx_rb_entry { @@ -1048,45 +1104,69 @@ struct jsctx_rb_entry { }; /** - * struct jsctx_rb - JS context atom ring buffer + * struct jsctx_queue - JS context atom queue, containing both ring buffer and linked list. * @entries: Array of size %JSCTX_RB_SIZE which holds the &struct * kbase_jd_atom pointers which make up the contents of the ring * buffer. - * @read_idx: Index into @entries. Indicates the next entry in @entries to - * read, and is incremented when pulling an atom, and decremented - * when unpulling. - * HW access lock must be held when accessing. - * @write_idx: Index into @entries. 
Indicates the next entry to use when - * adding atoms into the ring buffer, and is incremented when - * adding a new atom. - * jctx->lock must be held when accessing. - * @running_idx: Index into @entries. Indicates the last valid entry, and is - * incremented when remving atoms from the ring buffer. - * HW access lock must be held when accessing. + * @indicies: An atomic variable containing indicies for the ring buffer. + * Indicies are of size JSCTX_RB_IDX_SHIFT. + * The following are contained: + * - WR_IDX - Write index. Index of the NEXT slot to be written. + * - RN_IDX - Running index. Index of the tail of the list. + * This is the atom that has been running the longest. + * - RD_IDX - Read index. Index of the next atom to be pulled. + * @queue_head: Head item of the linked list queue. + * + * Locking: + * The linked list assumes jctx.lock is held. + * The ringbuffer serves as an intermediary between irq context and non-irq + * context, without the need for the two to share any lock. irq context can + * pull (and unpull) and only requires the runpool_irq.lock. While non-irq + * context can add and remove and only requires holding only jctx.lock. + * Error handling affecting both, or the whole ringbuffer in general, must + * hold both locks or otherwise ensure (f.ex deschedule/kill) only that thread + * is accessing the buffer. + * This means that RD_IDX is updated by irq-context (pull and unpull) and must + * hold runpool_irq.lock. While WR_IDX (add) and RN_IDX (remove) is updated by + * non-irq context and must hold jctx.lock. + * Note that pull (or sister function peek) must also access WR_IDX to ensure + * there is free space in the buffer, this is ok as WR_IDX is only increased. + * A similar situation is apparent with unpull and RN_IDX, but only one atom + * (already pulled) can cause either remove or unpull, so this will never + * conflict. * - * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom. + * &struct jsctx_queue is a queue of &struct kbase_jd_atom, + * part ringbuffer and part linked list. */ -struct jsctx_rb { +struct jsctx_queue { struct jsctx_rb_entry entries[JSCTX_RB_SIZE]; - u16 read_idx; /* HW access lock must be held when accessing */ - u16 write_idx; /* jctx->lock must be held when accessing */ - u16 running_idx; /* HW access lock must be held when accessing */ + atomic_t indicies; + + struct list_head queue_head; }; + + + + #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ (((minor) & 0xFFF) << 8) | \ ((0 & 0xFF) << 0)) struct kbase_context { + struct file *filp; struct kbase_device *kbdev; int id; /* System wide unique id */ unsigned long api_version; phys_addr_t pgd; struct list_head event_list; + struct list_head event_coalesce_list; struct mutex event_mutex; - bool event_closed; + atomic_t event_closed; struct workqueue_struct *event_workq; + atomic_t event_count; + int event_coalesce_count; bool is_compat; @@ -1097,6 +1177,7 @@ struct kbase_context { struct page *aliasing_sink_page; + struct mutex mmu_lock; struct mutex reg_lock; /* To be converted to a rwlock? 
*/ struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */ @@ -1113,7 +1194,12 @@ struct kbase_context { struct kbase_mem_pool mem_pool; + struct shrinker reclaim; + struct list_head evict_list; + struct mutex evict_lock; + struct list_head waiting_soft_jobs; + spinlock_t waiting_soft_jobs_lock; #ifdef CONFIG_KDS struct list_head waiting_kds_resource; #endif @@ -1138,6 +1224,8 @@ struct kbase_context { * All other flags must be added there */ spinlock_t mm_update_lock; struct mm_struct *process_mm; + /* End of the SAME_VA zone */ + u64 same_va_end; #ifdef CONFIG_MALI_TRACE_TIMELINE struct kbase_trace_kctx_timeline timeline; @@ -1147,8 +1235,10 @@ struct kbase_context { char *mem_profile_data; /* Size of @c mem_profile_data */ size_t mem_profile_size; - /* Spinlock guarding data */ - spinlock_t mem_profile_lock; + /* Mutex guarding memory profile state */ + struct mutex mem_profile_lock; + /* Memory profile file created */ + bool mem_profile_initialized; struct dentry *kctx_dentry; /* for job fault debug */ @@ -1161,7 +1251,7 @@ struct kbase_context { #endif /* CONFIG_DEBUG_FS */ - struct jsctx_rb jsctx_rb + struct jsctx_queue jsctx_queue [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; /* Number of atoms currently pulled from this context */ @@ -1172,7 +1262,11 @@ struct kbase_context { bool pulled; /* true if infinite cache is to be enabled for new allocations. Existing * allocations will not change. bool stored as a u32 per Linux API */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active; +#else u32 infinite_cache_active; +#endif /* Bitmask of slots that can be pulled from */ u32 slots_pullable; @@ -1196,6 +1290,52 @@ struct kbase_context { struct list_head completed_jobs; /* Number of work items currently pending on job_done_wq */ atomic_t work_count; + + /* true if context is counted in kbdev->js_data.nr_contexts_runnable */ + bool ctx_runnable_ref; + + /* Waiting soft-jobs will fail when this timer expires */ + struct hrtimer soft_event_timeout; + + /* JIT allocation management */ + struct kbase_va_region *jit_alloc[255]; + struct list_head jit_active_head; + struct list_head jit_pool_head; + struct list_head jit_destroy_head; + struct mutex jit_lock; + struct work_struct jit_work; + + /* External sticky resource management */ + struct list_head ext_res_meta_head; +}; + +/** + * struct kbase_ctx_ext_res_meta - Structure which binds an external resource + * to a @kbase_context. + * @ext_res_node: List head for adding the metadata to a + * @kbase_context. + * @alloc: The physical memory allocation structure + * which is mapped. + * @gpu_addr: The GPU virtual address the resource is + * mapped to. + * @refcount: Refcount to keep track of the number of + * active mappings. + * + * External resources can be mapped into multiple contexts as well as the same + * context multiple times. + * As kbase_va_region itself isn't refcounted we can't attach our extra + * information to it as it could be removed under our feet leaving external + * resources pinned. + * This metadata structure binds a single external resource to a single + * context, ensuring that per context refcount is tracked separately so it can + * be overridden when needed and abuses by the application (freeing the resource + * multiple times) don't effect the refcount of the physical allocation. 
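 * Each metadata item is linked onto the owning context's ext_res_meta_head
 * list through @ext_res_node.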
+ */ +struct kbase_ctx_ext_res_meta { + struct list_head ext_res_node; + struct kbase_mem_phy_alloc *alloc; + u64 gpu_addr; + u64 refcount; }; enum kbase_reg_access_type { @@ -1209,6 +1349,21 @@ enum kbase_share_attr_bits { SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ }; +/** + * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent. + * @kbdev: kbase device + * + * Return: true if the device access are coherent, false if not. + */ +static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || + (kbdev->system_coherency == COHERENCY_ACE)) + return true; + + return false; +} + /* Conversion helpers for setting up high resolution timers */ #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) @@ -1221,4 +1376,29 @@ enum kbase_share_attr_bits { /* Maximum number of times a job can be replayed */ #define BASEP_JD_REPLAY_LIMIT 15 +/* JobDescriptorHeader - taken from the architecture specifications, the layout + * is currently identical for all GPU archs. */ +struct job_descriptor_header { + u32 exception_status; + u32 first_incomplete_task; + u64 fault_pointer; + u8 job_descriptor_size : 1; + u8 job_type : 7; + u8 job_barrier : 1; + u8 _reserved_01 : 1; + u8 _reserved_1 : 1; + u8 _reserved_02 : 1; + u8 _reserved_03 : 1; + u8 _reserved_2 : 1; + u8 _reserved_04 : 1; + u8 _reserved_05 : 1; + u16 job_index; + u16 job_dependency_index_1; + u16 job_dependency_index_2; + union { + u64 _64; + u32 _32; + } next_job; +}; + #endif /* _KBASE_DEFS_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c index 6b8a285..c55779c 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -145,8 +145,32 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev) int kbase_device_init(struct kbase_device * const kbdev) { int i, err; +#ifdef CONFIG_ARM64 + struct device_node *np = NULL; +#endif /* CONFIG_ARM64 */ spin_lock_init(&kbdev->mmu_mask_change); +#ifdef CONFIG_ARM64 + kbdev->cci_snoop_enabled = false; + np = kbdev->dev->of_node; + if (np != NULL) { + if (of_property_read_u32(np, "snoop_enable_smc", + &kbdev->snoop_enable_smc)) + kbdev->snoop_enable_smc = 0; + if (of_property_read_u32(np, "snoop_disable_smc", + &kbdev->snoop_disable_smc)) + kbdev->snoop_disable_smc = 0; + /* Either both or none of the calls should be provided. 
*/ + if (!((kbdev->snoop_disable_smc == 0 + && kbdev->snoop_enable_smc == 0) + || (kbdev->snoop_disable_smc != 0 + && kbdev->snoop_enable_smc != 0))) { + WARN_ON(1); + err = -EINVAL; + goto fail; + } + } +#endif /* CONFIG_ARM64 */ /* Get the list of workarounds for issues on the current HW * (identified by the GPU_ID register) */ @@ -159,6 +183,8 @@ int kbase_device_init(struct kbase_device * const kbdev) */ kbase_hw_set_features_mask(kbdev); + kbase_gpuprops_set_features(kbdev); + /* On Linux 4.0+, dma coherency is determined from device tree */ #if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); @@ -206,7 +232,7 @@ int kbase_device_init(struct kbase_device * const kbdev) for (i = 0; i < FBDUMP_CONTROL_MAX; i++) kbdev->kbase_profiling_controls[i] = 0; - kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); + kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); atomic_set(&kbdev->ctx_num, 0); @@ -218,7 +244,11 @@ int kbase_device_init(struct kbase_device * const kbdev) kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); +#else kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ #ifdef CONFIG_MALI_DEBUG init_waitqueue_head(&kbdev->driver_inactive_wait); @@ -255,13 +285,20 @@ void kbase_device_free(struct kbase_device *kbdev) kfree(kbdev); } -void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size) +int kbase_device_trace_buffer_install( + struct kbase_context *kctx, u32 *tb, size_t size) { unsigned long flags; KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(tb); + /* Interface uses 16-bit value to track last accessed entry. Each entry + * is composed of two 32-bit words. + * This limits the size that can be handled without an overflow. */ + if (0xFFFF * (2 * sizeof(u32)) < size) + return -EINVAL; + /* set up the header */ /* magic number in the first 4 bytes */ tb[0] = TRACE_BUFFER_HEADER_SPECIAL; @@ -276,6 +313,8 @@ void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size kctx->jctx.tb_wrap_offset = size / 8; kctx->jctx.tb = tb; spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); + + return 0; } void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx) diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c index 25b30f0..bf8c304 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,7 @@ #include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -38,10 +35,8 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_atom_ctx(katom, kctx); kbase_tlstream_tl_del_atom(katom); -#endif katom->status = KBASE_JD_ATOM_STATE_UNUSED; @@ -52,15 +47,10 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru int kbase_event_pending(struct kbase_context *ctx) { - int ret; - KBASE_DEBUG_ASSERT(ctx); - mutex_lock(&ctx->event_mutex); - ret = (!list_empty(&ctx->event_list)) || (true == ctx->event_closed); - mutex_unlock(&ctx->event_mutex); - - return ret; + return (atomic_read(&ctx->event_count) != 0) || + (atomic_read(&ctx->event_closed) != 0); } KBASE_EXPORT_TEST_API(kbase_event_pending); @@ -74,7 +64,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve mutex_lock(&ctx->event_mutex); if (list_empty(&ctx->event_list)) { - if (!ctx->event_closed) { + if (!atomic_read(&ctx->event_closed)) { mutex_unlock(&ctx->event_mutex); return -1; } @@ -90,6 +80,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve } /* normal event processing */ + atomic_dec(&ctx->event_count); atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); list_del(ctx->event_list.next); @@ -151,6 +142,29 @@ static void kbase_event_process_noreport(struct kbase_context *kctx, } } +/** + * kbase_event_coalesce - Move pending events to the main event list + * @kctx: Context pointer + * + * kctx->event_list and kctx->event_coalesce_count must be protected + * by a lock unless this is the last thread using them + * (and we're about to terminate the lock). 
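 * In practice that lock is kctx->event_mutex: kbase_event_post() calls this
 * helper with it held, while kbase_event_cleanup() relies on being the last
 * user of the context.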
+ * + * Return: The number of pending events moved to the main event list + */ +static int kbase_event_coalesce(struct kbase_context *kctx) +{ + const int event_count = kctx->event_coalesce_count; + + /* Join the list of pending events onto the tail of the main list + and reset it */ + list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); + kctx->event_coalesce_count = 0; + + /* Return the number of events moved */ + return event_count; +} + void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { @@ -167,18 +181,31 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) return; } - mutex_lock(&ctx->event_mutex); - list_add_tail(&atom->dep_item[0], &ctx->event_list); - mutex_unlock(&ctx->event_mutex); + if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { + /* Don't report the event until other event(s) have completed */ + mutex_lock(&ctx->event_mutex); + list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); + ++ctx->event_coalesce_count; + mutex_unlock(&ctx->event_mutex); + } else { + /* Report the event and any pending events now */ + int event_count = 1; + + mutex_lock(&ctx->event_mutex); + event_count += kbase_event_coalesce(ctx); + list_add_tail(&atom->dep_item[0], &ctx->event_list); + atomic_add(event_count, &ctx->event_count); + mutex_unlock(&ctx->event_mutex); - kbase_event_wakeup(ctx); + kbase_event_wakeup(ctx); + } } KBASE_EXPORT_TEST_API(kbase_event_post); void kbase_event_close(struct kbase_context *kctx) { mutex_lock(&kctx->event_mutex); - kctx->event_closed = true; + atomic_set(&kctx->event_closed, true); mutex_unlock(&kctx->event_mutex); kbase_event_wakeup(kctx); } @@ -188,8 +215,11 @@ int kbase_event_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx); INIT_LIST_HEAD(&kctx->event_list); + INIT_LIST_HEAD(&kctx->event_coalesce_list); mutex_init(&kctx->event_mutex); - kctx->event_closed = false; + atomic_set(&kctx->event_count, 0); + kctx->event_coalesce_count = 0; + atomic_set(&kctx->event_closed, false); kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); if (NULL == kctx->event_workq) @@ -202,6 +232,8 @@ KBASE_EXPORT_TEST_API(kbase_event_init); void kbase_event_cleanup(struct kbase_context *kctx) { + int event_count; + KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(kctx->event_workq); @@ -214,6 +246,9 @@ void kbase_event_cleanup(struct kbase_context *kctx) * Note: use of kctx->event_list without a lock is safe because this must be the last * thread using it (because we're about to terminate the lock) */ + event_count = kbase_event_coalesce(kctx); + atomic_add(event_count, &kctx->event_count); + while (!list_empty(&kctx->event_list)) { struct base_jd_event_v2 event; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c index a2174b2..0615641 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -189,23 +189,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } /* If we are using any other device */ } else { - uint32_t nr_l2, nr_sc, j; + uint32_t nr_l2, nr_sc_bits, j; uint64_t core_mask; nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices; core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask; - nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores; + nr_sc_bits = fls64(core_mask); /* The 
job manager and tiler sets of counters * are always present */ - in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL); + in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); if (!in_out_info->hwc_layout) goto destroy_context; - dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; + dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; in_out_info->hwc_layout[i++] = JM_BLOCK; in_out_info->hwc_layout[i++] = TILER_BLOCK; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h index d124e82..eb76f01 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h @@ -221,7 +221,7 @@ static const char * const hardware_counters_mali_t60x[] = { "T60x_LSC_DIRTY_LINE", "T60x_LSC_SNOOPS", "T60x_AXI_TLB_STALL", - "T60x_AXI_TLB_MIESS", + "T60x_AXI_TLB_MISS", "T60x_AXI_TLB_TRANSACTION", "T60x_LS_TLB_MISS", "T60x_LS_TLB_HIT", @@ -486,7 +486,7 @@ static const char * const hardware_counters_mali_t62x[] = { "T62x_LSC_DIRTY_LINE", "T62x_LSC_SNOOPS", "T62x_AXI_TLB_STALL", - "T62x_AXI_TLB_MIESS", + "T62x_AXI_TLB_MISS", "T62x_AXI_TLB_TRANSACTION", "T62x_LS_TLB_MISS", "T62x_LS_TLB_HIT", @@ -1018,7 +1018,7 @@ static const char * const hardware_counters_mali_t76x[] = { "T76x_LSC_DIRTY_LINE", "T76x_LSC_SNOOPS", "T76x_AXI_TLB_STALL", - "T76x_AXI_TLB_MIESS", + "T76x_AXI_TLB_MISS", "T76x_AXI_TLB_TRANSACTION", "T76x_LS_TLB_MISS", "T76x_LS_TLB_HIT", @@ -1284,7 +1284,7 @@ static const char * const hardware_counters_mali_t82x[] = { "T82x_LSC_DIRTY_LINE", "T82x_LSC_SNOOPS", "T82x_AXI_TLB_STALL", - "T82x_AXI_TLB_MIESS", + "T82x_AXI_TLB_MISS", "T82x_AXI_TLB_TRANSACTION", "T82x_LS_TLB_MISS", "T82x_LS_TLB_HIT", @@ -1550,7 +1550,7 @@ static const char * const hardware_counters_mali_t83x[] = { "T83x_LSC_DIRTY_LINE", "T83x_LSC_SNOOPS", "T83x_AXI_TLB_STALL", - "T83x_AXI_TLB_MIESS", + "T83x_AXI_TLB_MISS", "T83x_AXI_TLB_TRANSACTION", "T83x_LS_TLB_MISS", "T83x_LS_TLB_HIT", @@ -1816,7 +1816,7 @@ static const char * const hardware_counters_mali_t86x[] = { "T86x_LSC_DIRTY_LINE", "T86x_LSC_SNOOPS", "T86x_AXI_TLB_STALL", - "T86x_AXI_TLB_MIESS", + "T86x_AXI_TLB_MISS", "T86x_AXI_TLB_TRANSACTION", "T86x_LS_TLB_MISS", "T86x_LS_TLB_HIT", @@ -2082,7 +2082,7 @@ static const char * const hardware_counters_mali_t88x[] = { "T88x_LSC_DIRTY_LINE", "T88x_LSC_SNOOPS", "T88x_AXI_TLB_STALL", - "T88x_AXI_TLB_MIESS", + "T88x_AXI_TLB_MISS", "T88x_AXI_TLB_TRANSACTION", "T88x_LS_TLB_MISS", "T88x_LS_TLB_HIT", diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h new file mode 100644 index 0000000..a962ecb --- /dev/null +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h @@ -0,0 +1,112 @@ +/* + * + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + +#ifndef _KBASE_GPU_ID_H_ +#define _KBASE_GPU_ID_H_ + +/* GPU_ID register */ +#define GPU_ID_VERSION_STATUS_SHIFT 0 +#define GPU_ID_VERSION_MINOR_SHIFT 4 +#define GPU_ID_VERSION_MAJOR_SHIFT 12 +#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 +#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) +#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) +#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) +#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) + +/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ +#define GPU_ID_PI_T60X 0x6956 +#define GPU_ID_PI_T62X 0x0620 +#define GPU_ID_PI_T76X 0x0750 +#define GPU_ID_PI_T72X 0x0720 +#define GPU_ID_PI_TFRX 0x0880 +#define GPU_ID_PI_T86X 0x0860 +#define GPU_ID_PI_T82X 0x0820 +#define GPU_ID_PI_T83X 0x0830 + +/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */ +#define GPU_ID_PI_NEW_FORMAT_START 0x1000 +#define GPU_ID_IS_NEW_FORMAT(product_id) ((product_id) != GPU_ID_PI_T60X && \ + (product_id) >= \ + GPU_ID_PI_NEW_FORMAT_START) + +#define GPU_ID2_VERSION_STATUS_SHIFT 0 +#define GPU_ID2_VERSION_MINOR_SHIFT 4 +#define GPU_ID2_VERSION_MAJOR_SHIFT 12 +#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 +#define GPU_ID2_ARCH_REV_SHIFT 20 +#define GPU_ID2_ARCH_MINOR_SHIFT 24 +#define GPU_ID2_ARCH_MAJOR_SHIFT 28 +#define GPU_ID2_VERSION_STATUS (0xF << GPU_ID2_VERSION_STATUS_SHIFT) +#define GPU_ID2_VERSION_MINOR (0xFF << GPU_ID2_VERSION_MINOR_SHIFT) +#define GPU_ID2_VERSION_MAJOR (0xF << GPU_ID2_VERSION_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MAJOR (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT) +#define GPU_ID2_ARCH_REV (0xF << GPU_ID2_ARCH_REV_SHIFT) +#define GPU_ID2_ARCH_MINOR (0xF << GPU_ID2_ARCH_MINOR_SHIFT) +#define GPU_ID2_ARCH_MAJOR (0xF << GPU_ID2_ARCH_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) + +/* Helper macro to create a partial GPU_ID (new format) that defines + a product ignoring its version. */ +#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ + (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ + ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ + ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Helper macro to create a partial GPU_ID (new format) that specifies the + revision (major, minor, status) of a product */ +#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ + (((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ + ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ + ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + +/* Helper macro to create a complete GPU_ID (new format) */ +#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ + version_major, version_minor, version_status) \ + (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ + product_major) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, \ + version_status)) + +/* Helper macro to create a partial GPU_ID (new format) that identifies + a particular GPU model by its arch_major and product_major. */ +#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ + (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Strip off the non-relevant bits from a product_id value and make it suitable + for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU + model. 
*/ +#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ + (((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ + GPU_ID2_PRODUCT_MODEL) + +#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) + +/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ +#define GPU_ID_S_15DEV0 0x1 +#define GPU_ID_S_EAC 0x2 + +/* Helper macro to create a GPU_ID assuming valid values for id, major, + minor, status */ +#define GPU_ID_MAKE(id, major, minor, status) \ + (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ + ((status) << GPU_ID_VERSION_STATUS_SHIFT)) + +#endif /* _KBASE_GPU_ID_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c index ca26404..82f4c36 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c @@ -32,7 +32,6 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) { - ssize_t ret = 0; struct list_head *entry; const struct list_head *kbdev_list; @@ -43,14 +42,14 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) kbdev = list_entry(entry, struct kbase_device, entry); /* output the total memory usage and cap for this device */ - ret = seq_printf(sfile, "%-16s %10u\n", + seq_printf(sfile, "%-16s %10u\n", kbdev->devname, atomic_read(&(kbdev->memdev.used_pages))); mutex_lock(&kbdev->kctx_list_lock); list_for_each_entry(element, &kbdev->kctx_list, link) { /* output the memory usage and cap for each kctx * opened on this device */ - ret = seq_printf(sfile, " %s-0x%p %10u\n", + seq_printf(sfile, " %s-0x%p %10u\n", "kctx", element->kctx, atomic_read(&(element->kctx->used_pages))); @@ -58,7 +57,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) mutex_unlock(&kbdev->kctx_list_lock); } kbase_dev_list_put(kbdev_list); - return ret; + return 0; } /* diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c index d632a0b..7f77dba 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c @@ -87,7 +87,6 @@ int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpup if (kctx->api_version < KBASE_API_VERSION(8, 2)) kbase_props->props.raw_props.suspend_size = 0; - return 0; } @@ -200,7 +199,6 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; gpu_props->raw_props.thread_features = regdump.thread_features; - } /** @@ -297,3 +295,20 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) gpu_props->num_address_spaces = hweight32(raw->as_present); gpu_props->num_job_slots = hweight32(raw->js_present); } + +void kbase_gpuprops_set_features(struct kbase_device *kbdev) +{ + base_gpu_props *gpu_props; + struct kbase_gpuprops_regdump regdump; + + gpu_props = &kbdev->gpu_props.props; + + /* Dump relevant registers */ + kbase_backend_gpuprops_get_features(kbdev, ®dump); + + /* + * Copy the raw value from the register, later this will get turned + * into the selected coherency mode. 
+ */ + gpu_props->raw_props.coherency_mode = regdump.coherency_features; +} diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h index af97d97..f3c95cc 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h @@ -39,6 +39,16 @@ struct kbase_device; */ void kbase_gpuprops_set(struct kbase_device *kbdev); +/** + * kbase_gpuprops_set_features - Set up Kbase GPU properties + * @kbdev: Device pointer + * + * This function sets up GPU properties that are dependent on the hardware + * features bitmask. This function must be preceeded by a call to + * kbase_hw_set_features_mask(). + */ +void kbase_gpuprops_set_features(struct kbase_device *kbdev); + /** * @brief Provide GPU properties to userside through UKU call. * diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h index 463fead..781375a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -51,6 +51,7 @@ struct kbase_gpuprops_regdump { u32 tiler_present_hi; u32 l2_present_lo; u32 l2_present_hi; + u32 coherency_features; }; struct kbase_gpu_cache_props { diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c index fac65d4..de2461f 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,38 +31,50 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) { const enum base_hw_feature *features; u32 gpu_id; + u32 product_id; gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - switch (gpu_id) { - case GPU_ID_PI_TFRX: - /* FALLTHROUGH */ - case GPU_ID_PI_T86X: - features = base_hw_features_tFxx; - break; - case GPU_ID_PI_T83X: - features = base_hw_features_t83x; - break; - case GPU_ID_PI_T82X: - features = base_hw_features_t82x; - break; - case GPU_ID_PI_T76X: - features = base_hw_features_t76x; - break; - case GPU_ID_PI_T72X: - features = base_hw_features_t72x; - break; - case GPU_ID_PI_T62X: - features = base_hw_features_t62x; - break; - case GPU_ID_PI_T60X: - features = base_hw_features_t60x; - break; - default: - features = base_hw_features_generic; - break; + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + features = base_hw_features_tMIx; + break; + default: + features = base_hw_features_generic; + break; + } + } else { + switch (product_id) { + case GPU_ID_PI_TFRX: + /* FALLTHROUGH */ + case GPU_ID_PI_T86X: + features = base_hw_features_tFxx; + break; + case GPU_ID_PI_T83X: + features = base_hw_features_t83x; + break; + case GPU_ID_PI_T82X: + features = base_hw_features_t82x; + break; + case GPU_ID_PI_T76X: + features = base_hw_features_t76x; + break; + case GPU_ID_PI_T72X: + features = base_hw_features_t72x; + 
break; + case GPU_ID_PI_T62X: + features = base_hw_features_t62x; + break; + case GPU_ID_PI_T60X: + features = base_hw_features_t60x; + break; + default: + features = base_hw_features_generic; + break; + } } for (; *features != BASE_HW_FEATURE_END; features++) @@ -73,135 +85,172 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) { const enum base_hw_issue *issues; u32 gpu_id; + u32 product_id; u32 impl_tech; gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; if (impl_tech != IMPLEMENTATION_MODEL) { - switch (gpu_id) { - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): - issues = base_hw_issues_t60x_r0p0_15dev0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): - issues = base_hw_issues_t60x_r0p0_eac; - break; - case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): - issues = base_hw_issues_t60x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0): - issues = base_hw_issues_t62x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): - issues = base_hw_issues_t62x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0): - issues = base_hw_issues_t62x_r1p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): - issues = base_hw_issues_t76x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): - issues = base_hw_issues_t76x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): - issues = base_hw_issues_t76x_r0p1_50rel0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): - issues = base_hw_issues_t76x_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): - issues = base_hw_issues_t76x_r0p3; - break; - case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): - issues = base_hw_issues_t76x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): - case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): - issues = base_hw_issues_t72x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): - issues = base_hw_issues_t72x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0): - issues = base_hw_issues_t72x_r1p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2): - issues = base_hw_issues_tFRx_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0): - issues = base_hw_issues_tFRx_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8): - issues = base_hw_issues_tFRx_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): - issues = base_hw_issues_tFRx_r2p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0): - issues = base_hw_issues_t86x_r0p2; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8): - issues = base_hw_issues_t86x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): - issues = base_hw_issues_t86x_r2p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0): - issues = base_hw_issues_t83x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8): - issues = base_hw_issues_t83x_r1p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): - issues = base_hw_issues_t82x_r0p0; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0): - issues = base_hw_issues_t82x_r0p1; - break; - case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): - case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8): - issues = base_hw_issues_t82x_r1p0; - break; - default: - dev_err(kbdev->dev, "Unknown GPU 
ID %x", gpu_id); - return -EINVAL; + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id) { + case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1): + issues = base_hw_issues_tMIx_r0p0_05dev0; + break; + case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2): + issues = base_hw_issues_tMIx_r0p0; + break; + default: + if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TMIX) { + issues = base_hw_issues_tMIx_r0p0; + } else { + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } + } else { + switch (gpu_id) { + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): + issues = base_hw_issues_t60x_r0p0_15dev0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): + issues = base_hw_issues_t60x_r0p0_eac; + break; + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): + issues = base_hw_issues_t60x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0): + issues = base_hw_issues_t62x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): + issues = base_hw_issues_t62x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0): + issues = base_hw_issues_t62x_r1p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): + issues = base_hw_issues_t76x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): + issues = base_hw_issues_t76x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): + issues = base_hw_issues_t76x_r0p1_50rel0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): + issues = base_hw_issues_t76x_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): + issues = base_hw_issues_t76x_r0p3; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): + issues = base_hw_issues_t76x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): + issues = base_hw_issues_t72x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): + issues = base_hw_issues_t72x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0): + issues = base_hw_issues_t72x_r1p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2): + issues = base_hw_issues_tFRx_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0): + issues = base_hw_issues_tFRx_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8): + issues = base_hw_issues_tFRx_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): + issues = base_hw_issues_tFRx_r2p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0): + issues = base_hw_issues_t86x_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8): + issues = base_hw_issues_t86x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): + issues = base_hw_issues_t86x_r2p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0): + issues = base_hw_issues_t83x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8): + issues = base_hw_issues_t83x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): + issues = base_hw_issues_t82x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0): + issues = base_hw_issues_t82x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8): + issues = base_hw_issues_t82x_r1p0; + break; + default: + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } } } else { /* Software model */ - switch (gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT) { - case GPU_ID_PI_T60X: - issues = 
base_hw_issues_model_t60x; - break; - case GPU_ID_PI_T62X: - issues = base_hw_issues_model_t62x; - break; - case GPU_ID_PI_T72X: - issues = base_hw_issues_model_t72x; - break; - case GPU_ID_PI_T76X: - issues = base_hw_issues_model_t76x; - break; - case GPU_ID_PI_TFRX: - issues = base_hw_issues_model_tFRx; - break; - case GPU_ID_PI_T86X: - issues = base_hw_issues_model_t86x; - break; - case GPU_ID_PI_T83X: - issues = base_hw_issues_model_t83x; - break; - case GPU_ID_PI_T82X: - issues = base_hw_issues_model_t82x; - break; - default: - dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); - return -EINVAL; + if (GPU_ID_IS_NEW_FORMAT(product_id)) { + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + issues = base_hw_issues_model_tMIx; + break; + default: + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } else { + switch (product_id) { + case GPU_ID_PI_T60X: + issues = base_hw_issues_model_t60x; + break; + case GPU_ID_PI_T62X: + issues = base_hw_issues_model_t62x; + break; + case GPU_ID_PI_T72X: + issues = base_hw_issues_model_t72x; + break; + case GPU_ID_PI_T76X: + issues = base_hw_issues_model_t76x; + break; + case GPU_ID_PI_TFRX: + issues = base_hw_issues_model_tFRx; + break; + case GPU_ID_PI_T86X: + issues = base_hw_issues_model_t86x; + break; + case GPU_ID_PI_T83X: + issues = base_hw_issues_model_t83x; + break; + case GPU_ID_PI_T82X: + issues = base_hw_issues_model_t82x; + break; + default: + dev_err(kbdev->dev, "Unknown GPU ID %x", + gpu_id); + return -EINVAL; + } } } diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h index f93ca9d..cf8a813 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,4 +32,16 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); +/** + * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU + * @kbdev: Device pointer + * @regdump: Pointer to struct kbase_gpuprops_regdump structure + * + * This function reads GPU properties that are dependent on the hardware + * features bitmask + */ +void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump); + + #endif /* _KBASE_HWACCESS_GPUPROPS_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h index 6bddaa8..2efa293 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -254,6 +254,15 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx); */ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); +/** + * kbase_backend_get_current_flush_id - Return the current flush ID + * + * @kbdev: Device pointer + * + * Return: the current flush ID to be recorded for each job chain + */ +u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); + #if KBASE_GPU_RESET_EN /** * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. 
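The GPU_ID rework above (the new mali_kbase_gpu_id.h plus the reshaped switches in kbase_hw_set_features_mask() and kbase_hw_set_issues_mask()) splits classification into a legacy path keyed on PRODUCT_ID and a new-format path keyed on the product model bits. As a minimal, hypothetical sketch of how those macros combine (the helper name and standalone form are assumptions made for illustration, not code from the patch):

/* Illustration only: classify a raw GPU_ID value the way the new switches do. */
static bool gpu_id_is_tmix(u32 gpu_id)
{
	/* PRODUCT_ID occupies bits 31:16 of GPU_ID in both formats */
	u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
			GPU_ID_VERSION_PRODUCT_ID_SHIFT;

	/* New-format IDs (>= 0x1000, excluding the legacy 0x6956 T60X value)
	 * are matched on arch_major/product_major only, ignoring version and
	 * arch_minor/arch_rev. */
	if (GPU_ID_IS_NEW_FORMAT(product_id))
		return (gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TMIX;

	/* Legacy-format IDs never identify a new-format product such as TMIx */
	return false;
}

This is the same model-mask comparison the new-format branches in both kbase_hw_set_features_mask() and kbase_hw_set_issues_mask() rely on before falling back to the older per-PRODUCT_ID cases.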
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h index dbdcd3d..71c7d49 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h @@ -126,10 +126,13 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); * * @param kbdev The kbase device structure for the device (must be a * valid pointer) - * @param new_core_mask The core mask to use + * @param new_core_mask_js0 The core mask to use for job slot 0 + * @param new_core_mask_js1 The core mask to use for job slot 1 + * @param new_core_mask_js2 The core mask to use for job slot 2 */ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, - u64 new_core_mask); + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2); /** diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c index 314ae08..fda317b 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c @@ -66,18 +66,10 @@ int kbase_instr_hwcnt_enable(struct kbase_context *kctx, struct kbase_uk_hwcnt_setup *setup) { struct kbase_device *kbdev; - bool access_allowed; int err; kbdev = kctx->kbdev; - /* Determine if the calling task has access to this capability */ - access_allowed = kbase_security_has_capability(kctx, - KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, - KBASE_SEC_FLAG_NOAUDIT); - if (!access_allowed) - return -EINVAL; - /* Mark the context as active so the GPU is kept turned on */ /* A suspend won't happen here, because we're in a syscall from a * userspace thread. */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c index 433103c..c579d0a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,50 +21,39 @@ #define NR_IPA_GROUPS 8 +struct kbase_ipa_context; + /** * struct ipa_group - represents a single IPA group * @name: name of the IPA group * @capacitance: capacitance constant for IPA group + * @calc_power: function to calculate power for IPA group */ struct ipa_group { const char *name; u32 capacitance; + u32 (*calc_power)(struct kbase_ipa_context *, + struct ipa_group *); }; +#include + /** * struct kbase_ipa_context - IPA context per device - * @kbdev: pointer to kbase device - * @groups: array of IPA groups for this context - * @ipa_lock: protects the entire IPA context + * @kbdev: pointer to kbase device + * @groups: array of IPA groups for this context + * @vinstr_cli: vinstr client handle + * @vinstr_buffer: buffer to dump hardware counters onto + * @ipa_lock: protects the entire IPA context */ struct kbase_ipa_context { struct kbase_device *kbdev; struct ipa_group groups[NR_IPA_GROUPS]; + struct kbase_vinstr_client *vinstr_cli; + void *vinstr_buffer; struct mutex ipa_lock; }; -static struct ipa_group ipa_groups_def_v4[] = { - { .name = "group0", .capacitance = 0 }, - { .name = "group1", .capacitance = 0 }, - { .name = "group2", .capacitance = 0 }, - { .name = "group3", .capacitance = 0 }, - { .name = "group4", .capacitance = 0 }, - { .name = "group5", .capacitance = 0 }, - { .name = "group6", .capacitance = 0 }, - { .name = "group7", .capacitance = 0 }, -}; - -static struct ipa_group ipa_groups_def_v5[] = { - { .name = "group0", .capacitance = 0 }, - { .name = "group1", .capacitance = 0 }, - { .name = "group2", .capacitance = 0 }, - { .name = "group3", .capacitance = 0 }, - { .name = "group4", .capacitance = 0 }, - { .name = "group5", .capacitance = 0 }, - { .name = "group6", .capacitance = 0 }, - { .name = "group7", .capacitance = 0 }, -}; - static ssize_t show_ipa_group(struct device *dev, struct device_attribute *attr, char *buf) @@ -143,25 +132,10 @@ static struct attribute_group kbase_ipa_attr_group = { static void init_ipa_groups(struct kbase_ipa_context *ctx) { - struct kbase_device *kbdev = ctx->kbdev; - struct ipa_group *defs; - size_t i, len; - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { - defs = ipa_groups_def_v4; - len = ARRAY_SIZE(ipa_groups_def_v4); - } else { - defs = ipa_groups_def_v5; - len = ARRAY_SIZE(ipa_groups_def_v5); - } - - for (i = 0; i < len; i++) { - ctx->groups[i].name = defs[i].name; - ctx->groups[i].capacitance = defs[i].capacitance; - } + memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups)); } -#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)) +#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) { struct kbase_device *kbdev = ctx->kbdev; @@ -171,7 +145,7 @@ static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) size_t i; int err; - np = of_find_node_by_name(kbdev->dev->of_node, "ipa-groups"); + np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups"); if (!np) return 0; @@ -229,6 +203,172 @@ static int reset_ipa_groups(struct kbase_ipa_context *ctx) return update_ipa_groups_from_dt(ctx); } +static inline u32 read_hwcnt(struct kbase_ipa_context *ctx, + u32 offset) +{ + u8 *p = ctx->vinstr_buffer; + + return *(u32 *)&p[offset]; +} + +static inline u32 add_saturate(u32 a, u32 b) +{ + if (U32_MAX - a < b) + return U32_MAX; + return a + b; 
+} + +/* + * Calculate power estimation based on hardware counter `c' + * across all shader cores. + */ +static u32 calc_power_sc_single(struct kbase_ipa_context *ctx, + struct ipa_group *group, u32 c) +{ + struct kbase_device *kbdev = ctx->kbdev; + u64 core_mask; + u32 base = 0, r = 0; + + core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + while (core_mask != 0ull) { + if ((core_mask & 1ull) != 0ull) { + u64 n = read_hwcnt(ctx, base + c); + u32 d = read_hwcnt(ctx, GPU_ACTIVE); + u32 s = group->capacitance; + + r = add_saturate(r, div_u64(n * s, d)); + } + base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; + core_mask >>= 1; + } + return r; +} + +/* + * Calculate power estimation based on hardware counter `c1' + * and `c2' across all shader cores. + */ +static u32 calc_power_sc_double(struct kbase_ipa_context *ctx, + struct ipa_group *group, u32 c1, u32 c2) +{ + struct kbase_device *kbdev = ctx->kbdev; + u64 core_mask; + u32 base = 0, r = 0; + + core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + while (core_mask != 0ull) { + if ((core_mask & 1ull) != 0ull) { + u64 n = read_hwcnt(ctx, base + c1); + u32 d = read_hwcnt(ctx, GPU_ACTIVE); + u32 s = group->capacitance; + + r = add_saturate(r, div_u64(n * s, d)); + n = read_hwcnt(ctx, base + c2); + r = add_saturate(r, div_u64(n * s, d)); + } + base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; + core_mask >>= 1; + } + return r; +} + +static u32 calc_power_single(struct kbase_ipa_context *ctx, + struct ipa_group *group, u32 c) +{ + u64 n = read_hwcnt(ctx, c); + u32 d = read_hwcnt(ctx, GPU_ACTIVE); + u32 s = group->capacitance; + + return div_u64(n * s, d); +} + +static u32 calc_power_group0(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_single(ctx, group, L2_ANY_LOOKUP); +} + +static u32 calc_power_group1(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_single(ctx, group, TILER_ACTIVE); +} + +static u32 calc_power_group2(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_single(ctx, group, FRAG_ACTIVE); +} + +static u32 calc_power_group3(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_double(ctx, group, VARY_SLOT_32, + VARY_SLOT_16); +} + +static u32 calc_power_group4(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_single(ctx, group, TEX_COORD_ISSUE); +} + +static u32 calc_power_group5(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_single(ctx, group, EXEC_INSTR_COUNT); +} + +static u32 calc_power_group6(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_double(ctx, group, BEATS_RD_LSC, + BEATS_WR_LSC); +} + +static u32 calc_power_group7(struct kbase_ipa_context *ctx, + struct ipa_group *group) +{ + return calc_power_sc_single(ctx, group, EXEC_CORE_ACTIVE); +} + +static int attach_vinstr(struct kbase_ipa_context *ctx) +{ + struct kbase_device *kbdev = ctx->kbdev; + struct kbase_uk_hwcnt_reader_setup setup; + size_t dump_size; + + dump_size = kbase_vinstr_dump_size(kbdev); + ctx->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); + if (!ctx->vinstr_buffer) { + dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); + return -1; + } + + setup.jm_bm = ~0u; + setup.shader_bm = ~0u; + setup.tiler_bm = ~0u; + setup.mmu_l2_bm = ~0u; + ctx->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx, + &setup, ctx->vinstr_buffer); + if (!ctx->vinstr_cli) { + dev_err(kbdev->dev, "Failed 
to register IPA with vinstr core"); + kfree(ctx->vinstr_buffer); + ctx->vinstr_buffer = NULL; + return -1; + } + return 0; +} + +static void detach_vinstr(struct kbase_ipa_context *ctx) +{ + if (ctx->vinstr_cli) + kbase_vinstr_detach_client(ctx->vinstr_cli); + ctx->vinstr_cli = NULL; + kfree(ctx->vinstr_buffer); + ctx->vinstr_buffer = NULL; +} + struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev) { struct kbase_ipa_context *ctx; @@ -259,6 +399,33 @@ void kbase_ipa_term(struct kbase_ipa_context *ctx) { struct kbase_device *kbdev = ctx->kbdev; + detach_vinstr(ctx); sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group); kfree(ctx); } + +u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err) +{ + struct ipa_group *group; + u32 power = 0; + size_t i; + + mutex_lock(&ctx->ipa_lock); + if (!ctx->vinstr_cli) { + *err = attach_vinstr(ctx); + if (*err < 0) + goto err0; + } + *err = kbase_vinstr_hwc_dump(ctx->vinstr_cli, + BASE_HWCNT_READER_EVENT_MANUAL); + if (*err) + goto err0; + for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { + group = &ctx->groups[i]; + power = add_saturate(power, group->calc_power(ctx, group)); + } +err0: + mutex_unlock(&ctx->ipa_lock); + return power; +} +KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h index ed12375..e2234d1 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,3 +30,12 @@ struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev); * @ctx: pointer to the IPA context */ void kbase_ipa_term(struct kbase_ipa_context *ctx); + +/** + * kbase_ipa_dynamic_power - calculate power + * @ctx: pointer to the IPA context + * @err: 0 on success, negative on failure + * + * Return: returns power consumption as mw @ 1GHz @ 1V + */ +u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h new file mode 100644 index 0000000..101abfe --- /dev/null +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h @@ -0,0 +1,104 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#define NR_BYTES_PER_CNT 4 +#define NR_CNT_PER_BLOCK 64 + +#define JM_BASE (0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT) +#define TILER_BASE (1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT) +#define MMU_BASE (2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT) +#define SC0_BASE (3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT) + +#define GPU_ACTIVE (JM_BASE + NR_BYTES_PER_CNT * 6) +#define TILER_ACTIVE (TILER_BASE + NR_BYTES_PER_CNT * 45) +#define L2_ANY_LOOKUP (MMU_BASE + NR_BYTES_PER_CNT * 25) +#define FRAG_ACTIVE (SC0_BASE + NR_BYTES_PER_CNT * 4) +#define EXEC_CORE_ACTIVE (SC0_BASE + NR_BYTES_PER_CNT * 26) +#define EXEC_INSTR_COUNT (SC0_BASE + NR_BYTES_PER_CNT * 28) +#define TEX_COORD_ISSUE (SC0_BASE + NR_BYTES_PER_CNT * 40) +#define VARY_SLOT_32 (SC0_BASE + NR_BYTES_PER_CNT * 50) +#define VARY_SLOT_16 (SC0_BASE + NR_BYTES_PER_CNT * 51) +#define BEATS_RD_LSC (SC0_BASE + NR_BYTES_PER_CNT * 56) +#define BEATS_WR_LSC (SC0_BASE + NR_BYTES_PER_CNT * 61) + +static u32 calc_power_group0(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group1(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group2(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group3(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group4(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group5(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group6(struct kbase_ipa_context *ctx, + struct ipa_group *group); +static u32 calc_power_group7(struct kbase_ipa_context *ctx, + struct ipa_group *group); + +static struct ipa_group ipa_groups_def[] = { + /* L2 */ + { + .name = "group0", + .capacitance = 687, + .calc_power = calc_power_group0, + }, + /* TILER */ + { + .name = "group1", + .capacitance = 0, + .calc_power = calc_power_group1, + }, + /* FRAG */ + { + .name = "group2", + .capacitance = 23, + .calc_power = calc_power_group2, + }, + /* VARY */ + { + .name = "group3", + .capacitance = 108, + .calc_power = calc_power_group3, + }, + /* TEX */ + { + .name = "group4", + .capacitance = 128, + .calc_power = calc_power_group4, + }, + /* EXEC INSTR */ + { + .name = "group5", + .capacitance = 249, + .calc_power = calc_power_group5, + }, + /* LSC */ + { + .name = "group6", + .capacitance = 0, + .calc_power = calc_power_group6, + }, + /* EXEC OVERHEAD */ + { + .name = "group7", + .capacitance = 29, + .calc_power = calc_power_group7, + }, +}; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c index dd2d187..c091ffe 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,19 +25,14 @@ #endif #include #include -#ifdef CONFIG_UMP -#include -#endif /* CONFIG_UMP */ #include #include #include +#include #include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #define beenthere(kctx, f, a...) 
dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -90,6 +85,10 @@ static int jd_run_atom(struct kbase_jd_atom *katom) return 0; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ + if (katom->will_fail_event_code) { + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + return 0; + } if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (!kbase_replay_process(katom)) @@ -97,9 +96,6 @@ static int jd_run_atom(struct kbase_jd_atom *katom) } else if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - } else { - /* The job has not completed */ - list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); } return 0; } @@ -198,85 +194,6 @@ static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) } #endif /* CONFIG_KDS */ -#ifdef CONFIG_DMA_SHARED_BUFFER -static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) -{ - struct sg_table *sgt; - struct scatterlist *s; - int i; - phys_addr_t *pa; - int err; - size_t count = 0; - struct kbase_mem_phy_alloc *alloc; - - alloc = reg->gpu_alloc; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); - KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); - sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL); - - if (IS_ERR_OR_NULL(sgt)) - return -EINVAL; - - /* save for later */ - alloc->imported.umm.sgt = sgt; - - pa = kbase_get_gpu_phy_pages(reg); - KBASE_DEBUG_ASSERT(pa); - - for_each_sg(sgt->sgl, s, sgt->nents, i) { - int j; - size_t pages = PFN_UP(sg_dma_len(s)); - - WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), - "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", - sg_dma_len(s)); - - WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), - "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", - (unsigned long long) sg_dma_address(s)); - - for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) - *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); - WARN_ONCE(j < pages, - "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size); - } - - if (WARN_ONCE(count < reg->nr_pages, - "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size)) { - err = -EINVAL; - goto out; - } - - /* Update nents as we now have pages to map */ - alloc->nents = count; - - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); - -out: - if (err) { - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - } - - return err; -} - -static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc) -{ - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(alloc); - KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); - KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - alloc->nents = 0; -} -#endif /* CONFIG_DMA_SHARED_BUFFER */ - void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) { #ifdef CONFIG_KDS @@ -316,31 +233,13 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { - struct kbase_mem_phy_alloc *alloc; - - alloc = katom->extres[res_no].alloc; -#ifdef CONFIG_DMA_SHARED_BUFFER 
- if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { - alloc->imported.umm.current_mapping_usage_count--; - - if (0 == alloc->imported.umm.current_mapping_usage_count) { - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - katom->kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); + struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; + struct kbase_va_region *reg; - kbase_jd_umm_unmap(katom->kctx, alloc); - } - } -#endif /* CONFIG_DMA_SHARED_BUFFER */ - kbase_mem_phy_alloc_put(alloc); + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + kbase_unmap_external_resource(katom->kctx, reg, alloc); } kfree(katom->extres); katom->extres = NULL; @@ -348,24 +247,6 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) kbase_gpu_vm_unlock(katom->kctx); } -#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) -static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive) -{ - u32 i; - - for (i = 0; i < *kds_res_count; i++) { - /* Duplicate resource, ignore */ - if (kds_resources[i] == kds_res) - return; - } - - kds_resources[*kds_res_count] = kds_res; - if (exclusive) - set_bit(*kds_res_count, kds_access_bitmap); - (*kds_res_count)++; -} -#endif - /* * Set up external resources needed by this job. * @@ -430,14 +311,22 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st } #endif /* CONFIG_KDS */ + /* Take the processes mmap lock */ + down_read(¤t->mm->mmap_sem); + /* need to keep the GPU VM locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); for (res_no = 0; res_no < katom->nr_extres; res_no++) { struct base_external_resource *res; struct kbase_va_region *reg; + struct kbase_mem_phy_alloc *alloc; + bool exclusive; res = &input_extres[res_no]; - reg = kbase_region_tracker_find_region_enclosing_address(katom->kctx, + exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) + ? true : false; + reg = kbase_region_tracker_find_region_enclosing_address( + katom->kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); /* did we find a matching region object? 
*/ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) { @@ -448,56 +337,17 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && (reg->flags & KBASE_REG_SECURE)) { katom->atom_flags |= KBASE_KATOM_FLAG_SECURE; - if ((katom->core_req & BASE_JD_REQ_FS) == 0) { - WARN_RATELIMIT(1, "Secure non-fragment jobs not supported"); - goto failed_loop; - } } - /* decide what needs to happen for this resource */ - switch (reg->gpu_alloc->type) { - case BASE_MEM_IMPORT_TYPE_UMP: - { -#if defined(CONFIG_KDS) && defined(CONFIG_UMP) - struct kds_resource *kds_res; - - kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle); - if (kds_res) - add_kds_resource(kds_res, kds_resources, &kds_res_count, - kds_access_bitmap, - res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); -#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ - break; - } -#ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: - { -#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS - struct kds_resource *kds_res; - - kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf); - if (kds_res) - add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); -#endif - reg->gpu_alloc->imported.umm.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { - /* use a local variable to not pollute err_ret_val - * with a potential success value as some other gotos depend - * on the default error code stored in err_ret_val */ - int tmp; - - tmp = kbase_jd_umm_map(katom->kctx, reg); - if (tmp) { - /* failed to map this buffer, roll back */ - err_ret_val = tmp; - reg->gpu_alloc->imported.umm.current_mapping_usage_count--; - goto failed_loop; - } - } - break; - } + alloc = kbase_map_external_resource(katom->kctx, reg, + current->mm +#ifdef CONFIG_KDS + , &kds_res_count, kds_resources, + kds_access_bitmap, exclusive #endif - default: + ); + if (!alloc) { + err_ret_val = -EINVAL; goto failed_loop; } @@ -508,12 +358,15 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st * until the last read for an element. 
* */ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + katom->extres[res_no].alloc = alloc; } /* successfully parsed the extres array */ /* drop the vm lock before we call into kds */ kbase_gpu_vm_unlock(katom->kctx); + /* Release the processes mmap lock */ + up_read(¤t->mm->mmap_sem); + #ifdef CONFIG_KDS if (kds_res_count) { int wait_failed; @@ -545,6 +398,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st #ifdef CONFIG_KDS failed_kds_setup: + /* Lock the processes mmap lock */ + down_read(¤t->mm->mmap_sem); /* lock before we unmap */ kbase_gpu_vm_lock(katom->kctx); @@ -554,30 +409,14 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st /* undo the loop work */ while (res_no-- > 0) { struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; -#ifdef CONFIG_DMA_SHARED_BUFFER - if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { - alloc->imported.umm.current_mapping_usage_count--; - if (0 == alloc->imported.umm.current_mapping_usage_count) { - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages(katom->kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - kbase_jd_umm_unmap(katom->kctx, alloc); - } - } -#endif /* CONFIG_DMA_SHARED_BUFFER */ - kbase_mem_phy_alloc_put(alloc); + kbase_unmap_external_resource(katom->kctx, NULL, alloc); } kbase_gpu_vm_unlock(katom->kctx); + /* Release the processes mmap lock */ + up_read(¤t->mm->mmap_sem); + early_err_out: kfree(katom->extres); katom->extres = NULL; @@ -590,8 +429,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st static inline void jd_resolve_dep(struct list_head *out_list, struct kbase_jd_atom *katom, - u8 d, - bool ctx_is_dying) + u8 d) { u8 other_d = !d; @@ -608,12 +446,7 @@ static inline void jd_resolve_dep(struct list_head *out_list, kbase_jd_katom_dep_clear(&dep_atom->dep[d]); if (katom->event_code != BASE_JD_EVENT_DONE && - (dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) { - /* Atom failed, so remove the other dependencies and immediately fail the atom */ - if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { - list_del(&dep_atom->dep_item[other_d]); - kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]); - } + (dep_type != BASE_JD_DEP_TYPE_ORDER)) { #ifdef CONFIG_KDS if (!dep_atom->kds_dep_satisfied) { /* Just set kds_dep_satisfied to true. 
If the callback happens after this then it will early out and @@ -626,10 +459,17 @@ static inline void jd_resolve_dep(struct list_head *out_list, dep_atom->event_code = katom->event_code; KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED); - dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED; - list_add_tail(&dep_atom->dep_item[0], out_list); - } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { + if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY) + != BASE_JD_REQ_SOFT_REPLAY) { + dep_atom->will_fail_event_code = + dep_atom->event_code; + } else { + dep_atom->status = + KBASE_JD_ATOM_STATE_COMPLETED; + } + } + if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { #ifdef CONFIG_KDS if (dep_atom->kds_dep_satisfied) #endif @@ -702,7 +542,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, { struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; - struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; struct list_head completed_jobs; struct list_head runnable_jobs; bool need_to_try_schedule_context = false; @@ -717,7 +556,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, jd_check_force_failure(katom); #endif - /* This is needed in case an atom is failed due to being invalid, this * can happen *before* the jobs that the atom depends on have completed */ for (i = 0; i < 2; i++) { @@ -753,8 +591,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); for (i = 0; i < 2; i++) - jd_resolve_dep(&runnable_jobs, katom, i, - js_kctx_info->ctx.is_dying); + jd_resolve_dep(&runnable_jobs, katom, i); if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_post_external_resources(katom); @@ -769,7 +606,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); - if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) { + if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && + !kctx->jctx.sched_info.ctx.is_dying) { need_to_try_schedule_context |= jd_run_atom(node); } else { node->event_code = katom->event_code; @@ -794,6 +632,12 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, list_add_tail(&node->dep_item[0], &completed_jobs); } + /* Completing an atom might have freed up space + * in the ringbuffer, but only on that slot. */ + jsctx_ll_flush_to_rb(kctx, + katom->sched_priority, + katom->slot_nr); + /* Register a completed job as a disjoint event when the GPU * is in a disjoint state (ie. being reset or replaying jobs). */ @@ -881,6 +725,7 @@ bool jd_submit_atom(struct kbase_context *kctx, int i; int sched_prio; bool ret; + bool will_fail = false; /* Update the TOTAL number of jobs. This includes those not tracked by * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ @@ -904,6 +749,7 @@ bool jd_submit_atom(struct kbase_context *kctx, katom->need_cache_flush_cores_retained = 0; katom->x_pre_dep = NULL; katom->x_post_dep = NULL; + katom->will_fail_event_code = 0; #ifdef CONFIG_KDS /* Start by assuming that the KDS dependencies are satisfied, * kbase_jd_pre_external_resources will correct this if there are dependencies */ @@ -925,13 +771,16 @@ bool jd_submit_atom(struct kbase_context *kctx, dep_atom_type != BASE_JD_DEP_TYPE_DATA) { katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; -#if defined(CONFIG_MALI_MIPE_ENABLED) + + /* Wrong dependency setup. Atom will be sent + * back to user space. Do not record any + * dependencies. 
*/ kbase_tlstream_tl_new_atom( katom, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx( katom, kctx); -#endif + ret = jd_done_nolock(katom, NULL); goto out; } @@ -956,7 +805,7 @@ bool jd_submit_atom(struct kbase_context *kctx, if (dep_atom->event_code == BASE_JD_EVENT_DONE) continue; /* don't stop this atom if it has an order dependency - * only to the failed one, try to submit it throught + * only to the failed one, try to submit it through * the normal path */ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && @@ -964,21 +813,18 @@ bool jd_submit_atom(struct kbase_context *kctx, continue; } - if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) { - /* Remove the previous dependency */ - list_del(&katom->dep_item[0]); - kbase_jd_katom_dep_clear(&katom->dep[0]); - } - /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; -#if defined(CONFIG_MALI_MIPE_ENABLED) + + /* This atom is going through soft replay or + * will be sent back to user space. Do not record any + * dependencies. */ kbase_tlstream_tl_new_atom( katom, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); -#endif + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) { @@ -986,9 +832,8 @@ bool jd_submit_atom(struct kbase_context *kctx, goto out; } } - ret = jd_done_nolock(katom, NULL); + will_fail = true; - goto out; } else { /* Atom is in progress, add this atom to the list */ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); @@ -997,17 +842,37 @@ bool jd_submit_atom(struct kbase_context *kctx, } } - /* These must occur after the above loop to ensure that an atom that - * depends on a previous atom with the same number behaves as expected */ - katom->event_code = BASE_JD_EVENT_DONE; - katom->status = KBASE_JD_ATOM_STATE_QUEUED; + if (will_fail) { + if (!queued) { + ret = jd_done_nolock(katom, NULL); + + goto out; + } else { + katom->will_fail_event_code = katom->event_code; + ret = false; -#if defined(CONFIG_MALI_MIPE_ENABLED) + goto out; + } + } else { + /* These must occur after the above loop to ensure that an atom + * that depends on a previous atom with the same number behaves + * as expected */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + } + + /* Create a new atom recording all dependencies it was set up with. 
*/ kbase_tlstream_tl_new_atom( katom, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); -#endif + for (i = 0; i < 2; i++) + if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( + &katom->dep[i])) + kbase_tlstream_tl_dep_atom_atom( + (void *)kbase_jd_katom_dep_atom( + &katom->dep[i]), + (void *)katom); /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { @@ -1068,7 +933,8 @@ bool jd_submit_atom(struct kbase_context *kctx, #ifdef CONFIG_GPU_TRACEPOINTS katom->work_id = atomic_inc_return(&jctx->work_id); - trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req)); + trace_gpu_job_enqueue((u32)kctx->id, katom->work_id, + kbasep_map_core_reqs_to_string(katom->core_req)); #endif if (queued && !IS_GPU_ATOM(katom)) { @@ -1097,8 +963,7 @@ bool jd_submit_atom(struct kbase_context *kctx, ret = jd_done_nolock(katom, NULL); goto out; } - /* The job has not yet completed */ - list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + ret = false; } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { katom->status = KBASE_JD_ATOM_STATE_IN_JS; @@ -1130,6 +995,7 @@ int kbase_jd_submit(struct kbase_context *kctx, bool need_to_try_schedule_context = false; struct kbase_device *kbdev; void __user *user_addr; + u32 latest_flush; /* * kbase_jd_submit isn't expected to fail and so all errors with the jobs @@ -1159,6 +1025,9 @@ int kbase_jd_submit(struct kbase_context *kctx, KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight)); + /* All atoms submitted in this call have the same flush ID */ + latest_flush = kbase_backend_get_current_flush_id(kbdev); + for (i = 0; i < submit_data->nr_atoms; i++) { struct base_jd_atom_v2 user_atom; struct kbase_jd_atom *katom; @@ -1234,6 +1103,9 @@ while (false) #endif katom = &jctx->atoms[user_atom.atom_number]; + /* Record the flush ID for the cache flush optimisation */ + katom->flush_id = latest_flush; + while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { /* Atom number is already in use, wait for the atom to * complete @@ -1329,7 +1201,6 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); - mutex_unlock(&jctx->lock); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); @@ -1337,6 +1208,7 @@ void kbase_jd_done_worker(struct work_struct *data) kbase_js_unpull(kctx, katom); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&jctx->lock); return; } @@ -1521,51 +1393,6 @@ static void jd_cancel_worker(struct work_struct *data) kbase_js_sched_all(kbdev); } -/** - * jd_evict_worker - Work queue job evict function - * @data: a &struct work_struct - * - * Only called as part of evicting failed jobs. This is only called on jobs that - * were never submitted to HW Access. Jobs that were submitted are handled - * through kbase_jd_done_worker(). - * Operates serially with the kbase_jd_done_worker() on the work queue. - * - * We don't need to release most of the resources that would occur on - * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be - * running (by virtue of having not been submitted to HW Access). 
- */ -static void jd_evict_worker(struct work_struct *data) -{ - struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, - work); - struct kbase_jd_context *jctx; - struct kbase_context *kctx; - struct kbasep_js_kctx_info *js_kctx_info; - struct kbase_device *kbdev; - - /* Soft jobs should never reach this function */ - KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); - - kctx = katom->kctx; - kbdev = kctx->kbdev; - jctx = &kctx->jctx; - js_kctx_info = &kctx->jctx.sched_info; - - KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); - - /* Scheduler: Remove the job from the system */ - mutex_lock(&js_kctx_info->ctx.jsctx_mutex); - kbasep_js_remove_cancelled_job(kbdev, kctx, katom); - mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - - mutex_lock(&jctx->lock); - jd_done_nolock(katom, NULL); - /* katom may have been freed now, do not use! */ - mutex_unlock(&jctx->lock); - - kbase_js_sched_all(kbdev); -} - /** * kbase_jd_done - Complete a job that has been removed from the Hardware * @katom: atom which has been completed @@ -1609,7 +1436,8 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, #ifdef CONFIG_DEBUG_FS /* a failed job happened and is waiting for dumping*/ - if (kbase_debug_job_fault_process(katom, katom->event_code)) + if (!katom->will_fail_event_code && + kbase_debug_job_fault_process(katom, katom->event_code)) return; #endif @@ -1647,30 +1475,6 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } -void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx; - struct kbasep_js_kctx_info *js_kctx_info; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - KBASE_DEBUG_ASSERT(NULL != katom); - kctx = katom->kctx; - KBASE_DEBUG_ASSERT(NULL != kctx); - - js_kctx_info = &kctx->jctx.sched_info; - - KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); - - /* This should only be done from a context that is currently scheduled - */ - KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); - - WARN_ON(work_pending(&katom->work)); - - KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); - INIT_WORK(&katom->work, jd_evict_worker); - queue_work(kctx->jctx.job_done_wq, &katom->work); -} void kbase_jd_zap_context(struct kbase_context *kctx) { @@ -1693,6 +1497,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx) * queued outside the job scheduler. */ + hrtimer_cancel(&kctx->soft_event_timeout); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]); kbase_cancel_soft_job(katom); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c index b37f280..0cf75f5 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -17,6 +17,8 @@ #include +#include + #include #ifdef CONFIG_DEBUG_FS @@ -41,6 +43,13 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) KBASE_DEBUG_ASSERT(kctx != NULL); + /* Print version */ + seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); + + /* Print U/K API version */ + seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, + BASE_UK_VERSION_MINOR); + /* Print table heading */ seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n"); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h index 703e4cf..bc1878f 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,6 +27,8 @@ #include +#define MALI_JD_DEBUGFS_VERSION 1 + /** * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system * diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c index 54b8d9b..83228c0 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,9 +25,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #include @@ -79,13 +77,6 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_policy_ctx_job_cb callback); -static bool kbase_js_evict_atom(struct kbase_context *kctx, - struct kbase_jd_atom *katom_evict, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list, - struct jsctx_rb *rb, int idx); - /* Helper for trace subcodes */ #if KBASE_TRACE_ENABLE static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, @@ -239,25 +230,31 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, return result; } -/** - * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to check. - * @prio: Priority to check. - * - * Caller must hold runpool_irq.lock - * - * Return: true if the ring buffer is empty, false otherwise. - */ -static inline bool -jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) -{ - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; +/* Helper macros to access and modify jsctx_queue.indicies */ +#define JSCTX_GET(offset, var, mask) \ + ((var >> offset) & mask) - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); +/* This wraps around to correct integer size automatically. 
*/ +#define JSCTX_SET(var, offset, value, mask) \ + (var = ((var & ~(mask << offset)) /*Clear old bits */ \ + | (((value) & mask) << offset))) /* Set (after masking) new bits */ - return rb->running_idx == rb->write_idx; -} +#define JSCTX_GET_WR_IDX(var) \ + JSCTX_GET(JSCTX_WR_OFFSET, var, JSCTX_RB_MASK_STORE) +#define JSCTX_GET_RN_IDX(var) \ + JSCTX_GET(JSCTX_RN_OFFSET, var, JSCTX_RB_MASK_STORE) +#define JSCTX_GET_RD_IDX(var) \ + JSCTX_GET(JSCTX_RD_OFFSET, var, JSCTX_RB_MASK_STORE) + +#define JSCTX_GET_IDX_DIFF(lower, upper) \ + ((upper >= lower) ? (upper - lower) : (upper+JSCTX_RB_SIZE_STORE-lower)) + +#define JSCTX_SET_WR_IDX(var, value) \ + JSCTX_SET(var, JSCTX_WR_OFFSET, value, JSCTX_RB_MASK_STORE) +#define JSCTX_SET_RN_IDX(var, value) \ + JSCTX_SET(var, JSCTX_RN_OFFSET, value, JSCTX_RB_MASK_STORE) +#define JSCTX_SET_RD_IDX(var, value) \ + JSCTX_SET(var, JSCTX_RD_OFFSET, value, JSCTX_RB_MASK_STORE) /** * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms @@ -270,18 +267,15 @@ jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) * ring buffer to be full (with running atoms) when this functions returns * true. * - * Caller must hold runpool_irq.lock - * * Return: true if there are no atoms to pull, false otherwise. */ static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + unsigned int var = atomic_read(&rb->indicies); - return rb->read_idx == rb->write_idx; + return JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var); } /** @@ -311,55 +305,29 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) } /** - * jsctx_rb_compact_prio(): - Compact a ring buffer - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to compact. - * @prio: Priority id to compact. - */ -static inline void -jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio) + * jsctx_rb_is_full(): - Check if the given ringbuffer is full. + * @queue: Pointer to the queue containing the ringbuffer. + * + * No locks explicitly required, result will always be consistent. + * But depending on usage, the caller should consider jctx.lock, + * for the result to remain correct. + * + * Return: true if the ringbuffer is full, false otherwise. + */ +static inline bool +jsctx_rb_is_full(struct jsctx_queue *queue) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - u16 compact_idx = rb->write_idx - 1; - u16 end_idx = rb->running_idx - 1; - u16 i; - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - lockdep_assert_held(&kctx->jctx.lock); - - for (i = compact_idx; i != end_idx; i--) { - if (rb->entries[i & JSCTX_RB_MASK].atom_id != - KBASEP_ATOM_ID_INVALID) { - WARN_ON(compact_idx < rb->running_idx); - rb->entries[compact_idx & JSCTX_RB_MASK].atom_id = - rb->entries[i & JSCTX_RB_MASK].atom_id; - - compact_idx--; - } - if (rb->read_idx == i) - rb->read_idx = compact_idx + 1; - } + unsigned int var = atomic_read(&queue->indicies); + u16 rn_idx = JSCTX_GET_RN_IDX(var); + u16 wr_idx = JSCTX_GET_WR_IDX(var); - rb->running_idx = compact_idx + 1; + return JSCTX_GET_IDX_DIFF(rn_idx, wr_idx) >= JSCTX_RB_SIZE; } -/** - * jsctx_rb_compact(): - Compact all priority ring buffers - * @kctx: Pointer to kbase context with ring buffer. - * @js: Job slot id to compact. 
- */ -static inline void -jsctx_rb_compact(struct kbase_context *kctx, int js) -{ - int prio; - - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) - jsctx_rb_compact_prio(kctx, js, prio); -} /** - * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer - * @kctx: Pointer to kbase context with ring buffer. + * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. + * @kctx: Pointer to kbase context with the queue. * @js: Job slot id to iterate. * @prio: Priority id to iterate. * @callback: Function pointer to callback. @@ -376,50 +344,64 @@ jsctx_rb_compact(struct kbase_context *kctx, int js) * calling this function. */ static void -jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio, +jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, kbasep_js_policy_ctx_job_cb callback) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; struct kbase_jd_atom *katom; - u16 write_idx = ACCESS_ONCE(rb->write_idx); + + struct list_head *pos, *q; + + unsigned int var = atomic_read(&queue->indicies); + u16 running_idx = JSCTX_GET_RN_IDX(var); + u16 read_idx = JSCTX_GET_RD_IDX(var); + u16 wr_idx = JSCTX_GET_WR_IDX(var); + u16 i; + const u16 count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* There must be no jobs currently in HW access */ - WARN_ON(rb->read_idx != rb->running_idx); + WARN_ON(read_idx != JSCTX_GET_RN_IDX(var)); /* Invoke callback on all kbase_jd_atoms in the ring buffer, and * removes them from the buffer */ - while (rb->read_idx != write_idx) { - int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; + for (i = 0; i < count; i++) { + int id = queue->entries[read_idx & JSCTX_RB_MASK].atom_id; katom = kbase_jd_atom_from_id(kctx, id); + read_idx++; + callback(kctx->kbdev, katom); + } + atomic_set(&queue->indicies, 0); - rb->read_idx++; - rb->running_idx++; + list_for_each_safe(pos, q, &queue->queue_head) { + struct kbase_jd_atom *entry; - callback(kctx->kbdev, katom); + entry = list_entry(pos, struct kbase_jd_atom, queue); + list_del(pos); + callback(kctx->kbdev, entry); } } /** - * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb - * @kctx: Pointer to kbase context with ring buffer. + * jsctx_queue_foreach(): - Execute callback for each entry in every queue + * @kctx: Pointer to kbase context with queue. * @js: Job slot id to iterate. * @callback: Function pointer to callback. * * Iterate over all the different priorities, and for each call - * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback - * for each entry in buffer, and remove the entry from the buffer. + * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback + * for each entry, and remove the entry from the queue. 
*/ static inline void -jsctx_rb_foreach(struct kbase_context *kctx, int js, +jsctx_queue_foreach(struct kbase_context *kctx, int js, kbasep_js_policy_ctx_job_cb callback) { int prio; for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) - jsctx_rb_foreach_prio(kctx, js, prio, callback); + jsctx_queue_foreach_prio(kctx, js, prio, callback); } /** @@ -436,15 +418,16 @@ jsctx_rb_foreach(struct kbase_context *kctx, int js, static inline struct kbase_jd_atom * jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; int id; + unsigned int var = atomic_read(&rb->indicies); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - if (jsctx_rb_none_to_pull_prio(kctx, js, prio)) + if (JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var)) return NULL; - id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; + id = rb->entries[JSCTX_GET_RD_IDX(var) & JSCTX_RB_MASK].atom_id; return kbase_jd_atom_from_id(kctx, id); } @@ -457,6 +440,8 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a * pointer to the next atom, unless all the priority's ring buffers are empty. * + * Caller must hold the runpool_irq.lock. + * * Return: Pointer to next atom in buffer, or NULL if there is no atom. */ static inline struct kbase_jd_atom * @@ -464,6 +449,8 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) { int prio; + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -491,16 +478,21 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) static inline struct kbase_jd_atom * jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio) { - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + unsigned int var = atomic_read(&rb->indicies); int id; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); lockdep_assert_held(&kctx->jctx.lock); - if (jsctx_rb_is_empty_prio(kctx, js, prio)) + if (!list_empty(&rb->queue_head)) { + return list_entry(rb->queue_head.prev, + struct kbase_jd_atom, queue); + } + + if (JSCTX_GET_RN_IDX(var) == JSCTX_GET_WR_IDX(var)) return NULL; - id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id; + id = rb->entries[(JSCTX_GET_WR_IDX(var) - 1) & JSCTX_RB_MASK].atom_id; return kbase_jd_atom_from_id(kctx, id); } @@ -518,14 +510,23 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + unsigned int oldvar, var; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* Atoms must be pulled in the correct order. 
*/ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); - rb->read_idx++; + do { + u16 rd_idx; + + oldvar = atomic_read(&rb->indicies); + var = oldvar; + rd_idx = JSCTX_GET_RD_IDX(var); + + JSCTX_SET_RD_IDX(var, rd_idx+1); + } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); } /** @@ -543,15 +544,27 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + unsigned int oldvar, var; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* Atoms must be unpulled in correct order. */ - WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id != - kbase_jd_atom_id(kctx, katom)); + do { + u16 rd_idx; - rb->read_idx--; + oldvar = atomic_read(&rb->indicies); + var = oldvar; + + + rd_idx = JSCTX_GET_RD_IDX(var)-1; + + /* Atoms must be unpulled in correct order. */ + WARN_ON(rb->entries[rd_idx & JSCTX_RB_MASK].atom_id != + kbase_jd_atom_id(kctx, katom)); + + JSCTX_SET_RD_IDX(var, rd_idx); + } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); } /** @@ -571,18 +584,36 @@ jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + unsigned int oldvar, var; + u16 wr_idx, running_idx, count; lockdep_assert_held(&kctx->jctx.lock); + oldvar = atomic_read(&rb->indicies); + var = oldvar; + + running_idx = JSCTX_GET_RN_IDX(var); + wr_idx = JSCTX_GET_WR_IDX(var); + count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx); + /* Check if the ring buffer is full */ - if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE) + if (count >= JSCTX_RB_SIZE) return -EBUSY; - rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id = - kbase_jd_atom_id(kctx, katom); - rb->write_idx++; + rb->entries[wr_idx & JSCTX_RB_MASK].atom_id = + kbase_jd_atom_id(kctx, katom); + + wr_idx++; + JSCTX_SET_WR_IDX(var, wr_idx); + while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar) { + oldvar = atomic_read(&rb->indicies); + var = oldvar; + wr_idx = JSCTX_GET_WR_IDX(var)+1; + + JSCTX_SET_WR_IDX(var, wr_idx); + } return 0; } @@ -602,73 +633,129 @@ jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + unsigned int oldvar, var; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); - /* Atoms must be completed in order. */ - WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id != - kbase_jd_atom_id(kctx, katom)); - rb->running_idx++; + do { + unsigned int rn_idx; + + oldvar = atomic_read(&rb->indicies); + var = oldvar; + + rn_idx = JSCTX_GET_RN_IDX(var); + + JSCTX_SET_RN_IDX(var, rn_idx+1); + } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar); } -/** - * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer - * @kctx: Pointer to kbase context with ring buffer. - * @start_katom: Pointer to the first katom to evict. - * @head_katom: Pointer to head katom. - * @evict_list: Pointer to head of list where evicted atoms are added. - * - * Iterate over the ring buffer starting at @start_katom and evict @start_atom - * and dependent atoms in ring buffer. 
- * - * @evict_list and @head_katom is passed on to kbase_js_evict_atom() which will - * examine the atom dependencies. - * - * jsctx_rb_evict() is only called by kbase_js_evict_deps(). - */ + static void -jsctx_rb_evict(struct kbase_context *kctx, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list) +jsctx_ll_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - int prio = start_katom->sched_priority; - int js = start_katom->slot_nr; - struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; - bool atom_in_rb = false; - u16 i, start_idx; + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); lockdep_assert_held(&kctx->jctx.lock); - for (i = rb->running_idx; i != rb->write_idx; i++) { - if (rb->entries[i & JSCTX_RB_MASK].atom_id == - kbase_jd_atom_id(kctx, start_katom)) { - start_idx = i; - atom_in_rb = true; + list_add_tail(&katom->queue, &queue->queue_head); +} + +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, + int js, + bool is_scheduled); +static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); +static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); + +void +jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js) +{ + unsigned long flags; + struct list_head *pos, *q; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + bool flushed_any = false; + struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; + bool enqueue_required = false; + + lockdep_assert_held(&kctx->jctx.lock); + + + /* Early out for common case */ + if (list_empty(&queue->queue_head) || jsctx_rb_is_full(queue)) + return; + + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + + spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, flags); + /* If slot will transition from unpullable to pullable then add to + * pullable list */ + if (jsctx_rb_none_to_pull(kctx, js)) + enqueue_required = true; + else + enqueue_required = false; + + list_for_each_safe(pos, q, &queue->queue_head) { + struct kbase_jd_atom *katom; + + katom = list_entry(pos, struct kbase_jd_atom, queue); + + KBASE_DEBUG_ASSERT(katom); + + if (jsctx_rb_add_atom(kctx, katom)) break; - } + + katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_LL; + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; + flushed_any = true; + + list_del(pos); } - /* start_katom must still be in ring buffer. */ - if (i == rb->write_idx || !atom_in_rb) - return; - /* Evict all dependencies on same slot. 
*/ - for (i = start_idx; i != rb->write_idx; i++) { - u8 katom_evict; + if (flushed_any) { + bool timer_sync = false; - katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id; - if (katom_evict != KBASEP_ATOM_ID_INVALID) { - if (!kbase_js_evict_atom(kctx, - &kctx->jctx.atoms[katom_evict], - start_katom, head_katom, - evict_list, rb, i)) - break; + if (enqueue_required) { + if (kbase_js_ctx_pullable(kctx, js, false)) + timer_sync = kbase_js_ctx_list_add_pullable( + kctx->kbdev, kctx, js); + else + timer_sync = kbase_js_ctx_list_add_unpullable( + kctx->kbdev, kctx, js); + /* If this context is active and the atom is the first + * on its slot, kick the job manager to attempt to + * fast-start the atom */ + if (kctx == kctx->kbdev->hwaccess.active_kctx) + kbase_jm_try_kick(kctx->kbdev, 1 << js); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); + + if (timer_sync) + kbase_backend_ctx_count_changed(kctx->kbdev); + + } else { + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); } + } else { + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + } /* @@ -742,6 +829,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES; jsdd->cfs_ctx_runtime_min_slices = DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES; + atomic_set(&jsdd->soft_event_timeout_ms, DEFAULT_JS_SOFT_EVENT_TIMEOUT); dev_dbg(kbdev->dev, "JS Config Attribs: "); dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", @@ -768,6 +856,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd->cfs_ctx_runtime_init_slices); dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u", jsdd->cfs_ctx_runtime_min_slices); + dev_dbg(kbdev->dev, "\tsoft_event_timeout:%i", + atomic_read(&jsdd->soft_event_timeout_ms)); if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && @@ -872,7 +962,7 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; int err; - int i; + int i, j; KBASE_DEBUG_ASSERT(kctx != NULL); @@ -912,6 +1002,13 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) if (js_kctx_info->init_status != JS_KCTX_INIT_ALL) return -EINVAL; + for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { + INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].queue_head); + atomic_set(&kctx->jsctx_queue[i][j].indicies, 0); + } + } + return 0; } @@ -921,6 +1018,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) struct kbasep_js_kctx_info *js_kctx_info; union kbasep_js_policy *js_policy; int js; + bool update_ctx_count = false; KBASE_DEBUG_ASSERT(kctx != NULL); @@ -937,14 +1035,31 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) } mutex_lock(&kbdev->js_data.queue_mutex); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + if (kctx->ctx_runnable_ref) { + WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + update_ctx_count = true; + kctx->ctx_runnable_ref = false; + } + + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); mutex_unlock(&kbdev->js_data.queue_mutex); if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY)) kbasep_js_policy_term_ctx(js_policy, kctx); 
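
The jsctx hunks above replace the per-slot u16 ring-buffer indices with a single atomic word ("indicies") packing the running, read and write indices, advanced with a compare-and-swap retry loop instead of under runpool_irq.lock. The standalone sketch below shows that update pattern; the offsets, index width, ring size and names are illustrative assumptions, not the driver's actual values.

/*
 * Sketch of the packed-index scheme: three indices live in one atomic
 * word and are advanced by recomputing the word and retrying on races.
 */
#include <stdatomic.h>
#include <stdio.h>

#define IDX_BITS   8u
#define IDX_MASK   ((1u << IDX_BITS) - 1)
#define RD_OFFSET  0u
#define WR_OFFSET  8u

#define IDX_GET(var, off)       (((var) >> (off)) & IDX_MASK)
#define IDX_SET(var, off, val)  (((var) & ~(IDX_MASK << (off))) | \
                                 (((val) & IDX_MASK) << (off)))
/* Distance between two indices, allowing for wrap-around of the store. */
#define IDX_DIFF(lo, hi)        ((hi) >= (lo) ? (hi) - (lo) : \
                                 (hi) + (IDX_MASK + 1u) - (lo))

/* Advance the read index the way the new jsctx_rb_pull() does: recompute
 * the packed word and retry if another CPU updated it in the meantime. */
static void rb_pull(atomic_uint *indices)
{
	unsigned int oldvar, newvar;

	do {
		oldvar = atomic_load(indices);
		newvar = IDX_SET(oldvar, RD_OFFSET,
				 IDX_GET(oldvar, RD_OFFSET) + 1u);
	} while (!atomic_compare_exchange_weak(indices, &oldvar, newvar));
}

int main(void)
{
	atomic_uint indices = IDX_SET(0u, WR_OFFSET, 3u);	/* rd=0, wr=3 */

	rb_pull(&indices);					/* rd: 0 -> 1 */
	printf("rd=%u wr=%u used=%u\n",
	       IDX_GET(indices, RD_OFFSET), IDX_GET(indices, WR_OFFSET),
	       IDX_DIFF(IDX_GET(indices, RD_OFFSET),
			IDX_GET(indices, WR_OFFSET)));
	return 0;
}

Built with a C11 compiler, this prints rd=1 wr=3 used=2, mirroring how JSCTX_GET_IDX_DIFF measures ring-buffer occupancy across wrap-around without any lock held.
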
js_kctx_info->init_status = JS_KCTX_INIT_NONE; + + if (update_ctx_count) { + mutex_lock(&kbdev->js_data.runpool_mutex); + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&kbdev->js_data.runpool_mutex); + } } /** @@ -982,8 +1097,11 @@ static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, if (!kctx->slots_pullable) { kbdev->js_data.nr_contexts_pullable++; ret = true; - if (!atomic_read(&kctx->atoms_pulled)) + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = true; atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } } kctx->slots_pullable |= (1 << js); @@ -1025,8 +1143,11 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, if (!kctx->slots_pullable) { kbdev->js_data.nr_contexts_pullable++; ret = true; - if (!atomic_read(&kctx->atoms_pulled)) + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = true; atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } } kctx->slots_pullable |= (1 << js); @@ -1065,8 +1186,11 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, if (kctx->slots_pullable == (1 << js)) { kbdev->js_data.nr_contexts_pullable--; ret = true; - if (!atomic_read(&kctx->atoms_pulled)) + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(!kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } } kctx->slots_pullable &= ~(1 << js); @@ -1105,8 +1229,11 @@ static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev, if (kctx->slots_pullable == (1 << js)) { kbdev->js_data.nr_contexts_pullable--; ret = true; - if (!atomic_read(&kctx->atoms_pulled)) + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(!kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } } kctx->slots_pullable &= ~(1 << js); @@ -1177,7 +1304,8 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, return false; /* next atom blocked */ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) return false; if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) @@ -1206,7 +1334,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Dependent atom must already have been submitted */ if (!(dep_atom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) { + (KBASE_KATOM_FLAG_JSCTX_IN_LL | + KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))){ ret = false; break; } @@ -1368,6 +1497,10 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Dependencies could not be represented */ --(js_kctx_info->ctx.nr_jobs); + /* Setting atom status back to queued as it still has unresolved + * dependencies */ + atom->status = KBASE_JD_ATOM_STATE_QUEUED; + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&js_devdata->runpool_mutex); @@ -1376,17 +1509,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); - if (kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) { - /* Ringbuffer was full (should be impossible) - fail the job */ - --(js_kctx_info->ctx.nr_jobs); - - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&js_devdata->runpool_mutex); - - atom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - - 
goto out_unlock; - } + enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, kbasep_js_trace_get_refcnt_nolock(kbdev, kctx)); @@ -1728,10 +1851,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_mmu_as_released(kctx->as_nr); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx); - kbase_tlstream_tl_nret_gpu_ctx(kbdev, kctx); -#endif kbase_backend_release_ctx_irq(kbdev, kctx); @@ -2076,10 +2196,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_mmu_as_in_use(kctx->as_nr); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) - kbase_tlstream_tl_ret_gpu_ctx(kbdev, kctx); kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx); -#endif /* Cause any future waiter-on-termination to wait until the context is * descheduled */ @@ -2365,37 +2482,78 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, return 1; } -int kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom, - bool *enqueue_required) +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom) { + bool enqueue_required; + katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); /* If slot will transition from unpullable to pullable then add to * pullable list */ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { - *enqueue_required = true; + enqueue_required = true; } else { - *enqueue_required = false; + enqueue_required = false; } /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); /* Add atom to ring buffer. */ - if (unlikely(jsctx_rb_add_atom(kctx, katom))) { - /* The ring buffer is full. This should be impossible as the - * job dispatcher can not submit enough atoms to exceed the - * ring buffer size. Fail the job. - */ - WARN(1, "Job submit while JSCTX ringbuffer already full\n"); - return -EINVAL; + if (jsctx_rb_add_atom(kctx, katom)) { + jsctx_ll_add(kctx, katom); + enqueue_required = false; + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_LL; + } else { + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; } + return enqueue_required; +} + +/** + * kbase_js_evict_deps - Evict dependencies of a failed atom. + * @kctx: Context pointer + * @katom: Pointer to the atom that has failed. + * @js: The job slot the katom was run on. + * @prio: Priority of the katom. + * + * Remove all post dependencies of an atom from the context ringbuffers. + * + * The original atom's event_code will be propogated to all dependent atoms. + * + * Context: Caller must hold the HW access lock + */ +static void kbase_js_evict_deps(struct kbase_context *kctx, + struct kbase_jd_atom *katom, int js, int prio) +{ + struct kbase_jd_atom *x_dep = katom->x_post_dep; + struct kbase_jd_atom *next_katom = jsctx_rb_peek_prio(kctx, js, prio); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - return 0; + if (next_katom && + (next_katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV)) { + KBASE_DEBUG_ASSERT(next_katom->status != + KBASE_JD_ATOM_STATE_HW_COMPLETED); + + next_katom->will_fail_event_code = katom->event_code; + + } + + /* Has cross slot depenency. 
*/ + if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_LL | + KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))) { + /* Remove dependency.*/ + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + /* Fail if it had a data dependency. */ + if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { + x_dep->will_fail_event_code = katom->event_code; + } + } } struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) @@ -2435,7 +2593,8 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) return NULL; if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) @@ -2444,8 +2603,11 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) kctx->pulled = true; pulled = atomic_inc_return(&kctx->atoms_pulled); - if (pulled == 1 && !kctx->slots_pullable) + if (pulled == 1 && !kctx->slots_pullable) { + WARN_ON(kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = true; atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable); + } atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); jsctx_rb_pull(kctx, katom); @@ -2475,6 +2637,8 @@ static void js_return_worker(struct work_struct *data) u64 affinity = katom->affinity; enum kbase_atom_coreref_state coreref_state = katom->coreref_state; + kbase_tlstream_aux_job_softstop_ex(katom); + kbase_backend_complete_wq(kbdev, katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) @@ -2497,8 +2661,11 @@ static void js_return_worker(struct work_struct *data) timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js); if (!atomic_read(&kctx->atoms_pulled)) { - if (!kctx->slots_pullable) + if (!kctx->slots_pullable) { + WARN_ON(!kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } if (kctx->as_nr != KBASEP_AS_NR_INVALID && !js_kctx_info->ctx.is_dying) { @@ -2563,112 +2730,6 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } -static bool kbase_js_evict_atom(struct kbase_context *kctx, - struct kbase_jd_atom *katom_evict, - struct kbase_jd_atom *start_katom, - struct kbase_jd_atom *head_katom, - struct list_head *evict_list, - struct jsctx_rb *rb, int idx) -{ - struct kbase_jd_atom *x_dep = katom_evict->x_post_dep; - - if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && - katom_evict != start_katom) - return false; - - if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - WARN_ON(katom_evict->event_code != head_katom->event_code); - - return false; - } - - if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED && - katom_evict != head_katom) - return false; - - /* Evict cross dependency if present */ - if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) - && (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) - list_add_tail(&x_dep->dep_item[0], evict_list); - - /* If cross dependency is present and does not have a data dependency - * then unblock */ - if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) - && !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) - x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - - if (katom_evict != head_katom) { - rb->entries[idx & JSCTX_RB_MASK].atom_id = - KBASEP_ATOM_ID_INVALID; - - katom_evict->event_code = 
head_katom->event_code; - katom_evict->atom_flags &= - ~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; - - if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF) - kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL, - 0); - else - kbase_jd_evict(kctx->kbdev, katom_evict); - } - - return true; -} - -/** - * kbase_js_evict_deps - Evict dependencies - * @kctx: Context pointer - * @head_katom: Pointer to the atom to evict - * - * Remove all post dependencies of an atom from the context ringbuffers. - * - * The original atom's event_code will be propogated to all dependent atoms. - * - * Context: Caller must hold both jctx and HW access locks - */ -static void kbase_js_evict_deps(struct kbase_context *kctx, - struct kbase_jd_atom *head_katom) -{ - struct list_head evict_list; - - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - - INIT_LIST_HEAD(&evict_list); - - list_add_tail(&head_katom->dep_item[0], &evict_list); - - while (!list_empty(&evict_list)) { - struct kbase_jd_atom *start_katom; - - start_katom = list_entry(evict_list.prev, struct kbase_jd_atom, - dep_item[0]); - list_del(evict_list.prev); - - jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list); - } -} - -/** - * kbase_js_compact - Compact JSCTX ringbuffers - * @kctx: Context pointer - * - * Compact the JSCTX ringbuffers, removing any NULL entries - * - * Context: Caller must hold both jctx and HW access locks - */ -static void kbase_js_compact(struct kbase_context *kctx) -{ - struct kbase_device *kbdev = kctx->kbdev; - int js; - - lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - - for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - jsctx_rb_compact(kctx, js); -} - bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -2692,20 +2753,19 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) { - if (katom->event_code != BASE_JD_EVENT_DONE) - kbase_js_evict_deps(kctx, katom); - jsctx_rb_remove(kctx, katom); context_idle = !atomic_dec_return(&kctx->atoms_pulled); atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); - if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable) + if (!atomic_read(&kctx->atoms_pulled) && + !kctx->slots_pullable) { + WARN_ON(!kctx->ctx_runnable_ref); + kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); - - if (katom->event_code != BASE_JD_EVENT_DONE) - kbase_js_compact(kctx); + } } + WARN_ON(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_LL); if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && jsctx_rb_none_to_pull(kctx, atom_slot)) @@ -2769,19 +2829,31 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + if (katom->will_fail_event_code) + katom->event_code = katom->will_fail_event_code; + katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + if (katom->event_code != BASE_JD_EVENT_DONE) { + kbase_js_evict_deps(kctx, katom, katom->slot_nr, + katom->sched_priority); + } + #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, katom->slot_nr), NULL, 0); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_nret_atom_lpu( katom, &kbdev->gpu_props.props.raw_props.js_features[ katom->slot_nr]); 
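
The completion path here is where the new will_fail_event_code handling comes together: a failed atom no longer triggers ring-buffer eviction and compaction; instead kbase_js_evict_deps() latches the failure onto the next atom on the slot and onto a cross-slot data dependency, and the latched code is applied when those atoms themselves complete. A condensed model of that flow follows, with simplified stand-in types (the FAIL_PREV/FAIL_BLOCKER flag checks are folded into one field, so this is an approximation, not driver code).

#include <stdio.h>

enum { EVENT_DONE = 0, EVENT_JOB_FAULT = 1 };

struct atom {
	int event_code;            /* result of running the atom          */
	int will_fail_event_code;  /* non-zero: fail the atom on completion */
	int has_data_dep;          /* cross-slot dependency is a data dep */
};

/* Counterpart of kbase_js_evict_deps(): propagate the failure forward. */
static void evict_deps(struct atom *failed, struct atom *next_on_slot,
		       struct atom *x_dep)
{
	if (next_on_slot)
		next_on_slot->will_fail_event_code = failed->event_code;
	if (x_dep && x_dep->has_data_dep)
		x_dep->will_fail_event_code = failed->event_code;
}

/* Counterpart of kbase_js_complete_atom(): apply any latched failure. */
static void complete_atom(struct atom *katom, struct atom *next_on_slot,
			  struct atom *x_dep)
{
	if (katom->will_fail_event_code)
		katom->event_code = katom->will_fail_event_code;
	if (katom->event_code != EVENT_DONE)
		evict_deps(katom, next_on_slot, x_dep);
}

int main(void)
{
	struct atom a = { EVENT_JOB_FAULT, 0, 0 };
	struct atom b = { EVENT_DONE, 0, 0 };	/* next atom on the same slot */
	struct atom c = { EVENT_DONE, 0, 1 };	/* cross-slot data dependency */

	complete_atom(&a, &b, &c);		/* a failed on the hardware   */
	complete_atom(&b, NULL, NULL);		/* b now fails on completion  */
	printf("b=%d c pending=%d\n", b.event_code, c.will_fail_event_code);
	return 0;
}

Running it shows atom b completing with the propagated fault code while c stays marked until its own completion, which is the behaviour the removed kbase_js_evict_atom()/kbase_js_compact() machinery used to achieve by rewriting the ring buffer.
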
kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); -#endif + kbase_tlstream_tl_nret_ctx_lpu( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); + /* Calculate the job's time used */ if (end_timestamp != NULL) { /* Only calculating it for jobs that really run on the HW (e.g. @@ -3148,7 +3220,6 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, u32 js; kbdev = kctx->kbdev; - js_devdata = &kbdev->js_data; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); @@ -3158,7 +3229,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, /* Invoke callback on jobs on each slot in turn */ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - jsctx_rb_foreach(kctx, js, callback); + jsctx_queue_foreach(kctx, js, callback); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h index 868c680..bdb820a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -491,15 +491,22 @@ void kbasep_js_resume(struct kbase_device *kbdev); * @param[in] kctx Context pointer * @param[in] atom Pointer to the atom to submit * - * @return 0 if submit succeeded - * error code if the atom can not be submitted at this - * time, due to insufficient space in the ringbuffer, or dependencies - * that can not be represented. - */ -int kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom, - bool *enqueue_required); + * @return Whether the context requires to be enqueued. */ +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom); +/** + * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. + * @kctx: Context Pointer + * @prio: Priority (specifies the queue together with js). + * @js: Job slot (specifies the queue together with prio). + * + * Pushes all possible atoms from the linked list to the ringbuffer. + * Number of atoms are limited to free space in the ringbuffer and + * number of available atoms in the linked list. + * + */ +void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); /** * @brief Pull an atom from a context in the job scheduler for execution. * diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c index 8891bff..e6e611b 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -209,13 +209,6 @@ void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kba /* Transfer attributes held in the context flags for contexts that have submit enabled */ - if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != false) { - /* Compute context */ - runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); - } - /* NOTE: Whether this is a non-compute context depends on the jobs being - * run, e.g. it might be submitting jobs with BASE_JD_REQ_ONLY_COMPUTE */ - /* ... More attributes can be added here ... 
*/ /* The context should not have been scheduled yet, so ASSERT if this caused diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index d65b494..75d4b98 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -50,10 +50,7 @@ enum { KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0), /** Set if the context uses an address space and should be kept scheduled in */ - KBASE_CTX_FLAG_PRIVILEGED = (1u << 1), - - /** Kernel-side equivalent of BASE_CONTEXT_HINT_ONLY_COMPUTE. Non-mutable after creation flags set */ - KBASE_CTX_FLAG_HINT_ONLY_COMPUTE = (1u << 2) + KBASE_CTX_FLAG_PRIVILEGED = (1u << 1) /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */ }; @@ -126,15 +123,10 @@ typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct k * - The runpool holds a refcount of how many contexts in the runpool have this * attribute. * - The context holds a refcount of how many atoms have this attribute. - * - * Examples of use: - * - Finding out when there are a mix of @ref BASE_CONTEXT_HINT_ONLY_COMPUTE - * and ! @ref BASE_CONTEXT_HINT_ONLY_COMPUTE contexts in the runpool */ enum kbasep_js_ctx_attr { /** Attribute indicating a context that contains Compute jobs. That is, - * @ref BASE_CONTEXT_HINT_ONLY_COMPUTE is \b set and/or the context has jobs of type - * @ref BASE_JD_REQ_ONLY_COMPUTE + * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE * * @note A context can be both 'Compute' and 'Non Compute' if it contains * both types of jobs. @@ -356,6 +348,9 @@ struct kbasep_js_device_data { u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */ u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */ + /**< Value for JS_SOFT_EVENT_TIMEOUT */ + atomic_t soft_event_timeout_ms; + /** List of suspended soft jobs */ struct list_head suspended_soft_jobs_list; @@ -410,7 +405,7 @@ struct kbasep_js_kctx_info { * * You may not access any of these members from IRQ context. */ - struct { + struct kbase_jsctx { struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ /** Number of jobs ready to run - does \em not include the jobs waiting in diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h index 2094586..debd011 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h @@ -708,7 +708,7 @@ void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct k bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr); /** - * @brief Requeue a Job back into the the Job Scheduler Policy Run Pool + * @brief Requeue a Job back into the Job Scheduler Policy Run Pool * * This will be used to enqueue a job after its creation and also to requeue * a job into the Run Pool that was previously dequeued (running). 
It notifies diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c index 2909f20..385d56a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,9 @@ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* CONFIG_DMA_SHARED_BUFFER */ - +#ifdef CONFIG_UMP +#include +#endif /* CONFIG_UMP */ #include #include #include @@ -36,10 +38,7 @@ #include #include #include - -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif /** * @brief Check the zone compatibility of two regions. @@ -392,13 +391,33 @@ int kbase_add_va_region(struct kbase_context *kctx, { u64 start_pfn; - tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align); - if (!tmp) { + /* + * Depending on the zone the allocation request is for + * we might need to retry it. + */ + do { + tmp = kbase_region_tracker_find_region_meeting_reqs( + kctx, reg, nr_pages, align); + if (tmp) { + start_pfn = (tmp->start_pfn + align - 1) & + ~(align - 1); + err = kbase_insert_va_region_nolock(kctx, reg, + tmp, start_pfn, nr_pages); + break; + } + + /* + * If the allocation is not from the same zone as JIT + * then don't retry, we're out of VA and there is + * nothing which can be done about it. + */ + if ((reg->flags & KBASE_REG_ZONE_MASK) != + KBASE_REG_ZONE_CUSTOM_VA) + break; + } while (kbase_jit_evict(kctx)); + + if (!tmp) err = -ENOMEM; - goto exit; - } - start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1); - err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages); } exit: @@ -410,7 +429,10 @@ KBASE_EXPORT_TEST_API(kbase_add_va_region); /** * @brief Initialize the internal region tracker data structure. 
*/ -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg) +static void kbase_region_tracker_ds_init(struct kbase_context *kctx, + struct kbase_va_region *same_va_reg, + struct kbase_va_region *exec_reg, + struct kbase_va_region *custom_va_reg) { kctx->reg_rbtree = RB_ROOT; kbase_region_tracker_insert(kctx, same_va_reg); @@ -448,6 +470,11 @@ int kbase_region_tracker_init(struct kbase_context *kctx) size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; + u64 same_va_pages; + int err; + + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); #if defined(CONFIG_ARM64) same_va_bits = VA_BITS; @@ -464,24 +491,29 @@ int kbase_region_tracker_init(struct kbase_context *kctx) same_va_bits = 33; #endif - if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) - return -EINVAL; + if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) { + err = -EINVAL; + goto fail_unlock; + } + same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; /* all have SAME_VA */ same_va_reg = kbase_alloc_free_region(kctx, 1, - (1ULL << (same_va_bits - PAGE_SHIFT)) - 1, + same_va_pages, KBASE_REG_ZONE_SAME_VA); - if (!same_va_reg) - return -ENOMEM; + if (!same_va_reg) { + err = -ENOMEM; + goto fail_unlock; + } #ifdef CONFIG_64BIT - /* only 32-bit clients have the other two zones */ + /* 32-bit clients have exec and custom VA zones */ if (kctx->is_compat) { #endif if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - kbase_free_alloced_region(same_va_reg); - return -EINVAL; + err = -EINVAL; + goto fail_free_same_va; } /* If the current size of TMEM is out of range of the * virtual address space addressable by the MMU then @@ -496,8 +528,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) KBASE_REG_ZONE_EXEC); if (!exec_reg) { - kbase_free_alloced_region(same_va_reg); - return -ENOMEM; + err = -ENOMEM; + goto fail_free_same_va; } custom_va_reg = kbase_alloc_free_region(kctx, @@ -505,9 +537,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); if (!custom_va_reg) { - kbase_free_alloced_region(same_va_reg); - kbase_free_alloced_region(exec_reg); - return -ENOMEM; + err = -ENOMEM; + goto fail_free_exec; } #ifdef CONFIG_64BIT } @@ -515,7 +546,102 @@ int kbase_region_tracker_init(struct kbase_context *kctx) kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg); + kctx->same_va_end = same_va_pages + 1; + + kbase_gpu_vm_unlock(kctx); return 0; + +fail_free_exec: + kbase_free_alloced_region(exec_reg); +fail_free_same_va: + kbase_free_alloced_region(same_va_reg); +fail_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) +{ +#ifdef CONFIG_64BIT + struct kbase_va_region *same_va; + struct kbase_va_region *custom_va_reg; + u64 same_va_bits; + u64 total_va_size; + int err; + + /* + * Nothing to do for 32-bit clients, JIT uses the existing + * custom VA zone. 
+ */ + if (kctx->is_compat) + return 0; + +#if defined(CONFIG_ARM64) + same_va_bits = VA_BITS; +#elif defined(CONFIG_X86_64) + same_va_bits = 47; +#elif defined(CONFIG_64BIT) +#error Unsupported 64-bit architecture +#endif + + if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) + same_va_bits = 33; + + total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; + + kbase_gpu_vm_lock(kctx); + + /* + * Modify the same VA free region after creation. Be careful to ensure + * that allocations haven't been made as they could cause an overlap + * to happen with existing same VA allocations and the custom VA zone. + */ + same_va = kbase_region_tracker_find_region_base_address(kctx, + PAGE_SIZE); + if (!same_va) { + err = -ENOMEM; + goto fail_unlock; + } + + /* The region flag or region size has changed since creation so bail. */ + if ((!(same_va->flags & KBASE_REG_FREE)) || + (same_va->nr_pages != total_va_size)) { + err = -ENOMEM; + goto fail_unlock; + } + + /* It's safe to adjust the same VA zone now */ + same_va->nr_pages -= jit_va_pages; + kctx->same_va_end -= jit_va_pages; + + /* + * Create a custom VA zone at the end of the VA for allocations which + * JIT can use so it doesn't have to allocate VA from the kernel. + */ + custom_va_reg = kbase_alloc_free_region(kctx, + kctx->same_va_end, + jit_va_pages, + KBASE_REG_ZONE_CUSTOM_VA); + if (!custom_va_reg) { + /* + * The context will be destroyed if we fail here so no point + * reverting the change we made to same_va. + */ + err = -ENOMEM; + goto fail_unlock; + } + + kbase_region_tracker_insert(kctx, custom_va_reg); + + kbase_gpu_vm_unlock(kctx); + return 0; + +fail_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +#else + return 0; +#endif } int kbase_mem_init(struct kbase_device *kbdev) @@ -613,8 +739,46 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); */ void kbase_free_alloced_region(struct kbase_va_region *reg) { - KBASE_DEBUG_ASSERT(NULL != reg); if (!(reg->flags & KBASE_REG_FREE)) { + /* + * The physical allocation should have been removed from the + * eviction list before this function is called. However, in the + * case of abnormal process termination or the app leaking the + * memory kbase_mem_free_region is not called so it can still be + * on the list at termination time of the region tracker. + */ + if (!list_empty(®->gpu_alloc->evict_node)) { + /* + * Unlink the physical allocation before unmaking it + * evictable so that the allocation isn't grown back to + * its last backed size as we're going to unmap it + * anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must + * unmake it before trying to free it. + * If the memory hasn't been reclaimed it will be + * unmapped and freed below, if it has been reclaimed + * then the operations below are no-ops. + */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } + + /* + * Remove the region from the sticky resource metadata + * list should it be there. 
+ */ + kbase_sticky_resource_release(reg->kctx, NULL, + reg->start_pfn << PAGE_SHIFT, true); + kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); /* To detect use-after-free in debug builds */ @@ -891,10 +1055,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, /* find the region where the virtual address is contained */ reg = kbase_region_tracker_find_region_enclosing_address(kctx, - sset->mem_handle); + sset->mem_handle.basep.handle); if (!reg) { dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", - sset->mem_handle); + sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; } @@ -908,7 +1072,7 @@ static int kbase_do_syncset(struct kbase_context *kctx, map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size); if (!map) { dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", - start, sset->mem_handle); + start, sset->mem_handle.basep.handle); err = -EINVAL; goto out_unlock; } @@ -989,6 +1153,28 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); lockdep_assert_held(&kctx->reg_lock); + + /* + * Unlink the physical allocation before unmaking it evictable so + * that the allocation isn't grown back to its last backed size + * as we're going to unmap it anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must unmake it + * before trying to free it. + * If the memory hasn't been reclaimed it will be unmapped and freed + * below, if it has been reclaimed then the operations below are no-ops. + */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + err = kbase_gpu_munmap(kctx, reg); if (err) { dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); @@ -1046,7 +1232,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) kbase_free_alloced_region(reg); } else { /* A real GPU va */ - /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); if (!reg || (reg->flags & KBASE_REG_FREE)) { @@ -1063,7 +1248,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) err = -EINVAL; goto out_unlock; } - err = kbase_mem_free_region(kctx, reg); } @@ -1124,6 +1308,8 @@ int kbase_alloc_phy_pages_helper( struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested) { + int new_page_count __maybe_unused; + KBASE_DEBUG_ASSERT(alloc); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.kctx); @@ -1131,7 +1317,8 @@ int kbase_alloc_phy_pages_helper( if (nr_pages_requested == 0) goto done; /*nothing to do*/ - kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages); + new_page_count = kbase_atomic_add_pages( + nr_pages_requested, &alloc->imported.kctx->used_pages); kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); /* Increase mm counters before we allocate pages so that this @@ -1142,9 +1329,9 @@ int kbase_alloc_phy_pages_helper( nr_pages_requested, alloc->pages + alloc->nents) != 0) goto no_alloc; -#if defined(CONFIG_MALI_MIPE_ENABLED) - kbase_tlstream_aux_pagesalloc((s64)nr_pages_requested); -#endif + kbase_tlstream_aux_pagesalloc( + (u32)alloc->imported.kctx->id, + (u64)new_page_count); alloc->nents += nr_pages_requested; done: 
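Aside: the allocation helper in the hunk above and the free helper in the hunk below share one accounting pattern: adjust the per-context and per-device page counters, emit a single timeline event carrying the post-update per-context count, and skip the accounting entirely when the backing pages were already reclaimed. The sketch below only illustrates that pattern; every name in it (ctx_pages, dev_pages, emit_pages_event, account_alloc, account_free) is a hypothetical stand-in rather than a driver symbol.

#include <stddef.h>

static long ctx_pages;   /* stand-in for the per-context used_pages counter */
static long dev_pages;   /* stand-in for the per-device used_pages counter  */

/* stand-in for the timeline event that reports the post-update count */
static void emit_pages_event(unsigned int ctx_id, long new_count)
{
        (void)ctx_id;
        (void)new_count;
}

static void account_alloc(unsigned int ctx_id, size_t nr_pages)
{
        long new_count;

        ctx_pages += (long)nr_pages;
        dev_pages += (long)nr_pages;
        new_count = ctx_pages;
        emit_pages_event(ctx_id, new_count);
}

static void account_free(unsigned int ctx_id, size_t nr_pages, int reclaimed)
{
        long new_count;

        /* evicted backing was already taken off the counters at reclaim time */
        if (reclaimed)
                return;

        ctx_pages -= (long)nr_pages;
        dev_pages -= (long)nr_pages;
        new_count = ctx_pages;
        emit_pages_event(ctx_id, new_count);
}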
@@ -1162,10 +1349,12 @@ int kbase_free_phy_pages_helper( struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free) { + struct kbase_context *kctx = alloc->imported.kctx; bool syncback; + bool reclaimed = (alloc->evicted != 0); phys_addr_t *start_free; + int new_page_count __maybe_unused; - KBASE_DEBUG_ASSERT(alloc); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.kctx); KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); @@ -1178,19 +1367,29 @@ int kbase_free_phy_pages_helper( syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; - kbase_mem_pool_free_pages(&alloc->imported.kctx->mem_pool, + kbase_mem_pool_free_pages(&kctx->mem_pool, nr_pages_to_free, start_free, - syncback); + syncback, + reclaimed); alloc->nents -= nr_pages_to_free; - kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free); - kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages); - kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages); -#if defined(CONFIG_MALI_MIPE_ENABLED) - kbase_tlstream_aux_pagesalloc(-(s64)nr_pages_to_free); -#endif + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. + */ + if (!reclaimed) { + kbase_process_page_usage_dec(kctx, nr_pages_to_free); + new_page_count = kbase_atomic_sub_pages(nr_pages_to_free, + &kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); + } return 0; } @@ -1203,7 +1402,12 @@ void kbase_mem_kref_free(struct kref *kref) switch (alloc->type) { case KBASE_MEM_TYPE_NATIVE: { - KBASE_DEBUG_ASSERT(alloc->imported.kctx); + WARN_ON(!alloc->imported.kctx); + /* + * The physical allocation must have been removed from the + * eviction list before trying to free it. 
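+ * (Both kbase_mem_free_region() and kbase_free_alloced_region() take
+ * the allocation off the eviction list before the final reference is
+ * dropped, so this warning firing points at a path that skipped that
+ * step.)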
+ */ + WARN_ON(!list_empty(&alloc->evict_node)); kbase_free_phy_pages_helper(alloc, alloc->nents); break; } @@ -1236,6 +1440,9 @@ void kbase_mem_kref_free(struct kref *kref) dma_buf_put(alloc->imported.umm.dma_buf); break; #endif + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + kfree(alloc->imported.user_buf.pages); + break; case KBASE_MEM_TYPE_TB:{ void *tb; @@ -1278,9 +1485,11 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) goto out_term; + reg->cpu_alloc->reg = reg; if (reg->cpu_alloc != reg->gpu_alloc) { if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) goto out_rollback; + reg->gpu_alloc->reg = reg; } return 0; @@ -1374,3 +1583,921 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx) } KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); + +struct kbase_jit_debugfs_data { + int (*func)(struct kbase_jit_debugfs_data *); + struct mutex lock; + struct kbase_context *kctx; + u64 active_value; + u64 pool_value; + u64 destroy_value; + char buffer[50]; +}; + +static int kbase_jit_debugfs_common_open(struct inode *inode, + struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) +{ + struct kbase_jit_debugfs_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->func = func; + mutex_init(&data->lock); + data->kctx = (struct kbase_context *) inode->i_private; + + file->private_data = data; + + return nonseekable_open(inode, file); +} + +static ssize_t kbase_jit_debugfs_common_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + struct kbase_jit_debugfs_data *data; + size_t size; + int ret; + + data = (struct kbase_jit_debugfs_data *) file->private_data; + mutex_lock(&data->lock); + + if (*ppos) { + size = strnlen(data->buffer, sizeof(data->buffer)); + } else { + if (!data->func) { + ret = -EACCES; + goto out_unlock; + } + + if (data->func(data)) { + ret = -EACCES; + goto out_unlock; + } + + size = scnprintf(data->buffer, sizeof(data->buffer), + "%llu,%llu,%llu", data->active_value, + data->pool_value, data->destroy_value); + } + + ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); + +out_unlock: + mutex_unlock(&data->lock); + return ret; +} + +static int kbase_jit_debugfs_common_release(struct inode *inode, + struct file *file) +{ + kfree(file->private_data); + return 0; +} + +#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + return kbase_jit_debugfs_common_open(inode, file, __func); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = kbase_jit_debugfs_common_release, \ + .read = kbase_jit_debugfs_common_read, \ + .write = NULL, \ + .llseek = generic_file_llseek, \ +} + +static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct list_head *tmp; + + mutex_lock(&kctx->jit_lock); + list_for_each(tmp, &kctx->jit_active_head) { + data->active_value++; + } + + list_for_each(tmp, &kctx->jit_pool_head) { + data->pool_value++; + } + + list_for_each(tmp, &kctx->jit_destroy_head) { + data->destroy_value++; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, + kbase_jit_debugfs_count_get); + +static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + 
mutex_lock(&kctx->jit_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->nr_pages; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, + kbase_jit_debugfs_vm_get); + +static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, + kbase_jit_debugfs_phys_get); + +void kbase_jit_debugfs_add(struct kbase_context *kctx) +{ + /* Debugfs entry for getting the number of JIT allocations. */ + debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_count_fops); + + /* + * Debugfs entry for getting the total number of virtual pages + * used by JIT allocations. + */ + debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_vm_fops); + + /* + * Debugfs entry for getting the number of physical pages used + * by JIT allocations. + */ + debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_phys_fops); +} + +/** + * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations + * @work: Work item + * + * This function does the work of freeing JIT allocations whose physical + * backing has been released. + */ +static void kbase_jit_destroy_worker(struct work_struct *work) +{ + struct kbase_context *kctx; + struct kbase_va_region *reg; + + kctx = container_of(work, struct kbase_context, jit_work); + do { + mutex_lock(&kctx->jit_lock); + if (list_empty(&kctx->jit_destroy_head)) + reg = NULL; + else + reg = list_first_entry(&kctx->jit_destroy_head, + struct kbase_va_region, jit_node); + + if (reg) { + list_del(®->jit_node); + mutex_unlock(&kctx->jit_lock); + + kbase_gpu_vm_lock(kctx); + kbase_mem_free_region(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } else + mutex_unlock(&kctx->jit_lock); + } while (reg); +} + +int kbase_jit_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->jit_active_head); + INIT_LIST_HEAD(&kctx->jit_pool_head); + INIT_LIST_HEAD(&kctx->jit_destroy_head); + mutex_init(&kctx->jit_lock); + INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); + + return 0; +} + +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + struct base_jit_alloc_info *info) +{ + struct kbase_va_region *reg = NULL; + struct kbase_va_region *walker; + struct kbase_va_region *temp; + size_t current_diff = SIZE_MAX; + + int ret; + + mutex_lock(&kctx->jit_lock); + /* + * Scan the pool for an existing allocation which meets our + * requirements and remove it. 
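+ * The candidate whose committed size (gpu_alloc->nents) is closest to
+ * info->commit_pages is kept; an exact match ends the scan early.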
+ */ + list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) { + + if (walker->nr_pages >= info->va_pages) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been + * meet, it's suitable but other allocations + * might be a better fit. + */ + min_size = min_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } + + /* The allocation is an exact match, stop looking */ + if (current_diff == 0) + break; + } + } + + if (reg) { + /* + * Remove the found region from the pool and add it to the + * active list. + */ + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_active_head); + + /* Release the jit lock before modifying the allocation */ + mutex_unlock(&kctx->jit_lock); + + kbase_gpu_vm_lock(kctx); + + /* Make the physical backing no longer reclaimable */ + if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) + goto update_failed; + + /* Grow the backing if required */ + if (reg->gpu_alloc->nents < info->commit_pages) { + size_t delta; + size_t old_size = reg->gpu_alloc->nents; + + /* Allocate some more pages */ + delta = info->commit_pages - reg->gpu_alloc->nents; + if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) + != 0) + goto update_failed; + + if (reg->cpu_alloc != reg->gpu_alloc) { + if (kbase_alloc_phy_pages_helper( + reg->cpu_alloc, delta) != 0) { + kbase_free_phy_pages_helper( + reg->gpu_alloc, delta); + goto update_failed; + } + } + + ret = kbase_mem_grow_gpu_mapping(kctx, reg, + info->commit_pages, old_size); + /* + * The grow failed so put the allocation back in the + * pool and return failure. + */ + if (ret) + goto update_failed; + } + kbase_gpu_vm_unlock(kctx); + } else { + /* No suitable JIT allocation was found so create a new one */ + u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | + BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF; + u64 gpu_addr; + u16 alignment; + + mutex_unlock(&kctx->jit_lock); + + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, + info->extent, &flags, &gpu_addr, &alignment); + if (!reg) + goto out_unlocked; + + mutex_lock(&kctx->jit_lock); + list_add(®->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_lock); + } + + return reg; + +update_failed: + /* + * An update to an allocation from the pool failed, chances + * are slim a new allocation would fair any better so return + * the allocation to the pool and return the function with failure. + */ + kbase_gpu_vm_unlock(kctx); + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_lock); +out_unlocked: + return NULL; +} + +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + /* The physical backing of memory in the pool is always reclaimable */ + down_read(&kctx->process_mm->mmap_sem); + kbase_gpu_vm_lock(kctx); + kbase_mem_evictable_make(reg->gpu_alloc); + kbase_gpu_vm_unlock(kctx); + up_read(&kctx->process_mm->mmap_sem); + + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_lock); +} + +void kbase_jit_backing_lost(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = reg->kctx; + + /* + * JIT allocations will always be on a list, if the region + * is not on a list then it's not a JIT allocation. 
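+ * (jit_node is initialised empty in kbase_reg_prepare_native() and a
+ * JIT region is placed on jit_active_head as soon as it is created, so
+ * an empty jit_node here means the region was never a JIT allocation.)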
+ */ + if (list_empty(®->jit_node)) + return; + + /* + * Freeing the allocation requires locks we might not be able + * to take now, so move the allocation to the free list and kick + * the worker which will do the freeing. + */ + mutex_lock(&kctx->jit_lock); + list_del_init(®->jit_node); + list_add(®->jit_node, &kctx->jit_destroy_head); + mutex_unlock(&kctx->jit_lock); + + schedule_work(&kctx->jit_work); +} + +bool kbase_jit_evict(struct kbase_context *kctx) +{ + struct kbase_va_region *reg = NULL; + + lockdep_assert_held(&kctx->reg_lock); + + /* Free the oldest allocation from the pool */ + mutex_lock(&kctx->jit_lock); + if (!list_empty(&kctx->jit_pool_head)) { + reg = list_entry(kctx->jit_pool_head.prev, + struct kbase_va_region, jit_node); + list_del(®->jit_node); + } + mutex_unlock(&kctx->jit_lock); + + if (reg) + kbase_mem_free_region(kctx, reg); + + return (reg != NULL); +} + +void kbase_jit_term(struct kbase_context *kctx) +{ + struct kbase_va_region *walker; + + /* Free all allocations for this context */ + + /* + * Flush the freeing of allocations whose backing has been freed + * (i.e. everything in jit_destroy_head). + */ + cancel_work_sync(&kctx->jit_work); + + kbase_gpu_vm_lock(kctx); + /* Free all allocations from the pool */ + while (!list_empty(&kctx->jit_pool_head)) { + walker = list_first_entry(&kctx->jit_pool_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + kbase_mem_free_region(kctx, walker); + } + + /* Free all allocations from active list */ + while (!list_empty(&kctx->jit_active_head)) { + walker = list_first_entry(&kctx->jit_active_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + kbase_mem_free_region(kctx, walker); + } + kbase_gpu_vm_unlock(kctx); +} + +static int kbase_jd_user_buf_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + long pinned_pages; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + phys_addr_t *pa; + long i; + int err = -ENOMEM; + unsigned long address; + struct task_struct *owner; + struct device *dev; + unsigned long offset; + unsigned long local_size; + + alloc = reg->gpu_alloc; + pa = kbase_get_gpu_phy_pages(reg); + address = alloc->imported.user_buf.address; + owner = alloc->imported.user_buf.owner; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + + pages = alloc->imported.user_buf.pages; + + pinned_pages = get_user_pages(owner, owner->mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); + + if (pinned_pages <= 0) + return pinned_pages; + + if (pinned_pages != alloc->imported.user_buf.nr_pages) { + for (i = 0; i < pinned_pages; i++) + put_page(pages[i]); + return -ENOMEM; + } + + dev = kctx->kbdev->dev; + offset = address & ~PAGE_MASK; + local_size = alloc->imported.user_buf.size; + + for (i = 0; i < pinned_pages; i++) { + dma_addr_t dma_addr; + unsigned long min; + + min = MIN(PAGE_SIZE - offset, local_size); + dma_addr = dma_map_page(dev, pages[i], + offset, min, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto unwind; + + alloc->imported.user_buf.dma_addrs[i] = dma_addr; + pa[i] = page_to_phys(pages[i]); + + local_size -= min; + offset = 0; + } + + alloc->nents = pinned_pages; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), + reg->flags); + if (err == 0) + return 0; + + alloc->nents = 0; + /* fall down */ +unwind: + while (i--) { + dma_unmap_page(kctx->kbdev->dev, + alloc->imported.user_buf.dma_addrs[i], + 
PAGE_SIZE, DMA_BIDIRECTIONAL); + put_page(pages[i]); + pages[i] = NULL; + } + + return err; +} + +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable) +{ + long i; + struct page **pages; + unsigned long size = alloc->imported.user_buf.size; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + pages = alloc->imported.user_buf.pages; + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { + unsigned long local_size; + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + + local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); + dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + DMA_BIDIRECTIONAL); + if (writeable) + set_page_dirty_lock(pages[i]); + put_page(pages[i]); + pages[i] = NULL; + + size -= local_size; + } + alloc->nents = 0; +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kbase_jd_umm_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + struct sg_table *sgt; + struct scatterlist *s; + int i; + phys_addr_t *pa; + int err; + size_t count = 0; + struct kbase_mem_phy_alloc *alloc; + + alloc = reg->gpu_alloc; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); + KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); + sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, + DMA_BIDIRECTIONAL); + + if (IS_ERR_OR_NULL(sgt)) + return -EINVAL; + + /* save for later */ + alloc->imported.umm.sgt = sgt; + + pa = kbase_get_gpu_phy_pages(reg); + KBASE_DEBUG_ASSERT(pa); + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + int j; + size_t pages = PFN_UP(sg_dma_len(s)); + + WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), + "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", + sg_dma_len(s)); + + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long) sg_dma_address(s)); + + for (j = 0; (j < pages) && (count < reg->nr_pages); j++, + count++) + *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); + WARN_ONCE(j < pages, + "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size); + } + + if (WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size)) { + err = -EINVAL; + goto out; + } + + /* Update nents as we now have pages to map */ + alloc->nents = count; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); + +out: + if (err) { + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + } + + return err; +} + +static void kbase_jd_umm_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); + KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + alloc->nents = 0; +} +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \ + || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) +static void add_kds_resource(struct kds_resource *kds_res, + struct kds_resource **kds_resources, u32 *kds_res_count, + unsigned long *kds_access_bitmap, bool exclusive) +{ + u32 i; + + for (i = 0; i < 
*kds_res_count; i++) { + /* Duplicate resource, ignore */ + if (kds_resources[i] == kds_res) + return; + } + + kds_resources[*kds_res_count] = kds_res; + if (exclusive) + set_bit(*kds_res_count, kds_access_bitmap); + (*kds_res_count)++; +} +#endif + +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm +#ifdef CONFIG_KDS + , u32 *kds_res_count, struct kds_resource **kds_resources, + unsigned long *kds_access_bitmap, bool exclusive +#endif + ) +{ + int err; + + /* decide what needs to happen for this resource */ + switch (reg->gpu_alloc->type) { + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + if (reg->gpu_alloc->imported.user_buf.owner->mm != locked_mm) + goto exit; + + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + err = kbase_jd_user_buf_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; + goto exit; + } + } + } + break; + case BASE_MEM_IMPORT_TYPE_UMP: { +#if defined(CONFIG_KDS) && defined(CONFIG_UMP) + if (kds_res_count) { + struct kds_resource *kds_res; + + kds_res = ump_dd_kds_resource_get( + reg->gpu_alloc->imported.ump_handle); + if (kds_res) + add_kds_resource(kds_res, kds_resources, + kds_res_count, + kds_access_bitmap, exclusive); + } +#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ + break; + } +#ifdef CONFIG_DMA_SHARED_BUFFER + case BASE_MEM_IMPORT_TYPE_UMM: { +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + if (kds_res_count) { + struct kds_resource *kds_res; + + kds_res = get_dma_buf_kds_resource( + reg->gpu_alloc->imported.umm.dma_buf); + if (kds_res) + add_kds_resource(kds_res, kds_resources, + kds_res_count, + kds_access_bitmap, exclusive); + } +#endif + reg->gpu_alloc->imported.umm.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + err = kbase_jd_umm_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.umm.current_mapping_usage_count--; + goto exit; + } + } + break; + } +#endif + default: + goto exit; + } + + return kbase_mem_phy_alloc_get(reg->gpu_alloc); +exit: + return NULL; +} + +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +{ + switch (alloc->type) { +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: { + alloc->imported.umm.current_mapping_usage_count--; + + if (0 == alloc->imported.umm.current_mapping_usage_count) { + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_umm_unmap(kctx, alloc); + } + } + break; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + alloc->imported.user_buf.current_mapping_usage_count--; + + if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + bool writeable = true; + + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) + writeable = false; + + kbase_jd_user_buf_unmap(kctx, alloc, writeable); + } + } + break; + default: + break; + } + kbase_mem_phy_alloc_put(alloc); +} + +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr) +{ + struct kbase_ctx_ext_res_meta *meta = NULL; + struct kbase_ctx_ext_res_meta *walker; + + 
lockdep_assert_held(&kctx->reg_lock); + + /* + * Walk the per context externel resource metadata list for the + * metadata which matches the region which is being acquired. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { + if (walker->gpu_addr == gpu_addr) { + meta = walker; + break; + } + } + + /* No metadata exists so create one. */ + if (!meta) { + struct kbase_va_region *reg; + + /* Find the region */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + if (NULL == reg || (reg->flags & KBASE_REG_FREE)) + goto failed; + + /* Allocate the metadata object */ + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto failed; + + /* + * Fill in the metadata object and acquire a reference + * for the physical resource. + */ + meta->alloc = kbase_map_external_resource(kctx, reg, NULL +#ifdef CONFIG_KDS + , NULL, NULL, + NULL, false +#endif + ); + + if (!meta->alloc) + goto fail_map; + + meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + meta->refcount = 1; + + list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); + } else { + if (meta->refcount == UINT_MAX) + goto failed; + + meta->refcount++; + } + + return meta; + +fail_map: + kfree(meta); +failed: + return NULL; +} + +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* Search of the metadata if one isn't provided. */ + if (!meta) { + /* + * Walk the per context externel resource metadata list for the + * metadata which matches the region which is being released. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, + ext_res_node) { + if (walker->gpu_addr == gpu_addr) { + meta = walker; + break; + } + } + } + + /* No metadata so just return. */ + if (!meta) + return false; + + meta->refcount--; + if ((meta->refcount == 0) || force) { + /* + * Last reference to the metadata, drop the physical memory + * reference and free the metadata. + */ + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, + meta->gpu_addr); + + kbase_unmap_external_resource(kctx, reg, meta->alloc); + list_del(&meta->ext_res_node); + kfree(meta); + } + + return true; +} + +int kbase_sticky_resource_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->ext_res_meta_head); + + return 0; +} + +void kbase_sticky_resource_term(struct kbase_context *kctx) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Free any sticky resources which haven't been unmapped. + * + * Note: + * We don't care about refcounts at this point as no future + * references to the meta data will be made. + * Region termination would find these if we didn't free them + * here, but it's more efficient if we do the clean up here. + */ + while (!list_empty(&kctx->ext_res_meta_head)) { + walker = list_first_entry(&kctx->ext_res_meta_head, + struct kbase_ctx_ext_res_meta, ext_res_node); + + kbase_sticky_resource_release(kctx, walker, 0, true); + } +} diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h index 1839cce..8f7629a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,9 @@ #endif #include - +#ifdef CONFIG_KDS +#include +#endif /* CONFIG_KDS */ #ifdef CONFIG_UMP #include #endif /* CONFIG_UMP */ @@ -41,6 +43,8 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include "mali_kbase_gator.h" #endif +/* Required for kbase_mem_evictable_unmake */ +#include "mali_kbase_mem_linux.h" /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ @@ -75,6 +79,7 @@ enum kbase_memory_type { KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_IMPORTED_UMP, KBASE_MEM_TYPE_IMPORTED_UMM, + KBASE_MEM_TYPE_IMPORTED_USER_BUF, KBASE_MEM_TYPE_ALIAS, KBASE_MEM_TYPE_TB, KBASE_MEM_TYPE_RAW @@ -111,6 +116,16 @@ struct kbase_mem_phy_alloc { /* kbase_cpu_mappings */ struct list_head mappings; + /* Node used to store this allocation on the eviction list */ + struct list_head evict_node; + /* Physical backing size when the pages where evicted */ + size_t evicted; + /* + * Back reference to the region structure which created this + * allocation, or NULL if it has been freed. + */ + struct kbase_va_region *reg; + /* type of buffer */ enum kbase_memory_type type; @@ -136,6 +151,15 @@ struct kbase_mem_phy_alloc { } alias; /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */ struct kbase_context *kctx; + struct { + unsigned long address; + unsigned long size; + unsigned long nr_pages; + struct page **pages; + unsigned int current_mapping_usage_count; + struct task_struct *owner; + dma_addr_t *dma_addrs; + } user_buf; } imported; }; @@ -232,6 +256,8 @@ struct kbase_va_region { #define KBASE_REG_SECURE (1ul << 19) +#define KBASE_REG_DONT_NEED (1ul << 20) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -266,6 +292,8 @@ struct kbase_va_region { /* non-NULL if this memory object is a kds_resource */ struct kds_resource *kds_res; + /* List head used to store the region in the JIT allocation pool */ + struct list_head jit_node; }; /* Common functions */ @@ -308,12 +336,22 @@ static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type) { struct kbase_mem_phy_alloc *alloc; - const size_t alloc_size = - sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; + size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; + size_t per_page_size = sizeof(*alloc->pages); + + /* Imported pages may have page private data already in use */ + if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + alloc_size += nr_pages * + sizeof(*alloc->imported.user_buf.dma_addrs); + per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs); + } - /* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */ + /* + * Prevent nr_pages*per_page_size + sizeof(*alloc) from + * wrapping around. 
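+ * The test below is that condition rearranged so it can be evaluated
+ * without itself overflowing; with (size_t)-1 playing the role of
+ * SIZE_MAX,
+ *   nr_pages > (SIZE_MAX - sizeof(*alloc)) / per_page_size
+ * holds exactly when
+ *   nr_pages * per_page_size + sizeof(*alloc) > SIZE_MAX.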
+ */ if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) - / sizeof(*alloc->pages))) + / per_page_size)) return ERR_PTR(-ENOMEM); /* Allocate based on the size to reduce internal fragmentation of vmem */ @@ -336,6 +374,10 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en INIT_LIST_HEAD(&alloc->mappings); alloc->type = type; + if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + alloc->imported.user_buf.dma_addrs = + (void *) (alloc->pages + nr_pages); + return alloc; } @@ -354,14 +396,17 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, else if (!reg->cpu_alloc) return -ENOMEM; reg->cpu_alloc->imported.kctx = kctx; + INIT_LIST_HEAD(®->cpu_alloc->evict_node); if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) { reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE); reg->gpu_alloc->imported.kctx = kctx; + INIT_LIST_HEAD(®->gpu_alloc->evict_node); } else { reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); } + INIT_LIST_HEAD(®->jit_node); reg->flags &= ~KBASE_REG_FREE; return 0; } @@ -481,11 +526,13 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, * @pages: Pointer to array holding the physical addresses of the pages to * free. * @dirty: Whether any pages may be dirty in the cache. + * @reclaimed: Whether the pages where reclaimable and thus should bypass + * the pool and go straight to the kernel. * * Like kbase_mem_pool_free() but optimized for freeing many pages. */ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - phys_addr_t *pages, bool dirty); + phys_addr_t *pages, bool dirty, bool reclaimed); /** * kbase_mem_pool_size - Get number of free pages in memory pool @@ -537,6 +584,7 @@ size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); int kbase_region_tracker_init(struct kbase_context *kctx); +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); void kbase_region_tracker_term(struct kbase_context *kctx); struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); @@ -748,7 +796,7 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) SetPagePrivate(p); if (sizeof(dma_addr_t) > sizeof(p->private)) { /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the - * private filed stays the same. So we have to be clever and + * private field stays the same. So we have to be clever and * use the fact that we only store DMA addresses of whole pages, * so the low bits should be zero */ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); @@ -830,4 +878,134 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); +/** + * kbase_jit_debugfs_add - Add per context debugfs entry for JIT. + * @kctx: kbase context + */ +void kbase_jit_debugfs_add(struct kbase_context *kctx); + +/** + * kbase_jit_init - Initialize the JIT memory pool management + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_jit_init(struct kbase_context *kctx); + +/** + * kbase_jit_allocate - Allocate JIT memory + * @kctx: kbase context + * @info: JIT allocation information + * + * Return: JIT allocation on success or NULL on failure. 
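+ *
+ * The pool of previously freed JIT regions is searched first: a pool
+ * region large enough for @info is reused (its backing is made
+ * non-evictable again and, if needed, grown to the requested commit
+ * size); otherwise a fresh allocation is created.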
+ */ +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + struct base_jit_alloc_info *info); + +/** + * kbase_jit_free - Free a JIT allocation + * @kctx: kbase context + * @reg: JIT allocation + * + * Frees a JIT allocation and places it into the free pool for later reuse. + */ +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing + * @reg: JIT allocation + */ +void kbase_jit_backing_lost(struct kbase_va_region *reg); + +/** + * kbase_jit_evict - Evict a JIT allocation from the pool + * @kctx: kbase context + * + * Evict the least recently used JIT allocation from the pool. This can be + * required if normal VA allocations are failing due to VA exhaustion. + * + * Return: True if a JIT allocation was freed, false otherwise. + */ +bool kbase_jit_evict(struct kbase_context *kctx); + +/** + * kbase_jit_term - Terminate the JIT memory pool management + * @kctx: kbase context + */ +void kbase_jit_term(struct kbase_context *kctx); + +/** + * kbase_map_external_resource - Map an external resource to the GPU. + * @kctx: kbase context. + * @reg: The region to map. + * @locked_mm: The mm_struct which has been locked for this operation. + * @kds_res_count: The number of KDS resources. + * @kds_resources: Array of KDS resources. + * @kds_access_bitmap: Access bitmap for KDS. + * @exclusive: If the KDS resource requires exclusive access. + * + * Return: The physical allocation which backs the region on success or NULL + * on failure. + */ +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm +#ifdef CONFIG_KDS + , u32 *kds_res_count, struct kds_resource **kds_resources, + unsigned long *kds_access_bitmap, bool exclusive +#endif + ); + +/** + * kbase_unmap_external_resource - Unmap an external resource from the GPU. + * @kctx: kbase context. + * @reg: The region to unmap or NULL if it has already been released. + * @alloc: The physical allocation being unmapped. + */ +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_sticky_resource_init - Initialize sticky resource management. + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_sticky_resource_init(struct kbase_context *kctx); + +/** + * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. + * @kctx: kbase context. + * @gpu_addr: The GPU address of the external resource. + * + * Return: The metadata object which represents the binding between the + * external resource and the kbase context on success or NULL on failure. + */ +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr); + +/** + * kbase_sticky_resource_release - Release a reference on a sticky resource. + * @kctx: kbase context. + * @meta: Binding metadata. + * @gpu_addr: GPU address of the external resource. + * @force: If the release is being forced. + * + * If meta is NULL then gpu_addr will be used to scan the metadata list and + * find the matching metadata (if any), otherwise the provided meta will be + * used and gpu_addr will be ignored. + * + * If force is true then the refcount in the metadata is ignored and the + * resource will be forced freed. + * + * Return: True if the release found the metadata and the reference was dropped. 
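+ * False is returned when no matching metadata could be found.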
+ */ +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force); + +/** + * kbase_sticky_resource_term - Terminate sticky resource management. + * @kctx: kbase context + */ +void kbase_sticky_resource_term(struct kbase_context *kctx); #endif /* _KBASE_MEM_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index 3e4481a..0abe0e6 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,24 +26,65 @@ #include #include #include +#include #include #include #include #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) #include -#endif +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) */ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ +#include #include #include #include #include +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include +#endif + static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); static const struct vm_operations_struct kbase_vm_ops; +/** + * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Return: 0 on success, -errno on error. + * + * Shrink (or completely remove) all CPU mappings which reference the shrunk + * part of the allocation. + * + * Note: Caller must be holding the processes mmap_sem lock. + */ +static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region or NULL if there isn't one + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Return: 0 on success, negative -errno on error + * + * Unmap the shrunk pages from the GPU mapping. Note that the size of the region + * itself is unmodified as we still need to reserve the VA, only the page tables + * will be modified by this function. 
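+ *
+ * The eviction shrinker, for example, calls this with @new_pages == 0
+ * to drop the whole GPU mapping of a reclaimed allocation.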
+ */ +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment) { int zone; @@ -76,9 +117,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages #if defined(CONFIG_64BIT) if (kctx->is_compat) cpu_va_bits = 32; - else - /* force SAME_VA if a 64-bit client */ - *flags |= BASE_MEM_SAME_VA; #endif if (!kbase_check_alloc_flags(*flags)) { @@ -89,12 +127,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages } if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && - kctx->kbdev->system_coherency != COHERENCY_ACE) { + !kbase_device_is_cpu_coherent(kctx->kbdev)) { dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); goto bad_flags; } if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && - kctx->kbdev->system_coherency != COHERENCY_ACE) { + !kbase_device_is_cpu_coherent(kctx->kbdev)) { /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ *flags &= ~BASE_MEM_COHERENT_SYSTEM; } @@ -141,20 +179,29 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages /* mmap needed to setup VA? */ if (*flags & BASE_MEM_SAME_VA) { + unsigned long prot = PROT_NONE; + unsigned long va_size = va_pages << PAGE_SHIFT; + unsigned long va_map = va_size; + unsigned long cookie, cookie_nr; + unsigned long cpu_addr; + /* Bind to a cookie */ if (!kctx->cookies) { dev_err(dev, "No cookies available for allocation!"); + kbase_gpu_vm_unlock(kctx); goto no_cookie; } /* return a cookie */ - *gpu_va = __ffs(kctx->cookies); - kctx->cookies &= ~(1UL << *gpu_va); - BUG_ON(kctx->pending_regions[*gpu_va]); - kctx->pending_regions[*gpu_va] = reg; + cookie_nr = __ffs(kctx->cookies); + kctx->cookies &= ~(1UL << cookie_nr); + BUG_ON(kctx->pending_regions[cookie_nr]); + kctx->pending_regions[cookie_nr] = reg; + + kbase_gpu_vm_unlock(kctx); /* relocate to correct base */ - *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); - *gpu_va <<= PAGE_SHIFT; + cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); + cookie <<= PAGE_SHIFT; /* See if we must align memory due to GPU PC bits vs CPU VA */ if ((*flags & BASE_MEM_PROT_GPU_EX) && @@ -162,21 +209,101 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages *va_alignment = gpu_pc_bits; reg->flags |= KBASE_REG_ALIGNED; } + + /* + * Pre-10.1 UKU userland calls mmap for us so return the + * unaligned address and skip the map. + */ + if (kctx->api_version < KBASE_API_VERSION(10, 1)) { + *gpu_va = (u64) cookie; + return reg; + } + + /* + * GPUCORE-2190: + * + * We still need to return alignment for old userspace. + */ + if (*va_alignment) + va_map += 3 * (1UL << *va_alignment); + + if (*flags & BASE_MEM_PROT_CPU_RD) + prot |= PROT_READ; + if (*flags & BASE_MEM_PROT_CPU_WR) + prot |= PROT_WRITE; + + cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, + MAP_SHARED, cookie); + + if (IS_ERR_VALUE(cpu_addr)) { + kctx->pending_regions[cookie_nr] = NULL; + kctx->cookies |= (1UL << cookie_nr); + goto no_mmap; + } + + /* + * If we had to allocate extra VA space to force the + * alignment release it. 
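+ * As a worked example, with a (hypothetical) 2 MB alignment requirement
+ * va_map was grown by 3 * 2 MB above; the address returned by vm_mmap()
+ * is rounded up to the next 2 MB boundary, stepped past a 4 GB boundary
+ * if the aligned range would otherwise start or end on one, and the
+ * unused head and tail of the mapping are then vm_munmap()'d.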
+ */ + if (*va_alignment) { + unsigned long alignment = 1UL << *va_alignment; + unsigned long align_mask = alignment - 1; + unsigned long addr; + unsigned long addr_end; + unsigned long aligned_addr; + unsigned long aligned_addr_end; + + addr = cpu_addr; + addr_end = addr + va_map; + + aligned_addr = (addr + align_mask) & + ~((u64) align_mask); + aligned_addr_end = aligned_addr + va_size; + + if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) { + /* + * Can't end at 4GB boundary on some GPUs as + * it will halt the shader. + */ + aligned_addr += 2 * alignment; + aligned_addr_end += 2 * alignment; + } else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) { + /* + * Can't start at 4GB boundary on some GPUs as + * it will halt the shader. + */ + aligned_addr += alignment; + aligned_addr_end += alignment; + } + + /* anything to chop off at the start? */ + if (addr != aligned_addr) + vm_munmap(addr, aligned_addr - addr); + + /* anything at the end? */ + if (addr_end != aligned_addr_end) + vm_munmap(aligned_addr_end, + addr_end - aligned_addr_end); + + *gpu_va = (u64) aligned_addr; + } else + *gpu_va = (u64) cpu_addr; } else /* we control the VA */ { if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) { dev_warn(dev, "Failed to map memory on GPU"); + kbase_gpu_vm_unlock(kctx); goto no_mmap; } /* return real GPU VA */ *gpu_va = reg->start_pfn << PAGE_SHIFT; + + kbase_gpu_vm_unlock(kctx); } - kbase_gpu_vm_unlock(kctx); return reg; no_mmap: no_cookie: - kbase_gpu_vm_unlock(kctx); no_mem: kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); @@ -254,12 +381,292 @@ out_unlock: return ret; } +/** + * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the + * Ephemeral memory eviction list. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages which can be freed. + */ +static +unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + unsigned long pages = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + + mutex_lock(&kctx->evict_lock); + + list_for_each_entry(alloc, &kctx->evict_list, evict_node) + pages += alloc->nents; + + mutex_unlock(&kctx->evict_lock); + return pages; +} + +/** + * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction + * list for pages and try to reclaim them. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages freed (can be less then requested) or -1 if the + * shrinker failed to free pages in its pool. + * + * Note: + * This function accesses region structures without taking the region lock, + * this is required as the OOM killer can call the shrinker after the region + * lock has already been held. + * This is safe as we can guarantee that a region on the eviction list will + * not be freed (kbase_mem_free_region removes the allocation from the list + * before destroying it), or modified by other parts of the driver. + * The eviction list itself is guarded by the eviction lock and the MMU updates + * are protected by their own lock. 
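+ * Returning -1 rather than a page count tells the shrinker core to give
+ * up on this pool for the current pass; the scan also stops early once
+ * more than sc->nr_to_scan pages have been freed.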
+ */ +static +unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + struct kbase_mem_phy_alloc *tmp; + unsigned long freed = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + mutex_lock(&kctx->evict_lock); + + list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { + int err; + + err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, + 0, alloc->nents); + if (err != 0) { + /* + * Failed to remove GPU mapping, tell the shrinker + * to stop trying to shrink our slab even though we + * have pages in it. + */ + freed = -1; + goto out_unlock; + } + + /* + * Update alloc->evicted before freeing the backing so the + * helper can determine that it needs to bypass the accounting + * and memory pool. + */ + alloc->evicted = alloc->nents; + + kbase_free_phy_pages_helper(alloc, alloc->evicted); + freed += alloc->evicted; + list_del_init(&alloc->evict_node); + + /* + * Inform the JIT allocator this region has lost backing + * as it might need to free the allocation. + */ + kbase_jit_backing_lost(alloc->reg); + + /* Enough pages have been freed so stop now */ + if (freed > sc->nr_to_scan) + break; + } +out_unlock: + mutex_unlock(&kctx->evict_lock); + + return freed; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_evictable_reclaim_count_objects(s, sc); + + return kbase_mem_evictable_reclaim_scan_objects(s, sc); +} +#endif + +int kbase_mem_evictable_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->evict_list); + mutex_init(&kctx->evict_lock); + + /* Register shrinker */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; +#else + kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; + kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; +#endif + kctx->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + kctx->reclaim.batch = 0; +#endif + register_shrinker(&kctx->reclaim); + return 0; +} + +void kbase_mem_evictable_deinit(struct kbase_context *kctx) +{ + unregister_shrinker(&kctx->reclaim); +} + +/** + * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. + * @alloc: The physical allocation + */ +static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.kctx; + int __maybe_unused new_page_count; + int i; + + for (i = 0; i < alloc->nents; i++) { + struct page *p = phys_to_page(alloc->pages[i]); + + zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE); + } + + kbase_process_page_usage_dec(kctx, alloc->nents); + new_page_count = kbase_atomic_sub_pages(alloc->nents, + &kctx->used_pages); + kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); +#endif +} + +/** + * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. 
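+ * (Reverses kbase_mem_evictable_mark_reclaim(): the pages are added back
+ * to the per-process and per-device counters and removed from the
+ * NR_SLAB_RECLAIMABLE zone statistics.)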
+ * @alloc: The physical allocation + */ +static +void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.kctx; + int __maybe_unused new_page_count; + int i; + + new_page_count = kbase_atomic_add_pages(alloc->nents, + &kctx->used_pages); + kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters so that the allocation is accounted for + * against the process and thus is visible to the OOM killer, + * then remove it from the reclaimable accounting. */ + kbase_process_page_usage_inc(kctx, alloc->nents); + + for (i = 0; i < alloc->nents; i++) { + struct page *p = phys_to_page(alloc->pages[i]); + + zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE); + } + +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); +#endif +} + +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.kctx; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + /* This alloction can't already be on a list. */ + WARN_ON(!list_empty(&gpu_alloc->evict_node)); + + /* + * Try to shrink the CPU mappings as required, if we fail then + * fail the process of making this allocation evictable. + */ + err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, + 0, gpu_alloc->nents); + if (err) + return -EINVAL; + + /* + * Add the allocation to the eviction list, after this point the shrink + * can reclaim it. + */ + mutex_lock(&kctx->evict_lock); + list_add(&gpu_alloc->evict_node, &kctx->evict_list); + mutex_unlock(&kctx->evict_lock); + kbase_mem_evictable_mark_reclaim(gpu_alloc); + + gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; + return 0; +} + +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.kctx; + int err = 0; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * First remove the allocation from the eviction list as it's no + * longer eligible for eviction. + */ + mutex_lock(&kctx->evict_lock); + list_del_init(&gpu_alloc->evict_node); + mutex_unlock(&kctx->evict_lock); + + if (gpu_alloc->evicted == 0) { + /* + * The backing is still present, update the VM stats as it's + * in use again. + */ + kbase_mem_evictable_unmark_reclaim(gpu_alloc); + } else { + /* If the region is still alive ... */ + if (gpu_alloc->reg) { + /* ... allocate replacement backing ... */ + err = kbase_alloc_phy_pages_helper(gpu_alloc, + gpu_alloc->evicted); + + /* + * ... and grow the mapping back to its + * pre-eviction size. + */ + if (!err) + err = kbase_mem_grow_gpu_mapping(kctx, + gpu_alloc->reg, + gpu_alloc->evicted, 0); + + gpu_alloc->evicted = 0; + } + } + + /* If the region is still alive remove the DONT_NEED attribute. */ + if (gpu_alloc->reg) + gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; + + return (err == 0); +} + int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) { struct kbase_va_region *reg; int ret = -EINVAL; unsigned int real_flags = 0; unsigned int prev_flags = 0; + bool prev_needed, new_needed; KBASE_DEBUG_ASSERT(kctx); @@ -270,11 +677,11 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in flags &= mask; /* check for only supported flags */ - if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) goto out; /* mask covers bits we don't support? 
*/ - if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) goto out; /* convert flags */ @@ -284,6 +691,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in real_flags |= KBASE_REG_SHARE_IN; /* now we can lock down the context, and find the region */ + down_write(¤t->mm->mmap_sem); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -291,6 +699,28 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (!reg || (reg->flags & KBASE_REG_FREE)) goto out_unlock; + /* Is the region being transitioning between not needed and needed? */ + prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; + new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; + if (prev_needed != new_needed) { + /* Aliased allocations can't be made ephemeral */ + if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) + goto out_unlock; + + if (new_needed) { + /* Only native allocations can be marked not needed */ + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { + ret = -EINVAL; + goto out_unlock; + } + ret = kbase_mem_evictable_make(reg->gpu_alloc); + if (ret) + goto out_unlock; + } else { + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } + /* limit to imported memory */ if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) && (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) @@ -333,6 +763,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in out_unlock: kbase_gpu_vm_unlock(kctx); + up_write(¤t->mm->mmap_sem); out: return ret; } @@ -352,10 +783,6 @@ static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, um ump_alloc_flags cpu_flags; ump_alloc_flags gpu_flags; - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(va_pages); - KBASE_DEBUG_ASSERT(flags); - if (*flags & BASE_MEM_SECURE) goto bad_flags; @@ -554,6 +981,106 @@ no_buf: } #endif /* CONFIG_DMA_SHARED_BUFFER */ + +static struct kbase_va_region *kbase_mem_from_user_buffer( + struct kbase_context *kctx, unsigned long address, + unsigned long size, u64 *va_pages, u64 *flags) +{ + struct kbase_va_region *reg; + long faulted_pages; + int zone = KBASE_REG_ZONE_CUSTOM_VA; + + *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - + PFN_DOWN(address); + if (!*va_pages) + goto bad_size; + + if (*va_pages > (UINT64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + + /* SAME_VA generally not supported with imported memory (no known use cases) */ + *flags &= ~BASE_MEM_SAME_VA; + +#ifdef CONFIG_64BIT + if (!kctx->is_compat) { + /* 64-bit tasks must MMAP anyway, but not expose this address to + * clients */ + *flags |= BASE_MEM_NEED_MMAP; + zone = KBASE_REG_ZONE_SAME_VA; + } +#endif + reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone); + + if (!reg) + goto no_region; + + reg->gpu_alloc = kbase_alloc_create(*va_pages, + KBASE_MEM_TYPE_IMPORTED_USER_BUF); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc_obj; + + reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */ + + if (*flags & BASE_MEM_PROT_CPU_WR) + reg->flags |= KBASE_REG_CPU_WR; + + if (*flags & BASE_MEM_PROT_CPU_RD) + reg->flags |= KBASE_REG_CPU_RD; + + if (*flags & BASE_MEM_PROT_GPU_WR) + reg->flags |= KBASE_REG_GPU_WR; + + if (*flags & BASE_MEM_PROT_GPU_RD) + reg->flags |= KBASE_REG_GPU_RD; + + 
down_read(¤t->mm->mmap_sem); + + /* A sanity check that get_user_pages will work on the memory */ + /* (so the initial import fails on weird memory regions rather than */ + /* the job failing when we try to handle the external resources). */ + /* It doesn't take a reference to the pages (because the page list is NULL). */ + /* We can't really store the page list because that would involve */ + /* keeping the pages pinned - instead we pin/unpin around the job */ + /* (as part of the external resources handling code) */ + faulted_pages = get_user_pages(current, current->mm, address, *va_pages, + reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); + up_read(¤t->mm->mmap_sem); + + if (faulted_pages != *va_pages) + goto fault_mismatch; + + reg->gpu_alloc->imported.user_buf.size = size; + reg->gpu_alloc->imported.user_buf.address = address; + reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages; + reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages, + sizeof(struct page *), GFP_KERNEL); + reg->gpu_alloc->imported.user_buf.owner = current; + + if (!reg->gpu_alloc->imported.user_buf.pages) + goto no_page_array; + + reg->gpu_alloc->nents = 0; + reg->extent = 0; + + return reg; + +no_page_array: +fault_mismatch: + kbase_mem_phy_alloc_put(reg->gpu_alloc); +no_alloc_obj: + kfree(reg); +no_region: +bad_size: + return NULL; + +} + + u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages) @@ -632,8 +1159,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate and add src handles */ for (i = 0; i < nents; i++) { - if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) { - if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE) + if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { + if (ai[i].handle.basep.handle != + BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) goto bad_handle; /* unsupported magic handle */ if (!ai[i].length) goto bad_handle; /* must be > 0 */ @@ -645,13 +1173,17 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, struct kbase_va_region *aliasing_reg; struct kbase_mem_phy_alloc *alloc; - aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT); + aliasing_reg = kbase_region_tracker_find_region_base_address( + kctx, + (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); /* validate found region */ if (!aliasing_reg) goto bad_handle; /* Not found */ if (aliasing_reg->flags & KBASE_REG_FREE) goto bad_handle; /* Free region */ + if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + goto bad_handle; /* Ephemeral region */ if (!aliasing_reg->gpu_alloc) goto bad_handle; /* No alloc */ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) @@ -736,7 +1268,9 @@ bad_flags: return 0; } -int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags) +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + void __user *phandle, u64 *gpu_va, u64 *va_pages, + u64 *flags) { struct kbase_va_region *reg; @@ -759,19 +1293,53 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, switch (type) { #ifdef CONFIG_UMP - case BASE_MEM_IMPORT_TYPE_UMP: - reg = kbase_mem_from_ump(kctx, (ump_secure_id)handle, va_pages, flags); - break; + case BASE_MEM_IMPORT_TYPE_UMP: { + ump_secure_id id; + + if (get_user(id, (ump_secure_id __user *)phandle)) + reg = NULL; + else + reg = 
kbase_mem_from_ump(kctx, id, va_pages, flags); + } + break; #endif /* CONFIG_UMP */ #ifdef CONFIG_DMA_SHARED_BUFFER - case BASE_MEM_IMPORT_TYPE_UMM: - reg = kbase_mem_from_umm(kctx, handle, va_pages, flags); - break; + case BASE_MEM_IMPORT_TYPE_UMM: { + int fd; + + if (get_user(fd, (int __user *)phandle)) + reg = NULL; + else + reg = kbase_mem_from_umm(kctx, fd, va_pages, flags); + } + break; #endif /* CONFIG_DMA_SHARED_BUFFER */ - default: + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + struct base_mem_import_user_buffer user_buffer; + void __user *uptr; + + if (copy_from_user(&user_buffer, phandle, + sizeof(user_buffer))) { + reg = NULL; + } else { +#ifdef CONFIG_COMPAT + if (kctx->is_compat) + uptr = compat_ptr(user_buffer.ptr.compat_value); + else +#endif + uptr = user_buffer.ptr.value; + + reg = kbase_mem_from_user_buffer(kctx, + (unsigned long)uptr, user_buffer.length, + va_pages, flags); + } + break; + } + default: { reg = NULL; break; } + } if (!reg) goto no_reg; @@ -837,6 +1405,7 @@ static int zap_range_nolock(struct mm_struct *mm, int err = -EINVAL; /* in case end < start */ while (start < end) { + unsigned long local_start; unsigned long local_end; vma = find_vma_intersection(mm, start, end); @@ -847,12 +1416,17 @@ static int zap_range_nolock(struct mm_struct *mm, if (vma->vm_ops != vm_ops) goto try_next; + local_start = vma->vm_start; + + if (start > local_start) + local_start = start; + local_end = vma->vm_end; if (end < local_end) local_end = end; - err = zap_vma_ptes(vma, start, local_end - start); + err = zap_vma_ptes(vma, local_start, local_end - local_start); if (unlikely(err)) break; @@ -864,19 +1438,110 @@ try_next: return err; } +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + phys_addr_t *phy_pages; + u64 delta = new_pages - old_pages; + int ret = 0; + + lockdep_assert_held(&kctx->reg_lock); + + /* Map the new pages into the GPU */ + phy_pages = kbase_get_gpu_phy_pages(reg); + ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags); + + return ret; +} + +static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc; + struct kbase_cpu_mapping *mapping; + int err; + + lockdep_assert_held(&kctx->process_mm->mmap_sem); + + list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) { + unsigned long mapping_size; + + mapping_size = (mapping->vm_end - mapping->vm_start) + >> PAGE_SHIFT; + + /* is this mapping affected ?*/ + if ((mapping->page_off + mapping_size) > new_pages) { + unsigned long first_bad = 0; + + if (new_pages > mapping->page_off) + first_bad = new_pages - mapping->page_off; + + err = zap_range_nolock(current->mm, + &kbase_vm_ops, + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end); + + WARN(err, + "Failed to zap VA range (0x%lx - 0x%lx);\n", + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end + ); + + /* The zap failed, give up and exit */ + if (err) + goto failed; + } + } + + return 0; + +failed: + return err; +} + +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + u64 delta = old_pages - new_pages; + int ret = 0; + + ret = kbase_mmu_teardown_pages(kctx, + reg->start_pfn + new_pages, delta); + if (ret) + return ret; + +#ifndef CONFIG_MALI_NO_MALI + if 
(kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { + /* + * Wait for GPU to flush write buffer before freeing + * physical pages. + */ + kbase_wait_write_flush(kctx); + } +#endif + + return ret; +} + int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason) { u64 old_pages; u64 delta; int res = -EINVAL; struct kbase_va_region *reg; - phys_addr_t *phy_pages; + bool read_locked = false; KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(failure_reason); KBASE_DEBUG_ASSERT(gpu_addr != 0); - down_read(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -910,6 +1575,11 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; goto out_unlock; } + /* can't grow regions which are ephemeral */ + if (reg->flags & BASE_MEM_DONT_NEED) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; + goto out_unlock; + } if (new_pages == reg->gpu_alloc->nents) { /* no change */ @@ -917,14 +1587,17 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } - phy_pages = kbase_get_gpu_phy_pages(reg); old_pages = kbase_reg_current_backed_size(reg); - if (new_pages > old_pages) { - /* growing */ - int err; - delta = new_pages - old_pages; + + /* + * No update to the mm so downgrade the writer lock to a read + * lock so other readers aren't blocked after this point. + */ + downgrade_write(¤t->mm->mmap_sem); + read_locked = true; + /* Allocate some more pages */ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; @@ -939,9 +1612,15 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } } - err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags); - if (err) { + + /* No update required for CPU mappings, that's done on fault. */ + + /* Update GPU mapping. 
*/ + res = kbase_mem_grow_gpu_mapping(kctx, reg, + new_pages, old_pages); + + /* On error free the new pages */ + if (res) { kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, @@ -950,60 +1629,35 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } } else { - /* shrinking */ - struct kbase_cpu_mapping *mapping; - int err; + delta = old_pages - new_pages; - /* first, unmap from any mappings affected */ - list_for_each_entry(mapping, ®->cpu_alloc->mappings, mappings_list) { - unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT; - - /* is this mapping affected ?*/ - if ((mapping->page_off + mapping_size) > new_pages) { - unsigned long first_bad = 0; - int zap_res; - - if (new_pages > mapping->page_off) - first_bad = new_pages - mapping->page_off; - - zap_res = zap_range_nolock(current->mm, - &kbase_vm_ops, - mapping->vm_start + - (first_bad << PAGE_SHIFT), - mapping->vm_end); - WARN(zap_res, - "Failed to zap VA range (0x%lx - 0x%lx);\n", - mapping->vm_start + - (first_bad << PAGE_SHIFT), - mapping->vm_end - ); - } + /* Update all CPU mapping(s) */ + res = kbase_mem_shrink_cpu_mapping(kctx, reg, + new_pages, old_pages); + if (res) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; + goto out_unlock; } - /* Free some pages */ - delta = old_pages - new_pages; - err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages, - delta); - if (err) { + /* Update the GPU mapping */ + res = kbase_mem_shrink_gpu_mapping(kctx, reg, + new_pages, old_pages); + if (res) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; goto out_unlock; } -#ifndef CONFIG_MALI_NO_MALI - if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { - /* Wait for GPU to flush write buffer before freeing physical pages */ - kbase_wait_write_flush(kctx); - } -#endif + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); } - res = 0; - out_unlock: kbase_gpu_vm_unlock(kctx); - up_read(¤t->mm->mmap_sem); + if (read_locked) + up_read(¤t->mm->mmap_sem); + else + up_write(¤t->mm->mmap_sem); return res; } @@ -1075,6 +1729,10 @@ static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (map->page_off + rel_pgoff >= map->alloc->nents) goto locked_bad_fault; + /* Fault on access to DONT_NEED regions */ + if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) + goto locked_bad_fault; + /* insert all valid pages from the fault location */ for (i = rel_pgoff; i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT, @@ -1222,7 +1880,11 @@ static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_st goto out; } - kbase_device_trace_buffer_install(kctx, tb, size); + err = kbase_device_trace_buffer_install(kctx, tb, size); + if (err) { + vfree(tb); + goto out; + } } else { err = -EINVAL; goto out; @@ -1438,8 +2100,8 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) rcu_read_unlock(); switch (vma->vm_pgoff) { - case PFN_DOWN(BASE_MEM_INVALID_HANDLE): - case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE): + case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): + case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): /* Illegal handle for direct map */ err = -EINVAL; goto out_unlock; @@ -1686,6 +2348,9 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (page_index + page_count > kbase_reg_current_backed_size(reg)) 
goto out_unlock; + if (reg->flags & KBASE_REG_DONT_NEED) + goto out_unlock; + page_array = kbase_get_cpu_phy_pages(reg); if (!page_array) goto out_unlock; @@ -1720,6 +2385,12 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0; sync_needed = map->is_cached; +#ifdef CONFIG_MALI_COH_KERN + /* kernel can use coherent memory if supported */ + if (kctx->kbdev->system_coherency == COHERENCY_ACE) + sync_needed = false; +#endif + if (sync_needed) { /* Sync first page */ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); @@ -1761,6 +2432,11 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); bool sync_needed = map->is_cached; vunmap(addr); +#ifdef CONFIG_MALI_COH_KERN + /* kernel can use coherent memory if supported */ + if (kctx->kbdev->system_coherency == COHERENCY_ACE) + sync_needed = false; +#endif if (sync_needed) { off_t offset = (uintptr_t)map->addr & ~PAGE_MASK; size_t size = map->size; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h index 1d85415..6c0fb56 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,80 @@ struct kbase_hwc_dma_mapping { struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment); int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages); -int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags); +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + void __user *phandle, u64 *gpu_va, u64 *va_pages, + u64 *flags); u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason); int kbase_mmap(struct file *file, struct vm_area_struct *vma); +/** + * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction + * mechanism. + * @kctx: The kbase context to initialize. + * + * Return: Zero on success or -errno on failure. + */ +int kbase_mem_evictable_init(struct kbase_context *kctx); + +/** + * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction + * mechanism. + * @kctx: The kbase context to de-initialize. + */ +void kbase_mem_evictable_deinit(struct kbase_context *kctx); + +/** + * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the grow + * @old_pages: The number of pages before the grow + * + * Return: 0 on success, -errno on error. 
+ * + * Expand the GPU mapping to encompass the new psychical pages which have + * been added to the allocation. + * + * Note: Caller must be holding the region lock. + */ +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_mem_evictable_make - Make a physical allocation eligible for eviction + * @gpu_alloc: The physical allocation to make evictable + * + * Return: 0 on success, -errno on error. + * + * Take the provided region and make all the physical pages within it + * reclaimable by the kernel, updating the per-process VM stats as well. + * Remove any CPU mappings (as these can't be removed in the shrinker callback + * as mmap_sem might already be taken) but leave the GPU mapping intact as + * and until the shrinker reclaims the allocation. + * + * Note: Must be called with the region lock of the containing context. + */ +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); + +/** + * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for + * eviction. + * @alloc: The physical allocation to remove eviction eligibility from. + * + * Return: True if the allocation had its backing restored and false if + * it hasn't. + * + * Make the physical pages in the region no longer reclaimable and update the + * per-process stats, if the shrinker has already evicted the memory then + * re-allocate it if the region is still alive. + * + * Note: Must be called with the region lock of the containing context. + */ +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); + struct kbase_vmap_struct { u64 gpu_addr; struct kbase_mem_phy_alloc *cpu_alloc; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index a049205..c0f47be 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,13 @@ #include #include -/* Backwards compatibility with kernels using the old carveout allocator */ +/* This function is only provided for backwards compatibility with kernels + * which use the old carveout allocator. + * + * The forward declaration is to keep sparse happy. 
+ */ +int __init kbase_carveout_mem_reserve( + phys_addr_t size); int __init kbase_carveout_mem_reserve(phys_addr_t size) { return 0; @@ -37,6 +43,9 @@ int __init kbase_carveout_mem_reserve(phys_addr_t size) kbase_mem_pool_max_size(pool), \ ##__VA_ARGS__) +#define NOT_DIRTY false +#define NOT_RECLAIMED false + static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) { spin_lock(&pool->pool_lock); @@ -73,6 +82,8 @@ static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, list_add(&p->lru, &pool->page_list); pool->cur_size++; + zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE); + pool_dbg(pool, "added page\n"); } @@ -86,8 +97,14 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, struct list_head *page_list, size_t nr_pages) { + struct page *p; + lockdep_assert_held(&pool->pool_lock); + list_for_each_entry(p, page_list, lru) { + zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE); + } + list_splice(page_list, &pool->page_list); pool->cur_size += nr_pages; @@ -115,6 +132,8 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) list_del_init(&p->lru); pool->cur_size--; + zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE); + pool_dbg(pool, "removed page\n"); return p; @@ -489,7 +508,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, return 0; err_rollback: - kbase_mem_pool_free_pages(pool, i, pages, false); + kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); return err; } @@ -532,7 +551,7 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, } void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - phys_addr_t *pages, bool dirty) + phys_addr_t *pages, bool dirty, bool reclaimed) { struct kbase_mem_pool *next_pool = pool->next_pool; struct page *p; @@ -542,22 +561,24 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pool_dbg(pool, "free_pages(%zu):\n", nr_pages); - /* Add to this pool */ - nr_to_pool = kbase_mem_pool_capacity(pool); - nr_to_pool = min(nr_pages, nr_to_pool); + if (!reclaimed) { + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); - kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); - i += nr_to_pool; + i += nr_to_pool; - if (i != nr_pages && next_pool) { - /* Spill to next pool (may overspill) */ - nr_to_pool = kbase_mem_pool_capacity(next_pool); - nr_to_pool = min(nr_pages - i, nr_to_pool); + if (i != nr_pages && next_pool) { + /* Spill to next pool (may overspill) */ + nr_to_pool = kbase_mem_pool_capacity(next_pool); + nr_to_pool = min(nr_pages - i, nr_to_pool); - kbase_mem_pool_add_array(next_pool, nr_to_pool, pages + i, - true, dirty); - i += nr_to_pool; + kbase_mem_pool_add_array(next_pool, nr_to_pool, + pages + i, true, dirty); + i += nr_to_pool; + } } /* Free any remaining pages to kernel */ @@ -566,6 +587,10 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, continue; p = phys_to_page(pages[i]); + if (reclaimed) + zone_page_state_add(-1, page_zone(p), + NR_SLAB_RECLAIMABLE); + kbase_mem_pool_free_page(pool, p); pages[i] = 0; } diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c index bf60c19..0b19d05 100755 --- 
a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -19,19 +19,6 @@ #ifdef CONFIG_DEBUG_FS -/* mam_profile file name max length 22 based on format _\0 */ -#define KBASEP_DEBUGFS_FNAME_SIZE_MAX (10+1+10+1) - -void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size) -{ - spin_lock(&kctx->mem_profile_lock); - kfree(kctx->mem_profile_data); - kctx->mem_profile_data = data; - kctx->mem_profile_size = size; - spin_unlock(&kctx->mem_profile_lock); -} - /** Show callback for the @c mem_profile debugfs file. * * This function is called to get the contents of the @c mem_profile debugfs @@ -40,19 +27,19 @@ void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, * @param sfile The debugfs entry * @param data Data associated with the entry * - * @return 0 if successfully prints data in debugfs entry file - * -1 if it encountered an error + * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise */ static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) { struct kbase_context *kctx = sfile->private; - KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_lock(&kctx->mem_profile_lock); - spin_lock(&kctx->mem_profile_lock); seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); + seq_putc(sfile, '\n'); - spin_unlock(&kctx->mem_profile_lock); + + mutex_unlock(&kctx->mem_profile_lock); return 0; } @@ -72,34 +59,60 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = { .release = single_release, }; -void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx) +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) { - KBASE_DEBUG_ASSERT(kctx != NULL); + int err = 0; + + mutex_lock(&kctx->mem_profile_lock); - spin_lock_init(&kctx->mem_profile_lock); + dev_dbg(kctx->kbdev->dev, "initialised: %d", + kctx->mem_profile_initialized); - debugfs_create_file("mem_profile", S_IRUGO, kctx->kctx_dentry, kctx, - &kbasep_mem_profile_debugfs_fops); + if (!kctx->mem_profile_initialized) { + if (!debugfs_create_file("mem_profile", S_IRUGO, + kctx->kctx_dentry, kctx, + &kbasep_mem_profile_debugfs_fops)) { + err = -EAGAIN; + } else { + kctx->mem_profile_initialized = true; + } + } + + if (kctx->mem_profile_initialized) { + kfree(kctx->mem_profile_data); + kctx->mem_profile_data = data; + kctx->mem_profile_size = size; + } + + dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", + err, kctx->mem_profile_initialized); + + mutex_unlock(&kctx->mem_profile_lock); + + return err; } void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) { - KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_lock(&kctx->mem_profile_lock); + + dev_dbg(kctx->kbdev->dev, "initialised: %d", + kctx->mem_profile_initialized); - spin_lock(&kctx->mem_profile_lock); kfree(kctx->mem_profile_data); kctx->mem_profile_data = NULL; - spin_unlock(&kctx->mem_profile_lock); + kctx->mem_profile_size = 0; + + mutex_unlock(&kctx->mem_profile_lock); } #else /* CONFIG_DEBUG_FS */ -/** - * @brief Stub function for when debugfs is disabled - */ -void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size) +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) { kfree(data); + return 0; } #endif /* CONFIG_DEBUG_FS */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h 
b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h index 205bd37..9555197 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,29 +30,31 @@ #include #include -/** - * @brief Add new entry to Mali memory profile debugfs - */ -void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx); - /** * @brief Remove entry from Mali memory profile debugfs */ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); /** - * @brief Insert data to debugfs file, so it can be read by userspce + * @brief Insert @p data to the debugfs file so it can be read by userspace + * + * The function takes ownership of @p data and frees it later when new data + * is inserted. * - * Function takes ownership of @c data and frees it later when new data - * are inserted. + * If the debugfs entry corresponding to the @p kctx doesn't exist, + * an attempt will be made to create it. * - * @param kctx Context to which file data should be inserted - * @param data NULL-terminated string to be inserted to mem_profile file, - without trailing new line character - * @param size @c buf length + * @param kctx The context whose debugfs file @p data should be inserted to + * @param data A NULL-terminated string to be inserted to the debugfs file, + * without the trailing new line character + * @param size The length of the @p data string + * @return 0 if @p data inserted correctly + * -EAGAIN in case of error + * @post @ref mem_profile_initialized will be set to @c true + * the first time this function succeeds. */ -void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, - size_t size); +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size); #endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c index c061f2a..bf45d39 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#endif #include #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -46,18 +44,22 @@ /** * kbase_mmu_sync_pgd - sync page directory to memory - * @dev: Device pointer. + * @kbdev: Device pointer. * @handle: Address of DMA region. * @size: Size of the region to sync. * * This should be called after each page directory update. 
*/ -static void kbase_mmu_sync_pgd(struct device *dev, +static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, dma_addr_t handle, size_t size) { - - dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE); + /* If page table is not coherent then ensure the gpu can read + * the pages from memory + */ + if (kbdev->system_coherency != COHERENCY_ACE) + dma_sync_single_for_device(kbdev->dev, handle, size, + DMA_TO_DEVICE); } /* @@ -136,6 +138,18 @@ void page_fault_worker(struct work_struct *data) dev_warn(kbdev->dev, "Access flag unexpectedly set"); goto fault_done; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: + + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault"); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault"); + goto fault_done; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ default: kbase_mmu_report_fault_and_kill(kctx, faulting_as, @@ -164,6 +178,13 @@ void page_fault_worker(struct work_struct *data) goto fault_done; } + if ((region->flags & KBASE_REG_DONT_NEED)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Don't need memory can't be grown"); + goto fault_done; + } + /* find the size we need to grow it by */ /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address * validating the fault_adress to be within a size_t from the start_pfn */ @@ -238,14 +259,9 @@ void page_fault_worker(struct work_struct *data) /* alloc success */ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); - /* AS transaction begin */ - mutex_lock(&faulting_as->transaction_mutex); - /* set up the new pages */ err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags); if (err) { - /* failed to insert pages, handle as a normal PF */ - mutex_unlock(&faulting_as->transaction_mutex); kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); if (region->gpu_alloc != region->cpu_alloc) kbase_free_phy_pages_helper(region->cpu_alloc, @@ -259,11 +275,10 @@ void page_fault_worker(struct work_struct *data) #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); #endif -#if defined(CONFIG_MALI_MIPE_ENABLED) - kbase_tlstream_aux_pagefault( - as_no, - atomic_read(&kctx->used_pages)); -#endif + kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages); + + /* AS transaction begin */ + mutex_lock(&faulting_as->transaction_mutex); /* flush L2 and unlock the VA (resumes the MMU) */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) @@ -316,15 +331,20 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) u64 *page; int i; struct page *p; + int new_page_count __maybe_unused; KBASE_DEBUG_ASSERT(NULL != kctx); - kbase_atomic_add_pages(1, &kctx->used_pages); + new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages); kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages); p = kbase_mem_pool_alloc(&kctx->mem_pool); if (!p) goto sub_pages; + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); + page = kmap(p); if (NULL == page) goto alloc_free; @@ -334,7 +354,7 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) kctx->kbdev->mmu_mode->entry_invalidate(&page[i]); 
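Note on the reworked kbase_mmu_sync_pgd() above: it now takes the kbase_device and only issues the DMA cache clean when the GPU is not ACE-coherent, since a fully coherent interconnect snoops the CPU caches and the flush would be redundant. A user-space sketch of that decision (the enum values and fake_device type are illustrative stand-ins, not the Midgard definitions):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the driver's coherency modes. */
enum coherency_mode { COHERENCY_NONE, COHERENCY_ACE_LITE, COHERENCY_ACE };

struct fake_device {
	enum coherency_mode system_coherency;
};

/* Mirrors the reworked sync decision: skip the clean-to-memory step only
 * when the GPU participates in full ACE coherency. */
static bool pgd_sync_needed(const struct fake_device *dev)
{
	return dev->system_coherency != COHERENCY_ACE;
}

int main(void)
{
	struct fake_device noncoherent = { COHERENCY_NONE };
	struct fake_device coherent = { COHERENCY_ACE };

	printf("non-coherent GPU: PGD sync %s\n",
	       pgd_sync_needed(&noncoherent) ? "required" : "skipped");
	printf("ACE-coherent GPU: PGD sync %s\n",
	       pgd_sync_needed(&coherent) ? "required" : "skipped");
	return 0;
}

The same system_coherency == COHERENCY_ACE test is what the CONFIG_MALI_COH_KERN hunks in kbase_vmap()/kbase_vunmap() use to skip CPU cache syncs on kernel mappings.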
- kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE); + kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); kunmap(p); return page_to_phys(p); @@ -360,7 +380,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); /* * Architecture spec defines level-0 as being the top-most. @@ -388,8 +408,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); - kbase_mmu_sync_pgd(kctx->kbdev->dev, - kbase_dma_addr(p), PAGE_SIZE); + kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); /* Rely on the caller to update the address space flags. */ } @@ -402,9 +421,10 @@ static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) phys_addr_t pgd; int l; - pgd = kctx->pgd; + lockdep_assert_held(&kctx->mmu_lock); - for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) { + pgd = kctx->pgd; + for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l); /* Handle failure condition */ if (!pgd) { @@ -424,7 +444,7 @@ static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *k KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); /* * Architecture spec defines level-0 as being the top-most. @@ -449,9 +469,11 @@ static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context phys_addr_t pgd; int l; + lockdep_assert_held(&kctx->mmu_lock); + pgd = kctx->pgd; - for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) { + for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l); /* Should never fail */ KBASE_DEBUG_ASSERT(0 != pgd); @@ -472,7 +494,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -500,9 +522,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp vpfn += count; nr -= count; - kbase_mmu_sync_pgd(kctx->kbdev->dev, - kbase_dma_addr(p), - PAGE_SIZE); + kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE); kunmap_atomic(pgd_page); } @@ -522,13 +542,14 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + mutex_lock(&kctx->mmu_lock); while (nr) { unsigned int i; @@ -556,7 +577,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); @@ -570,7 +592,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) { @@ -584,10 +607,9 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, vpfn += count; nr -= count; - 
kbase_mmu_sync_pgd(kctx->kbdev->dev, - kbase_dma_addr(p) + - (index * sizeof(u64)), - count * sizeof(u64)); + kbase_mmu_sync_pgd(kctx->kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); kunmap(p); /* We have started modifying the page table. @@ -596,7 +618,12 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } + mutex_unlock(&kctx->mmu_lock); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + return err; } /* @@ -613,13 +640,14 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + mutex_lock(&kctx->mmu_lock); while (nr) { unsigned int i; @@ -647,7 +675,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); @@ -661,7 +690,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) { @@ -676,10 +706,9 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, vpfn += count; nr -= count; - kbase_mmu_sync_pgd(kctx->kbdev->dev, - kbase_dma_addr(p) + - (index * sizeof(u64)), - count * sizeof(u64)); + kbase_mmu_sync_pgd(kctx->kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); kunmap(p); /* We have started modifying the page table. If further pages @@ -688,7 +717,13 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } + + mutex_unlock(&kctx->mmu_lock); return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + return err; } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); @@ -782,17 +817,18 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) struct kbase_device *kbdev; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr); - lockdep_assert_held(&kctx->reg_lock); - if (0 == nr) { /* early out if nothing to do */ return 0; } + mutex_lock(&kctx->mmu_lock); + kbdev = kctx->kbdev; mmu_mode = kbdev->mmu_mode; @@ -808,14 +844,16 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) pgd = mmu_get_bottom_pgd(kctx, vpfn); if (!pgd) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n"); - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) @@ -824,16 +862,20 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) vpfn += count; nr -= count; - kbase_mmu_sync_pgd(kctx->kbdev->dev, - kbase_dma_addr(p) + - (index * sizeof(u64)), - count * sizeof(u64)); + kbase_mmu_sync_pgd(kctx->kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); kunmap(p); } + mutex_unlock(&kctx->mmu_lock); kbase_mmu_flush(kctx, vpfn, requested_nr); return 0; + +fail_unlock: + 
mutex_unlock(&kctx->mmu_lock); + return err; } KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); @@ -856,12 +898,13 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph u64 *pgd_page; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; + int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->reg_lock); + mutex_lock(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -880,14 +923,16 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph pgd = mmu_get_bottom_pgd(kctx, vpfn); if (!pgd) { dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); - return -EINVAL; + err = -EINVAL; + goto fail_unlock; } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { dev_warn(kctx->kbdev->dev, "kmap failure\n"); - return -ENOMEM; + err = -ENOMEM; + goto fail_unlock; } for (i = 0; i < count; i++) @@ -898,17 +943,20 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph vpfn += count; nr -= count; - kbase_mmu_sync_pgd(kctx->kbdev->dev, - kbase_dma_addr(p) + - (index * sizeof(u64)), - count * sizeof(u64)); + kbase_mmu_sync_pgd(kctx->kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); kunmap(pfn_to_page(PFN_DOWN(pgd))); } + mutex_unlock(&kctx->mmu_lock); kbase_mmu_flush(kctx, vpfn, requested_nr); - return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu_lock); + return err; } /* This is a debug feature only */ @@ -936,7 +984,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); /* kmap_atomic should NEVER fail. 
*/ @@ -952,7 +1000,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); if (target_pgd) { - if (level < 2) { + if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) { mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64))); } else { /* @@ -981,6 +1029,8 @@ int kbase_mmu_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); + mutex_init(&kctx->mmu_lock); + /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); @@ -1001,18 +1051,24 @@ void kbase_mmu_term(struct kbase_context *kctx) void kbase_mmu_free_pgd(struct kbase_context *kctx) { + int new_page_count __maybe_unused; + KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); - lockdep_assert_held(&kctx->reg_lock); - + mutex_lock(&kctx->mmu_lock); mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); + mutex_unlock(&kctx->mmu_lock); beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true); kbase_process_page_usage_dec(kctx, 1); - kbase_atomic_sub_pages(1, &kctx->used_pages); + new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages); kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + + kbase_tlstream_aux_pagesalloc( + (u32)kctx->id, + (u64)new_page_count); } KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd); @@ -1027,7 +1083,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->mmu_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -1052,16 +1108,21 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, *size_left -= size; } - for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { - if (mmu_mode->pte_is_valid(pgd_page[i])) { - target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); - - dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left); - if (!dump_size) { - kunmap(pfn_to_page(PFN_DOWN(pgd))); - return 0; + if (level < MIDGARD_MMU_BOTTOMLEVEL) { + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + if (mmu_mode->pte_is_valid(pgd_page[i])) { + target_pgd = mmu_mode->pte_to_phy_addr( + pgd_page[i]); + + dump_size = kbasep_mmu_dump_level(kctx, + target_pgd, level + 1, + buffer, size_left); + if (!dump_size) { + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return 0; + } + size += dump_size; } - size += dump_size; } } @@ -1077,13 +1138,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) KBASE_DEBUG_ASSERT(kctx); - lockdep_assert_held(&kctx->reg_lock); - if (0 == nr_pages) { /* can't dump in a 0 sized buffer, early out */ return NULL; } + mutex_lock(&kctx->mmu_lock); + size_left = nr_pages * PAGE_SIZE; KBASE_DEBUG_ASSERT(0 != size_left); @@ -1105,7 +1166,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup); config[0] = as_setup.transtab; config[1] = as_setup.memattr; - config[2] = 0; + config[2] = as_setup.transcfg; memcpy(buffer, &config, sizeof(config)); mmu_dump_buffer += sizeof(config); size_left -= sizeof(config); @@ -1119,10 +1180,8 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) &mmu_dump_buffer, &size_left); - if (!size) { - vfree(kaddr); - 
return NULL; - } + if (!size) + goto fail_free; /* Add on the size for the end marker */ size += sizeof(u64); @@ -1133,15 +1192,20 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { /* The buffer isn't big enough - free the memory and return failure */ - vfree(kaddr); - return NULL; + goto fail_free; } /* Add the end marker */ memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); } + mutex_unlock(&kctx->mmu_lock); return kaddr; + +fail_free: + vfree(kaddr); + mutex_unlock(&kctx->mmu_lock); + return NULL; } KBASE_EXPORT_TEST_API(kbase_mmu_dump); @@ -1306,6 +1370,15 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) e = "TRANSLATION_FAULT"; break; case 0xC8: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ e = "PERMISSION_FAULT"; break; case 0xD0: @@ -1319,8 +1392,38 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) e = "TRANSTAB_BUS_FAULT"; break; case 0xD8: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ e = "ACCESS_FLAG"; break; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + e = "ADDRESS_SIZE_FAULT"; + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + e = "MEMORY_ATTRIBUTES_FAULT"; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ break; default: e = "UNKNOWN"; @@ -1334,7 +1437,12 @@ static const char *access_type_name(struct kbase_device *kbdev, u32 fault_status) { switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + return "ATOMIC"; +#else return "UNKNOWN"; +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ case AS_FAULTSTATUS_ACCESS_TYPE_READ: return "READ"; case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: @@ -1667,8 +1775,15 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex */ kbasep_js_clear_submit_allowed(js_devdata, kctx); +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + dev_warn(kbdev->dev, + "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", + as->number, as->fault_addr, + as->fault_extra_addr); +#else dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", as->number, as->fault_addr); +#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ /* * We need to switch to UNMAPPED mode - but we do this in a diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c index 079ef81..683cabb 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,17 +77,25 @@ static void mmu_get_as_setup(struct kbase_context *kctx, (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | (AS_MEMATTR_LPAE_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << + (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (AS_MEMATTR_LPAE_OUTER_WA << + (AS_MEMATTR_INDEX_OUTER_WA * 8)) | 0; /* The other indices are unused for now */ - setup->transtab = (u64)kctx->pgd & - ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK); - - setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE; - setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER; + setup->transtab = ((u64)kctx->pgd & + ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | + AS_TRANSTAB_LPAE_ADRMODE_TABLE | + AS_TRANSTAB_LPAE_READ_INNER; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; +#else + setup->transcfg = 0; +#endif } static void mmu_update(struct kbase_context *kctx) @@ -109,6 +117,9 @@ static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; +#ifdef CONFIG_MALI_GPU_MMU_AARCH64 + current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; +#endif /* Apply the address space setting */ kbase_mmu_hw_configure(kbdev, as, NULL); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c index 71f005e..6ac49df 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,15 +27,11 @@ #include #define JOB_NOT_STARTED 0 -#define JOB_TYPE_MASK 0xfe -#define JOB_TYPE_NULL (1 << 1) -#define JOB_TYPE_VERTEX (5 << 1) -#define JOB_TYPE_TILER (7 << 1) -#define JOB_TYPE_FUSED (8 << 1) -#define JOB_TYPE_FRAGMENT (9 << 1) - -#define JOB_FLAG_DESC_SIZE (1 << 0) -#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8) +#define JOB_TYPE_NULL (1) +#define JOB_TYPE_VERTEX (5) +#define JOB_TYPE_TILER (7) +#define JOB_TYPE_FUSED (8) +#define JOB_TYPE_FRAGMENT (9) #define JOB_HEADER_32_FBD_OFFSET (31*4) #define JOB_HEADER_64_FBD_OFFSET (44*4) @@ -58,17 +54,9 @@ #define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF) #define JOB_POLYGON_LIST (0x03) -struct job_head { - u32 status; - u32 not_complete_index; - u64 fault_addr; - u16 flags; - u16 index; - u16 dependencies[2]; - union { - u64 _64; - u32 _32; - } next; +struct fragment_job { + struct job_descriptor_header header; + u32 x[2]; union { u64 _64; @@ -77,28 +65,43 @@ struct job_head { }; static void dump_job_head(struct kbase_context *kctx, char *head_str, - struct job_head *job) + struct job_descriptor_header *job) { #ifdef CONFIG_MALI_DEBUG dev_dbg(kctx->kbdev->dev, "%s\n", head_str); - dev_dbg(kctx->kbdev->dev, "addr = %p\n" - "status = %x\n" - "not_complete_index = %x\n" - "fault_addr = %llx\n" - "flags = %x\n" - "index = %x\n" - "dependencies = %x,%x\n", - job, job->status, job->not_complete_index, - job->fault_addr, job->flags, job->index, - job->dependencies[0], - job->dependencies[1]); - - if (job->flags & JOB_FLAG_DESC_SIZE) + dev_dbg(kctx->kbdev->dev, + "addr = %p\n" + "exception_status = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n" + "first_incomplete_task = %x\n" + "fault_pointer = %llx\n" + "job_descriptor_size = %x\n" + "job_type = %x\n" + "job_barrier = %x\n" + "_reserved_01 = %x\n" + "_reserved_02 = %x\n" + "_reserved_03 = %x\n" + "_reserved_04/05 = %x,%x\n" + "job_index = %x\n" + "dependencies = %x,%x\n", + job, job->exception_status, + JOB_SOURCE_ID(job->exception_status), + (job->exception_status >> 8) & 0x3, + job->exception_status & 0xFF, + job->first_incomplete_task, + job->fault_pointer, job->job_descriptor_size, + job->job_type, job->job_barrier, job->_reserved_01, + job->_reserved_02, job->_reserved_03, + job->_reserved_04, job->_reserved_05, + job->job_index, + job->job_dependency_index_1, + job->job_dependency_index_2); + + if (job->job_descriptor_size) dev_dbg(kctx->kbdev->dev, "next = %llx\n", - job->next._64); + job->next_job._64); else dev_dbg(kctx->kbdev->dev, "next = %x\n", - job->next._32); + job->next_job._32); #endif } @@ -372,77 +375,81 @@ static int kbasep_replay_reset_job(struct kbase_context *kctx, u32 default_weight, u16 hw_job_id_offset, bool first_in_chain, bool fragment_chain) { - struct job_head *job; + struct fragment_job *frag_job; + struct job_descriptor_header *job; u64 new_job_header; struct kbase_vmap_struct map; - job = kbase_vmap(kctx, *job_header, sizeof(*job), &map); - if (!job) { + frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map); + if (!frag_job) { dev_err(kctx->kbdev->dev, "kbasep_replay_parse_jc: failed to map jc\n"); return -EINVAL; } + job = &frag_job->header; dump_job_head(kctx, "Job header:", job); - if (job->status == JOB_NOT_STARTED && !fragment_chain) { + if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) { dev_err(kctx->kbdev->dev, "Job already not started\n"); goto 
out_unmap; } - job->status = JOB_NOT_STARTED; + job->exception_status = JOB_NOT_STARTED; - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX) - job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL; + if (job->job_type == JOB_TYPE_VERTEX) + job->job_type = JOB_TYPE_NULL; - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) { + if (job->job_type == JOB_TYPE_FUSED) { dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n"); goto out_unmap; } if (first_in_chain) - job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER; + job->job_barrier = 1; - if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX || - (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX || - (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { + if ((job->job_dependency_index_1 + hw_job_id_offset) > + JOB_HEADER_ID_MAX || + (job->job_dependency_index_2 + hw_job_id_offset) > + JOB_HEADER_ID_MAX || + (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { dev_err(kctx->kbdev->dev, "Job indicies/dependencies out of valid range\n"); goto out_unmap; } - if (job->dependencies[0]) - job->dependencies[0] += hw_job_id_offset; - if (job->dependencies[1]) - job->dependencies[1] += hw_job_id_offset; + if (job->job_dependency_index_1) + job->job_dependency_index_1 += hw_job_id_offset; + if (job->job_dependency_index_2) + job->job_dependency_index_2 += hw_job_id_offset; - job->index += hw_job_id_offset; + job->job_index += hw_job_id_offset; - if (job->flags & JOB_FLAG_DESC_SIZE) { - new_job_header = job->next._64; - if (!job->next._64) - job->next._64 = prev_jc; + if (job->job_descriptor_size) { + new_job_header = job->next_job._64; + if (!job->next_job._64) + job->next_job._64 = prev_jc; } else { - new_job_header = job->next._32; - if (!job->next._32) - job->next._32 = prev_jc; + new_job_header = job->next_job._32; + if (!job->next_job._32) + job->next_job._32 = prev_jc; } dump_job_head(kctx, "Updated to:", job); - if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) { - bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0; + if (job->job_type == JOB_TYPE_TILER) { + bool job_64 = job->job_descriptor_size != 0; if (kbasep_replay_reset_tiler_job(kctx, *job_header, tiler_heap_free, hierarchy_mask, default_weight, job_64) != 0) goto out_unmap; - } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) { + } else if (job->job_type == JOB_TYPE_FRAGMENT) { u64 fbd_address; - if (job->flags & JOB_FLAG_DESC_SIZE) - fbd_address = job->fragment_fbd._64; + if (job->job_descriptor_size) + fbd_address = frag_job->fragment_fbd._64; else - fbd_address = (u64)job->fragment_fbd._32; + fbd_address = (u64)frag_job->fragment_fbd._32; if (fbd_address & FBD_TYPE) { if (kbasep_replay_reset_mfbd(kctx, @@ -485,7 +492,7 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, u64 jc, u16 *hw_job_id) { while (jc) { - struct job_head *job; + struct job_descriptor_header *job; struct kbase_vmap_struct map; dev_dbg(kctx->kbdev->dev, @@ -498,13 +505,13 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, return -EINVAL; } - if (job->index > *hw_job_id) - *hw_job_id = job->index; + if (job->job_index > *hw_job_id) + *hw_job_id = job->job_index; - if (job->flags & JOB_FLAG_DESC_SIZE) - jc = job->next._64; + if (job->job_descriptor_size) + jc = job->next_job._64; else - jc = job->next._32; + jc = job->next_job._32; kbase_vunmap(kctx, &map); } @@ -957,7 +964,7 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) base_jd_replay_payload *payload; u64 job_header; u64 job_loop_detect; - struct 
job_head *job; + struct job_descriptor_header *job; struct kbase_vmap_struct job_map; struct kbase_vmap_struct map; bool err = false; @@ -1012,41 +1019,22 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) } -#ifdef CONFIG_MALI_DEBUG - dev_dbg(dev, "\njob_head structure:\n" - "Source ID:0x%x Access:0x%x Exception:0x%x\n" - "at job addr = %p\n" - "not_complete_index = 0x%x\n" - "fault_addr = 0x%llx\n" - "flags = 0x%x\n" - "index = 0x%x\n" - "dependencies = 0x%x,0x%x\n", - JOB_SOURCE_ID(job->status), - ((job->status >> 8) & 0x3), - (job->status & 0xFF), - job, - job->not_complete_index, - job->fault_addr, - job->flags, - job->index, - job->dependencies[0], - job->dependencies[1]); -#endif + dump_job_head(kctx, "\njob_head structure:\n", job); /* Replay only when the polygon list reader caused the * DATA_INVALID_FAULT */ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) && - (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) { + (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) { err = true; kbase_vunmap(kctx, &job_map); break; } /* Move on to next fragment job in the list */ - if (job->flags & JOB_FLAG_DESC_SIZE) - job_header = job->next._64; + if (job->job_descriptor_size) + job_header = job->next_job._64; else - job_header = job->next._32; + job_header = job->next_job._32; kbase_vunmap(kctx, &job_map); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.c deleted file mode 100755 index a0bb352..0000000 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - - - -/** - * @file mali_kbase_security.c - * Base kernel security capability API - */ - -#include - -static inline bool kbasep_am_i_root(void) -{ -#if KBASE_HWCNT_DUMP_BYPASS_ROOT - return true; -#else - /* Check if root */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) - if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) - return true; -#else - if (current_euid() == 0) - return true; -#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)*/ - return false; -#endif /*KBASE_HWCNT_DUMP_BYPASS_ROOT*/ -} - -/** - * kbase_security_has_capability - see mali_kbase_caps.h for description. 
- */ - -bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags) -{ - /* Assume failure */ - bool access_allowed = false; - bool audit = KBASE_SEC_FLAG_AUDIT & flags; - - KBASE_DEBUG_ASSERT(NULL != kctx); - CSTD_UNUSED(kctx); - - /* Detect unsupported flags */ - KBASE_DEBUG_ASSERT(((~KBASE_SEC_FLAG_MASK) & flags) == 0); - - /* Determine if access is allowed for the given cap */ - switch (cap) { - case KBASE_SEC_MODIFY_PRIORITY: - case KBASE_SEC_INSTR_HW_COUNTERS_COLLECT: - /* Access is granted only if the caller is privileged */ - access_allowed = kbasep_am_i_root(); - break; - } - - /* Report problem if requested */ - if (!access_allowed && audit) - dev_warn(kctx->kbdev->dev, "Security capability failure: %d, %p", cap, (void *)kctx); - - return access_allowed; -} - -KBASE_EXPORT_TEST_API(kbase_security_has_capability); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.h deleted file mode 100755 index 024a7ee..0000000 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - - - - - -/** - * @file mali_kbase_security.h - * Base kernel security capability APIs - */ - -#ifndef _KBASE_SECURITY_H_ -#define _KBASE_SECURITY_H_ - -/* Security flags */ -#define KBASE_SEC_FLAG_NOAUDIT (0u << 0) /* Silently handle privilege failure */ -#define KBASE_SEC_FLAG_AUDIT (1u << 0) /* Write audit message on privilege failure */ -#define KBASE_SEC_FLAG_MASK (KBASE_SEC_FLAG_AUDIT) /* Mask of all valid flag bits */ - -/* List of unique capabilities that have security access privileges */ -enum kbase_security_capability { - /* Instrumentation Counters access privilege */ - KBASE_SEC_INSTR_HW_COUNTERS_COLLECT = 1, - KBASE_SEC_MODIFY_PRIORITY - /* Add additional access privileges here */ -}; - -/** - * kbase_security_has_capability - determine whether a task has a particular effective capability - * @param[in] kctx The task context. - * @param[in] cap The capability to check for. - * @param[in] flags Additional configuration information - * Such as whether to write an audit message or not. - * @return true if success (capability is allowed), false otherwise. 
- */ - -bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags); - -#endif /* _KBASE_SECURITY_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c index 17455fe..43175c8 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c @@ -25,16 +25,21 @@ static noinline u64 invoke_smc_fid(u64 function_id, u64 arg0, u64 arg1, u64 arg2) { + register u64 x0 asm("x0") = function_id; + register u64 x1 asm("x1") = arg0; + register u64 x2 asm("x2") = arg1; + register u64 x3 asm("x3") = arg2; + asm volatile( __asmeq("%0", "x0") __asmeq("%1", "x1") __asmeq("%2", "x2") __asmeq("%3", "x3") - "smc #0\n" - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); + "smc #0\n" + : "+r" (x0) + : "r" (x1), "r" (x2), "r" (x3)); - return function_id; + return x0; } u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 637eba8..f1dd011 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,8 +25,13 @@ #include #include "mali_kbase_sync.h" #endif +#include #include +#include #include +#include +#include +#include /* Mask to check cache alignment of data structures */ #define KBASE_CACHE_ALIGNMENT_MASK ((1<kctx; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +static struct page *kbasep_translate_gpu_addr_to_kernel_page( + struct kbase_context *kctx, u64 gpu_addr) +{ + u64 pfn; + struct kbase_va_region *reg; + phys_addr_t addr = 0; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + pfn = gpu_addr >> PAGE_SHIFT; + + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE)) + goto err_vm_unlock; + addr = reg->cpu_alloc->pages[pfn - reg->start_pfn]; + kbase_gpu_vm_unlock(kctx); + + if (!addr) + goto err; + + return pfn_to_page(PFN_DOWN(addr)); + +err_vm_unlock: + kbase_gpu_vm_unlock(kctx); +err: + return NULL; +} + +int kbasep_read_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char *status) +{ + struct page *pg = kbasep_translate_gpu_addr_to_kernel_page( + kctx, evt); + unsigned char *mapped_pg; + u32 offset = evt & ~PAGE_MASK; + + KBASE_DEBUG_ASSERT(NULL != status); + + if (!pg) + return -1; + + mapped_pg = (unsigned char *)kmap_atomic(pg); + KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */ + *status = *(mapped_pg + offset); + kunmap_atomic(mapped_pg); + + return 0; +} + +int kbasep_write_soft_event_status( + struct kbase_context *kctx, u64 evt, unsigned char new_status) +{ + struct page *pg = kbasep_translate_gpu_addr_to_kernel_page( + kctx, evt); + unsigned char *mapped_pg; + u32 offset = evt & ~PAGE_MASK; + + KBASE_DEBUG_ASSERT((new_status == BASE_JD_SOFT_EVENT_SET) || + (new_status == 
BASE_JD_SOFT_EVENT_RESET)); + + if (!pg) + return -1; + + mapped_pg = (unsigned char *)kmap_atomic(pg); + KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */ + *(mapped_pg + offset) = new_status; + kunmap_atomic(mapped_pg); + + return 0; +} + static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) { struct kbase_va_region *reg; @@ -68,6 +156,9 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); mutex_unlock(&js_devdata->runpool_mutex); + /* Also adding this to the list of waiting soft job */ + kbasep_add_waiting_soft_job(katom); + return pm_active_err; } @@ -234,18 +325,19 @@ static int kbase_fence_wait(struct kbase_jd_atom *katom) if (ret == 1) { /* Already signalled */ return 0; - } else if (ret < 0) { - goto cancel_atom; } - return 1; - cancel_atom: - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - /* We should cause the dependant jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job */ - KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); - INIT_WORK(&katom->work, kbase_fence_wait_worker); - queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + if (ret < 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + /* We should cause the dependent jobs in the bag to be failed, + * to do this we schedule the work queue to complete this job */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, kbase_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + } + + kbasep_add_waiting_soft_job(katom); + return 1; } @@ -266,6 +358,600 @@ static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom) } #endif /* CONFIG_SYNC */ +static void kbasep_soft_event_complete_job(struct work_struct *work) +{ + struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + int resched; + + mutex_lock(&kctx->jctx.lock); + resched = jd_done_nolock(katom, NULL); + mutex_unlock(&kctx->jctx.lock); + + if (resched) + kbase_js_sched_all(kctx->kbdev); +} + +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) +{ + int cancel_timer = 1; + struct list_head *entry, *tmp; + unsigned long lflags; + + spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + struct kbase_jd_atom *katom = list_entry( + entry, struct kbase_jd_atom, dep_item[0]); + + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == + BASE_JD_REQ_SOFT_EVENT_WAIT) { + if (katom->jc == evt) { + list_del(&katom->dep_item[0]); + + katom->event_code = BASE_JD_EVENT_DONE; + INIT_WORK(&katom->work, + kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, + &katom->work); + } else { + /* There are still other waiting jobs, we cannot + * cancel the timer yet */ + cancel_timer = 0; + } + } + } + + if (cancel_timer) + hrtimer_try_to_cancel(&kctx->soft_event_timeout); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); +} + +enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer) +{ + struct kbase_context *kctx = container_of(timer, struct kbase_context, + soft_event_timeout); + u32 timeout_ms = (u32)atomic_read( + &kctx->kbdev->js_data.soft_event_timeout_ms); + ktime_t cur_time = ktime_get(); + enum hrtimer_restart restarting = HRTIMER_NORESTART; + unsigned long lflags; + struct list_head *entry, *tmp; + + 
spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + struct kbase_jd_atom *katom = list_entry( + entry, struct kbase_jd_atom, dep_item[0]); + + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == + BASE_JD_REQ_SOFT_EVENT_WAIT) { + s64 elapsed_time = + ktime_to_ms(ktime_sub(cur_time, + katom->start_timestamp)); + if (elapsed_time > (s64)timeout_ms) { + /* Take it out of the list to ensure that it + * will be cancelled in all cases */ + list_del(&katom->dep_item[0]); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + INIT_WORK(&katom->work, + kbasep_soft_event_complete_job); + queue_work(kctx->jctx.job_done_wq, + &katom->work); + } else { + restarting = HRTIMER_RESTART; + } + } + } + + if (restarting) + hrtimer_add_expires(timer, HR_TIMER_DELAY_MSEC(timeout_ms)); + spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); + + return restarting; +} + +static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + ktime_t remaining; + unsigned char status; + + /* The status of this soft-job is stored in jc */ + if (kbasep_read_soft_event_status(kctx, katom->jc, &status) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return 0; + } + + if (status == BASE_JD_SOFT_EVENT_SET) + return 0; /* Event already set, nothing to do */ + + /* Record the start time of this atom so we could cancel it at + * the right time */ + katom->start_timestamp = ktime_get(); + + /* Add the atom to the waiting list before the timer is + * (re)started to make sure that it gets processed */ + kbasep_add_waiting_soft_job(katom); + + /* Schedule cancellation of this atom after a period if it is + * not active */ + remaining = hrtimer_get_remaining(&kctx->soft_event_timeout); + if (remaining.tv64 <= 0) { + int timeout_ms = atomic_read( + &kctx->kbdev->js_data.soft_event_timeout_ms); + hrtimer_start(&kctx->soft_event_timeout, + HR_TIMER_DELAY_MSEC((u64)timeout_ms), + HRTIMER_MODE_REL); + } + + return 1; +} + +static void kbasep_soft_event_update(struct kbase_jd_atom *katom, + unsigned char new_status) +{ + /* Complete jobs waiting on the same event */ + struct kbase_context *kctx = katom->kctx; + + if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + return; + } + + if (new_status == BASE_JD_SOFT_EVENT_SET) + kbasep_complete_triggered_soft_events(kctx, katom->jc); +} + +static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) +{ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); +} + +struct kbase_debug_copy_buffer { + u64 size; + struct page **pages; + int nr_pages; + u64 offset; +}; + +static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers = + (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; + unsigned int i; + unsigned int nr = katom->nr_extres; + + if (!buffers) + return; + + for (i = 0; i < nr; i++) { + int p; + + if (!buffers[i].pages) + break; + for (p = 0; p < buffers[i].nr_pages; p++) { + struct page *pg = buffers[i].pages[p]; + + if (pg) + put_page(pg); + } + kfree(buffers[i].pages); + } + kfree(buffers); + + katom->jc = 0; +} + +static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers; + struct base_jd_debug_copy_buffer *user_buffers = NULL; + unsigned int i; + unsigned int nr = katom->nr_extres; + int ret = 
0; + void __user *user_structs = (void __user *)(uintptr_t)katom->jc; + + if (!user_structs) + return -EINVAL; + + buffers = kmalloc_array(nr, sizeof(*buffers), GFP_KERNEL); + if (!buffers) { + ret = -ENOMEM; + katom->jc = 0; + goto out_cleanup; + } + katom->jc = (u64)(uintptr_t)buffers; + + user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); + + if (!user_buffers) { + ret = -ENOMEM; + goto out_cleanup; + } + + if (copy_from_user(user_buffers, user_structs, + sizeof(*user_buffers)*nr)) { + ret = -EINVAL; + goto out_cleanup; + } + + down_read(¤t->mm->mmap_sem); + for (i = 0; i < nr; i++) { + u64 addr = user_buffers[i].address; + u64 page_addr = addr & PAGE_MASK; + u64 end_page_addr = addr + user_buffers[i].size - 1; + u64 last_page_addr = end_page_addr & PAGE_MASK; + int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; + int pinned_pages; + + if (!user_buffers[i].address) { + memset(&buffers[i], 0, + sizeof(struct kbase_debug_copy_buffer)); + continue; + } + + buffers[i].nr_pages = nr_pages; + buffers[i].offset = addr & ~PAGE_MASK; + buffers[i].size = user_buffers[i].size; + + buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!buffers[i].pages) { + ret = -ENOMEM; + goto out_unlock; + } + + pinned_pages = get_user_pages(current, current->mm, page_addr, + nr_pages, + 1, /* Write */ + 0, /* No force */ + buffers[i].pages, + NULL); + if (pinned_pages < 0) { + ret = pinned_pages; + goto out_unlock; + } + if (pinned_pages != nr_pages) { + ret = -EINVAL; + goto out_unlock; + } + } + up_read(¤t->mm->mmap_sem); + + kfree(user_buffers); + + return ret; + +out_unlock: + up_read(¤t->mm->mmap_sem); + +out_cleanup: + kfree(buffers); + kfree(user_buffers); + + /* Frees allocated memory for kbase_debug_copy_job struct, including + * members, and sets jc to 0 */ + kbase_debug_copy_finish(katom); + + return ret; +} + +static int kbase_debug_copy(struct kbase_jd_atom *katom) +{ + struct kbase_debug_copy_buffer *buffers = + (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; + unsigned int i; + + for (i = 0; i < katom->nr_extres; i++) { + u64 offset = buffers[i].offset; + u64 buffer_space = buffers[i].size; + int p; + + for (p = 0; p < buffers[i].nr_pages; p++) { + struct page *pg = buffers[i].pages[p]; + void *kpage = kmap(pg); + u64 page_space = PAGE_SIZE-offset; + u64 space; + + if (page_space <= buffer_space) + space = page_space; + else + space = buffer_space; + + /* Temporary - GPUCORE-1843 covers the implementation + * of the actual copying. 
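+	 * As a stand-in, each pinned destination page is simply filled
+	 * with the 0x4B test pattern, with every write clamped to the
+	 * space left in both the current page and the overall buffer.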
*/ + memset(kpage+offset, 0x4B, space); + + if (!PageReserved(pg)) + SetPageDirty(pg); + + kunmap(pg); + offset = 0; + buffer_space -= space; + } + } + + return 0; +} + +static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) +{ + __user void *data = (__user void *)(uintptr_t) katom->jc; + struct base_jit_alloc_info *info; + struct kbase_context *kctx = katom->kctx; + int ret; + + /* Fail the job if there is no info structure */ + if (!data) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto fail; + } + + if (copy_from_user(info, data, sizeof(*info)) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* If the ID is zero or is in use then fail the job */ + if ((info->id == 0) || (kctx->jit_alloc[info->id])) { + ret = -EINVAL; + goto free_info; + } + + /* Set the jit_alloc to a non-zero value so we know the ID is in use */ + kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1; + + /* Sanity check that the PA fits within the VA */ + if (info->va_pages < info->commit_pages) { + ret = -EINVAL; + goto free_info; + } + + /* Ensure the GPU address is correctly aligned */ + if ((info->gpu_alloc_addr & 0x7) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* Replace the user pointer with our kernel allocated info structure */ + katom->jc = (u64)(uintptr_t) info; + + /* + * Note: + * The provided info->gpu_alloc_addr isn't validated here as + * userland can cache allocations which means that even + * though the region is valid it doesn't represent the + * same thing it used to. + * + * Complete validation of va_pages, commit_pages and extent + * isn't done here as it will be done during the call to + * kbase_mem_alloc. + */ + return 0; + +free_info: + kfree(info); +fail: + katom->jc = 0; + return ret; +} + +static void kbase_jit_allocate_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct base_jit_alloc_info *info; + struct kbase_va_region *reg; + struct kbase_vmap_struct mapping; + u64 *ptr; + + info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; + + /* Create a JIT allocation */ + reg = kbase_jit_allocate(kctx, info); + if (!reg) { + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return; + } + + /* + * Write the address of the JIT allocation to the user provided + * GPU allocation. + */ + ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), + &mapping); + if (!ptr) { + /* + * Leave the allocation "live" as the JIT free jit will be + * submitted anyway. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + + *ptr = reg->start_pfn << PAGE_SHIFT; + kbase_vunmap(kctx, &mapping); + + katom->event_code = BASE_JD_EVENT_DONE; + + /* + * Bind it to the user provided ID. Do this last so we can check for + * the JIT free racing this JIT alloc job. + */ + kctx->jit_alloc[info->id] = reg; +} + +static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) +{ + struct base_jit_alloc_info *info; + + info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; + /* Free the info structure */ + kfree(info); +} + +static void kbase_jit_free_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u8 id = (u8) katom->jc; + + /* + * If the ID is zero or it is not in use yet then fail the job. 
+ */ + if ((id == 0) || (kctx->jit_alloc[id] == NULL)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + + /* + * If the ID is valid but the allocation request failed still succeed + * this soft job but don't try and free the allocation. + */ + if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1) + kbase_jit_free(kctx, kctx->jit_alloc[id]); + + kctx->jit_alloc[id] = NULL; +} + +static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) +{ + __user struct base_external_resource_list *user_ext_res; + struct base_external_resource_list *ext_res; + u64 count = 0; + size_t copy_size; + int ret; + + user_ext_res = (__user struct base_external_resource_list *) + (uintptr_t) katom->jc; + + /* Fail the job if there is no info structure */ + if (!user_ext_res) { + ret = -EINVAL; + goto fail; + } + + if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { + ret = -EINVAL; + goto fail; + } + + /* Is the number of external resources in range? */ + if (!count || count > BASE_EXT_RES_COUNT_MAX) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + copy_size = sizeof(*ext_res); + copy_size += sizeof(struct base_external_resource) * (count - 1); + ext_res = kzalloc(copy_size, GFP_KERNEL); + if (!ext_res) { + ret = -ENOMEM; + goto fail; + } + + if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* + * Overwrite the count with the first value incase it was changed + * after the fact. + */ + ext_res->count = count; + + /* + * Replace the user pointer with our kernel allocated + * ext_res structure. + */ + katom->jc = (u64)(uintptr_t) ext_res; + + return 0; + +free_info: + kfree(ext_res); +fail: + return ret; +} + +static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) +{ + struct base_external_resource_list *ext_res; + int i; + bool failed = false; + + ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; + if (!ext_res) + goto failed_jc; + + kbase_gpu_vm_lock(katom->kctx); + + for (i = 0; i < ext_res->count; i++) { + u64 gpu_addr; + + gpu_addr = ext_res->ext_res[i].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + if (map) { + if (!kbase_sticky_resource_acquire(katom->kctx, + gpu_addr)) + goto failed_loop; + } else + if (!kbase_sticky_resource_release(katom->kctx, NULL, + gpu_addr, false)) + failed = true; + } + + /* + * In the case of unmap we continue unmapping other resources in the + * case of failure but will always report failure if _any_ unmap + * request fails. 
+ */ + if (failed) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + else + katom->event_code = BASE_JD_EVENT_DONE; + + kbase_gpu_vm_unlock(katom->kctx); + + return; + +failed_loop: + while (--i > 0) { + u64 gpu_addr; + + gpu_addr = ext_res->ext_res[i].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + + kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr, + false); + } + + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_vm_unlock(katom->kctx); + +failed_jc: + return; +} + +static void kbase_ext_res_finish(struct kbase_jd_atom *katom) +{ + struct base_external_resource_list *ext_res; + + ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; + /* Free the info structure */ + kfree(ext_res); +} + int kbase_process_soft_job(struct kbase_jd_atom *katom) { switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { @@ -284,6 +970,28 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) #endif /* CONFIG_SYNC */ case BASE_JD_REQ_SOFT_REPLAY: return kbase_replay_process(katom); + case BASE_JD_REQ_SOFT_EVENT_WAIT: + return kbasep_soft_event_wait(katom); + case BASE_JD_REQ_SOFT_EVENT_SET: + kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_SET); + break; + case BASE_JD_REQ_SOFT_EVENT_RESET: + kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_RESET); + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + return kbase_debug_copy(katom); + case BASE_JD_REQ_SOFT_JIT_ALLOC: + kbase_jit_allocate_process(katom); + break; + case BASE_JD_REQ_SOFT_JIT_FREE: + kbase_jit_free_process(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_process(katom, true); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_process(katom, false); + break; } /* Atom is complete */ @@ -298,6 +1006,9 @@ void kbase_cancel_soft_job(struct kbase_jd_atom *katom) kbase_fence_cancel_wait(katom); break; #endif + case BASE_JD_REQ_SOFT_EVENT_WAIT: + kbasep_soft_event_cancel_job(katom); + break; default: /* This soft-job doesn't support cancellation! 
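		 * Only fence waits and soft-event waits provide a
		 * cancellation path; reaching this default case for any
		 * other soft-job type is a driver bug, hence the assert
		 * below.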
*/ KBASE_DEBUG_ASSERT(0); @@ -355,8 +1066,23 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) } break; #endif /* CONFIG_SYNC */ + case BASE_JD_REQ_SOFT_JIT_ALLOC: + return kbase_jit_allocate_prepare(katom); case BASE_JD_REQ_SOFT_REPLAY: + case BASE_JD_REQ_SOFT_JIT_FREE: break; + case BASE_JD_REQ_SOFT_EVENT_WAIT: + case BASE_JD_REQ_SOFT_EVENT_SET: + case BASE_JD_REQ_SOFT_EVENT_RESET: + if (katom->jc == 0) + return -EINVAL; + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + return kbase_debug_copy_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + return kbase_ext_res_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + return kbase_ext_res_prepare(katom); default: /* Unsupported soft-job */ return -EINVAL; @@ -386,6 +1112,19 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) katom->fence = NULL; break; #endif /* CONFIG_SYNC */ + + case BASE_JD_REQ_SOFT_DEBUG_COPY: + kbase_debug_copy_finish(katom); + break; + case BASE_JD_REQ_SOFT_JIT_ALLOC: + kbase_jit_allocate_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_finish(katom); + break; } } @@ -426,12 +1165,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) kbase_finish_soft_job(katom_iter); resched |= jd_done_nolock(katom_iter, NULL); } else { - /* The job has not completed */ KBASE_DEBUG_ASSERT((katom_iter->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_SOFT_REPLAY); - list_add_tail(&katom_iter->dep_item[0], - &kctx->waiting_soft_jobs); } mutex_unlock(&kctx->jctx.lock); diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index 99428d1..e41efb8 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,11 +29,12 @@ #include #include #include +#include /*****************************************************************************/ -/* The version of timeline stream. */ -#define KBASEP_TLSTREAM_VERSION 1 +/* The version of swtrace protocol used in timeline stream. */ +#define SWTRACE_VERSION 3 /* The maximum expected length of string in tracepoint descriptor. */ #define STRLEN_MAX 64 /* bytes */ @@ -41,14 +42,11 @@ /* The number of nanoseconds in a second. */ #define NSECS_IN_SEC 1000000000ull /* ns */ -/* The number of nanoseconds to wait before autoflushing the stream. */ -#define AUTOFLUSH_TIMEOUT (2ull * NSECS_IN_SEC) /* ns */ - /* The period of autoflush checker execution in milliseconds. */ #define AUTOFLUSH_INTERVAL 1000 /* ms */ /* The maximum size of a single packet used by timeline. */ -#define PACKET_SIZE 2048 /* bytes */ +#define PACKET_SIZE 4096 /* bytes */ /* The number of packets used by one timeline stream. */ #define PACKET_COUNT 16 @@ -120,7 +118,7 @@ enum tl_packet_type { }; /* Message ids of trace events that are recorded in the timeline stream. */ -enum tl_msg_id { +enum tl_msg_id_obj { /* Timeline object events. 
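 * Most of these describe entities (contexts, GPUs, LPUs, address
 * spaces, atoms) and the retain/release and dependency links between
 * them; power-management state, page-fault/allocation counts and job
 * soft-stop events now live in the separate tl_msg_id_aux enum below.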
*/ KBASE_TL_NEW_CTX, KBASE_TL_NEW_GPU, @@ -131,25 +129,30 @@ enum tl_msg_id { KBASE_TL_DEL_ATOM, KBASE_TL_LIFELINK_LPU_GPU, KBASE_TL_LIFELINK_AS_GPU, - KBASE_TL_RET_GPU_CTX, + KBASE_TL_RET_CTX_LPU, KBASE_TL_RET_ATOM_CTX, KBASE_TL_RET_ATOM_LPU, - KBASE_TL_NRET_GPU_CTX, + KBASE_TL_NRET_CTX_LPU, KBASE_TL_NRET_ATOM_CTX, KBASE_TL_NRET_ATOM_LPU, KBASE_TL_RET_AS_CTX, KBASE_TL_NRET_AS_CTX, KBASE_TL_RET_ATOM_AS, KBASE_TL_NRET_ATOM_AS, + KBASE_TL_DEP_ATOM_ATOM, KBASE_TL_ATTRIB_ATOM_CONFIG, KBASE_TL_ATTRIB_AS_CONFIG, - /* Job dump specific events (part of timeline stream). */ - KBASE_JD_GPU_SOFT_RESET, + /* Job dump specific events. */ + KBASE_JD_GPU_SOFT_RESET +}; - /* Timeline non-object events. */ +/* Message ids of trace events that are recorded in the auxiliary stream. */ +enum tl_msg_id_aux { KBASE_AUX_PM_STATE, + KBASE_AUX_ISSUE_JOB_SOFTSTOP, KBASE_AUX_JOB_SOFTSTOP, + KBASE_AUX_JOB_SOFTSTOP_EX, KBASE_AUX_PAGEFAULT, KBASE_AUX_PAGESALLOC }; @@ -163,7 +166,7 @@ enum tl_msg_id { * @wbi: write buffer index * @rbi: read buffer index * @numbered: if non-zero stream's packets are sequentially numbered - * @last_write_time: timestamp indicating last write + * @autoflush_counter: counter tracking stream's autoflush state * * This structure holds information needed to construct proper packets in the * timeline stream. Each message in sequence must bear timestamp that is greater @@ -174,6 +177,11 @@ enum tl_msg_id { * Each packet in timeline body stream has sequence number embedded (this value * must increment monotonically and is used by packets receiver to discover * buffer overflows. + * Autoflush counter is set to negative number when there is no data pending + * for flush and it is set to zero on every update of the buffer. Autoflush + * timer will increment the counter by one on every expiry. In case there will + * be no activity on the buffer during two consecutive timer expiries, stream + * buffer will be flushed. */ struct tl_stream { spinlock_t lock; @@ -187,7 +195,7 @@ struct tl_stream { atomic_t rbi; int numbered; - u64 last_write_time; + atomic_t autoflush_counter; }; /** @@ -238,9 +246,6 @@ static atomic_t autoflush_timer_active; * streams at any given time. */ static DEFINE_MUTEX(tl_reader_lock); -/* Indicator of whether the timeline stream file descriptor is already used. */ -static atomic_t tlstream_busy = {0}; - /* Timeline stream event queue. 
*/ static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); @@ -266,8 +271,8 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_NEW_CTX, __stringify(KBASE_TL_NEW_CTX), "object ctx is created", - "@pI", - "ctx,ctx_nr" + "@pII", + "ctx,ctx_nr,tgid" }, { KBASE_TL_NEW_GPU, @@ -326,11 +331,11 @@ static const struct tp_desc tp_desc_obj[] = { "address_space,gpu" }, { - KBASE_TL_RET_GPU_CTX, - __stringify(KBASE_TL_RET_GPU_CTX), - "gpu is retained by context", + KBASE_TL_RET_CTX_LPU, + __stringify(KBASE_TL_RET_CTX_LPU), + "context is retained by lpu", "@pp", - "gpu,ctx" + "ctx,lpu" }, { KBASE_TL_RET_ATOM_CTX, @@ -343,22 +348,22 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_RET_ATOM_LPU, __stringify(KBASE_TL_RET_ATOM_LPU), "atom is retained by lpu", - "@pp", - "atom,lpu" + "@pps", + "atom,lpu,attrib_match_list" }, { - KBASE_TL_NRET_GPU_CTX, - __stringify(KBASE_TL_NRET_GPU_CTX), - "gpu is released by context", + KBASE_TL_NRET_CTX_LPU, + __stringify(KBASE_TL_NRET_CTX_LPU), + "context is released by lpu", "@pp", - "gpu,ctx" + "ctx,lpu" }, { KBASE_TL_NRET_ATOM_CTX, __stringify(KBASE_TL_NRET_ATOM_CTX), "atom is released by context", "@pp", - "atom,context" + "atom,ctx" }, { KBASE_TL_NRET_ATOM_LPU, @@ -395,6 +400,13 @@ static const struct tp_desc tp_desc_obj[] = { "@pp", "atom,address_space" }, + { + KBASE_TL_DEP_ATOM_ATOM, + __stringify(KBASE_TL_DEP_ATOM_ATOM), + "atom2 depends on atom1", + "@pp", + "atom1,atom2" + }, { KBASE_TL_ATTRIB_ATOM_CONFIG, __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), @@ -427,6 +439,13 @@ static const struct tp_desc tp_desc_aux[] = { "@IL", "core_type,core_state_bitset" }, + { + KBASE_AUX_ISSUE_JOB_SOFTSTOP, + __stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP), + "Issuing job soft stop", + "@p", + "atom" + }, { KBASE_AUX_JOB_SOFTSTOP, __stringify(KBASE_AUX_JOB_SOFTSTOP), @@ -434,19 +453,26 @@ static const struct tp_desc tp_desc_aux[] = { "@I", "tag_id" }, + { + KBASE_AUX_JOB_SOFTSTOP_EX, + __stringify(KBASE_AUX_JOB_SOFTSTOP_EX), + "Job soft stop, more details", + "@pI", + "atom,job_type" + }, { KBASE_AUX_PAGEFAULT, __stringify(KBASE_AUX_PAGEFAULT), "Page fault", - "@II", - "as_id,page_cnt" + "@IL", + "ctx_nr,page_cnt_change" }, { KBASE_AUX_PAGESALLOC, __stringify(KBASE_AUX_PAGESALLOC), "Total alloc pages change", - "@l", - "page_cnt_change" + "@IL", + "ctx_nr,page_cnt" } }; @@ -460,6 +486,11 @@ static atomic_t tlstream_bytes_generated = {0}; /*****************************************************************************/ +/* Indicator of whether the timeline stream file descriptor is used. */ +atomic_t kbase_tlstream_enabled = {0}; + +/*****************************************************************************/ + /** * kbasep_tlstream_get_timestamp - return timestamp * @@ -763,6 +794,9 @@ static size_t kbasep_tlstream_msgbuf_submit( unsigned int rb_idx_raw = atomic_read(&stream->rbi); unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; + /* Set stream as flushed. */ + atomic_set(&stream->autoflush_counter, -1); + kbasep_tlstream_packet_header_update( stream->buffer[wb_idx].data, wb_size - PACKET_HEADER_SIZE); @@ -811,14 +845,14 @@ static size_t kbasep_tlstream_msgbuf_submit( * * Return: pointer to the buffer where message can be stored * - * Warning: Stream must be relased with kbasep_tlstream_msgbuf_release(). + * Warning: Stream must be released with kbasep_tlstream_msgbuf_release(). * Only atomic operations are allowed while stream is locked * (i.e. do not use any operation that may sleep). 
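 *
 * A typical emitter in this file pairs the two calls, e.g.:
 *
 *   buffer = kbasep_tlstream_msgbuf_acquire(
 *                   TL_STREAM_TYPE_OBJ, msg_size, &flags);
 *   KBASE_DEBUG_ASSERT(buffer);
 *   pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
 *   pos = kbasep_tlstream_write_timestamp(buffer, pos);
 *   kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 *
 * as the __kbase_tlstream_* writers later in the file do.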
*/ static char *kbasep_tlstream_msgbuf_acquire( enum tl_stream_type stream_type, size_t msg_size, - unsigned long *flags) + unsigned long *flags) __acquires(&stream->lock) { struct tl_stream *stream; unsigned int wb_idx_raw; @@ -865,14 +899,16 @@ static char *kbasep_tlstream_msgbuf_acquire( */ static void kbasep_tlstream_msgbuf_release( enum tl_stream_type stream_type, - unsigned long flags) + unsigned long flags) __releases(&stream->lock) { struct tl_stream *stream; KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); stream = tl_stream[stream_type]; - stream->last_write_time = kbasep_tlstream_get_timestamp(); + + /* Mark stream as containing unflushed data. */ + atomic_set(&stream->autoflush_counter, 0); spin_unlock_irqrestore(&stream->lock, flags); } @@ -921,7 +957,6 @@ static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) */ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) { - u64 timestamp = kbasep_tlstream_get_timestamp(); enum tl_stream_type stype; int rcode; @@ -935,6 +970,22 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) size_t wb_size; size_t min_size = PACKET_HEADER_SIZE; + int af_cnt = atomic_read(&stream->autoflush_counter); + + /* Check if stream contain unflushed data. */ + if (0 > af_cnt) + continue; + + /* Check if stream should be flushed now. */ + if (af_cnt != atomic_cmpxchg( + &stream->autoflush_counter, + af_cnt, + af_cnt + 1)) + continue; + if (!af_cnt) + continue; + + /* Autoflush this stream. */ if (stream->numbered) min_size += PACKET_NUMBER_SIZE; @@ -944,16 +995,12 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) wb_idx = wb_idx_raw % PACKET_COUNT; wb_size = atomic_read(&stream->buffer[wb_idx].size); - if ( - (wb_size > min_size) && - ( - timestamp - stream->last_write_time > - AUTOFLUSH_TIMEOUT)) { - + if (wb_size > min_size) { wb_size = kbasep_tlstream_msgbuf_submit( stream, wb_idx_raw, wb_size); wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; - atomic_set(&stream->buffer[wb_idx].size, wb_size); + atomic_set(&stream->buffer[wb_idx].size, + wb_size); } spin_unlock_irqrestore(&stream->lock, flags); } @@ -1071,8 +1118,10 @@ static ssize_t kbasep_tlstream_read( break; } - /* Verify if there was no overflow in selected stream. Make sure - * that if incorrect size was used we will know about it. */ + /* If the rbi still points to the packet we just processed + * then there was no overflow so we add the copied size to + * copy_len and move rbi on to the next packet + */ smp_rmb(); if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) { copy_len += rb_size; @@ -1122,7 +1171,12 @@ static int kbasep_tlstream_release(struct inode *inode, struct file *filp) KBASE_DEBUG_ASSERT(filp); CSTD_UNUSED(inode); CSTD_UNUSED(filp); - atomic_set(&tlstream_busy, 0); + + /* Stop autoflush timer before releasing access to streams. */ + atomic_set(&autoflush_timer_active, 0); + del_timer_sync(&autoflush_timer); + + atomic_set(&kbase_tlstream_enabled, 0); return 0; } @@ -1140,7 +1194,7 @@ static void kbasep_tlstream_timeline_header( const struct tp_desc *tp_desc, u32 tp_count) { - const u8 tv = KBASEP_TLSTREAM_VERSION; /* tlstream version */ + const u8 tv = SWTRACE_VERSION; /* protocol version */ const u8 ps = sizeof(void *); /* pointer size */ size_t msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count); char *buffer; @@ -1211,7 +1265,6 @@ static void kbasep_tlstream_timeline_header( int kbase_tlstream_init(void) { enum tl_stream_type i; - int rcode; /* Prepare stream structures. 
*/ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { @@ -1229,14 +1282,10 @@ int kbase_tlstream_init(void) } /* Initialize autoflush timer. */ - atomic_set(&autoflush_timer_active, 1); + atomic_set(&autoflush_timer_active, 0); setup_timer(&autoflush_timer, kbasep_tlstream_autoflush_timer_callback, 0); - rcode = mod_timer( - &autoflush_timer, - jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); - CSTD_UNUSED(rcode); return 0; } @@ -1245,9 +1294,6 @@ void kbase_tlstream_term(void) { enum tl_stream_type i; - atomic_set(&autoflush_timer_active, 0); - del_timer_sync(&autoflush_timer); - for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { kbasep_timeline_stream_term(tl_stream[i]); kfree(tl_stream[i]); @@ -1256,14 +1302,16 @@ void kbase_tlstream_term(void) int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) { - if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) { + if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) { + int rcode; + *fd = anon_inode_getfd( "[mali_tlstream]", &kbasep_tlstream_fops, kctx, O_RDONLY | O_CLOEXEC); if (0 > *fd) { - atomic_set(&tlstream_busy, 0); + atomic_set(&kbase_tlstream_enabled, 0); return *fd; } @@ -1282,6 +1330,14 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) TL_STREAM_TYPE_AUX_HEADER, tp_desc_aux, ARRAY_SIZE(tp_desc_aux)); + + /* Start autoflush timer. */ + atomic_set(&autoflush_timer_active, 1); + rcode = mod_timer( + &autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); + } else { *fd = -EBUSY; } @@ -1317,11 +1373,12 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) /*****************************************************************************/ -void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr) +void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + + sizeof(tgid); unsigned long flags; char *buffer; size_t pos = 0; @@ -1337,12 +1394,15 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr) buffer, pos, &context, sizeof(context)); pos = kbasep_tlstream_write_bytes( buffer, pos, &nr, sizeof(nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &tgid, sizeof(tgid)); + KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) +void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) { const u32 msg_id = KBASE_TL_NEW_GPU; const size_t msg_size = @@ -1370,7 +1430,7 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) +void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) { const u32 msg_id = KBASE_TL_NEW_LPU; const size_t msg_size = @@ -1398,7 +1458,7 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) +void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; const size_t msg_size = @@ -1423,7 +1483,7 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, 
flags); } -void kbase_tlstream_tl_summary_new_as(void *as, u32 nr) +void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) { const u32 msg_id = KBASE_TL_NEW_AS; const size_t msg_size = @@ -1448,7 +1508,7 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) +void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; const size_t msg_size = @@ -1475,11 +1535,12 @@ void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) /*****************************************************************************/ -void kbase_tlstream_tl_new_ctx(void *context, u32 nr) +void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + + sizeof(tgid); unsigned long flags; char *buffer; size_t pos = 0; @@ -1495,12 +1556,14 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr) buffer, pos, &context, sizeof(context)); pos = kbasep_tlstream_write_bytes( buffer, pos, &nr, sizeof(nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &tgid, sizeof(tgid)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_new_atom(void *atom, u32 nr) +void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) { const u32 msg_id = KBASE_TL_NEW_ATOM; const size_t msg_size = @@ -1525,7 +1588,7 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_del_ctx(void *context) +void __kbase_tlstream_tl_del_ctx(void *context) { const u32 msg_id = KBASE_TL_DEL_CTX; const size_t msg_size = @@ -1548,7 +1611,7 @@ void kbase_tlstream_tl_del_ctx(void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_del_atom(void *atom) +void __kbase_tlstream_tl_del_atom(void *atom) { const u32 msg_id = KBASE_TL_DEL_ATOM; const size_t msg_size = @@ -1571,11 +1634,11 @@ void kbase_tlstream_tl_del_atom(void *atom) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context) +void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) { - const u32 msg_id = KBASE_TL_RET_GPU_CTX; + const u32 msg_id = KBASE_TL_RET_CTX_LPU; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); unsigned long flags; char *buffer; size_t pos = 0; @@ -1587,16 +1650,16 @@ void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); pos = kbasep_tlstream_write_bytes( buffer, pos, &context, sizeof(context)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) +void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) { const u32 msg_id = KBASE_TL_RET_ATOM_CTX; const size_t msg_size = @@ -1621,11 +1684,15 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void 
*context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu) +void __kbase_tlstream_tl_ret_atom_lpu( + void *atom, void *lpu, const char *attrib_match_list) { const u32 msg_id = KBASE_TL_RET_ATOM_LPU; + const size_t msg_s0 = sizeof(u32) + sizeof(char) + + strnlen(attrib_match_list, STRLEN_MAX); const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu); + sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + sizeof(lpu) + msg_s0; unsigned long flags; char *buffer; size_t pos = 0; @@ -1641,16 +1708,18 @@ void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu) buffer, pos, &atom, sizeof(atom)); pos = kbasep_tlstream_write_bytes( buffer, pos, &lpu, sizeof(lpu)); + pos = kbasep_tlstream_write_string( + buffer, pos, attrib_match_list, msg_s0); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context) +void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) { - const u32 msg_id = KBASE_TL_NRET_GPU_CTX; + const u32 msg_id = KBASE_TL_NRET_CTX_LPU; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu); unsigned long flags; char *buffer; size_t pos = 0; @@ -1662,16 +1731,16 @@ void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &gpu, sizeof(gpu)); pos = kbasep_tlstream_write_bytes( buffer, pos, &context, sizeof(context)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &lpu, sizeof(lpu)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) +void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) { const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; const size_t msg_size = @@ -1696,7 +1765,32 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) +void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) +{ + const u32 msg_id = KBASE_TL_DEP_ATOM_ATOM; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom1, sizeof(atom1)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom2, sizeof(atom2)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) { const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; const size_t msg_size = @@ -1721,7 +1815,7 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) +void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) { const u32 msg_id = KBASE_TL_RET_AS_CTX; const size_t msg_size = @@ -1746,7 +1840,7 @@ void 
kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) +void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) { const u32 msg_id = KBASE_TL_NRET_AS_CTX; const size_t msg_size = @@ -1771,7 +1865,7 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_ret_atom_as(void *atom, void *as) +void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) { const u32 msg_id = KBASE_TL_RET_ATOM_AS; const size_t msg_size = @@ -1796,7 +1890,7 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_nret_atom_as(void *atom, void *as) +void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) { const u32 msg_id = KBASE_TL_NRET_ATOM_AS; const size_t msg_size = @@ -1821,7 +1915,7 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_attrib_atom_config( +void __kbase_tlstream_tl_attrib_atom_config( void *atom, u64 jd, u64 affinity, u32 config) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; @@ -1852,7 +1946,7 @@ void kbase_tlstream_tl_attrib_atom_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_tl_attrib_as_config( +void __kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg) { const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; @@ -1883,7 +1977,7 @@ void kbase_tlstream_tl_attrib_as_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void kbase_tlstream_jd_gpu_soft_reset(void *gpu) +void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) { const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; const size_t msg_size = @@ -1908,7 +2002,7 @@ void kbase_tlstream_jd_gpu_soft_reset(void *gpu) /*****************************************************************************/ -void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) { const u32 msg_id = KBASE_AUX_PM_STATE; const size_t msg_size = @@ -1933,7 +2027,28 @@ void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_job_softstop(u32 js_id) +void __kbase_tlstream_aux_issue_job_softstop(void *katom) +{ + const u32 msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(katom); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + +void __kbase_tlstream_aux_job_softstop(u32 js_id) { const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP; const size_t msg_size = @@ -1955,12 +2070,17 @@ void kbase_tlstream_aux_job_softstop(u32 js_id) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count) +/** + * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point + * @katom: the atom that has been soft-stopped + * @job_type: the job type + 
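+ *
+ * Both values are written into the AUX stream as a single
+ * KBASE_AUX_JOB_SOFTSTOP_EX message.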
*/ +static void __kbase_tlstream_aux_job_softstop_ex_record( + void *katom, u32 job_type) { - const u32 msg_id = KBASE_AUX_PAGEFAULT; + const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP_EX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(mmu_as) + - sizeof(page_count); + sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type); unsigned long flags; char *buffer; size_t pos = 0; @@ -1971,19 +2091,50 @@ void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &mmu_as, sizeof(mmu_as)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); pos = kbasep_tlstream_write_bytes( - buffer, pos, &page_count, sizeof(page_count)); + buffer, pos, &job_type, sizeof(job_type)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void kbase_tlstream_aux_pagesalloc(s64 page_count_change) +void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom) { - const u32 msg_id = KBASE_AUX_PAGESALLOC; + struct kbase_context *kctx = katom->kctx; + u64 jd = katom->jc; + + while (jd != 0) { + struct job_descriptor_header *job; + struct kbase_vmap_struct map; + + job = kbase_vmap(kctx, jd, sizeof(*job), &map); + if (!job) { + dev_err(kctx->kbdev->dev, + "__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n", + jd, (void *)katom); + break; + } + if (job->exception_status != BASE_JD_EVENT_STOPPED) { + kbase_vunmap(kctx, &map); + break; + } + + __kbase_tlstream_aux_job_softstop_ex_record( + katom, job->job_type); + + jd = job->job_descriptor_size ? + job->next_job._64 : job->next_job._32; + kbase_vunmap(kctx, &map); + } +} + +void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) +{ + const u32 msg_id = KBASE_AUX_PAGEFAULT; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(page_count_change); + sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + + sizeof(page_count_change); unsigned long flags; char *buffer; size_t pos = 0; @@ -1994,6 +2145,7 @@ void kbase_tlstream_aux_pagesalloc(s64 page_count_change) pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); pos = kbasep_tlstream_write_bytes( buffer, pos, &page_count_change, sizeof(page_count_change)); @@ -2002,3 +2154,27 @@ void kbase_tlstream_aux_pagesalloc(s64 page_count_change) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } +void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) +{ + const u32 msg_id = KBASE_AUX_PAGESALLOC; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) + + sizeof(page_count); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &page_count, sizeof(page_count)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h 
b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index 3017104..6c5c596 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,18 +102,65 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); /*****************************************************************************/ +void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); +void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); +void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); +void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); +void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr); +void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); +void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid); +void __kbase_tlstream_tl_new_atom(void *atom, u32 nr); +void __kbase_tlstream_tl_del_ctx(void *context); +void __kbase_tlstream_tl_del_atom(void *atom); +void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); +void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); +void __kbase_tlstream_tl_ret_atom_lpu( + void *atom, void *lpu, const char *attrib_match_list); +void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); +void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); +void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); +void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); +void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); +void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); +void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); +void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); +void __kbase_tlstream_tl_attrib_atom_config( + void *atom, u64 jd, u64 affinity, u32 config); +void __kbase_tlstream_tl_attrib_as_config( + void *as, u64 transtab, u64 memattr, u64 transcfg); +void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); +void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); +void __kbase_tlstream_aux_issue_job_softstop(void *katom); +void __kbase_tlstream_aux_job_softstop(u32 js_id); +void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom); +void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); +void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); + +extern atomic_t kbase_tlstream_enabled; + +#define __TRACE_IF_ENABLED(trace_name, ...) \ + do { \ + int enabled = atomic_read(&kbase_tlstream_enabled); \ + if (enabled) \ + __kbase_tlstream_##trace_name(__VA_ARGS__); \ + } while (0) + +/*****************************************************************************/ + /** * kbase_tlstream_tl_summary_new_ctx - create context object in timeline * summary * @context: name of the context object * @nr: context number + * @tgid: thread Group Id * * Function emits a timeline message informing about context creation. Context * is created with context number (its attribute), that can be used to link * kbase context with userspace context. * This message is directed to timeline summary stream. 
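
/*
 * Illustrative sketch, not part of the patch above: the header now exposes
 * every tracepoint as a macro that calls the real __kbase_tlstream_* body
 * only while the atomic flag kbase_tlstream_enabled is non-zero, via
 * __TRACE_IF_ENABLED.  Below is a minimal, self-contained userspace model of
 * that enable-flag pattern; only the macro shape comes from the patch, all
 * other names are made up for the example.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int trace_enabled;	/* stands in for kbase_tlstream_enabled */

static void __trace_new_ctx(void *ctx, unsigned nr, unsigned tgid)
{
	printf("new_ctx %p nr=%u tgid=%u\n", ctx, nr, tgid);
}

#define TRACE_IF_ENABLED(trace_name, ...) \
	do { \
		if (atomic_load(&trace_enabled)) \
			__trace_##trace_name(__VA_ARGS__); \
	} while (0)

#define trace_new_ctx(ctx, nr, tgid) TRACE_IF_ENABLED(new_ctx, ctx, nr, tgid)

int main(void)
{
	int dummy;

	trace_new_ctx(&dummy, 1, 42);	/* dropped: tracing disabled */
	atomic_store(&trace_enabled, 1);
	trace_new_ctx(&dummy, 1, 42);	/* emitted */
	return 0;
}
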
*/ -void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr); +#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \ + __TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid) /** * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary @@ -125,7 +172,8 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr); * created with two attributes: id and core count. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); +#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \ + __TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count) /** * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary @@ -138,7 +186,8 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); * and function bearing information about this LPU abilities. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); +#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \ + __TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn) /** * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU @@ -149,7 +198,8 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); * along with GPU object. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); +#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \ + __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu) /** * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary @@ -161,7 +211,8 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); * address space. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_new_as(void *as, u32 nr); +#define kbase_tlstream_tl_summary_new_as(as, nr) \ + __TRACE_IF_ENABLED(tl_summary_new_as, as, nr) /** * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU @@ -172,18 +223,21 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr); * shall be deleted along with GPU object. * This message is directed to timeline summary stream. */ -void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); +#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \ + __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu) /** * kbase_tlstream_tl_new_ctx - create context object in timeline * @context: name of the context object * @nr: context number + * @tgid: thread Group Id * * Function emits a timeline message informing about context creation. Context * is created with context number (its attribute), that can be used to link * kbase context with userspace context. */ -void kbase_tlstream_tl_new_ctx(void *context, u32 nr); +#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \ + __TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid) /** * kbase_tlstream_tl_new_atom - create atom object in timeline @@ -194,7 +248,8 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr); * created with atom number (its attribute) that links it with actual work * bucket id understood by hardware. 
*/ -void kbase_tlstream_tl_new_atom(void *atom, u32 nr); +#define kbase_tlstream_tl_new_atom(atom, nr) \ + __TRACE_IF_ENABLED(tl_new_atom, atom, nr) /** * kbase_tlstream_tl_del_ctx - destroy context object in timeline @@ -203,7 +258,8 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr); * Function emits a timeline message informing that context object ceased to * exist. */ -void kbase_tlstream_tl_del_ctx(void *context); +#define kbase_tlstream_tl_del_ctx(context) \ + __TRACE_IF_ENABLED(tl_del_ctx, context) /** * kbase_tlstream_tl_del_atom - destroy atom object in timeline @@ -212,17 +268,19 @@ void kbase_tlstream_tl_del_ctx(void *context); * Function emits a timeline message informing that atom object ceased to * exist. */ -void kbase_tlstream_tl_del_atom(void *atom); +#define kbase_tlstream_tl_del_atom(atom) \ + __TRACE_IF_ENABLED(tl_del_atom, atom) /** - * kbase_tlstream_tl_ret_gpu_ctx - retain GPU by context - * @gpu: name of the GPU object + * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU * @context: name of the context object + * @lpu: name of the Logical Processing Unit object * - * Function emits a timeline message informing that GPU object is being held - * by context and must not be deleted unless it is released. + * Function emits a timeline message informing that context is being held + * by LPU and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context); +#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \ + __TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu) /** * kbase_tlstream_tl_ret_atom_ctx - retain atom by context @@ -232,27 +290,31 @@ void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context); * Function emits a timeline message informing that atom object is being held * by context and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); +#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \ + __TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context) /** * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU - * @atom: name of the atom object - * @lpu: name of the Logical Processing Unit object + * @atom: name of the atom object + * @lpu: name of the Logical Processing Unit object + * @attrib_match_list: list containing match operator attributes * * Function emits a timeline message informing that atom object is being held * by LPU and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu); +#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \ + __TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list) /** - * kbase_tlstream_tl_nret_gpu_ctx - release GPU by context - * @gpu: name of the GPU object + * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU * @context: name of the context object + * @lpu: name of the Logical Processing Unit object * - * Function emits a timeline message informing that GPU object is being released - * by context. + * Function emits a timeline message informing that context is being released + * by LPU object. */ -void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context); +#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \ + __TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu) /** * kbase_tlstream_tl_nret_atom_ctx - release atom by context @@ -262,7 +324,8 @@ void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context); * Function emits a timeline message informing that atom object is being * released by context. 
*/ -void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); +#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \ + __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context) /** * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU @@ -272,7 +335,8 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); * Function emits a timeline message informing that atom object is being * released by LPU. */ -void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); +#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \ + __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu) /** * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context @@ -282,7 +346,8 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); * Function emits a timeline message informing that address space object * is being held by the context object. */ -void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); +#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \ + __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx) /** * kbase_tlstream_tl_nret_as_ctx - release address space by context @@ -292,7 +357,8 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); * Function emits a timeline message informing that address space object * is being released by atom. */ -void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); +#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \ + __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx) /** * kbase_tlstream_tl_ret_atom_as - retain atom by address space @@ -302,7 +368,8 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); * Function emits a timeline message informing that atom object is being held * by address space and must not be deleted unless it is released. */ -void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); +#define kbase_tlstream_tl_ret_atom_as(atom, as) \ + __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as) /** * kbase_tlstream_tl_nret_atom_as - release atom by address space @@ -312,7 +379,19 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); * Function emits a timeline message informing that atom object is being * released by address space. */ -void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); +#define kbase_tlstream_tl_nret_atom_as(atom, as) \ + __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) + +/** + * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom + * @atom1: name of the child atom object + * @atom2: name of the parent atom object that depends on child atom + * + * Function emits a timeline message informing that parent atom waits for + * child atom object to be completed before start its execution. + */ +#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \ + __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2) /** * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes @@ -323,8 +402,8 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); * * Function emits a timeline message containing atom attributes. */ -void kbase_tlstream_tl_attrib_atom_config( - void *atom, u64 jd, u64 affinity, u32 config); +#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \ + __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) /** * kbase_tlstream_tl_attrib_as_config - address space attributes @@ -335,8 +414,8 @@ void kbase_tlstream_tl_attrib_atom_config( * * Function emits a timeline message containing address space attributes. 
*/ -void kbase_tlstream_tl_attrib_as_config( - void *as, u64 transtab, u64 memattr, u64 transcfg); +#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \ + __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) /** * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset @@ -345,36 +424,62 @@ void kbase_tlstream_tl_attrib_as_config( * This imperative tracepoint is specific to job dumping. * Function emits a timeline message indicating GPU soft reset. */ -void kbase_tlstream_jd_gpu_soft_reset(void *gpu); +#define kbase_tlstream_jd_gpu_soft_reset(gpu) \ + __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu) /** * kbase_tlstream_aux_pm_state - timeline message: power management state * @core_type: core type (shader, tiler, l2 cache, l3 cache) * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) */ -void kbase_tlstream_aux_pm_state(u32 core_type, u64 state); +#define kbase_tlstream_aux_pm_state(core_type, state) \ + __TRACE_IF_ENABLED(aux_pm_state, core_type, state) + +/** + * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued + * @katom: the atom that is being soft-stopped + */ +#define kbase_tlstream_aux_issue_job_softstop(katom) \ + __TRACE_IF_ENABLED(aux_issue_job_softstop, katom) /** * kbase_tlstream_aux_job_softstop - soft job stop occurred * @js_id: job slot id */ -void kbase_tlstream_aux_job_softstop(u32 js_id); +#define kbase_tlstream_aux_job_softstop(js_id) \ + __TRACE_IF_ENABLED(aux_job_softstop, js_id) + +/** + * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom + * @katom: the atom that has been soft-stopped + * + * This trace point adds more details about the soft-stopped atom. These details + * can't be safety collected inside the interrupt handler so we're doing it + * inside a worker. + * + * Note: this is not the same information that is recorded in the trace point, + * refer to __kbase_tlstream_aux_job_softstop_ex() for more details. + */ +#define kbase_tlstream_aux_job_softstop_ex(katom) \ + __TRACE_IF_ENABLED(aux_job_softstop_ex, katom) /** * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event * resulting in new pages being mapped - * @mmu_as: MMU address space number - * @page_count: number of currently used pages + * @ctx_nr: kernel context number + * @page_count_change: number of pages to be added */ -void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count); +#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \ + __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change) /** * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated * pages is changed - * @page_count_change: number of pages to be added or subtracted (according to - * the sign) + * @ctx_nr: kernel context number + * @page_count: number of pages used by the context */ -void kbase_tlstream_aux_pagesalloc(s64 page_count_change); +#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \ + __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) #endif /* _KBASE_TLSTREAM_H */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c index aac9858..a606ae8 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,7 +102,7 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = { .open = kbasep_trace_timeline_debugfs_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release, }; void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h index 8108677..93ddb5a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,8 +41,17 @@ #include "mali_kbase_gpuprops_types.h" +/* + * 10.1: + * - Do mmap in kernel for SAME_VA memory allocations rather then + * calling back into the kernel as a 2nd stage of the allocation request. + * + * 10.2: + * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA + * region for use with JIT (ignored on 32-bit platforms) + */ #define BASE_UK_VERSION_MAJOR 10 -#define BASE_UK_VERSION_MINOR 0 +#define BASE_UK_VERSION_MINOR 2 struct kbase_uk_mem_alloc { union uk_header header; @@ -321,8 +330,8 @@ struct kbase_uk_context_id { int id; }; -#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ - defined(CONFIG_MALI_MIPE_ENABLED) +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) /** * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure * @header: UK structure header @@ -353,7 +362,7 @@ struct kbase_uk_tlstream_flush { #if MALI_UNIT_TEST /** - * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure + * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure * @header: UK structure header * @tpw_count: number of trace point writers in each context * @msg_delay: time delay between tracepoints from one writer in milliseconds @@ -374,7 +383,7 @@ struct kbase_uk_tlstream_test { }; /** - * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure + * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure * @header: UK structure header * @bytes_collected: number of bytes read by user * @bytes_generated: number of bytes generated by tracepoints @@ -390,7 +399,53 @@ struct kbase_uk_tlstream_stats { u32 bytes_generated; }; #endif /* MALI_UNIT_TEST */ -#endif /* MALI_KTLSTREAM_ENABLED */ +#endif /* MALI_MIPE_ENABLED */ + +/** + * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl + * @header: UK structure header + * @data: Counter samples for the dummy model + * @size:............Size of the counter sample data + */ +struct kbase_uk_prfcnt_values { + union uk_header header; + /* IN */ + u32 *data; + u32 size; +}; + +/** + * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure + * @header: UK structure header + * @evt: the GPU address containing the event + * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or + * BASE_JD_SOFT_EVENT_RESET + * @flags: reserved for future uses, must be set to 0 + * + * This structure is used to 
update the status of a software event. If the + * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting + * on this event will complete. + */ +struct kbase_uk_soft_event_update { + union uk_header header; + /* IN */ + u64 evt; + u32 new_status; + u32 flags; +}; + +/** + * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure + * @header: UK structure header + * @va_pages: Number of virtual pages required for JIT + * + * This structure is used when requesting initialization of JIT. + */ +struct kbase_uk_mem_jit_init { + union uk_header header; + /* IN */ + u64 va_pages; +}; enum kbase_uk_function_id { KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), @@ -443,18 +498,26 @@ enum kbase_uk_function_id { KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), -#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ - defined(CONFIG_MALI_MIPE_ENABLED) +#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ + !defined(MALI_MIPE_ENABLED) KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32), #if MALI_UNIT_TEST KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), #endif /* MALI_UNIT_TEST */ KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), -#endif /* MALI_KTLSTREAM_ENABLED */ +#endif /* MALI_MIPE_ENABLED */ KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), +#ifdef SUPPORT_MALI_NO_MALI + KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37), +#endif + + KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38), + + KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), + KBASE_FUNC_MAX }; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index bfa8bfa..371122f 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. 
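
/*
 * Illustrative sketch, not part of the patch above: the new UK structures in
 * mali_kbase_uku.h follow the usual layout of a uk_header followed by IN
 * fields, dispatched by an id from enum kbase_uk_function_id.  The
 * self-contained model below shows how a caller might fill the new
 * soft-event update request; the "model_" types, the placeholder UK_FUNC_ID
 * base value and the event address are all invented stand-ins, since the
 * real uk_header and ioctl plumbing are defined outside this patch.
 */
#include <stdint.h>
#include <stdio.h>

#define MODEL_UK_FUNC_ID	512u	/* placeholder; real UK_FUNC_ID not shown here */
#define MODEL_JD_SOFT_EVENT_SET	1u	/* stands in for BASE_JD_SOFT_EVENT_SET */

union model_uk_header {			/* stands in for union uk_header */
	uint32_t id;
	uint64_t sizer;
};

struct model_uk_soft_event_update {	/* mirrors struct kbase_uk_soft_event_update */
	union model_uk_header header;
	uint64_t evt;			/* GPU address holding the event */
	uint32_t new_status;		/* SET or RESET */
	uint32_t flags;			/* reserved, must be zero */
};

int main(void)
{
	struct model_uk_soft_event_update req = {
		.header.id  = MODEL_UK_FUNC_ID + 38,	/* models KBASE_FUNC_SOFT_EVENT_UPDATE */
		.evt        = 0x1000,			/* illustrative GPU VA */
		.new_status = MODEL_JD_SOFT_EVENT_SET,
		.flags      = 0,
	};

	printf("soft event update: evt=0x%llx status=%u\n",
	       (unsigned long long)req.evt, req.new_status);
	return 0;
}
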
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,6 +30,7 @@ #include #include #include +#include /*****************************************************************************/ @@ -115,6 +116,7 @@ struct kbase_vinstr_context { * @dump_size: size of one dump buffer in bytes * @bitmap: bitmap request for JM, TILER, SHADER and MMU counters * @legacy_buffer: userspace hwcnt dump buffer (legacy interface) + * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface) * @accum_buffer: temporary accumulation buffer for preserving counters * @dump_time: next time this clients shall request hwcnt dump * @dump_interval: interval between periodic hwcnt dumps @@ -134,6 +136,7 @@ struct kbase_vinstr_client { size_t dump_size; u32 bitmap[4]; void __user *legacy_buffer; + void *kernel_buffer; void *accum_buffer; u64 dump_time; u32 dump_interval; @@ -225,11 +228,11 @@ static void hwcnt_bitmap_union(u32 dst[4], u32 src[4]) dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM]; } -static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx) +size_t kbase_vinstr_dump_size(struct kbase_device *kbdev) { - struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev; size_t dump_size; +#ifndef CONFIG_MALI_NO_MALI if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) { u32 nr_cg; @@ -237,7 +240,9 @@ static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx) dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; - } else { + } else +#endif /* CONFIG_MALI_NO_MALI */ + { /* assume v5 for now */ base_gpu_props *props = &kbdev->gpu_props.props; u32 nr_l2 = props->l2_props.num_l2_slices; @@ -251,6 +256,13 @@ static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx) } return dump_size; } +KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size); + +static size_t kbasep_vinstr_dump_size_ctx( + struct kbase_vinstr_context *vinstr_ctx) +{ + return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev); +} static int kbasep_vinstr_map_kernel_dump_buffer( struct kbase_vinstr_context *vinstr_ctx) @@ -261,7 +273,7 @@ static int kbasep_vinstr_map_kernel_dump_buffer( u16 va_align = 0; flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR; - vinstr_ctx->dump_size = kbasep_vinstr_dump_size(vinstr_ctx); + vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); nr_pages = PFN_UP(vinstr_ctx->dump_size); reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, @@ -298,6 +310,8 @@ static void kbasep_vinstr_unmap_kernel_dump_buffer( */ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) { + struct kbase_device *kbdev = vinstr_ctx->kbdev; + struct kbasep_kctx_list_element *element; int err; vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); @@ -313,10 +327,39 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) return err; } + /* Add kernel context to list of contexts associated with device. */ + element = kzalloc(sizeof(*element), GFP_KERNEL); + if (element) { + element->kctx = vinstr_ctx->kctx; + mutex_lock(&kbdev->kctx_list_lock); + list_add(&element->link, &kbdev->kctx_list); + + /* Inform timeline client about new context. + * Do this while holding the lock to avoid tracepoint + * being created in both body and summary stream. 
*/ + kbase_tlstream_tl_new_ctx( + vinstr_ctx->kctx, + (u32)(vinstr_ctx->kctx->id), + (u32)(vinstr_ctx->kctx->tgid)); + + mutex_unlock(&kbdev->kctx_list_lock); + } else { + /* Don't treat this as a fail - just warn about it. */ + dev_warn(kbdev->dev, + "couldn't add kctx to kctx_list\n"); + } + err = enable_hwcnt(vinstr_ctx); if (err) { kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + if (element) { + mutex_lock(&kbdev->kctx_list_lock); + list_del(&element->link); + kfree(element); + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); vinstr_ctx->kctx = NULL; return err; } @@ -329,6 +372,13 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) disable_hwcnt(vinstr_ctx); kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + if (element) { + mutex_lock(&kbdev->kctx_list_lock); + list_del(&element->link); + kfree(element); + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); vinstr_ctx->kctx = NULL; return -EFAULT; } @@ -342,32 +392,55 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) */ static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) { + struct kbase_device *kbdev = vinstr_ctx->kbdev; + struct kbasep_kctx_list_element *element; + struct kbasep_kctx_list_element *tmp; + bool found = false; + /* Release hw counters dumping resources. */ vinstr_ctx->thread = NULL; disable_hwcnt(vinstr_ctx); kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); + + /* Remove kernel context from the device's contexts list. */ + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { + if (element->kctx == vinstr_ctx->kctx) { + list_del(&element->link); + kfree(element); + found = true; + } + } + mutex_unlock(&kbdev->kctx_list_lock); + + if (!found) + dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + + /* Inform timeline client about context destruction. */ + kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); + vinstr_ctx->kctx = NULL; } /** * kbasep_vinstr_attach_client - Attach a client to the vinstr core - * @vinstr_ctx: vinstr context - * @buffer_count: requested number of dump buffers - * @bitmap: bitmaps describing which counters should be enabled - * @argp: pointer where notification descriptor shall be stored + * @vinstr_ctx: vinstr context + * @buffer_count: requested number of dump buffers + * @bitmap: bitmaps describing which counters should be enabled + * @argp: pointer where notification descriptor shall be stored + * @kernel_buffer: pointer to kernel side buffer * * Return: vinstr opaque client handle or NULL on failure */ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count, - u32 bitmap[4], void *argp) + u32 bitmap[4], void *argp, void *kernel_buffer) { struct task_struct *thread = NULL; struct kbase_vinstr_client *cli; KBASE_DEBUG_ASSERT(vinstr_ctx); - KBASE_DEBUG_ASSERT(argp); KBASE_DEBUG_ASSERT(buffer_count >= 0); KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT); KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1))); @@ -405,7 +478,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( /* The GPU resets the counter block every time there is a request * to dump it. We need a per client kernel buffer for accumulating * the counters. 
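
/*
 * Illustrative sketch, not part of the patch above: kbasep_vinstr_create_kctx()
 * now registers its kernel context on the device's kctx_list under
 * kctx_list_lock and, on any later failure, removes that element again and
 * emits the matching tl_del_ctx tracepoint.  The stand-alone model below only
 * shows that register/unwind shape; a pthread mutex and a trivial list stand
 * in for the kernel primitives, and the forced error replaces enable_hwcnt().
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { void *kctx; struct node *next; };

static struct node *kctx_list;
static pthread_mutex_t kctx_list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct node *register_ctx(void *kctx)
{
	struct node *n = malloc(sizeof(*n));

	if (!n)
		return NULL;		/* driver only warns and carries on */
	n->kctx = kctx;
	pthread_mutex_lock(&kctx_list_lock);
	n->next = kctx_list;
	kctx_list = n;
	pthread_mutex_unlock(&kctx_list_lock);
	return n;
}

static void unregister_ctx(struct node *n)
{
	struct node **p;

	pthread_mutex_lock(&kctx_list_lock);
	for (p = &kctx_list; *p; p = &(*p)->next) {
		if (*p == n) {
			*p = n->next;
			free(n);
			break;
		}
	}
	pthread_mutex_unlock(&kctx_list_lock);
}

static int create_kctx(void *kctx)
{
	struct node *element = register_ctx(kctx);
	int err = -1;			/* pretend a later setup step failed */

	if (err) {
		if (element)
			unregister_ctx(element);	/* mirrors the new unwind path */
		return err;
	}
	return 0;
}

int main(void)
{
	int dummy;

	printf("create_kctx: %d\n", create_kctx(&dummy));
	return 0;
}
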
*/ - cli->dump_size = kbasep_vinstr_dump_size(vinstr_ctx); + cli->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx); cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL); if (!cli->accum_buffer) goto error; @@ -437,6 +510,8 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( O_RDONLY | O_CLOEXEC); if (0 > *fd) goto error; + } else if (kernel_buffer) { + cli->kernel_buffer = kernel_buffer; } else { cli->legacy_buffer = (void __user *)argp; } @@ -475,11 +550,7 @@ error: return NULL; } -/** - * kbasep_vinstr_detach_client - Detach a client from the vinstr core - * @cli: Pointer to vinstr client - */ -static void kbasep_vinstr_detach_client(struct kbase_vinstr_client *cli) +void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli) { struct kbase_vinstr_context *vinstr_ctx; struct kbase_vinstr_client *iter, *tmp; @@ -540,6 +611,7 @@ static void kbasep_vinstr_detach_client(struct kbase_vinstr_client *cli) if (thread) kthread_stop(thread); } +KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client); /* Accumulate counters in the dump buffer */ static void accum_dump_buffer(void *dst, void *src, size_t dump_size) @@ -702,9 +774,12 @@ static void patch_dump_buffer_hdr_v5( static void accum_clients(struct kbase_vinstr_context *vinstr_ctx) { struct kbase_vinstr_client *iter; - int v4; + int v4 = 0; +#ifndef CONFIG_MALI_NO_MALI v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4); +#endif + list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) { /* Don't bother accumulating clients whose hwcnt requests * have not yet been honoured. */ @@ -791,6 +866,11 @@ static int kbasep_vinstr_collect_and_accumulate( { int rcode; +#ifdef CONFIG_MALI_NO_MALI + /* The dummy model needs the CPU mapping. */ + gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); +#endif + /* Request HW counters dump. * Disable preemption to make dump timestamp more accurate. */ preempt_disable(); @@ -865,6 +945,23 @@ static int kbasep_vinstr_fill_dump_buffer_legacy( return rcode; } +/** + * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer + * allocated in kernel space + * @cli: requesting client + * + * Return: zero on success + * + * This is part of the kernel client interface. 
+ */ +static int kbasep_vinstr_fill_dump_buffer_kernel( + struct kbase_vinstr_client *cli) +{ + memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size); + + return 0; +} + /** * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst * @vinstr_ctx: vinstr context @@ -910,6 +1007,8 @@ static int kbasep_vinstr_update_client( if (cli->buffer_count) rcode = kbasep_vinstr_fill_dump_buffer( cli, timestamp, event_id); + else if (cli->kernel_buffer) + rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli); else rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli); @@ -1299,14 +1398,18 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event( static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( struct kbase_vinstr_client *cli, u32 __user *hwver) { +#ifndef CONFIG_MALI_NO_MALI struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx; - u32 ver; +#endif + + u32 ver = 5; +#ifndef CONFIG_MALI_NO_MALI KBASE_DEBUG_ASSERT(vinstr_ctx); + if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) + ver = 4; +#endif - ver = 4; - if (!kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4)) - ver = 5; return put_user(ver, hwver); } @@ -1451,7 +1554,7 @@ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, cli = filp->private_data; KBASE_DEBUG_ASSERT(cli); - kbasep_vinstr_detach_client(cli); + kbase_vinstr_detach_client(cli); return 0; } @@ -1525,7 +1628,8 @@ int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx, vinstr_ctx, setup->buffer_count, bitmap, - &setup->fd); + &setup->fd, + NULL); if (!cli) return -ENOMEM; @@ -1557,7 +1661,8 @@ int kbase_vinstr_legacy_hwc_setup( vinstr_ctx, 0, bitmap, - (void *)(long)setup->dump_buffer); + (void *)(long)setup->dump_buffer, + NULL); if (!(*cli)) return -ENOMEM; @@ -1565,13 +1670,37 @@ int kbase_vinstr_legacy_hwc_setup( if (!*cli) return -EINVAL; - kbasep_vinstr_detach_client(*cli); + kbase_vinstr_detach_client(*cli); *cli = NULL; } return 0; } +struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_uk_hwcnt_reader_setup *setup, + void *kernel_buffer) +{ + u32 bitmap[4]; + + if (!vinstr_ctx || !setup || !kernel_buffer) + return NULL; + + bitmap[SHADER_HWCNT_BM] = setup->shader_bm; + bitmap[TILER_HWCNT_BM] = setup->tiler_bm; + bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; + bitmap[JM_HWCNT_BM] = setup->jm_bm; + + return kbasep_vinstr_attach_client( + vinstr_ctx, + 0, + bitmap, + NULL, + kernel_buffer); +} +KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup); + int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, enum base_hwcnt_reader_event event_id) { @@ -1615,6 +1744,7 @@ exit: return rcode; } +KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump); int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) { @@ -1658,6 +1788,11 @@ void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx) KBASE_DEBUG_ASSERT(vinstr_ctx); mutex_lock(&vinstr_ctx->lock); + if (!vinstr_ctx->nclients || vinstr_ctx->suspended) { + mutex_unlock(&vinstr_ctx->lock); + return; + } + kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); vinstr_ctx->suspended = true; vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients; @@ -1670,6 +1805,11 @@ void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx) KBASE_DEBUG_ASSERT(vinstr_ctx); mutex_lock(&vinstr_ctx->lock); + if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) { + mutex_unlock(&vinstr_ctx->lock); + return; + } + vinstr_ctx->suspended = false; vinstr_ctx->waiting_clients = 
vinstr_ctx->suspended_clients; vinstr_ctx->reprogram = true; diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h index 12340e5..d32462a 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h @@ -66,6 +66,22 @@ int kbase_vinstr_legacy_hwc_setup( struct kbase_vinstr_client **cli, struct kbase_uk_hwcnt_setup *setup); +/** + * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side + * client + * @vinstr_ctx: vinstr context + * @setup: reader's configuration + * @kernel_buffer: pointer to dump buffer + * + * setup->buffer_count and setup->fd are not used for kernel side clients. + * + * Return: pointer to client structure, or NULL on failure + */ +struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_uk_hwcnt_reader_setup *setup, + void *kernel_buffer); + /** * kbase_vinstr_hwc_dump - issue counter dump for vinstr client * @cli: pointer to vinstr client @@ -100,5 +116,19 @@ void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx); */ void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx); +/** + * kbase_vinstr_dump_size - Return required size of dump buffer + * @kbdev: device pointer + * + * Return : buffer size in bytes + */ +size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); + +/** + * kbase_vinstr_detach_client - Detach a client from the vinstr core + * @cli: Pointer to vinstr client + */ +void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli); + #endif /* _KBASE_VINSTR_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h index fc3cf32..93fc5ea 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,19 +15,15 @@ - - #if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MALI_H -#include -#include - #undef TRACE_SYSTEM #define TRACE_SYSTEM mali -#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) #define TRACE_INCLUDE_FILE mali_linux_trace +#include + #define MALI_JOB_SLOTS_EVENT_CHANGED /** diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h new file mode 100644 index 0000000..a509cbd --- /dev/null +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h @@ -0,0 +1,26 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
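
/*
 * Illustrative sketch, not part of the patch above: the kernel-client
 * interface added to mali_kbase_vinstr.h (kbase_vinstr_dump_size,
 * kbase_vinstr_hwcnt_kernel_setup, kbase_vinstr_hwc_dump,
 * kbase_vinstr_detach_client) would be used by an in-kernel caller roughly as
 * below.  This is a hypothetical fragment, not a call site from the driver:
 * it assumes the usual kbase/slab includes, kbdev and vinstr_ctx come from
 * the caller, BASE_HWCNT_READER_EVENT_MANUAL is assumed from the hwcnt
 * reader API, and error handling is abbreviated.
 */
static int hwcnt_kernel_client_example(struct kbase_device *kbdev,
		struct kbase_vinstr_context *vinstr_ctx)
{
	struct kbase_uk_hwcnt_reader_setup setup = {
		.jm_bm     = ~0u,	/* enable all job manager counters */
		.shader_bm = ~0u,
		.tiler_bm  = ~0u,
		.mmu_l2_bm = ~0u,
		/* buffer_count and fd are unused for kernel side clients */
	};
	struct kbase_vinstr_client *cli;
	void *buf;
	int err;

	buf = kzalloc(kbase_vinstr_dump_size(kbdev), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cli = kbase_vinstr_hwcnt_kernel_setup(vinstr_ctx, &setup, buf);
	if (!cli) {
		kfree(buf);
		return -ENOMEM;
	}

	/* Counters are accumulated into buf on each dump request. */
	err = kbase_vinstr_hwc_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL);

	kbase_vinstr_detach_client(cli);
	kfree(buf);
	return err;
}
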
+ * + */ + + + +#ifndef _MIDG_COHERENCY_H_ +#define _MIDG_COHERENCY_H_ + +#define COHERENCY_ACE_LITE 0 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 +#define COHERENCY_FEATURE_BIT(x) (1 << (x)) + +#endif /* _MIDG_COHERENCY_H_ */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h index c3def83..6b1d67d 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,11 +15,12 @@ - - #ifndef _MIDGARD_REGMAP_H_ #define _MIDGARD_REGMAP_H_ +#include "mali_midg_coherency.h" +#include "mali_kbase_gpu_id.h" + /* * Begin Register Offsets */ @@ -57,7 +58,7 @@ #define GPU_COMMAND 0x030 /* (WO) */ #define GPU_STATUS 0x034 /* (RO) */ - +#define LATEST_FLUSH 0x038 /* (RO) */ #define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ @@ -168,6 +169,8 @@ #define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ #define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ #define JM_CONFIG 0xF00 /* (RW) Job Manager configuration register (Implementation specific register) */ #define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ @@ -224,6 +227,7 @@ #define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ #define MEMORY_MANAGEMENT_BASE 0x2000 #define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) @@ -265,6 +269,14 @@ #define AS_STATUS 0x28 /* (RO) Status flags for address space n */ +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 +/* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_LO 0x38 +/* (RO) Secondary fault address for address space n, high word */ +#define AS_FAULTEXTRA_HI 0x3C /* End Register Offsets */ @@ -292,6 +304,11 @@ #define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define MMU_HW_OUTA_BITS 40 +#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) /* * Begin MMU STATUS register values @@ -304,12 +321,38 @@ #define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) #define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) #define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8) +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8) #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8) +/* + * Begin MMU TRANSCFG register values + */ + +#define AS_TRANSCFG_ADRMODE_LEGACY 0 +#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + +#define 
AS_TRANSCFG_ADRMODE_MASK 0xF + + +/* + * Begin TRANSCFG register values + */ +#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2 << 28) +#define AS_TRANSCFG_PTW_SH_IS (3 << 28) /* * Begin Command Values @@ -347,6 +390,8 @@ #define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION #define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) #define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) #define JS_CONFIG_THREAD_PRI(n) ((n) << 16) /* JS_STATUS register values */ @@ -399,19 +444,35 @@ #define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ #define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ #define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ +#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ /* End Command Values */ /* GPU_STATUS values */ #define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ /* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* address space bitmap starts from bit 4 of the register */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + #define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ #define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ #define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ /* AS_MEMATTR values: */ +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull /* Use GPU implementation-defined caching policy. 
*/ #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull @@ -441,39 +502,6 @@ /* Outer coherent, write alloc inner */ #define AS_MEMATTR_INDEX_OUTER_WA 4 -/* GPU_ID register */ -#define GPU_ID_VERSION_STATUS_SHIFT 0 -#define GPU_ID_VERSION_MINOR_SHIFT 4 -#define GPU_ID_VERSION_MAJOR_SHIFT 12 -#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 -#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) -#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) -#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) -#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) - -/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ -#define GPU_ID_PI_T60X 0x6956 -#define GPU_ID_PI_T62X 0x0620 -#define GPU_ID_PI_T76X 0x0750 -#define GPU_ID_PI_T72X 0x0720 -#define GPU_ID_PI_TFRX 0x0880 -#define GPU_ID_PI_T86X 0x0860 -#define GPU_ID_PI_T82X 0x0820 -#define GPU_ID_PI_T83X 0x0830 - -/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ -#define GPU_ID_S_15DEV0 0x1 -#define GPU_ID_S_EAC 0x2 - -/* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */ -#define GPU_ID_MAKE(id, major, minor, status) \ - (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ - ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ - ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ - ((status) << GPU_ID_VERSION_STATUS_SHIFT)) - -/* End GPU_ID register */ - /* JS_FEATURES register */ #define JS_FEATURE_NULL_JOB (1u << 1) @@ -489,6 +517,8 @@ /* End JS_FEATURES register */ /* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) @@ -520,19 +550,14 @@ /* End THREAD_* registers */ -/* COHERENCY_* values*/ -#define COHERENCY_ACE_LITE 0 -#define COHERENCY_ACE 1 -#define COHERENCY_NONE 31 -#define COHERENCY_FEATURE_BIT(x) (1 << (x)) -/* End COHERENCY_* values */ - /* SHADER_CONFIG register */ #define SC_ALT_COUNTERS (1ul << 3) #define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4) #define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6) +#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) #define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16) +#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) #define SC_ENABLE_TEXGRD_FLAGS (1ul << 25) /* End SHADER_CONFIG register */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild index b9a30da..0833cac 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild +++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild @@ -11,3 +11,20 @@ # Boston, MA 02110-1301, USA. 
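
/*
 * Illustrative sketch, not part of the patch above: the new AS_TRANSCFG_*
 * values in mali_midg_regmap.h are bitfield encodings for the AArch64
 * page-table mode added to the MMU register map.  The self-contained example
 * below only shows how such a configuration word could be composed from
 * them; the constants are copied from the hunk above, but how and where the
 * driver actually programs AS_TRANSCFG_LO/HI is not shown in this patch, so
 * the composition itself is an assumption.
 */
#include <stdint.h>
#include <stdio.h>

/* Values copied from the mali_midg_regmap.h hunk. */
#define AS_TRANSCFG_ADRMODE_AARCH64_4K		6
#define AS_TRANSCFG_ADRMODE_MASK		0xF
#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK	(2 << 24)
#define AS_TRANSCFG_PTW_SH_IS			(3 << 28)

int main(void)
{
	uint64_t transcfg = 0;

	transcfg |= AS_TRANSCFG_ADRMODE_AARCH64_4K & AS_TRANSCFG_ADRMODE_MASK;
	transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;	/* cacheable table walks */
	transcfg |= AS_TRANSCFG_PTW_SH_IS;		/* inner shareable walks */

	printf("AS_TRANSCFG = 0x%llx\n", (unsigned long long)transcfg);
	return 0;
}
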
# # +ifeq ($(CONFIG_MALI_MIDGARD),y) +obj-y += platform/devicetree/mali_clock.c +obj-y += platform/devicetree/mpgpu.c +obj-y += platform/devicetree/meson_main2.c +obj-y += platform/devicetree/platform_gx.c +obj-y += platform/devicetree/scaling.c +obj-y += mali_kbase_runtime_pm.c +obj-y += mali_kbase_config_devicetree.c +else ifeq ($(CONFIG_MALI_MIDGARD),m) +SRC += platform/devicetree/mali_clock.c +SRC += platform/devicetree/mpgpu.c +SRC += platform/devicetree/meson_main2.c +SRC += platform/devicetree/platform_gx.c +SRC += platform/devicetree/scaling.c +SRC += platform/devicetree/mali_kbase_runtime_pm.c +SRC += platform/devicetree/mali_kbase_config_devicetree.c +endif diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c index 59c36b4..987043d 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c @@ -89,9 +89,17 @@ struct devfreq_cooling_ops t83x_model_ops = { #endif +#include + int kbase_platform_early_init(void) { /* Nothing needed at this stage */ return 0; } +static struct kbase_platform_config dummy_platform_config; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &dummy_platform_config; +} diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c index 3c9e65d..62f974c 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c @@ -177,14 +177,7 @@ int kbase_device_runtime_init(struct kbase_device *kbdev) { dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); pm_runtime_enable(kbdev->dev); -#ifdef CONFIG_MALI_MIDGARD_DEBUG_SYS - { - int err = kbase_platform_create_sysfs_file(kbdev->dev); - - if (err) - return err; - } -#endif /* CONFIG_MALI_MIDGARD_DEBUG_SYS */ + return 0; } diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c index 8266951..3baf3d9 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c @@ -16,9 +16,6 @@ #include -#ifdef CONFIG_DEVFREQ_THERMAL -#include -#endif #include #include #include @@ -68,74 +65,6 @@ struct kbase_pm_callback_conf pm_callbacks = { .power_resume_callback = NULL }; -#ifdef CONFIG_DEVFREQ_THERMAL - -#define FALLBACK_STATIC_TEMPERATURE 55000 - -static unsigned long juno_model_static_power(unsigned long voltage) -{ - struct thermal_zone_device *tz; - unsigned long temperature, temp; - unsigned long temp_squared, temp_cubed, temp_scaling_factor; - const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10); - const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; - - tz = thermal_zone_get_zone_by_name("gpu"); - if (IS_ERR(tz)) { - pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n", - PTR_ERR(tz)); - temperature = FALLBACK_STATIC_TEMPERATURE; - } else { - int ret; - - ret = tz->ops->get_temp(tz, &temperature); - if (ret) { - pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", - ret); - temperature 
= FALLBACK_STATIC_TEMPERATURE; - } - } - - /* Calculate the temperature scaling factor. To be applied to the - * voltage scaled power. - */ - temp = temperature / 1000; - temp_squared = temp * temp; - temp_cubed = temp_squared * temp; - temp_scaling_factor = - (2 * temp_cubed) - - (80 * temp_squared) - + (4700 * temp) - + 32000; - - return (((coefficient * voltage_cubed) >> 20) - * temp_scaling_factor) - / 1000000; -} - -static unsigned long juno_model_dynamic_power(unsigned long freq, - unsigned long voltage) -{ - /* The inputs: freq (f) is in Hz, and voltage (v) in mV. - * The coefficient (c) is in mW/(MHz mV mV). - * - * This function calculates the dynamic power after this formula: - * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) - */ - const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ - const unsigned long f_mhz = freq / 1000000; /* MHz */ - const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */ - - return (coefficient * v2 * f_mhz) / 1000000; /* mW */ -} - -struct devfreq_cooling_ops juno_model_ops = { - .get_static_power = juno_model_static_power, - .get_dynamic_power = juno_model_dynamic_power, -}; - -#endif /* CONFIG_DEVFREQ_THERMAL */ - /* * Juno Secure Mode integration */ diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h index fa5e9e9..5fc6d9e 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h @@ -70,16 +70,6 @@ */ #define PLATFORM_FUNCS (NULL) -/** Power model for IPA - * - * Attached value: pointer to @ref mali_pa_model_ops - */ -#ifdef CONFIG_DEVFREQ_THERMAL -#define POWER_MODEL_CALLBACKS (&juno_model_ops) -#else -#define POWER_MODEL_CALLBACKS (NULL) -#endif - /** * Secure mode switch * diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h index ac5060a..eb957d3 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,7 +24,7 @@ * Attached value: number in kHz * Default value: NA */ -#define GPU_FREQ_KHZ_MAX (5000) +#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq() /** * Minimum frequency GPU will be clocked at. Given in kHz. * This must be specified as there is no default value. 
@@ -32,16 +32,7 @@ * Attached value: number in kHz * Default value: NA */ -#define GPU_FREQ_KHZ_MIN (5000) - -/** - * Values used for determining the GPU frequency based on the LogicTile type - * Used by the function kbase_get_platform_logic_tile_type - */ -#define VE_VIRTEX6_GPU_FREQ_MIN 5000 -#define VE_VIRTEX6_GPU_FREQ_MAX 5000 -#define VE_VIRTEX7_GPU_FREQ_MIN 40000 -#define VE_VIRTEX7_GPU_FREQ_MAX 40000 +#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq() /** * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock @@ -81,12 +72,6 @@ */ #define PLATFORM_FUNCS (NULL) -/** Power model for IPA - * - * Attached value: pointer to @ref mali_pa_model_ops - */ -#define POWER_MODEL_CALLBACKS (NULL) - /** * Secure mode switch * diff --git a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c index 9bc51f1..4665f98 100755 --- a/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c +++ b/t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,18 @@ #define IS_SINGLE_BIT_SET(val, pos) (val&(1<connector.encoder = &pl111_encoder->encoder; + pl111_encoder->encoder.crtc = &priv->pl111_crtc->crtc; + goto finish; out_config: diff --git a/t83x/kernel/drivers/gpu/drm/pl111/sconscript b/t83x/kernel/drivers/gpu/drm/pl111/sconscript index c5011a7..5c47de7 100755 --- a/t83x/kernel/drivers/gpu/drm/pl111/sconscript +++ b/t83x/kernel/drivers/gpu/drm/pl111/sconscript @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2013, 2015 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2013, 2015-2016 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -48,5 +48,5 @@ else: # need Module.symvers from drm.ko #drm_env.Depends('$STATIC_LIB_PATH/pl111_drm.ko', '$STATIC_LIB_PATH/drm.ko') -env.ProgTarget('x11', cmd) +drm_env.KernelObjTarget('x11', cmd) diff --git a/t83x/kernel/drivers/gpu/drm/sconscript b/t83x/kernel/drivers/gpu/drm/sconscript index d030e71..a90fa89 100755 --- a/t83x/kernel/drivers/gpu/drm/sconscript +++ b/t83x/kernel/drivers/gpu/drm/sconscript @@ -15,7 +15,7 @@ Import('env') -if 'x11' in env['winsys']: +if 'x11' in env['winsys'] or 'gbm' in env['winsys']: # pl111_drm isn't released so only try to build it if it's there if Glob('pl111/sconscript') and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'): SConscript('pl111/sconscript')