PD#125571 midgard r11p0 rel for t82x and t83x
author Jiyu Yang <jiyu.yang@amlogic.com>
Mon, 23 May 2016 08:16:30 +0000 (16:16 +0800)
committer Jiyu Yang <jiyu.yang@amlogic.com>
Tue, 24 May 2016 10:30:06 +0000 (03:30 -0700)
commit d28e5a582d83c19eafe00bc5e27f378ad67d82d0
Author: Jiyu Yang <jiyu.yang@amlogic.com>
Date:   Tue May 24 16:03:58 2016 +0800

    PD125571 update t82x t83x lib for android L

    Change-Id: If605b9f9e98dfb571698363fe1c306771b3536a7

commit a835f407444207c4110c40bb3d9c0be2192bc1e2
Author: Jiyu Yang <jiyu.yang@amlogic.com>
Date:   Mon May 23 20:56:27 2016 +0800

    PD#125571 add HAL_PIXEL_FORMAT_YCrCb_420_SP

    this will be removed when android N
    Change-Id: If23ba5d863b8f9d207b2923e82753949cbd0af55

commit 398ec136c3f1af79f6aa9c2c00912738cd66aed5
Author: Jiyu Yang <jiyu.yang@amlogic.com>
Date:   Mon May 23 13:00:51 2016 +0800

    PD#125571 update library for t82x and t83x

    Change-Id: Id16041676497fa91072a9af0586d968420c16536

commit b61cc518957e374975abe11ce743c792261b770a
Author: Jiyu Yang <jiyu.yang@amlogic.com>
Date:   Fri May 20 14:43:04 2016 +0800

    TX041-SW-99002-r11p0-00rel0

    Change-Id: I6d18ef50fad81b939cf3bff21b108456258e63de

commit 210fd8146e5b64a83ba674abdfb8edbd53b22097
Author: Jiyu Yang <jiyu.yang@amlogic.com>
Date:   Fri May 20 14:39:56 2016 +0800

    TX041-SW-99002-r10p0-00rel0

    Change-Id: I9d6aad092a3e69236c38f078ab6633d029a07997

commit 017572cb09550913ecd52e43ff2eb0754c5115c3
Author: Jiyu Yang <jiyu.yang@amlogic.com>
Date:   Fri May 20 14:37:31 2016 +0800

    TX041-SW-99002-r9p0-05rel0

    Change-Id: Iab5b27d200621612c36deec6d1fef049af65db19

commit 4e9d6a0f22046d717c7f2599d5c89816e37d35d9
Author: Jiyu Yang <jiyu.yang@amlogic.com>
Date:   Fri May 20 14:36:15 2016 +0800

    TX041-SW-99002-r8p0-02rel0

    Change-Id: Ic59759da9c59a5055595d96f9826ec1f98bdf8ce

Change-Id: I95c443fdc26dd1143ee90b2debdb40f94905e61c

111 files changed:
lib/t82x_ion/libGLES_mali_default_8a_32-l.so
lib/t82x_ion/libGLES_mali_default_8a_32-m.so [changed from symlink to file mode: 0644]
lib/t83x_ion/libGLES_mali_default_7a_32.so [deleted symlink]
lib/t83x_ion/libGLES_mali_default_8a_32-l.so
lib/t83x_ion/libGLES_mali_default_8a_32-m.so [changed from symlink to file mode: 0644]
lib/t83x_ion/libGLES_mali_default_8a_64-m.so [deleted symlink]
t83x/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt
t83x/kernel/drivers/base/dma_buf_lock/src/sconscript
t83x/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
t83x/kernel/drivers/base/dma_buf_test_exporter/sconscript
t83x/kernel/drivers/base/kds/sconscript
t83x/kernel/drivers/base/ump/src/imports/ion/sconscript
t83x/kernel/drivers/base/ump/src/sconscript
t83x/kernel/drivers/gpu/arm/midgard/Kbuild
t83x/kernel/drivers/gpu/arm/midgard/Kconfig
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c [new file with mode: 0644]
t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h [new file with mode: 0644]
t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
t83x/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
t83x/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_context.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_device.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h [new file with mode: 0644]
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_instr.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h [new file with mode: 0644]
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_replay.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.c [deleted file]
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.h [deleted file]
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_uku.h
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
t83x/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h
t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h [new file with mode: 0644]
t83x/kernel/drivers/gpu/arm/midgard/mali_midg_regmap.h
t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
t83x/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
t83x/kernel/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
t83x/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
t83x/kernel/drivers/gpu/arm/midgard/sconscript
t83x/kernel/drivers/gpu/drm/pl111/pl111_drm_device.c
t83x/kernel/drivers/gpu/drm/pl111/sconscript
t83x/kernel/drivers/gpu/drm/sconscript

index f48cc73d36dad44bd4e113af8bea155827001e53..62855624b583917f4777c6d71c4300b23a3bb78e 100644 (file)
Binary files a/lib/t82x_ion/libGLES_mali_default_8a_32-l.so and b/lib/t82x_ion/libGLES_mali_default_8a_32-l.so differ
deleted file mode 120000 (symlink)
index 54f37896b9b306a44fd1a926b47706e76b97f8cf..0000000000000000000000000000000000000000
+++ /dev/null
@@ -1 +0,0 @@
-libGLES_mali_default_8a_32-l.so
\ No newline at end of file
new file mode 100644 (file)
index 0000000000000000000000000000000000000000..f0b72d02f081f052c0998b45d32be5785c9bea83
Binary files /dev/null and b/lib/t82x_ion/libGLES_mali_default_8a_32-m.so differ
diff --git a/lib/t83x_ion/libGLES_mali_default_7a_32.so b/lib/t83x_ion/libGLES_mali_default_7a_32.so
deleted file mode 120000 (symlink)
index 4c92be9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-libGLES_mali_default_8a_32.so
\ No newline at end of file
index 2f92f8f9d6dcc5224d105f6adfe7484d26d0e4c1..e0446d6de6de3807f04d621541003f8c8e5f0dcf 100644 (file)
Binary files a/lib/t83x_ion/libGLES_mali_default_8a_32-l.so and b/lib/t83x_ion/libGLES_mali_default_8a_32-l.so differ
deleted file mode 120000 (symlink)
index 54f37896b9b306a44fd1a926b47706e76b97f8cf..0000000000000000000000000000000000000000
+++ /dev/null
@@ -1 +0,0 @@
-libGLES_mali_default_8a_32-l.so
\ No newline at end of file
new file mode 100644 (file)
index 0000000000000000000000000000000000000000..fd9c84dd10b7e44c257c7dee75a503f69757599a
Binary files /dev/null and b/lib/t83x_ion/libGLES_mali_default_8a_32-m.so differ
diff --git a/lib/t83x_ion/libGLES_mali_default_8a_64-m.so b/lib/t83x_ion/libGLES_mali_default_8a_64-m.so
deleted file mode 120000 (symlink)
index 8de2766..0000000
+++ /dev/null
@@ -1 +0,0 @@
-libGLES_mali_default_8a_64-l.so
\ No newline at end of file
index acd2c6d8f39c8782303f24355dd6379475627281..46b704b13826c986f0cf6480f1e8dc82f1c47798 100755 (executable)
@@ -42,6 +42,22 @@ for details.
          simultaneously, 0 otherwise.
        - Value between 0 and 63 (including). If job throttle is enabled, this is one
          less than the number of cores that can be started simultaneously.
+- power_model : Sets power model parameters. Note that this model was designed for the Juno
+               platform, and may not be suitable for other platforms. A structure containing :
+       - compatible: Should be arm,mali-simple-power-model
+       - voltage: Voltage at reference point. Specified in mV.
+       - frequency: Frequency at reference point. Specified in MHz.
+       - dynamic-power: Dynamic power at reference frequency and voltage. Specified in mW.
+       - static-power: Static power at reference frequency. Specified in mW.
+       - ts: An array containing coefficients for the temperature scaling factor.
+         Used as : tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], where T = temperature
+       - thermal-zone: A string identifying the thermal zone used for the GPU
+- system-coherency : Sets the coherency protocol to be used for coherent
+                    accesses made from the GPU.
+                    If not set then no coherency is used.
+       - 0  : ACE-Lite
+       - 1  : ACE
+       - 31 : No coherency
 
 Example for a Mali-T602:
 
@@ -64,4 +80,13 @@ gpu@0xfc010000 {
                160000  925000,
                100000  912500,
        >;
+       power_model {
+               compatible = "arm,mali-simple-power-model";
+               voltage = <800>;
+               frequency = <500>;
+               static-power = <500>;
+               dynamic-power = <1500>;
+               ts = <20000 2000 (-20) 2>;
+               thermal-zone = "gpu";
+       };
 };
index 251c9a6f53d50a9584e31789e5c2fa9ccd27bdd7..b8724f1c7e08ab04f4dd5953939d2f6db9d07f95 100755 (executable)
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2013, 2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -17,20 +17,17 @@ import os
 import re
 Import('env')
 
-if env['v'] != '1':
-       env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
-
 src = [Glob('#kernel/drivers/base/dma_buf_lock/src/*.c'), Glob('#kernel/drivers/base/dma_buf_lock/src/*.h'), Glob('#kernel/drivers/base/dma_buf_lock/src/K*')]
 
 if env.GetOption('clean') :
        # Clean module
        env.Execute(Action("make clean", '[CLEAN] dma_buf_lock'))
        cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [])
-       env.ProgTarget('dma_buf_lock', cmd)
+       env.KernelObjTarget('dma_buf_lock', cmd)
 
 else:
        # Build module
        makeAction=Action("cd ${SOURCE.dir} && make dma_buf_lock && cp dma_buf_lock.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
        cmd = env.Command('$STATIC_LIB_PATH/dma_buf_lock.ko', src, [makeAction])
-       env.ProgTarget('dma_buf_lock', cmd)
+       env.KernelObjTarget('dma_buf_lock', cmd)
 
index 852c55075eb24f7a6bd84616c4d6613b51ae334e..6270a5250eb1586b86d4f59e49160be6264aed36 100755 (executable)
@@ -382,7 +382,22 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf,
        }
 
        /* alloc ready, let's export it */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0))
+       {
+               struct dma_buf_export_info export_info = {
+                       .exp_name = "dma_buf_te",
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0))
+                       .owner = THIS_MODULE,
+#endif
+                       .ops = &dma_buf_te_ops,
+                       .size = alloc->nr_pages << PAGE_SHIFT,
+                       .flags = O_CLOEXEC | O_RDWR,
+                       .priv = alloc,
+               };
+
+               dma_buf = dma_buf_export(&export_info);
+       }
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0))
        dma_buf = dma_buf_export(alloc, &dma_buf_te_ops,
                        alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR, NULL);
 #else
index bfb8a99f9d45d296df6d4c63f8e12c067af58c01..09fe7f3e8ffb4bf5e1674d772add41d10471a5f5 100755 (executable)
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -17,9 +17,6 @@
 import os
 Import('env')
 
-if env['v'] != '1':
-       env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
-
 src = [Glob('#kernel/drivers/base/dma_buf_test_exporter/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/dma_buf_test_exporter/K*')]
 
 env.Append( CPPPATH = '#kernel/include' )
@@ -27,10 +24,10 @@ env.Append( CPPPATH = '#kernel/include' )
 if env.GetOption('clean') :
        env.Execute(Action("make clean", '[CLEAN] dma-buf-test-exporter'))
        cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [])
-       env.ProgTarget('dma-buf-test-exporter', cmd)
+       env.KernelObjTarget('dma-buf-test-exporter', cmd)
 else:
        makeAction=Action("cd ${SOURCE.dir} && make && ( ( [ -f dma-buf-test-exporter.ko ] && cp dma-buf-test-exporter.ko $STATIC_LIB_PATH/ ) || touch $STATIC_LIB_PATH/dma-buf-test-exporter.ko)", '$MAKECOMSTR')
        cmd = env.Command('$STATIC_LIB_PATH/dma-buf-test-exporter.ko', src, [makeAction])
-       env.ProgTarget('dma-buf-test-exporter', cmd)
+       env.KernelObjTarget('dma-buf-test-exporter', cmd)
 
        
index 7fc1bc4182e5ee29aecb5ce0df7a1a89aa473f24..91a79fd4a40c56cb617bd8c861165805eb92a77a 100755 (executable)
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -33,9 +33,6 @@ for line in open(linux_config_file, 'r'):
                # still allow for building kds_test module.
                build_kds = 0
 
-if env['v'] != '1':
-       env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
-
 src = [Glob('#kernel/drivers/base/kds/*.c'), Glob('#kernel/include/linux/*.h'), Glob('#kernel/drivers/base/kds/K*')]
 
 env.Append( CPPPATH = '#kernel/include' )
@@ -48,19 +45,19 @@ if env.GetOption('clean') :
        if build_kds or (int(env['unit']) == 1):
                env.Execute(Action("make clean", '[CLEAN] kds'))
                cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [])
-               env.ProgTarget('kds', cmd)
+               env.KernelObjTarget('kds', cmd)
 else:
        # Build KDS module
        if build_kds:
                makeAction=Action("cd ${SOURCE.dir} && make kds && cp kds.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
                cmd = env.Command('$STATIC_LIB_PATH/kds.ko', src, [makeAction])
-               env.ProgTarget('kds', cmd)
+               env.KernelObjTarget('kds', cmd)
 
        # Build KDS test module
        if int(env['unit']) == 1:
                makeActionTest=Action("cd ${SOURCE.dir} && make kds_test && cp kds_test.ko $STATIC_LIB_PATH/", '$MAKECOMSTR')
                cmdTest = env.Command('$STATIC_LIB_PATH/kds_test.ko', src, [makeActionTest])
-               env.ProgTarget('kds', cmdTest)
+               env.KernelObjTarget('kds', cmdTest)
                if build_kds:
-                       Depends(cmdTest, cmd)
+                       env.Depends(cmdTest, cmd)
 
index 749bb1f541f0ea85ff3ca7a4141fe0a6fdebd19b..cff24c8ec14163bd51353782258d5aecf0a8cc2b 100755 (executable)
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2013, 2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -23,9 +23,6 @@ env_ion = env.Clone()
 if env_ion['ump_ion'] != '1':
        Return()
 
-if env_ion['v'] != '1':
-       env_ion['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
-
 # Source files required for UMP.
 ion_src = [Glob('#kernel/drivers/base/ump/src/imports/ion/*.c')]
 
@@ -49,4 +46,4 @@ for p in patterns:
        Clean(cmd, Glob('#kernel/drivers/base/ump/src/imports/ion/%s' % p))
 
 env_ion.Depends('$STATIC_LIB_PATH/ump_ion_import.ko', '$STATIC_LIB_PATH/ump.ko')
-env_ion.ProgTarget('ump', cmd)
+env_ion.KernelObjTarget('ump', cmd)
index 9cec770fe26a450db8d688220c9580c4ddfffad7..d706e1e5e4ac32736e1ec90970dbcf2a6375bfd3 100755 (executable)
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -21,9 +21,6 @@ Import('env')
 # Clone the environment so changes don't affect other build files
 env_ump = env.Clone()
 
-if env_ump['v'] != '1':
-       env_ump['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
-
 # Source files required for UMP.
 ump_src = [Glob('#kernel/drivers/base/ump/src/linux/*.c'), Glob('#kernel/drivers/base/ump/src/common/*.c'), Glob('#kernel/drivers/base/ump/src/imports/*/*.c')]
 
@@ -58,7 +55,7 @@ if env['os'] != 'android':
        if not kds_in_kernel:
                env.Depends('$STATIC_LIB_PATH/ump.ko', '$STATIC_LIB_PATH/kds.ko')
 
-env_ump.ProgTarget('ump', cmd)
+env_ump.KernelObjTarget('ump', cmd)
 
 SConscript( 'imports/sconscript' )
 
index 899b9ef3a797ffe4ec562f06b8e6e46ac9fe9c15..687a512d865311ceba902154729366ad541cad6b 100755 (executable)
@@ -15,7 +15,7 @@
 
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r7p0-02rel0"
+MALI_RELEASE_NAME ?= "r11p0-00rel0"
 
 # Paths required for build
 KBASE_PATH = $(src)
@@ -88,7 +88,6 @@ SRC := \
        mali_kbase_context.c \
        mali_kbase_pm.c \
        mali_kbase_config.c \
-       mali_kbase_security.c \
        mali_kbase_instr.c \
        mali_kbase_vinstr.c \
        mali_kbase_softjobs.c \
@@ -111,13 +110,15 @@ SRC := \
        mali_kbase_debug_job_fault.c \
        mali_kbase_smc.c \
        mali_kbase_mem_pool.c \
-       mali_kbase_mem_pool_debugfs.c
+       mali_kbase_mem_pool_debugfs.c \
+       mali_kbase_tlstream.c
 
-ifeq ($(CONFIG_MALI_MIPE_ENABLED),y)
-       SRC += mali_kbase_tlstream.c
-       ifeq ($(MALI_UNIT_TEST),1)
-               SRC += mali_kbase_tlstream_test.c
-       endif
+ifeq ($(MALI_UNIT_TEST),1)
+       SRC += mali_kbase_tlstream_test.c
+endif
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+       SRC += mali_kbase_regs_dump_debugfs.c
 endif
 
 # Job Scheduler Policy: Completely Fair Scheduler
@@ -192,13 +193,6 @@ endif
 endif
 
 ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
-       SRC += platform/devicetree/mali_kbase_runtime_pm.c
-       SRC += platform/devicetree/mali_kbase_config_devicetree.c
-       SRC += platform/devicetree/mali_clock.c
-       SRC += platform/devicetree/mpgpu.c
-       SRC += platform/devicetree/meson_main2.c
-       SRC += platform/devicetree/platform_gx.c
-       SRC += platform/devicetree/scaling.c
        ccflags-y += -I$(src)/platform/devicetree
 endif
 
@@ -232,3 +226,19 @@ mali_kbase-y += $(BACKEND:.c=.o)
 
 ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
 subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+
+# Default to the devicetree platform if neither a fake platform nor a
+# third-party platform is configured.
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY)$(CONFIG_MALI_PLATFORM_FAKE),)
+CONFIG_MALI_PLATFORM_DEVICETREE := y
+endif
+
+mali_kbase-$(CONFIG_MALI_PLATFORM_DEVICETREE) += \
+    platform/devicetree/mali_clock.o \
+    platform/devicetree/mpgpu.o \
+    platform/devicetree/meson_main2.o \
+    platform/devicetree/platform_gx.o \
+    platform/devicetree/scaling.o \
+       platform/devicetree/mali_kbase_runtime_pm.o \
+       platform/devicetree/mali_kbase_config_devicetree.o
+ccflags-$(CONFIG_MALI_PLATFORM_DEVICETREE) += -I$(src)/platform/devicetree
index 1543043800417fec7f3907fd077401fbc7426f23..8a33841af5a55c7130bb36e2a7f1fb3ccea9582f 100755 (executable)
@@ -16,6 +16,7 @@
 
 menuconfig MALI_MIDGARD
        tristate "Mali Midgard series support"
+       select GPU_TRACEPOINTS if ANDROID
        default n
        help
          Enable this option to build support for a ARM Mali Midgard GPU.
@@ -23,53 +24,22 @@ menuconfig MALI_MIDGARD
          To compile this driver as a module, choose M here:
          this will generate a single module, called mali_kbase.
 
-choice
-       prompt "Streamline support"
-       depends on MALI_MIDGARD
-       default MALI_TIMELINE_DISABLED
-       help
-         Select streamline support configuration.
-
-config MALI_TIMELINE_DISABLED
-       bool "Streamline support disabled"
-       help
-         Disable support for ARM Streamline Performance Analyzer.
-
-         Timeline support will not be included in
-         kernel code.
-         Debug stream will not be generated.
-
 config MALI_GATOR_SUPPORT
        bool "Streamline support via Gator"
+       depends on MALI_MIDGARD
+       default n
        help
          Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
          You will need the Gator device driver already loaded before loading this driver when enabling
          Streamline debug support.
-
-config MALI_MIPE_ENABLED
-       bool "Streamline support via MIPE"
-       help
-         Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
-
-         Stream will be transmitted directly to Mali GPU library.
-         Compatible version of the library is required to read debug stream generated by kernel.
-
-endchoice
+         This is a legacy interface required by older versions of Streamline.
 
 config MALI_MIDGARD_DVFS
-       bool "Enable DVFS"
-       depends on MALI_MIDGARD
+       bool "Enable legacy DVFS"
+       depends on MALI_MIDGARD && !MALI_DEVFREQ && !MALI_PLATFORM_DEVICETREE
        default n
        help
-         Choose this option to enable DVFS in the Mali Midgard DDK.
-
-config MALI_MIDGARD_RT_PM
-       bool "Enable Runtime power management"
-       depends on MALI_MIDGARD
-       depends on PM_RUNTIME
-       default n
-       help
-         Choose this option to enable runtime power management in the Mali Midgard DDK.
+         Choose this option to enable legacy DVFS in the Mali Midgard DDK.
 
 config MALI_MIDGARD_ENABLE_TRACE
        bool "Enable kbase tracing"
@@ -79,13 +49,6 @@ config MALI_MIDGARD_ENABLE_TRACE
          Enables tracing in kbase.  Trace log available through
          the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
 
-config MALI_MIDGARD_DEBUG_SYS
-       bool "Enable sysfs for the Mali Midgard DDK "
-       depends on MALI_MIDGARD && SYSFS
-       default n
-       help
-         Enables sysfs for the Mali Midgard DDK. Set/Monitor the Mali Midgard DDK
-
 config MALI_DEVFREQ
        bool "devfreq support for Mali"
        depends on MALI_MIDGARD && PM_DEVFREQ
@@ -107,9 +70,22 @@ menuconfig MALI_EXPERT
          Enabling this option and modifying the default settings may produce a driver with performance or
          other limitations.
 
+config MALI_PRFCNT_SET_SECONDARY
+       bool "Use secondary set of performance counters"
+       depends on MALI_MIDGARD && MALI_EXPERT
+       default n
+       help
+         Select this option to use secondary set of performance counters. Kernel
+         features that depend on an access to the primary set of counters may
+         become unavailable. Enabling this option will prevent power management
+         from working optimally and may cause instrumentation tools to return
+         bogus results.
+
+         If unsure, say N.
+
 config MALI_DEBUG_SHADER_SPLIT_FS
        bool "Allow mapping of shader cores via sysfs"
-       depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT
+       depends on MALI_MIDGARD && MALI_EXPERT
        default n
        help
          Select this option to provide a sysfs entry for runtime configuration of shader
@@ -128,10 +104,24 @@ config MALI_PLATFORM_FAKE
 choice
        prompt "Platform configuration"
        depends on MALI_MIDGARD && MALI_EXPERT
-       default MALI_PLATFORM_VEXPRESS
+       default MALI_PLATFORM_DEVICETREE
        help
          Select the SOC platform that contains a Mali Midgard GPU
 
+config MALI_PLATFORM_DEVICETREE
+       bool "Device Tree platform"
+       depends on OF
+       help
+         Select this option to use Device Tree with the Mali driver.
+
+         When using this option the Mali driver will get the details of the
+         GPU hardware from the Device Tree. This means that the same driver
+         binary can run on multiple platforms as long as all the GPU hardware
+         details are described in the device tree.
+
+         Device Tree is the recommended method for the Mali driver platform
+         integration.
+
 config MALI_PLATFORM_VEXPRESS
        depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
        bool "Versatile Express"
@@ -199,11 +189,4 @@ config MALI_SYSTEM_TRACE
          minimal overhead when not in use. Enable only if you know what
          you are doing.
 
-config MALI_GPU_TRACEPOINTS
-       bool "Enable GPU tracepoints"
-       depends on MALI_MIDGARD && ANDROID
-       select GPU_TRACEPOINTS
-       help
-         Enables GPU tracepoints using Android trace event definitions.
-
 source "drivers/gpu/arm/midgard/platform/Kconfig"
index cadd7d5fc3c5eb67d1751384fbe3fc693980eb05..e38120aeb153d1775d7600ff71e29e95e9fbefb9 100755 (executable)
@@ -57,3 +57,7 @@ ifeq ($(CONFIG_MALI_NO_MALI),y)
        # HW error simulation
        BACKEND += backend/gpu/mali_kbase_model_error_generator.c
 endif
+
+ifeq ($(CONFIG_DEVFREQ_THERMAL),y)
+       BACKEND += backend/gpu/mali_kbase_power_model_simple.c
+endif
index 92a14fa1bae1287da6bb09acc26b0a4667558d5f..2f3c41a55807024f764223e3d6354d48b35a754e 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+               u32 mode)
+{
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG))
+               kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL);
+}
 
index 42069fc88a1ffd0b623c93456f5a123d776f7ea6..fe9869109a82576070b21ad2ec36bbe5fddf8308 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #include "mali_kbase.h"
 #include "mali_base_kernel.h"
 
+/**
+  * kbase_cache_set_coherency_mode() - Sets the system coherency mode
+  *                    in the GPU.
+  * @kbdev:    Device pointer
+  * @mode:     Coherency mode. COHERENCY_ACE/ACE_LITE
+  */
+void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
+               u32 mode);
 
 #endif                         /* _KBASE_CACHE_POLICY_H_ */
index db97637c9bc8148b225d766bb5d786be040543f0..d25f84ed4d34b072ed2d44fef4979c9c0cd59ad9 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -18,6 +18,9 @@
 #include <mali_kbase.h>
 #include <mali_kbase_config_defaults.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+#endif
 
 #include <linux/clk.h>
 #include <linux/devfreq.h>
@@ -234,7 +237,9 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
        stat->private_data = NULL;
 
 #ifdef CONFIG_DEVFREQ_THERMAL
-       memcpy(&kbdev->devfreq_cooling->last_status, stat, sizeof(*stat));
+       if (kbdev->devfreq_cooling)
+               memcpy(&kbdev->devfreq_cooling->last_status, stat,
+                               sizeof(*stat));
 #endif
 
        return 0;
@@ -296,14 +301,9 @@ static void kbase_devfreq_exit(struct device *dev)
 
 int kbase_devfreq_init(struct kbase_device *kbdev)
 {
-#ifdef CONFIG_DEVFREQ_THERMAL
-       struct devfreq_cooling_ops *callbacks = POWER_MODEL_CALLBACKS;
-#endif
        struct devfreq_dev_profile *dp;
        int err;
 
-       dev_dbg(kbdev->dev, "Init Mali devfreq\n");
-
        if (!kbdev->clock)
                return -ENODEV;
 
@@ -337,12 +337,20 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
        }
 
 #ifdef CONFIG_DEVFREQ_THERMAL
-       if (callbacks) {
-
+       err = kbase_power_model_simple_init(kbdev);
+       if (err && err != -ENODEV && err != -EPROBE_DEFER) {
+               dev_err(kbdev->dev,
+                       "Failed to initialize simple power model (%d)\n",
+                       err);
+               goto cooling_failed;
+       }
+       if (err == -EPROBE_DEFER)
+               goto cooling_failed;
+       if (err != -ENODEV) {
                kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
                                kbdev->dev->of_node,
                                kbdev->devfreq,
-                               callbacks);
+                               &power_model_simple_ops);
                if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
                        err = PTR_ERR(kbdev->devfreq_cooling);
                        dev_err(kbdev->dev,
@@ -350,6 +358,8 @@ int kbase_devfreq_init(struct kbase_device *kbdev)
                                err);
                        goto cooling_failed;
                }
+       } else {
+               err = 0;
        }
 #endif
 
@@ -360,8 +370,7 @@ cooling_failed:
        devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
 #endif /* CONFIG_DEVFREQ_THERMAL */
 opp_notifier_failed:
-       err = devfreq_remove_device(kbdev->devfreq);
-       if (err)
+       if (devfreq_remove_device(kbdev->devfreq))
                dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
        else
                kbdev->devfreq = NULL;
@@ -376,7 +385,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev)
        dev_dbg(kbdev->dev, "Term Mali devfreq\n");
 
 #ifdef CONFIG_DEVFREQ_THERMAL
-       devfreq_cooling_unregister(kbdev->devfreq_cooling);
+       if (kbdev->devfreq_cooling)
+               devfreq_cooling_unregister(kbdev->devfreq_cooling);
 #endif
 
        devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
index 591c013c534911173b56cc15ac6e435e6be1f41f..d410cd297889c85bb18e96df522002dca7d5f937 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -83,3 +83,23 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
                                GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
 }
 
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+                                       struct kbase_gpuprops_regdump *regdump)
+{
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) {
+               /* Ensure we can access the GPU registers */
+               kbase_pm_register_access_enable(kbdev);
+
+               regdump->coherency_features = kbase_reg_read(kbdev,
+                               GPU_CONTROL_REG(COHERENCY_FEATURES), NULL);
+
+               /* We're done accessing the GPU registers for now. */
+               kbase_pm_register_access_disable(kbdev);
+       } else {
+               /* Pre COHERENCY_FEATURES we only supported NONE and ACE_LITE */
+               regdump->coherency_features =
+                               COHERENCY_FEATURE_BIT(COHERENCY_NONE) |
+                               COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE);
+       }
+}
+
index 2c987071a77ca497e05884bae45b21e47e2e0773..4e70b34ffaa6707a9fba8e19680160c15c1c9d5d 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,7 @@
  */
 
 #include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
 #include <mali_midg_regmap.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
@@ -78,6 +79,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
        u32 irq_mask;
        int ret;
        u64 shader_cores_needed;
+       u32 prfcnt_config;
 
        KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx);
 
@@ -151,9 +153,22 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
        kbase_pm_request_l2_caches(kbdev);
 
        /* Configure */
+       prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+       {
+               u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+               u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+                       >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+               int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+               if (arch_v6)
+                       prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+       }
+#endif
+
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-                                       (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT)
-                                       | PRFCNT_CONFIG_MODE_OFF, kctx);
+                       prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
                                        setup->dump_buffer & 0xFFFFFFFF, kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
@@ -174,8 +189,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
                                                        setup->tiler_bm, kctx);
 
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-                               (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
-                               PRFCNT_CONFIG_MODE_MANUAL, kctx);
+                       prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);
 
        /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
         */
index 57c64f7db93fdc5a56fb7cc0d423046be4849053..8ccc440171a2b6429f458525dedced33de86a2ad 100755 (executable)
@@ -63,6 +63,9 @@ struct slot_rb {
  * @scheduling_timer:          The timer tick used for rescheduling jobs
  * @timer_running:             Is the timer running? The runpool_mutex must be
  *                             held whilst modifying this.
+ * @suspend_timer:              Is the timer suspended? Set when a suspend
+ *                              occurs and cleared on resume. The runpool_mutex
+ *                              must be held whilst modifying this.
  * @reset_gpu:                 Set to a KBASE_RESET_xxx value (see comments)
  * @reset_workq:               Work queue for performing the reset
  * @reset_work:                        Work item for performing the reset
@@ -80,6 +83,7 @@ struct kbase_backend_data {
        struct hrtimer scheduling_timer;
 
        bool timer_running;
+       bool suspend_timer;
 
        atomic_t reset_gpu;
 
index a4b0c26cddecd5eb25792ac9e7f28e6736a24fe1..ddaae344951f4aad9875523b63ff222a58728a29 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -15,8 +15,6 @@
 
 
 
-
-
 /*
  * Base kernel job manager APIs
  */
@@ -27,9 +25,7 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include <mali_kbase_gator.h>
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 #include <mali_kbase_hw.h>
 #include <mali_kbase_config_defaults.h>
 #include <mali_kbase_hwaccess_jm.h>
 #define beenthere(kctx, f, a...) \
                        dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
 
-#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
-u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
-u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
-u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
-#endif
-
 #if KBASE_GPU_RESET_EN
 static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
 static void kbasep_reset_timeout_worker(struct work_struct *data);
@@ -84,37 +74,16 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
        kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
                                                jc_head >> 32, kctx);
 
-#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
-       {
-               u64 mask;
-               u32 value;
-
-               if (0 == js)
-                       mask = mali_js0_affinity_mask;
-               else if (1 == js)
-                       mask = mali_js1_affinity_mask;
-               else
-                       mask = mali_js2_affinity_mask;
-
-               value = katom->affinity & (mask & 0xFFFFFFFF);
-
-               kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
-                                                               value, kctx);
-
-               value = (katom->affinity >> 32) & ((mask>>32) & 0xFFFFFFFF);
-               kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
-                                                               value, kctx);
-       }
-#else
        kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
                                        katom->affinity & 0xFFFFFFFF, kctx);
        kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
                                        katom->affinity >> 32, kctx);
-#endif
 
        /* start MMU, medium priority, cache clean/flush on end, clean/flush on
         * start */
        cfg = kctx->as_nr;
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+               cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
 
 #ifndef CONFIG_MALI_COH_GPU
        cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
@@ -124,6 +93,10 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
        cfg |= JS_CONFIG_START_MMU;
        cfg |= JS_CONFIG_THREAD_PRI(8);
 
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) &&
+               (katom->atom_flags & KBASE_KATOM_FLAG_SECURE))
+               cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
+
        if (kbase_hw_has_feature(kbdev,
                                BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
                if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
@@ -140,6 +113,9 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 
        kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
 
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
+               kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
+                               katom->flush_id, kctx);
 
        /* Write an approximate start timestamp.
         * It's approximate because there might be a job in the HEAD register.
@@ -159,23 +135,26 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
                                GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
                                kctx, kbase_jd_atom_id(kctx, katom));
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
        kbase_tlstream_tl_attrib_atom_config(katom, jc_head,
                        katom->affinity, cfg);
+       kbase_tlstream_tl_ret_ctx_lpu(
+               kctx,
+               &kbdev->gpu_props.props.raw_props.js_features[
+                       katom->slot_nr]);
        kbase_tlstream_tl_ret_atom_as(katom, &kbdev->as[kctx->as_nr]);
        kbase_tlstream_tl_ret_atom_lpu(
                        katom,
-                       &kbdev->gpu_props.props.raw_props.js_features[js]);
-#endif
+                       &kbdev->gpu_props.props.raw_props.js_features[js],
+                       "ctx_nr,atom_nr");
 #ifdef CONFIG_GPU_TRACEPOINTS
-       if (kbase_backend_nr_atoms_submitted(kbdev, js) == 1) {
+       if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
                /* If this is the only job on the slot, trace it as starting */
                char js_string[16];
 
                trace_gpu_sched_switch(
                                kbasep_make_job_slot_string(js, js_string),
                                ktime_to_ns(katom->start_timestamp),
-                               (u32)katom->kctx, 0, katom->work_id);
+                               (u32)katom->kctx->id, 0, katom->work_id);
                kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
        }
 #endif
@@ -228,6 +207,27 @@ static void kbasep_job_slot_update_head_start_timestamp(
        }
 }
 
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+       !defined(MALI_MIPE_ENABLED)
+/**
+ * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint
+ * @kbdev: kbase device
+ * @i: job slot
+ *
+ * Get the kbase atom by calling kbase_gpu_inspect() for the given job slot.
+ * Then pass that atom, together with the js_features entry of the same job
+ * slot, to the timeline tracepoint, informing the instrumentation module
+ * that the atom is no longer executing on that lpu (job slot).
+ */
+static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i)
+{
+       struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0);
+
+       kbase_tlstream_tl_nret_atom_lpu(katom,
+               &kbdev->gpu_props.props.raw_props.js_features[i]);
+}
+#endif
+
 void kbase_job_done(struct kbase_device *kbdev, u32 done)
 {
        unsigned long flags;
@@ -288,9 +288,15 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
                                                GATOR_JOB_SLOT_SOFT_STOPPED, i),
                                                                NULL, 0);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
                                        kbase_tlstream_aux_job_softstop(i);
+
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+       !defined(MALI_MIPE_ENABLED)
+                                       kbasep_trace_tl_nret_atom_lpu(
+                                               kbdev, i);
 #endif
+
                                        /* Soft-stopped job - read the value of
                                         * JS<n>_TAIL so that the job chain can
                                         * be resumed */
@@ -437,6 +443,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 
                        failed = done >> 16;
                        finished = (done & 0xFFFF) | failed;
+                       if (done)
+                               end_timestamp = ktime_get();
                } while (finished & (1 << i));
 
                kbasep_job_slot_update_head_start_timestamp(kbdev, i,
@@ -459,12 +467,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
 KBASE_EXPORT_TEST_API(kbase_job_done);
 
 static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
-                                                               u16 core_reqs)
+                                       struct kbase_jd_atom *katom)
 {
        bool soft_stops_allowed = true;
 
-       if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
-               if ((core_reqs & BASE_JD_REQ_T) != 0)
+       if (kbase_jd_katom_is_secure(katom)) {
+               soft_stops_allowed = false;
+       } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+               if ((katom->core_req & BASE_JD_REQ_T) != 0)
                        soft_stops_allowed = false;
        }
        return soft_stops_allowed;
@@ -508,12 +518,13 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
 
        if (action == JS_COMMAND_SOFT_STOP) {
                bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
-                                                               core_reqs);
+                                                               target_katom);
 
                if (!soft_stop_allowed) {
 #ifdef CONFIG_MALI_DEBUG
-                       dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
-                                               (unsigned int)core_reqs);
+                       dev_dbg(kbdev->dev,
+                                       "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+                                       (unsigned int)core_reqs);
 #endif                         /* CONFIG_MALI_DEBUG */
                        return;
                }
@@ -521,9 +532,51 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
                /* We are about to issue a soft stop, so mark the atom as having
                 * been soft stopped */
                target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
-       }
 
-       if (action == JS_COMMAND_HARD_STOP) {
+               /* Mark the point where we issue the soft-stop command */
+               kbase_tlstream_aux_issue_job_softstop(target_katom);
+
+               if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+                       int i;
+
+                       for (i = 0;
+                            i < kbase_backend_nr_atoms_submitted(kbdev, js);
+                            i++) {
+                               struct kbase_jd_atom *katom;
+
+                               katom = kbase_gpu_inspect(kbdev, js, i);
+
+                               KBASE_DEBUG_ASSERT(katom);
+
+                               /* For HW_ISSUE_8316, only 'bad' jobs attacking
+                                * the system can cause this issue: normally,
+                                * all memory should be allocated in multiples
+                                * of 4 pages, and growable memory should be
+                                * changed size in multiples of 4 pages.
+                                *
+                                * Whilst such 'bad' jobs can be cleared by a
+                                * GPU reset, the locking up of a uTLB entry
+                                * caused by the bad job could also stall other
+                                * ASs, meaning that other ASs' jobs don't
+                                * complete in the 'grace' period before the
+                                * reset. We don't want to lose other ASs' jobs
+                                * when they would normally complete fine, so we
+                                * must 'poke' the MMU regularly to help other
+                                * ASs complete */
+                               kbase_as_poking_timer_retain_atom(
+                                               kbdev, katom->kctx, katom);
+                       }
+               }
+
+               if (kbase_hw_has_feature(
+                               kbdev,
+                               BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+                       action = (target_katom->atom_flags &
+                                       KBASE_KATOM_FLAGS_JOBCHAIN) ?
+                               JS_COMMAND_SOFT_STOP_1 :
+                               JS_COMMAND_SOFT_STOP_0;
+               }
+       } else if (action == JS_COMMAND_HARD_STOP) {
                bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
                                                                core_reqs);
 
@@ -547,55 +600,21 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
                         * hard-stop fails, so it is safe to just return and
                         * ignore the hard-stop request.
                         */
-                       dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
-                                               (unsigned int)core_reqs);
+                       dev_warn(kbdev->dev,
+                                       "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+                                       (unsigned int)core_reqs);
                        return;
                }
                target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
-       }
-
-       if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) &&
-                                       action == JS_COMMAND_SOFT_STOP) {
-               int i;
-
-               for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js);
-                                                                       i++) {
-                       struct kbase_jd_atom *katom;
-
-                       katom = kbase_gpu_inspect(kbdev, js, i);
 
-                       KBASE_DEBUG_ASSERT(katom);
-
-                       /* For HW_ISSUE_8316, only 'bad' jobs attacking the
-                        * system can cause this issue: normally, all memory
-                        * should be allocated in multiples of 4 pages, and
-                        * growable memory should be changed size in multiples
-                        * of 4 pages.
-                        *
-                        * Whilst such 'bad' jobs can be cleared by a GPU reset,
-                        * the locking up of a uTLB entry caused by the bad job
-                        * could also stall other ASs, meaning that other ASs'
-                        * jobs don't complete in the 'grace' period before the
-                        * reset. We don't want to lose other ASs' jobs when
-                        * they would normally complete fine, so we must 'poke'
-                        * the MMU regularly to help other ASs complete */
-                       kbase_as_poking_timer_retain_atom(kbdev, katom->kctx,
-                                                                       katom);
-               }
-       }
-
-       if (kbase_hw_has_feature(kbdev,
+               if (kbase_hw_has_feature(
+                               kbdev,
                                BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
-               if (action == JS_COMMAND_SOFT_STOP)
                        action = (target_katom->atom_flags &
-                                               KBASE_KATOM_FLAGS_JOBCHAIN) ?
-                                       JS_COMMAND_SOFT_STOP_1 :
-                                       JS_COMMAND_SOFT_STOP_0;
-               else
-                       action = (target_katom->atom_flags &
-                                               KBASE_KATOM_FLAGS_JOBCHAIN) ?
-                                       JS_COMMAND_HARD_STOP_1 :
-                                       JS_COMMAND_HARD_STOP_0;
+                                       KBASE_KATOM_FLAGS_JOBCHAIN) ?
+                               JS_COMMAND_HARD_STOP_1 :
+                               JS_COMMAND_HARD_STOP_0;
+               }
        }
 
        kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
@@ -843,6 +862,21 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
        mutex_unlock(&kctx->jctx.lock);
 }
 
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
+{
+       u32 flush_id = 0;
+
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
+               mutex_lock(&kbdev->pm.lock);
+               if (kbdev->pm.backend.gpu_powered)
+                       flush_id = kbase_reg_read(kbdev,
+                                       GPU_CONTROL_REG(LATEST_FLUSH), NULL);
+               mutex_unlock(&kbdev->pm.lock);
+       }
+
+       return flush_id;
+}
+
 int kbase_job_slot_init(struct kbase_device *kbdev)
 {
 #if KBASE_GPU_RESET_EN
@@ -1058,7 +1092,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
        /* For soft-stop, don't enter if soft-stop not allowed, or isn't
         * causing disjoint */
        if (hw_action == JS_COMMAND_SOFT_STOP &&
-                       !(kbasep_soft_stop_allowed(kbdev, core_reqs) &&
+                       !(kbasep_soft_stop_allowed(kbdev, target_katom) &&
                          (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
                return;
 
@@ -1145,7 +1179,7 @@ static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev,
 
 static void kbasep_reset_timeout_worker(struct work_struct *data)
 {
-       unsigned long flags;
+       unsigned long flags, mmu_flags;
        struct kbase_device *kbdev;
        int i;
        ktime_t end_timestamp = ktime_get();
@@ -1155,8 +1189,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
        bool try_schedule = false;
        bool restore_hwc = false;
 
-       u32 mmu_irq_mask;
-
        KBASE_DEBUG_ASSERT(data);
 
        kbdev = container_of(data, struct kbase_device,
@@ -1183,6 +1215,30 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
                return;
        }
 
+       KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+       spin_lock_irqsave(&kbdev->mmu_mask_change, mmu_flags);
+       /* We're about to flush out the IRQs and their bottom halves */
+       kbdev->irq_reset_flush = true;
+
+       /* Disable IRQs so that no IRQ handler kicks in after the spinlock is
+        * released; this also clears any outstanding interrupts */
+       spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+       kbase_pm_disable_interrupts(kbdev);
+       spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+       spin_unlock_irqrestore(&kbdev->mmu_mask_change, mmu_flags);
+
+       /* Ensure that any IRQ handlers have finished.
+        * Must be done without holding any locks IRQ handlers will take */
+       kbase_synchronize_irqs(kbdev);
+
+       /* Flush out any in-flight work items */
+       kbase_flush_mmu_wqs(kbdev);
+
+       /* The flush has completed so reset the active indicator */
+       kbdev->irq_reset_flush = false;
+
        mutex_lock(&kbdev->pm.lock);
        /* We hold the pm lock, so there ought to be a current policy */
        KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
@@ -1224,22 +1280,11 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING;
        kbdev->hwcnt.backend.triggered = 0;
 
-       mmu_irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
-       /* Disable IRQ to avoid IRQ handlers to kick in after releasing the
-        * spinlock; this also clears any outstanding interrupts */
-       kbase_pm_disable_interrupts(kbdev);
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
-       /* Ensure that any IRQ handlers have finished
-        * Must be done without any locks IRQ handlers will take */
-       kbase_synchronize_irqs(kbdev);
-
        /* Reset the GPU */
        kbase_pm_init_hw(kbdev, 0);
 
-       /* Re-enabled IRQs */
-       kbase_pm_enable_interrupts_mmu_mask(kbdev, mmu_irq_mask);
-
        /* Complete any jobs that were still on the GPU */
        spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
        kbase_backend_reset(kbdev, &end_timestamp);
@@ -1267,6 +1312,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
                mutex_unlock(&as->transaction_mutex);
        }
 
+       kbase_pm_enable_interrupts(kbdev);
+
        atomic_set(&kbdev->hwaccess.backend.reset_gpu,
                                                KBASE_RESET_GPU_NOT_PENDING);
 
@@ -1284,10 +1331,21 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
        /* Restore the HW counters setup */
        if (restore_hwc) {
                struct kbase_context *kctx = kbdev->hwcnt.kctx;
+               u32 prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+               u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+               u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
+                       >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+               int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
+
+               if (arch_v6)
+                       prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
+#endif
 
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-                               (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
-                               PRFCNT_CONFIG_MODE_OFF, kctx);
+                               prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);
+
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
                                hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
@@ -1309,8 +1367,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
                                                hwcnt_setup.tiler_bm, kctx);
 
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
-                               (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
-                               PRFCNT_CONFIG_MODE_MANUAL, kctx);
+                               prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL,
+                               kctx);
 
                /* If HW has PRLAM-8186 we can now re-enable the tiler HW
                 * counters dump */
@@ -1362,6 +1420,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data)
                break;
        }
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+       /* Resume the vinstr core */
+       kbase_vinstr_hwc_resume(kbdev->vinstr_ctx);
+
        /* Note: counter dumping may now resume */
 
        mutex_lock(&kbdev->pm.lock);
index 86017181066735f8b148c36977949147fb447da7..af6cddcdb04325d27a115bbb392ad49be0064567 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -739,6 +739,26 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
 
                        /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
                        case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+
+                               if (katom[idx]->will_fail_event_code) {
+                                       kbase_gpu_mark_atom_for_return(kbdev,
+                                                       katom[idx]);
+                                       /* Set EVENT_DONE so this atom will be
+                                          completed, not unpulled. */
+                                       katom[idx]->event_code =
+                                               BASE_JD_EVENT_DONE;
+                                       /* Only return if head atom or previous
+                                        * atom already removed - as atoms must
+                                        * be returned in order. */
+                                       if (idx == 0 || katom[0]->gpu_rb_state ==
+                                                       KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+                                               kbase_gpu_dequeue_atom(kbdev, js, NULL);
+                                               kbase_jm_return_atom_to_js(kbdev, katom[idx]);
+                                       }
+                                       break;
+                               }
+
+
                                cores_ready =
                                        kbasep_js_job_check_ref_cores(kbdev, js,
                                                                katom[idx]);
@@ -770,6 +790,13 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
                        /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
                        case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+                               /* Only submit if head atom or previous atom
+                                * already submitted */
+                               if (idx == 1 &&
+                                       (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED &&
+                                       katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+                                       break;
+
                                if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) {
                                        int err = 0;
 
@@ -813,11 +840,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev)
                                        kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev),
                                        "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)",
                                        kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev));
-                               KBASE_DEBUG_ASSERT_MSG(
-                                       (kbase_jd_katom_is_secure(katom[idx]) && js == 0) ||
-                                       !kbase_jd_katom_is_secure(katom[idx]),
-                                       "Secure atom on JS%d not supported", js);
-
                                katom[idx]->gpu_rb_state =
                                        KBASE_ATOM_GPU_RB_READY;
 
@@ -1060,7 +1082,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
                        trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
                                                                js_string),
                                                ktime_to_ns(*end_timestamp),
-                                               (u32)next_katom->kctx, 0,
+                                               (u32)next_katom->kctx->id, 0,
                                                next_katom->work_id);
                        kbdev->hwaccess.backend.slot_rb[js].last_context =
                                                        next_katom->kctx;
@@ -1100,24 +1122,10 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
                                                                        js, 0);
 
                        if (katom) {
-                               enum kbase_atom_gpu_rb_state gpu_rb_state =
-                                                       katom->gpu_rb_state;
-
                                kbase_gpu_release_atom(kbdev, katom, NULL);
                                kbase_gpu_dequeue_atom(kbdev, js, NULL);
-
-                               if (gpu_rb_state ==
-                                               KBASE_ATOM_GPU_RB_SUBMITTED) {
-                                       katom->event_code =
-                                               BASE_JD_EVENT_JOB_CANCELLED;
-                                       kbase_jm_complete(kbdev, katom,
-                                                               end_timestamp);
-                               } else {
-                                       katom->event_code =
-                                                       BASE_JD_EVENT_STOPPED;
-                                       kbase_jm_return_atom_to_js(kbdev,
-                                                       katom);
-                               }
+                               katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+                               kbase_jm_complete(kbdev, katom, end_timestamp);
                        }
                }
        }
@@ -1167,6 +1175,12 @@ static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
        return -1;
 }
 
+static void kbase_job_evicted(struct kbase_jd_atom *katom)
+{
+       kbase_timeline_job_slot_done(katom->kctx->kbdev, katom->kctx, katom,
+                       katom->slot_nr, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+}
+
 bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
                                        struct kbase_context *kctx,
                                        int js,
@@ -1265,6 +1279,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
                                                                        != 0) {
                                                /* idx1 removed successfully,
                                                 * will be handled in IRQ */
+                                               kbase_job_evicted(katom_idx1);
                                                kbase_gpu_remove_atom(kbdev,
                                                                katom_idx1,
                                                                action, true);
@@ -1336,6 +1351,7 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
                                                JS_HEAD_NEXT_HI), NULL) != 0) {
                                        /* idx1 removed successfully, will be
                                         * handled in IRQ once idx0 completes */
+                                       kbase_job_evicted(katom_idx1);
                                        kbase_gpu_remove_atom(kbdev, katom_idx1,
                                                                        action,
                                                                        false);
index 89b8085e28b293a0a4764ef99fa7a9126307120a..6a49669af63026a2b1f97a981d4c10f4d1190914 100755 (executable)
@@ -122,7 +122,8 @@ bool kbase_js_choose_affinity(u64 * const affinity,
 
        if (1 == kbdev->gpu_props.num_cores) {
                /* trivial case only one core, nothing to do */
-               *affinity = core_availability_mask;
+               *affinity = core_availability_mask &
+                               kbdev->pm.debug_core_mask[js];
        } else {
                if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
                                        BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
@@ -131,7 +132,8 @@ bool kbase_js_choose_affinity(u64 * const affinity,
                                 * the first core group */
                                *affinity =
                                kbdev->gpu_props.props.coherency_info.group[0].core_mask
-                                               & core_availability_mask;
+                                               & core_availability_mask &
+                                               kbdev->pm.debug_core_mask[js];
                        } else {
                                /* js[1], js[2] use core groups 0, 1 for
                                 * dual-core-group systems */
@@ -141,7 +143,8 @@ bool kbase_js_choose_affinity(u64 * const affinity,
                                                        num_core_groups);
                                *affinity =
                                kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
-                                               & core_availability_mask;
+                                               & core_availability_mask &
+                                               kbdev->pm.debug_core_mask[js];
 
                                /* If the job is specifically targeting core
                                 * group 1 and the core availability policy is
@@ -155,7 +158,8 @@ bool kbase_js_choose_affinity(u64 * const affinity,
                } else {
                        /* All cores are available when no core split is
                         * required */
-                       *affinity = core_availability_mask;
+                       *affinity = core_availability_mask &
+                                       kbdev->pm.debug_core_mask[js];
                }
        }
 
index 04bfa51903970534074455a72f213d5e41a1521c..1e9a7e4c466da9f0cf6a86bb0401d56d9133e662 100755 (executable)
  */
 static inline bool timer_callback_should_run(struct kbase_device *kbdev)
 {
+       struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
        s8 nr_running_ctxs;
 
        lockdep_assert_held(&kbdev->js_data.runpool_mutex);
 
+       /* Timer must stop if we are suspending */
+       if (backend->suspend_timer)
+               return false;
+
        /* nr_contexts_pullable is updated with the runpool_mutex. However, the
         * locking in the caller gives us a barrier that ensures
         * nr_contexts_pullable is up-to-date for reading */
@@ -270,7 +275,6 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
                spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
                backend->timer_running = false;
                spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
-
                /* From now on, return value of timer_callback_should_run() will
                 * also cause the timer to not requeue itself. Its return value
                 * cannot change, because it depends on variables updated with
@@ -284,7 +288,6 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
                spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
                backend->timer_running = true;
                spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
-
                hrtimer_start(&backend->scheduling_timer,
                        HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
                                                        HRTIMER_MODE_REL);
@@ -314,3 +317,21 @@ void kbase_backend_timer_term(struct kbase_device *kbdev)
        hrtimer_cancel(&backend->scheduling_timer);
 }
 
+void kbase_backend_timer_suspend(struct kbase_device *kbdev)
+{
+       struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+       backend->suspend_timer = true;
+
+       kbase_backend_ctx_count_changed(kbdev);
+}
+
+void kbase_backend_timer_resume(struct kbase_device *kbdev)
+{
+       struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+       backend->suspend_timer = false;
+
+       kbase_backend_ctx_count_changed(kbdev);
+}
+
index 3c101e4320d8c459475867b628a782c48f16965f..3f53779c67471f5ad102d46b7022aea7a7adc33f 100755 (executable)
@@ -41,4 +41,29 @@ int kbase_backend_timer_init(struct kbase_device *kbdev);
  */
 void kbase_backend_timer_term(struct kbase_device *kbdev);
 
+/**
+ * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling
+ *                               timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on suspend, after the active count has reached
+ * zero. This is required as the timer may have been started on job submission
+ * to the job scheduler, but before jobs are submitted to the GPU.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS
+ *                              scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called on resume. Note that it is not guaranteed to
+ * re-start the timer, only to evaluate whether it should be re-started.
+ *
+ * Caller must hold runpool_mutex.
+ */
+void kbase_backend_timer_resume(struct kbase_device *kbdev);
+
 #endif /* _KBASE_JS_BACKEND_H_ */
index 1b613a1967c9fd1e1ef6b6bfbd1b1da3dc881a04..c6c7b89712da3e92ba5c165cb0dfbd07fa66e6fb 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,9 +20,7 @@
 #include <mali_kbase.h>
 #include <mali_kbase_mem.h>
 #include <mali_kbase_mmu_hw.h>
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 #include <backend/gpu/mali_kbase_mmu_hw_direct.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 
@@ -165,6 +163,15 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
                                KBASE_MMU_FAULT_TYPE_BUS :
                                KBASE_MMU_FAULT_TYPE_PAGE;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+               as->fault_extra_addr = kbase_reg_read(kbdev,
+                               MMU_AS_REG(as_no, AS_FAULTEXTRA_HI),
+                               kctx);
+               as->fault_extra_addr <<= 32;
+               as->fault_extra_addr |= kbase_reg_read(kbdev,
+                               MMU_AS_REG(as_no, AS_FAULTEXTRA_LO),
+                               kctx);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
                if (kbase_as_has_bus_fault(as)) {
                        /* Mark bus fault as handled.
@@ -203,13 +210,36 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
                struct kbase_context *kctx)
 {
        struct kbase_mmu_setup *current_setup = &as->current_setup;
-#if defined(CONFIG_MALI_MIPE_ENABLED) || \
-       (defined(MALI_INCLUDE_TMIX) &&      \
-        defined(CONFIG_MALI_COH_PAGES) &&   \
-        defined(CONFIG_MALI_GPU_MMU_AARCH64))
        u32 transcfg = 0;
-#endif
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+       transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+
+       /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
+       /* Clear PTW_MEMATTR bits */
+       transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
+       /* Enable correct PTW_MEMATTR bits */
+       transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+
+       if (kbdev->system_coherency == COHERENCY_ACE) {
+               /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
+               /* Clear PTW_SH bits */
+               transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK);
+               /* Enable correct PTW_SH bits */
+               transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
+       }
+
+       kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
+                       transcfg, kctx);
+       kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+                       (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx);
+
+#else /* CONFIG_MALI_GPU_MMU_AARCH64 */
+
+       if (kbdev->system_coherency == COHERENCY_ACE)
+               current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
+
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
        kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
                        current_setup->transtab & 0xFFFFFFFFUL, kctx);
@@ -221,12 +251,10 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
        kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
                        (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
        kbase_tlstream_tl_attrib_as_config(as,
                        current_setup->transtab,
                        current_setup->memattr,
                        transcfg);
-#endif
 
        write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
 }
@@ -282,8 +310,18 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
 void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
                struct kbase_context *kctx, enum kbase_mmu_fault_type type)
 {
+       unsigned long flags;
        u32 pf_bf_mask;
 
+       spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+       /*
+        * A reset is in-flight and we're flushing the IRQ + bottom half
+        * so don't update anything as it could race with the reset code.
+        */
+       if (kbdev->irq_reset_flush)
+               goto unlock;
+
        /* Clear the page (and bus fault IRQ as well in case one occurred) */
        pf_bf_mask = MMU_PAGE_FAULT(as->number);
        if (type == KBASE_MMU_FAULT_TYPE_BUS ||
@@ -291,6 +329,9 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
                pf_bf_mask |= MMU_BUS_ERROR(as->number);
 
        kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+
+unlock:
+       spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
 }
 
 void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
@@ -303,6 +344,13 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
         * occurred) */
        spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
 
+       /*
+        * A reset is in-flight and we're flushing the IRQ + bottom half
+        * so don't update anything as it could race with the reset code.
+        */
+       if (kbdev->irq_reset_flush)
+               goto unlock;
+
        irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
                        MMU_PAGE_FAULT(as->number);
 
@@ -312,5 +360,6 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
 
        kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
 
+unlock:
        spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
 }
index 9ff7baadec7a32cf6ce69612a38a62e8241c9046..5805efea11258e4b762472cfcbb6c98895e24172 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
 void kbase_pm_register_access_enable(struct kbase_device *kbdev)
 {
        struct kbase_pm_callback_conf *callbacks;
 
-#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
-       pm_runtime_enable(kbdev->dev);
-#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
        callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
 
        if (callbacks)
@@ -56,9 +54,6 @@ void kbase_pm_register_access_disable(struct kbase_device *kbdev)
                callbacks->power_off_callback(kbdev);
 
        kbdev->pm.backend.gpu_powered = false;
-#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
-       pm_runtime_disable(kbdev->dev);
-#endif
 }
 
 int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
@@ -96,6 +91,8 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
                                        callbacks->power_runtime_on_callback;
                kbdev->pm.backend.callback_power_runtime_off =
                                        callbacks->power_runtime_off_callback;
+               kbdev->pm.backend.callback_power_runtime_idle =
+                                       callbacks->power_runtime_idle_callback;
        } else {
                kbdev->pm.backend.callback_power_on = NULL;
                kbdev->pm.backend.callback_power_off = NULL;
@@ -105,6 +102,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
                kbdev->pm.callback_power_runtime_term = NULL;
                kbdev->pm.backend.callback_power_runtime_on = NULL;
                kbdev->pm.backend.callback_power_runtime_off = NULL;
+               kbdev->pm.backend.callback_power_runtime_idle = NULL;
        }
 
        /* Initialise the metrics subsystem */
@@ -227,7 +225,9 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
 
        kbasep_pm_read_present_cores(kbdev);
 
-       kbdev->pm.debug_core_mask =
+       kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
+                       kbdev->pm.debug_core_mask[1] =
+                       kbdev->pm.debug_core_mask[2] =
                        kbdev->gpu_props.props.raw_props.shader_present;
 
        /* Pretend the GPU is active to prevent a power policy turning the GPU
@@ -321,9 +321,15 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
        }
 }
 
-void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+               u64 new_core_mask_js0, u64 new_core_mask_js1,
+               u64 new_core_mask_js2)
 {
-       kbdev->pm.debug_core_mask = new_core_mask;
+       kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
+       kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
+       kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
+       kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 |
+                       new_core_mask_js2;
 
        kbase_pm_update_cores_state_nolock(kbdev);
 }
@@ -358,6 +364,8 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
                WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
        }
 
+       kbase_backend_timer_suspend(kbdev);
+
        mutex_unlock(&kbdev->pm.lock);
        mutex_unlock(&js_devdata->runpool_mutex);
 }
@@ -368,8 +376,12 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
 
        mutex_lock(&js_devdata->runpool_mutex);
        mutex_lock(&kbdev->pm.lock);
+
        kbdev->pm.suspending = false;
        kbase_pm_do_poweron(kbdev, true);
+
+       kbase_backend_timer_resume(kbdev);
+
        mutex_unlock(&kbdev->pm.lock);
        mutex_unlock(&js_devdata->runpool_mutex);
 }
index 60b4758d92bb139b49ddbb1d8099c027d53aa835..4eada33cc02114a47d4f2ad6a56f127f0e3992e8 100755 (executable)
@@ -137,14 +137,14 @@ u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
        /* All cores must be enabled when instrumentation is in use */
        if (kbdev->pm.backend.instr_enabled)
                return kbdev->gpu_props.props.raw_props.shader_present &
-                               kbdev->pm.debug_core_mask;
+                               kbdev->pm.debug_core_mask_all;
 
        if (kbdev->pm.backend.ca_current_policy == NULL)
                return kbdev->gpu_props.props.raw_props.shader_present &
-                               kbdev->pm.debug_core_mask;
+                               kbdev->pm.debug_core_mask_all;
 
        return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
-                                               kbdev->pm.debug_core_mask;
+                                               kbdev->pm.debug_core_mask_all;
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
index 893c271ed1e010c277ca470a37566430dab9a8ab..3eaf1a7e211945e88b01f2ed755232c917f9e7d8 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -200,8 +200,13 @@ union kbase_pm_ca_policy_data {
  * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
  * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
  *                           timer callback
- * @poweroff_timer_needed: true if the poweroff timer is currently running,
+ * @poweroff_timer_needed: true if the poweroff timer is currently required,
  *                         false otherwise
+ * @poweroff_timer_running: true if the poweroff timer is currently running,
+ *                          false otherwise
+ *                          power_change_lock should be held when accessing,
+ *                          unless there is no way the timer can be running (eg
+ *                          hrtimer_cancel() was called immediately before)
  * @callback_power_on: Callback when the GPU needs to be turned on. See
  *                     &struct kbase_pm_callback_conf
  * @callback_power_off: Callback when the GPU may be turned off. See
@@ -214,9 +219,8 @@ union kbase_pm_ca_policy_data {
  *                             &struct kbase_pm_callback_conf
  * @callback_power_runtime_off: Callback when the GPU may be turned off. See
  *                              &struct kbase_pm_callback_conf
- * @callback_cci_snoop_ctrl: Callback when the GPU L2 power may transition.
- *                           If enable is set then snoops should be enabled
- *                           otherwise snoops should be disabled
+ * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
+ *                              &struct kbase_pm_callback_conf
  *
  * Note:
  * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the
@@ -274,6 +278,7 @@ struct kbase_pm_backend_data {
        u64 shader_poweroff_pending;
 
        bool poweroff_timer_needed;
+       bool poweroff_timer_running;
 
        int (*callback_power_on)(struct kbase_device *kbdev);
        void (*callback_power_off)(struct kbase_device *kbdev);
@@ -281,7 +286,7 @@ struct kbase_pm_backend_data {
        void (*callback_power_resume)(struct kbase_device *kbdev);
        int (*callback_power_runtime_on)(struct kbase_device *kbdev);
        void (*callback_power_runtime_off)(struct kbase_device *kbdev);
-
+       int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
 };
 
 
index bcaf20c43af23eae1d19e679da0cff242f1b3555..7675c91be2da6ecf6c765e21162059188a152d82 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,9 +27,7 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include <mali_kbase_gator.h>
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 #include <mali_kbase_pm.h>
 #include <mali_kbase_cache_policy.h>
 #include <mali_kbase_config_defaults.h>
@@ -99,6 +97,39 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
        return (u32)core_type + (u32)action;
 }
 
+#ifdef CONFIG_ARM64
+static void mali_cci_flush_l2(struct kbase_device *kbdev)
+{
+       const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED;
+       u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+       u32 raw;
+
+       /*
+        * Note that we don't take the cache flush mutex here: we
+        * expect to be the last user of the L2, as all other L2 users
+        * will have dropped their references in order to initiate L2
+        * power down, and L2 power down is the only valid place for
+        * this to be called from.
+        */
+
+       kbase_reg_write(kbdev,
+                       GPU_CONTROL_REG(GPU_COMMAND),
+                       GPU_COMMAND_CLEAN_INV_CACHES,
+                       NULL);
+
+       raw = kbase_reg_read(kbdev,
+               GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+               NULL);
+
+       /* Wait for cache flush to complete before continuing, exit on
+        * gpu resets or loop expiry. */
+       while (((raw & mask) == 0) && --loops) {
+               raw = kbase_reg_read(kbdev,
+                                       GPU_CONTROL_REG(GPU_IRQ_RAWSTAT),
+                                       NULL);
+       }
+}
+#endif
 
 /**
  * kbase_pm_invoke - Invokes an action on a core set
@@ -134,7 +165,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
                        kbase_trace_mali_pm_power_off(core_type, cores);
        }
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
        if (cores) {
                u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
 
@@ -144,7 +175,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
                        state &= ~cores;
                kbase_tlstream_aux_pm_state(core_type, state);
        }
-#endif
+
        /* Tracing */
        if (cores) {
                if (action == ACTION_PWRON)
@@ -177,6 +208,8 @@ static void kbase_pm_invoke(struct kbase_device *kbdev,
                        case KBASE_PM_CORE_L2:
                                KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
                                                                        0u, lo);
+                               /* disable snoops before L2 is turned off */
+                               kbase_pm_cache_snoop_disable(kbdev);
                                break;
                        default:
                                break;
@@ -404,6 +437,12 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
                        /* All are ready, none will be turned off, and none are
                         * transitioning */
                        kbdev->pm.backend.l2_powered = 1;
+                       /*
+                        * Ensure snoops are enabled after L2 is powered up.
+                        * kbase keeps track of the snoop state, so this is
+                        * safe to call repeatedly.
+                        */
+                       kbase_pm_cache_snoop_enable(kbdev);
                        if (kbdev->l2_users_count > 0) {
                                /* Notify any registered l2 cache users
                                 * (optimized out when no users waiting) */
@@ -665,7 +704,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
                                                kbase_pm_get_ready_cores(kbdev,
                                                        KBASE_PM_CORE_TILER));
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
                kbase_tlstream_aux_pm_state(
                                KBASE_PM_CORE_L2,
                                kbase_pm_get_ready_cores(
@@ -679,7 +718,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
                                kbase_pm_get_ready_cores(
                                        kbdev,
                                        KBASE_PM_CORE_TILER));
-#endif
 
                KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
                                kbdev->pm.backend.gpu_in_desired_state,
@@ -840,30 +878,6 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
 
 KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
 
-void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask)
-{
-       unsigned long flags;
-
-       KBASE_DEBUG_ASSERT(NULL != kbdev);
-       /*
-        * Clear all interrupts,
-        * and unmask them all.
-        */
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
-       kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
-                                                                       NULL);
-       kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
-                                                                       NULL);
-       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
-
-       kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
-                                                                       NULL);
-       kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
-
-       kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
-       kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, NULL);
-}
-
 void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
 {
        unsigned long flags;
@@ -921,6 +935,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
 
        if (is_resume && kbdev->pm.backend.callback_power_resume) {
                kbdev->pm.backend.callback_power_resume(kbdev);
+               return;
        } else if (kbdev->pm.backend.callback_power_on) {
                kbdev->pm.backend.callback_power_on(kbdev);
                /* If your platform properly keeps the GPU state you may use the
@@ -999,6 +1014,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
                return false;
        }
 
+       kbase_pm_cache_snoop_disable(kbdev);
 
        /* The GPU power may be turned off from this point */
        kbdev->pm.backend.gpu_powered = false;
@@ -1081,14 +1097,23 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
                kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
 
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
        /* Enable alternative hardware counter selection if configured. */
-       if (DEFAULT_ALTERNATIVE_HWC)
+       if (!GPU_ID_IS_NEW_FORMAT(prod_id))
                kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+#endif
 
        /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
                kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
 
+       if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) {
+               if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */
+                       kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE;
+               else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */
+                       kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
+       }
+
        kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
                        GPU_CONTROL_REG(TILER_CONFIG), NULL);
 
@@ -1110,6 +1135,12 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
        kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
                                L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
 
+       if (kbdev->system_coherency == COHERENCY_ACE) {
+               /* Allow memory configuration disparity to be ignored, we
+                * optimize the use of shared memory and thus we expect
+                * some disparity in the memory configuration */
+               kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+       }
 
        /* Only for T86x/T88x-based products after r2p0 */
        if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
@@ -1174,6 +1205,33 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
 
 }
 
+/*
+ * Enable CCI snoops and record that they are on.
+ *
+ * Nothing is done unless the system coherency is COHERENCY_ACE and snoops
+ * are not already flagged as enabled in kbdev->cci_snoop_enabled.
+ */
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
+{
+       if (kbdev->system_coherency != COHERENCY_ACE)
+               return;
+       if (kbdev->cci_snoop_enabled)
+               return;
+
+#ifdef CONFIG_ARM64
+       /* The SMC is only issued when a non-zero function id is set */
+       if (kbdev->snoop_enable_smc)
+               kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0);
+#endif /* CONFIG_ARM64 */
+
+       dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n");
+       kbdev->cci_snoop_enabled = true;
+}
+
+/*
+ * Disable CCI snoops and record that they are off.
+ *
+ * Nothing is done unless the system coherency is COHERENCY_ACE and snoops
+ * are currently flagged as enabled in kbdev->cci_snoop_enabled.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev)
+{
+       if (kbdev->system_coherency != COHERENCY_ACE)
+               return;
+       if (!kbdev->cci_snoop_enabled)
+               return;
+
+#ifdef CONFIG_ARM64
+       if (kbdev->snoop_disable_smc) {
+               /* mali_cci_flush_l2() is called before the disabling SMC */
+               mali_cci_flush_l2(kbdev);
+               kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0);
+       }
+#endif /* CONFIG_ARM64 */
+
+       dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n");
+       kbdev->cci_snoop_enabled = false;
+}
 
 int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
 {
@@ -1202,6 +1260,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
        /* Ensure interrupts are off to begin with, this also clears any
         * outstanding interrupts */
        kbase_pm_disable_interrupts(kbdev);
+       /* Ensure cache snoops are disabled before reset. */
+       kbase_pm_cache_snoop_disable(kbdev);
        /* Prepare for the soft-reset */
        kbdev->pm.backend.reset_done = false;
 
@@ -1329,10 +1389,10 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
                                                        RESET_COMPLETED) {
                /* The interrupt is set in the RAWSTAT; this suggests that the
                 * interrupts are not getting to the CPU */
-               dev_warn(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+               dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
                /* If interrupts aren't working we can't continue. */
                destroy_hrtimer_on_stack(&rtdata.timer);
-               goto out;
+               return -EINVAL;
        }
 
        /* The GPU doesn't seem to be responding to the reset so try a hard
@@ -1374,6 +1434,15 @@ out:
 
        kbase_pm_hw_issues_apply(kbdev);
 
+       kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency);
+
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+               u32 gpu_status = kbase_reg_read(kbdev,
+                               GPU_CONTROL_REG(GPU_STATUS), NULL);
+
+               kbdev->secure_mode = (gpu_status &
+                               GPU_STATUS_PROTECTED_MODE_ACTIVE) != 0;
+       }
 
        /* If cycle counter was in use re-enable it, enable_irqs will only be
         * false when called from kbase_pm_powerup */
index bcca37d0540216a1b36cddb6b9cb1969af35fdf0..aa51b8cdef8fc3bfced5d0ca1b6ca01367a0308f 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -154,17 +154,6 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
  */
 void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
 
-/**
- * kbase_pm_enable_interrupts_mmu_mask - Enable interrupts on the device, using
- *                                       the provided mask to set MMU_IRQ_MASK.
- *
- * Interrupts are also enabled after a call to kbase_pm_clock_on().
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @mask:  The mask to use for MMU_IRQ_MASK
- */
-void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask);
-
 /**
  * kbase_pm_disable_interrupts - Disable interrupts on the device.
  *
@@ -512,5 +501,23 @@ void kbase_pm_power_changed(struct kbase_device *kbdev);
 void kbase_pm_metrics_update(struct kbase_device *kbdev,
                                ktime_t *now);
 
+/**
+ * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:     Device pointer
+ *
+ * This function should be called after L2 power up.
+ */
+
+void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU
+ * If the GPU does not have coherency this is a no-op
+ * @kbdev:     Device pointer
+ *
+ * This function should be called before L2 power off.
+ */
+void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev);
 
 #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
index e3c4829019ae05154e1319f9b950c211d00f5ad8..343436fc353dd7e9e47409ae21300034ad9fc168 100755 (executable)
@@ -183,10 +183,13 @@ static enum hrtimer_restart
 kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
 {
        struct kbase_device *kbdev;
+       unsigned long flags;
 
        kbdev = container_of(timer, struct kbase_device,
                                                pm.backend.gpu_poweroff_timer);
 
+       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
        /* It is safe for this call to do nothing if the work item is already
         * queued. The worker function will read the must up-to-date state of
         * kbdev->pm.backend.gpu_poweroff_pending under lock.
@@ -200,30 +203,27 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
                                        &kbdev->pm.backend.gpu_poweroff_work);
 
        if (kbdev->pm.backend.shader_poweroff_pending) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
-
-               if (kbdev->pm.backend.shader_poweroff_pending) {
-                       kbdev->pm.backend.shader_poweroff_pending_time--;
+               kbdev->pm.backend.shader_poweroff_pending_time--;
 
-                       KBASE_DEBUG_ASSERT(
+               KBASE_DEBUG_ASSERT(
                                kbdev->pm.backend.shader_poweroff_pending_time
                                                                        >= 0);
 
-                       if (!kbdev->pm.backend.shader_poweroff_pending_time)
-                               kbasep_pm_do_poweroff_cores(kbdev);
-               }
-
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+               if (!kbdev->pm.backend.shader_poweroff_pending_time)
+                       kbasep_pm_do_poweroff_cores(kbdev);
        }
 
        if (kbdev->pm.backend.poweroff_timer_needed) {
+               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
                hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
 
                return HRTIMER_RESTART;
        }
 
+       kbdev->pm.backend.poweroff_timer_running = false;
+       spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
        return HRTIMER_NORESTART;
 }
 
@@ -263,10 +263,13 @@ static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
        if (do_poweroff) {
                kbdev->pm.backend.poweroff_timer_needed = false;
                hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+               kbdev->pm.backend.poweroff_timer_running = false;
+
                /* Power off the GPU */
                if (!kbase_pm_do_poweroff(kbdev, false)) {
                        /* GPU can not be powered off at present */
                        kbdev->pm.backend.poweroff_timer_needed = true;
+                       kbdev->pm.backend.poweroff_timer_running = true;
                        hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
                                        kbdev->pm.gpu_poweroff_time,
                                        HRTIMER_MODE_REL);
@@ -316,13 +319,13 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
 
        kbdev->pm.backend.poweroff_timer_needed = false;
        hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       kbdev->pm.backend.poweroff_timer_running = false;
 
        /* If wq is already running but is held off by pm.lock, make sure it has
         * no effect */
        kbdev->pm.backend.gpu_poweroff_pending = 0;
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
-
        kbdev->pm.backend.shader_poweroff_pending = 0;
        kbdev->pm.backend.shader_poweroff_pending_time = 0;
 
@@ -331,87 +334,106 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
 
 void kbase_pm_update_active(struct kbase_device *kbdev)
 {
+       struct kbase_pm_device_data *pm = &kbdev->pm;
+       struct kbase_pm_backend_data *backend = &pm->backend;
        unsigned long flags;
        bool active;
 
-       lockdep_assert_held(&kbdev->pm.lock);
+       lockdep_assert_held(&pm->lock);
 
        /* pm_current_policy will never be NULL while pm.lock is held */
-       KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+       KBASE_DEBUG_ASSERT(backend->pm_current_policy);
 
-       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+       spin_lock_irqsave(&pm->power_change_lock, flags);
 
-       active = kbdev->pm.backend.pm_current_policy->get_core_active(kbdev);
+       active = backend->pm_current_policy->get_core_active(kbdev);
 
        if (active) {
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
-
-               if (kbdev->pm.backend.gpu_poweroff_pending) {
+               if (backend->gpu_poweroff_pending) {
                        /* Cancel any pending power off request */
-                       kbdev->pm.backend.gpu_poweroff_pending = 0;
+                       backend->gpu_poweroff_pending = 0;
 
                        /* If a request was pending then the GPU was still
                         * powered, so no need to continue */
-                       if (!kbdev->poweroff_pending)
+                       if (!kbdev->poweroff_pending) {
+                               spin_unlock_irqrestore(&pm->power_change_lock,
+                                               flags);
                                return;
+                       }
                }
 
-               if (!kbdev->pm.backend.poweroff_timer_needed &&
-                               !kbdev->pm.backend.gpu_powered &&
-                               (kbdev->pm.poweroff_gpu_ticks ||
-                               kbdev->pm.poweroff_shader_ticks)) {
-                       kbdev->pm.backend.poweroff_timer_needed = true;
-                       hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
-                                       kbdev->pm.gpu_poweroff_time,
+               if (!backend->poweroff_timer_running && !backend->gpu_powered &&
+                               (pm->poweroff_gpu_ticks ||
+                               pm->poweroff_shader_ticks)) {
+                       backend->poweroff_timer_needed = true;
+                       backend->poweroff_timer_running = true;
+                       hrtimer_start(&backend->gpu_poweroff_timer,
+                                       pm->gpu_poweroff_time,
                                        HRTIMER_MODE_REL);
                }
 
+               spin_unlock_irqrestore(&pm->power_change_lock, flags);
+
                /* Power on the GPU and any cores requested by the policy */
                kbase_pm_do_poweron(kbdev, false);
        } else {
                /* It is an error for the power policy to power off the GPU
                 * when there are contexts active */
-               KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+               KBASE_DEBUG_ASSERT(pm->active_count == 0);
 
-               if (kbdev->pm.backend.shader_poweroff_pending) {
-                       kbdev->pm.backend.shader_poweroff_pending = 0;
-                       kbdev->pm.backend.shader_poweroff_pending_time = 0;
+               if (backend->shader_poweroff_pending) {
+                       backend->shader_poweroff_pending = 0;
+                       backend->shader_poweroff_pending_time = 0;
                }
 
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
-
-
                /* Request power off */
-               if (kbdev->pm.backend.gpu_powered) {
-                       if (kbdev->pm.poweroff_gpu_ticks) {
-                               kbdev->pm.backend.gpu_poweroff_pending =
-                                               kbdev->pm.poweroff_gpu_ticks;
-                               if (!kbdev->pm.backend.poweroff_timer_needed) {
+               if (pm->backend.gpu_powered) {
+                       if (pm->poweroff_gpu_ticks) {
+                               backend->gpu_poweroff_pending =
+                                               pm->poweroff_gpu_ticks;
+                               backend->poweroff_timer_needed = true;
+                               if (!backend->poweroff_timer_running) {
                                        /* Start timer if not running (eg if
                                         * power policy has been changed from
                                         * always_on to something else). This
                                         * will ensure the GPU is actually
                                         * powered off */
-                                       kbdev->pm.backend.poweroff_timer_needed
+                                       backend->poweroff_timer_running
                                                        = true;
                                        hrtimer_start(
-                                       &kbdev->pm.backend.gpu_poweroff_timer,
-                                               kbdev->pm.gpu_poweroff_time,
+                                               &backend->gpu_poweroff_timer,
+                                               pm->gpu_poweroff_time,
                                                HRTIMER_MODE_REL);
                                }
+                               spin_unlock_irqrestore(&pm->power_change_lock,
+                                               flags);
                        } else {
+                               spin_unlock_irqrestore(&pm->power_change_lock,
+                                               flags);
+
                                /* Power off the GPU immediately */
                                if (!kbase_pm_do_poweroff(kbdev, false)) {
                                        /* GPU can not be powered off at present
                                         */
-                                       kbdev->pm.backend.poweroff_timer_needed
-                                                       = true;
-                                       hrtimer_start(
-                                       &kbdev->pm.backend.gpu_poweroff_timer,
-                                               kbdev->pm.gpu_poweroff_time,
-                                               HRTIMER_MODE_REL);
+                                       spin_lock_irqsave(
+                                                       &pm->power_change_lock,
+                                                       flags);
+                                       backend->poweroff_timer_needed = true;
+                                       if (!backend->poweroff_timer_running) {
+                                               backend->poweroff_timer_running
+                                                               = true;
+                                               hrtimer_start(
+                                               &backend->gpu_poweroff_timer,
+                                                       pm->gpu_poweroff_time,
+                                                       HRTIMER_MODE_REL);
+                                       }
+                                       spin_unlock_irqrestore(
+                                                       &pm->power_change_lock,
+                                                       flags);
                                }
                        }
+               } else {
+                       spin_unlock_irqrestore(&pm->power_change_lock, flags);
                }
        }
 }
@@ -478,7 +500,6 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
                 * Only reset poweroff_timer_needed if we're not in the middle
                 * of the power off callback */
                kbdev->pm.backend.poweroff_timer_needed = false;
-               hrtimer_try_to_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
        }
 
        /* Ensure timer does not power off wanted cores and make sure to power
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c
new file mode 100644 (file)
index 0000000..cd4f0a2
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+#include <linux/of.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <backend/gpu/mali_kbase_power_model_simple.h>
+
+/*
+ * This model is primarily designed for the Juno platform. It may not be
+ * suitable for other platforms.
+ */
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static u32 dynamic_coefficient;
+static u32 static_coefficient;
+static s32 ts[4];
+static struct thermal_zone_device *gpu_tz;
+
+/*
+ * Static power estimate for the given voltage.
+ *
+ * The voltage-scaled term (static_coefficient * voltage^3) is multiplied by
+ * a cubic polynomial in the temperature whose coefficients are ts[0..3].
+ * The temperature is read from gpu_tz when that zone is set and the read
+ * succeeds; otherwise FALLBACK_STATIC_TEMPERATURE is used.
+ */
+static unsigned long model_static_power(unsigned long voltage)
+{
+       const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+       unsigned long temperature = FALLBACK_STATIC_TEMPERATURE;
+       unsigned long t, t_sq, t_cu, scale, scaled_power;
+
+       if (gpu_tz) {
+               int err = gpu_tz->ops->get_temp(gpu_tz, &temperature);
+
+               if (err) {
+                       pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+                                       err);
+                       /* Discard whatever the failed read left behind */
+                       temperature = FALLBACK_STATIC_TEMPERATURE;
+               }
+       }
+
+       /* Temperature scaling factor, applied to the voltage scaled power.
+        * NOTE(review): the division by 1000 presumably converts
+        * millidegrees to degrees - confirm against the thermal zone.
+        */
+       t = temperature / 1000;
+       t_sq = t * t;
+       t_cu = t_sq * t;
+       scale = ts[0]
+                       + (ts[1] * t)
+                       + (ts[2] * t_sq)
+                       + (ts[3] * t_cu);
+
+       scaled_power = (static_coefficient * voltage_cubed) >> 20;
+
+       return (scaled_power * scale) / 1000000;
+}
+
+/*
+ * Dynamic power estimate.
+ *
+ * The inputs: freq (f) is in Hz, and voltage (v) in mV.
+ * The coefficient (c), dynamic_coefficient, is in mW/(MHz mV mV).
+ *
+ * Formula used:
+ * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+ */
+static unsigned long model_dynamic_power(unsigned long freq,
+               unsigned long voltage)
+{
+       const unsigned long mhz = freq / 1000000; /* Hz -> MHz */
+       const unsigned long volt_sq = (voltage * voltage) / 1000; /* m*(V*V) */
+       const unsigned long scaled = dynamic_coefficient * volt_sq * mhz;
+
+       return scaled / 1000000; /* mW */
+}
+
+struct devfreq_cooling_ops power_model_simple_ops = {
+       .get_static_power = model_static_power,
+       .get_dynamic_power = model_dynamic_power,
+};
+
+/**
+ * kbase_power_model_simple_init - Read the simple power model from device tree
+ * @kbdev: Device pointer
+ *
+ * Looks up the "power_model" child node of the device, resolves the thermal
+ * zone named by its "thermal-zone" property into gpu_tz, and derives
+ * static_coefficient, dynamic_coefficient and ts[] from the "static-power",
+ * "dynamic-power", "voltage", "frequency" and "ts" properties.
+ *
+ * The reference taken on the power_model node is dropped before returning,
+ * on success and on every failure path.
+ *
+ * Return: 0 on success
+ *         -ENODEV if the power_model node is missing or not compatible
+ *         -EPROBE_DEFER if the thermal zone could not be obtained
+ *         -EINVAL if a property is missing, or if "voltage"/"frequency"
+ *         would lead to a division by zero
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev)
+{
+       struct device_node *power_model_node;
+       const char *tz_name;
+       u32 static_power, dynamic_power;
+       u32 voltage, voltage_squared, voltage_cubed, frequency;
+       int err = -EINVAL;
+
+       power_model_node = of_get_child_by_name(kbdev->dev->of_node,
+                       "power_model");
+       if (!power_model_node) {
+               dev_err(kbdev->dev, "could not find power_model node\n");
+               return -ENODEV;
+       }
+       if (!of_device_is_compatible(power_model_node,
+                       "arm,mali-simple-power-model")) {
+               dev_err(kbdev->dev, "power_model incompatible with simple power model\n");
+               err = -ENODEV;
+               goto out;
+       }
+
+       if (of_property_read_string(power_model_node, "thermal-zone",
+                       &tz_name)) {
+               dev_err(kbdev->dev, "thermal-zone in power_model not available\n");
+               goto out;
+       }
+
+       gpu_tz = thermal_zone_get_zone_by_name(tz_name);
+       if (IS_ERR(gpu_tz)) {
+               pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+                               PTR_ERR(gpu_tz));
+               gpu_tz = NULL;
+               err = -EPROBE_DEFER;
+               goto out;
+       }
+
+       if (of_property_read_u32(power_model_node, "static-power",
+                       &static_power)) {
+               dev_err(kbdev->dev, "static-power in power_model not available\n");
+               goto out;
+       }
+       if (of_property_read_u32(power_model_node, "dynamic-power",
+                       &dynamic_power)) {
+               dev_err(kbdev->dev, "dynamic-power in power_model not available\n");
+               goto out;
+       }
+       if (of_property_read_u32(power_model_node, "voltage",
+                       &voltage)) {
+               dev_err(kbdev->dev, "voltage in power_model not available\n");
+               goto out;
+       }
+       if (of_property_read_u32(power_model_node, "frequency",
+                       &frequency)) {
+               dev_err(kbdev->dev, "frequency in power_model not available\n");
+               goto out;
+       }
+
+       /* NOTE(review): these are 32-bit products; voltage * voltage *
+        * voltage wraps for values above 1625 - confirm the expected range
+        * of the "voltage" property.
+        */
+       voltage_squared = (voltage * voltage) / 1000;
+       voltage_cubed = voltage * voltage * voltage;
+
+       /* All three values are used as divisors below */
+       if (!voltage_squared || !(voltage_cubed >> 10) || !frequency) {
+               dev_err(kbdev->dev, "voltage or frequency in power_model out of range\n");
+               goto out;
+       }
+
+       static_coefficient = (static_power << 20) / (voltage_cubed >> 10);
+       dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared)
+                       * 1000) / frequency;
+
+       if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) {
+               dev_err(kbdev->dev, "ts in power_model not available\n");
+               goto out;
+       }
+
+       err = 0;
+out:
+       /* Drop the reference taken by of_get_child_by_name() */
+       of_node_put(power_model_node);
+       return err;
+}
+
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/t83x/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h
new file mode 100644 (file)
index 0000000..d20de1e
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_POWER_MODEL_SIMPLE_H_
+#define _BASE_POWER_MODEL_SIMPLE_H_
+
+/**
+ * kbase_power_model_simple_init - Initialise the simple power model
+ * @kbdev: Device pointer
+ *
+ * The simple power model estimates power based on current voltage, temperature,
+ * and coefficients read from device tree. It does not take utilization into
+ * account.
+ *
+ * The power model requires coefficients from the power_model node in device
+ * tree. The absence of this node will prevent the model from functioning, but
+ * should not prevent the rest of the driver from running.
+ *
+ * Return: 0 on success
+ *         -ENODEV if the power_model node is not present in device tree, or
+ *         is not compatible with "arm,mali-simple-power-model"
+ *         -EPROBE_DEFER if the thermal zone specified in device tree is not
+ *         currently available
+ *         -EINVAL if a required property of the power_model node is missing
+ *         Any other negative value on failure
+ */
+int kbase_power_model_simple_init(struct kbase_device *kbdev);
+
+extern struct devfreq_cooling_ops power_model_simple_ops;
+
+#endif /* _BASE_POWER_MODEL_SIMPLE_H_ */
index 5a1523034c15ed2e7d99aa68612e2b7215e19c99..29c78c2ba6d82a8eb8d4a287bfa0c1d37a9318ff 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -45,9 +45,10 @@ enum base_hw_feature {
        BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
        BASE_HW_FEATURE_BRNDOUT_KILL,
        BASE_HW_FEATURE_WARPING,
-       BASE_HW_FEATURE_FLUSH_REDUCTION,
        BASE_HW_FEATURE_V4,
+       BASE_HW_FEATURE_FLUSH_REDUCTION,
        BASE_HW_FEATURE_PROTECTED_MODE,
+       BASE_HW_FEATURE_COHERENCY_REG,
        BASE_HW_FEATURE_END
 };
 
@@ -161,5 +162,28 @@ static const enum base_hw_feature base_hw_features_t82x[] = {
        BASE_HW_FEATURE_END
 };
 
+static const enum base_hw_feature base_hw_features_tMIx[] = {
+       BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+       BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+       BASE_HW_FEATURE_WARPING,
+       BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+       BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+       BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+       BASE_HW_FEATURE_BRNDOUT_CC,
+       BASE_HW_FEATURE_BRNDOUT_KILL,
+       BASE_HW_FEATURE_LD_ST_LEA_TEX,
+       BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+       BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+       BASE_HW_FEATURE_MRT,
+       BASE_HW_FEATURE_MSAA_16X,
+       BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+       BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+       BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+       BASE_HW_FEATURE_TEST4_DATUM_MODE,
+       BASE_HW_FEATURE_FLUSH_REDUCTION,
+       BASE_HW_FEATURE_PROTECTED_MODE,
+       BASE_HW_FEATURE_COHERENCY_REG,
+       BASE_HW_FEATURE_END
+};
 
 #endif /* _BASE_HWCONFIG_FEATURES_H_ */
index 9fae0f6c806ac0290484bf82caca9332d8d6cf17..e111b0777f1f46b3dc003589904f9679e10bfcdf 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -57,7 +57,6 @@ enum base_hw_issue {
        BASE_HW_ISSUE_8986,
        BASE_HW_ISSUE_8987,
        BASE_HW_ISSUE_9010,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9418,
        BASE_HW_ISSUE_9423,
        BASE_HW_ISSUE_9435,
@@ -90,6 +89,7 @@ enum base_hw_issue {
        BASE_HW_ISSUE_11024,
        BASE_HW_ISSUE_11035,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_26,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
@@ -100,7 +100,15 @@ enum base_hw_issue {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
+       BASE_HW_ISSUE_TMIX_7940,
+       BASE_HW_ISSUE_TMIX_8042,
+       BASE_HW_ISSUE_TMIX_8133,
+       BASE_HW_ISSUE_TMIX_8138,
+       BASE_HW_ISSUE_TMIX_8206,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
@@ -140,7 +148,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
        BASE_HW_ISSUE_8986,
        BASE_HW_ISSUE_8987,
        BASE_HW_ISSUE_9010,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9418,
        BASE_HW_ISSUE_9423,
        BASE_HW_ISSUE_9435,
@@ -165,7 +172,10 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
        BASE_HW_ISSUE_11012,
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11035,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
+       BASE_HW_ISSUE_T76X_3964,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
@@ -180,7 +190,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
        BASE_HW_ISSUE_8778,
        BASE_HW_ISSUE_8975,
        BASE_HW_ISSUE_9010,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9418,
        BASE_HW_ISSUE_9423,
        BASE_HW_ISSUE_9435,
@@ -201,7 +210,9 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
        BASE_HW_ISSUE_11012,
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11035,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_END
 };
 
@@ -216,7 +227,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
        BASE_HW_ISSUE_8778,
        BASE_HW_ISSUE_8975,
        BASE_HW_ISSUE_9010,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9435,
        BASE_HW_ISSUE_9510,
        BASE_HW_ISSUE_10410,
@@ -234,8 +244,10 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
        BASE_HW_ISSUE_11012,
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11035,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_END
 };
 
@@ -264,6 +276,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
        BASE_HW_ISSUE_11024,
        BASE_HW_ISSUE_11035,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_END
@@ -284,8 +297,10 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11024,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_END
 };
 
@@ -302,6 +317,7 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
        BASE_HW_ISSUE_10959,
        BASE_HW_ISSUE_11012,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_END
@@ -315,6 +331,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11024,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_26,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
@@ -325,7 +342,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -337,6 +356,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11024,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_26,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
@@ -347,7 +367,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -357,6 +379,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_26,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
@@ -367,7 +390,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -379,6 +404,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11024,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_26,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
@@ -389,7 +415,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -399,6 +427,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_26,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
@@ -409,7 +438,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -419,6 +450,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
@@ -426,7 +458,9 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -440,8 +474,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_END
 };
 
@@ -455,8 +491,10 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_END
 };
 
@@ -470,36 +508,43 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_END
 };
 
 static const enum base_hw_issue base_hw_issues_model_t72x[] = {
        BASE_HW_ISSUE_5736,
        BASE_HW_ISSUE_6402,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9435,
        BASE_HW_ISSUE_10471,
        BASE_HW_ISSUE_10797,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
+       BASE_HW_ISSUE_T76X_3964,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
 static const enum base_hw_issue base_hw_issues_model_t76x[] = {
        BASE_HW_ISSUE_5736,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9435,
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11024,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
+       BASE_HW_ISSUE_T76X_3964,
+       BASE_HW_ISSUE_TMIX_7891,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
@@ -507,15 +552,17 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = {
        BASE_HW_ISSUE_5736,
        BASE_HW_ISSUE_6402,
        BASE_HW_ISSUE_8778,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9435,
        BASE_HW_ISSUE_10472,
        BASE_HW_ISSUE_10931,
        BASE_HW_ISSUE_11012,
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11024,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
+       BASE_HW_ISSUE_T76X_3964,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
@@ -529,8 +576,11 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = {
        BASE_HW_ISSUE_11020,
        BASE_HW_ISSUE_11024,
        BASE_HW_ISSUE_11042,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
+       BASE_HW_ISSUE_T76X_3964,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
@@ -539,6 +589,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
@@ -546,7 +597,9 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -555,13 +608,16 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -570,12 +626,14 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -584,23 +642,28 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
 static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
        BASE_HW_ISSUE_5736,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
+       BASE_HW_ISSUE_T76X_3964,
+       BASE_HW_ISSUE_TMIX_7891,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
@@ -609,13 +672,16 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
+       BASE_HW_ISSUE_T76X_3964,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -624,12 +690,14 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -638,23 +706,27 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3966,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
 static const enum base_hw_issue base_hw_issues_model_t86x[] = {
        BASE_HW_ISSUE_5736,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
+       BASE_HW_ISSUE_TMIX_7891,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
@@ -663,6 +735,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
@@ -670,6 +743,7 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -678,24 +752,29 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
 static const enum base_hw_issue base_hw_issues_model_t83x[] = {
        BASE_HW_ISSUE_5736,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
+       BASE_HW_ISSUE_T76X_3964,
+       BASE_HW_ISSUE_TMIX_7891,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
@@ -704,6 +783,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
@@ -711,6 +791,8 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_T76X_3964,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -719,6 +801,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
@@ -726,6 +809,7 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
@@ -734,27 +818,70 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
        BASE_HW_ISSUE_10821,
        BASE_HW_ISSUE_10883,
        BASE_HW_ISSUE_10946,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
        BASE_HW_ISSUE_T76X_3953,
        BASE_HW_ISSUE_T76X_3960,
+       BASE_HW_ISSUE_TMIX_7891,
        BASE_HW_ISSUE_END
 };
 
 static const enum base_hw_issue base_hw_issues_model_t82x[] = {
        BASE_HW_ISSUE_5736,
-       BASE_HW_ISSUE_9275,
        BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_11051,
        BASE_HW_ISSUE_T76X_1909,
        BASE_HW_ISSUE_T76X_1963,
        BASE_HW_ISSUE_T76X_3086,
        BASE_HW_ISSUE_T76X_3700,
        BASE_HW_ISSUE_T76X_3793,
+       BASE_HW_ISSUE_TMIX_7891,
+       GPUCORE_1619,
        BASE_HW_ISSUE_END
 };
 
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
+       BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_10682,
+       BASE_HW_ISSUE_10821,
+       BASE_HW_ISSUE_T76X_3700,
+       BASE_HW_ISSUE_T76X_3953,
+       BASE_HW_ISSUE_TMIX_7891,
+       BASE_HW_ISSUE_TMIX_8042,
+       BASE_HW_ISSUE_TMIX_8133,
+       BASE_HW_ISSUE_TMIX_8138,
+       BASE_HW_ISSUE_END
+};
 
+static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = {
+       BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_10682,
+       BASE_HW_ISSUE_10821,
+       BASE_HW_ISSUE_T76X_3700,
+       BASE_HW_ISSUE_TMIX_7891,
+       BASE_HW_ISSUE_TMIX_7940,
+       BASE_HW_ISSUE_TMIX_8042,
+       BASE_HW_ISSUE_TMIX_8133,
+       BASE_HW_ISSUE_TMIX_8138,
+       BASE_HW_ISSUE_TMIX_8206,
+       BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tMIx[] = {
+       BASE_HW_ISSUE_5736,
+       BASE_HW_ISSUE_9435,
+       BASE_HW_ISSUE_T76X_3700,
+       BASE_HW_ISSUE_TMIX_7891,
+       BASE_HW_ISSUE_TMIX_7940,
+       BASE_HW_ISSUE_TMIX_8042,
+       BASE_HW_ISSUE_TMIX_8133,
+       BASE_HW_ISSUE_TMIX_8138,
+       BASE_HW_ISSUE_TMIX_8206,
+       GPUCORE_1619,
+       BASE_HW_ISSUE_END
+};
 
 #endif /* _BASE_HWCONFIG_ISSUES_H_ */
index 56621309c5ad5c6baf958528bc16497ef7036da2..c59e8b2c04cae1ac6bd566243d36a3e0b662ec5b 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 /* Support UK9 IOCTLS */
 #define BASE_LEGACY_UK9_SUPPORT 1
 
-typedef u64 base_mem_handle;
+typedef struct base_mem_handle {
+       struct {
+               u64 handle;
+       } basep;
+} base_mem_handle;
 
 #include "mali_base_mem_priv.h"
 #include "mali_kbase_profiling_gator_api.h"
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
 
 /*
  * Dependency stuff, keep it private for now. May want to expose it if
@@ -60,6 +66,10 @@ typedef u64 base_mem_handle;
 #define BASEP_JD_SEM_MASK_IN_WORD(x)    (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
 #define BASEP_JD_SEM_ARRAY_SIZE         BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
 
+/* Set/reset values for a software event */
+#define BASE_JD_SOFT_EVENT_SET             ((unsigned char)1)
+#define BASE_JD_SOFT_EVENT_RESET           ((unsigned char)0)
+
 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
 
 #define BASE_MAX_COHERENT_GROUPS 16
@@ -160,7 +170,9 @@ enum {
 /* IN */
        BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
                                             Outer shareable, required. */
-       BASE_MEM_SECURE = (1U << 16)           /**< Secure memory */
+       BASE_MEM_SECURE = (1U << 16),          /**< Secure memory */
+       BASE_MEM_DONT_NEED = (1U << 17),       /**< Not needed physical
+                                                   memory */
 
 };
 
@@ -169,7 +181,7 @@ enum {
  *
  * Must be kept in sync with the ::base_mem_alloc_flags flags
  */
-#define BASE_MEM_FLAGS_NR_BITS 17
+#define BASE_MEM_FLAGS_NR_BITS 18
 
 /**
   * A mask for all output bits, excluding IN/OUT bits.
@@ -182,9 +194,22 @@ enum {
 #define BASE_MEM_FLAGS_INPUT_MASK \
        (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
 
+/**
+ * A mask for all the flags which are modifiable via the base_mem_set_flags
+ * interface.
+ */
+#define BASE_MEM_FLAGS_MODIFIABLE \
+       (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
+        BASE_MEM_COHERENT_LOCAL)
 
 /**
- * @brief Memory types supported by @a base_mem_import
+ * enum base_mem_import_type - Memory types supported by @a base_mem_import
+ *
+ * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
+ * @BASE_MEM_IMPORT_TYPE_UMP: UMP import. Handle type is ump_secure_id.
+ * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
+ * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
+ * base_mem_import_user_buffer
  *
  * Each type defines what the supported handle type is.
  *
@@ -196,21 +221,52 @@ enum {
  */
 typedef enum base_mem_import_type {
        BASE_MEM_IMPORT_TYPE_INVALID = 0,
-       /** UMP import. Handle type is ump_secure_id. */
        BASE_MEM_IMPORT_TYPE_UMP = 1,
-       /** UMM import. Handle type is a file descriptor (int) */
-       BASE_MEM_IMPORT_TYPE_UMM = 2
+       BASE_MEM_IMPORT_TYPE_UMM = 2,
+       BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3
 } base_mem_import_type;
 
 /**
- * @brief Invalid memory handle type.
- * Return value from functions returning @a base_mem_handle on error.
+ * struct base_mem_import_user_buffer - Handle of an imported user buffer
+ *
+ * @ptr:       kbase_pointer to imported user buffer
+ * @length:    length of imported user buffer in bytes
+ *
+ * This structure is used to represent a handle of an imported user buffer.
+ */
+
+struct base_mem_import_user_buffer {
+       kbase_pointer ptr;
+       u64 length;
+};
+
+/**
+ * @brief Invalid memory handle.
+ *
+ * Return value from functions returning @ref base_mem_handle on error.
+ *
+ * @warning @ref base_mem_handle_new_invalid must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
  */
-#define BASE_MEM_INVALID_HANDLE                (0ull  << 12)
+#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} })
+
+/**
+ * @brief Special write-alloc memory handle.
+ *
+ * A special handle is used to represent a region where a special page is mapped
+ * with a write-alloc cache setup, typically used when the write result of the
+ * GPU isn't needed, but the GPU must write anyway.
+ *
+ * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro
+ *          in C++ code or other situations where compound literals cannot be used.
+ */
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} })
+
+#define BASEP_MEM_INVALID_HANDLE               (0ull  << 12)
 #define BASE_MEM_MMU_DUMP_HANDLE               (1ull  << 12)
 #define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
 #define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
-#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE      (4ull  << 12)
+#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
 /* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
 #define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
 #define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
@@ -219,6 +275,7 @@ typedef enum base_mem_import_type {
 /* Mask to detect 4GB boundary alignment */
 #define BASE_MEM_MASK_4GB  0xfffff000UL
 
+
 /* Bit mask of cookies used for for memory allocation setup */
 #define KBASE_COOKIE_MASK  ~1UL /* bit 0 is reserved */
 
@@ -334,6 +391,28 @@ struct base_mem_aliasing_info {
        u64 length;
 };
 
+/**
+ * struct base_jit_alloc_info - Structure which describes a JIT allocation
+ *                              request.
+ * @gpu_alloc_addr:             The GPU virtual address to write the JIT
+ *                              allocated GPU virtual address to.
+ * @va_pages:                   The minimum number of virtual pages required.
+ * @commit_pages:               The minimum number of physical pages which
+ *                              should back the allocation.
+ * @extent:                     Granularity of physical pages to grow the
+ *                              allocation by during a fault.
+ * @id:                         Unique ID provided by the caller, this is used
+ *                              to pair allocation and free requests.
+ *                              Zero is not a valid value.
+ */
+struct base_jit_alloc_info {
+       u64 gpu_alloc_addr;
+       u64 va_pages;
+       u64 commit_pages;
+       u64 extent;
+       u8 id;
+};
+
 /**
  * @brief Job dependency type.
  *
@@ -386,6 +465,14 @@ typedef u16 base_jd_core_req;
 /* Requires fragment job with AFBC encoding */
 #define BASE_JD_REQ_FS_AFBC  (1U << 13)
 
+/**
+ * SW-only requirement: coalesce completion events.
+ * If this bit is set then completion of this atom will not cause an event to
+ * be sent to userspace, whether successful or not; completion events will be
+ * deferred until an atom completes which does not have this bit set.
+ */
+#define BASE_JD_REQ_EVENT_COALESCE (1U << 5)
+
 /**
  * SW Only requirement: the job chain requires a coherent core group. We don't
  * mind which coherent core group is used.
@@ -455,6 +542,66 @@ typedef u16 base_jd_core_req;
  * - Priority is inherited from the replay job.
  */
 #define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/**
+ * SW only requirement: event wait/trigger job.
+ *
+ * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
+ * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
+ *   other waiting jobs. It completes immediately.
+ * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
+ *   possible for other jobs to wait upon. It completes immediately.
+ */
+#define BASE_JD_REQ_SOFT_EVENT_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x5)
+#define BASE_JD_REQ_SOFT_EVENT_SET              (BASE_JD_REQ_SOFT_JOB | 0x6)
+#define BASE_JD_REQ_SOFT_EVENT_RESET            (BASE_JD_REQ_SOFT_JOB | 0x7)
+
+#define BASE_JD_REQ_SOFT_DEBUG_COPY             (BASE_JD_REQ_SOFT_JOB | 0x8)
+
+/**
+ * SW only requirement: Just In Time allocation
+ *
+ * This job requests a JIT allocation based on the request in the
+ * @base_jit_alloc_info structure which is passed via the jc element of
+ * the atom.
+ *
+ * It should be noted that the id entry in @base_jit_alloc_info must not
+ * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE.
+ *
+ * Should this soft job fail, it is still expected that a
+ * @BASE_JD_REQ_SOFT_JIT_FREE soft job is made to free the JIT allocation.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_ALLOC              (BASE_JD_REQ_SOFT_JOB | 0x9)
+/**
+ * SW only requirement: Just In Time free
+ *
+ * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC
+ * to be freed. The ID of the JIT allocation is passed via the jc element of
+ * the atom.
+ *
+ * The job will complete immediately.
+ */
+#define BASE_JD_REQ_SOFT_JIT_FREE               (BASE_JD_REQ_SOFT_JOB | 0xa)
+
+/**
+ * SW only requirement: Map external resource
+ *
+ * This job requests that external resource(s) be mapped once the
+ * dependencies of the job have been satisfied. The list of external
+ * resources is passed via the jc element of the atom, which is a pointer
+ * to a @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_MAP            (BASE_JD_REQ_SOFT_JOB | 0xb)
+/**
+ * SW only requirement: Unmap external resource
+ *
+ * This job requests that external resource(s) be unmapped once the
+ * dependencies of the job have been satisfied. The list of external
+ * resources is passed via the jc element of the atom, which is a pointer
+ * to a @base_external_resource_list.
+ */
+#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP          (BASE_JD_REQ_SOFT_JOB | 0xc)
 
 /**
  * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
@@ -463,9 +610,6 @@ typedef u16 base_jd_core_req;
  *
  * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
  * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
- *
- * @note This is a more flexible variant of the @ref BASE_CONTEXT_HINT_ONLY_COMPUTE flag,
- * allowing specific jobs to be marked as 'Only Compute' instead of the entire context
  */
 #define BASE_JD_REQ_ONLY_COMPUTE    (1U << 10)
 
@@ -495,26 +639,21 @@ typedef u16 base_jd_core_req;
 #define BASEP_JD_REQ_EVENT_NEVER (1U << 14)
 
 /**
-* These requirement bits are currently unused in base_jd_core_req (currently a u16)
-*/
-
-#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5)
-#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15)
-
-/**
-* Mask of all the currently unused requirement bits in base_jd_core_req.
-*/
+ * These requirement bits are currently unused in base_jd_core_req (currently a u16)
+ */
 
-#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \
-                               BASEP_JD_REQ_RESERVED_BIT15)
+#define BASEP_JD_REQ_RESERVED (1U << 15)
 
 /**
  * Mask of all bits in base_jd_core_req that control the type of the atom.
  *
  * This allows dependency only atoms to have flags set
  */
-#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
-                               BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER))
+#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED |\
+                               BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
+                               BASE_JD_REQ_EXTERNAL_RESOURCES |\
+                               BASEP_JD_REQ_EVENT_NEVER |\
+                               BASE_JD_REQ_EVENT_COALESCE))
 
 /**
  * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
@@ -656,6 +795,30 @@ typedef struct base_external_resource {
        u64 ext_resource;
 } base_external_resource;
 
+
+/**
+ * The maximum number of external resources which can be mapped/unmapped
+ * in a single request.
+ */
+#define BASE_EXT_RES_COUNT_MAX 10
+
+/**
+ * struct base_external_resource_list - Structure which describes a list of
+ *                                      external resources.
+ * @count:                              The number of resources.
+ * @ext_res:                            Array of external resources which is
+ *                                      sized at allocation time.
+ */
+struct base_external_resource_list {
+       u64 count;
+       struct base_external_resource ext_res[1];
+};
+
+struct base_jd_debug_copy_buffer {
+       u64 address;
+       u64 size;
+};
+
 /**
  * @brief Setter for a dependency structure
  *
@@ -754,11 +917,12 @@ static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, str
 /**
  * @brief External resource info initialization.
  *
- * Sets up a external resource object to reference
+ * Sets up an external resource object to reference
  * a memory allocation and the type of access requested.
  *
  * @param[in] res     The resource object to initialize
- * @param     handle  The handle to the imported memory object
+ * @param     handle  The handle to the imported memory object, must be
+ *                    obtained by calling @ref base_mem_as_import_handle().
  * @param     access  The type of access requested
  */
 static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
@@ -1365,8 +1529,7 @@ struct gpu_raw_gpu_props {
        u64 shader_present;
        u64 tiler_present;
        u64 l2_present;
-       u32 coherency_enabled;
-       u32 unused_1; /* keep for backward compatibility */
+       u64 unused_1; /* keep for backward compatibility */
 
        u32 l2_features;
        u32 suspend_size; /* API 8.2+ */
@@ -1387,7 +1550,11 @@ struct gpu_raw_gpu_props {
        u32 thread_max_barrier_size;
        u32 thread_features;
 
-       u32 coherency_features;
+       /*
+        * Note: This is the _selected_ coherency mode rather than the
+        * available modes as exposed in the coherency_features register.
+        */
+       u32 coherency_mode;
 };
 
 /**
@@ -1441,28 +1608,7 @@ enum base_context_create_flags {
        /** Base context is a 'System Monitor' context for Hardware counters.
         *
         * One important side effect of this is that job submission is disabled. */
-       BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1),
-
-       /** Base context flag indicating a 'hint' that this context uses Compute
-        * Jobs only.
-        *
-        * Specifially, this means that it only sends atoms that <b>do not</b>
-        * contain the following @ref base_jd_core_req :
-        * - BASE_JD_REQ_FS
-        * - BASE_JD_REQ_T
-        *
-        * Violation of these requirements will cause the Job-Chains to be rejected.
-        *
-        * In addition, it is inadvisable for the atom's Job-Chains to contain Jobs
-        * of the following @ref gpu_job_type (whilst it may work now, it may not
-        * work in future) :
-        * - @ref GPU_JOB_VERTEX
-        * - @ref GPU_JOB_GEOMETRY
-        *
-        * @note An alternative to using this is to specify the BASE_JD_REQ_ONLY_COMPUTE
-        * requirement in atoms.
-        */
-       BASE_CONTEXT_HINT_ONLY_COMPUTE = (1u << 2)
+       BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1)
 };
 
 /**
@@ -1470,15 +1616,13 @@ enum base_context_create_flags {
  */
 #define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
        (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
-         ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
-         ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
+         ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED))
 
 /**
  * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
  */
 #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
-       (((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
-         ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
+       ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
 
 /**
  * Private flags used on the base context
index 8840d60ab1be6e5e267c6822e28c13eb2faee93c..3764061903d93b47b83b2a0a4f0afe1264772ded 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -51,7 +51,6 @@
 #include "mali_kbase_trace_timeline.h"
 #include "mali_kbase_js.h"
 #include "mali_kbase_mem.h"
-#include "mali_kbase_security.h"
 #include "mali_kbase_utility.h"
 #include "mali_kbase_gpu_memory_debugfs.h"
 #include "mali_kbase_mem_profile_debugfs.h"
@@ -82,7 +81,7 @@ struct kbase_device *kbase_device_alloc(void);
 */
 
 /*
-* API to acquire device list semaphone and return pointer
+* API to acquire device list semaphore and return pointer
 * to the device list head
 */
 const struct list_head *kbase_dev_list_get(void);
@@ -140,7 +139,6 @@ void kbase_jd_done_worker(struct work_struct *data);
 void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
                kbasep_js_atom_done_code done_code);
 void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
-void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
 void kbase_jd_zap_context(struct kbase_context *kctx);
 bool jd_done_nolock(struct kbase_jd_atom *katom,
                struct list_head *completed_jobs_ctx);
@@ -192,12 +190,21 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
 void kbase_finish_soft_job(struct kbase_jd_atom *katom);
 void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
 void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom);
 
 bool kbase_replay_process(struct kbase_jd_atom *katom);
 
+enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer);
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
+int kbasep_read_soft_event_status(
+               struct kbase_context *kctx, u64 evt, unsigned char *status);
+int kbasep_write_soft_event_status(
+               struct kbase_context *kctx, u64 evt, unsigned char new_status);
+
 /* api used internally for register access. Contains validation and tracing */
 void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
-void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size);
+int kbase_device_trace_buffer_install(
+               struct kbase_context *kctx, u32 *tb, size_t size);
 void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
 
 /* api to be ported per OS, only need to do the raw register access */
@@ -346,6 +353,10 @@ void kbase_disjoint_state_down(struct kbase_device *kbdev);
  */
 #define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
 
+#if !defined(UINT64_MAX)
+       #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
 #if KBASE_TRACE_ENABLE
 void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
 
index 2fb5e3edf49f64095866f5ec3ce4fa48cba55ebf..c67b3e97f1af7843937b7577bb0b74ebd6f2bb4a 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,6 +43,11 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
 void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
                size_t size, enum dma_data_direction dir)
 {
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+       if (kbdev->system_coherency == COHERENCY_ACE)
+               return;
+#endif /* CONFIG_MALI_COH_KERN */
        dma_sync_single_for_device(kbdev->dev, handle, size, dir);
 }
 
@@ -50,5 +55,10 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
 void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
                size_t size, enum dma_data_direction dir)
 {
+/* Check if kernel is using coherency with GPU */
+#ifdef CONFIG_MALI_COH_KERN
+       if (kbdev->system_coherency == COHERENCY_ACE)
+               return;
+#endif /* CONFIG_MALI_COH_KERN */
        dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
 }
index 0285e25840291050b7dec635e7895b9162bf5c7f..356d52bcd77464c5b24336199d7027a135df06ec 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * @{
  */
 
-#if !MALI_CUSTOMER_RELEASE
-/* This flag is set for internal builds so we can run tests without credentials. */
-#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1
-#else
-#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0
-#endif
-
 #include <linux/rbtree.h>
 
 /* Forward declaration of struct kbase_device */
@@ -105,7 +98,7 @@ struct kbase_pm_callback_conf {
         * The system integrator can decide whether to either do nothing, just switch off
         * the clocks to the GPU, or to completely power down the GPU.
         * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
-        * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+        * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
         */
        void (*power_off_callback)(struct kbase_device *kbdev);
 
@@ -115,7 +108,7 @@ struct kbase_pm_callback_conf {
         * succeed.  The return value specifies whether the GPU was powered down since the call to power_off_callback.
         * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
         * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
-        * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+        * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
         *
         * The return value of the first call to this function is ignored.
         *
@@ -160,7 +153,7 @@ struct kbase_pm_callback_conf {
         * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
         * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
         *
-        * @return 0 on success, else int erro code.
+        * @return 0 on success, else int error code.
         */
         int (*power_runtime_init_callback)(struct kbase_device *kbdev);
 
@@ -187,6 +180,32 @@ struct kbase_pm_callback_conf {
         * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
         */
        int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+       /*
+        * Optional callback for checking if GPU can be suspended when idle
+        *
+        * This callback will be called by the runtime power management core
+        * when the reference count goes to 0 to provide notification that the
+        * GPU now seems idle.
+        *
+        * If this callback finds that the GPU can't be powered off, or handles
+        * suspend by powering off directly or queueing up a power off, a
+        * non-zero value must be returned to prevent the runtime PM core from
+        * also triggering a suspend.
+        *
+        * Returning 0 will cause the runtime PM core to conduct a regular
+        * autosuspend.
+        *
+        * This callback is optional and if not provided regular autosuspend
+        * will be triggered.
+        *
+        * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use
+        * this feature.
+        *
+        * Return 0 if GPU can be suspended, positive value if it can not be
+        * suspended by runtime PM, else OS error code
+        */
+       int (*power_runtime_idle_callback)(struct kbase_device *kbdev);
 };
 
 /**
index ce5d0703911c2b2ffd6baa33f8fdbf72ad9df39f..ee7c96d8a2c81321ed13cdf142786a8b4045942c 100755 (executable)
@@ -148,11 +148,6 @@ enum {
  */
 #define DEFAULT_AWID_LIMIT KBASE_AID_32
 
-/**
- * Default setting for using alternative hardware counters.
- */
-#define DEFAULT_ALTERNATIVE_HWC false
-
 /**
  * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
  * defines which UMP device this GPU should be mapped to.
@@ -219,6 +214,12 @@ enum {
  */
 #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING   (15000) /* 1500s */
 
+/*
+ * Default timeout for software event jobs, after which these jobs will be
+ * cancelled.
+ */
+#define DEFAULT_JS_SOFT_EVENT_TIMEOUT ((u32)3000) /* 3s */
+
 /*
  * Default minimum number of scheduling ticks before the GPU is reset to clear a
  * "stuck" job
index 3a6e9f867206a33f7deddd84df5ad3f8cc3c0006..d53f728d08e8f1014051598ad28cc8e9877cd3b2 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
 #include <mali_kbase_instr.h>
-
+#include <mali_kbase_mem_linux.h>
 
 /**
  * kbase_create_context() - Create a kernel base context.
@@ -72,11 +72,15 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
        if (err)
                goto free_kctx;
 
+       err = kbase_mem_evictable_init(kctx);
+       if (err)
+               goto free_pool;
+
        atomic_set(&kctx->used_pages, 0);
 
        err = kbase_jd_init(kctx);
        if (err)
-               goto free_pool;
+               goto deinit_evictable;
 
        err = kbasep_js_kctx_init(kctx);
        if (err)
@@ -89,6 +93,7 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
        mutex_init(&kctx->reg_lock);
 
        INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+       spin_lock_init(&kctx->waiting_soft_jobs_lock);
 #ifdef CONFIG_KDS
        INIT_LIST_HEAD(&kctx->waiting_kds_resource);
 #endif
@@ -115,6 +120,14 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
        err = kbase_region_tracker_init(kctx);
        if (err)
                goto no_region_tracker;
+
+       err = kbase_sticky_resource_init(kctx);
+       if (err)
+               goto no_sticky;
+
+       err = kbase_jit_init(kctx);
+       if (err)
+               goto no_jit;
 #ifdef CONFIG_GPU_TRACEPOINTS
        atomic_set(&kctx->jctx.work_id, 0);
 #endif
@@ -126,8 +139,18 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat)
 
        mutex_init(&kctx->vinstr_cli_lock);
 
+       hrtimer_init(&kctx->soft_event_timeout, CLOCK_MONOTONIC,
+                    HRTIMER_MODE_REL);
+       kctx->soft_event_timeout.function = &kbasep_soft_event_timeout_worker;
+
        return kctx;
 
+no_jit:
+       kbase_gpu_vm_lock(kctx);
+       kbase_sticky_resource_term(kctx);
+       kbase_gpu_vm_unlock(kctx);
+no_sticky:
+       kbase_region_tracker_term(kctx);
 no_region_tracker:
        kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false);
 no_sink_page:
@@ -143,6 +166,8 @@ free_jd:
        /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
        kbasep_js_kctx_term(kctx);
        kbase_jd_exit(kctx);
+deinit_evictable:
+       kbase_mem_evictable_deinit(kctx);
 free_pool:
        kbase_mem_pool_term(&kctx->mem_pool);
 free_kctx:
@@ -188,8 +213,18 @@ void kbase_destroy_context(struct kbase_context *kctx)
        kbase_jd_zap_context(kctx);
        kbase_event_cleanup(kctx);
 
+       /*
+        * JIT must be terminated before the code below as it must be called
+        * without the region lock being held.
+        * The code above ensures no new JIT allocations can be made
+        * by the time we get to this point of context tear down.
+        */
+       kbase_jit_term(kctx);
+
        kbase_gpu_vm_lock(kctx);
 
+       kbase_sticky_resource_term(kctx);
+
        /* MMU is disabled as part of scheduling out the context */
        kbase_mmu_free_pgd(kctx);
 
@@ -225,6 +260,7 @@ void kbase_destroy_context(struct kbase_context *kctx)
        if (pages != 0)
                dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
 
+       kbase_mem_evictable_deinit(kctx);
        kbase_mem_pool_term(&kctx->mem_pool);
        WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
 
@@ -262,9 +298,6 @@ int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
        if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
                js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
 
-       if ((flags & BASE_CONTEXT_HINT_ONLY_COMPUTE) != 0)
-               js_kctx_info->ctx.flags |= (u32) KBASE_CTX_FLAG_HINT_ONLY_COMPUTE;
-
        /* Latch the initial attributes into the Job Scheduler */
        kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
 
index adf484d6ba375f1802bfb36461c803e070a4adde..429fea2d77990972897bd72da16a29cac4dece62 100755 (executable)
@@ -1,7 +1,6 @@
-
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,6 +34,9 @@
 #include "mali_kbase_debug_mem_view.h"
 #include "mali_kbase_mem.h"
 #include "mali_kbase_mem_pool_debugfs.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_regs_dump_debugfs.h"
+#endif /* !MALI_CUSTOMER_RELEASE */
 #include <mali_kbase_hwaccess_backend.h>
 #include <mali_kbase_hwaccess_jm.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
@@ -61,7 +63,9 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/compat.h>      /* is_compat_task */
+#include <linux/mman.h>
 #include <linux/version.h>
+#include <linux/security.h>
 #ifdef CONFIG_MALI_PLATFORM_DEVICETREE
 #include <linux/pm_runtime.h>
 #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
@@ -91,9 +95,7 @@
 #include <linux/opp.h>
 #endif
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 
 /* GPU IRQ Tags */
 #define        JOB_IRQ_TAG     0
@@ -318,8 +320,10 @@ static int kbase_external_buffer_lock(struct kbase_context *kctx,
                                        resource_list_data.kds_resources,
                                        KDS_WAIT_BLOCKING);
 
-                       if (IS_ERR_OR_NULL(lock)) {
+                       if (!lock) {
                                ret = -EINVAL;
+                       } else if (IS_ERR(lock)) {
+                               ret = PTR_ERR(lock);
                        } else {
                                ret = 0;
                                fdata->lock = lock;
@@ -349,7 +353,6 @@ out:
 }
 #endif /* CONFIG_KDS */
 
-#ifdef CONFIG_MALI_MIPE_ENABLED
 static void kbase_create_timeline_objects(struct kbase_context *kctx)
 {
        struct kbase_device             *kbdev = kctx->kbdev;
@@ -389,7 +392,8 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx)
        list_for_each_entry(element, &kbdev->kctx_list, link) {
                kbase_tlstream_tl_summary_new_ctx(
                                element->kctx,
-                               (u32)(element->kctx->id));
+                               (u32)(element->kctx->id),
+                               (u32)(element->kctx->tgid));
        }
        /* Before releasing the lock, reset body stream buffers.
         * This will prevent context creation message to be directed to both
@@ -401,7 +405,6 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx)
         * user space. */
        kbase_tlstream_flush_streams();
 }
-#endif
 
 static void kbase_api_handshake(struct uku_version_check_args *version)
 {
@@ -468,6 +471,34 @@ enum mali_error {
        MALI_ERROR_FUNCTION_FAILED,
 };
 
+enum {
+       inited_mem = (1u << 0),
+       inited_js = (1u << 1),
+       inited_pm_runtime_init = (1u << 2),
+#ifdef CONFIG_MALI_DEVFREQ
+       inited_devfreq = (1u << 3),
+#endif /* CONFIG_MALI_DEVFREQ */
+       inited_tlstream = (1u << 4),
+       inited_backend_early = (1u << 5),
+       inited_backend_late = (1u << 6),
+       inited_device = (1u << 7),
+       inited_vinstr = (1u << 8),
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+       inited_ipa = (1u << 9),
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
+       inited_job_fault = (1u << 10),
+       inited_misc_register = (1u << 11),
+       inited_get_device = (1u << 12),
+       inited_sysfs_group = (1u << 13),
+       inited_dev_list = (1u << 14),
+       inited_debugfs = (1u << 15),
+       inited_gpu_device = (1u << 16),
+       inited_registers_map = (1u << 17),
+       inited_power_control = (1u << 19),
+       inited_buslogger = (1u << 20)
+};
+
+
 #ifdef CONFIG_MALI_DEBUG
 #define INACTIVE_WAIT_MS (5000)
 
@@ -488,6 +519,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
        struct kbase_device *kbdev;
        union uk_header *ukh = args;
        u32 id;
+       int ret = 0;
 
        KBASE_DEBUG_ASSERT(ukh != NULL);
 
@@ -552,6 +584,18 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
 
        /* setup complete, perform normal operation */
        switch (id) {
+       case KBASE_FUNC_MEM_JIT_INIT:
+               {
+                       struct kbase_uk_mem_jit_init *jit_init = args;
+
+                       if (sizeof(*jit_init) != args_size)
+                               goto bad_size;
+
+                       if (kbase_region_tracker_init_jit(kctx,
+                                       jit_init->va_pages))
+                               ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+                       break;
+               }
        case KBASE_FUNC_MEM_ALLOC:
                {
                        struct kbase_uk_mem_alloc *mem = args;
@@ -560,6 +604,13 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
                        if (sizeof(*mem) != args_size)
                                goto bad_size;
 
+#if defined(CONFIG_64BIT)
+                       if (!kctx->is_compat) {
+                               /* force SAME_VA if a 64-bit client */
+                               mem->flags |= BASE_MEM_SAME_VA;
+                       }
+#endif
+
                        reg = kbase_mem_alloc(kctx, mem->va_pages,
                                        mem->commit_pages, mem->extent,
                                        &mem->flags, &mem->gpu_va,
@@ -568,11 +619,9 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
                                ukh->ret = MALI_ERROR_FUNCTION_FAILED;
                        break;
                }
-       case KBASE_FUNC_MEM_IMPORT:
-               {
+       case KBASE_FUNC_MEM_IMPORT: {
                        struct kbase_uk_mem_import *mem_import = args;
-                       int __user *phandle;
-                       int handle;
+                       void __user *phandle;
 
                        if (sizeof(*mem_import) != args_size)
                                goto bad_size;
@@ -583,26 +632,20 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg
 #endif
                                phandle = mem_import->phandle.value;
 
-                       switch (mem_import->type) {
-                       case BASE_MEM_IMPORT_TYPE_UMP:
-                               get_user(handle, phandle);
-                               break;
-                       case BASE_MEM_IMPORT_TYPE_UMM:
-                               get_user(handle, phandle);
-                               break;
-                       default:
-                               mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+                       if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID) {
+                               ukh->ret = MALI_ERROR_FUNCTION_FAILED;
                                break;
                        }
 
-                       if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID ||
-                                       kbase_mem_import(kctx, mem_import->type,
-                                       handle, &mem_import->gpu_va,
-                                       &mem_import->va_pages,
-                                       &mem_import->flags))
+                       if (kbase_mem_import(kctx, mem_import->type, phandle,
+                                               &mem_import->gpu_va,
+                                               &mem_import->va_pages,
+                                               &mem_import->flags)) {
+                               mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
                                ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+                       }
                        break;
-               }
+       }
        case KBASE_FUNC_MEM_ALIAS: {
                        struct kbase_uk_mem_alias *alias = args;
                        struct base_mem_aliasing_info __user *user_ai;
@@ -778,7 +821,7 @@ copy_failed:
                        if (sizeof(*sn) != args_size)
                                goto bad_size;
 
-                       if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) {
+                       if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) {
                                dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
                                ukh->ret = MALI_ERROR_FUNCTION_FAILED;
                                break;
@@ -812,18 +855,10 @@ copy_failed:
        case KBASE_FUNC_HWCNT_SETUP:
                {
                        struct kbase_uk_hwcnt_setup *setup = args;
-                       bool access_allowed;
 
                        if (sizeof(*setup) != args_size)
                                goto bad_size;
 
-                       access_allowed = kbase_security_has_capability(
-                                       kctx,
-                                       KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
-                                       KBASE_SEC_FLAG_NOAUDIT);
-                       if (!access_allowed)
-                               goto out_bad;
-
                        mutex_lock(&kctx->vinstr_cli_lock);
                        if (kbase_vinstr_legacy_hwc_setup(kbdev->vinstr_ctx,
                                        &kctx->vinstr_cli, setup) != 0)
@@ -856,18 +891,10 @@ copy_failed:
        case KBASE_FUNC_HWCNT_READER_SETUP:
                {
                        struct kbase_uk_hwcnt_reader_setup *setup = args;
-                       bool access_allowed;
 
                        if (sizeof(*setup) != args_size)
                                goto bad_size;
 
-                       access_allowed = kbase_security_has_capability(
-                                       kctx,
-                                       KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
-                                       KBASE_SEC_FLAG_NOAUDIT);
-                       if (!access_allowed)
-                               goto out_bad;
-
                        mutex_lock(&kctx->vinstr_cli_lock);
                        if (kbase_vinstr_hwcnt_reader_setup(kbdev->vinstr_ctx,
                                        setup) != 0)
@@ -971,9 +998,10 @@ copy_failed:
        case KBASE_FUNC_EXT_BUFFER_LOCK:
                {
 #ifdef CONFIG_KDS
-                       switch (kbase_external_buffer_lock(kctx,
+                       ret = kbase_external_buffer_lock(kctx,
                                (struct kbase_uk_ext_buff_kds_data *)args,
-                               args_size)) {
+                               args_size);
+                       switch (ret) {
                        case 0:
                                ukh->ret = MALI_ERROR_NONE;
                                break;
@@ -1088,7 +1116,7 @@ copy_failed:
                                goto bad_size;
 
                        if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
-                               dev_err(kbdev->dev, "buffer too big");
+                               dev_err(kbdev->dev, "buffer too big\n");
                                goto out_bad;
                        }
 
@@ -1108,12 +1136,30 @@ copy_failed:
                                kfree(buf);
                                goto out_bad;
                        }
-                       kbasep_mem_profile_debugfs_insert(kctx, buf,
-                                       add_data->len);
+
+                       if (kbasep_mem_profile_debugfs_insert(kctx, buf,
+                                                       add_data->len)) {
+                               ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+                               kfree(buf);
+                               goto out_bad;
+                       }
 
                        break;
                }
-#ifdef CONFIG_MALI_MIPE_ENABLED
+
+#ifdef CONFIG_MALI_NO_MALI
+       case KBASE_FUNC_SET_PRFCNT_VALUES:
+               {
+
+                       struct kbase_uk_prfcnt_values *params =
+                         ((struct kbase_uk_prfcnt_values *)args);
+                       gpu_model_set_dummy_prfcnt_sample(params->data,
+                                       params->size);
+
+                       break;
+               }
+#endif /* CONFIG_MALI_NO_MALI */
+
        case KBASE_FUNC_TLSTREAM_ACQUIRE:
                {
                        struct kbase_uk_tlstream_acquire *tlstream_acquire =
@@ -1173,7 +1219,6 @@ copy_failed:
                        break;
                }
 #endif /* MALI_UNIT_TEST */
-#endif /* CONFIG_MALI_MIPE_ENABLED */
 
        case KBASE_FUNC_GET_CONTEXT_ID:
                {
@@ -1183,12 +1228,38 @@ copy_failed:
                        break;
                }
 
+       case KBASE_FUNC_SOFT_EVENT_UPDATE:
+               {
+                       struct kbase_uk_soft_event_update *update = args;
+
+                       if (sizeof(*update) != args_size)
+                               goto bad_size;
+
+                       if (((update->new_status != BASE_JD_SOFT_EVENT_SET) &&
+                           (update->new_status != BASE_JD_SOFT_EVENT_RESET)) ||
+                           (update->flags != 0))
+                               goto out_bad;
+
+                       if (kbasep_write_soft_event_status(
+                                               kctx, update->evt,
+                                               update->new_status) != 0) {
+                               ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+                               break;
+                       }
+
+                       if (update->new_status == BASE_JD_SOFT_EVENT_SET)
+                               kbasep_complete_triggered_soft_events(
+                                               kctx, update->evt);
+
+                       break;
+               }
+
        default:
-               dev_err(kbdev->dev, "unknown ioctl %u", id);
+               dev_err(kbdev->dev, "unknown ioctl %u\n", id);
                goto out_bad;
        }
 
-       return 0;
+       return ret;
 
  bad_size:
        dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
@@ -1201,6 +1272,47 @@ static struct kbase_device *to_kbase_device(struct device *dev)
        return dev_get_drvdata(dev);
 }
 
+static int assign_irqs(struct platform_device *pdev)
+{
+       struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+       int i;
+
+       if (!kbdev)
+               return -ENODEV;
+
+       /* 3 IRQ resources */
+       for (i = 0; i < 3; i++) {
+               struct resource *irq_res;
+               int irqtag;
+
+               irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+               if (!irq_res) {
+                       dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+                       return -ENOENT;
+               }
+
+#ifdef CONFIG_OF
+               if (!strcmp(irq_res->name, "JOB")) {
+                       irqtag = JOB_IRQ_TAG;
+               } else if (!strcmp(irq_res->name, "MMU")) {
+                       irqtag = MMU_IRQ_TAG;
+               } else if (!strcmp(irq_res->name, "GPU")) {
+                       irqtag = GPU_IRQ_TAG;
+               } else {
+                       dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+                               irq_res->name);
+                       return -EINVAL;
+               }
+#else
+               irqtag = i;
+#endif /* CONFIG_OF */
+               kbdev->irqs[irqtag].irq = irq_res->start;
+               kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK;
+       }
+
+       return 0;
+}
+
 /*
  * API to acquire device list mutex and
  * return pointer to the device list head
@@ -1270,6 +1382,7 @@ static int kbase_open(struct inode *inode, struct file *filp)
 
        init_waitqueue_head(&kctx->event_queue);
        filp->private_data = kctx;
+       kctx->filp = filp;
 
        kctx->infinite_cache_active = kbdev->infinite_cache_active_default;
 
@@ -1292,7 +1405,8 @@ static int kbase_open(struct inode *inode, struct file *filp)
        debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry,
                        &kctx->infinite_cache_active);
 #endif /* CONFIG_MALI_COH_USER */
-       kbasep_mem_profile_debugfs_add(kctx);
+
+       mutex_init(&kctx->mem_profile_lock);
 
        kbasep_jd_debugfs_ctx_add(kctx);
        kbase_debug_mem_view_init(filp);
@@ -1301,6 +1415,7 @@ static int kbase_open(struct inode *inode, struct file *filp)
 
        kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool);
 
+       kbase_jit_debugfs_add(kctx);
 #endif /* CONFIG_DEBUGFS */
 
        dev_dbg(kbdev->dev, "created base context\n");
@@ -1313,11 +1428,10 @@ static int kbase_open(struct inode *inode, struct file *filp)
                        mutex_lock(&kbdev->kctx_list_lock);
                        element->kctx = kctx;
                        list_add(&element->link, &kbdev->kctx_list);
-#ifdef CONFIG_MALI_MIPE_ENABLED
                        kbase_tlstream_tl_new_ctx(
                                        element->kctx,
-                                       (u32)(element->kctx->id));
-#endif
+                                       (u32)(element->kctx->id),
+                                       (u32)(element->kctx->tgid));
                        mutex_unlock(&kbdev->kctx_list_lock);
                } else {
                        /* we don't treat this as a fail - just warn about it */
@@ -1338,14 +1452,12 @@ static int kbase_release(struct inode *inode, struct file *filp)
        struct kbasep_kctx_list_element *element, *tmp;
        bool found_element = false;
 
-#ifdef CONFIG_MALI_MIPE_ENABLED
        kbase_tlstream_tl_del_ctx(kctx);
-#endif
 
 #ifdef CONFIG_DEBUG_FS
        debugfs_remove_recursive(kctx->kctx_dentry);
        kbasep_mem_profile_debugfs_remove(kctx);
-       kbase_debug_job_fault_context_exit(kctx);
+       kbase_debug_job_fault_context_term(kctx);
 #endif
 
        mutex_lock(&kbdev->kctx_list_lock);
@@ -1477,99 +1589,155 @@ static int kbase_check_flags(int flags)
        return 0;
 }
 
-static unsigned long kbase_get_unmapped_area(struct file *filp,
-               const unsigned long addr, const unsigned long len,
-               const unsigned long pgoff, const unsigned long flags)
-{
 #ifdef CONFIG_64BIT
-       /* based on get_unmapped_area, but simplified slightly due to that some
-        * values are known in advance */
-       struct kbase_context *kctx = filp->private_data;
+/* The following function is taken from the kernel and just
+ * renamed. As it's not exported to modules we must copy-paste it here. It searches current->mm from the top down for a free gap of info->length bytes (plus worst-case alignment slack) between info->low_limit and info->high_limit, and returns the chosen address or -ENOMEM.
+ */
+
+static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
+               *info)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long length, low_limit, high_limit, gap_start, gap_end;
 
-       if (!kctx->is_compat && !addr &&
-               kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
-               struct mm_struct *mm = current->mm;
-               struct vm_area_struct *vma;
-               unsigned long low_limit, high_limit, gap_start, gap_end;
+       /* Adjust search length to account for worst case alignment overhead */
+       length = info->length + info->align_mask;
+       if (length < info->length) /* the unsigned addition above wrapped around */
+               return -ENOMEM;
 
-               /* Hardware has smaller VA than userspace, ensure the page
-                * comes from a VA which can be used on the GPU */
+       /*
+        * Adjust search limits by the desired length.
+        * See implementation comment at top of unmapped_area().
+        */
+       gap_end = info->high_limit;
+       if (gap_end < length)
+               return -ENOMEM;
+       high_limit = gap_end - length;
 
-               gap_end = (1UL<<33);
-               if (gap_end < len)
-                       return -ENOMEM;
-               high_limit = gap_end - len;
-               low_limit = PAGE_SIZE + len;
+       if (info->low_limit > high_limit)
+               return -ENOMEM;
+       low_limit = info->low_limit + length;
 
-               gap_start = mm->highest_vm_end;
-               if (gap_start <= high_limit)
-                       goto found_highest;
+       /* Check highest gap, which does not precede any rbtree node */
+       gap_start = mm->highest_vm_end;
+       if (gap_start <= high_limit)
+               goto found_highest;
 
-               if (RB_EMPTY_ROOT(&mm->mm_rb))
-                       return -ENOMEM;
-               vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
-               if (vma->rb_subtree_gap < len)
-                       return -ENOMEM;
+       /* Check if rbtree root looks promising */
+       if (RB_EMPTY_ROOT(&mm->mm_rb))
+               return -ENOMEM;
+       vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+       if (vma->rb_subtree_gap < length)
+               return -ENOMEM;
 
-               while (true) {
-                       gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
-                       if (gap_start <= high_limit && vma->vm_rb.rb_right) {
-                               struct vm_area_struct *right =
-                                       rb_entry(vma->vm_rb.rb_right,
-                                                struct vm_area_struct, vm_rb);
-                               if (right->rb_subtree_gap >= len) {
-                                       vma = right;
-                                       continue;
-                               }
+       while (true) {
+               /* Visit right subtree if it looks promising */
+               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+               if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+                       struct vm_area_struct *right =
+                               rb_entry(vma->vm_rb.rb_right,
+                                        struct vm_area_struct, vm_rb);
+                       if (right->rb_subtree_gap >= length) {
+                               vma = right;
+                               continue;
                        }
+               }
+
 check_current:
-                       gap_end = vma->vm_start;
-                       if (gap_end < low_limit)
-                               return -ENOMEM;
-                       if (gap_start <= high_limit &&
-                           gap_end - gap_start >= len)
-                               goto found;
-
-                       if (vma->vm_rb.rb_left) {
-                               struct vm_area_struct *left =
-                                       rb_entry(vma->vm_rb.rb_left,
-                                                struct vm_area_struct, vm_rb);
-
-                               if (left->rb_subtree_gap >= len) {
-                                       vma = left;
-                                       continue;
-                               }
+               /* Check if current node has a suitable gap */
+               gap_end = vma->vm_start;
+               if (gap_end < low_limit)
+                       return -ENOMEM;
+               if (gap_start <= high_limit && gap_end - gap_start >= length)
+                       goto found;
+
+               /* Visit left subtree if it looks promising */
+               if (vma->vm_rb.rb_left) {
+                       struct vm_area_struct *left =
+                               rb_entry(vma->vm_rb.rb_left,
+                                        struct vm_area_struct, vm_rb);
+                       if (left->rb_subtree_gap >= length) {
+                               vma = left;
+                               continue;
                        }
-                       while (true) {
-                               struct rb_node *prev = &vma->vm_rb;
-
-                               if (!rb_parent(prev))
-                                       return -ENOMEM;
-                               vma = rb_entry(rb_parent(prev),
-                                               struct vm_area_struct, vm_rb);
-                               if (prev == vma->vm_rb.rb_right) {
-                                       gap_start = vma->vm_prev ?
-                                               vma->vm_prev->vm_end : 0;
-                                       goto check_current;
-                               }
+               }
+
+               /* Go back up the rbtree to find next candidate node */
+               while (true) {
+                       struct rb_node *prev = &vma->vm_rb;
+                       if (!rb_parent(prev))
+                               return -ENOMEM;
+                       vma = rb_entry(rb_parent(prev),
+                                      struct vm_area_struct, vm_rb);
+                       if (prev == vma->vm_rb.rb_right) {
+                               gap_start = vma->vm_prev ?
+                                       vma->vm_prev->vm_end : 0;
+                               goto check_current;
                        }
                }
+       }
 
 found:
-               if (gap_end > (1UL<<33))
-                       gap_end = (1UL<<33);
+       /* We found a suitable gap. Clip it with the original high_limit. */
+       if (gap_end > info->high_limit)
+               gap_end = info->high_limit;
 
 found_highest:
-               gap_end -= len;
+       /* Compute highest gap address at the desired alignment */
+       gap_end -= info->length;
+       gap_end -= (gap_end - info->align_offset) & info->align_mask;
+
+       VM_BUG_ON(gap_end < info->low_limit);
+       VM_BUG_ON(gap_end < gap_start);
+       return gap_end;
+}
+
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+               const unsigned long addr, const unsigned long len,
+               const unsigned long pgoff, const unsigned long flags)
+{
+       /* Chooses the CPU virtual address for a new mapping of this file. Based on get_unmapped_area, but simplified slightly because some
+        * values are known in advance. Returns the address, or a negative errno cast to unsigned long. */
+       struct kbase_context *kctx = filp->private_data;
+       struct mm_struct *mm = current->mm;
+       struct vm_unmapped_area_info info;
+
+       /* err on fixed address, and on any non-zero address hint */
+       if ((flags & MAP_FIXED) || addr)
+               return -EINVAL;
 
-               VM_BUG_ON(gap_end < PAGE_SIZE);
-               VM_BUG_ON(gap_end < gap_start);
-               return gap_end;
+       /* too big? */
+       if (len > TASK_SIZE - SZ_2M)
+               return -ENOMEM;
+
+       if (kctx->is_compat) /* compat contexts defer to the mm's own get_unmapped_area */
+               return current->mm->get_unmapped_area(filp, addr, len, pgoff,
+                               flags);
+
+       if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
+               info.high_limit = kctx->same_va_end << PAGE_SHIFT; /* presumably same_va_end is a page number, hence the shift - confirm */
+               info.align_mask = 0;
+               info.align_offset = 0;
+       } else {
+               info.high_limit = min_t(unsigned long, mm->mmap_base,
+                                       (kctx->same_va_end << PAGE_SHIFT));
+               if (len >= SZ_2M) { /* requests of 2 MB or more get a 2 MB alignment mask */
+                       info.align_offset = SZ_2M;
+                       info.align_mask = SZ_2M - 1;
+               } else {
+                       info.align_mask = 0;
+                       info.align_offset = 0;
+               }
        }
-#endif
-       /* No special requirements - fallback to the default version */
-       return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+
+       info.flags = 0;
+       info.length = len;
+       info.low_limit = SZ_2M; /* the search never goes below 2 MB */
+       return kbase_unmapped_area_topdown(&info);
 }
+#endif
 
 static const struct file_operations kbase_fops = {
        .owner = THIS_MODULE,
@@ -1581,7 +1749,9 @@ static const struct file_operations kbase_fops = {
        .compat_ioctl = kbase_ioctl,
        .mmap = kbase_mmap,
        .check_flags = kbase_check_flags,
+#ifdef CONFIG_64BIT
        .get_unmapped_area = kbase_get_unmapped_area,
+#endif
 };
 
 #ifndef CONFIG_MALI_NO_MALI
@@ -1822,7 +1992,15 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
        if (!kbdev)
                return -ENODEV;
 
-       ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask);
+       ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+                       "Current core mask (JS0) : 0x%llX\n",
+                       kbdev->pm.debug_core_mask[0]);
+       ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+                       "Current core mask (JS1) : 0x%llX\n",
+                       kbdev->pm.debug_core_mask[1]);
+       ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+                       "Current core mask (JS2) : 0x%llX\n",
+                       kbdev->pm.debug_core_mask[2]);
        ret += scnprintf(buf + ret, PAGE_SIZE - ret,
                        "Available core mask : 0x%llX\n",
                        kbdev->gpu_props.props.raw_props.shader_present);
@@ -1844,36 +2022,63 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
 static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
        struct kbase_device *kbdev;
-       u64 new_core_mask;
-       int rc;
+       u64 new_core_mask[3];
+       int items;
 
        kbdev = to_kbase_device(dev);
 
        if (!kbdev)
                return -ENODEV;
 
-       rc = kstrtoull(buf, 16, &new_core_mask);
-       if (rc)
-               return rc;
+       items = sscanf(buf, "%llx %llx %llx", /* up to three hexadecimal masks; items counts how many parsed */
+                       &new_core_mask[0], &new_core_mask[1],
+                       &new_core_mask[2]);
+
+       if (items == 1) /* a single mask is applied to all three entries */
+               new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+       if (items == 1 || items == 3) { /* exactly two values is rejected by the fall-through below */
+               u64 shader_present =
+                               kbdev->gpu_props.props.raw_props.shader_present;
+               u64 group0_core_mask =
+                               kbdev->gpu_props.props.coherency_info.group[0].
+                               core_mask;
+
+               if ((new_core_mask[0] & shader_present) != new_core_mask[0] || /* each mask must be a subset of shader_present and share a bit with group 0 */
+                               !(new_core_mask[0] & group0_core_mask) ||
+                       (new_core_mask[1] & shader_present) !=
+                                               new_core_mask[1] ||
+                               !(new_core_mask[1] & group0_core_mask) ||
+                       (new_core_mask[2] & shader_present) !=
+                                               new_core_mask[2] ||
+                               !(new_core_mask[2] & group0_core_mask)) {
+                       dev_err(dev, "power_policy: invalid core specification\n");
+                       return -EINVAL;
+               }
 
-       if ((new_core_mask & kbdev->gpu_props.props.raw_props.shader_present)
-                       != new_core_mask ||
-           !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) {
-               dev_err(dev, "power_policy: invalid core specification\n");
-               return -EINVAL;
-       }
+               if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] || /* skip the locked update when nothing changes */
+                               kbdev->pm.debug_core_mask[1] !=
+                                               new_core_mask[1] ||
+                               kbdev->pm.debug_core_mask[2] !=
+                                               new_core_mask[2]) {
+                       unsigned long flags;
 
-       if (kbdev->pm.debug_core_mask != new_core_mask) {
-               unsigned long flags;
+                       spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
 
-               spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+                       kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+                                       new_core_mask[1], new_core_mask[2]);
 
-               kbase_pm_set_debug_core_mask(kbdev, new_core_mask);
+                       spin_unlock_irqrestore(&kbdev->pm.power_change_lock,
+                                       flags);
+               }
 
-               spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+               return count;
        }
 
-       return count;
+       dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
+               "Use format <core_mask>\n"
+               "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+       return -EINVAL;
 }
 
 /** The sysfs file @c core_mask.
@@ -1884,121 +2089,69 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
  */
 static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
 
-#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
 /**
- * struct sc_split_config
- * @tag: Short name
- * @human_readable: Long name
- * @js0_mask: Mask for job slot 0
- * @js1_mask: Mask for job slot 1
- * @js2_mask: Mask for job slot 2
+ * set_soft_event_timeout() - Store callback for the soft_event_timeout sysfs
+ * file.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
  *
- * Structure containing a single shader affinity split configuration.
+ * This allows setting the timeout for software event jobs. Waiting jobs will
+ * be cancelled after this period expires. This is expressed in milliseconds.
+ *
+ * Return: count if the function succeeded. An error code on failure: -ENODEV if the kbase device cannot be resolved, -EINVAL if the input is not a strictly positive integer.
  */
-struct sc_split_config {
-       char const *tag;
-       char const *human_readable;
-       u64          js0_mask;
-       u64          js1_mask;
-       u64          js2_mask;
-};
+static ssize_t set_soft_event_timeout(struct device *dev,
+                                     struct device_attribute *attr,
+                                     const char *buf, size_t count)
+{
+       struct kbase_device *kbdev;
+       int soft_event_timeout_ms;
 
-/*
- * Array of available shader affinity split configurations.
- */
-static struct sc_split_config const sc_split_configs[] = {
-       /* All must be the first config (default). */
-       {
-               "all", "All cores",
-               0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
-       },
-       {
-               "mp1", "MP1 shader core",
-               0x1, 0x1, 0x1
-       },
-       {
-               "mp2", "MP2 shader core",
-               0x3, 0x3, 0x3
-       },
-       {
-               "mp4", "MP4 shader core",
-               0xF, 0xF, 0xF
-       },
-       {
-               "mp1_vf", "MP1 vertex + MP1 fragment shader core",
-               0x2, 0x1, 0xFFFFFFFFFFFFFFFFULL
-       },
-       {
-               "mp2_vf", "MP2 vertex + MP2 fragment shader core",
-               0xA, 0x5, 0xFFFFFFFFFFFFFFFFULL
-       },
-       /* This must be the last config. */
-       {
-               NULL, NULL,
-               0x0, 0x0, 0x0
-       },
-};
+       kbdev = to_kbase_device(dev);
+       if (!kbdev)
+               return -ENODEV;
 
-/* Pointer to the currently active shader split configuration. */
-static struct sc_split_config const *current_sc_split_config = &sc_split_configs[0];
+       if ((kstrtoint(buf, 0, &soft_event_timeout_ms) != 0) || /* base 0: kstrtoint picks the radix from the input's prefix */
+           (soft_event_timeout_ms <= 0)) /* zero and negative timeouts are rejected */
+               return -EINVAL;
 
-/** Show callback for the @c sc_split sysfs file
- *
- * Returns the current shader core affinity policy.
- */
-static ssize_t show_split(struct device *dev, struct device_attribute *attr, char * const buf)
-{
-       ssize_t ret;
-       /* We know we are given a buffer which is PAGE_SIZE long. Our strings are all guaranteed
-        * to be shorter than that at this time so no length check needed. */
-       ret = scnprintf(buf, PAGE_SIZE, "Current sc_split: '%s'\n", current_sc_split_config->tag);
-       return ret;
+       atomic_set(&kbdev->js_data.soft_event_timeout_ms,
+                  soft_event_timeout_ms);
+
+       return count;
 }
 
-/** Store callback for the @c sc_split sysfs file.
+/**
+ * show_soft_event_timeout() - Show callback for the soft_event_timeout sysfs
+ * file.
  *
- * This function is called when the @c sc_split sysfs file is written to
- * It modifies the system shader core affinity configuration to allow
- * system profiling with different hardware configurations.
+ * This will return the timeout for the software event jobs. The stored soft_event_timeout_ms value is printed as a decimal integer followed by a newline.
  *
- * @param dev  The device with sysfs file is for
- * @param attr The attributes of the sysfs file
- * @param buf  The value written to the sysfs file
- * @param count        The number of bytes written to the sysfs file
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents.
  *
- * @return @c count if the function succeeded. An error code on failure.
+ * Return: The number of bytes output to buf, or -ENODEV if the kbase device cannot be resolved.
  */
-static ssize_t set_split(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t show_soft_event_timeout(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char * const buf)
 {
-       struct sc_split_config const *config = &sc_split_configs[0];
-
-       /* Try to match: loop until we hit the last "NULL" entry */
-       while (config->tag) {
-               if (sysfs_streq(config->tag, buf)) {
-                       current_sc_split_config = config;
-                       mali_js0_affinity_mask  = config->js0_mask;
-                       mali_js1_affinity_mask  = config->js1_mask;
-                       mali_js2_affinity_mask  = config->js2_mask;
-                       dev_dbg(dev, "Setting sc_split: '%s'\n", config->tag);
-                       return count;
-               }
-               config++;
-       }
+       struct kbase_device *kbdev;
 
-       /* No match found in config list */
-       dev_err(dev, "sc_split: invalid value\n");
-       dev_err(dev, "  Possible settings: mp[1|2|4], mp[1|2]_vf\n");
-       return -ENOENT;
-}
+       kbdev = to_kbase_device(dev);
+       if (!kbdev)
+               return -ENODEV;
 
-/** The sysfs file @c sc_split
- *
- * This is used for configuring/querying the current shader core work affinity
- * configuration.
- */
-static DEVICE_ATTR(sc_split, S_IRUGO|S_IWUSR, show_split, set_split);
-#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+       return scnprintf(buf, PAGE_SIZE, "%i\n", /* output is bounded to the PAGE_SIZE passed here */
+                        atomic_read(&kbdev->js_data.soft_event_timeout_ms));
+}
 
+static DEVICE_ATTR(soft_event_timeout, S_IRUGO | S_IWUSR, /* sysfs attribute: readable by everyone, writable by the owner */
+                  show_soft_event_timeout, set_soft_event_timeout);
 
 /** Store callback for the @c js_timeouts sysfs file.
  *
@@ -2731,12 +2884,15 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
                { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
                { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
                { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+               { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+                 .name = "Mali-TMIx" },
        };
        const char *product_name = "(Unknown Mali GPU)";
        struct kbase_device *kbdev;
        u32 gpu_id;
-       unsigned product_id;
+       unsigned product_id, product_id_mask;
        unsigned i;
+       bool is_new_format;
 
        kbdev = to_kbase_device(dev);
        if (!kbdev)
@@ -2744,10 +2900,20 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
 
        gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
        product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+       is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
+       product_id_mask =
+               (is_new_format ?
+                       GPU_ID2_PRODUCT_MODEL :
+                       GPU_ID_VERSION_PRODUCT_ID) >>
+               GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 
        for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
-               if (gpu_product_id_names[i].id == product_id) {
-                       product_name = gpu_product_id_names[i].name;
+               const struct gpu_product_id_name *p = &gpu_product_id_names[i];
+
+               if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
+                   (p->id & product_id_mask) ==
+                   (product_id & product_id_mask)) {
+                       product_name = p->name;
                        break;
                }
        }
@@ -3047,26 +3213,53 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
                set_mem_pool_max_size);
 
 
+static int kbasep_secure_mode_enable(struct kbase_device *kbdev) /* native enable hook: writes GPU_COMMAND_SET_PROTECTED_MODE to the GPU_COMMAND control register */
+{
+       kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+               GPU_COMMAND_SET_PROTECTED_MODE, NULL);
+       return 0; /* always reports success; nothing is checked after the write */
+}
 
-static int kbasep_secure_mode_init(struct kbase_device *kbdev)
+static int kbasep_secure_mode_disable(struct kbase_device *kbdev) /* native disable hook: implemented as a GPU reset */
 {
+       if (!kbase_prepare_to_reset_gpu_locked(kbdev)) /* reset preparation was refused: report busy without resetting */
+               return -EBUSY;
+
+       kbase_reset_gpu_locked(kbdev);
+
+       return 0;
+}
+
+static struct kbase_secure_ops kbasep_secure_ops = { /* callback table built from the two native hooks above */
+       .secure_mode_enable = kbasep_secure_mode_enable,
+       .secure_mode_disable = kbasep_secure_mode_disable,
+};
 
+static void kbasep_secure_mode_init(struct kbase_device *kbdev) /* selects kbdev->secure_ops and sets kbdev->secure_mode_support */
+{
+       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+               /* Use native secure ops: the hardware reports BASE_HW_FEATURE_PROTECTED_MODE */
+               kbdev->secure_ops = &kbasep_secure_ops;
+               kbdev->secure_mode_support = true;
+       }
 #ifdef SECURE_CALLBACKS
-       kbdev->secure_ops = SECURE_CALLBACKS;
-       kbdev->secure_mode_support = false;
+       else { /* no native support: fall back to the build-time SECURE_CALLBACKS, if defined */
+               kbdev->secure_ops = SECURE_CALLBACKS;
+               kbdev->secure_mode_support = false;
 
-       if (kbdev->secure_ops) {
-               int err;
+               if (kbdev->secure_ops) {
+                       int err;
 
-               /* Make sure secure mode is disabled on startup */
-               err = kbdev->secure_ops->secure_mode_disable(kbdev);
+                       /* Make sure secure mode is disabled on startup */
+                       err = kbdev->secure_ops->secure_mode_disable(kbdev);
 
-               /* secure_mode_disable() returns -EINVAL if not supported */
-               kbdev->secure_mode_support = (err != -EINVAL);
+                       /* secure_mode_disable() returns -EINVAL if not
+                        * supported; any other result counts as supported
+                        */
+                       kbdev->secure_mode_support = (err != -EINVAL);
+               }
        }
 #endif
-
-       return 0;
 }
 
 #ifdef CONFIG_MALI_NO_MALI
@@ -3105,13 +3298,147 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
 
 static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
 {
-       iounmap(kbdev->reg);
-       release_mem_region(kbdev->reg_start, kbdev->reg_size);
+       if (kbdev->reg) { /* nothing to do when no mapping is recorded */
+               iounmap(kbdev->reg);
+               release_mem_region(kbdev->reg_start, kbdev->reg_size);
+               kbdev->reg = NULL; /* cleared so a later call takes the no-op path */
+               kbdev->reg_start = 0;
+               kbdev->reg_size = 0;
+       }
 }
 #endif /* CONFIG_MALI_NO_MALI */
 
+static int registers_map(struct kbase_device * const kbdev)
+{
+               /* the first memory resource is the physical address of the GPU
+                * registers; its start and size are stored in kbdev and then mapped by kbase_common_reg_map(). Returns 0, -ENOENT if the resource is missing, or the mapping error. */
+               struct platform_device *pdev = to_platform_device(kbdev->dev);
+               struct resource *reg_res;
+               int err;
 
-#ifdef CONFIG_DEBUG_FS
+               reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+               if (!reg_res) {
+                       dev_err(kbdev->dev, "Invalid register resource\n");
+                       return -ENOENT;
+               }
+
+               kbdev->reg_start = reg_res->start;
+               kbdev->reg_size = resource_size(reg_res);
+
+               err = kbase_common_reg_map(kbdev);
+               if (err) {
+                       dev_err(kbdev->dev, "Failed to map registers\n");
+                       return err;
+               }
+
+       return 0;
+}
+
+static void registers_unmap(struct kbase_device *kbdev) /* counterpart of registers_map(): thin wrapper over kbase_common_reg_unmap() */
+{
+       kbase_common_reg_unmap(kbdev);
+}
+
+static int power_control_init(struct platform_device *pdev) /* acquires the "mali" regulator and "clk_mali" clock (both optional), enables the clock and registers the OPP table; returns 0 or a negative errno */
+{
+       struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+       int err = 0;
+
+       if (!kbdev)
+               return -ENODEV;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                       && defined(CONFIG_REGULATOR)
+       kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+       if (IS_ERR_OR_NULL(kbdev->regulator)) {
+               err = PTR_ERR(kbdev->regulator);
+               kbdev->regulator = NULL;
+               if (err == -EPROBE_DEFER) { /* only a probe deferral aborts; every other failure is tolerated */
+                       dev_err(&pdev->dev, "Failed to get regulator\n");
+                       return err;
+               }
+               dev_info(kbdev->dev,
+                       "Continuing without Mali regulator control\n");
+               /* Allow probe to continue without regulator */
+       }
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 && CONFIG_OF && CONFIG_REGULATOR */
+
+       kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+       if (IS_ERR_OR_NULL(kbdev->clock)) {
+               err = PTR_ERR(kbdev->clock);
+               kbdev->clock = NULL;
+               if (err == -EPROBE_DEFER) {
+                       dev_err(&pdev->dev, "Failed to get clock\n");
+                       goto fail;
+               }
+               dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+               /* Allow probe to continue without clock. */
+       } else {
+               err = clk_prepare_enable(kbdev->clock);
+               if (err) {
+                       dev_err(kbdev->dev,
+                               "Failed to prepare and enable clock (%d)\n",
+                               err);
+                       goto fail;
+               }
+       }
+
+#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
+       /* Register the OPPs if they are available in device tree */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+       err = dev_pm_opp_of_add_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+       err = of_init_opp_table(kbdev->dev);
+#else
+       err = 0;
+#endif /* LINUX_VERSION_CODE */
+       if (err) /* a missing OPP table is logged at debug level only; the function still returns 0 */
+               dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif /* CONFIG_OF && CONFIG_PM_OPP */
+
+       return 0;
+
+fail:
+
+if (kbdev->clock != NULL) { /* release the clock reference taken above, if any */
+       clk_put(kbdev->clock);
+       kbdev->clock = NULL;
+}
+
+#ifdef CONFIG_REGULATOR /* NOTE(review): narrower guard than the one used when acquiring the regulator above (no kernel-version or CONFIG_OF test) - confirm intended */
+       if (NULL != kbdev->regulator) {
+               regulator_put(kbdev->regulator);
+               kbdev->regulator = NULL;
+       }
+#endif
+
+       return err;
+}
+
+static void power_control_term(struct kbase_device *kbdev) /* undoes power_control_init(): removes the OPP table, then releases the clock and the regulator */
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) /* NOTE(review): unlike power_control_init(), this is not wrapped in CONFIG_OF && CONFIG_PM_OPP - confirm */
+       dev_pm_opp_of_remove_table(kbdev->dev);
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) /* NOTE(review): power_control_init() registers the table from 3.7 onward, but it is only freed here from 3.19 - confirm */
+       of_free_opp_table(kbdev->dev);
+#endif
+
+       if (kbdev->clock) { /* clock is NULL when probe continued without clock control */
+               clk_disable_unprepare(kbdev->clock);
+               clk_put(kbdev->clock);
+               kbdev->clock = NULL;
+       }
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+                       && defined(CONFIG_REGULATOR)
+       if (kbdev->regulator) {
+               regulator_put(kbdev->regulator);
+               kbdev->regulator = NULL;
+       }
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 && CONFIG_OF && CONFIG_REGULATOR */
+}
+
+#ifdef CONFIG_DEBUG_FS
 
 #if KBASE_GPU_RESET_EN
 #include <mali_kbase_hwaccess_jm.h>
@@ -3202,7 +3529,12 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev)
                err = -ENOMEM;
                goto out;
        }
-       kbase_debug_job_fault_dev_init(kbdev);
+
+#if !MALI_CUSTOMER_RELEASE
+       kbasep_regs_dump_debugfs_add(kbdev);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+       kbase_debug_job_fault_debugfs_init(kbdev);
        kbasep_gpu_memory_debugfs_init(kbdev);
 #if KBASE_GPU_RESET_EN
        debugfs_create_file("quirks_sc", 0644,
@@ -3261,21 +3593,19 @@ static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
 
 static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id)
 {
-       u32 selected_coherency = COHERENCY_NONE;
-       /* COHERENCY_NONE is always supported */
-       u32 supported_coherency_bitmap = COHERENCY_FEATURE_BIT(COHERENCY_NONE);
-
 #ifdef CONFIG_OF
+       u32 supported_coherency_bitmap =
+               kbdev->gpu_props.props.raw_props.coherency_mode;
        const void *coherency_override_dts;
        u32 override_coherency;
 #endif /* CONFIG_OF */
 
-       kbdev->system_coherency = selected_coherency;
+       kbdev->system_coherency = COHERENCY_NONE;
 
        /* device tree may override the coherency */
 #ifdef CONFIG_OF
        coherency_override_dts = of_get_property(kbdev->dev->of_node,
-                                               "override-coherency",
+                                               "system-coherency",
                                                NULL);
        if (coherency_override_dts) {
 
@@ -3288,17 +3618,17 @@ static void kbase_device_coherency_init(struct kbase_device *kbdev, u32 gpu_id)
                        kbdev->system_coherency = override_coherency;
 
                        dev_info(kbdev->dev,
-                               "Using coherency override, mode %u set from dtb",
+                               "Using coherency mode %u set from dtb",
                                override_coherency);
                } else
                        dev_warn(kbdev->dev,
-                               "Ignoring invalid coherency override, mode %u set from dtb",
+                               "Ignoring unsupported coherency mode %u set from dtb",
                                override_coherency);
        }
 
 #endif /* CONFIG_OF */
 
-       kbdev->gpu_props.props.raw_props.coherency_features =
+       kbdev->gpu_props.props.raw_props.coherency_mode =
                kbdev->system_coherency;
 }
 
@@ -3319,515 +3649,406 @@ static void kbase_logging_started_cb(void *data)
 #endif
 
 
-static int kbase_common_device_init(struct kbase_device *kbdev)
-{
-       int err;
-       struct mali_base_gpu_core_props *core_props;
-       enum {
-               inited_mem = (1u << 0),
-               inited_js = (1u << 1),
-               inited_pm_runtime_init = (1u << 6),
-#ifdef CONFIG_MALI_DEVFREQ
-               inited_devfreq = (1u << 9),
-#endif /* CONFIG_MALI_DEVFREQ */
-#ifdef CONFIG_MALI_MIPE_ENABLED
-               inited_tlstream = (1u << 10),
-#endif /* CONFIG_MALI_MIPE_ENABLED */
-               inited_backend_early = (1u << 11),
-               inited_backend_late = (1u << 12),
-               inited_device = (1u << 13),
-               inited_vinstr = (1u << 19),
-               inited_ipa = (1u << 20)
-       };
-
-       int inited = 0;
-       u32 gpu_id;
-#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
-       u32 ve_logic_tile = 0;
-#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
-
-       dev_set_drvdata(kbdev->dev, kbdev);
-
-       err = kbase_backend_early_init(kbdev);
-       if (err)
-               goto out_partial;
-       inited |= inited_backend_early;
+static struct attribute *kbase_attrs[] = {
+#ifdef CONFIG_MALI_DEBUG
+       &dev_attr_debug_command.attr,
+       &dev_attr_js_softstop_always.attr,
+#endif
+#if !MALI_CUSTOMER_RELEASE
+       &dev_attr_force_replay.attr,
+#endif
+       &dev_attr_js_timeouts.attr,
+       &dev_attr_soft_event_timeout.attr,
+       &dev_attr_gpuinfo.attr,
+       &dev_attr_dvfs_period.attr,
+       &dev_attr_pm_poweroff.attr,
+       &dev_attr_reset_timeout.attr,
+       &dev_attr_js_scheduling_period.attr,
+       &dev_attr_power_policy.attr,
+       &dev_attr_core_availability_policy.attr,
+       &dev_attr_core_mask.attr,
+       &dev_attr_mem_pool_size.attr,
+       &dev_attr_mem_pool_max_size.attr,
+       NULL
+};
 
-       scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
-                       kbase_dev_nr++);
+static const struct attribute_group kbase_attr_group = {
+       .attrs = kbase_attrs,
+};
 
-       kbase_disjoint_init(kbdev);
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+       struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+       const struct list_head *dev_list;
 
-       /* obtain min/max configured gpu frequencies */
-       core_props = &(kbdev->gpu_props.props.core_props);
+       if (!kbdev)
+               return -ENODEV;
 
-       /* For versatile express platforms, min and max values of GPU frequency
-        * depend on the type of the logic tile; these values may not be known
-        * at the build time so in some cases a platform config file with wrong
-        * GPU freguency values may be included; to ensure the correct value of
-        * min and max GPU frequency is obtained, the type of the logic tile is
-        * read from the corresponding register on the platform and frequency
-        * values assigned accordingly.*/
-#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
-       ve_logic_tile = kbase_get_platform_logic_tile_type();
-
-       switch (ve_logic_tile) {
-       case 0x217:
-               /* Virtex 6, HBI0217 */
-               core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN;
-               core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX;
-               break;
-       case 0x247:
-               /* Virtex 7, HBI0247 */
-               core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN;
-               core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX;
-               break;
-       default:
-               /* all other logic tiles, i.e., Virtex 5 HBI0192
-                * or unsuccessful reading from the platform -
-                * fall back to the config_platform default */
-               core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
-               core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
-               break;
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+       if (kbdev->inited_subsys & inited_buslogger) {
+               bl_core_client_unregister(kbdev->buslogger);
+               kbdev->inited_subsys &= ~inited_buslogger;
        }
-#else
-               core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
-               core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
-#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
-
-       kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+#endif
 
-       err = kbase_device_init(kbdev);
-       if (err) {
-               dev_err(kbdev->dev, "Can't initialize device (%d)\n", err);
-               goto out_partial;
+       if (kbdev->inited_subsys & inited_sysfs_group) {
+               sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+               kbdev->inited_subsys &= ~inited_sysfs_group;
        }
 
-       inited |= inited_device;
-
-       kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
-       if (!kbdev->vinstr_ctx) {
-               dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n");
-               goto out_partial;
+       if (kbdev->inited_subsys & inited_dev_list) {
+               dev_list = kbase_dev_list_get();
+               list_del(&kbdev->entry);
+               kbase_dev_list_put(dev_list);
+               kbdev->inited_subsys &= ~inited_dev_list;
        }
 
-       inited |= inited_vinstr;
-
-       kbdev->ipa_ctx = kbase_ipa_init(kbdev);
-       if (!kbdev->ipa_ctx) {
-               dev_err(kbdev->dev, "Can't initialize IPA\n");
-               goto out_partial;
+       if (kbdev->inited_subsys & inited_misc_register) {
+               misc_deregister(&kbdev->mdev);
+               kbdev->inited_subsys &= ~inited_misc_register;
        }
 
-       inited |= inited_ipa;
-
-       if (kbdev->pm.callback_power_runtime_init) {
-               err = kbdev->pm.callback_power_runtime_init(kbdev);
-               if (err)
-                       goto out_partial;
-
-               inited |= inited_pm_runtime_init;
+       if (kbdev->inited_subsys & inited_get_device) {
+               put_device(kbdev->dev);
+               kbdev->inited_subsys &= ~inited_get_device;
        }
 
-       err = kbase_mem_init(kbdev);
-       if (err)
-               goto out_partial;
-
-       inited |= inited_mem;
-
-       gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-       gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
-       gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
-
-       kbase_device_coherency_init(kbdev, gpu_id);
-
-       err = kbasep_secure_mode_init(kbdev);
-       if (err)
-               goto out_partial;
-
-       err = kbasep_js_devdata_init(kbdev);
-       if (err)
-               goto out_partial;
-
-       inited |= inited_js;
-
-#ifdef CONFIG_MALI_MIPE_ENABLED
-       err = kbase_tlstream_init();
-       if (err) {
-               dev_err(kbdev->dev, "Couldn't initialize timeline stream\n");
-               goto out_partial;
+       if (kbdev->inited_subsys & inited_debugfs) {
+               kbase_device_debugfs_term(kbdev);
+               kbdev->inited_subsys &= ~inited_debugfs;
        }
-       inited |= inited_tlstream;
-#endif /* CONFIG_MALI_MIPE_ENABLED */
 
-       err = kbase_backend_late_init(kbdev);
-       if (err)
-               goto out_partial;
-       inited |= inited_backend_late;
-
-#ifdef CONFIG_MALI_DEVFREQ
-       err = kbase_devfreq_init(kbdev);
-       if (err) {
-               dev_err(kbdev->dev, "Couldn't initialize devfreq\n");
-               goto out_partial;
+       if (kbdev->inited_subsys & inited_job_fault) {
+               kbase_debug_job_fault_dev_term(kbdev);
+               kbdev->inited_subsys &= ~inited_job_fault;
        }
-       inited |= inited_devfreq;
-#endif /* CONFIG_MALI_DEVFREQ */
 
-       err = kbase_device_debugfs_init(kbdev);
-       if (err)
-               goto out_partial;
-
-       /* intialise the kctx list */
-       mutex_init(&kbdev->kctx_list_lock);
-       INIT_LIST_HEAD(&kbdev->kctx_list);
-
-       kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
-       kbdev->mdev.name = kbdev->devname;
-       kbdev->mdev.fops = &kbase_fops;
-       kbdev->mdev.parent = get_device(kbdev->dev);
-
-       err = misc_register(&kbdev->mdev);
-       if (err) {
-               dev_err(kbdev->dev, "Couldn't register misc dev %s\n", kbdev->devname);
-               goto out_misc;
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+       if (kbdev->inited_subsys & inited_ipa) {
+               kbase_ipa_term(kbdev->ipa_ctx);
+               kbdev->inited_subsys &= ~inited_ipa;
        }
+#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
 
-       {
-               const struct list_head *dev_list = kbase_dev_list_get();
-
-               list_add(&kbdev->entry, &kbase_dev_list);
-               kbase_dev_list_put(dev_list);
+       if (kbdev->inited_subsys & inited_vinstr) {
+               kbase_vinstr_term(kbdev->vinstr_ctx);
+               kbdev->inited_subsys &= ~inited_vinstr;
        }
 
-       dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device));
-
-       return 0;
-
-out_misc:
-       put_device(kbdev->dev);
-       kbase_device_debugfs_term(kbdev);
-out_partial:
-       if (inited & inited_ipa)
-               kbase_ipa_term(kbdev->ipa_ctx);
-       if (inited & inited_vinstr)
-               kbase_vinstr_term(kbdev->vinstr_ctx);
 #ifdef CONFIG_MALI_DEVFREQ
-       if (inited & inited_devfreq)
+       if (kbdev->inited_subsys & inited_devfreq) {
                kbase_devfreq_term(kbdev);
-#endif /* CONFIG_MALI_DEVFREQ */
-       if (inited & inited_backend_late)
+               kbdev->inited_subsys &= ~inited_devfreq;
+       }
+#endif
+
+       if (kbdev->inited_subsys & inited_backend_late) {
                kbase_backend_late_term(kbdev);
-#ifdef CONFIG_MALI_MIPE_ENABLED
-       if (inited & inited_tlstream)
+               kbdev->inited_subsys &= ~inited_backend_late;
+       }
+
+       if (kbdev->inited_subsys & inited_tlstream) {
                kbase_tlstream_term();
-#endif /* CONFIG_MALI_MIPE_ENABLED */
+               kbdev->inited_subsys &= ~inited_tlstream;
+       }
+
+       /* Bring job and mem sys to a halt before we continue termination */
 
-       if (inited & inited_js)
+       if (kbdev->inited_subsys & inited_js)
                kbasep_js_devdata_halt(kbdev);
 
-       if (inited & inited_mem)
+       if (kbdev->inited_subsys & inited_mem)
                kbase_mem_halt(kbdev);
 
-       if (inited & inited_js)
+       if (kbdev->inited_subsys & inited_js) {
                kbasep_js_devdata_term(kbdev);
+               kbdev->inited_subsys &= ~inited_js;
+       }
 
-       if (inited & inited_mem)
+       if (kbdev->inited_subsys & inited_mem) {
                kbase_mem_term(kbdev);
+               kbdev->inited_subsys &= ~inited_mem;
+       }
 
-       if (inited & inited_pm_runtime_init) {
-               if (kbdev->pm.callback_power_runtime_term)
-                       kbdev->pm.callback_power_runtime_term(kbdev);
+       if (kbdev->inited_subsys & inited_pm_runtime_init) {
+               kbdev->pm.callback_power_runtime_term(kbdev);
+               kbdev->inited_subsys &= ~inited_pm_runtime_init;
        }
 
-       if (inited & inited_device)
+       if (kbdev->inited_subsys & inited_device) {
                kbase_device_term(kbdev);
+               kbdev->inited_subsys &= ~inited_device;
+       }
 
-       if (inited & inited_backend_early)
+       if (kbdev->inited_subsys & inited_backend_early) {
                kbase_backend_early_term(kbdev);
+               kbdev->inited_subsys &= ~inited_backend_early;
+       }
 
-       return err;
-}
 
+       if (kbdev->inited_subsys & inited_power_control) {
+               power_control_term(kbdev);
+               kbdev->inited_subsys &= ~inited_power_control;
+       }
 
-static struct attribute *kbase_attrs[] = {
-#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
-       &dev_attr_sc_split.attr,
-#endif
-#ifdef CONFIG_MALI_DEBUG
-       &dev_attr_debug_command.attr,
-       &dev_attr_js_softstop_always.attr,
-#endif
-#if !MALI_CUSTOMER_RELEASE
-       &dev_attr_force_replay.attr,
-#endif
-       &dev_attr_js_timeouts.attr,
-       &dev_attr_gpuinfo.attr,
-       &dev_attr_dvfs_period.attr,
-       &dev_attr_pm_poweroff.attr,
-       &dev_attr_reset_timeout.attr,
-       &dev_attr_js_scheduling_period.attr,
-       &dev_attr_power_policy.attr,
-       &dev_attr_core_availability_policy.attr,
-       &dev_attr_core_mask.attr,
-       &dev_attr_mem_pool_size.attr,
-       &dev_attr_mem_pool_max_size.attr,
-       NULL
-};
+       if (kbdev->inited_subsys & inited_registers_map) {
+               registers_unmap(kbdev);
+               kbdev->inited_subsys &= ~inited_registers_map;
+       }
 
-static const struct attribute_group kbase_attr_group = {
-       .attrs = kbase_attrs,
-};
+#ifdef CONFIG_MALI_NO_MALI
+       if (kbdev->inited_subsys & inited_gpu_device) {
+               gpu_device_destroy(kbdev);
+               kbdev->inited_subsys &= ~inited_gpu_device;
+       }
+#endif /* CONFIG_MALI_NO_MALI */
+
+       if (kbdev->inited_subsys != 0)
+               dev_err(kbdev->dev, "Missing sub system termination\n");
 
-static int kbase_common_device_remove(struct kbase_device *kbdev);
+       kbase_device_free(kbdev);
+
+       return 0;
+}
 
 static int kbase_platform_device_probe(struct platform_device *pdev)
 {
        struct kbase_device *kbdev;
-       struct resource *reg_res;
+       struct mali_base_gpu_core_props *core_props;
+       u32 gpu_id;
+       const struct list_head *dev_list;
        int err = 0;
-       int i;
 
 #ifdef CONFIG_OF
        err = kbase_platform_early_init();
        if (err) {
                dev_err(&pdev->dev, "Early platform initialization failed\n");
+               kbase_platform_device_remove(pdev);
                return err;
        }
 #endif
 
        kbdev = kbase_device_alloc();
        if (!kbdev) {
-               dev_err(&pdev->dev, "Can't allocate device\n");
-               err = -ENOMEM;
-               goto out;
+               dev_err(&pdev->dev, "Allocate device failed\n");
+               kbase_platform_device_remove(pdev);
+               return -ENOMEM;
        }
+
+       kbdev->dev = &pdev->dev;
+       dev_set_drvdata(kbdev->dev, kbdev);
+
 #ifdef CONFIG_MALI_NO_MALI
        err = gpu_device_create(kbdev);
        if (err) {
-               dev_err(&pdev->dev, "Can't initialize dummy model\n");
-               goto out_midg;
+               dev_err(&pdev->dev, "Dummy model initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
        }
+       kbdev->inited_subsys |= inited_gpu_device;
 #endif /* CONFIG_MALI_NO_MALI */
 
-       kbdev->dev = &pdev->dev;
-       /* 3 IRQ resources */
-       for (i = 0; i < 3; i++) {
-               struct resource *irq_res;
-               int irqtag;
+       err = assign_irqs(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "IRQ search failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
+       }
 
-               irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
-               if (!irq_res) {
-                       dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
-                       err = -ENOENT;
-                       goto out_platform_irq;
-               }
+       err = registers_map(kbdev);
+       if (err) {
+               dev_err(&pdev->dev, "Register map failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
+       }
+       kbdev->inited_subsys |= inited_registers_map;
 
-#ifdef CONFIG_OF
-               if (!strcmp(irq_res->name, "JOB")) {
-                       irqtag = JOB_IRQ_TAG;
-               } else if (!strcmp(irq_res->name, "MMU")) {
-                       irqtag = MMU_IRQ_TAG;
-               } else if (!strcmp(irq_res->name, "GPU")) {
-                       irqtag = GPU_IRQ_TAG;
-               } else {
-                       dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
-                               irq_res->name);
-                       err = -EINVAL;
-                       goto out_irq_name;
-               }
-#else
-               irqtag = i;
-#endif /* CONFIG_OF */
-               kbdev->irqs[irqtag].irq = irq_res->start;
-               kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK);
+       err = power_control_init(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "Power control initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
        }
-               /* the first memory resource is the physical address of the GPU
-                * registers */
-               reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-               if (!reg_res) {
-                       dev_err(kbdev->dev, "Invalid register resource\n");
-                       err = -ENOENT;
-                       goto out_platform_mem;
-               }
+       kbdev->inited_subsys |= inited_power_control;
 
-               kbdev->reg_start = reg_res->start;
-               kbdev->reg_size = resource_size(reg_res);
 
-               err = kbase_common_reg_map(kbdev);
-               if (err)
-                       goto out_reg_map;
+       err = kbase_backend_early_init(kbdev);
+       if (err) {
+               dev_err(kbdev->dev, "Early backend initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
+       }
+       kbdev->inited_subsys |= inited_backend_early;
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
-                       && defined(CONFIG_REGULATOR)
-       kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
-       if (IS_ERR_OR_NULL(kbdev->regulator)) {
-               dev_info(kbdev->dev, "Continuing without Mali regulator control\n");
-               kbdev->regulator = NULL;
-               /* Allow probe to continue without regulator */
+       scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+                       kbase_dev_nr);
+
+       kbase_disjoint_init(kbdev);
+
+       /* obtain min/max configured gpu frequencies */
+       core_props = &(kbdev->gpu_props.props.core_props);
+       core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+       core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+
+       kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+       err = kbase_device_init(kbdev);
+       if (err) {
+               dev_err(kbdev->dev, "Device initialization failed (%d)\n", err);
+               kbase_platform_device_remove(pdev);
+               return err;
        }
-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+       kbdev->inited_subsys |= inited_device;
 
-#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
-       pm_runtime_enable(kbdev->dev);
-#endif
-       kbdev->clock = clk_get(kbdev->dev, "clk_mali");
-       if (IS_ERR_OR_NULL(kbdev->clock)) {
-               dev_info(kbdev->dev, "Continuing without Mali clock control\n");
-               kbdev->clock = NULL;
-               /* Allow probe to continue without clock. */
-       } else {
-               err = clk_prepare_enable(kbdev->clock);
+       if (kbdev->pm.callback_power_runtime_init) {
+               err = kbdev->pm.callback_power_runtime_init(kbdev);
                if (err) {
                        dev_err(kbdev->dev,
-                               "Failed to prepare and enable clock (%d)\n", err);
-                       goto out_clock_prepare;
+                               "Runtime PM initialization failed\n");
+                       kbase_platform_device_remove(pdev);
+                       return err;
                }
+               kbdev->inited_subsys |= inited_pm_runtime_init;
        }
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \
-                       && defined(CONFIG_PM_OPP)
-       /* Register the OPPs if they are available in device tree */
-       if (of_init_opp_table(kbdev->dev) < 0)
-               dev_dbg(kbdev->dev, "OPP table not found\n");
-#endif
+       err = kbase_mem_init(kbdev);
+       if (err) {
+               dev_err(kbdev->dev, "Memory subsystem initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
+       }
+       kbdev->inited_subsys |= inited_mem;
+
+       gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+       gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+       gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+       kbase_device_coherency_init(kbdev, gpu_id);
 
+       kbasep_secure_mode_init(kbdev);
 
-       err = kbase_common_device_init(kbdev);
+       err = kbasep_js_devdata_init(kbdev);
        if (err) {
-               dev_err(kbdev->dev, "Failed kbase_common_device_init\n");
-               goto out_common_init;
+               dev_err(kbdev->dev, "Job JS devdata initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
        }
+       kbdev->inited_subsys |= inited_js;
 
-       err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+       err = kbase_tlstream_init();
        if (err) {
-               dev_err(&pdev->dev, "Failed to create sysfs entries\n");
-               goto out_sysfs;
+               dev_err(kbdev->dev, "Timeline stream initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
        }
+       kbdev->inited_subsys |= inited_tlstream;
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
-       err = bl_core_client_register(kbdev->devname,
-                                               kbase_logging_started_cb,
-                                               kbdev, &kbdev->buslogger,
-                                               THIS_MODULE, NULL);
+       err = kbase_backend_late_init(kbdev);
        if (err) {
-               dev_err(kbdev->dev, "Couldn't register bus log client\n");
-               goto out_bl_core_register;
+               dev_err(kbdev->dev, "Late backend initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
        }
+       kbdev->inited_subsys |= inited_backend_late;
 
-       bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
-#endif
-       return 0;
-
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
-out_bl_core_register:
-       sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
-#endif
-
-out_sysfs:
-       kbase_common_device_remove(kbdev);
-out_common_init:
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
-       of_free_opp_table(kbdev->dev);
-#endif
-       clk_disable_unprepare(kbdev->clock);
-out_clock_prepare:
-       clk_put(kbdev->clock);
-#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
-       pm_runtime_disable(kbdev->dev);
-#endif
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
-                       && defined(CONFIG_REGULATOR)
-       regulator_put(kbdev->regulator);
-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
-               kbase_common_reg_unmap(kbdev);
-out_reg_map:
-out_platform_mem:
-#ifdef CONFIG_OF
-out_irq_name:
-#endif
-out_platform_irq:
-#ifdef CONFIG_MALI_NO_MALI
-       gpu_device_destroy(kbdev);
-out_midg:
-#endif /* CONFIG_MALI_NO_MALI */
-       kbase_device_free(kbdev);
-out:
-       return err;
-}
+#ifdef CONFIG_MALI_DEVFREQ
+       err = kbase_devfreq_init(kbdev);
+       if (err) {
+               dev_err(kbdev->dev, "Devfreq initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
+       }
+       kbdev->inited_subsys |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
 
-static int kbase_common_device_remove(struct kbase_device *kbdev)
-{
-       kbase_ipa_term(kbdev->ipa_ctx);
-       kbase_vinstr_term(kbdev->vinstr_ctx);
-       sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
+       kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+       if (!kbdev->vinstr_ctx) {
+               dev_err(kbdev->dev,
+                       "Virtual instrumentation initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return -EINVAL;
+       }
+       kbdev->inited_subsys |= inited_vinstr;
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
-       if (kbdev->buslogger)
-               bl_core_client_unregister(kbdev->buslogger);
-#endif
+#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY
+       kbdev->ipa_ctx = kbase_ipa_init(kbdev);
+       if (!kbdev->ipa_ctx) {
+               dev_err(kbdev->dev, "IPA initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return -EINVAL;
+       }
 
-#ifdef CONFIG_DEBUG_FS
-       debugfs_remove_recursive(kbdev->mali_debugfs_directory);
-#endif
-#ifdef CONFIG_MALI_DEVFREQ
-       kbase_devfreq_term(kbdev);
-#endif
+       kbdev->inited_subsys |= inited_ipa;
+#endif  /* CONFIG_MALI_PRFCNT_SET_SECONDARY */
 
-       kbase_backend_late_term(kbdev);
+       err = kbase_debug_job_fault_dev_init(kbdev);
+       if (err) {
+               dev_err(kbdev->dev, "Job fault debug initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
+       }
+       kbdev->inited_subsys |= inited_job_fault;
 
-       if (kbdev->pm.callback_power_runtime_term)
-               kbdev->pm.callback_power_runtime_term(kbdev);
-#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
-       pm_runtime_disable(kbdev->dev);
-#endif
+       err = kbase_device_debugfs_init(kbdev);
+       if (err) {
+               dev_err(kbdev->dev, "DebugFS initialization failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
+       }
+       kbdev->inited_subsys |= inited_debugfs;
 
-#ifdef CONFIG_MALI_MIPE_ENABLED
-       kbase_tlstream_term();
-#endif /* CONFIG_MALI_MIPE_ENABLED */
+       /* initialize the kctx list */
+       mutex_init(&kbdev->kctx_list_lock);
+       INIT_LIST_HEAD(&kbdev->kctx_list);
 
-       kbasep_js_devdata_halt(kbdev);
-       kbase_mem_halt(kbdev);
+       kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+       kbdev->mdev.name = kbdev->devname;
+       kbdev->mdev.fops = &kbase_fops;
+       kbdev->mdev.parent = get_device(kbdev->dev);
+       kbdev->inited_subsys |= inited_get_device;
 
-       kbasep_js_devdata_term(kbdev);
-       kbase_mem_term(kbdev);
-       kbase_backend_early_term(kbdev);
+       err = misc_register(&kbdev->mdev);
+       if (err) {
+               dev_err(kbdev->dev, "Misc device registration failed for %s\n",
+                       kbdev->devname);
+               kbase_platform_device_remove(pdev);
+               return err;
+       }
+       kbdev->inited_subsys |= inited_misc_register;
 
-       {
-               const struct list_head *dev_list = kbase_dev_list_get();
+       dev_list = kbase_dev_list_get();
+       list_add(&kbdev->entry, &kbase_dev_list);
+       kbase_dev_list_put(dev_list);
+       kbdev->inited_subsys |= inited_dev_list;
 
-               list_del(&kbdev->entry);
-               kbase_dev_list_put(dev_list);
-       }
-       misc_deregister(&kbdev->mdev);
-       put_device(kbdev->dev);
-               kbase_common_reg_unmap(kbdev);
-       kbase_device_term(kbdev);
-       if (kbdev->clock) {
-               clk_disable_unprepare(kbdev->clock);
-               clk_put(kbdev->clock);
-               kbdev->clock = NULL;
+       err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
+       if (err) {
+               dev_err(&pdev->dev, "SysFS group creation failed\n");
+               kbase_platform_device_remove(pdev);
+               return err;
        }
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
-                       && defined(CONFIG_REGULATOR)
-       regulator_put(kbdev->regulator);
-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
-#ifdef CONFIG_MALI_NO_MALI
-       gpu_device_destroy(kbdev);
-#endif /* CONFIG_MALI_NO_MALI */
-       kbase_device_free(kbdev);
+       kbdev->inited_subsys |= inited_sysfs_group;
 
-       return 0;
-}
+#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+       err = bl_core_client_register(kbdev->devname,
+                                               kbase_logging_started_cb,
+                                               kbdev, &kbdev->buslogger,
+                                               THIS_MODULE, NULL);
+       if (err == 0) {
+               kbdev->inited_subsys |= inited_buslogger;
+               bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024);
+       } else {
+               dev_warn(kbdev->dev, "Bus log client registration failed\n");
+               err = 0;
+       }
+#endif
 
-static int kbase_platform_device_remove(struct platform_device *pdev)
-{
-       struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+       dev_info(kbdev->dev,
+                       "Probed as %s\n", dev_name(kbdev->mdev.this_device));
 
-       if (!kbdev)
-               return -ENODEV;
+       kbase_dev_nr++;
 
-       return kbase_common_device_remove(kbdev);
+       return err;
 }
 
 /** Suspend callback from the OS.
@@ -3927,7 +4148,7 @@ static int kbase_device_runtime_suspend(struct device *dev)
  */
 
 #ifdef KBASE_PM_RUNTIME
-int kbase_device_runtime_resume(struct device *dev)
+static int kbase_device_runtime_resume(struct device *dev)
 {
        int ret = 0;
        struct kbase_device *kbdev = to_kbase_device(dev);
@@ -3952,21 +4173,30 @@ int kbase_device_runtime_resume(struct device *dev)
 }
 #endif /* KBASE_PM_RUNTIME */
 
-/** Runtime idle callback from the OS.
- *
- * This is called by Linux when the device appears to be inactive and it might be
- * placed into a low power state
+
+#ifdef KBASE_PM_RUNTIME
+/**
+ * kbase_device_runtime_idle - Runtime idle callback from the OS.
+ * @dev: The device to suspend
  *
- * @param dev  The device to suspend
+ * This is called by Linux when the device appears to be inactive and it might
+ * be placed into a low power state.
  *
- * @return A standard Linux error code
+ * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend,
+ * otherwise a standard Linux error code
  */
-
-#ifdef KBASE_PM_RUNTIME
 static int kbase_device_runtime_idle(struct device *dev)
 {
-       /* Avoid pm_runtime_suspend being called */
-       return 1;
+       struct kbase_device *kbdev = to_kbase_device(dev);
+
+       if (!kbdev)
+               return -ENODEV;
+
+       /* Use platform specific implementation if it exists. */
+       if (kbdev->pm.backend.callback_power_runtime_idle)
+               return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
+
+       return 0;
 }
 #endif /* KBASE_PM_RUNTIME */
 #ifndef CONFIG_MALI_DEVFREQ
index 41ce05130d8f9636d54d9044abdc2919db8c42d5..f3e426f9539b8ac5670a1646dcff8ef608fd3ee7 100755 (executable)
@@ -386,7 +386,18 @@ static const struct file_operations kbasep_debug_job_fault_fops = {
        .release = debug_job_fault_release,
 };
 
-static int kbase_job_fault_event_init(struct kbase_device *kbdev)
+/*
+ *  Initialize debugfs entry for job fault dump
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
+{
+       debugfs_create_file("job_fault", S_IRUGO,
+                       kbdev->mali_debugfs_directory, kbdev,
+                       &kbasep_debug_job_fault_fops);
+}
+
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
 {
 
        INIT_LIST_HEAD(&kbdev->job_fault_event_list);
@@ -396,24 +407,23 @@ static int kbase_job_fault_event_init(struct kbase_device *kbdev)
 
        kbdev->job_fault_resume_workq = alloc_workqueue(
                        "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1);
+       if (!kbdev->job_fault_resume_workq)
+               return -ENOMEM;
+
+       kbdev->job_fault_debug = false;
 
        return 0;
 }
 
 /*
- *  Initialize debugfs entry for job fault dump
+ * Release the relevant resource per device
  */
-void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
 {
-       debugfs_create_file("job_fault", S_IRUGO,
-                       kbdev->mali_debugfs_directory, kbdev,
-                       &kbasep_debug_job_fault_fops);
-
-       kbase_job_fault_event_init(kbdev);
-       kbdev->job_fault_debug = false;
-
+       destroy_workqueue(kbdev->job_fault_resume_workq);
 }
 
+
 /*
  *  Initialize the relevant data structure per context
  */
@@ -423,12 +433,12 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
        /* We need allocate double size register range
         * Because this memory will keep the register address and value
         */
-       kctx->reg_dump = kmalloc(0x4000 * 2, GFP_KERNEL);
+       kctx->reg_dump = vmalloc(0x4000 * 2);
        if (kctx->reg_dump == NULL)
                return;
 
        if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) {
-               kfree(kctx->reg_dump);
+               vfree(kctx->reg_dump);
                kctx->reg_dump = NULL;
        }
        INIT_LIST_HEAD(&kctx->job_fault_resume_event_list);
@@ -439,9 +449,22 @@ void kbase_debug_job_fault_context_init(struct kbase_context *kctx)
 /*
  *  release the relevant resource per context
  */
-void kbase_debug_job_fault_context_exit(struct kbase_context *kctx)
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx)
+{
+       vfree(kctx->reg_dump);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
+{
+       kbdev->job_fault_debug = false;
+
+       return 0;
+}
+
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev)
 {
-       kfree(kctx->reg_dump);
 }
 
-#endif
+#endif /* CONFIG_DEBUG_FS */
index 3734046f3fd9f4f5edab3bf9678662aa670ff088..0930f905e4efcaeb37a12f16e1a7bbdc2b8b335a 100755 (executable)
 #define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF
 
 /**
- * kbase_debug_job_fault_dev_init - Initialize job fault debug sysfs
- *             and create the fault event wait queue per device
+ * kbase_debug_job_fault_dev_init - Create the fault event wait queue
+ *             per device and initialize the required lists.
+ * @kbdev:     Device pointer
+ *
+ * Return: Zero on success or a negative error code.
+ */
+int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_debugfs_init - Initialize job fault debugfs entry
+ * @kbdev:     Device pointer
+ */
+void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_debug_job_fault_dev_term - Clean up resources created in
+ *             kbase_debug_job_fault_dev_init.
  * @kbdev:     Device pointer
  */
-void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
+void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
 
 /**
  * kbase_debug_job_fault_context_init - Initialize the relevant
@@ -39,11 +54,11 @@ void kbase_debug_job_fault_dev_init(struct kbase_device *kbdev);
 void kbase_debug_job_fault_context_init(struct kbase_context *kctx);
 
 /**
- * kbase_debug_job_fault_context_exit - Release the relevant
+ * kbase_debug_job_fault_context_term - Release the relevant
  *             resource per context
  * @kctx: KBase context pointer
  */
-void kbase_debug_job_fault_context_exit(struct kbase_context *kctx);
+void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
 
 /**
  * kbase_debug_job_fault_process - Process the failed job.
index 1a3198e5b535bbbaac632de503559802ccac867c..42d1d832c0a393c6f06b4a2f12e906248429829a 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,7 @@
 #include <linux/list.h>
 #include <linux/file.h>
 
-#if CONFIG_DEBUG_FS
+#ifdef CONFIG_DEBUG_FS
 
 struct debug_mem_mapping {
        struct list_head node;
index 86fc9e40ee1da5dbc75873685579e2c0d6e32c43..c4af0c38b3e0b9b8c8a4f5dd804ec551b9b2c909 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,6 +38,7 @@
 #include <linux/atomic.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 
 #ifdef CONFIG_MALI_FPGA_BUS_LOGGER
 #include <linux/bus_logger.h>
 #define MIDGARD_MMU_TOPLEVEL    1
 #endif
 
+#define MIDGARD_MMU_BOTTOMLEVEL 3
+
 #define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
 
 /** setting in kbase_context::as_nr that indicates it's invalid */
 #define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
 /* Atom requires GPU to be in secure mode */
 #define KBASE_KATOM_FLAG_SECURE (1<<11)
+/* Atom has been stored in linked list */
+#define KBASE_KATOM_FLAG_JSCTX_IN_LL (1<<12)
 
 /* SW related flags about types of JS_COMMAND action
  * NOTE: These must be masked off by JS_COMMAND_MASK */
@@ -386,11 +391,22 @@ struct kbase_jd_atom {
        /* Pointer to atom that has cross-slot dependency on this atom */
        struct kbase_jd_atom *x_post_dep;
 
+       /* The GPU's flush count recorded at the time of submission, used for
+        * the cache flush optimisation */
+       u32 flush_id;
 
        struct kbase_jd_atom_backend backend;
 #ifdef CONFIG_DEBUG_FS
        struct base_job_fault_event fault_event;
 #endif
+
+       struct list_head queue;
+
+       struct kbase_va_region *jit_addr_reg;
+
+       /* If non-zero, this indicates that the atom will fail with the set
+        * event_code when the atom is processed. */
+       enum base_jd_event_code will_fail_event_code;
 };
 
 static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom)
@@ -471,6 +487,7 @@ typedef u32 kbase_as_poke_state;
 struct kbase_mmu_setup {
        u64     transtab;
        u64     memattr;
+       u64     transcfg;
 };
 
 /**
@@ -489,6 +506,7 @@ struct kbase_as {
        enum kbase_mmu_fault_type fault_type;
        u32 fault_status;
        u64 fault_addr;
+       u64 fault_extra_addr;
        struct mutex transaction_mutex;
 
        struct kbase_mmu_setup current_setup;
@@ -664,10 +682,11 @@ struct kbase_pm_device_data {
        wait_queue_head_t zero_active_count_wait;
 
        /**
-        * A bit mask identifying the available shader cores that are specified
-        * via sysfs
+        * Bit masks identifying the available shader cores that are specified
+        * via sysfs. One mask per job slot.
         */
-       u64 debug_core_mask;
+       u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
+       u64 debug_core_mask_all;
 
        /**
         * Lock protecting the power state of the device.
@@ -784,9 +803,7 @@ struct kbase_device {
                int irq;
                int flags;
        } irqs[3];
-#ifdef CONFIG_HAVE_CLK
        struct clk *clock;
-#endif
 #ifdef CONFIG_REGULATOR
        struct regulator *regulator;
 #endif
@@ -872,7 +889,7 @@ struct kbase_device {
        s8 nr_user_address_spaces;                        /**< Number of address spaces available to user contexts */
 
        /* Structure used for instrumentation and HW counters dumping */
-       struct {
+       struct kbase_hwcnt {
                /* The lock should be used when accessing any of the following members */
                spinlock_t lock;
 
@@ -932,10 +949,6 @@ struct kbase_device {
        struct list_head        kctx_list;
        struct mutex            kctx_list_lock;
 
-#ifdef CONFIG_MALI_MIDGARD_RT_PM
-       struct delayed_work runtime_pm_workqueue;
-#endif
-
 #ifdef CONFIG_PM_DEVFREQ
        struct devfreq_dev_profile devfreq_profile;
        struct devfreq *devfreq;
@@ -952,6 +965,12 @@ struct kbase_device {
        struct kbase_trace_kbdev_timeline timeline;
 #endif
 
+       /*
+        * Control for enabling job dump on failure, set when control debugfs
+        * is opened.
+        */
+       bool job_fault_debug;
+
 #ifdef CONFIG_DEBUG_FS
        /* directory for debugfs entries */
        struct dentry *mali_debugfs_directory;
@@ -959,13 +978,19 @@ struct kbase_device {
        struct dentry *debugfs_ctx_directory;
 
        /* failed job dump, used for separate debug process */
-       bool job_fault_debug;
        wait_queue_head_t job_fault_wq;
        wait_queue_head_t job_fault_resume_wq;
        struct workqueue_struct *job_fault_resume_workq;
        struct list_head job_fault_event_list;
        struct kbase_context *kctx_fault;
 
+#if !MALI_CUSTOMER_RELEASE
+       /* Per-device data for register dumping interface */
+       struct {
+               u16 reg_offset; /* Offset of a GPU_CONTROL register to be
+                                  dumped upon request */
+       } regs_dump_debugfs_data;
+#endif /* !MALI_CUSTOMER_RELEASE */
 #endif /* CONFIG_DEBUG_FS */
 
        /* fbdump profiling controls set by gator */
@@ -1002,11 +1027,23 @@ struct kbase_device {
 
 
        /* defaults for new context created for this device */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+       bool infinite_cache_active_default;
+#else
        u32 infinite_cache_active_default;
+#endif
        size_t mem_pool_max_size_default;
 
        /* system coherency mode  */
        u32 system_coherency;
+       /* Flag to track when cci snoops have been enabled on the interface */
+       bool cci_snoop_enabled;
+
+       /* SMC function IDs to call into Trusted firmware to enable/disable
+        * cache snooping. Value of 0 indicates that they are not used
+        */
+       u32 snoop_enable_smc;
+       u32 snoop_disable_smc;
 
        /* Secure operations */
        struct kbase_secure_ops *secure_ops;
@@ -1033,14 +1070,33 @@ struct kbase_device {
         */
        struct bus_logger_client *buslogger;
 #endif
+       /* Boolean indicating if an IRQ flush during reset is in progress. */
+       bool irq_reset_flush;
+
+       /* list of inited sub systems. Used during terminate/error recovery */
+       u32 inited_subsys;
 };
 
-/* JSCTX ringbuffer size must always be a power of 2 */
-#define JSCTX_RB_SIZE 256
-#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1)
+/* JSCTX ringbuffer size will always be a power of 2. The idx shift must be:
+   - >=2 (buffer size -> 4)
+   - <= 9 (buffer size 2^(9-1)=256) (technically, 10 works for the ringbuffer
+                               but this is unnecessary as max atoms is 256)
+ */
+#define JSCTX_RB_IDX_SHIFT (8U)
+#if ((JSCTX_RB_IDX_SHIFT < 2) || ((3 * JSCTX_RB_IDX_SHIFT) >= 32))
+#error "Invalid ring buffer size for 32bit atomic."
+#endif
+#define JSCTX_RB_SIZE (1U << (JSCTX_RB_IDX_SHIFT - 1U)) /* 1 bit for overflow */
+#define JSCTX_RB_SIZE_STORE (1U << JSCTX_RB_IDX_SHIFT)
+#define JSCTX_RB_MASK (JSCTX_RB_SIZE - 1U)
+#define JSCTX_RB_MASK_STORE (JSCTX_RB_SIZE_STORE - 1U)
+
+#define JSCTX_WR_OFFSET         (0U)
+#define JSCTX_RN_OFFSET         (JSCTX_WR_OFFSET   + JSCTX_RB_IDX_SHIFT)
+#define JSCTX_RD_OFFSET         (JSCTX_RN_OFFSET + JSCTX_RB_IDX_SHIFT)
 
 /**
- * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer
+ * struct jsctx_rb_entry - Ringbuffer entry in &struct jsctx_queue.
  * @atom_id: Atom ID
  */
 struct jsctx_rb_entry {
@@ -1048,45 +1104,69 @@ struct jsctx_rb_entry {
 };
 
 /**
- * struct jsctx_rb - JS context atom ring buffer
+ * struct jsctx_queue - JS context atom queue, containing both ring buffer and linked list.
  * @entries:     Array of size %JSCTX_RB_SIZE which holds the &struct
  *               kbase_jd_atom pointers which make up the contents of the ring
  *               buffer.
- * @read_idx:    Index into @entries. Indicates the next entry in @entries to
- *               read, and is incremented when pulling an atom, and decremented
- *               when unpulling.
- *               HW access lock must be held when accessing.
- * @write_idx:   Index into @entries. Indicates the next entry to use when
- *               adding atoms into the ring buffer, and is incremented when
- *               adding a new atom.
- *               jctx->lock must be held when accessing.
- * @running_idx: Index into @entries. Indicates the last valid entry, and is
- *               incremented when remving atoms from the ring buffer.
- *               HW access lock must be held when accessing.
+ * @indicies:    An atomic variable containing indices for the ring buffer.
+ *               Each index is JSCTX_RB_IDX_SHIFT bits wide.
+ *               The following are contained:
+ *                - WR_IDX - Write index. Index of the NEXT slot to be written.
+ *                - RN_IDX - Running index. Index of the tail of the list.
+ *                           This is the atom that has been running the longest.
+ *                - RD_IDX - Read index. Index of the next atom to be pulled.
+ * @queue_head:  Head item of the linked list queue.
+ *
+ * Locking:
+ * The linked list assumes jctx.lock is held.
+ * The ringbuffer serves as an intermediary between irq context and non-irq
+ * context, without the need for the two to share any lock. irq context can
+ * pull (and unpull) and only requires the runpool_irq.lock, while non-irq
+ * context can add and remove and only requires jctx.lock.
+ * Error handling affecting both, or the whole ringbuffer in general, must
+ * hold both locks or otherwise ensure (e.g. deschedule/kill) that only that
+ * thread is accessing the buffer.
+ * This means that RD_IDX is updated by irq context (pull and unpull), which
+ * must hold runpool_irq.lock, while WR_IDX (add) and RN_IDX (remove) are
+ * updated by non-irq context, which must hold jctx.lock.
+ * Note that pull (or sister function peek) must also access WR_IDX to ensure
+ * there is free space in the buffer, this is ok as WR_IDX is only increased.
+ * A similar situation is apparent with unpull and RN_IDX, but only one atom
+ * (already pulled) can cause either remove or unpull, so this will never
+ * conflict.
  *
- * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom.
+ * &struct jsctx_queue is a queue of &struct kbase_jd_atom,
+ * part ringbuffer and part linked list.
  */
-struct jsctx_rb {
+struct jsctx_queue {
        struct jsctx_rb_entry entries[JSCTX_RB_SIZE];
 
-       u16 read_idx; /* HW access lock must be held when accessing */
-       u16 write_idx; /* jctx->lock must be held when accessing */
-       u16 running_idx; /* HW access lock must be held when accessing */
+       atomic_t indicies;
+
+       struct list_head queue_head;
 };
 
+
+
+
+
 #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
                                         (((minor) & 0xFFF) << 8) | \
                                         ((0 & 0xFF) << 0))
 
 struct kbase_context {
+       struct file *filp;
        struct kbase_device *kbdev;
        int id; /* System wide unique id */
        unsigned long api_version;
        phys_addr_t pgd;
        struct list_head event_list;
+       struct list_head event_coalesce_list;
        struct mutex event_mutex;
-       bool event_closed;
+       atomic_t event_closed;
        struct workqueue_struct *event_workq;
+       atomic_t event_count;
+       int event_coalesce_count;
 
        bool is_compat;
 
@@ -1097,6 +1177,7 @@ struct kbase_context {
 
        struct page *aliasing_sink_page;
 
+       struct mutex            mmu_lock;
        struct mutex            reg_lock; /* To be converted to a rwlock? */
        struct rb_root          reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
 
@@ -1113,7 +1194,12 @@ struct kbase_context {
 
        struct kbase_mem_pool mem_pool;
 
+       struct shrinker         reclaim;
+       struct list_head        evict_list;
+       struct mutex            evict_lock;
+
        struct list_head waiting_soft_jobs;
+       spinlock_t waiting_soft_jobs_lock;
 #ifdef CONFIG_KDS
        struct list_head waiting_kds_resource;
 #endif
@@ -1138,6 +1224,8 @@ struct kbase_context {
         * All other flags must be added there */
        spinlock_t         mm_update_lock;
        struct mm_struct *process_mm;
+       /* End of the SAME_VA zone */
+       u64 same_va_end;
 
 #ifdef CONFIG_MALI_TRACE_TIMELINE
        struct kbase_trace_kctx_timeline timeline;
@@ -1147,8 +1235,10 @@ struct kbase_context {
        char *mem_profile_data;
        /* Size of @c mem_profile_data */
        size_t mem_profile_size;
-       /* Spinlock guarding data */
-       spinlock_t mem_profile_lock;
+       /* Mutex guarding memory profile state */
+       struct mutex mem_profile_lock;
+       /* Memory profile file created */
+       bool mem_profile_initialized;
        struct dentry *kctx_dentry;
 
        /* for job fault debug */
@@ -1161,7 +1251,7 @@ struct kbase_context {
 
 #endif /* CONFIG_DEBUG_FS */
 
-       struct jsctx_rb jsctx_rb
+       struct jsctx_queue jsctx_queue
                [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
 
        /* Number of atoms currently pulled from this context */
@@ -1172,7 +1262,11 @@ struct kbase_context {
        bool pulled;
        /* true if infinite cache is to be enabled for new allocations. Existing
         * allocations will not change. bool stored as a u32 per Linux API */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
+       bool infinite_cache_active;
+#else
        u32 infinite_cache_active;
+#endif
        /* Bitmask of slots that can be pulled from */
        u32 slots_pullable;
 
@@ -1196,6 +1290,52 @@ struct kbase_context {
        struct list_head completed_jobs;
        /* Number of work items currently pending on job_done_wq */
        atomic_t work_count;
+
+       /* true if context is counted in kbdev->js_data.nr_contexts_runnable */
+       bool ctx_runnable_ref;
+
+       /* Waiting soft-jobs will fail when this timer expires */
+       struct hrtimer soft_event_timeout;
+
+       /* JIT allocation management */
+       struct kbase_va_region *jit_alloc[255];
+       struct list_head jit_active_head;
+       struct list_head jit_pool_head;
+       struct list_head jit_destroy_head;
+       struct mutex jit_lock;
+       struct work_struct jit_work;
+
+       /* External sticky resource management */
+       struct list_head ext_res_meta_head;
+};
+
+/**
+ * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
+ *                                 to a @kbase_context.
+ * @ext_res_node:                  List head for adding the metadata to a
+ *                                 @kbase_context.
+ * @alloc:                         The physical memory allocation structure
+ *                                 which is mapped.
+ * @gpu_addr:                      The GPU virtual address the resource is
+ *                                 mapped to.
+ * @refcount:                      Refcount to keep track of the number of
+ *                                 active mappings.
+ *
+ * External resources can be mapped into multiple contexts as well as the same
+ * context multiple times.
+ * As kbase_va_region itself isn't refcounted we can't attach our extra
+ * information to it as it could be removed under our feet leaving external
+ * resources pinned.
+ * This metadata structure binds a single external resource to a single
+ * context, ensuring that per context refcount is tracked separately so it can
+ * be overridden when needed and abuses by the application (freeing the resource
+ * multiple times) don't affect the refcount of the physical allocation.
+ */
+struct kbase_ctx_ext_res_meta {
+       struct list_head ext_res_node;
+       struct kbase_mem_phy_alloc *alloc;
+       u64 gpu_addr;
+       u64 refcount;
 };
 
 enum kbase_reg_access_type {
@@ -1209,6 +1349,21 @@ enum kbase_share_attr_bits {
        SHARE_INNER_BITS = (3ULL << 8)  /* inner shareable coherency */
 };
 
+/**
+ * kbase_device_is_cpu_coherent - Check whether the device is CPU coherent.
+ * @kbdev: kbase device
+ *
+ * Return: true if the device's accesses are coherent, false if not.
+ */
+static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
+{
+       if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
+                       (kbdev->system_coherency == COHERENCY_ACE))
+               return true;
+
+       return false;
+}
+
 /* Conversion helpers for setting up high resolution timers */
 #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U))
 #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
@@ -1221,4 +1376,29 @@ enum kbase_share_attr_bits {
 /* Maximum number of times a job can be replayed */
 #define BASEP_JD_REPLAY_LIMIT 15
 
+/* JobDescriptorHeader - taken from the architecture specifications, the layout
+ * is currently identical for all GPU archs. */
+struct job_descriptor_header {
+       u32 exception_status;
+       u32 first_incomplete_task;
+       u64 fault_pointer;
+       u8 job_descriptor_size : 1;
+       u8 job_type : 7;
+       u8 job_barrier : 1;
+       u8 _reserved_01 : 1;
+       u8 _reserved_1 : 1;
+       u8 _reserved_02 : 1;
+       u8 _reserved_03 : 1;
+       u8 _reserved_2 : 1;
+       u8 _reserved_04 : 1;
+       u8 _reserved_05 : 1;
+       u16 job_index;
+       u16 job_dependency_index_1;
+       u16 job_dependency_index_2;
+       union {
+               u64 _64;
+               u32 _32;
+       } next_job;
+};
+
 #endif                         /* _KBASE_DEFS_H_ */
index 6b8a2854a7a7b32c554aa3ecc6a208478883dd66..c55779cbad4db701030365f7ca12b78f05d8ee8c 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -145,8 +145,32 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev)
 int kbase_device_init(struct kbase_device * const kbdev)
 {
        int i, err;
+#ifdef CONFIG_ARM64
+       struct device_node *np = NULL;
+#endif /* CONFIG_ARM64 */
 
        spin_lock_init(&kbdev->mmu_mask_change);
+#ifdef CONFIG_ARM64
+       kbdev->cci_snoop_enabled = false;
+       np = kbdev->dev->of_node;
+       if (np != NULL) {
+               if (of_property_read_u32(np, "snoop_enable_smc",
+                                       &kbdev->snoop_enable_smc))
+                       kbdev->snoop_enable_smc = 0;
+               if (of_property_read_u32(np, "snoop_disable_smc",
+                                       &kbdev->snoop_disable_smc))
+                       kbdev->snoop_disable_smc = 0;
+               /* Either both or none of the calls should be provided. */
+               if (!((kbdev->snoop_disable_smc == 0
+                       && kbdev->snoop_enable_smc == 0)
+                       || (kbdev->snoop_disable_smc != 0
+                       && kbdev->snoop_enable_smc != 0))) {
+                       WARN_ON(1);
+                       err = -EINVAL;
+                       goto fail;
+               }
+       }
+#endif /* CONFIG_ARM64 */
        /* Get the list of workarounds for issues on the current HW
         * (identified by the GPU_ID register)
         */
@@ -159,6 +183,8 @@ int kbase_device_init(struct kbase_device * const kbdev)
         */
        kbase_hw_set_features_mask(kbdev);
 
+       kbase_gpuprops_set_features(kbdev);
+
        /* On Linux 4.0+, dma coherency is determined from device tree */
 #if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
        set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
@@ -206,7 +232,7 @@ int kbase_device_init(struct kbase_device * const kbdev)
        for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
                kbdev->kbase_profiling_controls[i] = 0;
 
-               kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+       kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
 
        atomic_set(&kbdev->ctx_num, 0);
 
@@ -218,7 +244,11 @@ int kbase_device_init(struct kbase_device * const kbdev)
 
        kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+       kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
+#else
        kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
 #ifdef CONFIG_MALI_DEBUG
        init_waitqueue_head(&kbdev->driver_inactive_wait);
@@ -255,13 +285,20 @@ void kbase_device_free(struct kbase_device *kbdev)
        kfree(kbdev);
 }
 
-void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size)
+int kbase_device_trace_buffer_install(
+               struct kbase_context *kctx, u32 *tb, size_t size)
 {
        unsigned long flags;
 
        KBASE_DEBUG_ASSERT(kctx);
        KBASE_DEBUG_ASSERT(tb);
 
+       /* Interface uses 16-bit value to track last accessed entry. Each entry
+        * is composed of two 32-bit words.
+        * This limits the size that can be handled without an overflow. */
+       if (0xFFFF * (2 * sizeof(u32)) < size)
+               return -EINVAL;
+
        /* set up the header */
        /* magic number in the first 4 bytes */
        tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
@@ -276,6 +313,8 @@ void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size
        kctx->jctx.tb_wrap_offset = size / 8;
        kctx->jctx.tb = tb;
        spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+
+       return 0;
 }
 
 void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
index 25b30f0b976e51a0a9b67bcd856a8879d9d611bc..bf8c304610eb3b0d15f9cfdba399100261440e7c 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 
 #include <mali_kbase.h>
 #include <mali_kbase_debug.h>
-
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 
 static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
@@ -38,10 +35,8 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru
 
        KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
        kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
        kbase_tlstream_tl_del_atom(katom);
-#endif
 
        katom->status = KBASE_JD_ATOM_STATE_UNUSED;
 
@@ -52,15 +47,10 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru
 
 int kbase_event_pending(struct kbase_context *ctx)
 {
-       int ret;
-
        KBASE_DEBUG_ASSERT(ctx);
 
-       mutex_lock(&ctx->event_mutex);
-       ret = (!list_empty(&ctx->event_list)) || (true == ctx->event_closed);
-       mutex_unlock(&ctx->event_mutex);
-
-       return ret;
+       return (atomic_read(&ctx->event_count) != 0) ||
+                       (atomic_read(&ctx->event_closed) != 0);
 }
 
 KBASE_EXPORT_TEST_API(kbase_event_pending);
@@ -74,7 +64,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve
        mutex_lock(&ctx->event_mutex);
 
        if (list_empty(&ctx->event_list)) {
-               if (!ctx->event_closed) {
+               if (!atomic_read(&ctx->event_closed)) {
                        mutex_unlock(&ctx->event_mutex);
                        return -1;
                }
@@ -90,6 +80,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve
        }
 
        /* normal event processing */
+       atomic_dec(&ctx->event_count);
        atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
        list_del(ctx->event_list.next);
 
@@ -151,6 +142,29 @@ static void kbase_event_process_noreport(struct kbase_context *kctx,
        }
 }
 
+/**
+ * kbase_event_coalesce - Move pending events to the main event list
+ * @kctx:  Context pointer
+ *
+ * kctx->event_list and kctx->event_coalesce_count must be protected
+ * by a lock unless this is the last thread using them
+ * (and we're about to terminate the lock).
+ *
+ * Return: The number of pending events moved to the main event list
+ */
+static int kbase_event_coalesce(struct kbase_context *kctx)
+{
+       const int event_count = kctx->event_coalesce_count;
+
+       /* Join the list of pending events onto the tail of the main list
+          and reset it */
+       list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list);
+       kctx->event_coalesce_count = 0;
+
+       /* Return the number of events moved */
+       return event_count;
+}
+
 void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
 {
        if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
@@ -167,18 +181,31 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
                return;
        }
 
-       mutex_lock(&ctx->event_mutex);
-       list_add_tail(&atom->dep_item[0], &ctx->event_list);
-       mutex_unlock(&ctx->event_mutex);
+       if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
+               /* Don't report the event until other event(s) have completed */
+               mutex_lock(&ctx->event_mutex);
+               list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list);
+               ++ctx->event_coalesce_count;
+               mutex_unlock(&ctx->event_mutex);
+       } else {
+               /* Report the event and any pending events now */
+               int event_count = 1;
+
+               mutex_lock(&ctx->event_mutex);
+               event_count += kbase_event_coalesce(ctx);
+               list_add_tail(&atom->dep_item[0], &ctx->event_list);
+               atomic_add(event_count, &ctx->event_count);
+               mutex_unlock(&ctx->event_mutex);
 
-       kbase_event_wakeup(ctx);
+               kbase_event_wakeup(ctx);
+       }
 }
 KBASE_EXPORT_TEST_API(kbase_event_post);
 
 void kbase_event_close(struct kbase_context *kctx)
 {
        mutex_lock(&kctx->event_mutex);
-       kctx->event_closed = true;
+       atomic_set(&kctx->event_closed, true);
        mutex_unlock(&kctx->event_mutex);
        kbase_event_wakeup(kctx);
 }
@@ -188,8 +215,11 @@ int kbase_event_init(struct kbase_context *kctx)
        KBASE_DEBUG_ASSERT(kctx);
 
        INIT_LIST_HEAD(&kctx->event_list);
+       INIT_LIST_HEAD(&kctx->event_coalesce_list);
        mutex_init(&kctx->event_mutex);
-       kctx->event_closed = false;
+       atomic_set(&kctx->event_count, 0);
+       kctx->event_coalesce_count = 0;
+       atomic_set(&kctx->event_closed, false);
        kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
 
        if (NULL == kctx->event_workq)
@@ -202,6 +232,8 @@ KBASE_EXPORT_TEST_API(kbase_event_init);
 
 void kbase_event_cleanup(struct kbase_context *kctx)
 {
+       int event_count;
+
        KBASE_DEBUG_ASSERT(kctx);
        KBASE_DEBUG_ASSERT(kctx->event_workq);
 
@@ -214,6 +246,9 @@ void kbase_event_cleanup(struct kbase_context *kctx)
         * Note: use of kctx->event_list without a lock is safe because this must be the last
         * thread using it (because we're about to terminate the lock)
         */
+       event_count = kbase_event_coalesce(kctx);
+       atomic_add(event_count, &kctx->event_count);
+
        while (!list_empty(&kctx->event_list)) {
                struct base_jd_event_v2 event;
 
index a2174b24ac3cd90ecedfb72d9cc36a286afaba99..061564104b67630ec1bead65865b05a06e8ae882 100755 (executable)
@@ -189,23 +189,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn
                }
        /* If we are using any other device */
        } else {
-               uint32_t nr_l2, nr_sc, j;
+               uint32_t nr_l2, nr_sc_bits, j;
                uint64_t core_mask;
 
                nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
 
                core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
 
-               nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+               nr_sc_bits = fls64(core_mask);
 
                /* The job manager and tiler sets of counters
                 * are always present */
-               in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL);
+               in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL);
 
                if (!in_out_info->hwc_layout)
                        goto destroy_context;
 
-               dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+               dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
 
                in_out_info->hwc_layout[i++] = JM_BLOCK;
                in_out_info->hwc_layout[i++] = TILER_BLOCK;
index d124e82edd0ae2d85617fff384ea9891cee16719..eb76f01b0fda4bb42e5c5fb41d65e405d1a8ca07 100755 (executable)
@@ -221,7 +221,7 @@ static const char * const hardware_counters_mali_t60x[] = {
        "T60x_LSC_DIRTY_LINE",
        "T60x_LSC_SNOOPS",
        "T60x_AXI_TLB_STALL",
-       "T60x_AXI_TLB_MIESS",
+       "T60x_AXI_TLB_MISS",
        "T60x_AXI_TLB_TRANSACTION",
        "T60x_LS_TLB_MISS",
        "T60x_LS_TLB_HIT",
@@ -486,7 +486,7 @@ static const char * const hardware_counters_mali_t62x[] = {
        "T62x_LSC_DIRTY_LINE",
        "T62x_LSC_SNOOPS",
        "T62x_AXI_TLB_STALL",
-       "T62x_AXI_TLB_MIESS",
+       "T62x_AXI_TLB_MISS",
        "T62x_AXI_TLB_TRANSACTION",
        "T62x_LS_TLB_MISS",
        "T62x_LS_TLB_HIT",
@@ -1018,7 +1018,7 @@ static const char * const hardware_counters_mali_t76x[] = {
        "T76x_LSC_DIRTY_LINE",
        "T76x_LSC_SNOOPS",
        "T76x_AXI_TLB_STALL",
-       "T76x_AXI_TLB_MIESS",
+       "T76x_AXI_TLB_MISS",
        "T76x_AXI_TLB_TRANSACTION",
        "T76x_LS_TLB_MISS",
        "T76x_LS_TLB_HIT",
@@ -1284,7 +1284,7 @@ static const char * const hardware_counters_mali_t82x[] = {
        "T82x_LSC_DIRTY_LINE",
        "T82x_LSC_SNOOPS",
        "T82x_AXI_TLB_STALL",
-       "T82x_AXI_TLB_MIESS",
+       "T82x_AXI_TLB_MISS",
        "T82x_AXI_TLB_TRANSACTION",
        "T82x_LS_TLB_MISS",
        "T82x_LS_TLB_HIT",
@@ -1550,7 +1550,7 @@ static const char * const hardware_counters_mali_t83x[] = {
        "T83x_LSC_DIRTY_LINE",
        "T83x_LSC_SNOOPS",
        "T83x_AXI_TLB_STALL",
-       "T83x_AXI_TLB_MIESS",
+       "T83x_AXI_TLB_MISS",
        "T83x_AXI_TLB_TRANSACTION",
        "T83x_LS_TLB_MISS",
        "T83x_LS_TLB_HIT",
@@ -1816,7 +1816,7 @@ static const char * const hardware_counters_mali_t86x[] = {
        "T86x_LSC_DIRTY_LINE",
        "T86x_LSC_SNOOPS",
        "T86x_AXI_TLB_STALL",
-       "T86x_AXI_TLB_MIESS",
+       "T86x_AXI_TLB_MISS",
        "T86x_AXI_TLB_TRANSACTION",
        "T86x_LS_TLB_MISS",
        "T86x_LS_TLB_HIT",
@@ -2082,7 +2082,7 @@ static const char * const hardware_counters_mali_t88x[] = {
        "T88x_LSC_DIRTY_LINE",
        "T88x_LSC_SNOOPS",
        "T88x_AXI_TLB_STALL",
-       "T88x_AXI_TLB_MIESS",
+       "T88x_AXI_TLB_MISS",
        "T88x_AXI_TLB_TRANSACTION",
        "T88x_LS_TLB_MISS",
        "T88x_LS_TLB_HIT",
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
new file mode 100644 (file)
index 0000000..a962ecb
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+#ifndef _KBASE_GPU_ID_H_
+#define _KBASE_GPU_ID_H_
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT       0
+#define GPU_ID_VERSION_MINOR_SHIFT        4
+#define GPU_ID_VERSION_MAJOR_SHIFT        12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
+#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID  (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X                    0x6956
+#define GPU_ID_PI_T62X                    0x0620
+#define GPU_ID_PI_T76X                    0x0750
+#define GPU_ID_PI_T72X                    0x0720
+#define GPU_ID_PI_TFRX                    0x0880
+#define GPU_ID_PI_T86X                    0x0860
+#define GPU_ID_PI_T82X                    0x0820
+#define GPU_ID_PI_T83X                    0x0830
+
+/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
+#define GPU_ID_PI_NEW_FORMAT_START        0x1000
+#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
+                                               (product_id) >= \
+                                               GPU_ID_PI_NEW_FORMAT_START)
+
+#define GPU_ID2_VERSION_STATUS_SHIFT      0
+#define GPU_ID2_VERSION_MINOR_SHIFT       4
+#define GPU_ID2_VERSION_MAJOR_SHIFT       12
+#define GPU_ID2_PRODUCT_MAJOR_SHIFT       16
+#define GPU_ID2_ARCH_REV_SHIFT            20
+#define GPU_ID2_ARCH_MINOR_SHIFT          24
+#define GPU_ID2_ARCH_MAJOR_SHIFT          28
+#define GPU_ID2_VERSION_STATUS            (0xF << GPU_ID2_VERSION_STATUS_SHIFT)
+#define GPU_ID2_VERSION_MINOR             (0xFF << GPU_ID2_VERSION_MINOR_SHIFT)
+#define GPU_ID2_VERSION_MAJOR             (0xF << GPU_ID2_VERSION_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MAJOR             (0xF << GPU_ID2_PRODUCT_MAJOR_SHIFT)
+#define GPU_ID2_ARCH_REV                  (0xF << GPU_ID2_ARCH_REV_SHIFT)
+#define GPU_ID2_ARCH_MINOR                (0xF << GPU_ID2_ARCH_MINOR_SHIFT)
+#define GPU_ID2_ARCH_MAJOR                (0xF << GPU_ID2_ARCH_MAJOR_SHIFT)
+#define GPU_ID2_PRODUCT_MODEL  (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR)
+
+/* Helper macro to create a partial GPU_ID (new format) that defines
+   a product ignoring its version. */
+#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \
+               (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+                ((arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT)  | \
+                ((arch_rev) << GPU_ID2_ARCH_REV_SHIFT)      | \
+                ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Helper macro to create a partial GPU_ID (new format) that specifies the
+   revision (major, minor, status) of a product */
+#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \
+               (((version_major) << GPU_ID2_VERSION_MAJOR_SHIFT)  | \
+                ((version_minor) << GPU_ID2_VERSION_MINOR_SHIFT)  | \
+                ((version_status) << GPU_ID2_VERSION_STATUS_SHIFT))
+
+/* Helper macro to create a complete GPU_ID (new format) */
+#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \
+       version_major, version_minor, version_status) \
+               (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \
+                       product_major) | \
+                GPU_ID2_VERSION_MAKE(version_major, version_minor,     \
+                       version_status))
+
+/* Helper macro to create a partial GPU_ID (new format) that identifies
+   a particular GPU model by its arch_major and product_major. */
+#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \
+               (((arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT)  | \
+               ((product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT))
+
+/* Strip off the non-relevant bits from a product_id value and make it suitable
+   for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU
+   model. */
+#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \
+               (((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \
+                   GPU_ID2_PRODUCT_MODEL)
+
+#define GPU_ID2_PRODUCT_TMIX              GPU_ID2_MODEL_MAKE(6, 0)
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0                   0x1
+#define GPU_ID_S_EAC                      0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major,
+   minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+               (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+               ((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
+               ((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
+               ((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+#endif /* _KBASE_GPU_ID_H_ */
index ca264049653c40b87e2cf2ff865a3e44174bd325..82f4c36d509efceb3911f86c3dd77d35913dd6dd 100755 (executable)
@@ -32,7 +32,6 @@
 
 static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
 {
-       ssize_t ret = 0;
        struct list_head *entry;
        const struct list_head *kbdev_list;
 
@@ -43,14 +42,14 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
 
                kbdev = list_entry(entry, struct kbase_device, entry);
                /* output the total memory usage and cap for this device */
-               ret = seq_printf(sfile, "%-16s  %10u\n",
+               seq_printf(sfile, "%-16s  %10u\n",
                                kbdev->devname,
                                atomic_read(&(kbdev->memdev.used_pages)));
                mutex_lock(&kbdev->kctx_list_lock);
                list_for_each_entry(element, &kbdev->kctx_list, link) {
                        /* output the memory usage and cap for each kctx
                        * opened on this device */
-                       ret = seq_printf(sfile, "  %s-0x%p %10u\n",
+                       seq_printf(sfile, "  %s-0x%p %10u\n",
                                "kctx",
                                element->kctx,
                                atomic_read(&(element->kctx->used_pages)));
@@ -58,7 +57,7 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
                mutex_unlock(&kbdev->kctx_list_lock);
        }
        kbase_dev_list_put(kbdev_list);
-       return ret;
+       return 0;
 }
 
 /*
index d632a0bbb1bc887289db74c13e81078763bb45d3..7f77dba347d086abb8aa7a11b4e96d462d608822 100755 (executable)
@@ -87,7 +87,6 @@ int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpup
        if (kctx->api_version < KBASE_API_VERSION(8, 2))
                kbase_props->props.raw_props.suspend_size = 0;
 
-
        return 0;
 }
 
@@ -200,7 +199,6 @@ static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kb
        gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
        gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
        gpu_props->raw_props.thread_features = regdump.thread_features;
-
 }
 
 /**
@@ -297,3 +295,20 @@ void kbase_gpuprops_set(struct kbase_device *kbdev)
        gpu_props->num_address_spaces = hweight32(raw->as_present);
        gpu_props->num_job_slots = hweight32(raw->js_present);
 }
+
+void kbase_gpuprops_set_features(struct kbase_device *kbdev)
+{
+       base_gpu_props *gpu_props;
+       struct kbase_gpuprops_regdump regdump;
+
+       gpu_props = &kbdev->gpu_props.props;
+
+       /* Dump relevant registers */
+       kbase_backend_gpuprops_get_features(kbdev, &regdump);
+
+       /*
+        * Copy the raw value from the register; later this will get turned
+        * into the selected coherency mode.
+        */
+       gpu_props->raw_props.coherency_mode = regdump.coherency_features;
+}
index af97d97bf94523e838d983b49dbf7dc8e860efc9..f3c95cc1849cb8bdb4ae9742d5393a9c8c9344c0 100755 (executable)
@@ -39,6 +39,16 @@ struct kbase_device;
  */
 void kbase_gpuprops_set(struct kbase_device *kbdev);
 
+/**
+ * kbase_gpuprops_set_features - Set up Kbase GPU properties
+ * @kbdev:   Device pointer
+ *
+ * This function sets up GPU properties that are dependent on the hardware
+ * features bitmask. This function must be preceded by a call to
+ * kbase_hw_set_features_mask().
+ */
+void kbase_gpuprops_set_features(struct kbase_device *kbdev);
+
 /**
  * @brief Provide GPU properties to userside through UKU call.
  *
index 463fead4b05d9070eb718f3d7f2502da98d70183..781375a9a97f5ece8582312b254d2b384c7124b4 100755 (executable)
@@ -51,6 +51,7 @@ struct kbase_gpuprops_regdump {
        u32 tiler_present_hi;
        u32 l2_present_lo;
        u32 l2_present_hi;
+       u32 coherency_features;
 };
 
 struct kbase_gpu_cache_props {
index fac65d4f22865de07a44ed88becb6c2c5b42b0a9..de2461fb8de422d0751f04ee694f7faad1a6f594 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -31,38 +31,50 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
 {
        const enum base_hw_feature *features;
        u32 gpu_id;
+       u32 product_id;
 
        gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-       gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
-       gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
-
-       switch (gpu_id) {
-       case GPU_ID_PI_TFRX:
-               /* FALLTHROUGH */
-       case GPU_ID_PI_T86X:
-               features = base_hw_features_tFxx;
-               break;
-       case GPU_ID_PI_T83X:
-               features = base_hw_features_t83x;
-               break;
-       case GPU_ID_PI_T82X:
-               features = base_hw_features_t82x;
-               break;
-       case GPU_ID_PI_T76X:
-               features = base_hw_features_t76x;
-               break;
-       case GPU_ID_PI_T72X:
-               features = base_hw_features_t72x;
-               break;
-       case GPU_ID_PI_T62X:
-               features = base_hw_features_t62x;
-               break;
-       case GPU_ID_PI_T60X:
-               features = base_hw_features_t60x;
-               break;
-       default:
-               features = base_hw_features_generic;
-               break;
+       product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+       product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+       if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+               switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+               case GPU_ID2_PRODUCT_TMIX:
+                       features = base_hw_features_tMIx;
+                       break;
+               default:
+                       features = base_hw_features_generic;
+                       break;
+               }
+       } else {
+               switch (product_id) {
+               case GPU_ID_PI_TFRX:
+                       /* FALLTHROUGH */
+               case GPU_ID_PI_T86X:
+                       features = base_hw_features_tFxx;
+                       break;
+               case GPU_ID_PI_T83X:
+                       features = base_hw_features_t83x;
+                       break;
+               case GPU_ID_PI_T82X:
+                       features = base_hw_features_t82x;
+                       break;
+               case GPU_ID_PI_T76X:
+                       features = base_hw_features_t76x;
+                       break;
+               case GPU_ID_PI_T72X:
+                       features = base_hw_features_t72x;
+                       break;
+               case GPU_ID_PI_T62X:
+                       features = base_hw_features_t62x;
+                       break;
+               case GPU_ID_PI_T60X:
+                       features = base_hw_features_t60x;
+                       break;
+               default:
+                       features = base_hw_features_generic;
+                       break;
+               }
        }
 
        for (; *features != BASE_HW_FEATURE_END; features++)
@@ -73,135 +85,172 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
 {
        const enum base_hw_issue *issues;
        u32 gpu_id;
+       u32 product_id;
        u32 impl_tech;
 
        gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+       product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+       product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
        impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
 
        if (impl_tech != IMPLEMENTATION_MODEL) {
-               switch (gpu_id) {
-               case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
-                       issues = base_hw_issues_t60x_r0p0_15dev0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
-                       issues = base_hw_issues_t60x_r0p0_eac;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
-                       issues = base_hw_issues_t60x_r0p1;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
-                       issues = base_hw_issues_t62x_r0p1;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
-               case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
-                       issues = base_hw_issues_t62x_r1p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
-                       issues = base_hw_issues_t62x_r1p1;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
-                       issues = base_hw_issues_t76x_r0p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
-                       issues = base_hw_issues_t76x_r0p1;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
-                       issues = base_hw_issues_t76x_r0p1_50rel0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
-                       issues = base_hw_issues_t76x_r0p2;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
-                       issues = base_hw_issues_t76x_r0p3;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
-                       issues = base_hw_issues_t76x_r1p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
-               case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
-               case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
-                       issues = base_hw_issues_t72x_r0p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
-                       issues = base_hw_issues_t72x_r1p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
-                       issues = base_hw_issues_t72x_r1p1;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
-                       issues = base_hw_issues_tFRx_r0p1;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
-                       issues = base_hw_issues_tFRx_r0p2;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
-               case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
-                       issues = base_hw_issues_tFRx_r1p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
-                       issues = base_hw_issues_tFRx_r2p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
-                       issues = base_hw_issues_t86x_r0p2;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
-               case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
-                       issues = base_hw_issues_t86x_r1p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
-                       issues = base_hw_issues_t86x_r2p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
-                       issues = base_hw_issues_t83x_r0p1;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
-               case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
-                       issues = base_hw_issues_t83x_r1p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
-                       issues = base_hw_issues_t82x_r0p0;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
-                       issues = base_hw_issues_t82x_r0p1;
-                       break;
-               case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
-               case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
-                       issues = base_hw_issues_t82x_r1p0;
-                       break;
-               default:
-                       dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
-                       return -EINVAL;
+               if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+                       switch (gpu_id) {
+                       case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1):
+                               issues = base_hw_issues_tMIx_r0p0_05dev0;
+                               break;
+                       case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2):
+                               issues = base_hw_issues_tMIx_r0p0;
+                               break;
+                       default:
+                               if ((gpu_id & GPU_ID2_PRODUCT_MODEL) ==
+                                                       GPU_ID2_PRODUCT_TMIX) {
+                                       issues = base_hw_issues_tMIx_r0p0;
+                               } else {
+                                       dev_err(kbdev->dev,
+                                               "Unknown GPU ID %x", gpu_id);
+                                       return -EINVAL;
+                               }
+                       }
+               } else {
+                       switch (gpu_id) {
+                       case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+                               issues = base_hw_issues_t60x_r0p0_15dev0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+                               issues = base_hw_issues_t60x_r0p0_eac;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+                               issues = base_hw_issues_t60x_r0p1;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+                               issues = base_hw_issues_t62x_r0p1;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+                       case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+                               issues = base_hw_issues_t62x_r1p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+                               issues = base_hw_issues_t62x_r1p1;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+                               issues = base_hw_issues_t76x_r0p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+                               issues = base_hw_issues_t76x_r0p1;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+                               issues = base_hw_issues_t76x_r0p1_50rel0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+                               issues = base_hw_issues_t76x_r0p2;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+                               issues = base_hw_issues_t76x_r0p3;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+                               issues = base_hw_issues_t76x_r1p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+                       case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+                       case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+                               issues = base_hw_issues_t72x_r0p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+                               issues = base_hw_issues_t72x_r1p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+                               issues = base_hw_issues_t72x_r1p1;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+                               issues = base_hw_issues_tFRx_r0p1;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+                               issues = base_hw_issues_tFRx_r0p2;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+                       case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
+                               issues = base_hw_issues_tFRx_r1p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+                               issues = base_hw_issues_tFRx_r2p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+                               issues = base_hw_issues_t86x_r0p2;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+                       case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
+                               issues = base_hw_issues_t86x_r1p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+                               issues = base_hw_issues_t86x_r2p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+                               issues = base_hw_issues_t83x_r0p1;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+                       case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
+                               issues = base_hw_issues_t83x_r1p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+                               issues = base_hw_issues_t82x_r0p0;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+                               issues = base_hw_issues_t82x_r0p1;
+                               break;
+                       case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+                       case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
+                               issues = base_hw_issues_t82x_r1p0;
+                               break;
+                       default:
+                               dev_err(kbdev->dev,
+                                       "Unknown GPU ID %x", gpu_id);
+                               return -EINVAL;
+                       }
                }
        } else {
                /* Software model */
-               switch (gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT) {
-               case GPU_ID_PI_T60X:
-                       issues = base_hw_issues_model_t60x;
-                       break;
-               case GPU_ID_PI_T62X:
-                       issues = base_hw_issues_model_t62x;
-                       break;
-               case GPU_ID_PI_T72X:
-                       issues = base_hw_issues_model_t72x;
-                       break;
-               case GPU_ID_PI_T76X:
-                       issues = base_hw_issues_model_t76x;
-                       break;
-               case GPU_ID_PI_TFRX:
-                       issues = base_hw_issues_model_tFRx;
-                       break;
-               case GPU_ID_PI_T86X:
-                       issues = base_hw_issues_model_t86x;
-                       break;
-               case GPU_ID_PI_T83X:
-                       issues = base_hw_issues_model_t83x;
-                       break;
-               case GPU_ID_PI_T82X:
-                       issues = base_hw_issues_model_t82x;
-                       break;
-               default:
-                       dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
-                       return -EINVAL;
+               if (GPU_ID_IS_NEW_FORMAT(product_id)) {
+                       switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+                       case GPU_ID2_PRODUCT_TMIX:
+                               issues = base_hw_issues_model_tMIx;
+                               break;
+                       default:
+                               dev_err(kbdev->dev,
+                                       "Unknown GPU ID %x", gpu_id);
+                               return -EINVAL;
+                       }
+               } else {
+                       switch (product_id) {
+                       case GPU_ID_PI_T60X:
+                               issues = base_hw_issues_model_t60x;
+                               break;
+                       case GPU_ID_PI_T62X:
+                               issues = base_hw_issues_model_t62x;
+                               break;
+                       case GPU_ID_PI_T72X:
+                               issues = base_hw_issues_model_t72x;
+                               break;
+                       case GPU_ID_PI_T76X:
+                               issues = base_hw_issues_model_t76x;
+                               break;
+                       case GPU_ID_PI_TFRX:
+                               issues = base_hw_issues_model_tFRx;
+                               break;
+                       case GPU_ID_PI_T86X:
+                               issues = base_hw_issues_model_t86x;
+                               break;
+                       case GPU_ID_PI_T83X:
+                               issues = base_hw_issues_model_t83x;
+                               break;
+                       case GPU_ID_PI_T82X:
+                               issues = base_hw_issues_model_t82x;
+                               break;
+                       default:
+                               dev_err(kbdev->dev, "Unknown GPU ID %x",
+                                       gpu_id);
+                               return -EINVAL;
+                       }
                }
        }
 
index f93ca9d86802b5dbfb32478ec97233b55bce5c05..cf8a8131c22ed29b846f8c3b28029ec0147b0e1c 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
                                        struct kbase_gpuprops_regdump *regdump);
 
+/**
+ * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads GPU properties that are dependent on the hardware
+ * features bitmask
+ */
+void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
+                                       struct kbase_gpuprops_regdump *regdump);
+
+
 #endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
index 6bddaa81073b758b03bd1c8e56905b6b6869e0a1..2efa293088a14f88e7022d9d92b64f03997f5fce 100755 (executable)
@@ -254,6 +254,15 @@ void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
  */
 void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
 
+/**
+ * kbase_backend_get_current_flush_id - Return the current flush ID
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: the current flush ID to be recorded for each job chain
+ */
+u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
+
 #if KBASE_GPU_RESET_EN
 /**
  * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
index dbdcd3def220d7b4de9f609089e14d6f83c1edb2..71c7d495c40ab924f6ec9fe01ffd2fd9d1263fd2 100755 (executable)
@@ -126,10 +126,13 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
  *
  * @param kbdev         The kbase device structure for the device (must be a
  *                      valid pointer)
- * @param new_core_mask The core mask to use
+ * @param new_core_mask_js0 The core mask to use for job slot 0
+ * @param new_core_mask_js1 The core mask to use for job slot 1
+ * @param new_core_mask_js2 The core mask to use for job slot 2
  */
 void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
-                                                       u64 new_core_mask);
+               u64 new_core_mask_js0, u64 new_core_mask_js1,
+               u64 new_core_mask_js2);
 
 
 /**
index 314ae0819d50118ae2092e7d0dc0e2840e96a1ec..fda317b90176e5ae2505d34060b483cc407ed7e0 100755 (executable)
@@ -66,18 +66,10 @@ int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
                struct kbase_uk_hwcnt_setup *setup)
 {
        struct kbase_device *kbdev;
-       bool access_allowed;
        int err;
 
        kbdev = kctx->kbdev;
 
-       /* Determine if the calling task has access to this capability */
-       access_allowed = kbase_security_has_capability(kctx,
-                                       KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
-                                       KBASE_SEC_FLAG_NOAUDIT);
-       if (!access_allowed)
-               return -EINVAL;
-
        /* Mark the context as active so the GPU is kept turned on */
        /* A suspend won't happen here, because we're in a syscall from a
         * userspace thread. */
index 433103c0d331df3c51a5f95aea3936bf8bc7f4a9..c579d0a589f708cb161ca6a98cca1d22e0bea55a 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 
 #define NR_IPA_GROUPS 8
 
+struct kbase_ipa_context;
+
 /**
  * struct ipa_group - represents a single IPA group
  * @name:               name of the IPA group
  * @capacitance:        capacitance constant for IPA group
+ * @calc_power:         function to calculate power for IPA group
  */
 struct ipa_group {
        const char *name;
        u32 capacitance;
+       u32 (*calc_power)(struct kbase_ipa_context *,
+                       struct ipa_group *);
 };
 
+#include <mali_kbase_ipa_tables.h>
+
 /**
  * struct kbase_ipa_context - IPA context per device
- * @kbdev:      pointer to kbase device
- * @groups:     array of IPA groups for this context
- * @ipa_lock:   protects the entire IPA context
+ * @kbdev:              pointer to kbase device
+ * @groups:             array of IPA groups for this context
+ * @vinstr_cli:         vinstr client handle
+ * @vinstr_buffer:      buffer to dump hardware counters onto
+ * @ipa_lock:           protects the entire IPA context
  */
 struct kbase_ipa_context {
        struct kbase_device *kbdev;
        struct ipa_group groups[NR_IPA_GROUPS];
+       struct kbase_vinstr_client *vinstr_cli;
+       void *vinstr_buffer;
        struct mutex ipa_lock;
 };
 
-static struct ipa_group ipa_groups_def_v4[] = {
-       { .name = "group0", .capacitance = 0 },
-       { .name = "group1", .capacitance = 0 },
-       { .name = "group2", .capacitance = 0 },
-       { .name = "group3", .capacitance = 0 },
-       { .name = "group4", .capacitance = 0 },
-       { .name = "group5", .capacitance = 0 },
-       { .name = "group6", .capacitance = 0 },
-       { .name = "group7", .capacitance = 0 },
-};
-
-static struct ipa_group ipa_groups_def_v5[] = {
-       { .name = "group0", .capacitance = 0 },
-       { .name = "group1", .capacitance = 0 },
-       { .name = "group2", .capacitance = 0 },
-       { .name = "group3", .capacitance = 0 },
-       { .name = "group4", .capacitance = 0 },
-       { .name = "group5", .capacitance = 0 },
-       { .name = "group6", .capacitance = 0 },
-       { .name = "group7", .capacitance = 0 },
-};
-
 static ssize_t show_ipa_group(struct device *dev,
                struct device_attribute *attr,
                char *buf)
@@ -143,25 +132,10 @@ static struct attribute_group kbase_ipa_attr_group = {
 
 static void init_ipa_groups(struct kbase_ipa_context *ctx)
 {
-       struct kbase_device *kbdev = ctx->kbdev;
-       struct ipa_group *defs;
-       size_t i, len;
-
-       if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
-               defs = ipa_groups_def_v4;
-               len = ARRAY_SIZE(ipa_groups_def_v4);
-       } else {
-               defs = ipa_groups_def_v5;
-               len = ARRAY_SIZE(ipa_groups_def_v5);
-       }
-
-       for (i = 0; i < len; i++) {
-               ctx->groups[i].name = defs[i].name;
-               ctx->groups[i].capacitance = defs[i].capacitance;
-       }
+       memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups));
 }
 
-#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
 static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
 {
        struct kbase_device *kbdev = ctx->kbdev;
@@ -171,7 +145,7 @@ static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx)
        size_t i;
        int err;
 
-       np = of_find_node_by_name(kbdev->dev->of_node, "ipa-groups");
+       np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups");
        if (!np)
                return 0;
 
@@ -229,6 +203,172 @@ static int reset_ipa_groups(struct kbase_ipa_context *ctx)
        return update_ipa_groups_from_dt(ctx);
 }
 
+static inline u32 read_hwcnt(struct kbase_ipa_context *ctx,
+       u32 offset)
+{
+       u8 *p = ctx->vinstr_buffer;
+
+       return *(u32 *)&p[offset];
+}
+
+static inline u32 add_saturate(u32 a, u32 b)
+{
+       if (U32_MAX - a < b)
+               return U32_MAX;
+       return a + b;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c'
+ * across all shader cores.
+ */
+static u32 calc_power_sc_single(struct kbase_ipa_context *ctx,
+       struct ipa_group *group, u32 c)
+{
+       struct kbase_device *kbdev = ctx->kbdev;
+       u64 core_mask;
+       u32 base = 0, r = 0;
+
+       core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+       while (core_mask != 0ull) {
+               if ((core_mask & 1ull) != 0ull) {
+                       u64 n = read_hwcnt(ctx, base + c);
+                       u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+                       u32 s = group->capacitance;
+
+                       r = add_saturate(r, div_u64(n * s, d));
+               }
+               base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+               core_mask >>= 1;
+       }
+       return r;
+}
+
+/*
+ * Calculate power estimation based on hardware counter `c1'
+ * and `c2' across all shader cores.
+ */
+static u32 calc_power_sc_double(struct kbase_ipa_context *ctx,
+       struct ipa_group *group, u32 c1, u32 c2)
+{
+       struct kbase_device *kbdev = ctx->kbdev;
+       u64 core_mask;
+       u32 base = 0, r = 0;
+
+       core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+       while (core_mask != 0ull) {
+               if ((core_mask & 1ull) != 0ull) {
+                       u64 n = read_hwcnt(ctx, base + c1);
+                       u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+                       u32 s = group->capacitance;
+
+                       r = add_saturate(r, div_u64(n * s, d));
+                       n = read_hwcnt(ctx, base + c2);
+                       r = add_saturate(r, div_u64(n * s, d));
+               }
+               base += NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+               core_mask >>= 1;
+       }
+       return r;
+}
+
+static u32 calc_power_single(struct kbase_ipa_context *ctx,
+       struct ipa_group *group, u32 c)
+{
+       u64 n = read_hwcnt(ctx, c);
+       u32 d = read_hwcnt(ctx, GPU_ACTIVE);
+       u32 s = group->capacitance;
+
+       return div_u64(n * s, d);
+}
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+               struct ipa_group *group)
+{
+       return calc_power_single(ctx, group, L2_ANY_LOOKUP);
+}
+
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+               struct ipa_group *group)
+{
+       return calc_power_single(ctx, group, TILER_ACTIVE);
+}
+
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+               struct ipa_group *group)
+{
+       return calc_power_sc_single(ctx, group, FRAG_ACTIVE);
+}
+
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+               struct ipa_group *group)
+{
+       return calc_power_sc_double(ctx, group, VARY_SLOT_32,
+                       VARY_SLOT_16);
+}
+
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+               struct ipa_group *group)
+{
+       return calc_power_sc_single(ctx, group, TEX_COORD_ISSUE);
+}
+
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+               struct ipa_group *group)
+{
+       return calc_power_sc_single(ctx, group, EXEC_INSTR_COUNT);
+}
+
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+               struct ipa_group *group)
+{
+       return calc_power_sc_double(ctx, group, BEATS_RD_LSC,
+                       BEATS_WR_LSC);
+}
+
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+               struct ipa_group *group)
+{
+       return calc_power_sc_single(ctx, group, EXEC_CORE_ACTIVE);
+}
+
+static int attach_vinstr(struct kbase_ipa_context *ctx)
+{
+       struct kbase_device *kbdev = ctx->kbdev;
+       struct kbase_uk_hwcnt_reader_setup setup;
+       size_t dump_size;
+
+       dump_size = kbase_vinstr_dump_size(kbdev);
+       ctx->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
+       if (!ctx->vinstr_buffer) {
+               dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
+               return -1;
+       }
+
+       setup.jm_bm = ~0u;
+       setup.shader_bm = ~0u;
+       setup.tiler_bm = ~0u;
+       setup.mmu_l2_bm = ~0u;
+       ctx->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
+                       &setup, ctx->vinstr_buffer);
+       if (!ctx->vinstr_cli) {
+               dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
+               kfree(ctx->vinstr_buffer);
+               ctx->vinstr_buffer = NULL;
+               return -1;
+       }
+       return 0;
+}
+
+static void detach_vinstr(struct kbase_ipa_context *ctx)
+{
+       if (ctx->vinstr_cli)
+               kbase_vinstr_detach_client(ctx->vinstr_cli);
+       ctx->vinstr_cli = NULL;
+       kfree(ctx->vinstr_buffer);
+       ctx->vinstr_buffer = NULL;
+}
+
 struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev)
 {
        struct kbase_ipa_context *ctx;
@@ -259,6 +399,33 @@ void kbase_ipa_term(struct kbase_ipa_context *ctx)
 {
        struct kbase_device *kbdev = ctx->kbdev;
 
+       detach_vinstr(ctx);
        sysfs_remove_group(&kbdev->dev->kobj, &kbase_ipa_attr_group);
        kfree(ctx);
 }
+
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err)
+{
+       struct ipa_group *group;
+       u32 power = 0;
+       size_t i;
+
+       mutex_lock(&ctx->ipa_lock);
+       if (!ctx->vinstr_cli) {
+               *err = attach_vinstr(ctx);
+               if (*err < 0)
+                       goto err0;
+       }
+       *err = kbase_vinstr_hwc_dump(ctx->vinstr_cli,
+                       BASE_HWCNT_READER_EVENT_MANUAL);
+       if (*err)
+               goto err0;
+       for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+               group = &ctx->groups[i];
+               power = add_saturate(power, group->calc_power(ctx, group));
+       }
+err0:
+       mutex_unlock(&ctx->ipa_lock);
+       return power;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_dynamic_power);
index ed123759ff3c93e37a3daab1dee66406df182ac8..e2234d150b0b64140a0717239839bfb62fea45f7 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,3 +30,12 @@ struct kbase_ipa_context *kbase_ipa_init(struct kbase_device *kbdev);
  * @ctx:        pointer to the IPA context
  */
 void kbase_ipa_term(struct kbase_ipa_context *ctx);
+
+/**
+ * kbase_ipa_dynamic_power - calculate power
+ * @ctx:        pointer to the IPA context
+ * @err:        0 on success, negative on failure
+ *
+ * Return:      power consumption in mW @ 1GHz @ 1V
+ */
+u32 kbase_ipa_dynamic_power(struct kbase_ipa_context *ctx, int *err);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_ipa_tables.h
new file mode 100644 (file)
index 0000000..101abfe
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#define NR_BYTES_PER_CNT  4
+#define NR_CNT_PER_BLOCK 64
+
+#define JM_BASE    (0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define TILER_BASE (1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define MMU_BASE   (2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+#define SC0_BASE   (3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT)
+
+#define GPU_ACTIVE       (JM_BASE    + NR_BYTES_PER_CNT *  6)
+#define TILER_ACTIVE     (TILER_BASE + NR_BYTES_PER_CNT * 45)
+#define L2_ANY_LOOKUP    (MMU_BASE   + NR_BYTES_PER_CNT * 25)
+#define FRAG_ACTIVE      (SC0_BASE   + NR_BYTES_PER_CNT *  4)
+#define EXEC_CORE_ACTIVE (SC0_BASE   + NR_BYTES_PER_CNT * 26)
+#define EXEC_INSTR_COUNT (SC0_BASE   + NR_BYTES_PER_CNT * 28)
+#define TEX_COORD_ISSUE  (SC0_BASE   + NR_BYTES_PER_CNT * 40)
+#define VARY_SLOT_32     (SC0_BASE   + NR_BYTES_PER_CNT * 50)
+#define VARY_SLOT_16     (SC0_BASE   + NR_BYTES_PER_CNT * 51)
+#define BEATS_RD_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 56)
+#define BEATS_WR_LSC     (SC0_BASE   + NR_BYTES_PER_CNT * 61)
+
+static u32 calc_power_group0(struct kbase_ipa_context *ctx,
+               struct ipa_group *group);
+static u32 calc_power_group1(struct kbase_ipa_context *ctx,
+               struct ipa_group *group);
+static u32 calc_power_group2(struct kbase_ipa_context *ctx,
+               struct ipa_group *group);
+static u32 calc_power_group3(struct kbase_ipa_context *ctx,
+               struct ipa_group *group);
+static u32 calc_power_group4(struct kbase_ipa_context *ctx,
+               struct ipa_group *group);
+static u32 calc_power_group5(struct kbase_ipa_context *ctx,
+               struct ipa_group *group);
+static u32 calc_power_group6(struct kbase_ipa_context *ctx,
+               struct ipa_group *group);
+static u32 calc_power_group7(struct kbase_ipa_context *ctx,
+               struct ipa_group *group);
+
+static struct ipa_group ipa_groups_def[] = {
+       /* L2 */
+       {
+               .name = "group0",
+               .capacitance = 687,
+               .calc_power = calc_power_group0,
+       },
+       /* TILER */
+       {
+               .name = "group1",
+               .capacitance = 0,
+               .calc_power = calc_power_group1,
+       },
+       /* FRAG */
+       {
+               .name = "group2",
+               .capacitance = 23,
+               .calc_power = calc_power_group2,
+       },
+       /* VARY */
+       {
+               .name = "group3",
+               .capacitance = 108,
+               .calc_power = calc_power_group3,
+       },
+       /* TEX */
+       {
+               .name = "group4",
+               .capacitance = 128,
+               .calc_power = calc_power_group4,
+       },
+       /* EXEC INSTR */
+       {
+               .name = "group5",
+               .capacitance = 249,
+               .calc_power = calc_power_group5,
+       },
+       /* LSC */
+       {
+               .name = "group6",
+               .capacitance = 0,
+               .calc_power = calc_power_group6,
+       },
+       /* EXEC OVERHEAD */
+       {
+               .name = "group7",
+               .capacitance = 29,
+               .calc_power = calc_power_group7,
+       },
+};
index dd2d187d5cd928877916f174944195e90e8855f8..c091ffef643f95c38ddead062f1f2381b8fc07a9 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #endif
 #include <mali_kbase.h>
 #include <mali_kbase_uku.h>
-#ifdef CONFIG_UMP
-#include <linux/ump.h>
-#endif                         /* CONFIG_UMP */
 #include <linux/random.h>
 #include <linux/version.h>
 #include <linux/ratelimit.h>
+#include <linux/pagemap.h>
 
 #include <mali_kbase_jm.h>
 #include <mali_kbase_hwaccess_jm.h>
-
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 
 #define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
 
@@ -90,6 +85,10 @@ static int jd_run_atom(struct kbase_jd_atom *katom)
                return 0;
        } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
                /* Soft-job */
+               if (katom->will_fail_event_code) {
+                       katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+                       return 0;
+               }
                if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
                                                  == BASE_JD_REQ_SOFT_REPLAY) {
                        if (!kbase_replay_process(katom))
@@ -97,9 +96,6 @@ static int jd_run_atom(struct kbase_jd_atom *katom)
                } else if (kbase_process_soft_job(katom) == 0) {
                        kbase_finish_soft_job(katom);
                        katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
-               } else {
-                       /* The job has not completed */
-                       list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
                }
                return 0;
        }
@@ -198,85 +194,6 @@ static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
 }
 #endif                         /* CONFIG_KDS */
 
-#ifdef CONFIG_DMA_SHARED_BUFFER
-static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg)
-{
-       struct sg_table *sgt;
-       struct scatterlist *s;
-       int i;
-       phys_addr_t *pa;
-       int err;
-       size_t count = 0;
-       struct kbase_mem_phy_alloc *alloc;
-
-       alloc = reg->gpu_alloc;
-
-       KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
-       KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
-       sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL);
-
-       if (IS_ERR_OR_NULL(sgt))
-               return -EINVAL;
-
-       /* save for later */
-       alloc->imported.umm.sgt = sgt;
-
-       pa = kbase_get_gpu_phy_pages(reg);
-       KBASE_DEBUG_ASSERT(pa);
-
-       for_each_sg(sgt->sgl, s, sgt->nents, i) {
-               int j;
-               size_t pages = PFN_UP(sg_dma_len(s));
-
-               WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
-               "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
-               sg_dma_len(s));
-
-               WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
-               "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
-               (unsigned long long) sg_dma_address(s));
-
-               for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++)
-                       *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
-               WARN_ONCE(j < pages,
-               "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
-               alloc->imported.umm.dma_buf->size);
-       }
-
-       if (WARN_ONCE(count < reg->nr_pages,
-                       "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
-                       alloc->imported.umm.dma_buf->size)) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       /* Update nents as we now have pages to map */
-       alloc->nents = count;
-
-       err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
-
-out:
-       if (err) {
-               dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
-               alloc->imported.umm.sgt = NULL;
-       }
-
-       return err;
-}
-
-static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc)
-{
-       KBASE_DEBUG_ASSERT(kctx);
-       KBASE_DEBUG_ASSERT(alloc);
-       KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
-       KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
-       dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
-           alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
-       alloc->imported.umm.sgt = NULL;
-       alloc->nents = 0;
-}
-#endif                         /* CONFIG_DMA_SHARED_BUFFER */
-
 void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
 {
 #ifdef CONFIG_KDS
@@ -316,31 +233,13 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
 
                res_no = katom->nr_extres;
                while (res_no-- > 0) {
-                       struct kbase_mem_phy_alloc *alloc;
-
-                       alloc = katom->extres[res_no].alloc;
-#ifdef CONFIG_DMA_SHARED_BUFFER
-                       if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
-                               alloc->imported.umm.current_mapping_usage_count--;
-
-                               if (0 == alloc->imported.umm.current_mapping_usage_count) {
-                                       struct kbase_va_region *reg;
-
-                                       reg = kbase_region_tracker_find_region_base_address(
-                                                       katom->kctx,
-                                                       katom->extres[res_no].gpu_address);
-
-                                       if (reg && reg->gpu_alloc == alloc)
-                                               kbase_mmu_teardown_pages(
-                                                               katom->kctx,
-                                                               reg->start_pfn,
-                                                               kbase_reg_current_backed_size(reg));
+                       struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+                       struct kbase_va_region *reg;
 
-                                       kbase_jd_umm_unmap(katom->kctx, alloc);
-                               }
-                       }
-#endif /* CONFIG_DMA_SHARED_BUFFER */
-                       kbase_mem_phy_alloc_put(alloc);
+                       reg = kbase_region_tracker_find_region_base_address(
+                                       katom->kctx,
+                                       katom->extres[res_no].gpu_address);
+                       kbase_unmap_external_resource(katom->kctx, reg, alloc);
                }
                kfree(katom->extres);
                katom->extres = NULL;
@@ -348,24 +247,6 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
        kbase_gpu_vm_unlock(katom->kctx);
 }
 
-#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
-static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive)
-{
-       u32 i;
-
-       for (i = 0; i < *kds_res_count; i++) {
-               /* Duplicate resource, ignore */
-               if (kds_resources[i] == kds_res)
-                       return;
-       }
-
-       kds_resources[*kds_res_count] = kds_res;
-       if (exclusive)
-               set_bit(*kds_res_count, kds_access_bitmap);
-       (*kds_res_count)++;
-}
-#endif
-
 /*
  * Set up external resources needed by this job.
  *
@@ -430,14 +311,22 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
        }
 #endif                         /* CONFIG_KDS */
 
+       /* Take the process's mmap lock */
+       down_read(&current->mm->mmap_sem);
+
        /* need to keep the GPU VM locked while we set up UMM buffers */
        kbase_gpu_vm_lock(katom->kctx);
        for (res_no = 0; res_no < katom->nr_extres; res_no++) {
                struct base_external_resource *res;
                struct kbase_va_region *reg;
+               struct kbase_mem_phy_alloc *alloc;
+               bool exclusive;
 
                res = &input_extres[res_no];
-               reg = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+               exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+                               ? true : false;
+               reg = kbase_region_tracker_find_region_enclosing_address(
+                               katom->kctx,
                                res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
                /* did we find a matching region object? */
                if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
@@ -448,56 +337,17 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
                if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
                                (reg->flags & KBASE_REG_SECURE)) {
                        katom->atom_flags |= KBASE_KATOM_FLAG_SECURE;
-                       if ((katom->core_req & BASE_JD_REQ_FS) == 0) {
-                               WARN_RATELIMIT(1, "Secure non-fragment jobs not supported");
-                               goto failed_loop;
-                       }
                }
 
-               /* decide what needs to happen for this resource */
-               switch (reg->gpu_alloc->type) {
-               case BASE_MEM_IMPORT_TYPE_UMP:
-                       {
-#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
-                               struct kds_resource *kds_res;
-
-                               kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
-                               if (kds_res)
-                                       add_kds_resource(kds_res, kds_resources, &kds_res_count,
-                                                       kds_access_bitmap,
-                                                       res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
-#endif                         /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
-                               break;
-                       }
-#ifdef CONFIG_DMA_SHARED_BUFFER
-               case BASE_MEM_IMPORT_TYPE_UMM:
-                       {
-#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
-                               struct kds_resource *kds_res;
-
-                               kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf);
-                               if (kds_res)
-                                       add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
-#endif
-                               reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
-                               if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
-                                       /* use a local variable to not pollute err_ret_val
-                                        * with a potential success value as some other gotos depend
-                                        * on the default error code stored in err_ret_val */
-                                       int tmp;
-
-                                       tmp = kbase_jd_umm_map(katom->kctx, reg);
-                                       if (tmp) {
-                                               /* failed to map this buffer, roll back */
-                                               err_ret_val = tmp;
-                                               reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
-                                               goto failed_loop;
-                                       }
-                               }
-                               break;
-                       }
+               alloc = kbase_map_external_resource(katom->kctx, reg,
+                               current->mm
+#ifdef CONFIG_KDS
+                               , &kds_res_count, kds_resources,
+                               kds_access_bitmap, exclusive
 #endif
-               default:
+                               );
+               if (!alloc) {
+                       err_ret_val = -EINVAL;
                        goto failed_loop;
                }
 
@@ -508,12 +358,15 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
                 * until the last read for an element.
                 * */
                katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
-               katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+               katom->extres[res_no].alloc = alloc;
        }
        /* successfully parsed the extres array */
        /* drop the vm lock before we call into kds */
        kbase_gpu_vm_unlock(katom->kctx);
 
+       /* Release the process's mmap lock */
+       up_read(&current->mm->mmap_sem);
+
 #ifdef CONFIG_KDS
        if (kds_res_count) {
                int wait_failed;
@@ -545,6 +398,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 
 #ifdef CONFIG_KDS
  failed_kds_setup:
+       /* Take the process's mmap lock */
+       down_read(&current->mm->mmap_sem);
 
        /* lock before we unmap */
        kbase_gpu_vm_lock(katom->kctx);
@@ -554,30 +409,14 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
        /* undo the loop work */
        while (res_no-- > 0) {
                struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
-#ifdef CONFIG_DMA_SHARED_BUFFER
-               if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
-                       alloc->imported.umm.current_mapping_usage_count--;
 
-                       if (0 == alloc->imported.umm.current_mapping_usage_count) {
-                               struct kbase_va_region *reg;
-
-                               reg = kbase_region_tracker_find_region_base_address(
-                                               katom->kctx,
-                                               katom->extres[res_no].gpu_address);
-
-                               if (reg && reg->gpu_alloc == alloc)
-                                       kbase_mmu_teardown_pages(katom->kctx,
-                                                       reg->start_pfn,
-                                                       kbase_reg_current_backed_size(reg));
-
-                               kbase_jd_umm_unmap(katom->kctx, alloc);
-                       }
-               }
-#endif                         /* CONFIG_DMA_SHARED_BUFFER */
-               kbase_mem_phy_alloc_put(alloc);
+               kbase_unmap_external_resource(katom->kctx, NULL, alloc);
        }
        kbase_gpu_vm_unlock(katom->kctx);
 
+       /* Release the process's mmap lock */
+       up_read(&current->mm->mmap_sem);
+
  early_err_out:
        kfree(katom->extres);
        katom->extres = NULL;
@@ -590,8 +429,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
 
 static inline void jd_resolve_dep(struct list_head *out_list,
                                        struct kbase_jd_atom *katom,
-                                       u8 d,
-                                       bool ctx_is_dying)
+                                       u8 d)
 {
        u8 other_d = !d;
 
@@ -608,12 +446,7 @@ static inline void jd_resolve_dep(struct list_head *out_list,
                kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
 
                if (katom->event_code != BASE_JD_EVENT_DONE &&
-                       (dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) {
-                       /* Atom failed, so remove the other dependencies and immediately fail the atom */
-                       if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
-                               list_del(&dep_atom->dep_item[other_d]);
-                               kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]);
-                       }
+                       (dep_type != BASE_JD_DEP_TYPE_ORDER)) {
 #ifdef CONFIG_KDS
                        if (!dep_atom->kds_dep_satisfied) {
                                /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
@@ -626,10 +459,17 @@ static inline void jd_resolve_dep(struct list_head *out_list,
                        dep_atom->event_code = katom->event_code;
                        KBASE_DEBUG_ASSERT(dep_atom->status !=
                                                KBASE_JD_ATOM_STATE_UNUSED);
-                       dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 
-                       list_add_tail(&dep_atom->dep_item[0], out_list);
-               } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+                       if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY)
+                                       != BASE_JD_REQ_SOFT_REPLAY) {
+                               dep_atom->will_fail_event_code =
+                                       dep_atom->event_code;
+                       } else {
+                               dep_atom->status =
+                                       KBASE_JD_ATOM_STATE_COMPLETED;
+                       }
+               }
+               if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
 #ifdef CONFIG_KDS
                        if (dep_atom->kds_dep_satisfied)
 #endif
@@ -702,7 +542,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 {
        struct kbase_context *kctx = katom->kctx;
        struct kbase_device *kbdev = kctx->kbdev;
-       struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
        struct list_head completed_jobs;
        struct list_head runnable_jobs;
        bool need_to_try_schedule_context = false;
@@ -717,7 +556,6 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
        jd_check_force_failure(katom);
 #endif
 
-
        /* This is needed in case an atom is failed due to being invalid, this
         * can happen *before* the jobs that the atom depends on have completed */
        for (i = 0; i < 2; i++) {
@@ -753,8 +591,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
                KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
 
                for (i = 0; i < 2; i++)
-                       jd_resolve_dep(&runnable_jobs, katom, i,
-                                               js_kctx_info->ctx.is_dying);
+                       jd_resolve_dep(&runnable_jobs, katom, i);
 
                if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
                        kbase_jd_post_external_resources(katom);
@@ -769,7 +606,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
 
                        KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
 
-                       if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) {
+                       if (node->status != KBASE_JD_ATOM_STATE_COMPLETED &&
+                                       !kctx->jctx.sched_info.ctx.is_dying) {
                                need_to_try_schedule_context |= jd_run_atom(node);
                        } else {
                                node->event_code = katom->event_code;
@@ -794,6 +632,12 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
                                list_add_tail(&node->dep_item[0], &completed_jobs);
                }
 
+               /* Completing an atom might have freed up space
+                * in the ringbuffer, but only on that slot. */
+               jsctx_ll_flush_to_rb(kctx,
+                               katom->sched_priority,
+                               katom->slot_nr);
+
                /* Register a completed job as a disjoint event when the GPU
                 * is in a disjoint state (ie. being reset or replaying jobs).
                 */
@@ -881,6 +725,7 @@ bool jd_submit_atom(struct kbase_context *kctx,
        int i;
        int sched_prio;
        bool ret;
+       bool will_fail = false;
 
        /* Update the TOTAL number of jobs. This includes those not tracked by
         * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
@@ -904,6 +749,7 @@ bool jd_submit_atom(struct kbase_context *kctx,
        katom->need_cache_flush_cores_retained = 0;
        katom->x_pre_dep = NULL;
        katom->x_post_dep = NULL;
+       katom->will_fail_event_code = 0;
 #ifdef CONFIG_KDS
        /* Start by assuming that the KDS dependencies are satisfied,
         * kbase_jd_pre_external_resources will correct this if there are dependencies */
@@ -925,13 +771,16 @@ bool jd_submit_atom(struct kbase_context *kctx,
                                        dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
                                katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
                                katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
+                               /* Wrong dependency setup. Atom will be sent
+                                * back to user space. Do not record any
+                                * dependencies. */
                                kbase_tlstream_tl_new_atom(
                                                katom,
                                                kbase_jd_atom_id(kctx, katom));
                                kbase_tlstream_tl_ret_atom_ctx(
                                                katom, kctx);
-#endif
+
                                ret = jd_done_nolock(katom, NULL);
                                goto out;
                        }
@@ -956,7 +805,7 @@ bool jd_submit_atom(struct kbase_context *kctx,
                        if (dep_atom->event_code == BASE_JD_EVENT_DONE)
                                continue;
                        /* don't stop this atom if it has an order dependency
-                        * only to the failed one, try to submit it throught
+                        * only to the failed one, try to submit it through
                         * the normal path
                         */
                        if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
@@ -964,21 +813,18 @@ bool jd_submit_atom(struct kbase_context *kctx,
                                continue;
                        }
 
-                       if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) {
-                               /* Remove the previous dependency */
-                               list_del(&katom->dep_item[0]);
-                               kbase_jd_katom_dep_clear(&katom->dep[0]);
-                       }
-
                        /* Atom has completed, propagate the error code if any */
                        katom->event_code = dep_atom->event_code;
                        katom->status = KBASE_JD_ATOM_STATE_QUEUED;
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
+                       /* This atom is going through soft replay or
+                        * will be sent back to user space. Do not record any
+                        * dependencies. */
                        kbase_tlstream_tl_new_atom(
                                        katom,
                                        kbase_jd_atom_id(kctx, katom));
                        kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
-#endif
+
                        if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
                                         == BASE_JD_REQ_SOFT_REPLAY) {
                                if (kbase_replay_process(katom)) {
@@ -986,9 +832,8 @@ bool jd_submit_atom(struct kbase_context *kctx,
                                        goto out;
                                }
                        }
-                       ret = jd_done_nolock(katom, NULL);
+                       will_fail = true;
 
-                       goto out;
                } else {
                        /* Atom is in progress, add this atom to the list */
                        list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
@@ -997,17 +842,37 @@ bool jd_submit_atom(struct kbase_context *kctx,
                }
        }
 
-       /* These must occur after the above loop to ensure that an atom that
-        * depends on a previous atom with the same number behaves as expected */
-       katom->event_code = BASE_JD_EVENT_DONE;
-       katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+       if (will_fail) {
+               if (!queued) {
+                       ret = jd_done_nolock(katom, NULL);
+
+                       goto out;
+               } else {
+                       katom->will_fail_event_code = katom->event_code;
+                       ret = false;
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+                       goto out;
+               }
+       } else {
+               /* These must occur after the above loop to ensure that an atom
+                * that depends on a previous atom with the same number behaves
+                * as expected */
+               katom->event_code = BASE_JD_EVENT_DONE;
+               katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+       }
+
+       /* Create a new atom recording all dependencies it was set up with. */
        kbase_tlstream_tl_new_atom(
                        katom,
                        kbase_jd_atom_id(kctx, katom));
        kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
-#endif
+       for (i = 0; i < 2; i++)
+               if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
+                                       &katom->dep[i]))
+                       kbase_tlstream_tl_dep_atom_atom(
+                                       (void *)kbase_jd_katom_dep_atom(
+                                               &katom->dep[i]),
+                                       (void *)katom);
 
        /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
        if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
@@ -1068,7 +933,8 @@ bool jd_submit_atom(struct kbase_context *kctx,
 
 #ifdef CONFIG_GPU_TRACEPOINTS
        katom->work_id = atomic_inc_return(&jctx->work_id);
-       trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req));
+       trace_gpu_job_enqueue((u32)kctx->id, katom->work_id,
+                       kbasep_map_core_reqs_to_string(katom->core_req));
 #endif
 
        if (queued && !IS_GPU_ATOM(katom)) {
@@ -1097,8 +963,7 @@ bool jd_submit_atom(struct kbase_context *kctx,
                        ret = jd_done_nolock(katom, NULL);
                        goto out;
                }
-               /* The job has not yet completed */
-               list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+
                ret = false;
        } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
                katom->status = KBASE_JD_ATOM_STATE_IN_JS;
@@ -1130,6 +995,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
        bool need_to_try_schedule_context = false;
        struct kbase_device *kbdev;
        void __user *user_addr;
+       u32 latest_flush;
 
        /*
         * kbase_jd_submit isn't expected to fail and so all errors with the jobs
@@ -1159,6 +1025,9 @@ int kbase_jd_submit(struct kbase_context *kctx,
 
        KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
 
+       /* All atoms submitted in this call have the same flush ID */
+       latest_flush = kbase_backend_get_current_flush_id(kbdev);
+
        for (i = 0; i < submit_data->nr_atoms; i++) {
                struct base_jd_atom_v2 user_atom;
                struct kbase_jd_atom *katom;
@@ -1234,6 +1103,9 @@ while (false)
 #endif
                katom = &jctx->atoms[user_atom.atom_number];
 
+               /* Record the flush ID for the cache flush optimisation */
+               katom->flush_id = latest_flush;
+
                while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
                        /* Atom number is already in use, wait for the atom to
                         * complete
@@ -1329,7 +1201,6 @@ void kbase_jd_done_worker(struct work_struct *data)
 
                mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
                mutex_unlock(&js_devdata->queue_mutex);
-               mutex_unlock(&jctx->lock);
 
                spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
@@ -1337,6 +1208,7 @@ void kbase_jd_done_worker(struct work_struct *data)
                kbase_js_unpull(kctx, katom);
 
                spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+               mutex_unlock(&jctx->lock);
 
                return;
        }
@@ -1521,51 +1393,6 @@ static void jd_cancel_worker(struct work_struct *data)
                kbase_js_sched_all(kbdev);
 }
 
-/**
- * jd_evict_worker - Work queue job evict function
- * @data: a &struct work_struct
- *
- * Only called as part of evicting failed jobs. This is only called on jobs that
- * were never submitted to HW Access. Jobs that were submitted are handled
- * through kbase_jd_done_worker().
- * Operates serially with the kbase_jd_done_worker() on the work queue.
- *
- * We don't need to release most of the resources that would occur on
- * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be
- * running (by virtue of having not been submitted to HW Access).
- */
-static void jd_evict_worker(struct work_struct *data)
-{
-       struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
-                                                                       work);
-       struct kbase_jd_context *jctx;
-       struct kbase_context *kctx;
-       struct kbasep_js_kctx_info *js_kctx_info;
-       struct kbase_device *kbdev;
-
-       /* Soft jobs should never reach this function */
-       KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
-
-       kctx = katom->kctx;
-       kbdev = kctx->kbdev;
-       jctx = &kctx->jctx;
-       js_kctx_info = &kctx->jctx.sched_info;
-
-       KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
-
-       /* Scheduler: Remove the job from the system */
-       mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-       kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
-       mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
-
-       mutex_lock(&jctx->lock);
-       jd_done_nolock(katom, NULL);
-       /* katom may have been freed now, do not use! */
-       mutex_unlock(&jctx->lock);
-
-       kbase_js_sched_all(kbdev);
-}
-
 /**
  * kbase_jd_done - Complete a job that has been removed from the Hardware
  * @katom: atom which has been completed
@@ -1609,7 +1436,8 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
 
 #ifdef CONFIG_DEBUG_FS
        /* a failed job happened and is waiting for dumping*/
-       if (kbase_debug_job_fault_process(katom, katom->event_code))
+       if (!katom->will_fail_event_code &&
+                       kbase_debug_job_fault_process(katom, katom->event_code))
                return;
 #endif
 
@@ -1647,30 +1475,6 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
        queue_work(kctx->jctx.job_done_wq, &katom->work);
 }
 
-void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
-{
-       struct kbase_context *kctx;
-       struct kbasep_js_kctx_info *js_kctx_info;
-
-       KBASE_DEBUG_ASSERT(NULL != kbdev);
-       KBASE_DEBUG_ASSERT(NULL != katom);
-       kctx = katom->kctx;
-       KBASE_DEBUG_ASSERT(NULL != kctx);
-
-       js_kctx_info = &kctx->jctx.sched_info;
-
-       KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
-
-       /* This should only be done from a context that is currently scheduled
-        */
-       KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
-
-       WARN_ON(work_pending(&katom->work));
-
-       KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
-       INIT_WORK(&katom->work, jd_evict_worker);
-       queue_work(kctx->jctx.job_done_wq, &katom->work);
-}
 
 void kbase_jd_zap_context(struct kbase_context *kctx)
 {
@@ -1693,6 +1497,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx)
         * queued outside the job scheduler.
         */
 
+       hrtimer_cancel(&kctx->soft_event_timeout);
        list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
                katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]);
                kbase_cancel_soft_job(katom);
index b37f280a6475a0a7940a746f2d0f17ed74bd435b..0cf75f59c282e77ad9d6a37bc863e7654c9e6181 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -17,6 +17,8 @@
 
 #include <linux/seq_file.h>
 
+#include <mali_kbase.h>
+
 #include <mali_kbase_jd_debugfs.h>
 
 #ifdef CONFIG_DEBUG_FS
@@ -41,6 +43,13 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
 
        KBASE_DEBUG_ASSERT(kctx != NULL);
 
+       /* Print version */
+       seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION);
+
+       /* Print U/K API version */
+       seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR,
+                       BASE_UK_VERSION_MINOR);
+
        /* Print table heading */
        seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
 
index 703e4cf6a5f4e27ea0c9d3abba39ba0704da9e45..bc1878f60e8e0a09cf058447ccf0210e67a90300 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,8 @@
 
 #include <mali_kbase.h>
 
+#define MALI_JD_DEBUGFS_VERSION 1
+
 /**
  * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
  *
index 54b8d9bcd1e19c029544356e02b1f143ad8a4ea7..83228c05883bb1f8ce8a044d1fa1f3bc9dcb91b7 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,9 +25,7 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include <mali_kbase_gator.h>
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 #include <mali_kbase_hw.h>
 
 #include <mali_kbase_defs.h>
@@ -79,13 +77,6 @@ static int kbase_js_get_slot(struct kbase_device *kbdev,
 static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
                kbasep_js_policy_ctx_job_cb callback);
 
-static bool kbase_js_evict_atom(struct kbase_context *kctx,
-                               struct kbase_jd_atom *katom_evict,
-                               struct kbase_jd_atom *start_katom,
-                               struct kbase_jd_atom *head_katom,
-                               struct list_head *evict_list,
-                               struct jsctx_rb *rb, int idx);
-
 /* Helper for trace subcodes */
 #if KBASE_TRACE_ENABLE
 static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
@@ -239,25 +230,31 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
        return result;
 }
 
-/**
- * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty
- * @kctx: Pointer to kbase context with ring buffer.
- * @js:   Job slot id to check.
- * @prio: Priority to check.
- *
- * Caller must hold runpool_irq.lock
- *
- * Return: true if the ring buffer is empty, false otherwise.
- */
-static inline bool
-jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio)
-{
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+/* Helper macros to access and modify jsctx_queue.indicies */
+#define JSCTX_GET(offset, var, mask) \
+       ((var >> offset) & mask)
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+/* This wraps around to correct integer size automatically. */
+#define JSCTX_SET(var, offset, value, mask) \
+       (var = ((var & ~(mask << offset)) /*Clear old bits */ \
+       | (((value) & mask) << offset))) /* Set (after masking) new bits */
 
-       return rb->running_idx == rb->write_idx;
-}
+#define JSCTX_GET_WR_IDX(var) \
+       JSCTX_GET(JSCTX_WR_OFFSET, var, JSCTX_RB_MASK_STORE)
+#define JSCTX_GET_RN_IDX(var) \
+       JSCTX_GET(JSCTX_RN_OFFSET, var, JSCTX_RB_MASK_STORE)
+#define JSCTX_GET_RD_IDX(var) \
+       JSCTX_GET(JSCTX_RD_OFFSET, var, JSCTX_RB_MASK_STORE)
+
+#define JSCTX_GET_IDX_DIFF(lower, upper) \
+       ((upper >= lower) ? (upper - lower) : (upper+JSCTX_RB_SIZE_STORE-lower))
+
+#define JSCTX_SET_WR_IDX(var, value) \
+       JSCTX_SET(var, JSCTX_WR_OFFSET, value, JSCTX_RB_MASK_STORE)
+#define JSCTX_SET_RN_IDX(var, value) \
+       JSCTX_SET(var, JSCTX_RN_OFFSET, value, JSCTX_RB_MASK_STORE)
+#define JSCTX_SET_RD_IDX(var, value) \
+       JSCTX_SET(var, JSCTX_RD_OFFSET, value, JSCTX_RB_MASK_STORE)
 
 /**
  * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms
@@ -270,18 +267,15 @@ jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio)
  * ring buffer to be full (with running atoms) when this functions returns
  * true.
  *
- * Caller must hold runpool_irq.lock
- *
  * Return: true if there are no atoms to pull, false otherwise.
  */
 static inline bool
 jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio)
 {
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+       unsigned int var = atomic_read(&rb->indicies);
 
-       return rb->read_idx == rb->write_idx;
+       return JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var);
 }
 
 /**
@@ -311,55 +305,29 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js)
 }
 
 /**
- * jsctx_rb_compact_prio(): - Compact a ring buffer
- * @kctx: Pointer to kbase context with ring buffer.
- * @js:   Job slot id to compact.
- * @prio: Priority id to compact.
- */
-static inline void
-jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio)
+  * jsctx_rb_is_full(): - Check if the given ringbuffer is full.
+  * @queue: Pointer to the queue containing the ringbuffer.
+  *
+  * No locks explicitly required, result will always be consistent.
+  * But depending on usage, the caller should consider jctx.lock,
+  * for the result to remain correct.
+  *
+  * Return: true if the ringbuffer is full, false otherwise.
+  */
+static inline bool
+jsctx_rb_is_full(struct jsctx_queue *queue)
 {
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-       u16 compact_idx = rb->write_idx - 1;
-       u16 end_idx = rb->running_idx - 1;
-       u16 i;
-
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-       lockdep_assert_held(&kctx->jctx.lock);
-
-       for (i = compact_idx; i != end_idx; i--) {
-               if (rb->entries[i & JSCTX_RB_MASK].atom_id !=
-                               KBASEP_ATOM_ID_INVALID) {
-                       WARN_ON(compact_idx < rb->running_idx);
-                       rb->entries[compact_idx & JSCTX_RB_MASK].atom_id =
-                                       rb->entries[i & JSCTX_RB_MASK].atom_id;
-
-                       compact_idx--;
-               }
-               if (rb->read_idx == i)
-                       rb->read_idx = compact_idx + 1;
-       }
+       unsigned int var = atomic_read(&queue->indicies);
+       u16 rn_idx = JSCTX_GET_RN_IDX(var);
+       u16 wr_idx = JSCTX_GET_WR_IDX(var);
 
-       rb->running_idx = compact_idx + 1;
+       return JSCTX_GET_IDX_DIFF(rn_idx, wr_idx) >= JSCTX_RB_SIZE;
 }
 
-/**
- * jsctx_rb_compact(): - Compact all priority ring buffers
- * @kctx: Pointer to kbase context with ring buffer.
- * @js:   Job slot id to compact.
- */
-static inline void
-jsctx_rb_compact(struct kbase_context *kctx, int js)
-{
-       int prio;
-
-       for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
-               jsctx_rb_compact_prio(kctx, js, prio);
-}
 
 /**
- * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer
- * @kctx:     Pointer to kbase context with ring buffer.
+ * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue.
+ * @kctx:     Pointer to kbase context with the queue.
  * @js:       Job slot id to iterate.
  * @prio:     Priority id to iterate.
  * @callback: Function pointer to callback.
@@ -376,50 +344,64 @@ jsctx_rb_compact(struct kbase_context *kctx, int js)
  * calling this function.
  */
 static void
-jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio,
+jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio,
                kbasep_js_policy_ctx_job_cb callback)
 {
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+       struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
        struct kbase_jd_atom *katom;
-       u16 write_idx = ACCESS_ONCE(rb->write_idx);
+
+       struct list_head *pos, *q;
+
+       unsigned int var = atomic_read(&queue->indicies);
+       u16 running_idx = JSCTX_GET_RN_IDX(var);
+       u16 read_idx = JSCTX_GET_RD_IDX(var);
+       u16 wr_idx = JSCTX_GET_WR_IDX(var);
+       u16 i;
+       const u16 count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx);
 
        lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
        /* There must be no jobs currently in HW access */
-       WARN_ON(rb->read_idx != rb->running_idx);
+       WARN_ON(read_idx != JSCTX_GET_RN_IDX(var));
 
        /* Invoke callback on all kbase_jd_atoms in the ring buffer, and
         * removes them from the buffer */
-       while (rb->read_idx != write_idx) {
-               int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id;
+       for (i = 0; i < count; i++) {
+               int id = queue->entries[read_idx & JSCTX_RB_MASK].atom_id;
 
                katom = kbase_jd_atom_from_id(kctx, id);
+               read_idx++;
+               callback(kctx->kbdev, katom);
+       }
+       atomic_set(&queue->indicies, 0);
 
-               rb->read_idx++;
-               rb->running_idx++;
+       list_for_each_safe(pos, q, &queue->queue_head) {
+               struct kbase_jd_atom *entry;
 
-               callback(kctx->kbdev, katom);
+               entry = list_entry(pos, struct kbase_jd_atom, queue);
+               list_del(pos);
+               callback(kctx->kbdev, entry);
        }
 }
 
 /**
- * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb
- * @kctx:     Pointer to kbase context with ring buffer.
+ * jsctx_queue_foreach(): - Execute callback for each entry in every queue
+ * @kctx:     Pointer to kbase context with queue.
  * @js:       Job slot id to iterate.
  * @callback: Function pointer to callback.
  *
  * Iterate over all the different priorities, and for each call
- * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback
- * for each entry in buffer, and remove the entry from the buffer.
+ * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback
+ * for each entry, and remove the entry from the queue.
  */
 static inline void
-jsctx_rb_foreach(struct kbase_context *kctx, int js,
+jsctx_queue_foreach(struct kbase_context *kctx, int js,
                kbasep_js_policy_ctx_job_cb callback)
 {
        int prio;
 
        for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
-               jsctx_rb_foreach_prio(kctx, js, prio, callback);
+               jsctx_queue_foreach_prio(kctx, js, prio, callback);
 }
 
 /**
@@ -436,15 +418,16 @@ jsctx_rb_foreach(struct kbase_context *kctx, int js,
 static inline struct kbase_jd_atom *
 jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
 {
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+       struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
        int id;
+       unsigned int var = atomic_read(&rb->indicies);
 
        lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
-       if (jsctx_rb_none_to_pull_prio(kctx, js, prio))
+       if (JSCTX_GET_RD_IDX(var) == JSCTX_GET_WR_IDX(var))
                return NULL;
 
-       id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id;
+       id = rb->entries[JSCTX_GET_RD_IDX(var) & JSCTX_RB_MASK].atom_id;
        return kbase_jd_atom_from_id(kctx, id);
 }
 
@@ -457,6 +440,8 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
  * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
  * pointer to the next atom, unless all the priority's ring buffers are empty.
  *
+ * Caller must hold the runpool_irq.lock.
+ *
  * Return: Pointer to next atom in buffer, or NULL if there is no atom.
  */
 static inline struct kbase_jd_atom *
@@ -464,6 +449,8 @@ jsctx_rb_peek(struct kbase_context *kctx, int js)
 {
        int prio;
 
+       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
        for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
                struct kbase_jd_atom *katom;
 
@@ -491,16 +478,21 @@ jsctx_rb_peek(struct kbase_context *kctx, int js)
 static inline struct kbase_jd_atom *
 jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio)
 {
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+       struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+       unsigned int var = atomic_read(&rb->indicies);
        int id;
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
        lockdep_assert_held(&kctx->jctx.lock);
 
-       if (jsctx_rb_is_empty_prio(kctx, js, prio))
+       if (!list_empty(&rb->queue_head)) {
+               return list_entry(rb->queue_head.prev,
+                               struct kbase_jd_atom, queue);
+       }
+
+       if (JSCTX_GET_RN_IDX(var) == JSCTX_GET_WR_IDX(var))
                return NULL;
 
-       id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id;
+       id = rb->entries[(JSCTX_GET_WR_IDX(var) - 1) & JSCTX_RB_MASK].atom_id;
        return kbase_jd_atom_from_id(kctx, id);
 }
 
@@ -518,14 +510,23 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
        int prio = katom->sched_priority;
        int js = katom->slot_nr;
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+       unsigned int oldvar, var;
+       struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 
        lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
        /* Atoms must be pulled in the correct order. */
        WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
 
-       rb->read_idx++;
+       do {
+               u16 rd_idx;
+
+               oldvar = atomic_read(&rb->indicies);
+               var = oldvar;
+               rd_idx = JSCTX_GET_RD_IDX(var);
+
+               JSCTX_SET_RD_IDX(var, rd_idx+1);
+       } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
 }
 
 /**
@@ -543,15 +544,27 @@ jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
        int prio = katom->sched_priority;
        int js = katom->slot_nr;
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+       unsigned int oldvar, var;
+       struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+
 
        lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
-       /* Atoms must be unpulled in correct order. */
-       WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id !=
-                       kbase_jd_atom_id(kctx, katom));
+       do {
+               u16 rd_idx;
 
-       rb->read_idx--;
+               oldvar = atomic_read(&rb->indicies);
+               var = oldvar;
+
+
+               rd_idx = JSCTX_GET_RD_IDX(var)-1;
+
+               /* Atoms must be unpulled in correct order. */
+               WARN_ON(rb->entries[rd_idx & JSCTX_RB_MASK].atom_id !=
+                               kbase_jd_atom_id(kctx, katom));
+
+               JSCTX_SET_RD_IDX(var, rd_idx);
+       } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
 }
 
 /**
@@ -571,18 +584,36 @@ jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
        int prio = katom->sched_priority;
        int js = katom->slot_nr;
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+       struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
+       unsigned int oldvar, var;
+       u16 wr_idx, running_idx, count;
 
        lockdep_assert_held(&kctx->jctx.lock);
 
+       oldvar = atomic_read(&rb->indicies);
+       var = oldvar;
+
+       running_idx = JSCTX_GET_RN_IDX(var);
+       wr_idx = JSCTX_GET_WR_IDX(var);
+       count = JSCTX_GET_IDX_DIFF(running_idx, wr_idx);
+
        /* Check if the ring buffer is full */
-       if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE)
+       if (count >= JSCTX_RB_SIZE)
                return -EBUSY;
 
-       rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id =
-                       kbase_jd_atom_id(kctx, katom);
-       rb->write_idx++;
+       rb->entries[wr_idx & JSCTX_RB_MASK].atom_id =
+               kbase_jd_atom_id(kctx, katom);
+
+       wr_idx++;
+       JSCTX_SET_WR_IDX(var, wr_idx);
 
+       while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar) {
+               oldvar = atomic_read(&rb->indicies);
+               var = oldvar;
+               wr_idx = JSCTX_GET_WR_IDX(var)+1;
+
+               JSCTX_SET_WR_IDX(var, wr_idx);
+       }
        return 0;
 }
 
@@ -602,73 +633,129 @@ jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
        int prio = katom->sched_priority;
        int js = katom->slot_nr;
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+       unsigned int oldvar, var;
+       struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js];
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->jctx.lock);
 
-       /* Atoms must be completed in order. */
-       WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id !=
-                       kbase_jd_atom_id(kctx, katom));
 
-       rb->running_idx++;
+       do {
+               unsigned int rn_idx;
+
+               oldvar = atomic_read(&rb->indicies);
+               var = oldvar;
+
+               rn_idx = JSCTX_GET_RN_IDX(var);
+
+               JSCTX_SET_RN_IDX(var, rn_idx+1);
+       } while (atomic_cmpxchg(&rb->indicies, oldvar, var) != oldvar);
 }
 
-/**
- * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer
- * @kctx:        Pointer to kbase context with ring buffer.
- * @start_katom: Pointer to the first katom to evict.
- * @head_katom:  Pointer to head katom.
- * @evict_list:  Pointer to head of list where evicted atoms are added.
- *
- * Iterate over the ring buffer starting at @start_katom and evict @start_atom
- * and dependent atoms in ring buffer.
- *
- * @evict_list and @head_katom is passed on to kbase_js_evict_atom() which will
- * examine the atom dependencies.
- *
- * jsctx_rb_evict() is only called by kbase_js_evict_deps().
- */
+
 static void
-jsctx_rb_evict(struct kbase_context *kctx,
-               struct kbase_jd_atom *start_katom,
-               struct kbase_jd_atom *head_katom,
-               struct list_head *evict_list)
+jsctx_ll_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
-       int prio = start_katom->sched_priority;
-       int js = start_katom->slot_nr;
-       struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
-       bool atom_in_rb = false;
-       u16 i, start_idx;
+       int prio = katom->sched_priority;
+       int js = katom->slot_nr;
+       struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
 
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
        lockdep_assert_held(&kctx->jctx.lock);
 
-       for (i = rb->running_idx; i != rb->write_idx; i++) {
-               if (rb->entries[i & JSCTX_RB_MASK].atom_id ==
-                               kbase_jd_atom_id(kctx, start_katom)) {
-                       start_idx = i;
-                       atom_in_rb = true;
+       list_add_tail(&katom->queue, &queue->queue_head);
+}
+
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx,
+                                       int js,
+                                       bool is_scheduled);
+static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
+                                               struct kbase_context *kctx,
+                                               int js);
+static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
+                                               struct kbase_context *kctx,
+                                               int js);
+
+void
+jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js)
+{
+       unsigned long flags;
+       struct list_head *pos, *q;
+       struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
+       bool flushed_any = false;
+       struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+       bool enqueue_required = false;
+
+       lockdep_assert_held(&kctx->jctx.lock);
+
+
+       /* Early out for common case */
+       if (list_empty(&queue->queue_head) || jsctx_rb_is_full(queue))
+               return;
+
+
+       mutex_lock(&js_devdata->queue_mutex);
+       mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+       mutex_lock(&js_devdata->runpool_mutex);
+
+
+       spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, flags);
+       /* If slot will transition from unpullable to pullable then add to
+        * pullable list */
+       if (jsctx_rb_none_to_pull(kctx, js))
+               enqueue_required = true;
+       else
+               enqueue_required = false;
+
+       list_for_each_safe(pos, q, &queue->queue_head) {
+               struct kbase_jd_atom *katom;
+
+               katom = list_entry(pos, struct kbase_jd_atom, queue);
+
+               KBASE_DEBUG_ASSERT(katom);
+
+               if (jsctx_rb_add_atom(kctx, katom))
                        break;
-               }
+
+               katom->atom_flags &= ~KBASE_KATOM_FLAG_JSCTX_IN_LL;
+               katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+               flushed_any = true;
+
+               list_del(pos);
        }
 
-       /* start_katom must still be in ring buffer. */
-       if (i == rb->write_idx || !atom_in_rb)
-               return;
 
-       /* Evict all dependencies on same slot. */
-       for (i = start_idx; i != rb->write_idx; i++) {
-               u8 katom_evict;
+       if (flushed_any) {
+               bool timer_sync = false;
 
-               katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id;
-               if (katom_evict != KBASEP_ATOM_ID_INVALID) {
-                       if (!kbase_js_evict_atom(kctx,
-                                               &kctx->jctx.atoms[katom_evict],
-                                               start_katom, head_katom,
-                                               evict_list, rb, i))
-                               break;
+               if (enqueue_required) {
+                       if (kbase_js_ctx_pullable(kctx, js, false))
+                               timer_sync = kbase_js_ctx_list_add_pullable(
+                                               kctx->kbdev, kctx, js);
+                       else
+                               timer_sync = kbase_js_ctx_list_add_unpullable(
+                                               kctx->kbdev, kctx, js);
+                       /* If this context is active and the atom is the first
+                        * on its slot, kick the job manager to attempt to
+                        * fast-start the atom */
+                       if (kctx == kctx->kbdev->hwaccess.active_kctx)
+                               kbase_jm_try_kick(kctx->kbdev, 1 << js);
+
+                       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+                                       flags);
+
+                       if (timer_sync)
+                               kbase_backend_ctx_count_changed(kctx->kbdev);
+
+               } else {
+                       spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+                                       flags);
                }
+       } else {
+               spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
        }
+       mutex_unlock(&js_devdata->runpool_mutex);
+       mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+       mutex_unlock(&js_devdata->queue_mutex);
+
 }
 
 /*
@@ -742,6 +829,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
                DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
        jsdd->cfs_ctx_runtime_min_slices =
                DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+       atomic_set(&jsdd->soft_event_timeout_ms, DEFAULT_JS_SOFT_EVENT_TIMEOUT);
 
        dev_dbg(kbdev->dev, "JS Config Attribs: ");
        dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
@@ -768,6 +856,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
                        jsdd->cfs_ctx_runtime_init_slices);
        dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u",
                        jsdd->cfs_ctx_runtime_min_slices);
+       dev_dbg(kbdev->dev, "\tsoft_event_timeout:%i",
+               atomic_read(&jsdd->soft_event_timeout_ms));
 
        if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
                        jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
@@ -872,7 +962,7 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx)
        struct kbase_device *kbdev;
        struct kbasep_js_kctx_info *js_kctx_info;
        int err;
-       int i;
+       int i, j;
 
        KBASE_DEBUG_ASSERT(kctx != NULL);
 
@@ -912,6 +1002,13 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx)
        if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
                return -EINVAL;
 
+       for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) {
+               for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) {
+                       INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].queue_head);
+                       atomic_set(&kctx->jsctx_queue[i][j].indicies, 0);
+               }
+       }
+
        return 0;
 }
 
@@ -921,6 +1018,7 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
        struct kbasep_js_kctx_info *js_kctx_info;
        union kbasep_js_policy *js_policy;
        int js;
+       bool update_ctx_count = false;
 
        KBASE_DEBUG_ASSERT(kctx != NULL);
 
@@ -937,14 +1035,31 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
        }
 
        mutex_lock(&kbdev->js_data.queue_mutex);
+       mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
        for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
                list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+       if (kctx->ctx_runnable_ref) {
+               WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0);
+               atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+               update_ctx_count = true;
+               kctx->ctx_runnable_ref = false;
+       }
+
+       mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
        mutex_unlock(&kbdev->js_data.queue_mutex);
 
        if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
                kbasep_js_policy_term_ctx(js_policy, kctx);
 
        js_kctx_info->init_status = JS_KCTX_INIT_NONE;
+
+       if (update_ctx_count) {
+               mutex_lock(&kbdev->js_data.runpool_mutex);
+               kbase_backend_ctx_count_changed(kbdev);
+               mutex_unlock(&kbdev->js_data.runpool_mutex);
+       }
 }
 
 /**
@@ -982,8 +1097,11 @@ static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
        if (!kctx->slots_pullable) {
                kbdev->js_data.nr_contexts_pullable++;
                ret = true;
-               if (!atomic_read(&kctx->atoms_pulled))
+               if (!atomic_read(&kctx->atoms_pulled)) {
+                       WARN_ON(kctx->ctx_runnable_ref);
+                       kctx->ctx_runnable_ref = true;
                        atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+               }
        }
        kctx->slots_pullable |= (1 << js);
 
@@ -1025,8 +1143,11 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
        if (!kctx->slots_pullable) {
                kbdev->js_data.nr_contexts_pullable++;
                ret = true;
-               if (!atomic_read(&kctx->atoms_pulled))
+               if (!atomic_read(&kctx->atoms_pulled)) {
+                       WARN_ON(kctx->ctx_runnable_ref);
+                       kctx->ctx_runnable_ref = true;
                        atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+               }
        }
        kctx->slots_pullable |= (1 << js);
 
@@ -1065,8 +1186,11 @@ static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
        if (kctx->slots_pullable == (1 << js)) {
                kbdev->js_data.nr_contexts_pullable--;
                ret = true;
-               if (!atomic_read(&kctx->atoms_pulled))
+               if (!atomic_read(&kctx->atoms_pulled)) {
+                       WARN_ON(!kctx->ctx_runnable_ref);
+                       kctx->ctx_runnable_ref = false;
                        atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+               }
        }
        kctx->slots_pullable &= ~(1 << js);
 
@@ -1105,8 +1229,11 @@ static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
        if (kctx->slots_pullable == (1 << js)) {
                kbdev->js_data.nr_contexts_pullable--;
                ret = true;
-               if (!atomic_read(&kctx->atoms_pulled))
+               if (!atomic_read(&kctx->atoms_pulled)) {
+                       WARN_ON(!kctx->ctx_runnable_ref);
+                       kctx->ctx_runnable_ref = false;
                        atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+               }
        }
        kctx->slots_pullable &= ~(1 << js);
 
@@ -1177,7 +1304,8 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
                return false; /* next atom blocked */
        if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
                if (katom->x_pre_dep->gpu_rb_state ==
-                                       KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)
+                                       KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+                                       katom->x_pre_dep->will_fail_event_code)
                        return false;
                if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
                                kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
@@ -1206,7 +1334,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx,
 
                        /* Dependent atom must already have been submitted */
                        if (!(dep_atom->atom_flags &
-                                       KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) {
+                                       (KBASE_KATOM_FLAG_JSCTX_IN_LL |
+                                       KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))){
                                ret = false;
                                break;
                        }
@@ -1368,6 +1497,10 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
                /* Dependencies could not be represented */
                --(js_kctx_info->ctx.nr_jobs);
 
+               /* Setting atom status back to queued as it still has unresolved
+                * dependencies */
+               atom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
                spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
                mutex_unlock(&js_devdata->runpool_mutex);
 
@@ -1376,17 +1509,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx,
 
        KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
 
-       if (kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) {
-               /* Ringbuffer was full (should be impossible) - fail the job */
-               --(js_kctx_info->ctx.nr_jobs);
-
-               spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
-               mutex_unlock(&js_devdata->runpool_mutex);
-
-               atom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-
-               goto out_unlock;
-       }
+       enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
 
        KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
                                kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
@@ -1728,10 +1851,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
                kbase_trace_mali_mmu_as_released(kctx->as_nr);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
                kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
-               kbase_tlstream_tl_nret_gpu_ctx(kbdev, kctx);
-#endif
 
                kbase_backend_release_ctx_irq(kbdev, kctx);
 
@@ -2076,10 +2196,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
        kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
-       kbase_tlstream_tl_ret_gpu_ctx(kbdev, kctx);
        kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx);
-#endif
 
        /* Cause any future waiter-on-termination to wait until the context is
         * descheduled */
@@ -2365,37 +2482,78 @@ static int kbase_js_get_slot(struct kbase_device *kbdev,
        return 1;
 }
 
-int kbase_js_dep_resolved_submit(struct kbase_context *kctx,
-                                       struct kbase_jd_atom *katom,
-                                       bool *enqueue_required)
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+                                       struct kbase_jd_atom *katom)
 {
+       bool enqueue_required;
+
        katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
 
        lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+       lockdep_assert_held(&kctx->jctx.lock);
 
        /* If slot will transition from unpullable to pullable then add to
         * pullable list */
        if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
-               *enqueue_required = true;
+               enqueue_required = true;
        } else {
-               *enqueue_required = false;
+               enqueue_required = false;
        }
        /* Check if there are lower priority jobs to soft stop */
        kbase_job_slot_ctx_priority_check_locked(kctx, katom);
 
        /* Add atom to ring buffer. */
-       if (unlikely(jsctx_rb_add_atom(kctx, katom))) {
-               /* The ring buffer is full. This should be impossible as the
-                * job dispatcher can not submit enough atoms to exceed the
-                * ring buffer size. Fail the job.
-                */
-               WARN(1, "Job submit while JSCTX ringbuffer already full\n");
-               return -EINVAL;
+       if (jsctx_rb_add_atom(kctx, katom)) {
+               jsctx_ll_add(kctx, katom);
+               enqueue_required = false;
+               katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_LL;
+       } else {
+               katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
        }
+       return enqueue_required;
+}
+
+/**
+ * kbase_js_evict_deps - Evict dependencies of a failed atom.
+ * @kctx:       Context pointer
+ * @katom:      Pointer to the atom that has failed.
+ * @js:         The job slot the katom was run on.
+ * @prio:       Priority of the katom.
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propagated to all dependent atoms.
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+                               struct kbase_jd_atom *katom, int js, int prio)
+{
+       struct kbase_jd_atom *x_dep = katom->x_post_dep;
+       struct kbase_jd_atom *next_katom = jsctx_rb_peek_prio(kctx, js, prio);
 
-       katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
-       return 0;
+       if (next_katom &&
+                       (next_katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV)) {
+               KBASE_DEBUG_ASSERT(next_katom->status !=
+                               KBASE_JD_ATOM_STATE_HW_COMPLETED);
+
+               next_katom->will_fail_event_code = katom->event_code;
+
+       }
+
+       /* Has cross slot dependency. */
+       if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_LL |
+                                       KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED))) {
+               /* Remove dependency.*/
+               x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+               /* Fail if it had a data dependency. */
+               if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+                       x_dep->will_fail_event_code = katom->event_code;
+               }
+       }
 }
 
 struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
@@ -2435,7 +2593,8 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 
        if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
                if (katom->x_pre_dep->gpu_rb_state ==
-                                       KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)
+                                       KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB ||
+                                       katom->x_pre_dep->will_fail_event_code)
                        return NULL;
                if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
                                kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
@@ -2444,8 +2603,11 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
 
        kctx->pulled = true;
        pulled = atomic_inc_return(&kctx->atoms_pulled);
-       if (pulled == 1 && !kctx->slots_pullable)
+       if (pulled == 1 && !kctx->slots_pullable) {
+               WARN_ON(kctx->ctx_runnable_ref);
+               kctx->ctx_runnable_ref = true;
                atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+       }
        atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
        jsctx_rb_pull(kctx, katom);
 
@@ -2475,6 +2637,8 @@ static void js_return_worker(struct work_struct *data)
        u64 affinity = katom->affinity;
        enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
 
+       kbase_tlstream_aux_job_softstop_ex(katom);
+
        kbase_backend_complete_wq(kbdev, katom);
 
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
@@ -2497,8 +2661,11 @@ static void js_return_worker(struct work_struct *data)
                timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js);
 
        if (!atomic_read(&kctx->atoms_pulled)) {
-               if (!kctx->slots_pullable)
+               if (!kctx->slots_pullable) {
+                       WARN_ON(!kctx->ctx_runnable_ref);
+                       kctx->ctx_runnable_ref = false;
                        atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+               }
 
                if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
                                !js_kctx_info->ctx.is_dying) {
@@ -2563,112 +2730,6 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
        queue_work(kctx->jctx.job_done_wq, &katom->work);
 }
 
-static bool kbase_js_evict_atom(struct kbase_context *kctx,
-                               struct kbase_jd_atom *katom_evict,
-                               struct kbase_jd_atom *start_katom,
-                               struct kbase_jd_atom *head_katom,
-                               struct list_head *evict_list,
-                               struct jsctx_rb *rb, int idx)
-{
-       struct kbase_jd_atom *x_dep = katom_evict->x_post_dep;
-
-       if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
-                                               katom_evict != start_katom)
-               return false;
-
-       if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
-               WARN_ON(katom_evict->event_code != head_katom->event_code);
-
-               return false;
-       }
-
-       if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED &&
-                                               katom_evict != head_katom)
-               return false;
-
-       /* Evict cross dependency if present */
-       if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)
-                       && (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER))
-               list_add_tail(&x_dep->dep_item[0], evict_list);
-
-       /* If cross dependency is present and does not have a data dependency
-        * then unblock */
-       if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)
-                       && !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER))
-               x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
-
-       if (katom_evict != head_katom) {
-               rb->entries[idx & JSCTX_RB_MASK].atom_id =
-                               KBASEP_ATOM_ID_INVALID;
-
-               katom_evict->event_code = head_katom->event_code;
-               katom_evict->atom_flags &=
-                                       ~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
-
-               if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF)
-                       kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL,
-                                                                       0);
-               else
-                       kbase_jd_evict(kctx->kbdev, katom_evict);
-       }
-
-       return true;
-}
-
-/**
- * kbase_js_evict_deps - Evict dependencies
- * @kctx:       Context pointer
- * @head_katom: Pointer to the atom to evict
- *
- * Remove all post dependencies of an atom from the context ringbuffers.
- *
- * The original atom's event_code will be propogated to all dependent atoms.
- *
- * Context: Caller must hold both jctx and HW access locks
- */
-static void kbase_js_evict_deps(struct kbase_context *kctx,
-                               struct kbase_jd_atom *head_katom)
-{
-       struct list_head evict_list;
-
-       lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
-       lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
-
-       INIT_LIST_HEAD(&evict_list);
-
-       list_add_tail(&head_katom->dep_item[0], &evict_list);
-
-       while (!list_empty(&evict_list)) {
-               struct kbase_jd_atom *start_katom;
-
-               start_katom = list_entry(evict_list.prev, struct kbase_jd_atom,
-                                                               dep_item[0]);
-               list_del(evict_list.prev);
-
-               jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list);
-       }
-}
-
-/**
- * kbase_js_compact - Compact JSCTX ringbuffers
- * @kctx:  Context pointer
- *
- * Compact the JSCTX ringbuffers, removing any NULL entries
- *
- * Context: Caller must hold both jctx and HW access locks
- */
-static void kbase_js_compact(struct kbase_context *kctx)
-{
-       struct kbase_device *kbdev = kctx->kbdev;
-       int js;
-
-       lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
-       lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
-
-       for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
-               jsctx_rb_compact(kctx, js);
-}
-
 bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
                                                struct kbase_jd_atom *katom)
 {
@@ -2692,20 +2753,19 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx,
        spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 
        if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) {
-               if (katom->event_code != BASE_JD_EVENT_DONE)
-                       kbase_js_evict_deps(kctx, katom);
-
                jsctx_rb_remove(kctx, katom);
 
                context_idle = !atomic_dec_return(&kctx->atoms_pulled);
                atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
 
-               if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable)
+               if (!atomic_read(&kctx->atoms_pulled) &&
+                               !kctx->slots_pullable) {
+                       WARN_ON(!kctx->ctx_runnable_ref);
+                       kctx->ctx_runnable_ref = false;
                        atomic_dec(&kbdev->js_data.nr_contexts_runnable);
-
-               if (katom->event_code != BASE_JD_EVENT_DONE)
-                       kbase_js_compact(kctx);
+               }
        }
+       WARN_ON(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_LL);
 
        if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
                        jsctx_rb_none_to_pull(kctx, atom_slot))
@@ -2769,19 +2829,31 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
 
        lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
 
+       if (katom->will_fail_event_code)
+               katom->event_code = katom->will_fail_event_code;
+
        katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
 
+       if (katom->event_code != BASE_JD_EVENT_DONE) {
+               kbase_js_evict_deps(kctx, katom, katom->slot_nr,
+                               katom->sched_priority);
+       }
+
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
        kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
                                katom->slot_nr), NULL, 0);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
+
        kbase_tlstream_tl_nret_atom_lpu(
                        katom,
                        &kbdev->gpu_props.props.raw_props.js_features[
                                katom->slot_nr]);
        kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]);
-#endif
+       kbase_tlstream_tl_nret_ctx_lpu(
+                       kctx,
+                       &kbdev->gpu_props.props.raw_props.js_features[
+                               katom->slot_nr]);
+
        /* Calculate the job's time used */
        if (end_timestamp != NULL) {
                /* Only calculating it for jobs that really run on the HW (e.g.
@@ -3148,7 +3220,6 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
        u32 js;
 
        kbdev = kctx->kbdev;
-
        js_devdata = &kbdev->js_data;
 
        spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
@@ -3158,7 +3229,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
 
        /* Invoke callback on jobs on each slot in turn */
        for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
-               jsctx_rb_foreach(kctx, js, callback);
+               jsctx_queue_foreach(kctx, js, callback);
 
        spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
 }
index 868c6808d628c19e6cd90fa85396113a19cd8e72..bdb820a7e9526cb4c95b8770aa9372f7573a4d48 100755 (executable)
@@ -491,15 +491,22 @@ void kbasep_js_resume(struct kbase_device *kbdev);
  * @param[in] kctx  Context pointer
  * @param[in] atom  Pointer to the atom to submit
  *
- * @return 0 if submit succeeded
- *         error code if the atom can not be submitted at this
- *         time, due to insufficient space in the ringbuffer, or dependencies
- *         that can not be represented.
- */
-int kbase_js_dep_resolved_submit(struct kbase_context *kctx,
-                                       struct kbase_jd_atom *katom,
-                                       bool *enqueue_required);
+ * @return true if the context needs to be enqueued, false otherwise. */
+bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+                                       struct kbase_jd_atom *katom);
 
+/**
+  * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer.
+  * @kctx:  Context Pointer
+  * @prio:  Priority (specifies the queue together with js).
+  * @js:    Job slot (specifies the queue together with prio).
+  *
+  * Pushes all possible atoms from the linked list to the ringbuffer.
+  * The number of atoms pushed is limited by the free space in the
+  * ringbuffer and by the number of atoms available in the linked list.
+  *
+  */
+void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js);
 /**
  * @brief Pull an atom from a context in the job scheduler for execution.
  *
index 8891bff70c60d19799f8587999e5f7a3a982222f..e6e611b9f415a374b63ec2c03c408247e2bca053 100755 (executable)
@@ -209,13 +209,6 @@ void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kba
 
        /* Transfer attributes held in the context flags for contexts that have submit enabled */
 
-       if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != false) {
-               /* Compute context */
-               runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
-       }
-       /* NOTE: Whether this is a non-compute context depends on the jobs being
-        * run, e.g. it might be submitting jobs with BASE_JD_REQ_ONLY_COMPUTE */
-
        /* ... More attributes can be added here ... */
 
        /* The context should not have been scheduled yet, so ASSERT if this caused
index d65b494a70c934487851004dfae6b2f3d955d6a4..75d4b98b24a5c79c13f0aa7c3108a6a2c6af6bee 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -50,10 +50,7 @@ enum {
        KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
 
        /** Set if the context uses an address space and should be kept scheduled in */
-       KBASE_CTX_FLAG_PRIVILEGED = (1u << 1),
-
-       /** Kernel-side equivalent of BASE_CONTEXT_HINT_ONLY_COMPUTE. Non-mutable after creation flags set */
-       KBASE_CTX_FLAG_HINT_ONLY_COMPUTE = (1u << 2)
+       KBASE_CTX_FLAG_PRIVILEGED = (1u << 1)
 
            /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
 };
@@ -126,15 +123,10 @@ typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct k
  * - The runpool holds a refcount of how many contexts in the runpool have this
  * attribute.
  * - The context holds a refcount of how many atoms have this attribute.
- *
- * Examples of use:
- * - Finding out when there are a mix of @ref BASE_CONTEXT_HINT_ONLY_COMPUTE
- * and ! @ref BASE_CONTEXT_HINT_ONLY_COMPUTE contexts in the runpool
  */
 enum kbasep_js_ctx_attr {
        /** Attribute indicating a context that contains Compute jobs. That is,
-        * @ref BASE_CONTEXT_HINT_ONLY_COMPUTE is \b set and/or the context has jobs of type
-        * @ref BASE_JD_REQ_ONLY_COMPUTE
+        * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
         *
         * @note A context can be both 'Compute' and 'Non Compute' if it contains
         * both types of jobs.
@@ -356,6 +348,9 @@ struct kbasep_js_device_data {
        u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
        u32 cfs_ctx_runtime_min_slices;  /**< Value for  DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
 
+       /** Value for JS_SOFT_EVENT_TIMEOUT */
+       atomic_t soft_event_timeout_ms;
+
        /** List of suspended soft jobs */
        struct list_head suspended_soft_jobs_list;
 
@@ -410,7 +405,7 @@ struct kbasep_js_kctx_info {
         *
         * You may not access any of these members from IRQ context.
         */
-       struct {
+       struct kbase_jsctx {
                struct mutex jsctx_mutex;                   /**< Job Scheduler Context lock */
 
                /** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
index 2094586ff2d39216a5e919d3f90d9593eda5e320..debd0117d45aecaf8210a12c55704c16f858e4d5 100755 (executable)
@@ -708,7 +708,7 @@ void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct k
 bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
 
 /**
- * @brief Requeue a Job back into the the Job Scheduler Policy Run Pool
+ * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
  *
  * This will be used to enqueue a job after its creation and also to requeue
  * a job into the Run Pool that was previously dequeued (running). It notifies
index 2909f20c08b24be197a84d7f28f8eaf878da081f..385d56a8f53d587cec24b8d6877c6cd70ff3f8a9 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,9 @@
 #ifdef CONFIG_DMA_SHARED_BUFFER
 #include <linux/dma-buf.h>
 #endif                         /* CONFIG_DMA_SHARED_BUFFER */
-
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif                         /* CONFIG_UMP */
 #include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/compat.h>
 #include <mali_kbase_hw.h>
 #include <mali_kbase_gator.h>
 #include <mali_kbase_hwaccess_time.h>
-
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 
 /**
  * @brief Check the zone compatibility of two regions.
@@ -392,13 +391,33 @@ int kbase_add_va_region(struct kbase_context *kctx,
        {
                u64 start_pfn;
 
-               tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align);
-               if (!tmp) {
+               /*
+                * Depending on the zone the allocation request is for
+                * we might need to retry it.
+                */
+               do {
+                       tmp = kbase_region_tracker_find_region_meeting_reqs(
+                                       kctx, reg, nr_pages, align);
+                       if (tmp) {
+                               start_pfn = (tmp->start_pfn + align - 1) &
+                                               ~(align - 1);
+                               err = kbase_insert_va_region_nolock(kctx, reg,
+                                               tmp, start_pfn, nr_pages);
+                               break;
+                       }
+
+                       /*
+                        * If the allocation is not from the same zone as JIT
+                        * then don't retry, we're out of VA and there is
+                        * nothing which can be done about it.
+                        */
+                       if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+                                       KBASE_REG_ZONE_CUSTOM_VA)
+                               break;
+               } while (kbase_jit_evict(kctx));
+
+               if (!tmp)
                        err = -ENOMEM;
-                       goto exit;
-               }
-               start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1);
-               err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages);
        }
 
  exit:
@@ -410,7 +429,10 @@ KBASE_EXPORT_TEST_API(kbase_add_va_region);
 /**
  * @brief Initialize the internal region tracker data structure.
  */
-static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg)
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+               struct kbase_va_region *same_va_reg,
+               struct kbase_va_region *exec_reg,
+               struct kbase_va_region *custom_va_reg)
 {
        kctx->reg_rbtree = RB_ROOT;
        kbase_region_tracker_insert(kctx, same_va_reg);
@@ -448,6 +470,11 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
        size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
        u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
        u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+       u64 same_va_pages;
+       int err;
+
+       /* Take the lock as kbase_free_alloced_region requires it */
+       kbase_gpu_vm_lock(kctx);
 
 #if defined(CONFIG_ARM64)
        same_va_bits = VA_BITS;
@@ -464,24 +491,29 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
                same_va_bits = 33;
 #endif
 
-       if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits)
-               return -EINVAL;
+       if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) {
+               err = -EINVAL;
+               goto fail_unlock;
+       }
 
+       same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
        /* all have SAME_VA */
        same_va_reg = kbase_alloc_free_region(kctx, 1,
-                       (1ULL << (same_va_bits - PAGE_SHIFT)) - 1,
+                       same_va_pages,
                        KBASE_REG_ZONE_SAME_VA);
 
-       if (!same_va_reg)
-               return -ENOMEM;
+       if (!same_va_reg) {
+               err = -ENOMEM;
+               goto fail_unlock;
+       }
 
 #ifdef CONFIG_64BIT
-       /* only 32-bit clients have the other two zones */
+       /* 32-bit clients have exec and custom VA zones */
        if (kctx->is_compat) {
 #endif
                if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
-                       kbase_free_alloced_region(same_va_reg);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto fail_free_same_va;
                }
                /* If the current size of TMEM is out of range of the
                 * virtual address space addressable by the MMU then
@@ -496,8 +528,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
                                KBASE_REG_ZONE_EXEC);
 
                if (!exec_reg) {
-                       kbase_free_alloced_region(same_va_reg);
-                       return -ENOMEM;
+                       err = -ENOMEM;
+                       goto fail_free_same_va;
                }
 
                custom_va_reg = kbase_alloc_free_region(kctx,
@@ -505,9 +537,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
                                custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
 
                if (!custom_va_reg) {
-                       kbase_free_alloced_region(same_va_reg);
-                       kbase_free_alloced_region(exec_reg);
-                       return -ENOMEM;
+                       err = -ENOMEM;
+                       goto fail_free_exec;
                }
 #ifdef CONFIG_64BIT
        }
@@ -515,7 +546,102 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
 
        kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
 
+       kctx->same_va_end = same_va_pages + 1;
+
+       kbase_gpu_vm_unlock(kctx);
        return 0;
+
+fail_free_exec:
+       kbase_free_alloced_region(exec_reg);
+fail_free_same_va:
+       kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+       kbase_gpu_vm_unlock(kctx);
+       return err;
+}
+
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages)
+{
+#ifdef CONFIG_64BIT
+       struct kbase_va_region *same_va;
+       struct kbase_va_region *custom_va_reg;
+       u64 same_va_bits;
+       u64 total_va_size;
+       int err;
+
+       /*
+        * Nothing to do for 32-bit clients, JIT uses the existing
+        * custom VA zone.
+        */
+       if (kctx->is_compat)
+               return 0;
+
+#if defined(CONFIG_ARM64)
+       same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+       same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+       if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+               same_va_bits = 33;
+
+       total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+
+       kbase_gpu_vm_lock(kctx);
+
+       /*
+        * Modify the same VA free region after creation. Be careful to ensure
+        * that allocations haven't been made as they could cause an overlap
+        * to happen with existing same VA allocations and the custom VA zone.
+        */
+       same_va = kbase_region_tracker_find_region_base_address(kctx,
+                       PAGE_SIZE);
+       if (!same_va) {
+               err = -ENOMEM;
+               goto fail_unlock;
+       }
+
+       /* Bail if the region flag or region size has changed since creation. */
+       if ((!(same_va->flags & KBASE_REG_FREE)) ||
+                       (same_va->nr_pages != total_va_size)) {
+               err = -ENOMEM;
+               goto fail_unlock;
+       }
+
+       /* It's safe to adjust the same VA zone now */
+       same_va->nr_pages -= jit_va_pages;
+       kctx->same_va_end -= jit_va_pages;
+
+       /*
+        * Create a custom VA zone at the end of the VA for allocations which
+        * JIT can use so it doesn't have to allocate VA from the kernel.
+        */
+       custom_va_reg = kbase_alloc_free_region(kctx,
+                               kctx->same_va_end,
+                               jit_va_pages,
+                               KBASE_REG_ZONE_CUSTOM_VA);
+       if (!custom_va_reg) {
+               /*
+                * The context will be destroyed if we fail here so no point
+                * reverting the change we made to same_va.
+                */
+               err = -ENOMEM;
+               goto fail_unlock;
+       }
+
+       kbase_region_tracker_insert(kctx, custom_va_reg);
+
+       kbase_gpu_vm_unlock(kctx);
+       return 0;
+
+fail_unlock:
+       kbase_gpu_vm_unlock(kctx);
+       return err;
+#else
+       return 0;
+#endif
 }
 
 int kbase_mem_init(struct kbase_device *kbdev)
@@ -613,8 +739,46 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
  */
 void kbase_free_alloced_region(struct kbase_va_region *reg)
 {
-       KBASE_DEBUG_ASSERT(NULL != reg);
        if (!(reg->flags & KBASE_REG_FREE)) {
+               /*
+                * The physical allocation should have been removed from the
+                * eviction list before this function is called. However, in the
+                * case of abnormal process termination or the app leaking the
+                * memory kbase_mem_free_region is not called so it can still be
+                * on the list at termination time of the region tracker.
+                */
+               if (!list_empty(&reg->gpu_alloc->evict_node)) {
+                       /*
+                        * Unlink the physical allocation before unmaking it
+                        * evictable so that the allocation isn't grown back to
+                        * its last backed size as we're going to unmap it
+                        * anyway.
+                        */
+                       reg->cpu_alloc->reg = NULL;
+                       if (reg->cpu_alloc != reg->gpu_alloc)
+                               reg->gpu_alloc->reg = NULL;
+
+                       /*
+                        * If a region has been made evictable then we must
+                        * unmake it before trying to free it.
+                        * If the memory hasn't been reclaimed it will be
+                        * unmapped and freed below, if it has been reclaimed
+                        * then the operations below are no-ops.
+                        */
+                       if (reg->flags & KBASE_REG_DONT_NEED) {
+                               KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+                                                  KBASE_MEM_TYPE_NATIVE);
+                               kbase_mem_evictable_unmake(reg->gpu_alloc);
+                       }
+               }
+
+               /*
+                * Remove the region from the sticky resource metadata
+                * list should it be there.
+                */
+               kbase_sticky_resource_release(reg->kctx, NULL,
+                               reg->start_pfn << PAGE_SHIFT, true);
+
                kbase_mem_phy_alloc_put(reg->cpu_alloc);
                kbase_mem_phy_alloc_put(reg->gpu_alloc);
                /* To detect use-after-free in debug builds */
@@ -891,10 +1055,10 @@ static int kbase_do_syncset(struct kbase_context *kctx,
 
        /* find the region where the virtual address is contained */
        reg = kbase_region_tracker_find_region_enclosing_address(kctx,
-                       sset->mem_handle);
+                       sset->mem_handle.basep.handle);
        if (!reg) {
                dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
-                               sset->mem_handle);
+                               sset->mem_handle.basep.handle);
                err = -EINVAL;
                goto out_unlock;
        }
@@ -908,7 +1072,7 @@ static int kbase_do_syncset(struct kbase_context *kctx,
        map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
        if (!map) {
                dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
-                               start, sset->mem_handle);
+                               start, sset->mem_handle.basep.handle);
                err = -EINVAL;
                goto out_unlock;
        }
@@ -989,6 +1153,28 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
        KBASE_DEBUG_ASSERT(NULL != kctx);
        KBASE_DEBUG_ASSERT(NULL != reg);
        lockdep_assert_held(&kctx->reg_lock);
+
+       /*
+        * Unlink the physical allocation before unmaking it evictable so
+        * that the allocation isn't grown back to its last backed size
+        * as we're going to unmap it anyway.
+        */
+       reg->cpu_alloc->reg = NULL;
+       if (reg->cpu_alloc != reg->gpu_alloc)
+               reg->gpu_alloc->reg = NULL;
+
+       /*
+        * If a region has been made evictable then we must unmake it
+        * before trying to free it.
+        * If the memory hasn't been reclaimed it will be unmapped and freed
+        * below, if it has been reclaimed then the operations below are no-ops.
+        */
+       if (reg->flags & KBASE_REG_DONT_NEED) {
+               KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
+                                  KBASE_MEM_TYPE_NATIVE);
+               kbase_mem_evictable_unmake(reg->gpu_alloc);
+       }
+
        err = kbase_gpu_munmap(kctx, reg);
        if (err) {
                dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
@@ -1046,7 +1232,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
                kbase_free_alloced_region(reg);
        } else {
                /* A real GPU va */
-
                /* Validate the region */
                reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
                if (!reg || (reg->flags & KBASE_REG_FREE)) {
@@ -1063,7 +1248,6 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
                        err = -EINVAL;
                        goto out_unlock;
                }
-
                err = kbase_mem_free_region(kctx, reg);
        }
 
@@ -1124,6 +1308,8 @@ int kbase_alloc_phy_pages_helper(
        struct kbase_mem_phy_alloc *alloc,
        size_t nr_pages_requested)
 {
+       int new_page_count __maybe_unused;
+
        KBASE_DEBUG_ASSERT(alloc);
        KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
        KBASE_DEBUG_ASSERT(alloc->imported.kctx);
@@ -1131,7 +1317,8 @@ int kbase_alloc_phy_pages_helper(
        if (nr_pages_requested == 0)
                goto done; /*nothing to do*/
 
-       kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+       new_page_count = kbase_atomic_add_pages(
+                       nr_pages_requested, &alloc->imported.kctx->used_pages);
        kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
 
        /* Increase mm counters before we allocate pages so that this
@@ -1142,9 +1329,9 @@ int kbase_alloc_phy_pages_helper(
                        nr_pages_requested, alloc->pages + alloc->nents) != 0)
                goto no_alloc;
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
-       kbase_tlstream_aux_pagesalloc((s64)nr_pages_requested);
-#endif
+       kbase_tlstream_aux_pagesalloc(
+                       (u32)alloc->imported.kctx->id,
+                       (u64)new_page_count);
 
        alloc->nents += nr_pages_requested;
 done:
@@ -1162,10 +1349,12 @@ int kbase_free_phy_pages_helper(
        struct kbase_mem_phy_alloc *alloc,
        size_t nr_pages_to_free)
 {
+       struct kbase_context *kctx = alloc->imported.kctx;
        bool syncback;
+       bool reclaimed = (alloc->evicted != 0);
        phys_addr_t *start_free;
+       int new_page_count __maybe_unused;
 
-       KBASE_DEBUG_ASSERT(alloc);
        KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
        KBASE_DEBUG_ASSERT(alloc->imported.kctx);
        KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
@@ -1178,19 +1367,29 @@ int kbase_free_phy_pages_helper(
 
        syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
 
-       kbase_mem_pool_free_pages(&alloc->imported.kctx->mem_pool,
+       kbase_mem_pool_free_pages(&kctx->mem_pool,
                                  nr_pages_to_free,
                                  start_free,
-                                 syncback);
+                                 syncback,
+                                 reclaimed);
 
        alloc->nents -= nr_pages_to_free;
-       kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free);
-       kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages);
-       kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages);
 
-#if defined(CONFIG_MALI_MIPE_ENABLED)
-       kbase_tlstream_aux_pagesalloc(-(s64)nr_pages_to_free);
-#endif
+       /*
+        * If the allocation was not evicted (i.e. evicted == 0) then
+        * the page accounting needs to be done.
+        */
+       if (!reclaimed) {
+               kbase_process_page_usage_dec(kctx, nr_pages_to_free);
+               new_page_count = kbase_atomic_sub_pages(nr_pages_to_free,
+                                                       &kctx->used_pages);
+               kbase_atomic_sub_pages(nr_pages_to_free,
+                                      &kctx->kbdev->memdev.used_pages);
+
+               kbase_tlstream_aux_pagesalloc(
+                               (u32)kctx->id,
+                               (u64)new_page_count);
+       }
 
        return 0;
 }
@@ -1203,7 +1402,12 @@ void kbase_mem_kref_free(struct kref *kref)
 
        switch (alloc->type) {
        case KBASE_MEM_TYPE_NATIVE: {
-               KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+               WARN_ON(!alloc->imported.kctx);
+               /*
+                * The physical allocation must have been removed from the
+                * eviction list before trying to free it.
+                */
+               WARN_ON(!list_empty(&alloc->evict_node));
                kbase_free_phy_pages_helper(alloc, alloc->nents);
                break;
        }
@@ -1236,6 +1440,9 @@ void kbase_mem_kref_free(struct kref *kref)
                dma_buf_put(alloc->imported.umm.dma_buf);
                break;
 #endif
+       case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+               kfree(alloc->imported.user_buf.pages);
+               break;
        case KBASE_MEM_TYPE_TB:{
                void *tb;
 
@@ -1278,9 +1485,11 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size
        if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
                goto out_term;
 
+       reg->cpu_alloc->reg = reg;
        if (reg->cpu_alloc != reg->gpu_alloc) {
                if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
                        goto out_rollback;
+               reg->gpu_alloc->reg = reg;
        }
 
        return 0;
@@ -1374,3 +1583,921 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx)
 }
 
 KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+
+/**
+ * struct kbase_jit_debugfs_data - Per-open state of a JIT debugfs file
+ * @func:          Callback which fills in the three value fields
+ * @lock:          Taken by the read handler around all accesses to this state
+ * @kctx:          Context the values are gathered from
+ * @active_value:  Value computed for the JIT active list
+ * @pool_value:    Value computed for the JIT pool list
+ * @destroy_value: Value computed for the JIT destroy list
+ * @buffer:        Text form ("active,pool,destroy") copied to user space
+ */
+struct kbase_jit_debugfs_data {
+       int (*func)(struct kbase_jit_debugfs_data *);
+       struct mutex lock;
+       struct kbase_context *kctx;
+       u64 active_value;
+       u64 pool_value;
+       u64 destroy_value;
+       char buffer[50];
+};
+
+/**
+ * kbase_jit_debugfs_common_open - Shared open handler for JIT debugfs files
+ * @inode: Inode of the debugfs file; its i_private is used as the context
+ * @file:  File being opened; its private_data receives the new state
+ * @func:  Callback stored for later use by the read handler
+ *
+ * Allocates a zeroed struct kbase_jit_debugfs_data for this open file.
+ *
+ * Return: -ENOMEM if the allocation fails, otherwise the result of
+ * nonseekable_open().
+ */
+static int kbase_jit_debugfs_common_open(struct inode *inode,
+               struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
+{
+       struct kbase_jit_debugfs_data *data;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       data->func = func;
+       mutex_init(&data->lock);
+       data->kctx = (struct kbase_context *) inode->i_private;
+
+       file->private_data = data;
+
+       return nonseekable_open(inode, file);
+}
+
+/**
+ * kbase_jit_debugfs_common_read - Shared read handler for JIT debugfs files
+ * @file: Open file; private_data holds the struct kbase_jit_debugfs_data
+ * @buf:  User buffer to copy the text into
+ * @len:  Size of @buf
+ * @ppos: File position; updated by simple_read_from_buffer()
+ *
+ * A read at offset zero invokes the stored callback and formats the three
+ * values as "active,pool,destroy". A read at a non-zero offset continues
+ * from the text formatted earlier.
+ *
+ * Return: Result of simple_read_from_buffer(), or -EACCES if there is no
+ * callback or the callback reports failure.
+ */
+static ssize_t kbase_jit_debugfs_common_read(struct file *file,
+               char __user *buf, size_t len, loff_t *ppos)
+{
+       struct kbase_jit_debugfs_data *data;
+       size_t size;
+       int ret;
+
+       data = (struct kbase_jit_debugfs_data *) file->private_data;
+       mutex_lock(&data->lock);
+
+       if (*ppos) {
+               size = strnlen(data->buffer, sizeof(data->buffer));
+       } else {
+               if (!data->func) {
+                       ret = -EACCES;
+                       goto out_unlock;
+               }
+
+               /*
+                * The callbacks accumulate into the value fields, so clear
+                * them first. Without this a second pass at offset zero
+                * (e.g. after a zero length or faulting read, which leave
+                * *ppos untouched) would add the new totals to the old ones.
+                */
+               data->active_value = 0;
+               data->pool_value = 0;
+               data->destroy_value = 0;
+
+               if (data->func(data)) {
+                       ret = -EACCES;
+                       goto out_unlock;
+               }
+
+               size = scnprintf(data->buffer, sizeof(data->buffer),
+                               "%llu,%llu,%llu", data->active_value,
+                               data->pool_value, data->destroy_value);
+       }
+
+       ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
+
+out_unlock:
+       mutex_unlock(&data->lock);
+       return ret;
+}
+
+/**
+ * kbase_jit_debugfs_common_release - Shared release handler for JIT debugfs
+ * @inode: Unused
+ * @file:  File being closed; its private_data is freed
+ *
+ * Return: Always 0.
+ */
+static int kbase_jit_debugfs_common_release(struct inode *inode,
+               struct file *file)
+{
+       kfree(file->private_data);
+       return 0;
+}
+
+/*
+ * KBASE_JIT_DEBUGFS_DECLARE - Define an open handler named <__fops>_open
+ * and a file_operations structure named __fops. The open handler passes
+ * __func to kbase_jit_debugfs_common_open(); read and release use the
+ * common handlers.
+ */
+#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+       return kbase_jit_debugfs_common_open(inode, file, __func); \
+} \
+static const struct file_operations __fops = { \
+       .owner = THIS_MODULE, \
+       .open = __fops ## _open, \
+       .release = kbase_jit_debugfs_common_release, \
+       .read = kbase_jit_debugfs_common_read, \
+       .write = NULL, \
+       .llseek = generic_file_llseek, \
+}
+
+/**
+ * kbase_jit_debugfs_count_get - Count the entries on each JIT list
+ * @data: Debugfs state; one is added to the matching value field for every
+ *        entry on the active, pool and destroy lists
+ *
+ * The lists are walked with the context's jit_lock held.
+ *
+ * Return: Always 0.
+ */
+static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
+{
+       struct kbase_context *kctx = data->kctx;
+       struct list_head *tmp;
+
+       mutex_lock(&kctx->jit_lock);
+       list_for_each(tmp, &kctx->jit_active_head) {
+               data->active_value++;
+       }
+
+       list_for_each(tmp, &kctx->jit_pool_head) {
+               data->pool_value++;
+       }
+
+       list_for_each(tmp, &kctx->jit_destroy_head) {
+               data->destroy_value++;
+       }
+       mutex_unlock(&kctx->jit_lock);
+
+       return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
+               kbase_jit_debugfs_count_get);
+
+/**
+ * kbase_jit_debugfs_vm_get - Sum the virtual size of the JIT regions
+ * @data: Debugfs state; reg->nr_pages of every region on the active, pool
+ *        and destroy lists is added to the matching value field
+ *
+ * The lists are walked with the context's jit_lock held.
+ *
+ * Return: Always 0.
+ */
+static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
+{
+       struct kbase_context *kctx = data->kctx;
+       struct kbase_va_region *reg;
+
+       mutex_lock(&kctx->jit_lock);
+       list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+               data->active_value += reg->nr_pages;
+       }
+
+       list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+               data->pool_value += reg->nr_pages;
+       }
+
+       list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+               data->destroy_value += reg->nr_pages;
+       }
+       mutex_unlock(&kctx->jit_lock);
+
+       return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
+               kbase_jit_debugfs_vm_get);
+
+/**
+ * kbase_jit_debugfs_phys_get - Sum the physical backing of the JIT regions
+ * @data: Debugfs state; reg->gpu_alloc->nents of every region on the
+ *        active, pool and destroy lists is added to the matching value field
+ *
+ * The lists are walked with the context's jit_lock held.
+ *
+ * Return: Always 0.
+ */
+static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
+{
+       struct kbase_context *kctx = data->kctx;
+       struct kbase_va_region *reg;
+
+       mutex_lock(&kctx->jit_lock);
+       list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
+               data->active_value += reg->gpu_alloc->nents;
+       }
+
+       list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
+               data->pool_value += reg->gpu_alloc->nents;
+       }
+
+       list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
+               data->destroy_value += reg->gpu_alloc->nents;
+       }
+       mutex_unlock(&kctx->jit_lock);
+
+       return 0;
+}
+KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
+               kbase_jit_debugfs_phys_get);
+
+/**
+ * kbase_jit_debugfs_add - Create the JIT debugfs files for a context
+ * @kctx: Context; the files are created under kctx->kctx_dentry and get
+ *        @kctx as their private data
+ *
+ * The return values of debugfs_create_file() are not checked.
+ */
+void kbase_jit_debugfs_add(struct kbase_context *kctx)
+{
+       /* Debugfs entry for getting the number of JIT allocations. */
+       debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
+                       kctx, &kbase_jit_debugfs_count_fops);
+
+       /*
+        * Debugfs entry for getting the total number of virtual pages
+        * used by JIT allocations.
+        */
+       debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry,
+                       kctx, &kbase_jit_debugfs_vm_fops);
+
+       /*
+        * Debugfs entry for getting the number of physical pages used
+        * by JIT allocations.
+        */
+       debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry,
+                       kctx, &kbase_jit_debugfs_phys_fops);
+}
+
+/**
+ * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
+ * @work: Work item
+ *
+ * This function does the work of freeing JIT allocations whose physical
+ * backing has been released. It removes regions from the head of
+ * jit_destroy_head one at a time until the list is empty.
+ */
+static void kbase_jit_destroy_worker(struct work_struct *work)
+{
+       struct kbase_context *kctx;
+       struct kbase_va_region *reg;
+
+       kctx = container_of(work, struct kbase_context, jit_work);
+       do {
+               mutex_lock(&kctx->jit_lock);
+               if (list_empty(&kctx->jit_destroy_head))
+                       reg = NULL;
+               else
+                       reg = list_first_entry(&kctx->jit_destroy_head,
+                               struct kbase_va_region, jit_node);
+
+               if (reg) {
+                       list_del(&reg->jit_node);
+                       /* jit_lock is dropped before the region lock is taken */
+                       mutex_unlock(&kctx->jit_lock);
+
+                       kbase_gpu_vm_lock(kctx);
+                       kbase_mem_free_region(kctx, reg);
+                       kbase_gpu_vm_unlock(kctx);
+               } else
+                       mutex_unlock(&kctx->jit_lock);
+       } while (reg);
+}
+
+/**
+ * kbase_jit_init - Initialise the JIT allocation state of a context
+ * @kctx: Context whose JIT lists, lock and deferred-free work item are
+ *        initialised
+ *
+ * Return: Always 0.
+ */
+int kbase_jit_init(struct kbase_context *kctx)
+{
+       INIT_LIST_HEAD(&kctx->jit_active_head);
+       INIT_LIST_HEAD(&kctx->jit_pool_head);
+       INIT_LIST_HEAD(&kctx->jit_destroy_head);
+       mutex_init(&kctx->jit_lock);
+       INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
+
+       return 0;
+}
+
+/**
+ * kbase_jit_allocate - Obtain a region for a JIT allocation
+ * @kctx: Context the allocation belongs to
+ * @info: Request; va_pages, commit_pages and extent are read
+ *
+ * The pool is searched for a region with at least @info->va_pages pages,
+ * preferring the one whose current backing size is closest to
+ * @info->commit_pages. A region taken from the pool is made non-evictable
+ * and its backing is grown if it has fewer than @info->commit_pages pages.
+ * If the pool holds no suitable region a new one is created with
+ * kbase_mem_alloc().
+ *
+ * Return: The region, which has been added to the active list, or NULL on
+ * failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+               struct base_jit_alloc_info *info)
+{
+       struct kbase_va_region *reg = NULL;
+       struct kbase_va_region *walker;
+       struct kbase_va_region *temp;
+       size_t current_diff = SIZE_MAX;
+
+       int ret;
+
+       mutex_lock(&kctx->jit_lock);
+       /*
+        * Scan the pool for an existing allocation which meets our
+        * requirements and remove it.
+        */
+       list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) {
+
+               if (walker->nr_pages >= info->va_pages) {
+                       size_t min_size, max_size, diff;
+
+                       /*
+                        * The JIT allocation's VA requirements have been
+                        * met, it's suitable but other allocations
+                        * might be a better fit.
+                        */
+                       min_size = min_t(size_t, walker->gpu_alloc->nents,
+                                       info->commit_pages);
+                       max_size = max_t(size_t, walker->gpu_alloc->nents,
+                                       info->commit_pages);
+                       diff = max_size - min_size;
+
+                       if (current_diff > diff) {
+                               current_diff = diff;
+                               reg = walker;
+                       }
+
+                       /* The allocation is an exact match, stop looking */
+                       if (current_diff == 0)
+                               break;
+               }
+       }
+
+       if (reg) {
+               /*
+                * Remove the found region from the pool and add it to the
+                * active list.
+                */
+               list_del_init(&reg->jit_node);
+               list_add(&reg->jit_node, &kctx->jit_active_head);
+
+               /* Release the jit lock before modifying the allocation */
+               mutex_unlock(&kctx->jit_lock);
+
+               kbase_gpu_vm_lock(kctx);
+
+               /* Make the physical backing no longer reclaimable */
+               if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
+                       goto update_failed;
+
+               /* Grow the backing if required */
+               if (reg->gpu_alloc->nents < info->commit_pages) {
+                       size_t delta;
+                       size_t old_size = reg->gpu_alloc->nents;
+
+                       /* Allocate some more pages */
+                       delta = info->commit_pages - reg->gpu_alloc->nents;
+                       if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta)
+                                       != 0)
+                               goto update_failed;
+
+                       if (reg->cpu_alloc != reg->gpu_alloc) {
+                               if (kbase_alloc_phy_pages_helper(
+                                               reg->cpu_alloc, delta) != 0) {
+                                       kbase_free_phy_pages_helper(
+                                                       reg->gpu_alloc, delta);
+                                       goto update_failed;
+                               }
+                       }
+
+                       ret = kbase_mem_grow_gpu_mapping(kctx, reg,
+                                       info->commit_pages, old_size);
+                       /*
+                        * The grow failed so put the allocation back in the
+                        * pool and return failure.
+                        */
+                       if (ret)
+                               goto update_failed;
+               }
+               kbase_gpu_vm_unlock(kctx);
+       } else {
+               /* No suitable JIT allocation was found so create a new one */
+               u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
+                               BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF;
+               u64 gpu_addr;
+               u16 alignment;
+
+               mutex_unlock(&kctx->jit_lock);
+
+               reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
+                               info->extent, &flags, &gpu_addr, &alignment);
+               if (!reg)
+                       goto out_unlocked;
+
+               mutex_lock(&kctx->jit_lock);
+               list_add(&reg->jit_node, &kctx->jit_active_head);
+               mutex_unlock(&kctx->jit_lock);
+       }
+
+       return reg;
+
+update_failed:
+       /*
+        * An update to an allocation from the pool failed, chances
+        * are slim a new allocation would fare any better so return
+        * the allocation to the pool and return the function with failure.
+        *
+        * NOTE(review): the backing is not made evictable again on this
+        * path, and pages added before a failed kbase_mem_grow_gpu_mapping()
+        * are not released here - confirm this is intended for a region
+        * going back to the pool.
+        */
+       kbase_gpu_vm_unlock(kctx);
+       mutex_lock(&kctx->jit_lock);
+       list_del_init(&reg->jit_node);
+       list_add(&reg->jit_node, &kctx->jit_pool_head);
+       mutex_unlock(&kctx->jit_lock);
+out_unlocked:
+       return NULL;
+}
+
+/**
+ * kbase_jit_free - Return a JIT allocation to the pool
+ * @kctx: Context the region belongs to
+ * @reg:  Region to return
+ *
+ * The physical backing is handed to kbase_mem_evictable_make() with the
+ * process mmap_sem held for read and the region lock held, then the
+ * region is moved onto the pool list under jit_lock.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+       /* The physical backing of memory in the pool is always reclaimable */
+       down_read(&kctx->process_mm->mmap_sem);
+       kbase_gpu_vm_lock(kctx);
+       kbase_mem_evictable_make(reg->gpu_alloc);
+       kbase_gpu_vm_unlock(kctx);
+       up_read(&kctx->process_mm->mmap_sem);
+
+       mutex_lock(&kctx->jit_lock);
+       list_del_init(&reg->jit_node);
+       list_add(&reg->jit_node, &kctx->jit_pool_head);
+       mutex_unlock(&kctx->jit_lock);
+}
+
+/**
+ * kbase_jit_backing_lost - Queue a JIT region for deferred freeing
+ * @reg: Region concerned; ignored if it is not on any JIT list
+ *
+ * Moves the region onto jit_destroy_head and schedules the worker which
+ * frees everything on that list.
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg)
+{
+       struct kbase_context *kctx = reg->kctx;
+
+       /*
+        * JIT allocations will always be on a list, if the region
+        * is not on a list then it's not a JIT allocation.
+        * Note that this check is made before jit_lock is taken.
+        */
+       if (list_empty(&reg->jit_node))
+               return;
+
+       /*
+        * Freeing the allocation requires locks we might not be able
+        * to take now, so move the allocation to the free list and kick
+        * the worker which will do the freeing.
+        */
+       mutex_lock(&kctx->jit_lock);
+       list_del_init(&reg->jit_node);
+       list_add(&reg->jit_node, &kctx->jit_destroy_head);
+       mutex_unlock(&kctx->jit_lock);
+
+       schedule_work(&kctx->jit_work);
+}
+
+/**
+ * kbase_jit_evict - Free one region from the JIT pool
+ * @kctx: Context; the caller must hold kctx->reg_lock
+ *
+ * The region at the tail of the pool list is unlinked under jit_lock and
+ * then passed to kbase_mem_free_region().
+ *
+ * Return: true if a region was taken from the pool, false if the pool was
+ * empty.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx)
+{
+       struct kbase_va_region *reg = NULL;
+
+       lockdep_assert_held(&kctx->reg_lock);
+
+       /* Free the oldest allocation from the pool */
+       mutex_lock(&kctx->jit_lock);
+       if (!list_empty(&kctx->jit_pool_head)) {
+               reg = list_entry(kctx->jit_pool_head.prev,
+                               struct kbase_va_region, jit_node);
+               list_del(&reg->jit_node);
+       }
+       mutex_unlock(&kctx->jit_lock);
+
+       if (reg)
+               kbase_mem_free_region(kctx, reg);
+
+       return (reg != NULL);
+}
+
+/**
+ * kbase_jit_term - Free the JIT allocations of a context
+ * @kctx: Context being torn down
+ *
+ * Stops the deferred-free worker, then frees every region on the pool
+ * and active lists with the region lock held.
+ */
+void kbase_jit_term(struct kbase_context *kctx)
+{
+       struct kbase_va_region *walker;
+
+       /* Free all allocations for this context */
+
+       /*
+        * Stop the deferred freeing of allocations whose backing has been
+        * freed: cancel_work_sync() cancels the work item if it is still
+        * pending and waits for it if it is already running.
+        * NOTE(review): regions left on jit_destroy_head by a cancelled
+        * work item are not freed in this function - confirm they are
+        * released elsewhere during context termination.
+        */
+       cancel_work_sync(&kctx->jit_work);
+
+       kbase_gpu_vm_lock(kctx);
+       /* Free all allocations from the pool */
+       while (!list_empty(&kctx->jit_pool_head)) {
+               walker = list_first_entry(&kctx->jit_pool_head,
+                               struct kbase_va_region, jit_node);
+               list_del(&walker->jit_node);
+               kbase_mem_free_region(kctx, walker);
+       }
+
+       /* Free all allocations from active list */
+       while (!list_empty(&kctx->jit_active_head)) {
+               walker = list_first_entry(&kctx->jit_active_head,
+                               struct kbase_va_region, jit_node);
+               list_del(&walker->jit_node);
+               kbase_mem_free_region(kctx, walker);
+       }
+       kbase_gpu_vm_unlock(kctx);
+}
+
+/**
+ * kbase_jd_user_buf_map - Pin an imported user buffer and map it on the GPU
+ * @kctx: Context the mapping is created in
+ * @reg:  Region whose gpu_alloc is of type KBASE_MEM_TYPE_IMPORTED_USER_BUF
+ *
+ * Pins the user pages, creates a DMA mapping for each of them and inserts
+ * them into the GPU page tables. If a DMA mapping or the page table
+ * insertion fails, every DMA mapping made here is undone and every page
+ * pinned here is released.
+ *
+ * Return: 0 on success, the non-positive get_user_pages() result if no
+ * page was pinned, -ENOMEM if only part of the buffer could be pinned or
+ * a DMA mapping failed, or the kbase_mmu_insert_pages() error.
+ */
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+               struct kbase_va_region *reg)
+{
+       long pinned_pages;
+       long mapped_pages;
+       struct kbase_mem_phy_alloc *alloc;
+       struct page **pages;
+       phys_addr_t *pa;
+       long i;
+       int err = -ENOMEM;
+       unsigned long address;
+       struct task_struct *owner;
+       struct device *dev;
+       unsigned long offset;
+       unsigned long local_size;
+
+       alloc = reg->gpu_alloc;
+       pa = kbase_get_gpu_phy_pages(reg);
+       address = alloc->imported.user_buf.address;
+       owner = alloc->imported.user_buf.owner;
+
+       KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+
+       pages = alloc->imported.user_buf.pages;
+
+       pinned_pages = get_user_pages(owner, owner->mm,
+                       address,
+                       alloc->imported.user_buf.nr_pages,
+                       reg->flags & KBASE_REG_GPU_WR,
+                       0, pages, NULL);
+
+       if (pinned_pages <= 0)
+               return pinned_pages;
+
+       if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+               for (i = 0; i < pinned_pages; i++)
+                       put_page(pages[i]);
+               return -ENOMEM;
+       }
+
+       dev = kctx->kbdev->dev;
+       /* Only the first page can start at a non-zero offset */
+       offset = address & ~PAGE_MASK;
+       local_size = alloc->imported.user_buf.size;
+
+       for (i = 0; i < pinned_pages; i++) {
+               dma_addr_t dma_addr;
+               unsigned long min;
+
+               min = MIN(PAGE_SIZE - offset, local_size);
+               dma_addr = dma_map_page(dev, pages[i],
+                               offset, min,
+                               DMA_BIDIRECTIONAL);
+               if (dma_mapping_error(dev, dma_addr))
+                       goto unwind;
+
+               alloc->imported.user_buf.dma_addrs[i] = dma_addr;
+               pa[i] = page_to_phys(pages[i]);
+
+               local_size -= min;
+               offset = 0;
+       }
+
+       alloc->nents = pinned_pages;
+
+       err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa,
+                       kbase_reg_current_backed_size(reg),
+                       reg->flags);
+       if (err == 0)
+               return 0;
+
+       alloc->nents = 0;
+       /* fall down */
+unwind:
+       /*
+        * i is the number of pages which were DMA mapped. Unmap them with
+        * the same sizes that were passed to dma_map_page(), by replaying
+        * the offset/size walk of the mapping loop.
+        */
+       mapped_pages = i;
+       offset = address & ~PAGE_MASK;
+       local_size = alloc->imported.user_buf.size;
+       for (i = 0; i < mapped_pages; i++) {
+               unsigned long min;
+
+               min = MIN(PAGE_SIZE - offset, local_size);
+               dma_unmap_page(dev, alloc->imported.user_buf.dma_addrs[i],
+                               min, DMA_BIDIRECTIONAL);
+
+               local_size -= min;
+               offset = 0;
+       }
+
+       /*
+        * Every page was pinned by get_user_pages(), including the ones
+        * after a failed DMA mapping, so release all of them rather than
+        * only the mapped ones.
+        */
+       for (i = 0; i < pinned_pages; i++) {
+               put_page(pages[i]);
+               pages[i] = NULL;
+       }
+
+       return err;
+}
+
+/**
+ * kbase_jd_user_buf_unmap - Undo the DMA mappings and pins of a user buffer
+ * @kctx:      Context whose device the pages were DMA mapped for
+ * @alloc:     Allocation of type KBASE_MEM_TYPE_IMPORTED_USER_BUF
+ * @writeable: If true each page is marked dirty before it is released
+ *
+ * Every one of the nr_pages pages is DMA unmapped and released, and
+ * alloc->nents is reset to 0.
+ */
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
+               struct kbase_mem_phy_alloc *alloc, bool writeable)
+{
+       long i;
+       struct page **pages;
+       unsigned long size = alloc->imported.user_buf.size;
+
+       KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+       pages = alloc->imported.user_buf.pages;
+       for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
+               unsigned long local_size;
+               dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+               /*
+                * Bytes of this page belonging to the buffer, derived from
+                * the in-page offset of the DMA address and what is left.
+                */
+               local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
+               dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+                               DMA_BIDIRECTIONAL);
+               if (writeable)
+                       set_page_dirty_lock(pages[i]);
+               put_page(pages[i]);
+               pages[i] = NULL;
+
+               size -= local_size;
+       }
+       alloc->nents = 0;
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+/**
+ * kbase_jd_umm_map - Map an imported dma-buf on the GPU
+ * @kctx: Context the mapping is created in
+ * @reg:  Region whose gpu_alloc is of type KBASE_MEM_TYPE_IMPORTED_UMM
+ *
+ * Maps the dma-buf attachment, records one address per page from the
+ * returned scatter list (at most reg->nr_pages of them) and inserts them
+ * into the GPU page tables. On failure the attachment is unmapped again.
+ *
+ * Return: 0 on success, -EINVAL if the attachment cannot be mapped or the
+ * scatter list covers fewer than reg->nr_pages pages, or the
+ * kbase_mmu_insert_pages() error.
+ */
+static int kbase_jd_umm_map(struct kbase_context *kctx,
+               struct kbase_va_region *reg)
+{
+       struct sg_table *sgt;
+       struct scatterlist *s;
+       int i;
+       phys_addr_t *pa;
+       int err;
+       size_t count = 0;
+       struct kbase_mem_phy_alloc *alloc;
+
+       alloc = reg->gpu_alloc;
+
+       KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+       KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+       sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+                       DMA_BIDIRECTIONAL);
+
+       if (IS_ERR_OR_NULL(sgt))
+               return -EINVAL;
+
+       /* save for later */
+       alloc->imported.umm.sgt = sgt;
+
+       pa = kbase_get_gpu_phy_pages(reg);
+       KBASE_DEBUG_ASSERT(pa);
+
+       for_each_sg(sgt->sgl, s, sgt->nents, i) {
+               int j;
+               size_t pages = PFN_UP(sg_dma_len(s));
+
+               WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+               "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+               sg_dma_len(s));
+
+               WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+               "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+               (unsigned long long) sg_dma_address(s));
+
+               /*
+                * NOTE(review): j is an int, so (j << PAGE_SHIFT) is
+                * computed in int arithmetic - confirm a single segment
+                * can never be large enough for this to overflow.
+                */
+               for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
+                               count++)
+                       *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+               WARN_ONCE(j < pages,
+               "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+               alloc->imported.umm.dma_buf->size);
+       }
+
+       if (WARN_ONCE(count < reg->nr_pages,
+                       "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+                       alloc->imported.umm.dma_buf->size)) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       /* Update nents as we now have pages to map */
+       alloc->nents = count;
+
+       /* Read and write access are always requested for the GPU here */
+       err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+                       kbase_get_gpu_phy_pages(reg),
+                       kbase_reg_current_backed_size(reg),
+                       reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+       if (err) {
+               dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+                               alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+               alloc->imported.umm.sgt = NULL;
+       }
+
+       return err;
+}
+
+/**
+ * kbase_jd_umm_unmap - Unmap the dma-buf attachment of an allocation
+ * @kctx:  Context (only checked by a debug assert)
+ * @alloc: Allocation whose scatter list is released
+ *
+ * Unmaps the attachment, clears the saved scatter list pointer and resets
+ * alloc->nents to 0. GPU page tables are not touched here.
+ */
+static void kbase_jd_umm_unmap(struct kbase_context *kctx,
+               struct kbase_mem_phy_alloc *alloc)
+{
+       KBASE_DEBUG_ASSERT(kctx);
+       KBASE_DEBUG_ASSERT(alloc);
+       KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+       KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+       dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+           alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+       alloc->imported.umm.sgt = NULL;
+       alloc->nents = 0;
+}
+#endif                         /* CONFIG_DMA_SHARED_BUFFER */
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \
+               || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+/**
+ * add_kds_resource - Append a KDS resource to an array unless already there
+ * @kds_res:           Resource to add
+ * @kds_resources:     Array of resources collected so far
+ * @kds_res_count:     Number of entries in @kds_resources; incremented
+ *                     when the resource is added
+ * @kds_access_bitmap: Bit for the new entry is set when @exclusive is true
+ * @exclusive:         Whether exclusive access is wanted
+ *
+ * No capacity check is made here; presumably the caller sizes the array
+ * and bitmap for the worst case - TODO confirm against callers.
+ */
+static void add_kds_resource(struct kds_resource *kds_res,
+               struct kds_resource **kds_resources, u32 *kds_res_count,
+               unsigned long *kds_access_bitmap, bool exclusive)
+{
+       u32 i;
+
+       for (i = 0; i < *kds_res_count; i++) {
+               /* Duplicate resource, ignore */
+               if (kds_resources[i] == kds_res)
+                       return;
+       }
+
+       kds_resources[*kds_res_count] = kds_res;
+       if (exclusive)
+               set_bit(*kds_res_count, kds_access_bitmap);
+       (*kds_res_count)++;
+}
+#endif
+
+/**
+ * kbase_map_external_resource - Map an imported allocation for GPU use
+ * @kctx:              Context the mapping is created in
+ * @reg:               Region whose gpu_alloc is to be mapped
+ * @locked_mm:         Compared against the owner's mm for user buffers;
+ *                     a user buffer is only mapped when they are equal
+ * @kds_res_count:     (CONFIG_KDS) Number of collected KDS resources, or
+ *                     NULL to skip KDS resource collection
+ * @kds_resources:     (CONFIG_KDS) Array the KDS resource is added to
+ * @kds_access_bitmap: (CONFIG_KDS) Exclusive-access bitmap
+ * @exclusive:         (CONFIG_KDS) Whether exclusive access is wanted
+ *
+ * For user buffer and UMM imports the per-allocation mapping usage count
+ * is incremented and the first user performs the actual mapping.
+ *
+ * Return: @reg->gpu_alloc with an extra reference taken, or NULL if the
+ * allocation type is not handled or the mapping failed.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+               struct kbase_context *kctx, struct kbase_va_region *reg,
+               struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+               , u32 *kds_res_count, struct kds_resource **kds_resources,
+               unsigned long *kds_access_bitmap, bool exclusive
+#endif
+               )
+{
+       int err;
+
+       /*
+        * decide what needs to happen for this resource
+        *
+        * gpu_alloc->type is an enum kbase_memory_type, so the case labels
+        * use the KBASE_MEM_TYPE_* constants (as
+        * kbase_unmap_external_resource() does) rather than the
+        * BASE_MEM_IMPORT_TYPE_* constants of the user-facing import enum.
+        */
+       switch (reg->gpu_alloc->type) {
+       case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+               if (reg->gpu_alloc->imported.user_buf.owner->mm != locked_mm)
+                       goto exit;
+
+               reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
+               if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) {
+                       err = kbase_jd_user_buf_map(kctx, reg);
+                       if (err) {
+                               reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
+                               goto exit;
+                       }
+               }
+       }
+       break;
+       case KBASE_MEM_TYPE_IMPORTED_UMP: {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+               if (kds_res_count) {
+                       struct kds_resource *kds_res;
+
+                       kds_res = ump_dd_kds_resource_get(
+                                       reg->gpu_alloc->imported.ump_handle);
+                       if (kds_res)
+                               add_kds_resource(kds_res, kds_resources,
+                                               kds_res_count,
+                                               kds_access_bitmap, exclusive);
+               }
+#endif                         /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+               break;
+       }
+#ifdef CONFIG_DMA_SHARED_BUFFER
+       case KBASE_MEM_TYPE_IMPORTED_UMM: {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+               if (kds_res_count) {
+                       struct kds_resource *kds_res;
+
+                       kds_res = get_dma_buf_kds_resource(
+                                       reg->gpu_alloc->imported.umm.dma_buf);
+                       if (kds_res)
+                               add_kds_resource(kds_res, kds_resources,
+                                               kds_res_count,
+                                               kds_access_bitmap, exclusive);
+               }
+#endif
+               reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+               if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+                       err = kbase_jd_umm_map(kctx, reg);
+                       if (err) {
+                               reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+                               goto exit;
+                       }
+               }
+               break;
+       }
+#endif
+       default:
+               goto exit;
+       }
+
+       return kbase_mem_phy_alloc_get(reg->gpu_alloc);
+exit:
+       return NULL;
+}
+
+/**
+ * kbase_unmap_external_resource - Drop one mapping of an imported allocation
+ * @kctx:  Context the mapping was created in
+ * @reg:   Region the allocation is mapped through, or NULL
+ * @alloc: Allocation to unmap
+ *
+ * For UMM and user buffer imports the mapping usage count is decremented.
+ * When it reaches zero the GPU page table entries are torn down (only if
+ * @reg is given and still refers to @alloc) and the underlying mapping is
+ * released. The reference on @alloc is dropped in all cases.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+               struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+       switch (alloc->type) {
+#ifdef CONFIG_DMA_SHARED_BUFFER
+       case KBASE_MEM_TYPE_IMPORTED_UMM: {
+               alloc->imported.umm.current_mapping_usage_count--;
+
+               if (0 == alloc->imported.umm.current_mapping_usage_count) {
+                       if (reg && reg->gpu_alloc == alloc)
+                               kbase_mmu_teardown_pages(
+                                               kctx,
+                                               reg->start_pfn,
+                                               kbase_reg_current_backed_size(reg));
+
+                       kbase_jd_umm_unmap(kctx, alloc);
+               }
+       }
+       break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+       case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+               alloc->imported.user_buf.current_mapping_usage_count--;
+
+               if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
+                       /* Pages are dirtied unless reg shows no GPU write access */
+                       bool writeable = true;
+
+                       if (reg && reg->gpu_alloc == alloc)
+                               kbase_mmu_teardown_pages(
+                                               kctx,
+                                               reg->start_pfn,
+                                               kbase_reg_current_backed_size(reg));
+
+                       if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+                               writeable = false;
+
+                       kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+               }
+       }
+       break;
+       default:
+       break;
+       }
+       kbase_mem_phy_alloc_put(alloc);
+}
+
+/**
+ * kbase_sticky_resource_acquire - Take a reference on a sticky resource
+ * @kctx:     Context; the caller must hold kctx->reg_lock
+ * @gpu_addr: GPU address identifying the resource
+ *
+ * If metadata with a matching gpu_addr already exists its refcount is
+ * incremented. Otherwise the region enclosing @gpu_addr is mapped with
+ * kbase_map_external_resource() and new metadata with a refcount of 1 is
+ * added to the context's list.
+ *
+ * Return: The metadata, or NULL if the region cannot be found or is free,
+ * memory allocation or mapping fails, or the refcount is already UINT_MAX.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+               struct kbase_context *kctx, u64 gpu_addr)
+{
+       struct kbase_ctx_ext_res_meta *meta = NULL;
+       struct kbase_ctx_ext_res_meta *walker;
+
+       lockdep_assert_held(&kctx->reg_lock);
+
+       /*
+        * Walk the per context external resource metadata list for the
+        * metadata which matches the region which is being acquired.
+        */
+       list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
+               if (walker->gpu_addr == gpu_addr) {
+                       meta = walker;
+                       break;
+               }
+       }
+
+       /* No metadata exists so create one. */
+       if (!meta) {
+               struct kbase_va_region *reg;
+
+               /* Find the region */
+               reg = kbase_region_tracker_find_region_enclosing_address(
+                               kctx, gpu_addr);
+               if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+                       goto failed;
+
+               /* Allocate the metadata object */
+               meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+               if (!meta)
+                       goto failed;
+
+               /*
+                * Fill in the metadata object and acquire a reference
+                * for the physical resource.
+                */
+               meta->alloc = kbase_map_external_resource(kctx, reg, NULL
+#ifdef CONFIG_KDS
+                               , NULL, NULL,
+                               NULL, false
+#endif
+                               );
+
+               if (!meta->alloc)
+                       goto fail_map;
+
+               /*
+                * NOTE(review): the start of the region is stored, while
+                * the lookup above compares against the caller's gpu_addr;
+                * an address inside the region but not at its start would
+                * not match this entry later - confirm callers always pass
+                * the region start.
+                */
+               meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+               meta->refcount = 1;
+
+               list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
+       } else {
+               if (meta->refcount == UINT_MAX)
+                       goto failed;
+
+               meta->refcount++;
+       }
+
+       return meta;
+
+fail_map:
+       kfree(meta);
+failed:
+       return NULL;
+}
+
+/**
+ * kbase_sticky_resource_release - Drop a reference on a sticky resource
+ * @kctx:     Context; the caller must hold kctx->reg_lock
+ * @meta:     Metadata to release, or NULL to look it up by @gpu_addr
+ * @gpu_addr: GPU address used for the lookup when @meta is NULL
+ * @force:    If true the resource is unmapped and the metadata freed even
+ *            if references remain
+ *
+ * Return: false if no metadata was supplied or found, true otherwise.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+               struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force)
+{
+       struct kbase_ctx_ext_res_meta *walker;
+
+       lockdep_assert_held(&kctx->reg_lock);
+
+       /* Search for the metadata if one isn't provided. */
+       if (!meta) {
+               /*
+                * Walk the per context external resource metadata list for the
+                * metadata which matches the region which is being released.
+                */
+               list_for_each_entry(walker, &kctx->ext_res_meta_head,
+                               ext_res_node) {
+                       if (walker->gpu_addr == gpu_addr) {
+                               meta = walker;
+                               break;
+                       }
+               }
+       }
+
+       /* No metadata so just return. */
+       if (!meta)
+               return false;
+
+       meta->refcount--;
+       if ((meta->refcount == 0) || force) {
+               /*
+                * Last reference to the metadata, drop the physical memory
+                * reference and free the metadata.
+                */
+               struct kbase_va_region *reg;
+
+               /* reg may be NULL here; the unmap helper checks for that */
+               reg = kbase_region_tracker_find_region_enclosing_address(
+                               kctx,
+                               meta->gpu_addr);
+
+               kbase_unmap_external_resource(kctx, reg, meta->alloc);
+               list_del(&meta->ext_res_node);
+               kfree(meta);
+       }
+
+       return true;
+}
+
+/**
+ * kbase_sticky_resource_init - Initialise the sticky resource list
+ * @kctx: Context whose external resource metadata list is initialised
+ *
+ * Return: Always 0.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx)
+{
+       INIT_LIST_HEAD(&kctx->ext_res_meta_head);
+
+       return 0;
+}
+
+/**
+ * kbase_sticky_resource_term - Release every remaining sticky resource
+ * @kctx: Context; the caller must hold kctx->reg_lock
+ *
+ * Each entry on the metadata list is force-released, which unmaps the
+ * resource and frees the metadata regardless of its refcount.
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx)
+{
+       struct kbase_ctx_ext_res_meta *walker;
+
+       lockdep_assert_held(&kctx->reg_lock);
+
+       /*
+        * Free any sticky resources which haven't been unmapped.
+        *
+        * Note:
+        * We don't care about refcounts at this point as no future
+        * references to the meta data will be made.
+        * Region termination would find these if we didn't free them
+        * here, but it's more efficient if we do the clean up here.
+        */
+       while (!list_empty(&kctx->ext_res_meta_head)) {
+               walker = list_first_entry(&kctx->ext_res_meta_head,
+                               struct kbase_ctx_ext_res_meta, ext_res_node);
+
+               /* The forced release unlinks and frees the entry */
+               kbase_sticky_resource_release(kctx, walker, 0, true);
+       }
+}
index 1839cced237ed505b6964ab1ad1421d5f74abd23..8f7629a3ce5c94c0ef6fe3493e8d5582e9917eda 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,7 +30,9 @@
 #endif
 
 #include <linux/kref.h>
-
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif                         /* CONFIG_KDS */
 #ifdef CONFIG_UMP
 #include <linux/ump.h>
 #endif                         /* CONFIG_UMP */
@@ -41,6 +43,8 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include "mali_kbase_gator.h"
 #endif
+/* Required for kbase_mem_evictable_unmake */
+#include "mali_kbase_mem_linux.h"
 
 /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
 #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2)    /* round to 4 pages */
@@ -75,6 +79,7 @@ enum kbase_memory_type {
        KBASE_MEM_TYPE_NATIVE,
        KBASE_MEM_TYPE_IMPORTED_UMP,
        KBASE_MEM_TYPE_IMPORTED_UMM,
+       KBASE_MEM_TYPE_IMPORTED_USER_BUF,
        KBASE_MEM_TYPE_ALIAS,
        KBASE_MEM_TYPE_TB,
        KBASE_MEM_TYPE_RAW
@@ -111,6 +116,16 @@ struct kbase_mem_phy_alloc {
        /* kbase_cpu_mappings */
        struct list_head      mappings;
 
+       /* Node used to store this allocation on the eviction list */
+       struct list_head      evict_node;
+       /* Physical backing size when the pages were evicted */
+       size_t                evicted;
+       /*
+        * Back reference to the region structure which created this
+        * allocation, or NULL if it has been freed.
+        */
+       struct kbase_va_region *reg;
+
        /* type of buffer */
        enum kbase_memory_type type;
 
@@ -136,6 +151,15 @@ struct kbase_mem_phy_alloc {
                } alias;
                /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
                struct kbase_context *kctx;
+               struct {
+                       unsigned long address;
+                       unsigned long size;
+                       unsigned long nr_pages;
+                       struct page **pages;
+                       unsigned int current_mapping_usage_count;
+                       struct task_struct *owner;
+                       dma_addr_t *dma_addrs;
+               } user_buf;
        } imported;
 };
 
@@ -232,6 +256,8 @@ struct kbase_va_region {
 
 #define KBASE_REG_SECURE            (1ul << 19)
 
+#define KBASE_REG_DONT_NEED         (1ul << 20)
+
 #define KBASE_REG_ZONE_SAME_VA      KBASE_REG_ZONE(0)
 
 /* only used with 32-bit clients */
@@ -266,6 +292,8 @@ struct kbase_va_region {
        /* non-NULL if this memory object is a kds_resource */
        struct kds_resource *kds_res;
 
+       /* List head used to store the region in the JIT allocation pool */
+       struct list_head jit_node;
 };
 
 /* Common functions */
@@ -308,12 +336,22 @@ static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
 static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
 {
        struct kbase_mem_phy_alloc *alloc;
-       const size_t alloc_size =
-                       sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+       size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+       size_t per_page_size = sizeof(*alloc->pages);
+
+       /* Imported pages may have page private data already in use */
+       if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+               alloc_size += nr_pages *
+                               sizeof(*alloc->imported.user_buf.dma_addrs);
+               per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs);
+       }
 
-       /* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */
+       /*
+        * Prevent nr_pages*per_page_size + sizeof(*alloc) from
+        * wrapping around.
+        */
        if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
-                       / sizeof(*alloc->pages)))
+                       / per_page_size))
                return ERR_PTR(-ENOMEM);
 
        /* Allocate based on the size to reduce internal fragmentation of vmem */
@@ -336,6 +374,10 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en
        INIT_LIST_HEAD(&alloc->mappings);
        alloc->type = type;
 
+       if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
+               alloc->imported.user_buf.dma_addrs =
+                               (void *) (alloc->pages + nr_pages);
+
        return alloc;
 }
 
@@ -354,14 +396,17 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
        else if (!reg->cpu_alloc)
                return -ENOMEM;
        reg->cpu_alloc->imported.kctx = kctx;
+       INIT_LIST_HEAD(&reg->cpu_alloc->evict_node);
        if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) {
                reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
                                KBASE_MEM_TYPE_NATIVE);
                reg->gpu_alloc->imported.kctx = kctx;
+               INIT_LIST_HEAD(&reg->gpu_alloc->evict_node);
        } else {
                reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
        }
 
+       INIT_LIST_HEAD(&reg->jit_node);
        reg->flags &= ~KBASE_REG_FREE;
        return 0;
 }
@@ -481,11 +526,13 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
  * @pages:    Pointer to array holding the physical addresses of the pages to
  *            free.
  * @dirty:    Whether any pages may be dirty in the cache.
+ * @reclaimed: Whether the pages were reclaimable and thus should bypass
+ *             the pool and go straight to the kernel.
  *
  * Like kbase_mem_pool_free() but optimized for freeing many pages.
  */
 void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
-               phys_addr_t *pages, bool dirty);
+               phys_addr_t *pages, bool dirty, bool reclaimed);
 
 /**
  * kbase_mem_pool_size - Get number of free pages in memory pool
@@ -537,6 +584,7 @@ size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size);
 
 
 int kbase_region_tracker_init(struct kbase_context *kctx);
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages);
 void kbase_region_tracker_term(struct kbase_context *kctx);
 
 struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
@@ -748,7 +796,7 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
        SetPagePrivate(p);
        if (sizeof(dma_addr_t) > sizeof(p->private)) {
                /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
-                * private filed stays the same. So we have to be clever and
+                * private field stays the same. So we have to be clever and
                 * use the fact that we only store DMA addresses of whole pages,
                 * so the low bits should be zero */
                KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
@@ -830,4 +878,134 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
 void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
                size_t size, enum dma_data_direction dir);
 
+/**
+ * kbase_jit_debugfs_add - Add per context debugfs entry for JIT.
+ * @kctx: kbase context
+ */
+void kbase_jit_debugfs_add(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_init - Initialize the JIT memory pool management
+ * @kctx: kbase context
+ *
+ * Return: zero on success or negative error number on failure.
+ */
+int kbase_jit_init(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_allocate - Allocate JIT memory
+ * @kctx: kbase context
+ * @info: JIT allocation information
+ *
+ * Return: JIT allocation on success or NULL on failure.
+ */
+struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
+               struct base_jit_alloc_info *info);
+
+/**
+ * kbase_jit_free - Free a JIT allocation
+ * @kctx: kbase context
+ * @reg: JIT allocation
+ *
+ * Frees a JIT allocation and places it into the free pool for later reuse.
+ */
+void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing
+ * @reg: JIT allocation
+ */
+void kbase_jit_backing_lost(struct kbase_va_region *reg);
+
+/**
+ * kbase_jit_evict - Evict a JIT allocation from the pool
+ * @kctx: kbase context
+ *
+ * Evict the least recently used JIT allocation from the pool. This can be
+ * required if normal VA allocations are failing due to VA exhaustion.
+ *
+ * Return: True if a JIT allocation was freed, false otherwise.
+ */
+bool kbase_jit_evict(struct kbase_context *kctx);
+
+/**
+ * kbase_jit_term - Terminate the JIT memory pool management
+ * @kctx: kbase context
+ */
+void kbase_jit_term(struct kbase_context *kctx);
+
+/**
+ * kbase_map_external_resource - Map an external resource to the GPU.
+ * @kctx:              kbase context.
+ * @reg:               The region to map.
+ * @locked_mm:         The mm_struct which has been locked for this operation.
+ * @kds_res_count:     The number of KDS resources.
+ * @kds_resources:     Array of KDS resources.
+ * @kds_access_bitmap: Access bitmap for KDS.
+ * @exclusive:         If the KDS resource requires exclusive access.
+ *
+ * Return: The physical allocation which backs the region on success or NULL
+ * on failure.
+ */
+struct kbase_mem_phy_alloc *kbase_map_external_resource(
+               struct kbase_context *kctx, struct kbase_va_region *reg,
+               struct mm_struct *locked_mm
+#ifdef CONFIG_KDS
+               , u32 *kds_res_count, struct kds_resource **kds_resources,
+               unsigned long *kds_access_bitmap, bool exclusive
+#endif
+               );
+
+/**
+ * kbase_unmap_external_resource - Unmap an external resource from the GPU.
+ * @kctx:  kbase context.
+ * @reg:   The region to unmap or NULL if it has already been released.
+ * @alloc: The physical allocation being unmapped.
+ */
+void kbase_unmap_external_resource(struct kbase_context *kctx,
+               struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
+/**
+ * kbase_sticky_resource_init - Initialize sticky resource management.
+ * @kctx: kbase context
+ *
+ * Return: zero on success or negative error number on failure.
+ */
+int kbase_sticky_resource_init(struct kbase_context *kctx);
+
+/**
+ * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @gpu_addr: The GPU address of the external resource.
+ *
+ * Return: The metadata object which represents the binding between the
+ * external resource and the kbase context on success or NULL on failure.
+ */
+struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
+               struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * kbase_sticky_resource_release - Release a reference on a sticky resource.
+ * @kctx:     kbase context.
+ * @meta:     Binding metadata.
+ * @gpu_addr: GPU address of the external resource.
+ * @force:    If the release is being forced.
+ *
+ * If meta is NULL then gpu_addr will be used to scan the metadata list and
+ * find the matching metadata (if any), otherwise the provided meta will be
+ * used and gpu_addr will be ignored.
+ *
+ * If force is true then the refcount in the metadata is ignored and the
+ * resource will be forced freed.
+ *
+ * Return: True if the release found the metadata and the reference was dropped.
+ */
+bool kbase_sticky_resource_release(struct kbase_context *kctx,
+               struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr, bool force);
+
+/**
+ * kbase_sticky_resource_term - Terminate sticky resource management.
+ * @kctx: kbase context
+ */
+void kbase_sticky_resource_term(struct kbase_context *kctx);
 #endif                         /* _KBASE_MEM_H_ */
index 3e4481a77e151257fd74d9c012be31030c40bad1..0abe0e6ab39c144cd316012a00f590109f804b71 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/mm.h>
+#include <linux/mman.h>
 #include <linux/fs.h>
 #include <linux/version.h>
 #include <linux/dma-mapping.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
        #include <linux/dma-attrs.h>
-#endif
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)  */
 #ifdef CONFIG_DMA_SHARED_BUFFER
 #include <linux/dma-buf.h>
 #endif                         /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#include <linux/shrinker.h>
 
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
 #include <mali_kbase_config_defaults.h>
 #include <mali_kbase_hwaccess_time.h>
 
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
 static const struct vm_operations_struct kbase_vm_ops;
 
+/**
+ * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Shrink (or completely remove) all CPU mappings which reference the shrunk
+ * part of the allocation.
+ *
+ * Note: Caller must be holding the process's mmap_sem lock.
+ */
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+               struct kbase_va_region *reg,
+               u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region or NULL if there isn't one
+ * @new_pages: The number of pages after the shrink
+ * @old_pages: The number of pages before the shrink
+ *
+ * Return: 0 on success, negative -errno on error
+ *
+ * Unmap the shrunk pages from the GPU mapping. Note that the size of the region
+ * itself is unmodified as we still need to reserve the VA, only the page tables
+ * will be modified by this function.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+               struct kbase_va_region *reg,
+               u64 new_pages, u64 old_pages);
+
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
 {
        int zone;
@@ -76,9 +117,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 #if defined(CONFIG_64BIT)
        if (kctx->is_compat)
                cpu_va_bits = 32;
-       else
-               /* force SAME_VA if a 64-bit client */
-               *flags |= BASE_MEM_SAME_VA;
 #endif
 
        if (!kbase_check_alloc_flags(*flags)) {
@@ -89,12 +127,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
        }
 
        if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
-                       kctx->kbdev->system_coherency != COHERENCY_ACE) {
+                       !kbase_device_is_cpu_coherent(kctx->kbdev)) {
                dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
                goto bad_flags;
        }
        if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
-                       kctx->kbdev->system_coherency != COHERENCY_ACE) {
+                       !kbase_device_is_cpu_coherent(kctx->kbdev)) {
                /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
                *flags &= ~BASE_MEM_COHERENT_SYSTEM;
        }
@@ -141,20 +179,29 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
 
        /* mmap needed to setup VA? */
        if (*flags & BASE_MEM_SAME_VA) {
+               unsigned long prot = PROT_NONE;
+               unsigned long va_size = va_pages << PAGE_SHIFT;
+               unsigned long va_map = va_size;
+               unsigned long cookie, cookie_nr;
+               unsigned long cpu_addr;
+
                /* Bind to a cookie */
                if (!kctx->cookies) {
                        dev_err(dev, "No cookies available for allocation!");
+                       kbase_gpu_vm_unlock(kctx);
                        goto no_cookie;
                }
                /* return a cookie */
-               *gpu_va = __ffs(kctx->cookies);
-               kctx->cookies &= ~(1UL << *gpu_va);
-               BUG_ON(kctx->pending_regions[*gpu_va]);
-               kctx->pending_regions[*gpu_va] = reg;
+               cookie_nr = __ffs(kctx->cookies);
+               kctx->cookies &= ~(1UL << cookie_nr);
+               BUG_ON(kctx->pending_regions[cookie_nr]);
+               kctx->pending_regions[cookie_nr] = reg;
+
+               kbase_gpu_vm_unlock(kctx);
 
                /* relocate to correct base */
-               *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
-               *gpu_va <<= PAGE_SHIFT;
+               cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
+               cookie <<= PAGE_SHIFT;
 
                /* See if we must align memory due to GPU PC bits vs CPU VA */
                if ((*flags & BASE_MEM_PROT_GPU_EX) &&
@@ -162,21 +209,101 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
                        *va_alignment = gpu_pc_bits;
                        reg->flags |= KBASE_REG_ALIGNED;
                }
+
+               /*
+                * Pre-10.1 UKU userland calls mmap for us so return the
+                * unaligned address and skip the map.
+                */
+               if (kctx->api_version < KBASE_API_VERSION(10, 1)) {
+                       *gpu_va = (u64) cookie;
+                       return reg;
+               }
+
+               /*
+                * GPUCORE-2190:
+                *
+                * We still need to return alignment for old userspace.
+                */
+               if (*va_alignment)
+                       va_map += 3 * (1UL << *va_alignment);
+
+               if (*flags & BASE_MEM_PROT_CPU_RD)
+                       prot |= PROT_READ;
+               if (*flags & BASE_MEM_PROT_CPU_WR)
+                       prot |= PROT_WRITE;
+
+               cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
+                               MAP_SHARED, cookie);
+
+               if (IS_ERR_VALUE(cpu_addr)) {
+                       kctx->pending_regions[cookie_nr] = NULL;
+                       kctx->cookies |= (1UL << cookie_nr);
+                       goto no_mmap;
+               }
+
+               /*
+                * If we had to allocate extra VA space to force the
+                * alignment release it.
+                */
+               if (*va_alignment) {
+                       unsigned long alignment = 1UL << *va_alignment;
+                       unsigned long align_mask = alignment - 1;
+                       unsigned long addr;
+                       unsigned long addr_end;
+                       unsigned long aligned_addr;
+                       unsigned long aligned_addr_end;
+
+                       addr = cpu_addr;
+                       addr_end = addr + va_map;
+
+                       aligned_addr = (addr + align_mask) &
+                                       ~((u64) align_mask);
+                       aligned_addr_end = aligned_addr + va_size;
+
+                       if ((aligned_addr_end & BASE_MEM_MASK_4GB) == 0) {
+                               /*
+                                * Can't end at 4GB boundary on some GPUs as
+                                * it will halt the shader.
+                                */
+                               aligned_addr += 2 * alignment;
+                               aligned_addr_end += 2 * alignment;
+                       } else if ((aligned_addr & BASE_MEM_MASK_4GB) == 0) {
+                               /*
+                                * Can't start at 4GB boundary on some GPUs as
+                                * it will halt the shader.
+                                */
+                               aligned_addr += alignment;
+                               aligned_addr_end += alignment;
+                       }
+
+                       /* anything to chop off at the start? */
+                       if (addr != aligned_addr)
+                               vm_munmap(addr, aligned_addr - addr);
+
+                       /* anything at the end? */
+                       if (addr_end != aligned_addr_end)
+                               vm_munmap(aligned_addr_end,
+                                               addr_end - aligned_addr_end);
+
+                       *gpu_va = (u64) aligned_addr;
+               } else
+                       *gpu_va = (u64) cpu_addr;
        } else /* we control the VA */ {
                if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
                        dev_warn(dev, "Failed to map memory on GPU");
+                       kbase_gpu_vm_unlock(kctx);
                        goto no_mmap;
                }
                /* return real GPU VA */
                *gpu_va = reg->start_pfn << PAGE_SHIFT;
+
+               kbase_gpu_vm_unlock(kctx);
        }
 
-       kbase_gpu_vm_unlock(kctx);
        return reg;
 
 no_mmap:
 no_cookie:
-       kbase_gpu_vm_unlock(kctx);
 no_mem:
        kbase_mem_phy_alloc_put(reg->cpu_alloc);
        kbase_mem_phy_alloc_put(reg->gpu_alloc);
@@ -254,12 +381,292 @@ out_unlock:
        return ret;
 }
 
+/**
+ * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the
+ * Ephemeral memory eviction list.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages which can be freed.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
+               struct shrink_control *sc)
+{
+       struct kbase_context *kctx;
+       struct kbase_mem_phy_alloc *alloc;
+       unsigned long pages = 0;
+
+       kctx = container_of(s, struct kbase_context, reclaim);
+
+       mutex_lock(&kctx->evict_lock);
+
+       list_for_each_entry(alloc, &kctx->evict_list, evict_node)
+               pages += alloc->nents;
+
+       mutex_unlock(&kctx->evict_lock);
+       return pages;
+}
+
+/**
+ * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction
+ * list for pages and try to reclaim them.
+ * @s:        Shrinker
+ * @sc:       Shrinker control
+ *
+ * Return: Number of pages freed (can be less than requested) or -1 if the
+ * shrinker failed to free pages in its pool.
+ *
+ * Note:
+ * This function accesses region structures without taking the region lock,
+ * this is required as the OOM killer can call the shrinker after the region
+ * lock has already been held.
+ * This is safe as we can guarantee that a region on the eviction list will
+ * not be freed (kbase_mem_free_region removes the allocation from the list
+ * before destroying it), or modified by other parts of the driver.
+ * The eviction list itself is guarded by the eviction lock and the MMU updates
+ * are protected by their own lock.
+ */
+static
+unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s,
+               struct shrink_control *sc)
+{
+       struct kbase_context *kctx;
+       struct kbase_mem_phy_alloc *alloc;
+       struct kbase_mem_phy_alloc *tmp;
+       unsigned long freed = 0;
+
+       kctx = container_of(s, struct kbase_context, reclaim);
+       mutex_lock(&kctx->evict_lock);
+
+       list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) {
+               int err;
+
+               err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg,
+                               0, alloc->nents);
+               if (err != 0) {
+                       /*
+                        * Failed to remove GPU mapping, tell the shrinker
+                        * to stop trying to shrink our slab even though we
+                        * have pages in it.
+                        */
+                       freed = -1;
+                       goto out_unlock;
+               }
+
+               /*
+                * Update alloc->evicted before freeing the backing so the
+                * helper can determine that it needs to bypass the accounting
+                * and memory pool.
+                */
+               alloc->evicted = alloc->nents;
+
+               kbase_free_phy_pages_helper(alloc, alloc->evicted);
+               freed += alloc->evicted;
+               list_del_init(&alloc->evict_node);
+
+               /*
+                * Inform the JIT allocator this region has lost backing
+                * as it might need to free the allocation.
+                */
+               kbase_jit_backing_lost(alloc->reg);
+
+               /* Enough pages have been freed so stop now */
+               if (freed > sc->nr_to_scan)
+                       break;
+       }
+out_unlock:
+       mutex_unlock(&kctx->evict_lock);
+
+       return freed;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s,
+               struct shrink_control *sc)
+{
+       if (sc->nr_to_scan == 0)
+               return kbase_mem_evictable_reclaim_count_objects(s, sc);
+
+       return kbase_mem_evictable_reclaim_scan_objects(s, sc);
+}
+#endif
+
+int kbase_mem_evictable_init(struct kbase_context *kctx)
+{
+       INIT_LIST_HEAD(&kctx->evict_list);
+       mutex_init(&kctx->evict_lock);
+
+       /* Register shrinker */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+       kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink;
+#else
+       kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
+       kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+#endif
+       kctx->reclaim.seeks = DEFAULT_SEEKS;
+       /* Kernel versions prior to 3.1 :
+        * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+       kctx->reclaim.batch = 0;
+#endif
+       register_shrinker(&kctx->reclaim);
+       return 0;
+}
+
+void kbase_mem_evictable_deinit(struct kbase_context *kctx)
+{
+       unregister_shrinker(&kctx->reclaim);
+}
+
+/**
+ * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable.
+ * @alloc: The physical allocation
+ */
+static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+       struct kbase_context *kctx = alloc->imported.kctx;
+       int __maybe_unused new_page_count;
+       int i;
+
+       for (i = 0; i < alloc->nents; i++) {
+               struct page *p = phys_to_page(alloc->pages[i]);
+
+               zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+       }
+
+       kbase_process_page_usage_dec(kctx, alloc->nents);
+       new_page_count = kbase_atomic_sub_pages(alloc->nents,
+                                               &kctx->used_pages);
+       kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+       kbase_tlstream_aux_pagesalloc(
+                       (u32)kctx->id,
+                       (u64)new_page_count);
+#endif
+}
+
+/**
+ * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable.
+ * @alloc: The physical allocation
+ */
+static
+void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
+{
+       struct kbase_context *kctx = alloc->imported.kctx;
+       int __maybe_unused new_page_count;
+       int i;
+
+       new_page_count = kbase_atomic_add_pages(alloc->nents,
+                                               &kctx->used_pages);
+       kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+
+       /* Increase mm counters so that the allocation is accounted for
+        * against the process and thus is visible to the OOM killer,
+        * then remove it from the reclaimable accounting. */
+       kbase_process_page_usage_inc(kctx, alloc->nents);
+
+       for (i = 0; i < alloc->nents; i++) {
+               struct page *p = phys_to_page(alloc->pages[i]);
+
+               zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+       }
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+       kbase_tlstream_aux_pagesalloc(
+                       (u32)kctx->id,
+                       (u64)new_page_count);
+#endif
+}
+
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+       struct kbase_context *kctx = gpu_alloc->imported.kctx;
+       int err;
+
+       lockdep_assert_held(&kctx->reg_lock);
+
+       /* This allocation can't already be on a list. */
+       WARN_ON(!list_empty(&gpu_alloc->evict_node));
+
+       /*
+        * Try to shrink the CPU mappings as required, if we fail then
+        * fail the process of making this allocation evictable.
+        */
+       err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg,
+                       0, gpu_alloc->nents);
+       if (err)
+               return -EINVAL;
+
+       /*
+        * Add the allocation to the eviction list, after this point the shrink
+        * can reclaim it.
+        */
+       mutex_lock(&kctx->evict_lock);
+       list_add(&gpu_alloc->evict_node, &kctx->evict_list);
+       mutex_unlock(&kctx->evict_lock);
+       kbase_mem_evictable_mark_reclaim(gpu_alloc);
+
+       gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED;
+       return 0;
+}
+
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
+{
+       struct kbase_context *kctx = gpu_alloc->imported.kctx;
+       int err = 0;
+
+       lockdep_assert_held(&kctx->reg_lock);
+
+       /*
+        * First remove the allocation from the eviction list as it's no
+        * longer eligible for eviction.
+        */
+       mutex_lock(&kctx->evict_lock);
+       list_del_init(&gpu_alloc->evict_node);
+       mutex_unlock(&kctx->evict_lock);
+
+       if (gpu_alloc->evicted == 0) {
+               /*
+                * The backing is still present, update the VM stats as it's
+                * in use again.
+                */
+               kbase_mem_evictable_unmark_reclaim(gpu_alloc);
+       } else {
+               /* If the region is still alive ... */
+               if (gpu_alloc->reg) {
+                       /* ... allocate replacement backing ... */
+                       err = kbase_alloc_phy_pages_helper(gpu_alloc,
+                                       gpu_alloc->evicted);
+
+                       /*
+                        * ... and grow the mapping back to its
+                        * pre-eviction size.
+                        */
+                       if (!err)
+                               err = kbase_mem_grow_gpu_mapping(kctx,
+                                               gpu_alloc->reg,
+                                               gpu_alloc->evicted, 0);
+
+                       gpu_alloc->evicted = 0;
+               }
+       }
+
+       /* If the region is still alive remove the DONT_NEED attribute. */
+       if (gpu_alloc->reg)
+               gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED;
+
+       return (err == 0);
+}
+
 int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
 {
        struct kbase_va_region *reg;
        int ret = -EINVAL;
        unsigned int real_flags = 0;
        unsigned int prev_flags = 0;
+       bool prev_needed, new_needed;
 
        KBASE_DEBUG_ASSERT(kctx);
 
@@ -270,11 +677,11 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
        flags &= mask;
 
        /* check for only supported flags */
-       if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+       if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE))
                goto out;
 
        /* mask covers bits we don't support? */
-       if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+       if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE))
                goto out;
 
        /* convert flags */
@@ -284,6 +691,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
                real_flags |= KBASE_REG_SHARE_IN;
 
        /* now we can lock down the context, and find the region */
+       down_write(&current->mm->mmap_sem);
        kbase_gpu_vm_lock(kctx);
 
        /* Validate the region */
@@ -291,6 +699,28 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
        if (!reg || (reg->flags & KBASE_REG_FREE))
                goto out_unlock;
 
+       /* Is the region transitioning between not needed and needed? */
+       prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED;
+       new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED;
+       if (prev_needed != new_needed) {
+               /* Aliased allocations can't be made ephemeral */
+               if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1)
+                       goto out_unlock;
+
+               if (new_needed) {
+                       /* Only native allocations can be marked not needed */
+                       if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+                               ret = -EINVAL;
+                               goto out_unlock;
+                       }
+                       ret = kbase_mem_evictable_make(reg->gpu_alloc);
+                       if (ret)
+                               goto out_unlock;
+               } else {
+                       kbase_mem_evictable_unmake(reg->gpu_alloc);
+               }
+       }
+
        /* limit to imported memory */
        if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
             (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
@@ -333,6 +763,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
 
 out_unlock:
        kbase_gpu_vm_unlock(kctx);
+       up_write(&current->mm->mmap_sem);
 out:
        return ret;
 }
@@ -352,10 +783,6 @@ static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, um
        ump_alloc_flags cpu_flags;
        ump_alloc_flags gpu_flags;
 
-       KBASE_DEBUG_ASSERT(kctx);
-       KBASE_DEBUG_ASSERT(va_pages);
-       KBASE_DEBUG_ASSERT(flags);
-
        if (*flags & BASE_MEM_SECURE)
                goto bad_flags;
 
@@ -554,6 +981,106 @@ no_buf:
 }
 #endif  /* CONFIG_DMA_SHARED_BUFFER */
 
+
+/**
+ * kbase_mem_from_user_buffer - Create a region importing a user-space buffer
+ * @kctx:     Context the region is created for
+ * @address:  User virtual address of the start of the buffer
+ * @size:     Size of the buffer in bytes
+ * @va_pages: Output, number of pages spanned by [address, address + size)
+ * @flags:    In/out BASE_MEM_* flags; BASE_MEM_SAME_VA is always cleared and
+ *            BASE_MEM_NEED_MMAP is set for non-compat contexts on 64-bit
+ *
+ * The pages are only probed here (get_user_pages() with a NULL page list), no
+ * reference to them is kept. An array large enough to hold the page pointers
+ * is allocated and stored in the allocation, and nents is left at 0.
+ *
+ * Return: The new region, or NULL on any failure.
+ */
+static struct kbase_va_region *kbase_mem_from_user_buffer(
+               struct kbase_context *kctx, unsigned long address,
+               unsigned long size, u64 *va_pages, u64 *flags)
+{
+       struct kbase_va_region *reg;
+       long faulted_pages;
+       int zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+       *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) -
+               PFN_DOWN(address);
+       if (!*va_pages)
+               goto bad_size;
+
+       if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+               /* 64-bit address range is the max */
+               goto bad_size;
+
+       /* SAME_VA generally not supported with imported memory (no known use cases) */
+       *flags &= ~BASE_MEM_SAME_VA;
+
+#ifdef CONFIG_64BIT
+       if (!kctx->is_compat) {
+               /* 64-bit tasks must MMAP anyway, but not expose this address to
+                * clients */
+               *flags |= BASE_MEM_NEED_MMAP;
+               zone = KBASE_REG_ZONE_SAME_VA;
+       }
+#endif
+       reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone);
+
+       if (!reg)
+               goto no_region;
+
+       reg->gpu_alloc = kbase_alloc_create(*va_pages,
+                       KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+       if (IS_ERR_OR_NULL(reg->gpu_alloc))
+               goto no_alloc_obj;
+
+       /*
+        * CPU and GPU share one physical allocation object; this takes a
+        * second reference on it which the error path below must drop too.
+        */
+       reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+       reg->flags &= ~KBASE_REG_FREE;
+       reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */
+       reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */
+
+       if (*flags & BASE_MEM_PROT_CPU_WR)
+               reg->flags |= KBASE_REG_CPU_WR;
+
+       if (*flags & BASE_MEM_PROT_CPU_RD)
+               reg->flags |= KBASE_REG_CPU_RD;
+
+       if (*flags & BASE_MEM_PROT_GPU_WR)
+               reg->flags |= KBASE_REG_GPU_WR;
+
+       if (*flags & BASE_MEM_PROT_GPU_RD)
+               reg->flags |= KBASE_REG_GPU_RD;
+
+       down_read(&current->mm->mmap_sem);
+
+       /*
+        * A sanity check that get_user_pages will work on the memory
+        * (so the initial import fails on weird memory regions rather than
+        * the job failing when we try to handle the external resources).
+        * It doesn't take a reference to the pages (because the page list is
+        * NULL). We can't really store the page list because that would
+        * involve keeping the pages pinned - instead we pin/unpin around the
+        * job (as part of the external resources handling code).
+        */
+       faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+                       reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL);
+       up_read(&current->mm->mmap_sem);
+
+       /* Also catches a negative error code from get_user_pages(). */
+       if (faulted_pages != *va_pages)
+               goto fault_mismatch;
+
+       reg->gpu_alloc->imported.user_buf.size = size;
+       reg->gpu_alloc->imported.user_buf.address = address;
+       reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages;
+       reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages,
+                       sizeof(struct page *), GFP_KERNEL);
+       reg->gpu_alloc->imported.user_buf.owner = current;
+
+       if (!reg->gpu_alloc->imported.user_buf.pages)
+               goto no_page_array;
+
+       reg->gpu_alloc->nents = 0;
+       reg->extent = 0;
+
+       return reg;
+
+no_page_array:
+fault_mismatch:
+       /*
+        * Drop both references (the one taken for cpu_alloc and the initial
+        * one held through gpu_alloc); dropping only one would leak the
+        * allocation object once reg is freed.
+        * NOTE(review): this makes the final put run on a partially set up
+        * allocation - confirm the release path copes with
+        * imported.user_buf.pages being unset/NULL.
+        */
+       kbase_mem_phy_alloc_put(reg->cpu_alloc);
+       kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+       kfree(reg);
+no_region:
+bad_size:
+       return NULL;
+
+}
+
+
 u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
                    u64 nents, struct base_mem_aliasing_info *ai,
                    u64 *num_pages)
@@ -632,8 +1159,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
 
        /* validate and add src handles */
        for (i = 0; i < nents; i++) {
-               if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) {
-                       if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE)
+               if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+                       if (ai[i].handle.basep.handle !=
+                           BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE)
                                goto bad_handle; /* unsupported magic handle */
                        if (!ai[i].length)
                                goto bad_handle; /* must be > 0 */
@@ -645,13 +1173,17 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
                        struct kbase_va_region *aliasing_reg;
                        struct kbase_mem_phy_alloc *alloc;
 
-                       aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT);
+                       aliasing_reg = kbase_region_tracker_find_region_base_address(
+                               kctx,
+                               (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT);
 
                        /* validate found region */
                        if (!aliasing_reg)
                                goto bad_handle; /* Not found */
                        if (aliasing_reg->flags & KBASE_REG_FREE)
                                goto bad_handle; /* Free region */
+                       if (aliasing_reg->flags & KBASE_REG_DONT_NEED)
+                               goto bad_handle; /* Ephemeral region */
                        if (!aliasing_reg->gpu_alloc)
                                goto bad_handle; /* No alloc */
                        if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
@@ -736,7 +1268,9 @@ bad_flags:
        return 0;
 }
 
-int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags)
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+               void __user *phandle, u64 *gpu_va, u64 *va_pages,
+               u64 *flags)
 {
        struct kbase_va_region *reg;
 
@@ -759,19 +1293,53 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 
        switch (type) {
 #ifdef CONFIG_UMP
-       case BASE_MEM_IMPORT_TYPE_UMP:
-               reg = kbase_mem_from_ump(kctx, (ump_secure_id)handle, va_pages, flags);
-               break;
+       case BASE_MEM_IMPORT_TYPE_UMP: {
+               ump_secure_id id;
+
+               if (get_user(id, (ump_secure_id __user *)phandle))
+                       reg = NULL;
+               else
+                       reg = kbase_mem_from_ump(kctx, id, va_pages, flags);
+       }
+       break;
 #endif /* CONFIG_UMP */
 #ifdef CONFIG_DMA_SHARED_BUFFER
-       case BASE_MEM_IMPORT_TYPE_UMM:
-               reg = kbase_mem_from_umm(kctx, handle, va_pages, flags);
-               break;
+       case BASE_MEM_IMPORT_TYPE_UMM: {
+               int fd;
+
+               if (get_user(fd, (int __user *)phandle))
+                       reg = NULL;
+               else
+                       reg = kbase_mem_from_umm(kctx, fd, va_pages, flags);
+       }
+       break;
 #endif /* CONFIG_DMA_SHARED_BUFFER */
-       default:
+       case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
+               struct base_mem_import_user_buffer user_buffer;
+               void __user *uptr;
+
+               if (copy_from_user(&user_buffer, phandle,
+                               sizeof(user_buffer))) {
+                       reg = NULL;
+               } else {
+#ifdef CONFIG_COMPAT
+                       if (kctx->is_compat)
+                               uptr = compat_ptr(user_buffer.ptr.compat_value);
+                       else
+#endif
+                               uptr = user_buffer.ptr.value;
+
+                       reg = kbase_mem_from_user_buffer(kctx,
+                                       (unsigned long)uptr, user_buffer.length,
+                                       va_pages, flags);
+               }
+               break;
+       }
+       default: {
                reg = NULL;
                break;
        }
+       }
 
        if (!reg)
                goto no_reg;
@@ -837,6 +1405,7 @@ static int zap_range_nolock(struct mm_struct *mm,
        int err = -EINVAL; /* in case end < start */
 
        while (start < end) {
+               unsigned long local_start;
                unsigned long local_end;
 
                vma = find_vma_intersection(mm, start, end);
@@ -847,12 +1416,17 @@ static int zap_range_nolock(struct mm_struct *mm,
                if (vma->vm_ops != vm_ops)
                        goto try_next;
 
+               local_start = vma->vm_start;
+
+               if (start > local_start)
+                       local_start = start;
+
                local_end = vma->vm_end;
 
                if (end < local_end)
                        local_end = end;
 
-               err = zap_vma_ptes(vma, start, local_end - start);
+               err = zap_vma_ptes(vma, local_start, local_end - local_start);
                if (unlikely(err))
                        break;
 
@@ -864,19 +1438,110 @@ try_next:
        return err;
 }
 
+/*
+ * Insert the pages in the range [old_pages, new_pages) of the region's GPU
+ * page array into the GPU MMU, starting at reg->start_pfn + old_pages and
+ * using the region's flags.
+ *
+ * The caller must hold kctx->reg_lock. Returns whatever
+ * kbase_mmu_insert_pages() returns.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+               struct kbase_va_region *reg,
+               u64 new_pages, u64 old_pages)
+{
+       u64 nr_added = new_pages - old_pages;
+       phys_addr_t *pages;
+
+       lockdep_assert_held(&kctx->reg_lock);
+
+       /* Only the tail of the page array past old_pages is new. */
+       pages = kbase_get_gpu_phy_pages(reg);
+
+       return kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+                       pages + old_pages, nr_added, reg->flags);
+}
+
+/*
+ * Zap the CPU page table entries of every CPU mapping of the region's CPU
+ * allocation that reaches beyond new_pages. For each such mapping the range
+ * from the first page past new_pages (or from the mapping start, when the
+ * whole mapping lies past it) up to the mapping end is zapped.
+ *
+ * old_pages is not used by this function.
+ *
+ * Asserts that kctx->process_mm->mmap_sem is held. Returns 0 on success, or
+ * the first non-zero result of zap_range_nolock(), at which point the walk
+ * stops.
+ */
+static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
+               struct kbase_va_region *reg,
+               u64 new_pages, u64 old_pages)
+{
+       struct kbase_mem_phy_alloc *alloc = reg->cpu_alloc;
+       struct kbase_cpu_mapping *map;
+
+       lockdep_assert_held(&kctx->process_mm->mmap_sem);
+
+       list_for_each_entry(map, &alloc->mappings, mappings_list) {
+               unsigned long nr_mapped;
+               unsigned long first_bad;
+               int err;
+
+               nr_mapped = (map->vm_end - map->vm_start) >> PAGE_SHIFT;
+
+               /* Mappings ending at or before the new size are untouched. */
+               if ((map->page_off + nr_mapped) <= new_pages)
+                       continue;
+
+               /* Page index, relative to the mapping, where zapping starts. */
+               first_bad = 0;
+               if (new_pages > map->page_off)
+                       first_bad = new_pages - map->page_off;
+
+               err = zap_range_nolock(current->mm,
+                               &kbase_vm_ops,
+                               map->vm_start +
+                               (first_bad << PAGE_SHIFT),
+                               map->vm_end);
+
+               WARN(err,
+                    "Failed to zap VA range (0x%lx - 0x%lx);\n",
+                    map->vm_start +
+                    (first_bad << PAGE_SHIFT),
+                    map->vm_end
+                    );
+
+               /* A failed zap aborts the whole shrink. */
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Tear down the GPU MMU entries for the pages in [new_pages, old_pages) of
+ * the region, i.e. old_pages - new_pages pages starting at
+ * reg->start_pfn + new_pages.
+ *
+ * Returns 0 on success or the error from kbase_mmu_teardown_pages(). The
+ * physical pages themselves are not freed here.
+ */
+static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx,
+               struct kbase_va_region *reg,
+               u64 new_pages, u64 old_pages)
+{
+       int err;
+
+       err = kbase_mmu_teardown_pages(kctx,
+                       reg->start_pfn + new_pages, old_pages - new_pages);
+       if (err)
+               return err;
+
+#ifndef CONFIG_MALI_NO_MALI
+       /*
+        * On hardware with issue 6367, wait for the GPU to flush its write
+        * buffer before the physical pages are freed.
+        */
+       if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+               kbase_wait_write_flush(kctx);
+#endif
+
+       return 0;
+}
+
 int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
 {
        u64 old_pages;
        u64 delta;
        int res = -EINVAL;
        struct kbase_va_region *reg;
-       phys_addr_t *phy_pages;
+       bool read_locked = false;
 
        KBASE_DEBUG_ASSERT(kctx);
        KBASE_DEBUG_ASSERT(failure_reason);
        KBASE_DEBUG_ASSERT(gpu_addr != 0);
 
-       down_read(&current->mm->mmap_sem);
+       down_write(&current->mm->mmap_sem);
        kbase_gpu_vm_lock(kctx);
 
        /* Validate the region */
@@ -910,6 +1575,11 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en
                *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
                goto out_unlock;
        }
+       /* can't grow regions which are ephemeral */
+       if (reg->flags & BASE_MEM_DONT_NEED) {
+               *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+               goto out_unlock;
+       }
 
        if (new_pages == reg->gpu_alloc->nents) {
                /* no change */
@@ -917,14 +1587,17 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en
                goto out_unlock;
        }
 
-       phy_pages = kbase_get_gpu_phy_pages(reg);
        old_pages = kbase_reg_current_backed_size(reg);
-
        if (new_pages > old_pages) {
-               /* growing */
-               int err;
-
                delta = new_pages - old_pages;
+
+               /*
+                * No update to the mm so downgrade the writer lock to a read
+                * lock so other readers aren't blocked after this point.
+                */
+               downgrade_write(&current->mm->mmap_sem);
+               read_locked = true;
+
                /* Allocate some more pages */
                if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
                        *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
@@ -939,9 +1612,15 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en
                                goto out_unlock;
                        }
                }
-               err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
-                               phy_pages + old_pages, delta, reg->flags);
-               if (err) {
+
+               /* No update required for CPU mappings, that's done on fault. */
+
+               /* Update GPU mapping. */
+               res = kbase_mem_grow_gpu_mapping(kctx, reg,
+                               new_pages, old_pages);
+
+               /* On error free the new pages */
+               if (res) {
                        kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
                        if (reg->cpu_alloc != reg->gpu_alloc)
                                kbase_free_phy_pages_helper(reg->gpu_alloc,
@@ -950,60 +1629,35 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en
                        goto out_unlock;
                }
        } else {
-               /* shrinking */
-               struct kbase_cpu_mapping *mapping;
-               int err;
+               delta = old_pages - new_pages;
 
-               /* first, unmap from any mappings affected */
-               list_for_each_entry(mapping, &reg->cpu_alloc->mappings, mappings_list) {
-                       unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT;
-
-                       /* is this mapping affected ?*/
-                       if ((mapping->page_off + mapping_size) > new_pages) {
-                               unsigned long first_bad = 0;
-                               int zap_res;
-
-                               if (new_pages > mapping->page_off)
-                                       first_bad = new_pages - mapping->page_off;
-
-                               zap_res = zap_range_nolock(current->mm,
-                                               &kbase_vm_ops,
-                                               mapping->vm_start +
-                                               (first_bad << PAGE_SHIFT),
-                                               mapping->vm_end);
-                               WARN(zap_res,
-                                    "Failed to zap VA range (0x%lx - 0x%lx);\n",
-                                    mapping->vm_start +
-                                    (first_bad << PAGE_SHIFT),
-                                    mapping->vm_end
-                                    );
-                       }
+               /* Update all CPU mapping(s) */
+               res = kbase_mem_shrink_cpu_mapping(kctx, reg,
+                               new_pages, old_pages);
+               if (res) {
+                       *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+                       goto out_unlock;
                }
 
-               /* Free some pages */
-               delta = old_pages - new_pages;
-               err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages,
-                               delta);
-               if (err) {
+               /* Update the GPU mapping */
+               res = kbase_mem_shrink_gpu_mapping(kctx, reg,
+                               new_pages, old_pages);
+               if (res) {
                        *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
                        goto out_unlock;
                }
-#ifndef CONFIG_MALI_NO_MALI
-               if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
-                       /* Wait for GPU to flush write buffer before freeing physical pages */
-                       kbase_wait_write_flush(kctx);
-               }
-#endif
+
                kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
                if (reg->cpu_alloc != reg->gpu_alloc)
                        kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
        }
 
-       res = 0;
-
 out_unlock:
        kbase_gpu_vm_unlock(kctx);
-       up_read(&current->mm->mmap_sem);
+       if (read_locked)
+               up_read(&current->mm->mmap_sem);
+       else
+               up_write(&current->mm->mmap_sem);
 
        return res;
 }
@@ -1075,6 +1729,10 @@ static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        if (map->page_off + rel_pgoff >= map->alloc->nents)
                goto locked_bad_fault;
 
+       /* Fault on access to DONT_NEED regions */
+       if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
+               goto locked_bad_fault;
+
        /* insert all valid pages from the fault location */
        for (i = rel_pgoff;
             i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
@@ -1222,7 +1880,11 @@ static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_st
                        goto out;
                }
 
-               kbase_device_trace_buffer_install(kctx, tb, size);
+               err = kbase_device_trace_buffer_install(kctx, tb, size);
+               if (err) {
+                       vfree(tb);
+                       goto out;
+               }
        } else {
                err = -EINVAL;
                goto out;
@@ -1438,8 +2100,8 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma)
        rcu_read_unlock();
 
        switch (vma->vm_pgoff) {
-       case PFN_DOWN(BASE_MEM_INVALID_HANDLE):
-       case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE):
+       case PFN_DOWN(BASEP_MEM_INVALID_HANDLE):
+       case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE):
                /* Illegal handle for direct map */
                err = -EINVAL;
                goto out_unlock;
@@ -1686,6 +2348,9 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
        if (page_index + page_count > kbase_reg_current_backed_size(reg))
                goto out_unlock;
 
+       if (reg->flags & KBASE_REG_DONT_NEED)
+               goto out_unlock;
+
        page_array = kbase_get_cpu_phy_pages(reg);
        if (!page_array)
                goto out_unlock;
@@ -1720,6 +2385,12 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
        map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
        sync_needed = map->is_cached;
 
+#ifdef CONFIG_MALI_COH_KERN
+       /* kernel can use coherent memory if supported */
+       if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+               sync_needed = false;
+#endif
+
        if (sync_needed) {
                /* Sync first page */
                size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
@@ -1761,6 +2432,11 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
        void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
        bool sync_needed = map->is_cached;
        vunmap(addr);
+#ifdef CONFIG_MALI_COH_KERN
+       /* kernel can use coherent memory if supported */
+       if (kctx->kbdev->system_coherency == COHERENCY_ACE)
+               sync_needed = false;
+#endif
        if (sync_needed) {
                off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
                size_t size = map->size;
index 1d854152704b72aac3afbe2929a1064985133b83..6c0fb5648514ef55d40c09f29445b5599e2c15e2 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -34,12 +34,80 @@ struct kbase_hwc_dma_mapping {
 
 struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment);
 int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
-int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
+               void __user *phandle, u64 *gpu_va, u64 *va_pages,
+               u64 *flags);
 u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
 int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
 int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
 int kbase_mmap(struct file *file, struct vm_area_struct *vma);
 
+/**
+ * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to initialize.
+ *
+ * Return: Zero on success or -errno on failure.
+ */
+int kbase_mem_evictable_init(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction
+ * mechanism.
+ * @kctx: The kbase context to de-initialize.
+ */
+void kbase_mem_evictable_deinit(struct kbase_context *kctx);
+
+/**
+ * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation
+ * @kctx:      Context the region belongs to
+ * @reg:       The GPU region
+ * @new_pages: The number of pages after the grow
+ * @old_pages: The number of pages before the grow
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Expand the GPU mapping to encompass the new physical pages which have
+ * been added to the allocation.
+ *
+ * Note: Caller must be holding the region lock.
+ */
+int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
+               struct kbase_va_region *reg,
+               u64 new_pages, u64 old_pages);
+
+/**
+ * kbase_mem_evictable_make - Make a physical allocation eligible for eviction
+ * @gpu_alloc: The physical allocation to make evictable
+ *
+ * Return: 0 on success, -errno on error.
+ *
+ * Take the provided region and make all the physical pages within it
+ * reclaimable by the kernel, updating the per-process VM stats as well.
+ * Remove any CPU mappings (as these can't be removed in the shrinker callback
+ * as mmap_sem might already be taken) but leave the GPU mapping intact
+ * until the shrinker reclaims the allocation.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc);
+
+/**
+ * kbase_mem_evictable_unmake - Remove a physical allocation's eligibility for
+ * eviction.
+ * @alloc: The physical allocation to remove eviction eligibility from.
+ *
+ * Return: True if the allocation had its backing restored and false if
+ * it hasn't.
+ *
+ * Make the physical pages in the region no longer reclaimable and update the
+ * per-process stats, if the shrinker has already evicted the memory then
+ * re-allocate it if the region is still alive.
+ *
+ * Note: Must be called with the region lock of the containing context.
+ */
+bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc);
+
 struct kbase_vmap_struct {
        u64 gpu_addr;
        struct kbase_mem_phy_alloc *cpu_alloc;
index a049205d0e90d3b6b531e412948fc8876d319b95..c0f47be362f326c39ee01cac0f5bac72002409a5 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #include <linux/atomic.h>
 #include <linux/version.h>
 
-/* Backwards compatibility with kernels using the old carveout allocator */
+/* This function is only provided for backwards compatibility with kernels
+ * which use the old carveout allocator.
+ *
+ * The forward declaration is to keep sparse happy.
+ */
+int __init kbase_carveout_mem_reserve(
+               phys_addr_t size);
 int __init kbase_carveout_mem_reserve(phys_addr_t size)
 {
        return 0;
@@ -37,6 +43,9 @@ int __init kbase_carveout_mem_reserve(phys_addr_t size)
                kbase_mem_pool_max_size(pool),  \
                ##__VA_ARGS__)
 
+#define NOT_DIRTY false
+#define NOT_RECLAIMED false
+
 static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool)
 {
        spin_lock(&pool->pool_lock);
@@ -73,6 +82,8 @@ static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
        list_add(&p->lru, &pool->page_list);
        pool->cur_size++;
 
+       zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
        pool_dbg(pool, "added page\n");
 }
 
@@ -86,8 +97,14 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p)
 static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
                struct list_head *page_list, size_t nr_pages)
 {
+       struct page *p;
+
        lockdep_assert_held(&pool->pool_lock);
 
+       list_for_each_entry(p, page_list, lru) {
+               zone_page_state_add(1, page_zone(p), NR_SLAB_RECLAIMABLE);
+       }
+
        list_splice(page_list, &pool->page_list);
        pool->cur_size += nr_pages;
 
@@ -115,6 +132,8 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool)
        list_del_init(&p->lru);
        pool->cur_size--;
 
+       zone_page_state_add(-1, page_zone(p), NR_SLAB_RECLAIMABLE);
+
        pool_dbg(pool, "removed page\n");
 
        return p;
@@ -489,7 +508,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
        return 0;
 
 err_rollback:
-       kbase_mem_pool_free_pages(pool, i, pages, false);
+       kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED);
        return err;
 }
 
@@ -532,7 +551,7 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool,
 }
 
 void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
-               phys_addr_t *pages, bool dirty)
+               phys_addr_t *pages, bool dirty, bool reclaimed)
 {
        struct kbase_mem_pool *next_pool = pool->next_pool;
        struct page *p;
@@ -542,22 +561,24 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
 
        pool_dbg(pool, "free_pages(%zu):\n", nr_pages);
 
-       /* Add to this pool */
-       nr_to_pool = kbase_mem_pool_capacity(pool);
-       nr_to_pool = min(nr_pages, nr_to_pool);
+       if (!reclaimed) {
+               /* Add to this pool */
+               nr_to_pool = kbase_mem_pool_capacity(pool);
+               nr_to_pool = min(nr_pages, nr_to_pool);
 
-       kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
+               kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty);
 
-       i += nr_to_pool;
+               i += nr_to_pool;
 
-       if (i != nr_pages && next_pool) {
-               /* Spill to next pool (may overspill) */
-               nr_to_pool = kbase_mem_pool_capacity(next_pool);
-               nr_to_pool = min(nr_pages - i, nr_to_pool);
+               if (i != nr_pages && next_pool) {
+                       /* Spill to next pool (may overspill) */
+                       nr_to_pool = kbase_mem_pool_capacity(next_pool);
+                       nr_to_pool = min(nr_pages - i, nr_to_pool);
 
-               kbase_mem_pool_add_array(next_pool, nr_to_pool, pages + i,
-                               true, dirty);
-               i += nr_to_pool;
+                       kbase_mem_pool_add_array(next_pool, nr_to_pool,
+                                       pages + i, true, dirty);
+                       i += nr_to_pool;
+               }
        }
 
        /* Free any remaining pages to kernel */
@@ -566,6 +587,10 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages,
                        continue;
 
                p = phys_to_page(pages[i]);
+               if (reclaimed)
+                       zone_page_state_add(-1, page_zone(p),
+                                       NR_SLAB_RECLAIMABLE);
+
                kbase_mem_pool_free_page(pool, p);
                pages[i] = 0;
        }
index bf60c1920294abac3130bbf9c7801725aa918542..0b19d05c46e809bc9542868974054e6109896951 100755 (executable)
 
 #ifdef CONFIG_DEBUG_FS
 
-/* mam_profile file name max length 22 based on format <int>_<int>\0 */
-#define KBASEP_DEBUGFS_FNAME_SIZE_MAX (10+1+10+1)
-
-void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
-               size_t size)
-{
-       spin_lock(&kctx->mem_profile_lock);
-       kfree(kctx->mem_profile_data);
-       kctx->mem_profile_data = data;
-       kctx->mem_profile_size = size;
-       spin_unlock(&kctx->mem_profile_lock);
-}
-
 /** Show callback for the @c mem_profile debugfs file.
  *
  * This function is called to get the contents of the @c mem_profile debugfs
@@ -40,19 +27,19 @@ void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
  * @param sfile The debugfs entry
  * @param data Data associated with the entry
  *
- * @return 0 if successfully prints data in debugfs entry file
- *         -1 if it encountered an error
+ * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise
  */
 static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
 {
        struct kbase_context *kctx = sfile->private;
 
-       KBASE_DEBUG_ASSERT(kctx != NULL);
+       mutex_lock(&kctx->mem_profile_lock); /* also taken by insert and remove */
 
-       spin_lock(&kctx->mem_profile_lock);
        seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+
        seq_putc(sfile, '\n');
-       spin_unlock(&kctx->mem_profile_lock);
+
+       mutex_unlock(&kctx->mem_profile_lock);
 
        return 0;
 }
@@ -72,34 +59,60 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = {
        .release = single_release,
 };
 
-void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx)
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+                                       size_t size)
 {
-       KBASE_DEBUG_ASSERT(kctx != NULL);
+       int err = 0;
+
+       mutex_lock(&kctx->mem_profile_lock);
 
-       spin_lock_init(&kctx->mem_profile_lock);
+       dev_dbg(kctx->kbdev->dev, "initialised: %d",
+                               kctx->mem_profile_initialized);
 
-       debugfs_create_file("mem_profile", S_IRUGO, kctx->kctx_dentry, kctx,
-                       &kbasep_mem_profile_debugfs_fops);
+       if (!kctx->mem_profile_initialized) { /* debugfs file not created yet */
+               if (!debugfs_create_file("mem_profile", S_IRUGO,
+                                       kctx->kctx_dentry, kctx,
+                                       &kbasep_mem_profile_debugfs_fops)) {
+                       err = -EAGAIN; /* data is neither stored nor freed on this path */
+               } else {
+                       kctx->mem_profile_initialized = true;
+               }
+       }
+
+       if (kctx->mem_profile_initialized) { /* swap in the new buffer */
+               kfree(kctx->mem_profile_data); /* release the previous buffer */
+               kctx->mem_profile_data = data;
+               kctx->mem_profile_size = size;
+       }
+
+       dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d",
+                               err, kctx->mem_profile_initialized);
+
+       mutex_unlock(&kctx->mem_profile_lock);
+
+       return err;
 }
 
 void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
 {
-       KBASE_DEBUG_ASSERT(kctx != NULL);
+       mutex_lock(&kctx->mem_profile_lock); /* same lock the show callback takes */
+
+       dev_dbg(kctx->kbdev->dev, "initialised: %d",
+                               kctx->mem_profile_initialized);
 
-       spin_lock(&kctx->mem_profile_lock);
        kfree(kctx->mem_profile_data);
        kctx->mem_profile_data = NULL;
-       spin_unlock(&kctx->mem_profile_lock);
+       kctx->mem_profile_size = 0; /* keep the size in step with the NULL buffer */
+
+       mutex_unlock(&kctx->mem_profile_lock);
 }
 
 #else /* CONFIG_DEBUG_FS */
 
-/**
- * @brief Stub function for when debugfs is disabled
- */
-void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
-               size_t size)
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+                                       size_t size)
 {
        kfree(data);
+       return 0; /* always reports success when CONFIG_DEBUG_FS is off */
 }
 #endif /* CONFIG_DEBUG_FS */
index 205bd378c8eca235b3c1188a2cef82c5dbf303c5..9555197f305ccb6de10c72242e93937a350a6b17 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-/**
- * @brief Add new entry to Mali memory profile debugfs
- */
-void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx);
-
 /**
  * @brief Remove entry from Mali memory profile debugfs
  */
 void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
 
 /**
- * @brief Insert data to debugfs file, so it can be read by userspce
+ * @brief Insert @p data to the debugfs file so it can be read by userspace
+ *
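+ * On success the function takes ownership of @p data and frees it later when
+ * new data is inserted or the context's profile data is removed. If the
+ * debugfs entry cannot be created, @p data is neither stored nor freed here.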
  *
- * Function takes ownership of @c data and frees it later when new data
- * are inserted.
+ * If the debugfs entry corresponding to the @p kctx doesn't exist,
+ * an attempt will be made to create it.
  *
- * @param kctx Context to which file data should be inserted
- * @param data NULL-terminated string to be inserted to mem_profile file,
-               without trailing new line character
- * @param size @c buf length
+ * @param kctx The context whose debugfs file @p data should be inserted to
+ * @param data A NUL-terminated string to be inserted to the debugfs file,
+ *             without the trailing new line character
+ * @param size The length of the @p data string
+ * @return 0 if @p data was inserted correctly,
+ *         -EAGAIN if the debugfs entry could not be created
+ * @post @ref mem_profile_initialized will be set to @c true
+ *       the first time this function succeeds.
  */
-void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
-               size_t size);
+int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+                                       size_t size);
 
 #endif  /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
 
index c061f2a988d275c4d422bbca97f6e782741e3432..bf45d39f5a0552fbf5e2235b5c511095dbd542af 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,9 +30,7 @@
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
 #include <mali_kbase_gator.h>
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
 #include <mali_kbase_tlstream.h>
-#endif
 #include <mali_kbase_debug.h>
 
 #define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
 
 /**
  * kbase_mmu_sync_pgd - sync page directory to memory
- * @dev:       Device pointer.
+ * @kbdev:     Device pointer.
  * @handle:    Address of DMA region.
  * @size:       Size of the region to sync.
  *
  * This should be called after each page directory update.
+ * The sync is skipped when @kbdev->system_coherency is COHERENCY_ACE.
  */
 
-static void kbase_mmu_sync_pgd(struct device *dev,
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
                dma_addr_t handle, size_t size)
 {
-
-       dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE);
+       /* If page table is not coherent then ensure the gpu can read
+        * the pages from memory
+        */
+       if (kbdev->system_coherency != COHERENCY_ACE)
+               dma_sync_single_for_device(kbdev->dev, handle, size,
+                               DMA_TO_DEVICE);
 }
 
 /*
@@ -136,6 +138,18 @@ void page_fault_worker(struct work_struct *data)
                dev_warn(kbdev->dev, "Access flag unexpectedly set");
                goto fault_done;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+       case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
+
+               kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+                                       "Address size fault");
+               goto fault_done;
+
+       case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
+               kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+                                       "Memory attributes fault");
+               goto fault_done;
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
        default:
                kbase_mmu_report_fault_and_kill(kctx, faulting_as,
@@ -164,6 +178,13 @@ void page_fault_worker(struct work_struct *data)
                goto fault_done;
        }
 
+       if ((region->flags & KBASE_REG_DONT_NEED)) {
+               kbase_gpu_vm_unlock(kctx);
+               kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+                               "Don't need memory can't be grown");
+               goto fault_done;
+       }
+
        /* find the size we need to grow it by */
        /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
         * validating the fault_adress to be within a size_t from the start_pfn */
@@ -238,14 +259,9 @@ void page_fault_worker(struct work_struct *data)
                /* alloc success */
                KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
 
-               /* AS transaction begin */
-               mutex_lock(&faulting_as->transaction_mutex);
-
                /* set up the new pages */
                err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
                if (err) {
-                       /* failed to insert pages, handle as a normal PF */
-                       mutex_unlock(&faulting_as->transaction_mutex);
                        kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
                        if (region->gpu_alloc != region->cpu_alloc)
                                kbase_free_phy_pages_helper(region->cpu_alloc,
@@ -259,11 +275,10 @@ void page_fault_worker(struct work_struct *data)
 #if defined(CONFIG_MALI_GATOR_SUPPORT)
                kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
 #endif
-#if defined(CONFIG_MALI_MIPE_ENABLED)
-               kbase_tlstream_aux_pagefault(
-                               as_no,
-                               atomic_read(&kctx->used_pages));
-#endif
+               kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages);
+
+               /* AS transaction begin */
+               mutex_lock(&faulting_as->transaction_mutex);
 
                /* flush L2 and unlock the VA (resumes the MMU) */
                if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
@@ -316,15 +331,20 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
        u64 *page;
        int i;
        struct page *p;
+       int new_page_count __maybe_unused;
 
        KBASE_DEBUG_ASSERT(NULL != kctx);
-       kbase_atomic_add_pages(1, &kctx->used_pages);
+       new_page_count = kbase_atomic_add_pages(1, &kctx->used_pages);
        kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
 
        p = kbase_mem_pool_alloc(&kctx->mem_pool);
        if (!p)
                goto sub_pages;
 
+       kbase_tlstream_aux_pagesalloc(
+                       (u32)kctx->id,
+                       (u64)new_page_count);
+
        page = kmap(p);
        if (NULL == page)
                goto alloc_free;
@@ -334,7 +354,7 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
        for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
                kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
 
-       kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE);
+       kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
 
        kunmap(p);
        return page_to_phys(p);
@@ -360,7 +380,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd,
        KBASE_DEBUG_ASSERT(pgd);
        KBASE_DEBUG_ASSERT(NULL != kctx);
 
-       lockdep_assert_held(&kctx->reg_lock);
+       lockdep_assert_held(&kctx->mmu_lock);
 
        /*
         * Architecture spec defines level-0 as being the top-most.
@@ -388,8 +408,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd,
 
                kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
 
-               kbase_mmu_sync_pgd(kctx->kbdev->dev,
-                               kbase_dma_addr(p), PAGE_SIZE);
+               kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
                /* Rely on the caller to update the address space flags. */
        }
 
@@ -402,9 +421,10 @@ static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
        phys_addr_t pgd;
        int l;
 
-       pgd = kctx->pgd;
+       lockdep_assert_held(&kctx->mmu_lock);
 
-       for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
+       pgd = kctx->pgd;
+       for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
                pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
                /* Handle failure condition */
                if (!pgd) {
@@ -424,7 +444,7 @@ static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *k
        KBASE_DEBUG_ASSERT(pgd);
        KBASE_DEBUG_ASSERT(NULL != kctx);
 
-       lockdep_assert_held(&kctx->reg_lock);
+       lockdep_assert_held(&kctx->mmu_lock);
 
        /*
         * Architecture spec defines level-0 as being the top-most.
@@ -449,9 +469,11 @@ static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context
        phys_addr_t pgd;
        int l;
 
+       lockdep_assert_held(&kctx->mmu_lock);
+
        pgd = kctx->pgd;
 
-       for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
+       for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) {
                pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
                /* Should never fail */
                KBASE_DEBUG_ASSERT(0 != pgd);
@@ -472,7 +494,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp
        /* 64-bit address range is the max */
        KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-       lockdep_assert_held(&kctx->reg_lock);
+       lockdep_assert_held(&kctx->mmu_lock);
 
        mmu_mode = kctx->kbdev->mmu_mode;
 
@@ -500,9 +522,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp
                vpfn += count;
                nr -= count;
 
-               kbase_mmu_sync_pgd(kctx->kbdev->dev,
-                                          kbase_dma_addr(p),
-                                          PAGE_SIZE);
+               kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p), PAGE_SIZE);
 
                kunmap_atomic(pgd_page);
        }
@@ -522,13 +542,14 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
        bool recover_required = false;
        u64 recover_vpfn = vpfn;
        size_t recover_count = 0;
+       int err;
 
        KBASE_DEBUG_ASSERT(NULL != kctx);
        KBASE_DEBUG_ASSERT(0 != vpfn);
        /* 64-bit address range is the max */
        KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-       lockdep_assert_held(&kctx->reg_lock);
+       mutex_lock(&kctx->mmu_lock);
 
        while (nr) {
                unsigned int i;
@@ -556,7 +577,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
                                                                  recover_vpfn,
                                                                  recover_count);
                        }
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto fail_unlock;
                }
 
                p = pfn_to_page(PFN_DOWN(pgd));
@@ -570,7 +592,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
                                                                  recover_vpfn,
                                                                  recover_count);
                        }
-                       return -ENOMEM;
+                       err = -ENOMEM;
+                       goto fail_unlock;
                }
 
                for (i = 0; i < count; i++) {
@@ -584,10 +607,9 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
                vpfn += count;
                nr -= count;
 
-               kbase_mmu_sync_pgd(kctx->kbdev->dev,
-                                          kbase_dma_addr(p) +
-                                          (index * sizeof(u64)),
-                                          count * sizeof(u64));
+               kbase_mmu_sync_pgd(kctx->kbdev,
+                               kbase_dma_addr(p) + (index * sizeof(u64)),
+                               count * sizeof(u64));
 
                kunmap(p);
                /* We have started modifying the page table.
@@ -596,7 +618,12 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
                recover_required = true;
                recover_count += count;
        }
+       mutex_unlock(&kctx->mmu_lock);
        return 0;
+
+fail_unlock:
+       mutex_unlock(&kctx->mmu_lock);
+       return err;
 }
 
 /*
@@ -613,13 +640,14 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
        bool recover_required = false;
        u64 recover_vpfn = vpfn;
        size_t recover_count = 0;
+       int err;
 
        KBASE_DEBUG_ASSERT(NULL != kctx);
        KBASE_DEBUG_ASSERT(0 != vpfn);
        /* 64-bit address range is the max */
        KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-       lockdep_assert_held(&kctx->reg_lock);
+       mutex_lock(&kctx->mmu_lock);
 
        while (nr) {
                unsigned int i;
@@ -647,7 +675,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
                                                                  recover_vpfn,
                                                                  recover_count);
                        }
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto fail_unlock;
                }
 
                p = pfn_to_page(PFN_DOWN(pgd));
@@ -661,7 +690,8 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
                                                                  recover_vpfn,
                                                                  recover_count);
                        }
-                       return -ENOMEM;
+                       err = -ENOMEM;
+                       goto fail_unlock;
                }
 
                for (i = 0; i < count; i++) {
@@ -676,10 +706,9 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
                vpfn += count;
                nr -= count;
 
-               kbase_mmu_sync_pgd(kctx->kbdev->dev,
-                                          kbase_dma_addr(p) +
-                                          (index * sizeof(u64)),
-                                          count * sizeof(u64));
+               kbase_mmu_sync_pgd(kctx->kbdev,
+                               kbase_dma_addr(p) + (index * sizeof(u64)),
+                               count * sizeof(u64));
 
                kunmap(p);
                /* We have started modifying the page table. If further pages
@@ -688,7 +717,13 @@ int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
                recover_required = true;
                recover_count += count;
        }
+
+       mutex_unlock(&kctx->mmu_lock);
        return 0;
+
+fail_unlock:
+       mutex_unlock(&kctx->mmu_lock);
+       return err;
 }
 
 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
@@ -782,17 +817,18 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
        struct kbase_device *kbdev;
        size_t requested_nr = nr;
        struct kbase_mmu_mode const *mmu_mode;
+       int err;
 
        KBASE_DEBUG_ASSERT(NULL != kctx);
        beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
 
-       lockdep_assert_held(&kctx->reg_lock);
-
        if (0 == nr) {
                /* early out if nothing to do */
                return 0;
        }
 
+       mutex_lock(&kctx->mmu_lock);
+
        kbdev = kctx->kbdev;
        mmu_mode = kbdev->mmu_mode;
 
@@ -808,14 +844,16 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
                pgd = mmu_get_bottom_pgd(kctx, vpfn);
                if (!pgd) {
                        dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto fail_unlock;
                }
 
                p = pfn_to_page(PFN_DOWN(pgd));
                pgd_page = kmap(p);
                if (!pgd_page) {
                        dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
-                       return -ENOMEM;
+                       err = -ENOMEM;
+                       goto fail_unlock;
                }
 
                for (i = 0; i < count; i++)
@@ -824,16 +862,20 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
                vpfn += count;
                nr -= count;
 
-               kbase_mmu_sync_pgd(kctx->kbdev->dev,
-                                          kbase_dma_addr(p) +
-                                          (index * sizeof(u64)),
-                                          count * sizeof(u64));
+               kbase_mmu_sync_pgd(kctx->kbdev,
+                               kbase_dma_addr(p) + (index * sizeof(u64)),
+                               count * sizeof(u64));
 
                kunmap(p);
        }
 
+       mutex_unlock(&kctx->mmu_lock);
        kbase_mmu_flush(kctx, vpfn, requested_nr);
        return 0;
+
+fail_unlock:
+       mutex_unlock(&kctx->mmu_lock);
+       return err;
 }
 
 KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
@@ -856,12 +898,13 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
        u64 *pgd_page;
        size_t requested_nr = nr;
        struct kbase_mmu_mode const *mmu_mode;
+       int err;
 
        KBASE_DEBUG_ASSERT(NULL != kctx);
        KBASE_DEBUG_ASSERT(0 != vpfn);
        KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-       lockdep_assert_held(&kctx->reg_lock);
+       mutex_lock(&kctx->mmu_lock);
 
        mmu_mode = kctx->kbdev->mmu_mode;
 
@@ -880,14 +923,16 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
                pgd = mmu_get_bottom_pgd(kctx, vpfn);
                if (!pgd) {
                        dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto fail_unlock;
                }
 
                p = pfn_to_page(PFN_DOWN(pgd));
                pgd_page = kmap(p);
                if (!pgd_page) {
                        dev_warn(kctx->kbdev->dev, "kmap failure\n");
-                       return -ENOMEM;
+                       err = -ENOMEM;
+                       goto fail_unlock;
                }
 
                for (i = 0; i < count; i++)
@@ -898,17 +943,20 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph
                vpfn += count;
                nr -= count;
 
-               kbase_mmu_sync_pgd(kctx->kbdev->dev,
-                                          kbase_dma_addr(p) +
-                                          (index * sizeof(u64)),
-                                          count * sizeof(u64));
+               kbase_mmu_sync_pgd(kctx->kbdev,
+                               kbase_dma_addr(p) + (index * sizeof(u64)),
+                               count * sizeof(u64));
 
                kunmap(pfn_to_page(PFN_DOWN(pgd)));
        }
 
+       mutex_unlock(&kctx->mmu_lock);
        kbase_mmu_flush(kctx, vpfn, requested_nr);
-
        return 0;
+
+fail_unlock:
+       mutex_unlock(&kctx->mmu_lock);
+       return err;
 }
 
 /* This is a debug feature only */
@@ -936,7 +984,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int
        struct kbase_mmu_mode const *mmu_mode;
 
        KBASE_DEBUG_ASSERT(NULL != kctx);
-       lockdep_assert_held(&kctx->reg_lock);
+       lockdep_assert_held(&kctx->mmu_lock);
 
        pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
        /* kmap_atomic should NEVER fail. */
@@ -952,7 +1000,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int
                target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
 
                if (target_pgd) {
-                       if (level < 2) {
+                       if (level < (MIDGARD_MMU_BOTTOMLEVEL - 1)) {
                                mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
                        } else {
                                /*
@@ -981,6 +1029,8 @@ int kbase_mmu_init(struct kbase_context *kctx)
        KBASE_DEBUG_ASSERT(NULL != kctx);
        KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
 
+       mutex_init(&kctx->mmu_lock);
+
        /* Preallocate MMU depth of four pages for mmu_teardown_level to use */
        kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
 
@@ -1001,18 +1051,24 @@ void kbase_mmu_term(struct kbase_context *kctx)
 
 void kbase_mmu_free_pgd(struct kbase_context *kctx)
 {
+       int new_page_count __maybe_unused;
+
        KBASE_DEBUG_ASSERT(NULL != kctx);
        KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
 
-       lockdep_assert_held(&kctx->reg_lock);
-
+       mutex_lock(&kctx->mmu_lock);
        mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+       mutex_unlock(&kctx->mmu_lock);
 
        beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
        kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true);
        kbase_process_page_usage_dec(kctx, 1);
-       kbase_atomic_sub_pages(1, &kctx->used_pages);
+       new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages);
        kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+       kbase_tlstream_aux_pagesalloc(
+                       (u32)kctx->id,
+                       (u64)new_page_count);
 }
 
 KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
@@ -1027,7 +1083,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
        struct kbase_mmu_mode const *mmu_mode;
 
        KBASE_DEBUG_ASSERT(NULL != kctx);
-       lockdep_assert_held(&kctx->reg_lock);
+       lockdep_assert_held(&kctx->mmu_lock);
 
        mmu_mode = kctx->kbdev->mmu_mode;
 
@@ -1052,16 +1108,21 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
                *size_left -= size;
        }
 
-       for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
-               if (mmu_mode->pte_is_valid(pgd_page[i])) {
-                       target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
-
-                       dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left);
-                       if (!dump_size) {
-                               kunmap(pfn_to_page(PFN_DOWN(pgd)));
-                               return 0;
+       if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+               for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+                       if (mmu_mode->pte_is_valid(pgd_page[i])) {
+                               target_pgd = mmu_mode->pte_to_phy_addr(
+                                               pgd_page[i]);
+
+                               dump_size = kbasep_mmu_dump_level(kctx,
+                                               target_pgd, level + 1,
+                                               buffer, size_left);
+                               if (!dump_size) {
+                                       kunmap(pfn_to_page(PFN_DOWN(pgd)));
+                                       return 0;
+                               }
+                               size += dump_size;
                        }
-                       size += dump_size;
                }
        }
 
@@ -1077,13 +1138,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
 
        KBASE_DEBUG_ASSERT(kctx);
 
-       lockdep_assert_held(&kctx->reg_lock);
-
        if (0 == nr_pages) {
                /* can't dump in a 0 sized buffer, early out */
                return NULL;
        }
 
+       mutex_lock(&kctx->mmu_lock);
+
        size_left = nr_pages * PAGE_SIZE;
 
        KBASE_DEBUG_ASSERT(0 != size_left);
@@ -1105,7 +1166,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
                        kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup);
                        config[0] = as_setup.transtab;
                        config[1] = as_setup.memattr;
-                       config[2] = 0;
+                       config[2] = as_setup.transcfg;
                        memcpy(buffer, &config, sizeof(config));
                        mmu_dump_buffer += sizeof(config);
                        size_left -= sizeof(config);
@@ -1119,10 +1180,8 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
                                &mmu_dump_buffer,
                                &size_left);
 
-               if (!size) {
-                       vfree(kaddr);
-                       return NULL;
-               }
+               if (!size)
+                       goto fail_free;
 
                /* Add on the size for the end marker */
                size += sizeof(u64);
@@ -1133,15 +1192,20 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
 
                if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
                        /* The buffer isn't big enough - free the memory and return failure */
-                       vfree(kaddr);
-                       return NULL;
+                       goto fail_free;
                }
 
                /* Add the end marker */
                memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
        }
 
+       mutex_unlock(&kctx->mmu_lock);
        return kaddr;
+
+fail_free:
+       vfree(kaddr);
+       mutex_unlock(&kctx->mmu_lock);
+       return NULL;
 }
 KBASE_EXPORT_TEST_API(kbase_mmu_dump);
 
@@ -1306,6 +1370,15 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
                e = "TRANSLATION_FAULT";
                break;
        case 0xC8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+       case 0xC9:
+       case 0xCA:
+       case 0xCB:
+       case 0xCC:
+       case 0xCD:
+       case 0xCE:
+       case 0xCF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
                e = "PERMISSION_FAULT";
                break;
        case 0xD0:
@@ -1319,8 +1392,38 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
                e = "TRANSTAB_BUS_FAULT";
                break;
        case 0xD8:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+       case 0xD9:
+       case 0xDA:
+       case 0xDB:
+       case 0xDC:
+       case 0xDD:
+       case 0xDE:
+       case 0xDF:
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
                e = "ACCESS_FLAG";
                break;
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+       case 0xE0:
+       case 0xE1:
+       case 0xE2:
+       case 0xE3:
+       case 0xE4:
+       case 0xE5:
+       case 0xE6:
+       case 0xE7:
+               e = "ADDRESS_SIZE_FAULT";
+               break;
+       case 0xE8:
+       case 0xE9:
+       case 0xEA:
+       case 0xEB:
+       case 0xEC:
+       case 0xED:
+       case 0xEE:
+       case 0xEF:
+               e = "MEMORY_ATTRIBUTES_FAULT";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
                break;
        default:
                e = "UNKNOWN";
@@ -1334,7 +1437,12 @@ static const char *access_type_name(struct kbase_device *kbdev,
                u32 fault_status)
 {
        switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+       case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+               return "ATOMIC";
+#else
                return "UNKNOWN";
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
        case AS_FAULTSTATUS_ACCESS_TYPE_READ:
                return "READ";
        case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
@@ -1667,8 +1775,15 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex
                 */
                kbasep_js_clear_submit_allowed(js_devdata, kctx);
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+               dev_warn(kbdev->dev,
+                               "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
+                               as->number, as->fault_addr,
+                               as->fault_extra_addr);
+#else
                dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
                                as->number, as->fault_addr);
+#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */
 
                /*
                 * We need to switch to UNMAPPED mode - but we do this in a
index 079ef81d06d13222544c3b40c88f7a30a75deae4..683cabb797db60cd6aac6f40317f54b966ae9eee 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -77,17 +77,25 @@ static void mmu_get_as_setup(struct kbase_context *kctx,
                (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
                (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
                (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL    <<
-               (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+               (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8))    |
                (AS_MEMATTR_LPAE_WRITE_ALLOC           <<
-               (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+               (AS_MEMATTR_INDEX_WRITE_ALLOC * 8))           |
+               (AS_MEMATTR_LPAE_OUTER_IMPL_DEF        <<
+               (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8))        |
+               (AS_MEMATTR_LPAE_OUTER_WA              <<
+               (AS_MEMATTR_INDEX_OUTER_WA * 8))              |
                0; /* The other indices are unused for now */
 
-       setup->transtab = (u64)kctx->pgd &
-               ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK);
-
-       setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE;
-       setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER;
+       setup->transtab = ((u64)kctx->pgd &
+               ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) |
+               AS_TRANSTAB_LPAE_ADRMODE_TABLE |
+               AS_TRANSTAB_LPAE_READ_INNER;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+       setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#else
+       setup->transcfg = 0;
+#endif
 }
 
 static void mmu_update(struct kbase_context *kctx)
@@ -109,6 +117,9 @@ static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
 
        current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
 
+#ifdef CONFIG_MALI_GPU_MMU_AARCH64
+       current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY;
+#endif
 
        /* Apply the address space setting */
        kbase_mmu_hw_configure(kbdev, as, NULL);
index 71f005e325215e58fc9182779a569b3b108a26b7..6ac49df3f2be3de403b30901e37d30975532fb8b 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #include <mali_kbase_mem_linux.h>
 
 #define JOB_NOT_STARTED 0
-#define JOB_TYPE_MASK      0xfe
-#define JOB_TYPE_NULL      (1 << 1)
-#define JOB_TYPE_VERTEX    (5 << 1)
-#define JOB_TYPE_TILER     (7 << 1)
-#define JOB_TYPE_FUSED     (8 << 1)
-#define JOB_TYPE_FRAGMENT  (9 << 1)
-
-#define JOB_FLAG_DESC_SIZE           (1 << 0)
-#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8)
+#define JOB_TYPE_NULL      (1)
+#define JOB_TYPE_VERTEX    (5)
+#define JOB_TYPE_TILER     (7)
+#define JOB_TYPE_FUSED     (8)
+#define JOB_TYPE_FRAGMENT  (9)
 
 #define JOB_HEADER_32_FBD_OFFSET (31*4)
 #define JOB_HEADER_64_FBD_OFFSET (44*4)
 #define JOB_SOURCE_ID(status)          (((status) >> 16) & 0xFFFF)
 #define JOB_POLYGON_LIST               (0x03)
 
-struct job_head {
-       u32 status;
-       u32 not_complete_index;
-       u64 fault_addr;
-       u16 flags;
-       u16 index;
-       u16 dependencies[2];
-       union {
-               u64 _64;
-               u32 _32;
-       } next;
+struct fragment_job {
+       struct job_descriptor_header header;
+
        u32 x[2];
        union {
                u64 _64;
@@ -77,28 +65,43 @@ struct job_head {
 };
 
+/*
+ * dump_job_head - debug-print a job descriptor header.
+ *
+ * Only does anything when CONFIG_MALI_DEBUG is defined; otherwise the body is
+ * empty. Prints head_str, then every field of *job through dev_dbg().
+ * exception_status is additionally split into its source ID (bits 31:16, via
+ * JOB_SOURCE_ID), access (bits 9:8) and exception (bits 7:0) parts. The
+ * next-job pointer is printed as 64-bit when job_descriptor_size is non-zero
+ * and as 32-bit otherwise.
+ */
 static void dump_job_head(struct kbase_context *kctx, char *head_str,
-               struct job_head *job)
+               struct job_descriptor_header *job)
 {
 #ifdef CONFIG_MALI_DEBUG
        dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
-       dev_dbg(kctx->kbdev->dev, "addr               = %p\n"
-                       "status             = %x\n"
-                       "not_complete_index = %x\n"
-                       "fault_addr         = %llx\n"
-                       "flags              = %x\n"
-                       "index              = %x\n"
-                       "dependencies       = %x,%x\n",
-                       job, job->status, job->not_complete_index,
-                       job->fault_addr, job->flags, job->index,
-                       job->dependencies[0],
-                       job->dependencies[1]);
-
-       if (job->flags & JOB_FLAG_DESC_SIZE)
+       dev_dbg(kctx->kbdev->dev,
+                       "addr                  = %p\n"
+                       "exception_status      = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n"
+                       "first_incomplete_task = %x\n"
+                       "fault_pointer         = %llx\n"
+                       "job_descriptor_size   = %x\n"
+                       "job_type              = %x\n"
+                       "job_barrier           = %x\n"
+                       "_reserved_01          = %x\n"
+                       "_reserved_02          = %x\n"
+                       "_reserved_03          = %x\n"
+                       "_reserved_04/05       = %x,%x\n"
+                       "job_index             = %x\n"
+                       "dependencies          = %x,%x\n",
+                       job, job->exception_status,
+                       JOB_SOURCE_ID(job->exception_status),
+                       (job->exception_status >> 8) & 0x3,
+                       job->exception_status  & 0xFF,
+                       job->first_incomplete_task,
+                       job->fault_pointer, job->job_descriptor_size,
+                       job->job_type, job->job_barrier, job->_reserved_01,
+                       job->_reserved_02, job->_reserved_03,
+                       job->_reserved_04, job->_reserved_05,
+                       job->job_index,
+                       job->job_dependency_index_1,
+                       job->job_dependency_index_2);
+
+       /* Non-zero job_descriptor_size selects the 64-bit next_job field */
+       if (job->job_descriptor_size)
                dev_dbg(kctx->kbdev->dev, "next               = %llx\n",
-                               job->next._64);
+                               job->next_job._64);
        else
                dev_dbg(kctx->kbdev->dev, "next               = %x\n",
-                               job->next._32);
+                               job->next_job._32);
 #endif
 }
 
@@ -372,77 +375,81 @@ static int kbasep_replay_reset_job(struct kbase_context *kctx,
                u32 default_weight, u16 hw_job_id_offset,
                bool first_in_chain, bool fragment_chain)
 {
-       struct job_head *job;
+       struct fragment_job *frag_job;
+       struct job_descriptor_header *job;
        u64 new_job_header;
        struct kbase_vmap_struct map;
 
-       job = kbase_vmap(kctx, *job_header, sizeof(*job), &map);
-       if (!job) {
+       frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map);
+       if (!frag_job) {
                dev_err(kctx->kbdev->dev,
                                 "kbasep_replay_parse_jc: failed to map jc\n");
                return -EINVAL;
        }
+       job = &frag_job->header;
 
        dump_job_head(kctx, "Job header:", job);
 
-       if (job->status == JOB_NOT_STARTED && !fragment_chain) {
+       if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) {
                dev_err(kctx->kbdev->dev, "Job already not started\n");
                goto out_unmap;
        }
-       job->status = JOB_NOT_STARTED;
+       job->exception_status = JOB_NOT_STARTED;
 
-       if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX)
-               job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL;
+       if (job->job_type == JOB_TYPE_VERTEX)
+               job->job_type = JOB_TYPE_NULL;
 
-       if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) {
+       if (job->job_type == JOB_TYPE_FUSED) {
                dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
                goto out_unmap;
        }
 
        if (first_in_chain)
-               job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER;
+               job->job_barrier = 1;
 
-       if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
-           (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
-           (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+       if ((job->job_dependency_index_1 + hw_job_id_offset) >
+                       JOB_HEADER_ID_MAX ||
+           (job->job_dependency_index_2 + hw_job_id_offset) >
+                       JOB_HEADER_ID_MAX ||
+           (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
                dev_err(kctx->kbdev->dev,
                             "Job indicies/dependencies out of valid range\n");
                goto out_unmap;
        }
 
-       if (job->dependencies[0])
-               job->dependencies[0] += hw_job_id_offset;
-       if (job->dependencies[1])
-               job->dependencies[1] += hw_job_id_offset;
+       if (job->job_dependency_index_1)
+               job->job_dependency_index_1 += hw_job_id_offset;
+       if (job->job_dependency_index_2)
+               job->job_dependency_index_2 += hw_job_id_offset;
 
-       job->index += hw_job_id_offset;
+       job->job_index += hw_job_id_offset;
 
-       if (job->flags & JOB_FLAG_DESC_SIZE) {
-               new_job_header = job->next._64;
-               if (!job->next._64)
-                       job->next._64 = prev_jc;
+       if (job->job_descriptor_size) {
+               new_job_header = job->next_job._64;
+               if (!job->next_job._64)
+                       job->next_job._64 = prev_jc;
        } else {
-               new_job_header = job->next._32;
-               if (!job->next._32)
-                       job->next._32 = prev_jc;
+               new_job_header = job->next_job._32;
+               if (!job->next_job._32)
+                       job->next_job._32 = prev_jc;
        }
        dump_job_head(kctx, "Updated to:", job);
 
-       if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) {
-               bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0;
+       if (job->job_type == JOB_TYPE_TILER) {
+               bool job_64 = job->job_descriptor_size != 0;
 
                if (kbasep_replay_reset_tiler_job(kctx, *job_header,
                                tiler_heap_free, hierarchy_mask,
                                default_weight, job_64) != 0)
                        goto out_unmap;
 
-       } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) {
+       } else if (job->job_type == JOB_TYPE_FRAGMENT) {
                u64 fbd_address;
 
-               if (job->flags & JOB_FLAG_DESC_SIZE)
-                       fbd_address = job->fragment_fbd._64;
+               if (job->job_descriptor_size)
+                       fbd_address = frag_job->fragment_fbd._64;
                else
-                       fbd_address = (u64)job->fragment_fbd._32;
+                       fbd_address = (u64)frag_job->fragment_fbd._32;
 
                if (fbd_address & FBD_TYPE) {
                        if (kbasep_replay_reset_mfbd(kctx,
@@ -485,7 +492,7 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
                u64 jc, u16 *hw_job_id)
 {
        while (jc) {
-               struct job_head *job;
+               struct job_descriptor_header *job;
                struct kbase_vmap_struct map;
 
                dev_dbg(kctx->kbdev->dev,
@@ -498,13 +505,13 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
                        return -EINVAL;
                }
 
-               if (job->index > *hw_job_id)
-                       *hw_job_id = job->index;
+               if (job->job_index > *hw_job_id)
+                       *hw_job_id = job->job_index;
 
-               if (job->flags & JOB_FLAG_DESC_SIZE)
-                       jc = job->next._64;
+               if (job->job_descriptor_size)
+                       jc = job->next_job._64;
                else
-                       jc = job->next._32;
+                       jc = job->next_job._32;
 
                kbase_vunmap(kctx, &map);
        }
@@ -957,7 +964,7 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
        base_jd_replay_payload *payload;
        u64 job_header;
        u64 job_loop_detect;
-       struct job_head *job;
+       struct job_descriptor_header *job;
        struct kbase_vmap_struct job_map;
        struct kbase_vmap_struct map;
        bool err = false;
@@ -1012,41 +1019,22 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
                }
 
 
-#ifdef CONFIG_MALI_DEBUG
-               dev_dbg(dev, "\njob_head structure:\n"
-                            "Source ID:0x%x Access:0x%x Exception:0x%x\n"
-                            "at job addr               = %p\n"
-                            "not_complete_index        = 0x%x\n"
-                            "fault_addr                = 0x%llx\n"
-                            "flags                     = 0x%x\n"
-                            "index                     = 0x%x\n"
-                            "dependencies              = 0x%x,0x%x\n",
-                            JOB_SOURCE_ID(job->status),
-                            ((job->status >> 8) & 0x3),
-                            (job->status  & 0xFF),
-                            job,
-                            job->not_complete_index,
-                            job->fault_addr,
-                            job->flags,
-                            job->index,
-                            job->dependencies[0],
-                            job->dependencies[1]);
-#endif
+               dump_job_head(kctx, "\njob_head structure:\n", job);
 
                /* Replay only when the polygon list reader caused the
                 * DATA_INVALID_FAULT */
                if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
-                   (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) {
+                  (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) {
                        err = true;
                        kbase_vunmap(kctx, &job_map);
                        break;
                }
 
                /* Move on to next fragment job in the list */
-               if (job->flags & JOB_FLAG_DESC_SIZE)
-                       job_header = job->next._64;
+               if (job->job_descriptor_size)
+                       job_header = job->next_job._64;
                else
-                       job_header = job->next._32;
+                       job_header = job->next_job._32;
 
                kbase_vunmap(kctx, &job_map);
 
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.c b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.c
deleted file mode 100755 (executable)
index a0bb352..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- *
- */
-
-
-
-
-
-/**
- * @file mali_kbase_security.c
- * Base kernel security capability API
- */
-
-#include <mali_kbase.h>
-
-static inline bool kbasep_am_i_root(void)
-{
-#if KBASE_HWCNT_DUMP_BYPASS_ROOT
-       return true;
-#else
-       /* Check if root */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
-       if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
-               return true;
-#else
-       if (current_euid() == 0)
-               return true;
-#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)*/
-       return false;
-#endif /*KBASE_HWCNT_DUMP_BYPASS_ROOT*/
-}
-
-/**
- * kbase_security_has_capability - see mali_kbase_caps.h for description.
- */
-
-bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags)
-{
-       /* Assume failure */
-       bool access_allowed = false;
-       bool audit = KBASE_SEC_FLAG_AUDIT & flags;
-
-       KBASE_DEBUG_ASSERT(NULL != kctx);
-       CSTD_UNUSED(kctx);
-
-       /* Detect unsupported flags */
-       KBASE_DEBUG_ASSERT(((~KBASE_SEC_FLAG_MASK) & flags) == 0);
-
-       /* Determine if access is allowed for the given cap */
-       switch (cap) {
-       case KBASE_SEC_MODIFY_PRIORITY:
-       case KBASE_SEC_INSTR_HW_COUNTERS_COLLECT:
-               /* Access is granted only if the caller is privileged */
-               access_allowed = kbasep_am_i_root();
-               break;
-       }
-
-       /* Report problem if requested */
-       if (!access_allowed && audit)
-               dev_warn(kctx->kbdev->dev, "Security capability failure: %d, %p", cap, (void *)kctx);
-
-       return access_allowed;
-}
-
-KBASE_EXPORT_TEST_API(kbase_security_has_capability);
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_kbase_security.h
deleted file mode 100755 (executable)
index 024a7ee..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
- *
- * A copy of the licence is included with the program, and can also be obtained
- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA  02110-1301, USA.
- *
- */
-
-
-
-
-
-/**
- * @file mali_kbase_security.h
- * Base kernel security capability APIs
- */
-
-#ifndef _KBASE_SECURITY_H_
-#define _KBASE_SECURITY_H_
-
-/* Security flags */
-#define KBASE_SEC_FLAG_NOAUDIT (0u << 0)       /* Silently handle privilege failure */
-#define KBASE_SEC_FLAG_AUDIT   (1u << 0)       /* Write audit message on privilege failure */
-#define KBASE_SEC_FLAG_MASK    (KBASE_SEC_FLAG_AUDIT)  /* Mask of all valid flag bits */
-
-/* List of unique capabilities that have security access privileges */
-enum kbase_security_capability {
-       /* Instrumentation Counters access privilege */
-       KBASE_SEC_INSTR_HW_COUNTERS_COLLECT = 1,
-       KBASE_SEC_MODIFY_PRIORITY
-           /* Add additional access privileges here */
-};
-
-/**
- * kbase_security_has_capability - determine whether a task has a particular effective capability
- * @param[in]   kctx    The task context.
- * @param[in]   cap     The capability to check for.
- * @param[in]   flags   Additional configuration information
- *                      Such as whether to write an audit message or not.
- * @return true if success (capability is allowed), false otherwise.
- */
-
-bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags);
-
-#endif                         /* _KBASE_SECURITY_H_ */
index 17455fe00d79d736fe81bef634b125d81bedcf4b..43175c85988fd000c1eec01de9828b7e6c17c1c6 100755 (executable)
+/*
+ * invoke_smc_fid - issue "smc #0" with function_id in x0 and arg0..arg2 in
+ * x1..x3, and return whatever is in x0 afterwards.
+ *
+ * Each operand is bound to an explicit register variable so that the asm
+ * operands are tied to named registers instead of relying on the "r"
+ * constraints alone.
+ *
+ * NOTE(review): __asmeq() is defined outside this hunk; presumably it is a
+ * build-time check that operand %N was placed in the named register - confirm.
+ * NOTE(review): the asm statement lists no clobbers; confirm that matches the
+ * calling convention of the code handling the SMC.
+ */
 static noinline u64 invoke_smc_fid(u64 function_id,
                 u64 arg0, u64 arg1, u64 arg2)
 {
+       register u64 x0 asm("x0") = function_id;
+       register u64 x1 asm("x1") = arg0;
+       register u64 x2 asm("x2") = arg1;
+       register u64 x3 asm("x3") = arg2;
+
        asm volatile(
                        __asmeq("%0", "x0")
                        __asmeq("%1", "x1")
                        __asmeq("%2", "x2")
                        __asmeq("%3", "x3")
-                       "smc    #0\n"
-               : "+r" (function_id)
-               : "r" (arg0), "r" (arg1), "r" (arg2));
+                       "smc    #0\n"
+                       : "+r" (x0)
+                       : "r" (x1), "r" (x2), "r" (x3));
 
-       return function_id;
+       /* x0 is the only in/out operand, so it carries the result back */
+       return x0;
 }
 
 u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2)
index 637eba893a2db93728447f1085bd18d774759b70..f1dd011a16a3cba7ad7943690d7866a8ba2af0c6 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #include <linux/syscalls.h>
 #include "mali_kbase_sync.h"
 #endif
+#include <mali_base_kernel.h>
 #include <mali_kbase_hwaccess_time.h>
+#include <mali_kbase_mem_linux.h>
 #include <linux/version.h>
+#include <linux/ktime.h>
+#include <linux/pfn.h>
+#include <linux/sched.h>
 
 /* Mask to check cache alignment of data structures */
 #define KBASE_CACHE_ALIGNMENT_MASK             ((1<<L1_CACHE_SHIFT)-1)
  * executed within the driver rather than being handed over to the GPU.
  */
 
+/**
+ * kbasep_add_waiting_soft_job - Append an atom to its context's list of
+ *                               waiting soft jobs
+ * @katom: Atom to add; it is linked into the list through katom->dep_item[0].
+ *
+ * The atom is added at the tail of katom->kctx->waiting_soft_jobs. The list
+ * is protected by kctx->waiting_soft_jobs_lock, taken here with
+ * spin_lock_irqsave().
+ */
+void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+       struct kbase_context *kctx = katom->kctx;
+       unsigned long lflags;
+
+       spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+       list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+       spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+/**
+ * kbasep_translate_gpu_addr_to_kernel_page - Find the struct page that backs
+ *                                            a GPU virtual address
+ * @kctx:     Context used for the region lookup
+ * @gpu_addr: GPU virtual address to translate
+ *
+ * Looks up the region enclosing @gpu_addr while holding the GPU VM lock and
+ * reads the matching entry of reg->cpu_alloc->pages[].
+ *
+ * NOTE(review): the VM lock is dropped before returning and no reference is
+ * taken on the page here - confirm callers can rely on the page staying
+ * valid.
+ *
+ * Return: the struct page for the address, or NULL if no region encloses
+ *         @gpu_addr, the region is flagged KBASE_REG_FREE, or the page entry
+ *         is zero.
+ */
+static struct page *kbasep_translate_gpu_addr_to_kernel_page(
+               struct kbase_context *kctx, u64 gpu_addr)
+{
+       u64 pfn;
+       struct kbase_va_region *reg;
+       phys_addr_t addr = 0;
+
+       KBASE_DEBUG_ASSERT(NULL != kctx);
+
+       pfn = gpu_addr >> PAGE_SHIFT;
+
+       kbase_gpu_vm_lock(kctx);
+       reg = kbase_region_tracker_find_region_enclosing_address(
+                       kctx, gpu_addr);
+       if (!reg || (reg->flags & KBASE_REG_FREE))
+               goto err_vm_unlock;
+       /* Index the page array by the page offset from the region start */
+       addr = reg->cpu_alloc->pages[pfn - reg->start_pfn];
+       kbase_gpu_vm_unlock(kctx);
+
+       /* A zero entry is treated as "no page at this address" */
+       if (!addr)
+               goto err;
+
+       return pfn_to_page(PFN_DOWN(addr));
+
+err_vm_unlock:
+       kbase_gpu_vm_unlock(kctx);
+err:
+       return NULL;
+}
+
+/**
+ * kbasep_read_soft_event_status - Read the status byte of a soft event
+ * @kctx:   Context used to translate @evt
+ * @evt:    GPU address of the status byte
+ * @status: Output; receives the byte read. Not written on failure.
+ *
+ * Translates @evt to its backing page, maps that page with kmap_atomic() and
+ * copies out the single byte at @evt's offset within the page.
+ *
+ * Return: 0 on success, -1 if @evt could not be translated to a page.
+ */
+int kbasep_read_soft_event_status(
+               struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+       struct page *pg = kbasep_translate_gpu_addr_to_kernel_page(
+                       kctx, evt);
+       unsigned char *mapped_pg;
+       /* Byte offset of evt within its page */
+       u32 offset = evt & ~PAGE_MASK;
+
+       KBASE_DEBUG_ASSERT(NULL != status);
+
+       if (!pg)
+               return -1;
+
+       mapped_pg = (unsigned char *)kmap_atomic(pg);
+       KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */
+       *status = *(mapped_pg + offset);
+       kunmap_atomic(mapped_pg);
+
+       return 0;
+}
+
+/**
+ * kbasep_write_soft_event_status - Write the status byte of a soft event
+ * @kctx:       Context used to translate @evt
+ * @evt:        GPU address of the status byte
+ * @new_status: Value to store; debug builds assert that it is either
+ *              BASE_JD_SOFT_EVENT_SET or BASE_JD_SOFT_EVENT_RESET.
+ *
+ * Translates @evt to its backing page, maps that page with kmap_atomic() and
+ * stores @new_status in the single byte at @evt's offset within the page.
+ *
+ * Return: 0 on success, -1 if @evt could not be translated to a page.
+ */
+int kbasep_write_soft_event_status(
+               struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+       struct page *pg = kbasep_translate_gpu_addr_to_kernel_page(
+                       kctx, evt);
+       unsigned char *mapped_pg;
+       /* Byte offset of evt within its page */
+       u32 offset = evt & ~PAGE_MASK;
+
+       KBASE_DEBUG_ASSERT((new_status == BASE_JD_SOFT_EVENT_SET) ||
+                          (new_status == BASE_JD_SOFT_EVENT_RESET));
+
+       if (!pg)
+               return -1;
+
+       mapped_pg = (unsigned char *)kmap_atomic(pg);
+       KBASE_DEBUG_ASSERT(NULL != mapped_pg); /* kmap_atomic() must not fail */
+       *(mapped_pg + offset) = new_status;
+       kunmap_atomic(mapped_pg);
+
+       return 0;
+}
+
 static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
 {
        struct kbase_va_region *reg;
@@ -68,6 +156,9 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
                list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
                mutex_unlock(&js_devdata->runpool_mutex);
 
+               /* Also add this atom to the list of waiting soft jobs */
+               kbasep_add_waiting_soft_job(katom);
+
                return pm_active_err;
        }
 
@@ -234,18 +325,19 @@ static int kbase_fence_wait(struct kbase_jd_atom *katom)
        if (ret == 1) {
                /* Already signalled */
                return 0;
-       } else if (ret < 0) {
-               goto cancel_atom;
        }
-       return 1;
 
- cancel_atom:
-       katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
-       /* We should cause the dependant jobs in the bag to be failed,
-        * to do this we schedule the work queue to complete this job */
-       KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
-       INIT_WORK(&katom->work, kbase_fence_wait_worker);
-       queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+       if (ret < 0) {
+               katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+               /* We should cause the dependent jobs in the bag to be failed,
+                * to do this we schedule the work queue to complete this job */
+               KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+               INIT_WORK(&katom->work, kbase_fence_wait_worker);
+               queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+       }
+
+       kbasep_add_waiting_soft_job(katom);
+
        return 1;
 }
 
@@ -266,6 +358,600 @@ static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
 }
 #endif /* CONFIG_SYNC */
 
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+       struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+                       work);
+       struct kbase_context *kctx = katom->kctx;
+       int resched;
+
+       mutex_lock(&kctx->jctx.lock);
+       resched = jd_done_nolock(katom, NULL);
+       mutex_unlock(&kctx->jctx.lock);
+
+       if (resched)
+               kbase_js_sched_all(kctx->kbdev);
+}
+
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+       int cancel_timer = 1;
+       struct list_head *entry, *tmp;
+       unsigned long lflags;
+
+       spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+       list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+               struct kbase_jd_atom *katom = list_entry(
+                               entry, struct kbase_jd_atom, dep_item[0]);
+
+               if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+                   BASE_JD_REQ_SOFT_EVENT_WAIT) {
+                       if (katom->jc == evt) {
+                               list_del(&katom->dep_item[0]);
+
+                               katom->event_code = BASE_JD_EVENT_DONE;
+                               INIT_WORK(&katom->work,
+                                         kbasep_soft_event_complete_job);
+                               queue_work(kctx->jctx.job_done_wq,
+                                          &katom->work);
+                       } else {
+                               /* There are still other waiting jobs, we cannot
+                                * cancel the timer yet */
+                               cancel_timer = 0;
+                       }
+               }
+       }
+
+       if (cancel_timer)
+               hrtimer_try_to_cancel(&kctx->soft_event_timeout);
+       spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+enum hrtimer_restart kbasep_soft_event_timeout_worker(struct hrtimer *timer)
+{
+       struct kbase_context *kctx = container_of(timer, struct kbase_context,
+                       soft_event_timeout);
+       u32 timeout_ms = (u32)atomic_read(
+                       &kctx->kbdev->js_data.soft_event_timeout_ms);
+       ktime_t cur_time = ktime_get();
+       enum hrtimer_restart restarting = HRTIMER_NORESTART;
+       unsigned long lflags;
+       struct list_head *entry, *tmp;
+
+       spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+       list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+               struct kbase_jd_atom *katom = list_entry(
+                               entry, struct kbase_jd_atom, dep_item[0]);
+
+               if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+                   BASE_JD_REQ_SOFT_EVENT_WAIT) {
+                       s64 elapsed_time =
+                               ktime_to_ms(ktime_sub(cur_time,
+                                                     katom->start_timestamp));
+                       if (elapsed_time > (s64)timeout_ms) {
+                               /* Take it out of the list to ensure that it
+                                * will be cancelled in all cases */
+                               list_del(&katom->dep_item[0]);
+
+                               katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+                               INIT_WORK(&katom->work,
+                                         kbasep_soft_event_complete_job);
+                               queue_work(kctx->jctx.job_done_wq,
+                                          &katom->work);
+                       } else {
+                               restarting = HRTIMER_RESTART;
+                       }
+               }
+       }
+
+       if (restarting)
+               hrtimer_add_expires(timer, HR_TIMER_DELAY_MSEC(timeout_ms));
+       spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+       return restarting;
+}
+
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+       struct kbase_context *kctx = katom->kctx;
+       ktime_t remaining;
+       unsigned char status;
+
+       /* The status of this soft-job is stored in jc */
+       if (kbasep_read_soft_event_status(kctx, katom->jc, &status) != 0) {
+               katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+               return 0;
+       }
+
+       if (status == BASE_JD_SOFT_EVENT_SET)
+               return 0; /* Event already set, nothing to do */
+
+       /* Record the start time of this atom so we could cancel it at
+        * the right time */
+       katom->start_timestamp = ktime_get();
+
+       /* Add the atom to the waiting list before the timer is
+        * (re)started to make sure that it gets processed */
+       kbasep_add_waiting_soft_job(katom);
+
+       /* Start the context-wide timeout timer that cancels stale waits,
+        * unless it is already running (has time remaining) */
+       remaining = hrtimer_get_remaining(&kctx->soft_event_timeout);
+       if (remaining.tv64 <= 0) {
+               int timeout_ms = atomic_read(
+                               &kctx->kbdev->js_data.soft_event_timeout_ms);
+               hrtimer_start(&kctx->soft_event_timeout,
+                             HR_TIMER_DELAY_MSEC((u64)timeout_ms),
+                             HRTIMER_MODE_REL);
+       }
+
+       return 1;
+}
+
+static void kbasep_soft_event_update(struct kbase_jd_atom *katom,
+                                    unsigned char new_status)
+{
+       /* Complete jobs waiting on the same event */
+       struct kbase_context *kctx = katom->kctx;
+
+       if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+               katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+               return;
+       }
+
+       if (new_status == BASE_JD_SOFT_EVENT_SET)
+               kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+       katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+       if (jd_done_nolock(katom, NULL))
+               kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+struct kbase_debug_copy_buffer {
+       u64 size;
+       struct page **pages;
+       int nr_pages;
+       u64 offset;
+};
+
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+       struct kbase_debug_copy_buffer *buffers =
+                       (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+       unsigned int i;
+       unsigned int nr = katom->nr_extres;
+
+       if (!buffers)
+               return;
+
+       for (i = 0; i < nr; i++) {
+               int p;
+
+               if (!buffers[i].pages)
+                       continue; /* empty slot; later ones may be pinned */
+               for (p = 0; p < buffers[i].nr_pages; p++) {
+                       struct page *pg = buffers[i].pages[p];
+
+                       if (pg)
+                               put_page(pg);
+               }
+               kfree(buffers[i].pages);
+       }
+       kfree(buffers);
+
+       katom->jc = 0;
+}
+
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+       struct kbase_debug_copy_buffer *buffers;
+       struct base_jd_debug_copy_buffer *user_buffers = NULL;
+       unsigned int i;
+       unsigned int nr = katom->nr_extres;
+       int ret = 0;
+       void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+       if (!user_structs)
+               return -EINVAL;
+
+       buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+       if (!buffers) {
+               ret = -ENOMEM;
+               katom->jc = 0;
+               goto out_cleanup;
+       }
+       katom->jc = (u64)(uintptr_t)buffers;
+
+       user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+       if (!user_buffers) {
+               ret = -ENOMEM;
+               goto out_cleanup;
+       }
+
+       if (copy_from_user(user_buffers, user_structs,
+                       sizeof(*user_buffers)*nr)) {
+               ret = -EINVAL;
+               goto out_cleanup;
+       }
+
+       down_read(&current->mm->mmap_sem);
+       for (i = 0; i < nr; i++) {
+               u64 addr = user_buffers[i].address;
+               u64 page_addr = addr & PAGE_MASK;
+               u64 end_page_addr = addr + user_buffers[i].size - 1;
+               u64 last_page_addr = end_page_addr & PAGE_MASK;
+               int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+               int pinned_pages;
+
+               if (!user_buffers[i].address) {
+                       memset(&buffers[i], 0,
+                                       sizeof(struct kbase_debug_copy_buffer));
+                       continue;
+               }
+
+               buffers[i].nr_pages = nr_pages;
+               buffers[i].offset = addr & ~PAGE_MASK;
+               buffers[i].size = user_buffers[i].size;
+
+               buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
+                               GFP_KERNEL);
+               if (!buffers[i].pages) {
+                       ret = -ENOMEM;
+                       goto out_unlock;
+               }
+
+               pinned_pages = get_user_pages(current, current->mm, page_addr,
+                                       nr_pages,
+                                       1, /* Write */
+                                       0, /* No force */
+                                       buffers[i].pages,
+                                       NULL);
+               if (pinned_pages < 0) {
+                       ret = pinned_pages;
+                       goto out_unlock;
+               }
+               if (pinned_pages != nr_pages) {
+                       ret = -EINVAL;
+                       goto out_unlock;
+               }
+       }
+       up_read(&current->mm->mmap_sem);
+
+       kfree(user_buffers);
+
+       return ret;
+
+out_unlock:
+       up_read(&current->mm->mmap_sem);
+
+out_cleanup:
+       kfree(user_buffers);
+
+       /* buffers is owned by katom->jc and was zero-initialised, so let
+        * kbase_debug_copy_finish() unpin and free it (and set jc to 0);
+        * a kfree(buffers) here would make that a double free */
+       kbase_debug_copy_finish(katom);
+
+       return ret;
+}
+
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+       struct kbase_debug_copy_buffer *buffers =
+                       (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc;
+       unsigned int i;
+
+       for (i = 0; i < katom->nr_extres; i++) {
+               u64 offset = buffers[i].offset;
+               u64 buffer_space = buffers[i].size;
+               int p;
+
+               for (p = 0; p < buffers[i].nr_pages; p++) {
+                       struct page *pg = buffers[i].pages[p];
+                       void *kpage = kmap(pg);
+                       u64 page_space = PAGE_SIZE-offset;
+                       u64 space;
+
+                       if (page_space <= buffer_space)
+                               space = page_space;
+                       else
+                               space = buffer_space;
+
+                       /* Temporary - GPUCORE-1843 covers the implementation
+                        * of the actual copying. */
+                       memset(kpage+offset, 0x4B, space);
+
+                       if (!PageReserved(pg))
+                               SetPageDirty(pg);
+
+                       kunmap(pg);
+                       offset = 0;
+                       buffer_space -= space;
+               }
+       }
+
+       return 0;
+}
+
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+       __user void *data = (__user void *)(uintptr_t) katom->jc;
+       struct base_jit_alloc_info *info;
+       struct kbase_context *kctx = katom->kctx;
+       int ret;
+
+       /* Fail the job if there is no info structure */
+       if (!data) {
+               ret = -EINVAL;
+               goto fail;
+       }
+
+       /* Copy the information for safe access and future storage */
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       if (copy_from_user(info, data, sizeof(*info)) != 0) {
+               ret = -EINVAL;
+               goto free_info;
+       }
+
+       /* If the ID is zero or is in use then fail the job */
+       if ((info->id == 0) || (kctx->jit_alloc[info->id])) {
+               ret = -EINVAL;
+               goto free_info;
+       }
+
+       /* Set the jit_alloc to a non-zero value so we know the ID is in use */
+       kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1;
+
+       /* Sanity check that the PA fits within the VA */
+       if (info->va_pages < info->commit_pages) {
+               ret = -EINVAL;
+               goto free_info;
+       }
+
+       /* Ensure the GPU address is correctly aligned */
+       if ((info->gpu_alloc_addr & 0x7) != 0) {
+               ret = -EINVAL;
+               goto free_info;
+       }
+
+       /* Replace the user pointer with our kernel allocated info structure */
+       katom->jc = (u64)(uintptr_t) info;
+
+       /*
+        * Note:
+        * The provided info->gpu_alloc_addr isn't validated here as
+        * userland can cache allocations which means that even
+        * though the region is valid it doesn't represent the
+        * same thing it used to.
+        *
+        * Complete validation of va_pages, commit_pages and extent
+        * isn't done here as it will be done during the call to
+        * kbase_mem_alloc.
+        */
+       return 0;
+
+free_info:
+       kfree(info);
+fail:
+       katom->jc = 0;
+       return ret;
+}
+
+static void kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+       struct kbase_context *kctx = katom->kctx;
+       struct base_jit_alloc_info *info;
+       struct kbase_va_region *reg;
+       struct kbase_vmap_struct mapping;
+       u64 *ptr;
+
+       info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+
+       /* Create a JIT allocation */
+       reg = kbase_jit_allocate(kctx, info);
+       if (!reg) {
+               katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+               return;
+       }
+
+       /*
+        * Write the address of the JIT allocation to the user provided
+        * GPU allocation.
+        */
+       ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr),
+                       &mapping);
+       if (!ptr) {
+               /*
+                * Leave the allocation "live" as the JIT free job will follow.
+                * NOTE(review): ID is not yet bound to reg - confirm no leak.
+                */
+               katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+               return;
+       }
+
+       *ptr = reg->start_pfn << PAGE_SHIFT;
+       kbase_vunmap(kctx, &mapping);
+
+       katom->event_code = BASE_JD_EVENT_DONE;
+
+       /*
+        * Bind it to the user provided ID. Do this last so we can check for
+        * the JIT free racing this JIT alloc job.
+        */
+       kctx->jit_alloc[info->id] = reg;
+}
+
+static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom)
+{
+       struct base_jit_alloc_info *info;
+
+       info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc;
+       /* Free the info structure */
+       kfree(info);
+}
+
+static void kbase_jit_free_process(struct kbase_jd_atom *katom)
+{
+       struct kbase_context *kctx = katom->kctx;
+       u8 id = (u8) katom->jc;
+
+       /*
+        * If the ID is zero or it is not in use yet then fail the job.
+        */
+       if ((id == 0) || (kctx->jit_alloc[id] == NULL)) {
+               katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+               return;
+       }
+
+       /*
+        * If the ID is valid but the allocation request failed still succeed
+        * this soft job but don't try and free the allocation.
+        */
+       if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1)
+               kbase_jit_free(kctx, kctx->jit_alloc[id]);
+
+       kctx->jit_alloc[id] = NULL;
+}
+
+static int kbase_ext_res_prepare(struct kbase_jd_atom *katom)
+{
+       __user struct base_external_resource_list *user_ext_res;
+       struct base_external_resource_list *ext_res;
+       u64 count = 0;
+       size_t copy_size;
+       int ret;
+
+       user_ext_res = (__user struct base_external_resource_list *)
+                       (uintptr_t) katom->jc;
+
+       /* Fail the job if there is no info structure */
+       if (!user_ext_res) {
+               ret = -EINVAL;
+               goto fail;
+       }
+
+       if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) {
+               ret = -EINVAL;
+               goto fail;
+       }
+
+       /* Is the number of external resources in range? */
+       if (!count || count > BASE_EXT_RES_COUNT_MAX) {
+               ret = -EINVAL;
+               goto fail;
+       }
+
+       /* Copy the information for safe access and future storage */
+       copy_size = sizeof(*ext_res);
+       copy_size += sizeof(struct base_external_resource) * (count - 1);
+       ext_res = kzalloc(copy_size, GFP_KERNEL);
+       if (!ext_res) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) {
+               ret = -EINVAL;
+               goto free_info;
+       }
+
+       /*
+        * Overwrite the count with the value validated above, in case user
+        * space changed it between the two copies.
+        */
+       ext_res->count = count;
+
+       /*
+        * Replace the user pointer with our kernel allocated
+        * ext_res structure.
+        */
+       katom->jc = (u64)(uintptr_t) ext_res;
+
+       return 0;
+
+free_info:
+       kfree(ext_res);
+fail:
+       return ret;
+}
+
+static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map)
+{
+       struct base_external_resource_list *ext_res;
+       int i;
+       bool failed = false;
+
+       ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+       if (!ext_res)
+               goto failed_jc;
+
+       kbase_gpu_vm_lock(katom->kctx);
+
+       for (i = 0; i < ext_res->count; i++) {
+               u64 gpu_addr;
+
+               gpu_addr = ext_res->ext_res[i].ext_resource &
+                               ~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+               if (map) {
+                       if (!kbase_sticky_resource_acquire(katom->kctx,
+                                       gpu_addr))
+                               goto failed_loop;
+               } else
+                       if (!kbase_sticky_resource_release(katom->kctx, NULL,
+                                       gpu_addr, false))
+                               failed = true;
+       }
+
+       /*
+        * In the case of unmap we continue unmapping other resources in the
+        * case of failure but will always report failure if _any_ unmap
+        * request fails.
+        */
+       if (failed)
+               katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+       else
+               katom->event_code = BASE_JD_EVENT_DONE;
+
+       kbase_gpu_vm_unlock(katom->kctx);
+
+       return;
+
+failed_loop:
+       while (--i >= 0) { /* undo every acquired entry, index 0 included */
+               u64 gpu_addr;
+
+               gpu_addr = ext_res->ext_res[i].ext_resource &
+                               ~BASE_EXT_RES_ACCESS_EXCLUSIVE;
+
+               kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr,
+                               false);
+       }
+
+       katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+       kbase_gpu_vm_unlock(katom->kctx);
+
+failed_jc:
+       return;
+}
+
+static void kbase_ext_res_finish(struct kbase_jd_atom *katom)
+{
+       struct base_external_resource_list *ext_res;
+
+       ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc;
+       /* Free the info structure */
+       kfree(ext_res);
+}
+
 int kbase_process_soft_job(struct kbase_jd_atom *katom)
 {
        switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
@@ -284,6 +970,28 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom)
 #endif                         /* CONFIG_SYNC */
        case BASE_JD_REQ_SOFT_REPLAY:
                return kbase_replay_process(katom);
+       case BASE_JD_REQ_SOFT_EVENT_WAIT:
+               return kbasep_soft_event_wait(katom);
+       case BASE_JD_REQ_SOFT_EVENT_SET:
+               kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_SET);
+               break;
+       case BASE_JD_REQ_SOFT_EVENT_RESET:
+               kbasep_soft_event_update(katom, BASE_JD_SOFT_EVENT_RESET);
+               break;
+       case BASE_JD_REQ_SOFT_DEBUG_COPY:
+               return kbase_debug_copy(katom);
+       case BASE_JD_REQ_SOFT_JIT_ALLOC:
+               kbase_jit_allocate_process(katom);
+               break;
+       case BASE_JD_REQ_SOFT_JIT_FREE:
+               kbase_jit_free_process(katom);
+               break;
+       case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+               kbase_ext_res_process(katom, true);
+               break;
+       case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+               kbase_ext_res_process(katom, false);
+               break;
        }
 
        /* Atom is complete */
@@ -298,6 +1006,9 @@ void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
                kbase_fence_cancel_wait(katom);
                break;
 #endif
+       case BASE_JD_REQ_SOFT_EVENT_WAIT:
+               kbasep_soft_event_cancel_job(katom);
+               break;
        default:
                /* This soft-job doesn't support cancellation! */
                KBASE_DEBUG_ASSERT(0);
@@ -355,8 +1066,23 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
                }
                break;
 #endif                         /* CONFIG_SYNC */
+       case BASE_JD_REQ_SOFT_JIT_ALLOC:
+               return kbase_jit_allocate_prepare(katom);
        case BASE_JD_REQ_SOFT_REPLAY:
+       case BASE_JD_REQ_SOFT_JIT_FREE:
                break;
+       case BASE_JD_REQ_SOFT_EVENT_WAIT:
+       case BASE_JD_REQ_SOFT_EVENT_SET:
+       case BASE_JD_REQ_SOFT_EVENT_RESET:
+               if (katom->jc == 0)
+                       return -EINVAL;
+               break;
+       case BASE_JD_REQ_SOFT_DEBUG_COPY:
+               return kbase_debug_copy_prepare(katom);
+       case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+               return kbase_ext_res_prepare(katom);
+       case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+               return kbase_ext_res_prepare(katom);
        default:
                /* Unsupported soft-job */
                return -EINVAL;
@@ -386,6 +1112,19 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom)
                katom->fence = NULL;
                break;
 #endif                         /* CONFIG_SYNC */
+
+       case BASE_JD_REQ_SOFT_DEBUG_COPY:
+               kbase_debug_copy_finish(katom);
+               break;
+       case BASE_JD_REQ_SOFT_JIT_ALLOC:
+               kbase_jit_allocate_finish(katom);
+               break;
+       case BASE_JD_REQ_SOFT_EXT_RES_MAP:
+               kbase_ext_res_finish(katom);
+               break;
+       case BASE_JD_REQ_SOFT_EXT_RES_UNMAP:
+               kbase_ext_res_finish(katom);
+               break;
        }
 }
 
@@ -426,12 +1165,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
                        kbase_finish_soft_job(katom_iter);
                        resched |= jd_done_nolock(katom_iter, NULL);
                } else {
-                       /* The job has not completed */
                        KBASE_DEBUG_ASSERT((katom_iter->core_req &
                                        BASEP_JD_REQ_ATOM_TYPE)
                                        != BASE_JD_REQ_SOFT_REPLAY);
-                       list_add_tail(&katom_iter->dep_item[0],
-                                       &kctx->waiting_soft_jobs);
                }
 
                mutex_unlock(&kctx->jctx.lock);
index 99428d1e660eb5fe78b287fcf0ad75262ed2c7e1..e41efb81b2175bff47c94c09fb8d999a21c2a62f 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 #include <mali_kbase.h>
 #include <mali_kbase_jm.h>
 #include <mali_kbase_tlstream.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
 
 /*****************************************************************************/
 
-/* The version of timeline stream. */
-#define KBASEP_TLSTREAM_VERSION 1
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
 
 /* The maximum expected length of string in tracepoint descriptor. */
 #define STRLEN_MAX         64 /* bytes */
 /* The number of nanoseconds in a second. */
 #define NSECS_IN_SEC       1000000000ull /* ns */
 
-/* The number of nanoseconds to wait before autoflushing the stream. */
-#define AUTOFLUSH_TIMEOUT  (2ull * NSECS_IN_SEC) /* ns */
-
 /* The period of autoflush checker execution in milliseconds. */
 #define AUTOFLUSH_INTERVAL 1000 /* ms */
 
 /* The maximum size of a single packet used by timeline. */
-#define PACKET_SIZE        2048 /* bytes */
+#define PACKET_SIZE        4096 /* bytes */
 
 /* The number of packets used by one timeline stream. */
 #define PACKET_COUNT       16
@@ -120,7 +118,7 @@ enum tl_packet_type {
 };
 
 /* Message ids of trace events that are recorded in the timeline stream. */
-enum tl_msg_id {
+enum tl_msg_id_obj {
        /* Timeline object events. */
        KBASE_TL_NEW_CTX,
        KBASE_TL_NEW_GPU,
@@ -131,25 +129,30 @@ enum tl_msg_id {
        KBASE_TL_DEL_ATOM,
        KBASE_TL_LIFELINK_LPU_GPU,
        KBASE_TL_LIFELINK_AS_GPU,
-       KBASE_TL_RET_GPU_CTX,
+       KBASE_TL_RET_CTX_LPU,
        KBASE_TL_RET_ATOM_CTX,
        KBASE_TL_RET_ATOM_LPU,
-       KBASE_TL_NRET_GPU_CTX,
+       KBASE_TL_NRET_CTX_LPU,
        KBASE_TL_NRET_ATOM_CTX,
        KBASE_TL_NRET_ATOM_LPU,
        KBASE_TL_RET_AS_CTX,
        KBASE_TL_NRET_AS_CTX,
        KBASE_TL_RET_ATOM_AS,
        KBASE_TL_NRET_ATOM_AS,
+       KBASE_TL_DEP_ATOM_ATOM,
        KBASE_TL_ATTRIB_ATOM_CONFIG,
        KBASE_TL_ATTRIB_AS_CONFIG,
 
-       /* Job dump specific events (part of timeline stream). */
-       KBASE_JD_GPU_SOFT_RESET,
+       /* Job dump specific events. */
+       KBASE_JD_GPU_SOFT_RESET
+};
 
-       /* Timeline non-object events. */
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
        KBASE_AUX_PM_STATE,
+       KBASE_AUX_ISSUE_JOB_SOFTSTOP,
        KBASE_AUX_JOB_SOFTSTOP,
+       KBASE_AUX_JOB_SOFTSTOP_EX,
        KBASE_AUX_PAGEFAULT,
        KBASE_AUX_PAGESALLOC
 };
@@ -163,7 +166,7 @@ enum tl_msg_id {
  * @wbi: write buffer index
  * @rbi: read buffer index
  * @numbered: if non-zero stream's packets are sequentially numbered
- * @last_write_time: timestamp indicating last write
+ * @autoflush_counter: counter tracking stream's autoflush state
  *
  * This structure holds information needed to construct proper packets in the
  * timeline stream. Each message in sequence must bear timestamp that is greater
@@ -174,6 +177,11 @@ enum tl_msg_id {
  * Each packet in timeline body stream has sequence number embedded (this value
  * must increment monotonically and is used by packets receiver to discover
  * buffer overflows.
+ * The autoflush counter is set to a negative number when there is no data
+ * pending for flush, and it is set to zero on every update of the buffer. The
+ * autoflush timer increments the counter by one on every expiry. If there is
+ * no activity on the buffer during two consecutive timer expiries, the stream
+ * buffer is flushed.
  */
 struct tl_stream {
        spinlock_t lock;
@@ -187,7 +195,7 @@ struct tl_stream {
        atomic_t rbi;
 
        int      numbered;
-       u64      last_write_time;
+       atomic_t autoflush_counter;
 };
 
 /**
@@ -238,9 +246,6 @@ static atomic_t autoflush_timer_active;
  * streams at any given time. */
 static DEFINE_MUTEX(tl_reader_lock);
 
-/* Indicator of whether the timeline stream file descriptor is already used. */
-static atomic_t tlstream_busy = {0};
-
 /* Timeline stream event queue. */
 static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
 
@@ -266,8 +271,8 @@ static const struct tp_desc tp_desc_obj[] = {
                KBASE_TL_NEW_CTX,
                __stringify(KBASE_TL_NEW_CTX),
                "object ctx is created",
-               "@pI",
-               "ctx,ctx_nr"
+               "@pII",
+               "ctx,ctx_nr,tgid"
        },
        {
                KBASE_TL_NEW_GPU,
@@ -326,11 +331,11 @@ static const struct tp_desc tp_desc_obj[] = {
                "address_space,gpu"
        },
        {
-               KBASE_TL_RET_GPU_CTX,
-               __stringify(KBASE_TL_RET_GPU_CTX),
-               "gpu is retained by context",
+               KBASE_TL_RET_CTX_LPU,
+               __stringify(KBASE_TL_RET_CTX_LPU),
+               "context is retained by lpu",
                "@pp",
-               "gpu,ctx"
+               "ctx,lpu"
        },
        {
                KBASE_TL_RET_ATOM_CTX,
@@ -343,22 +348,22 @@ static const struct tp_desc tp_desc_obj[] = {
                KBASE_TL_RET_ATOM_LPU,
                __stringify(KBASE_TL_RET_ATOM_LPU),
                "atom is retained by lpu",
-               "@pp",
-               "atom,lpu"
+               "@pps",
+               "atom,lpu,attrib_match_list"
        },
        {
-               KBASE_TL_NRET_GPU_CTX,
-               __stringify(KBASE_TL_NRET_GPU_CTX),
-               "gpu is released by context",
+               KBASE_TL_NRET_CTX_LPU,
+               __stringify(KBASE_TL_NRET_CTX_LPU),
+               "context is released by lpu",
                "@pp",
-               "gpu,ctx"
+               "ctx,lpu"
        },
        {
                KBASE_TL_NRET_ATOM_CTX,
                __stringify(KBASE_TL_NRET_ATOM_CTX),
                "atom is released by context",
                "@pp",
-               "atom,context"
+               "atom,ctx"
        },
        {
                KBASE_TL_NRET_ATOM_LPU,
@@ -395,6 +400,13 @@ static const struct tp_desc tp_desc_obj[] = {
                "@pp",
                "atom,address_space"
        },
+       {
+               KBASE_TL_DEP_ATOM_ATOM,
+               __stringify(KBASE_TL_DEP_ATOM_ATOM),
+               "atom2 depends on atom1",
+               "@pp",
+               "atom1,atom2"
+       },
        {
                KBASE_TL_ATTRIB_ATOM_CONFIG,
                __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
@@ -427,6 +439,13 @@ static const struct tp_desc tp_desc_aux[] = {
                "@IL",
                "core_type,core_state_bitset"
        },
+       {
+               KBASE_AUX_ISSUE_JOB_SOFTSTOP,
+               __stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP),
+               "Issuing job soft stop",
+               "@p",
+               "atom"
+       },
        {
                KBASE_AUX_JOB_SOFTSTOP,
                __stringify(KBASE_AUX_JOB_SOFTSTOP),
@@ -434,19 +453,26 @@ static const struct tp_desc tp_desc_aux[] = {
                "@I",
                "tag_id"
        },
+       {
+               KBASE_AUX_JOB_SOFTSTOP_EX,
+               __stringify(KBASE_AUX_JOB_SOFTSTOP_EX),
+               "Job soft stop, more details",
+               "@pI",
+               "atom,job_type"
+       },
        {
                KBASE_AUX_PAGEFAULT,
                __stringify(KBASE_AUX_PAGEFAULT),
                "Page fault",
-               "@II",
-               "as_id,page_cnt"
+               "@IL",
+               "ctx_nr,page_cnt_change"
        },
        {
                KBASE_AUX_PAGESALLOC,
                __stringify(KBASE_AUX_PAGESALLOC),
                "Total alloc pages change",
-               "@l",
-               "page_cnt_change"
+               "@IL",
+               "ctx_nr,page_cnt"
        }
 };
 
@@ -460,6 +486,11 @@ static atomic_t tlstream_bytes_generated = {0};
 
 /*****************************************************************************/
 
+/* Indicator of whether the timeline stream file descriptor is used. */
+atomic_t kbase_tlstream_enabled = {0};
+
+/*****************************************************************************/
+
 /**
  * kbasep_tlstream_get_timestamp - return timestamp
  *
@@ -763,6 +794,9 @@ static size_t kbasep_tlstream_msgbuf_submit(
        unsigned int rb_idx_raw = atomic_read(&stream->rbi);
        unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
 
+       /* Set stream as flushed. */
+       atomic_set(&stream->autoflush_counter, -1);
+
        kbasep_tlstream_packet_header_update(
                        stream->buffer[wb_idx].data,
                        wb_size - PACKET_HEADER_SIZE);
@@ -811,14 +845,14 @@ static size_t kbasep_tlstream_msgbuf_submit(
  *
  * Return: pointer to the buffer where message can be stored
  *
- * Warning: Stream must be relased with kbasep_tlstream_msgbuf_release().
+ * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
  *          Only atomic operations are allowed while stream is locked
  *          (i.e. do not use any operation that may sleep).
  */
 static char *kbasep_tlstream_msgbuf_acquire(
                enum tl_stream_type stream_type,
                size_t              msg_size,
-               unsigned long       *flags)
+               unsigned long       *flags) __acquires(&stream->lock)
 {
        struct tl_stream *stream;
        unsigned int     wb_idx_raw;
@@ -865,14 +899,16 @@ static char *kbasep_tlstream_msgbuf_acquire(
  */
 static void kbasep_tlstream_msgbuf_release(
                enum tl_stream_type stream_type,
-               unsigned long       flags)
+               unsigned long       flags) __releases(&stream->lock)
 {
        struct tl_stream *stream;
 
        KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
 
        stream = tl_stream[stream_type];
-       stream->last_write_time = kbasep_tlstream_get_timestamp();
+
+       /* Mark stream as containing unflushed data. */
+       atomic_set(&stream->autoflush_counter, 0);
 
        spin_unlock_irqrestore(&stream->lock, flags);
 }
@@ -921,7 +957,6 @@ static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
  */
 static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
 {
-       u64                 timestamp = kbasep_tlstream_get_timestamp();
        enum tl_stream_type stype;
        int                 rcode;
 
@@ -935,6 +970,22 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
                size_t           wb_size;
                size_t           min_size = PACKET_HEADER_SIZE;
 
+               int af_cnt = atomic_read(&stream->autoflush_counter);
+
+               /* Check if stream contains unflushed data. */
+               if (0 > af_cnt)
+                       continue;
+
+               /* Check if stream should be flushed now. */
+               if (af_cnt != atomic_cmpxchg(
+                                       &stream->autoflush_counter,
+                                       af_cnt,
+                                       af_cnt + 1))
+                       continue;
+               if (!af_cnt)
+                       continue;
+
+               /* Autoflush this stream. */
                if (stream->numbered)
                        min_size += PACKET_NUMBER_SIZE;
 
@@ -944,16 +995,12 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
                wb_idx     = wb_idx_raw % PACKET_COUNT;
                wb_size    = atomic_read(&stream->buffer[wb_idx].size);
 
-               if (
-                               (wb_size > min_size) &&
-                               (
-                                timestamp - stream->last_write_time >
-                                AUTOFLUSH_TIMEOUT)) {
-
+               if (wb_size > min_size) {
                        wb_size = kbasep_tlstream_msgbuf_submit(
                                        stream, wb_idx_raw, wb_size);
                        wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
-                       atomic_set(&stream->buffer[wb_idx].size, wb_size);
+                       atomic_set(&stream->buffer[wb_idx].size,
+                                       wb_size);
                }
                spin_unlock_irqrestore(&stream->lock, flags);
        }
@@ -1071,8 +1118,10 @@ static ssize_t kbasep_tlstream_read(
                        break;
                }
 
-               /* Verify if there was no overflow in selected stream. Make sure
-                * that if incorrect size was used we will know about it. */
+               /* If the rbi still points to the packet we just processed,
+                * then there was no overflow, so we add the copied size to
+                * copy_len and move rbi on to the next packet.
+                */
                smp_rmb();
                if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
                        copy_len += rb_size;
@@ -1122,7 +1171,12 @@ static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
        KBASE_DEBUG_ASSERT(filp);
        CSTD_UNUSED(inode);
        CSTD_UNUSED(filp);
-       atomic_set(&tlstream_busy, 0);
+
+       /* Stop autoflush timer before releasing access to streams. */
+       atomic_set(&autoflush_timer_active, 0);
+       del_timer_sync(&autoflush_timer);
+
+       atomic_set(&kbase_tlstream_enabled, 0);
        return 0;
 }
 
@@ -1140,7 +1194,7 @@ static void kbasep_tlstream_timeline_header(
                const struct tp_desc *tp_desc,
                u32                  tp_count)
 {
-       const u8      tv = KBASEP_TLSTREAM_VERSION; /* tlstream version */
+       const u8      tv = SWTRACE_VERSION; /* protocol version */
        const u8      ps = sizeof(void *); /* pointer size */
        size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
        char          *buffer;
@@ -1211,7 +1265,6 @@ static void kbasep_tlstream_timeline_header(
 int kbase_tlstream_init(void)
 {
        enum tl_stream_type i;
-       int                 rcode;
 
        /* Prepare stream structures. */
        for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
@@ -1229,14 +1282,10 @@ int kbase_tlstream_init(void)
        }
 
        /* Initialize autoflush timer. */
-       atomic_set(&autoflush_timer_active, 1);
+       atomic_set(&autoflush_timer_active, 0);
        setup_timer(&autoflush_timer,
                        kbasep_tlstream_autoflush_timer_callback,
                        0);
-       rcode = mod_timer(
-                       &autoflush_timer,
-                       jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
-       CSTD_UNUSED(rcode);
 
        return 0;
 }
@@ -1245,9 +1294,6 @@ void kbase_tlstream_term(void)
 {
        enum tl_stream_type i;
 
-       atomic_set(&autoflush_timer_active, 0);
-       del_timer_sync(&autoflush_timer);
-
        for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
                kbasep_timeline_stream_term(tl_stream[i]);
                kfree(tl_stream[i]);
@@ -1256,14 +1302,16 @@ void kbase_tlstream_term(void)
 
 int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
 {
-       if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) {
+       if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) {
+               int rcode;
+
                *fd = anon_inode_getfd(
                                "[mali_tlstream]",
                                &kbasep_tlstream_fops,
                                kctx,
                                O_RDONLY | O_CLOEXEC);
                if (0 > *fd) {
-                       atomic_set(&tlstream_busy, 0);
+                       atomic_set(&kbase_tlstream_enabled, 0);
                        return *fd;
                }
 
@@ -1282,6 +1330,14 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
                                TL_STREAM_TYPE_AUX_HEADER,
                                tp_desc_aux,
                                ARRAY_SIZE(tp_desc_aux));
+
+               /* Start autoflush timer. */
+               atomic_set(&autoflush_timer_active, 1);
+               rcode = mod_timer(
+                               &autoflush_timer,
+                               jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+               CSTD_UNUSED(rcode);
+
        } else {
                *fd = -EBUSY;
        }
@@ -1317,11 +1373,12 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
 
 /*****************************************************************************/
 
-void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr)
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
 {
        const u32     msg_id = KBASE_TL_NEW_CTX;
        const size_t  msg_size =
-               sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr);
+               sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+               sizeof(tgid);
        unsigned long flags;
        char          *buffer;
        size_t        pos = 0;
@@ -1337,12 +1394,15 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr)
                        buffer, pos, &context, sizeof(context));
        pos = kbasep_tlstream_write_bytes(
                        buffer, pos, &nr, sizeof(nr));
+       pos = kbasep_tlstream_write_bytes(
+                       buffer, pos, &tgid, sizeof(tgid));
+
        KBASE_DEBUG_ASSERT(msg_size == pos);
 
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
 {
        const u32     msg_id = KBASE_TL_NEW_GPU;
        const size_t  msg_size =
@@ -1370,7 +1430,7 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
 {
        const u32     msg_id = KBASE_TL_NEW_LPU;
        const size_t  msg_size =
@@ -1398,7 +1458,7 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
 {
        const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
        const size_t  msg_size =
@@ -1423,7 +1483,7 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
 {
        const u32     msg_id = KBASE_TL_NEW_AS;
        const size_t  msg_size =
@@ -1448,7 +1508,7 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
 }
 
-void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
 {
        const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
        const size_t  msg_size =
@@ -1475,11 +1535,12 @@ void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
 
 /*****************************************************************************/
 
-void kbase_tlstream_tl_new_ctx(void *context, u32 nr)
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
 {
        const u32     msg_id = KBASE_TL_NEW_CTX;
        const size_t  msg_size =
-               sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr);
+               sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
+               sizeof(tgid);
        unsigned long flags;
        char          *buffer;
        size_t        pos = 0;
@@ -1495,12 +1556,14 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr)
                        buffer, pos, &context, sizeof(context));
        pos = kbasep_tlstream_write_bytes(
                        buffer, pos, &nr, sizeof(nr));
+       pos = kbasep_tlstream_write_bytes(
+                       buffer, pos, &tgid, sizeof(tgid));
        KBASE_DEBUG_ASSERT(msg_size == pos);
 
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
 {
        const u32     msg_id = KBASE_TL_NEW_ATOM;
        const size_t  msg_size =
@@ -1525,7 +1588,7 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_del_ctx(void *context)
+void __kbase_tlstream_tl_del_ctx(void *context)
 {
        const u32     msg_id = KBASE_TL_DEL_CTX;
        const size_t  msg_size =
@@ -1548,7 +1611,7 @@ void kbase_tlstream_tl_del_ctx(void *context)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_del_atom(void *atom)
+void __kbase_tlstream_tl_del_atom(void *atom)
 {
        const u32     msg_id = KBASE_TL_DEL_ATOM;
        const size_t  msg_size =
@@ -1571,11 +1634,11 @@ void kbase_tlstream_tl_del_atom(void *atom)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context)
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
 {
-       const u32     msg_id = KBASE_TL_RET_GPU_CTX;
+       const u32     msg_id = KBASE_TL_RET_CTX_LPU;
        const size_t  msg_size =
-               sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context);
+               sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
        unsigned long flags;
        char          *buffer;
        size_t        pos = 0;
@@ -1587,16 +1650,16 @@ void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context)
 
        pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
        pos = kbasep_tlstream_write_timestamp(buffer, pos);
-       pos = kbasep_tlstream_write_bytes(
-                       buffer, pos, &gpu, sizeof(gpu));
        pos = kbasep_tlstream_write_bytes(
                        buffer, pos, &context, sizeof(context));
+       pos = kbasep_tlstream_write_bytes(
+                       buffer, pos, &lpu, sizeof(lpu));
        KBASE_DEBUG_ASSERT(msg_size == pos);
 
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
 {
        const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
        const size_t  msg_size =
@@ -1621,11 +1684,15 @@ void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu)
+void __kbase_tlstream_tl_ret_atom_lpu(
+               void *atom, void *lpu, const char *attrib_match_list)
 {
        const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
+       const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
+                       strnlen(attrib_match_list, STRLEN_MAX);
        const size_t  msg_size =
-               sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+                       sizeof(msg_id) + sizeof(u64) +
+                       sizeof(atom) + sizeof(lpu) + msg_s0;
        unsigned long flags;
        char          *buffer;
        size_t        pos = 0;
@@ -1641,16 +1708,18 @@ void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu)
                        buffer, pos, &atom, sizeof(atom));
        pos = kbasep_tlstream_write_bytes(
                        buffer, pos, &lpu, sizeof(lpu));
+       pos = kbasep_tlstream_write_string(
+                       buffer, pos, attrib_match_list, msg_s0);
        KBASE_DEBUG_ASSERT(msg_size == pos);
 
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context)
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
 {
-       const u32     msg_id = KBASE_TL_NRET_GPU_CTX;
+       const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
        const size_t  msg_size =
-               sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context);
+               sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
        unsigned long flags;
        char          *buffer;
        size_t        pos = 0;
@@ -1662,16 +1731,16 @@ void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context)
 
        pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
        pos = kbasep_tlstream_write_timestamp(buffer, pos);
-       pos = kbasep_tlstream_write_bytes(
-                       buffer, pos, &gpu, sizeof(gpu));
        pos = kbasep_tlstream_write_bytes(
                        buffer, pos, &context, sizeof(context));
+       pos = kbasep_tlstream_write_bytes(
+                       buffer, pos, &lpu, sizeof(lpu));
        KBASE_DEBUG_ASSERT(msg_size == pos);
 
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
 {
        const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
        const size_t  msg_size =
@@ -1696,7 +1765,32 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
+{
+       const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
+       const size_t  msg_size =
+               sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
+       unsigned long flags;
+       char          *buffer;
+       size_t        pos = 0;
+
+       buffer = kbasep_tlstream_msgbuf_acquire(
+                       TL_STREAM_TYPE_OBJ,
+                       msg_size, &flags);
+       KBASE_DEBUG_ASSERT(buffer);
+
+       pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+       pos = kbasep_tlstream_write_timestamp(buffer, pos);
+       pos = kbasep_tlstream_write_bytes(
+                       buffer, pos, &atom1, sizeof(atom1));
+       pos = kbasep_tlstream_write_bytes(
+                       buffer, pos, &atom2, sizeof(atom2));
+       KBASE_DEBUG_ASSERT(msg_size == pos);
+
+       kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
 {
        const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
        const size_t  msg_size =
@@ -1721,7 +1815,7 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
 {
        const u32     msg_id = KBASE_TL_RET_AS_CTX;
        const size_t  msg_size =
@@ -1746,7 +1840,7 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
 {
        const u32     msg_id = KBASE_TL_NRET_AS_CTX;
        const size_t  msg_size =
@@ -1771,7 +1865,7 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
 {
        const u32     msg_id = KBASE_TL_RET_ATOM_AS;
        const size_t  msg_size =
@@ -1796,7 +1890,7 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
 {
        const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
        const size_t  msg_size =
@@ -1821,7 +1915,7 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_attrib_atom_config(
+void __kbase_tlstream_tl_attrib_atom_config(
                void *atom, u64 jd, u64 affinity, u32 config)
 {
        const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
@@ -1852,7 +1946,7 @@ void kbase_tlstream_tl_attrib_atom_config(
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_tl_attrib_as_config(
+void __kbase_tlstream_tl_attrib_as_config(
                void *as, u64 transtab, u64 memattr, u64 transcfg)
 {
        const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
@@ -1883,7 +1977,7 @@ void kbase_tlstream_tl_attrib_as_config(
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
 }
 
-void kbase_tlstream_jd_gpu_soft_reset(void *gpu)
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
 {
        const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
        const size_t  msg_size =
@@ -1908,7 +2002,7 @@ void kbase_tlstream_jd_gpu_soft_reset(void *gpu)
 
 /*****************************************************************************/
 
-void kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
 {
        const u32     msg_id = KBASE_AUX_PM_STATE;
        const size_t  msg_size =
@@ -1933,7 +2027,28 @@ void kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void kbase_tlstream_aux_job_softstop(u32 js_id)
+void __kbase_tlstream_aux_issue_job_softstop(void *katom)
+{
+       const u32     msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP;
+       const size_t  msg_size =
+               sizeof(msg_id) + sizeof(u64) + sizeof(katom);
+       unsigned long flags;
+       char          *buffer;
+       size_t        pos = 0;
+
+       buffer = kbasep_tlstream_msgbuf_acquire(
+                       TL_STREAM_TYPE_AUX, msg_size, &flags);
+       KBASE_DEBUG_ASSERT(buffer);
+
+       pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+       pos = kbasep_tlstream_write_timestamp(buffer, pos);
+       pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
+       KBASE_DEBUG_ASSERT(msg_size == pos);
+
+       kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void __kbase_tlstream_aux_job_softstop(u32 js_id)
 {
        const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP;
        const size_t  msg_size =
@@ -1955,12 +2070,17 @@ void kbase_tlstream_aux_job_softstop(u32 js_id)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count)
+/**
+ * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point
+ * @katom: the atom that has been soft-stopped
+ * @job_type: the job type
+ */
+static void __kbase_tlstream_aux_job_softstop_ex_record(
+               void *katom, u32 job_type)
 {
-       const u32     msg_id = KBASE_AUX_PAGEFAULT;
+       const u32     msg_id = KBASE_AUX_JOB_SOFTSTOP_EX;
        const size_t  msg_size =
-               sizeof(msg_id) + sizeof(u64) + sizeof(mmu_as) +
-               sizeof(page_count);
+               sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type);
        unsigned long flags;
        char          *buffer;
        size_t        pos = 0;
@@ -1971,19 +2091,50 @@ void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count)
 
        pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
        pos = kbasep_tlstream_write_timestamp(buffer, pos);
-       pos = kbasep_tlstream_write_bytes(buffer, pos, &mmu_as, sizeof(mmu_as));
+       pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom));
        pos = kbasep_tlstream_write_bytes(
-                       buffer, pos, &page_count, sizeof(page_count));
+                       buffer, pos, &job_type, sizeof(job_type));
        KBASE_DEBUG_ASSERT(msg_size == pos);
 
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
-void kbase_tlstream_aux_pagesalloc(s64 page_count_change)
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom)
 {
-       const u32     msg_id = KBASE_AUX_PAGESALLOC;
+       struct kbase_context *kctx = katom->kctx;
+       u64 jd = katom->jc;
+
+       while (jd != 0) {
+               struct job_descriptor_header *job;
+               struct kbase_vmap_struct map;
+
+               job = kbase_vmap(kctx, jd, sizeof(*job), &map);
+               if (!job) {
+                       dev_err(kctx->kbdev->dev,
+                               "__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n",
+                               jd, (void *)katom);
+                       break;
+               }
+               if (job->exception_status != BASE_JD_EVENT_STOPPED) {
+                       kbase_vunmap(kctx, &map);
+                       break;
+               }
+
+               __kbase_tlstream_aux_job_softstop_ex_record(
+                               katom, job->job_type);
+
+               jd = job->job_descriptor_size ?
+                       job->next_job._64 : job->next_job._32;
+               kbase_vunmap(kctx, &map);
+       }
+}
+
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
+{
+       const u32     msg_id = KBASE_AUX_PAGEFAULT;
        const size_t  msg_size =
-               sizeof(msg_id) + sizeof(u64) + sizeof(page_count_change);
+               sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+               sizeof(page_count_change);
        unsigned long flags;
        char          *buffer;
        size_t        pos = 0;
@@ -1994,6 +2145,7 @@ void kbase_tlstream_aux_pagesalloc(s64 page_count_change)
 
        pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
        pos = kbasep_tlstream_write_timestamp(buffer, pos);
+       pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
        pos = kbasep_tlstream_write_bytes(
                        buffer, pos,
                        &page_count_change, sizeof(page_count_change));
@@ -2002,3 +2154,27 @@ void kbase_tlstream_aux_pagesalloc(s64 page_count_change)
        kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
 }
 
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
+{
+       const u32     msg_id = KBASE_AUX_PAGESALLOC;
+       const size_t  msg_size =
+               sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
+               sizeof(page_count);
+       unsigned long flags;
+       char          *buffer;
+       size_t        pos = 0;
+
+       buffer = kbasep_tlstream_msgbuf_acquire(
+                       TL_STREAM_TYPE_AUX, msg_size, &flags);
+       KBASE_DEBUG_ASSERT(buffer);
+
+       pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+       pos = kbasep_tlstream_write_timestamp(buffer, pos);
+       pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
+       pos = kbasep_tlstream_write_bytes(
+                       buffer, pos, &page_count, sizeof(page_count));
+       KBASE_DEBUG_ASSERT(msg_size == pos);
+
+       kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
index 30171044ee1dbf5cfe990318f0c285f0a403022e..6c5c59616b781d7c4dcc371fce03c23aec265ae1 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -102,18 +102,65 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
 
 /*****************************************************************************/
 
+void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
+void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+void __kbase_tlstream_tl_del_ctx(void *context);
+void __kbase_tlstream_tl_del_atom(void *atom);
+void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_ret_atom_lpu(
+               void *atom, void *lpu, const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
+void __kbase_tlstream_tl_attrib_atom_config(
+               void *atom, u64 jd, u64 affinity, u32 config);
+void __kbase_tlstream_tl_attrib_as_config(
+               void *as, u64 transtab, u64 memattr, u64 transcfg);
+void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+void __kbase_tlstream_aux_issue_job_softstop(void *katom);
+void __kbase_tlstream_aux_job_softstop(u32 js_id);
+void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom);
+void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
+void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
+
+extern atomic_t kbase_tlstream_enabled;
+
+#define __TRACE_IF_ENABLED(trace_name, ...)                         \
+       do {                                                        \
+               int enabled = atomic_read(&kbase_tlstream_enabled); \
+               if (enabled)                                        \
+                       __kbase_tlstream_##trace_name(__VA_ARGS__); \
+       } while (0)
+
+/*****************************************************************************/
+
 /**
  * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
  *                                     summary
  * @context: name of the context object
  * @nr:      context number
+ * @tgid:    thread group ID
  *
  * Function emits a timeline message informing about context creation. Context
  * is created with context number (its attribute), that can be used to link
  * kbase context with userspace context.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr);
+#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \
+       __TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
 
 /**
  * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
@@ -125,7 +172,8 @@ void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr);
  * created with two attributes: id and core count.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \
+       __TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
 
 /**
  * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
@@ -138,7 +186,8 @@ void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
  * and function bearing information about this LPU abilities.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \
+       __TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
 
 /**
  * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
@@ -149,7 +198,8 @@ void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
  * along with GPU object.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \
+       __TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
 
 /**
  * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary
@@ -161,7 +211,8 @@ void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
  * address space.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
+#define kbase_tlstream_tl_summary_new_as(as, nr) \
+       __TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
 
 /**
  * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU
@@ -172,18 +223,21 @@ void kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
  * shall be deleted along with GPU object.
  * This message is directed to timeline summary stream.
  */
-void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
+#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \
+       __TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
 
 /**
  * kbase_tlstream_tl_new_ctx - create context object in timeline
  * @context: name of the context object
  * @nr:      context number
+ * @tgid:    thread group ID
  *
  * Function emits a timeline message informing about context creation. Context
  * is created with context number (its attribute), that can be used to link
  * kbase context with userspace context.
  */
-void kbase_tlstream_tl_new_ctx(void *context, u32 nr);
+#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \
+       __TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
 
 /**
  * kbase_tlstream_tl_new_atom - create atom object in timeline
@@ -194,7 +248,8 @@ void kbase_tlstream_tl_new_ctx(void *context, u32 nr);
  * created with atom number (its attribute) that links it with actual work
  * bucket id understood by hardware.
  */
-void kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+#define kbase_tlstream_tl_new_atom(atom, nr) \
+       __TRACE_IF_ENABLED(tl_new_atom, atom, nr)
 
 /**
  * kbase_tlstream_tl_del_ctx - destroy context object in timeline
@@ -203,7 +258,8 @@ void kbase_tlstream_tl_new_atom(void *atom, u32 nr);
  * Function emits a timeline message informing that context object ceased to
  * exist.
  */
-void kbase_tlstream_tl_del_ctx(void *context);
+#define kbase_tlstream_tl_del_ctx(context) \
+       __TRACE_IF_ENABLED(tl_del_ctx, context)
 
 /**
  * kbase_tlstream_tl_del_atom - destroy atom object in timeline
@@ -212,17 +268,19 @@ void kbase_tlstream_tl_del_ctx(void *context);
  * Function emits a timeline message informing that atom object ceased to
  * exist.
  */
-void kbase_tlstream_tl_del_atom(void *atom);
+#define kbase_tlstream_tl_del_atom(atom) \
+       __TRACE_IF_ENABLED(tl_del_atom, atom)
 
 /**
- * kbase_tlstream_tl_ret_gpu_ctx - retain GPU by context
- * @gpu:     name of the GPU object
+ * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU
  * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
  *
- * Function emits a timeline message informing that GPU object is being held
- * by context and must not be deleted unless it is released.
+ * Function emits a timeline message informing that context is being held
+ * by LPU and must not be deleted unless it is released.
  */
-void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context);
+#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \
+       __TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
 
 /**
  * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
@@ -232,27 +290,31 @@ void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context);
  * Function emits a timeline message informing that atom object is being held
  * by context and must not be deleted unless it is released.
  */
-void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \
+       __TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
 
 /**
  * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
- * @atom: name of the atom object
- * @lpu:  name of the Logical Processing Unit object
+ * @atom:              name of the atom object
+ * @lpu:               name of the Logical Processing Unit object
+ * @attrib_match_list: list containing match operator attributes
  *
  * Function emits a timeline message informing that atom object is being held
  * by LPU and must not be deleted unless it is released.
  */
-void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu);
+#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \
+       __TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
 
 /**
- * kbase_tlstream_tl_nret_gpu_ctx - release GPU by context
- * @gpu:     name of the GPU object
+ * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU
  * @context: name of the context object
+ * @lpu:     name of the Logical Processing Unit object
  *
- * Function emits a timeline message informing that GPU object is being released
- * by context.
+ * Function emits a timeline message informing that context is being released
+ * by LPU object.
  */
-void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context);
+#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \
+       __TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
 
 /**
  * kbase_tlstream_tl_nret_atom_ctx - release atom by context
@@ -262,7 +324,8 @@ void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context);
  * Function emits a timeline message informing that atom object is being
  * released by context.
  */
-void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \
+       __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
 
 /**
  * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU
@@ -272,7 +335,8 @@ void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
  * Function emits a timeline message informing that atom object is being
  * released by LPU.
  */
-void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \
+       __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
 
 /**
  * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context
@@ -282,7 +346,8 @@ void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
  * Function emits a timeline message informing that address space object
  * is being held by the context object.
  */
-void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
+#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \
+       __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
 
 /**
  * kbase_tlstream_tl_nret_as_ctx - release address space by context
@@ -292,7 +357,8 @@ void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
  * Function emits a timeline message informing that address space object
  * is being released by atom.
  */
-void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
+#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \
+       __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
 
 /**
  * kbase_tlstream_tl_ret_atom_as - retain atom by address space
@@ -302,7 +368,8 @@ void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
  * Function emits a timeline message informing that atom object is being held
  * by address space and must not be deleted unless it is released.
  */
-void kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
+#define kbase_tlstream_tl_ret_atom_as(atom, as) \
+       __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
 
 /**
  * kbase_tlstream_tl_nret_atom_as - release atom by address space
@@ -312,7 +379,19 @@ void kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
  * Function emits a timeline message informing that atom object is being
  * released by address space.
  */
-void kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
+#define kbase_tlstream_tl_nret_atom_as(atom, as) \
+       __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
+
+/**
+ * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom
+ * @atom1: name of the child atom object
+ * @atom2: name of the parent atom object that depends on child atom
+ *
+ * Function emits a timeline message informing that parent atom waits for
+ * child atom object to be completed before starting its execution.
+ */
+#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \
+       __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
 
 /**
  * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes
@@ -323,8 +402,8 @@ void kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
  *
  * Function emits a timeline message containing atom attributes.
  */
-void kbase_tlstream_tl_attrib_atom_config(
-               void *atom, u64 jd, u64 affinity, u32 config);
+#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \
+       __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
 
 /**
  * kbase_tlstream_tl_attrib_as_config - address space attributes
@@ -335,8 +414,8 @@ void kbase_tlstream_tl_attrib_atom_config(
  *
  * Function emits a timeline message containing address space attributes.
  */
-void kbase_tlstream_tl_attrib_as_config(
-               void *as, u64 transtab, u64 memattr, u64 transcfg);
+#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \
+       __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
 
 /**
  * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset
@@ -345,36 +424,62 @@ void kbase_tlstream_tl_attrib_as_config(
  * This imperative tracepoint is specific to job dumping.
  * Function emits a timeline message indicating GPU soft reset.
  */
-void kbase_tlstream_jd_gpu_soft_reset(void *gpu);
+#define kbase_tlstream_jd_gpu_soft_reset(gpu) \
+       __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
 
 /**
  * kbase_tlstream_aux_pm_state - timeline message: power management state
  * @core_type: core type (shader, tiler, l2 cache, l3 cache)
  * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
  */
-void kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+#define kbase_tlstream_aux_pm_state(core_type, state) \
+       __TRACE_IF_ENABLED(aux_pm_state, core_type, state)
+
+/**
+ * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued
+ * @katom: the atom that is being soft-stopped
+ */
+#define kbase_tlstream_aux_issue_job_softstop(katom) \
+       __TRACE_IF_ENABLED(aux_issue_job_softstop, katom)
 
 /**
  * kbase_tlstream_aux_job_softstop - soft job stop occurred
  * @js_id: job slot id
  */
-void kbase_tlstream_aux_job_softstop(u32 js_id);
+#define kbase_tlstream_aux_job_softstop(js_id) \
+       __TRACE_IF_ENABLED(aux_job_softstop, js_id)
+
+/**
+ * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom
+ * @katom: the atom that has been soft-stopped
+ *
+ * This trace point adds more details about the soft-stopped atom. These details
+ * can't be safely collected inside the interrupt handler so we're doing it
+ * inside a worker.
+ *
+ * Note: this is not the same information that is recorded in the trace point,
+ * refer to __kbase_tlstream_aux_job_softstop_ex() for more details.
+ */
+#define kbase_tlstream_aux_job_softstop_ex(katom) \
+       __TRACE_IF_ENABLED(aux_job_softstop_ex, katom)
 
 /**
  * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
  *                                resulting in new pages being mapped
- * @mmu_as:     MMU address space number
- * @page_count: number of currently used pages
+ * @ctx_nr:            kernel context number
+ * @page_count_change: number of pages to be added
  */
-void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count);
+#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \
+       __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
 
 /**
  * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
  *                                 pages is changed
- * @page_count_change: number of pages to be added or subtracted (according to
- *                     the sign)
+ * @ctx_nr:     kernel context number
+ * @page_count: number of pages used by the context
  */
-void kbase_tlstream_aux_pagesalloc(s64 page_count_change);
+#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \
+       __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
 
 #endif /* _KBASE_TLSTREAM_H */
 
index aac9858875ad47c1cbbe1b6b3350ad09d69a297f..a606ae810656da8ec1c8b789061cd231da827998 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -102,7 +102,7 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
        .open = kbasep_trace_timeline_debugfs_open,
        .read = seq_read,
        .llseek = seq_lseek,
-       .release = seq_release_private,
+       .release = seq_release,
 };
 
 void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
index 8108677542aba8842fcfeb3123b4e95bd5366750..93ddb5af939cc7da1ef1c6c9a90b21233567b55b 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 
 #include "mali_kbase_gpuprops_types.h"
 
+/*
+ * 10.1:
+ * - Do mmap in kernel for SAME_VA memory allocations rather than
+ *   calling back into the kernel as a 2nd stage of the allocation request.
+ *
+ * 10.2:
+ * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA
+ *   region for use with JIT (ignored on 32-bit platforms)
+ */
 #define BASE_UK_VERSION_MAJOR 10
-#define BASE_UK_VERSION_MINOR 0
+#define BASE_UK_VERSION_MINOR 2
 
 struct kbase_uk_mem_alloc {
        union uk_header header;
@@ -321,8 +330,8 @@ struct kbase_uk_context_id {
        int id;
 };
 
-#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
-       defined(CONFIG_MALI_MIPE_ENABLED)
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+       !defined(MALI_MIPE_ENABLED)
 /**
  * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
  * @header: UK structure header
@@ -353,7 +362,7 @@ struct kbase_uk_tlstream_flush {
 
 #if MALI_UNIT_TEST
 /**
- * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
  * @header:    UK structure header
  * @tpw_count: number of trace point writers in each context
  * @msg_delay: time delay between tracepoints from one writer in milliseconds
@@ -374,7 +383,7 @@ struct kbase_uk_tlstream_test {
 };
 
 /**
- * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
  * @header:          UK structure header
  * @bytes_collected: number of bytes read by user
  * @bytes_generated: number of bytes generated by tracepoints
@@ -390,7 +399,53 @@ struct kbase_uk_tlstream_stats {
        u32 bytes_generated;
 };
 #endif /* MALI_UNIT_TEST */
-#endif /* MALI_KTLSTREAM_ENABLED */
+#endif /* MALI_MIPE_ENABLED */
+
+/**
+ * struct kbase_uk_prfcnt_values - KBASE_FUNC_SET_PRFCNT_VALUES ioctl data
+ * @header:          UK structure header
+ * @data:            Counter samples for the dummy model
+ * @size:            Size of the counter sample data
+ */
+struct kbase_uk_prfcnt_values {
+       union uk_header header;
+       /* IN */
+       u32 *data;
+       u32 size;
+};
+
+/**
+ * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @evt:        the GPU address containing the event
+ * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or
+ *              BASE_JD_SOFT_EVENT_RESET
+ * @flags:      reserved for future uses, must be set to 0
+ *
+ * This structure is used to update the status of a software event. If the
+ * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting
+ * on this event will complete.
+ */
+struct kbase_uk_soft_event_update {
+       union uk_header header;
+       /* IN */
+       u64 evt;
+       u32 new_status;
+       u32 flags;
+};
+
+/**
+ * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure
+ * @header:     UK structure header
+ * @va_pages:   Number of virtual pages required for JIT
+ *
+ * This structure is used when requesting initialization of JIT.
+ */
+struct kbase_uk_mem_jit_init {
+       union uk_header header;
+       /* IN */
+       u64 va_pages;
+};
 
 enum kbase_uk_function_id {
        KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
@@ -443,18 +498,26 @@ enum kbase_uk_function_id {
 
        KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
 
-#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
-       defined(CONFIG_MALI_MIPE_ENABLED)
+#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \
+       !defined(MALI_MIPE_ENABLED)
        KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32),
 #if MALI_UNIT_TEST
        KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
        KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
 #endif /* MALI_UNIT_TEST */
        KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
-#endif /* MALI_KTLSTREAM_ENABLED */
+#endif /* MALI_MIPE_ENABLED */
 
        KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36),
 
+#ifdef SUPPORT_MALI_NO_MALI
+       KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37),
+#endif
+
+       KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38),
+
+       KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39),
+
        KBASE_FUNC_MAX
 };
 
index bfa8bfa0e14f56e1e4b5d554f4f556eba6e897dd..371122f188b21fc20dc77af62f30a7eb206c4190 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,6 +30,7 @@
 #include <mali_kbase.h>
 #include <mali_kbase_hwcnt_reader.h>
 #include <mali_kbase_mem_linux.h>
+#include <mali_kbase_tlstream.h>
 
 /*****************************************************************************/
 
@@ -115,6 +116,7 @@ struct kbase_vinstr_context {
  * @dump_size:     size of one dump buffer in bytes
  * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
  * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
+ * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
  * @accum_buffer:  temporary accumulation buffer for preserving counters
  * @dump_time:     next time this clients shall request hwcnt dump
  * @dump_interval: interval between periodic hwcnt dumps
@@ -134,6 +136,7 @@ struct kbase_vinstr_client {
        size_t                             dump_size;
        u32                                bitmap[4];
        void __user                        *legacy_buffer;
+       void                               *kernel_buffer;
        void                               *accum_buffer;
        u64                                dump_time;
        u32                                dump_interval;
@@ -225,11 +228,11 @@ static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
        dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
 }
 
-static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx)
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
 {
-       struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
        size_t dump_size;
 
+#ifndef CONFIG_MALI_NO_MALI
        if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
                u32 nr_cg;
 
@@ -237,7 +240,9 @@ static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx)
                dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
                                NR_CNT_PER_BLOCK *
                                NR_BYTES_PER_CNT;
-       } else {
+       } else
+#endif /* CONFIG_MALI_NO_MALI */
+       {
                /* assume v5 for now */
                base_gpu_props *props = &kbdev->gpu_props.props;
                u32 nr_l2 = props->l2_props.num_l2_slices;
@@ -251,6 +256,13 @@ static size_t kbasep_vinstr_dump_size(struct kbase_vinstr_context *vinstr_ctx)
        }
        return dump_size;
 }
+KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
+
+static size_t kbasep_vinstr_dump_size_ctx(
+               struct kbase_vinstr_context *vinstr_ctx)
+{
+       return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
+}
 
 static int kbasep_vinstr_map_kernel_dump_buffer(
                struct kbase_vinstr_context *vinstr_ctx)
@@ -261,7 +273,7 @@ static int kbasep_vinstr_map_kernel_dump_buffer(
        u16 va_align = 0;
 
        flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
-       vinstr_ctx->dump_size = kbasep_vinstr_dump_size(vinstr_ctx);
+       vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
        nr_pages = PFN_UP(vinstr_ctx->dump_size);
 
        reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
@@ -298,6 +310,8 @@ static void kbasep_vinstr_unmap_kernel_dump_buffer(
  */
 static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
 {
+       struct kbase_device *kbdev = vinstr_ctx->kbdev;
+       struct kbasep_kctx_list_element *element;
        int err;
 
        vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
@@ -313,10 +327,39 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
                return err;
        }
 
+       /* Add kernel context to list of contexts associated with device. */
+       element = kzalloc(sizeof(*element), GFP_KERNEL);
+       if (element) {
+               element->kctx = vinstr_ctx->kctx;
+               mutex_lock(&kbdev->kctx_list_lock);
+               list_add(&element->link, &kbdev->kctx_list);
+
+               /* Inform timeline client about new context.
+                * Do this while holding the lock to avoid tracepoint
+                * being created in both body and summary stream. */
+               kbase_tlstream_tl_new_ctx(
+                               vinstr_ctx->kctx,
+                               (u32)(vinstr_ctx->kctx->id),
+                               (u32)(vinstr_ctx->kctx->tgid));
+
+               mutex_unlock(&kbdev->kctx_list_lock);
+       } else {
+               /* Don't treat this as a fail - just warn about it. */
+               dev_warn(kbdev->dev,
+                               "couldn't add kctx to kctx_list\n");
+       }
+
        err = enable_hwcnt(vinstr_ctx);
        if (err) {
                kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
                kbase_destroy_context(vinstr_ctx->kctx);
+               if (element) {
+                       mutex_lock(&kbdev->kctx_list_lock);
+                       list_del(&element->link);
+                       kfree(element);
+                       mutex_unlock(&kbdev->kctx_list_lock);
+               }
+               kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
                vinstr_ctx->kctx = NULL;
                return err;
        }
@@ -329,6 +372,13 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
                disable_hwcnt(vinstr_ctx);
                kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
                kbase_destroy_context(vinstr_ctx->kctx);
+               if (element) {
+                       mutex_lock(&kbdev->kctx_list_lock);
+                       list_del(&element->link);
+                       kfree(element);
+                       mutex_unlock(&kbdev->kctx_list_lock);
+               }
+               kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
                vinstr_ctx->kctx = NULL;
                return -EFAULT;
        }
@@ -342,32 +392,55 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
  */
 static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
 {
+       struct kbase_device             *kbdev = vinstr_ctx->kbdev;
+       struct kbasep_kctx_list_element *element;
+       struct kbasep_kctx_list_element *tmp;
+       bool                            found = false;
+
        /* Release hw counters dumping resources. */
        vinstr_ctx->thread = NULL;
        disable_hwcnt(vinstr_ctx);
        kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
        kbase_destroy_context(vinstr_ctx->kctx);
+
+       /* Remove kernel context from the device's contexts list. */
+       mutex_lock(&kbdev->kctx_list_lock);
+       list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+               if (element->kctx == vinstr_ctx->kctx) {
+                       list_del(&element->link);
+                       kfree(element);
+                       found = true;
+               }
+       }
+       mutex_unlock(&kbdev->kctx_list_lock);
+
+       if (!found)
+               dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+       /* Inform timeline client about context destruction. */
+       kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx);
+
        vinstr_ctx->kctx = NULL;
 }
 
 /**
  * kbasep_vinstr_attach_client - Attach a client to the vinstr core
- * @vinstr_ctx:   vinstr context
- * @buffer_count: requested number of dump buffers
- * @bitmap:       bitmaps describing which counters should be enabled
- * @argp:         pointer where notification descriptor shall be stored
+ * @vinstr_ctx:    vinstr context
+ * @buffer_count:  requested number of dump buffers
+ * @bitmap:        bitmaps describing which counters should be enabled
+ * @argp:          pointer where notification descriptor shall be stored
+ * @kernel_buffer: pointer to kernel side buffer
  *
  * Return: vinstr opaque client handle or NULL on failure
  */
 static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
                struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
-               u32 bitmap[4], void *argp)
+               u32 bitmap[4], void *argp, void *kernel_buffer)
 {
        struct task_struct         *thread = NULL;
        struct kbase_vinstr_client *cli;
 
        KBASE_DEBUG_ASSERT(vinstr_ctx);
-       KBASE_DEBUG_ASSERT(argp);
        KBASE_DEBUG_ASSERT(buffer_count >= 0);
        KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT);
        KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1)));
@@ -405,7 +478,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
        /* The GPU resets the counter block every time there is a request
         * to dump it. We need a per client kernel buffer for accumulating
         * the counters. */
-       cli->dump_size    = kbasep_vinstr_dump_size(vinstr_ctx);
+       cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
        cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
        if (!cli->accum_buffer)
                goto error;
@@ -437,6 +510,8 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
                                O_RDONLY | O_CLOEXEC);
                if (0 > *fd)
                        goto error;
+       } else if (kernel_buffer) {
+               cli->kernel_buffer = kernel_buffer;
        } else {
                cli->legacy_buffer = (void __user *)argp;
        }
@@ -475,11 +550,7 @@ error:
        return NULL;
 }
 
-/**
- * kbasep_vinstr_detach_client - Detach a client from the vinstr core
- * @cli: Pointer to vinstr client
- */
-static void kbasep_vinstr_detach_client(struct kbase_vinstr_client *cli)
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
 {
        struct kbase_vinstr_context *vinstr_ctx;
        struct kbase_vinstr_client  *iter, *tmp;
@@ -540,6 +611,7 @@ static void kbasep_vinstr_detach_client(struct kbase_vinstr_client *cli)
        if (thread)
                kthread_stop(thread);
 }
+KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
 
 /* Accumulate counters in the dump buffer */
 static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
@@ -702,9 +774,12 @@ static void patch_dump_buffer_hdr_v5(
 static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
 {
        struct kbase_vinstr_client *iter;
-       int v4;
+       int v4 = 0;
 
+#ifndef CONFIG_MALI_NO_MALI
        v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
+#endif
+
        list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
                /* Don't bother accumulating clients whose hwcnt requests
                 * have not yet been honoured. */
@@ -791,6 +866,11 @@ static int kbasep_vinstr_collect_and_accumulate(
 {
        int rcode;
 
+#ifdef CONFIG_MALI_NO_MALI
+       /* The dummy model needs the CPU mapping. */
+       gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
+#endif
+
        /* Request HW counters dump.
         * Disable preemption to make dump timestamp more accurate. */
        preempt_disable();
@@ -865,6 +945,23 @@ static int kbasep_vinstr_fill_dump_buffer_legacy(
        return rcode;
 }
 
+/**
+ * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
+ *                                         allocated in kernel space
+ * @cli: requesting client
+ *
+ * Return: zero on success
+ *
+ * This is part of the kernel client interface.
+ */
+static int kbasep_vinstr_fill_dump_buffer_kernel(
+               struct kbase_vinstr_client *cli)
+{
+       memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
+
+       return 0;
+}
+
 /**
  * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
  * @vinstr_ctx: vinstr context
@@ -910,6 +1007,8 @@ static int kbasep_vinstr_update_client(
        if (cli->buffer_count)
                rcode = kbasep_vinstr_fill_dump_buffer(
                                cli, timestamp, event_id);
+       else if (cli->kernel_buffer)
+               rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
        else
                rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
 
@@ -1299,14 +1398,18 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
 static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
                struct kbase_vinstr_client *cli, u32 __user *hwver)
 {
+#ifndef CONFIG_MALI_NO_MALI
        struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
-       u32                         ver;
+#endif
+
+       u32                         ver = 5;
 
+#ifndef CONFIG_MALI_NO_MALI
        KBASE_DEBUG_ASSERT(vinstr_ctx);
+       if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
+               ver = 4;
+#endif
 
-       ver = 4;
-       if (!kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
-               ver = 5;
        return put_user(ver, hwver);
 }
 
@@ -1451,7 +1554,7 @@ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
        cli = filp->private_data;
        KBASE_DEBUG_ASSERT(cli);
 
-       kbasep_vinstr_detach_client(cli);
+       kbase_vinstr_detach_client(cli);
        return 0;
 }
 
@@ -1525,7 +1628,8 @@ int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
                        vinstr_ctx,
                        setup->buffer_count,
                        bitmap,
-                       &setup->fd);
+                       &setup->fd,
+                       NULL);
 
        if (!cli)
                return -ENOMEM;
@@ -1557,7 +1661,8 @@ int kbase_vinstr_legacy_hwc_setup(
                                vinstr_ctx,
                                0,
                                bitmap,
-                               (void *)(long)setup->dump_buffer);
+                               (void *)(long)setup->dump_buffer,
+                               NULL);
 
                if (!(*cli))
                        return -ENOMEM;
@@ -1565,13 +1670,37 @@ int kbase_vinstr_legacy_hwc_setup(
                if (!*cli)
                        return -EINVAL;
 
-               kbasep_vinstr_detach_client(*cli);
+               kbase_vinstr_detach_client(*cli);
                *cli = NULL;
        }
 
        return 0;
 }
 
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+               struct kbase_vinstr_context *vinstr_ctx,
+               struct kbase_uk_hwcnt_reader_setup *setup,
+               void *kernel_buffer)
+{
+       u32 bitmap[4];
+
+       /* Context, configuration and destination buffer are all mandatory. */
+       if (!(vinstr_ctx && setup && kernel_buffer))
+               return NULL;
+
+       /* Gather the per-block bitmaps from the reader configuration. */
+       bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+       bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
+       bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+       bitmap[JM_HWCNT_BM]     = setup->jm_bm;
+
+       /* Buffer count is 0 and the fourth argument is NULL; only the
+        * kernel buffer is handed over to the attached client. */
+       return kbasep_vinstr_attach_client(vinstr_ctx, 0, bitmap, NULL,
+                       kernel_buffer);
+}
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
+
 int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
                enum base_hwcnt_reader_event event_id)
 {
@@ -1615,6 +1744,7 @@ exit:
 
        return rcode;
 }
+KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
 
 int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
 {
@@ -1658,6 +1788,11 @@ void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx)
        KBASE_DEBUG_ASSERT(vinstr_ctx);
 
        mutex_lock(&vinstr_ctx->lock);
+       if (!vinstr_ctx->nclients || vinstr_ctx->suspended) {
+               mutex_unlock(&vinstr_ctx->lock);
+               return;
+       }
+
        kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
        vinstr_ctx->suspended = true;
        vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients;
@@ -1670,6 +1805,11 @@ void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx)
        KBASE_DEBUG_ASSERT(vinstr_ctx);
 
        mutex_lock(&vinstr_ctx->lock);
+       if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) {
+               mutex_unlock(&vinstr_ctx->lock);
+               return;
+       }
+
        vinstr_ctx->suspended = false;
        vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients;
        vinstr_ctx->reprogram = true;
index 12340e5c647db18720d82831017656533a08cefc..d32462aec653facbc2e422d76e23fade2f5c134e 100755 (executable)
@@ -66,6 +66,22 @@ int kbase_vinstr_legacy_hwc_setup(
                struct kbase_vinstr_client  **cli,
                struct kbase_uk_hwcnt_setup *setup);
 
+/**
+ * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
+ *                                   client
+ * @vinstr_ctx:    vinstr context
+ * @setup:         reader's configuration
+ * @kernel_buffer: pointer to dump buffer
+ *
+ * setup->buffer_count and setup->fd are not used for kernel side clients.
+ *
+ * Return: pointer to client structure, or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
+               struct kbase_vinstr_context *vinstr_ctx,
+               struct kbase_uk_hwcnt_reader_setup *setup,
+               void *kernel_buffer);
+
 /**
  * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
  * @cli:      pointer to vinstr client
@@ -100,5 +116,19 @@ void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx);
  */
 void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx);
 
+/**
+ * kbase_vinstr_dump_size - Return required size of dump buffer
+ * @kbdev: device pointer
+ *
+ * Return : buffer size in bytes
+ */
+size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @cli: Pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
+
 #endif /* _KBASE_VINSTR_H_ */
 
index fc3cf32ba4d285e5c3e931bcbe841ed53e7b385c..93fc5eaf273b5d239e0f4bc8e06aa2f33faa84d4 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 
 
 
-
-
 #if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_MALI_H
 
-#include <linux/stringify.h>
-#include <linux/tracepoint.h>
-
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM mali
-#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
 #define TRACE_INCLUDE_FILE mali_linux_trace
 
+#include <linux/tracepoint.h>
+
 #define MALI_JOB_SLOTS_EVENT_CHANGED
 
 /**
diff --git a/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h b/t83x/kernel/drivers/gpu/arm/midgard/mali_midg_coherency.h
new file mode 100644 (file)
index 0000000..a509cbd
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _MIDG_COHERENCY_H_
+#define _MIDG_COHERENCY_H_
+
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE      1
+#define COHERENCY_NONE     31
+#define COHERENCY_FEATURE_BIT(x) (1u << (x)) /* unsigned: x may be 31 */
+
+#endif /* _MIDG_COHERENCY_H_ */
index c3def83dba24417123ee0494c3f74e4b3d7357af..6b1d67d637209921bbc96e33256091a82d4b40b3 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 
 
 
-
-
 #ifndef _MIDGARD_REGMAP_H_
 #define _MIDGARD_REGMAP_H_
 
+#include "mali_midg_coherency.h"
+#include "mali_kbase_gpu_id.h"
+
 /*
  * Begin Register Offsets
  */
@@ -57,7 +58,7 @@
 
 #define GPU_COMMAND             0x030  /* (WO) */
 #define GPU_STATUS              0x034  /* (RO) */
-
+#define LATEST_FLUSH            0x038  /* (RO) */
 
 #define GROUPS_L2_COHERENT      (1 << 0)       /* Cores groups are l2 coherent */
 
 #define L2_PWRACTIVE_LO         0x260  /* (RO) Level 2 cache active bitmap, low word */
 #define L2_PWRACTIVE_HI         0x264  /* (RO) Level 2 cache active bitmap, high word */
 
+#define COHERENCY_FEATURES      0x300  /* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304  /* (RW) Coherency enable */
 
 #define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
 #define SHADER_CONFIG           0xF04  /* (RW) Shader core configuration settings (Implementation specific register) */
 
 #define JS_COMMAND_NEXT        0x60    /* (RW) Next command register for job slot n */
 
+#define JS_FLUSH_ID_NEXT       0x70    /* (RW) Next job slot n cache flush ID */
 
 #define MEMORY_MANAGEMENT_BASE  0x2000
 #define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
 #define AS_STATUS              0x28    /* (RO) Status flags for address space n */
 
 
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO         0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI         0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO       0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI       0x3C
 
 /* End Register Offsets */
 
 
 #define AS_TRANSTAB_LPAE_ADRMODE_MASK      0x00000003
 
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
 
 /*
  * Begin MMU STATUS register values
 #define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
 #define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
 
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
 
 #define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
 
+/*
+ * Begin MMU TRANSCFG register values
+ */
+
+#define AS_TRANSCFG_ADRMODE_LEGACY      0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
+#define AS_TRANSCFG_ADRMODE_IDENTITY    2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK        0xF
+
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
 
 /*
  * Begin Command Values
 #define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
 #define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
 #define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
 #define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
 
 /* JS_STATUS register values */
 #define GPU_COMMAND_CYCLE_COUNT_STOP   0x06    /* Stops the cycle counter, and system timestamp propagation */
 #define GPU_COMMAND_CLEAN_CACHES       0x07    /* Clean all caches */
 #define GPU_COMMAND_CLEAN_INV_CACHES   0x08    /* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09    /* Places the GPU in protected mode */
 
 /* End Command Values */
 
 /* GPU_STATUS values */
 #define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)    /* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)    /* Set if protected mode is active */
 
 /* PRFCNT_CONFIG register values */
-#define PRFCNT_CONFIG_AS_SHIFT    4    /* address space bitmap starts from bit 4 of the register */
+#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
 #define PRFCNT_CONFIG_MODE_OFF    0    /* The performance counters are disabled. */
 #define PRFCNT_CONFIG_MODE_MANUAL 1    /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
 #define PRFCNT_CONFIG_MODE_TILE   2    /* The performance counters are enabled, and are written out each time a tile finishes rendering. */
 
 /* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull
 
 /* Use GPU implementation-defined  caching policy. */
 #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
 /* Outer coherent, write alloc inner */
 #define AS_MEMATTR_INDEX_OUTER_WA              4
 
-/* GPU_ID register */
-#define GPU_ID_VERSION_STATUS_SHIFT       0
-#define GPU_ID_VERSION_MINOR_SHIFT        4
-#define GPU_ID_VERSION_MAJOR_SHIFT        12
-#define GPU_ID_VERSION_PRODUCT_ID_SHIFT   16
-#define GPU_ID_VERSION_STATUS             (0xF  << GPU_ID_VERSION_STATUS_SHIFT)
-#define GPU_ID_VERSION_MINOR              (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
-#define GPU_ID_VERSION_MAJOR              (0xF  << GPU_ID_VERSION_MAJOR_SHIFT)
-#define GPU_ID_VERSION_PRODUCT_ID         (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
-
-/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
-#define GPU_ID_PI_T60X                    0x6956
-#define GPU_ID_PI_T62X                    0x0620
-#define GPU_ID_PI_T76X                    0x0750
-#define GPU_ID_PI_T72X                    0x0720
-#define GPU_ID_PI_TFRX                    0x0880
-#define GPU_ID_PI_T86X                    0x0860
-#define GPU_ID_PI_T82X                    0x0820
-#define GPU_ID_PI_T83X                    0x0830
-
-/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
-#define GPU_ID_S_15DEV0                   0x1
-#define GPU_ID_S_EAC                      0x2
-
-/* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */
-#define GPU_ID_MAKE(id, major, minor, status) \
-               (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
-               ((major) << GPU_ID_VERSION_MAJOR_SHIFT) |   \
-               ((minor) << GPU_ID_VERSION_MINOR_SHIFT) |   \
-               ((status) << GPU_ID_VERSION_STATUS_SHIFT))
-
-/* End GPU_ID register */
-
 /* JS<n>_FEATURES register */
 
 #define JS_FEATURE_NULL_JOB              (1u << 1)
 /* End JS<n>_FEATURES register */
 
 /* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
 
 /* End THREAD_* registers */
 
-/* COHERENCY_* values*/
-#define COHERENCY_ACE_LITE 0
-#define COHERENCY_ACE      1
-#define COHERENCY_NONE     31
-#define COHERENCY_FEATURE_BIT(x) (1 << (x))
-/* End COHERENCY_* values */
-
 /* SHADER_CONFIG register */
 
 #define SC_ALT_COUNTERS             (1ul << 3)
 #define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
 #define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
+#define SC_LS_ALLOW_ATTR_TYPES      (1ul << 16)
 #define SC_LS_PAUSEBUFFER_DISABLE   (1ul << 16)
+#define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
 #define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
 /* End SHADER_CONFIG register */
 
index b9a30da07aff26c145daf5cfbaa8e1eb37ebb65c..0833cac89be1a85abefd30327cb31c9605b082e4 100755 (executable)
 # Boston, MA  02110-1301, USA.
 #
 #
+ifeq ($(CONFIG_MALI_MIDGARD),y)
+# Built-in: obj-y lists object files (.o), named relative to this directory.
+obj-y += mali_clock.o
+obj-y += mpgpu.o
+obj-y += meson_main2.o
+obj-y += platform_gx.o
+obj-y += scaling.o
+obj-y += mali_kbase_runtime_pm.o
+obj-y += mali_kbase_config_devicetree.o
+else ifeq ($(CONFIG_MALI_MIDGARD),m)
+# Module: SRC lists source files with their platform/devicetree/ prefix.
+SRC += platform/devicetree/mali_clock.c
+SRC += platform/devicetree/mpgpu.c
+SRC += platform/devicetree/meson_main2.c
+SRC += platform/devicetree/platform_gx.c
+SRC += platform/devicetree/scaling.c
+SRC += platform/devicetree/mali_kbase_runtime_pm.c
+SRC += platform/devicetree/mali_kbase_config_devicetree.c
+endif
index 59c36b45a5c6019cc8f6b78663a860a8684676da..987043d9dd025a32365a8eb4dec83416c27b76fd 100755 (executable)
@@ -89,9 +89,17 @@ struct devfreq_cooling_ops t83x_model_ops = {
 
 #endif
 
+#include <mali_kbase_config.h>
+
 int kbase_platform_early_init(void)
 {
        /* Nothing needed at this stage */
        return 0;
 }
 
+static struct kbase_platform_config dummy_platform_config;
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+       return &dummy_platform_config;
+}
index 3c9e65d39e5a86449f6af0107dd341a69894a719..62f974c7bf54984925951a4c3bffa16e59fafc5f 100755 (executable)
@@ -177,14 +177,7 @@ int kbase_device_runtime_init(struct kbase_device *kbdev)
 {
        dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
        pm_runtime_enable(kbdev->dev);
-#ifdef CONFIG_MALI_MIDGARD_DEBUG_SYS
-       {
-               int err = kbase_platform_create_sysfs_file(kbdev->dev);
-
-               if (err)
-                       return err;
-       }
-#endif                         /* CONFIG_MALI_MIDGARD_DEBUG_SYS */
+
        return 0;
 }
 
index 82669510e941b508235a0e1377ab99060f500c75..3baf3d96d41afadb0faa46b76cabf6514d032c59 100755 (executable)
@@ -16,9 +16,6 @@
 
 
 #include <linux/ioport.h>
-#ifdef CONFIG_DEVFREQ_THERMAL
-#include <linux/devfreq_cooling.h>
-#endif
 #include <linux/thermal.h>
 #include <mali_kbase.h>
 #include <mali_kbase_defs.h>
@@ -68,74 +65,6 @@ struct kbase_pm_callback_conf pm_callbacks = {
        .power_resume_callback = NULL
 };
 
-#ifdef CONFIG_DEVFREQ_THERMAL
-
-#define FALLBACK_STATIC_TEMPERATURE 55000
-
-static unsigned long juno_model_static_power(unsigned long voltage)
-{
-       struct thermal_zone_device *tz;
-       unsigned long temperature, temp;
-       unsigned long temp_squared, temp_cubed, temp_scaling_factor;
-       const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
-       const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
-
-       tz = thermal_zone_get_zone_by_name("gpu");
-       if (IS_ERR(tz)) {
-               pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
-                               PTR_ERR(tz));
-               temperature = FALLBACK_STATIC_TEMPERATURE;
-       } else {
-               int ret;
-
-               ret = tz->ops->get_temp(tz, &temperature);
-               if (ret) {
-                       pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
-                                       ret);
-                       temperature = FALLBACK_STATIC_TEMPERATURE;
-               }
-       }
-
-       /* Calculate the temperature scaling factor. To be applied to the
-        * voltage scaled power.
-        */
-       temp = temperature / 1000;
-       temp_squared = temp * temp;
-       temp_cubed = temp_squared * temp;
-       temp_scaling_factor =
-                       (2 * temp_cubed)
-                       - (80 * temp_squared)
-                       + (4700 * temp)
-                       + 32000;
-
-       return (((coefficient * voltage_cubed) >> 20)
-                       * temp_scaling_factor)
-                               / 1000000;
-}
-
-static unsigned long juno_model_dynamic_power(unsigned long freq,
-               unsigned long voltage)
-{
-       /* The inputs: freq (f) is in Hz, and voltage (v) in mV.
-        * The coefficient (c) is in mW/(MHz mV mV).
-        *
-        * This function calculates the dynamic power after this formula:
-        * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
-        */
-       const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
-       const unsigned long f_mhz = freq / 1000000; /* MHz */
-       const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
-
-       return (coefficient * v2 * f_mhz) / 1000000; /* mW */
-}
-
-struct devfreq_cooling_ops juno_model_ops = {
-       .get_static_power = juno_model_static_power,
-       .get_dynamic_power = juno_model_dynamic_power,
-};
-
-#endif /* CONFIG_DEVFREQ_THERMAL */
-
 /*
  * Juno Secure Mode integration
  */
index fa5e9e9a5b1127dffaeafed2175ca46e0d655c44..5fc6d9e1c312c666927f7a9daa3339c7d617f167 100755 (executable)
  */
 #define PLATFORM_FUNCS (NULL)
 
-/** Power model for IPA
- *
- * Attached value: pointer to @ref mali_pa_model_ops
- */
-#ifdef CONFIG_DEVFREQ_THERMAL
-#define POWER_MODEL_CALLBACKS (&juno_model_ops)
-#else
-#define POWER_MODEL_CALLBACKS (NULL)
-#endif
-
 /**
  * Secure mode switch
  *
index ac5060af6a7e68fc04c945ddab443b9ab89cfdb7..eb957d3b5731fdb5b8c68599417abd2b387d478c 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
  * Attached value: number in kHz
  * Default value: NA
  */
-#define GPU_FREQ_KHZ_MAX (5000)
+#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
 /**
  * Minimum frequency GPU will be clocked at. Given in kHz.
  * This must be specified as there is no default value.
  * Attached value: number in kHz
  * Default value: NA
  */
-#define GPU_FREQ_KHZ_MIN (5000)
-
-/**
- * Values used for determining the GPU frequency based on the LogicTile type
- * Used by the function kbase_get_platform_logic_tile_type
- */
-#define VE_VIRTEX6_GPU_FREQ_MIN 5000
-#define VE_VIRTEX6_GPU_FREQ_MAX 5000
-#define VE_VIRTEX7_GPU_FREQ_MIN 40000
-#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
 
 /**
  * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
  */
 #define PLATFORM_FUNCS (NULL)
 
-/** Power model for IPA
- *
- * Attached value: pointer to @ref mali_pa_model_ops
- */
-#define POWER_MODEL_CALLBACKS (NULL)
-
 /**
  * Secure mode switch
  *
index 9bc51f1e2da82d2637c00bf18977698fc0c6c13d..4665f98cbbe488a60d25bf7ccacc04c8b09c9040 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 
 #define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
 
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type.
+ * Used by kbase_get_platform_min_freq() and kbase_get_platform_max_freq()
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+#define VE_DEFAULT_GPU_FREQ_MIN 5000
+#define VE_DEFAULT_GPU_FREQ_MAX 5000
+
+
 #define CPU_CLOCK_SPEED_UNDEFINED (0)
 
 static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
@@ -197,14 +209,71 @@ syscfg_reg_map_failed:
        return err;
 }
 
-u32 kbase_get_platform_logic_tile_type(void)
+/**
+ * kbase_get_platform_logic_tile_type -  determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function determines which LogicTile type is used by the platform by
+ * reading the HBI value of the daughterboard which holds the LogicTile:
+ *
+ * 0x217 HBI0217 Virtex-6
+ * 0x192 HBI0192 Virtex-5
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+static u32 kbase_get_platform_logic_tile_type(void)
 {
        void __iomem *syscfg_reg = NULL;
        u32 sys_procid1 = 0;
 
        syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
-
-       sys_procid1 = (NULL != syscfg_reg) ? readl(syscfg_reg) : 0;
+       if (NULL != syscfg_reg) {
+               sys_procid1 = readl(syscfg_reg);
+               iounmap(syscfg_reg);
+       }
 
        return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
 }
+
+/* Minimum GPU frequency for the attached logic tile. */
+u32 kbase_get_platform_min_freq(void)
+{
+       /* HBI value of the LogicTile daughterboard, zero if not accessible */
+       const u32 hbi = kbase_get_platform_logic_tile_type();
+
+       if (hbi == 0x217)       /* Virtex 6, HBI0217 */
+               return VE_VIRTEX6_GPU_FREQ_MIN;
+
+       if (hbi == 0x247)       /* Virtex 7, HBI0247 */
+               return VE_VIRTEX7_GPU_FREQ_MIN;
+
+       /*
+        * Every other logic tile (e.g. Virtex 5, HBI0192), or an
+        * unsuccessful read from the platform: use the default value.
+        */
+       return VE_DEFAULT_GPU_FREQ_MIN;
+}
+
+/* Maximum GPU frequency for the attached logic tile. */
+u32 kbase_get_platform_max_freq(void)
+{
+       /* HBI value of the LogicTile daughterboard, zero if not accessible */
+       const u32 hbi = kbase_get_platform_logic_tile_type();
+
+       if (hbi == 0x217)       /* Virtex 6, HBI0217 */
+               return VE_VIRTEX6_GPU_FREQ_MAX;
+
+       if (hbi == 0x247)       /* Virtex 7, HBI0247 */
+               return VE_VIRTEX7_GPU_FREQ_MAX;
+
+       /*
+        * Every other logic tile (e.g. Virtex 5, HBI0192), or an
+        * unsuccessful read from the platform: use the default value.
+        */
+       return VE_DEFAULT_GPU_FREQ_MAX;
+}
index ef9bfd7216189d85bd5a509e4a3e35784f699f53..da865698133a34d8fac98fce9a548eb2864fcd9d 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2013, 2015-2016 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
 int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
 
 /**
- * kbase_get_platform_logic_tile_type -  determines which LogicTile type 
- * is used by Versatile Express
- *
- * When platform_config build parameter is specified as vexpress, i.e.,
- * platform_config=vexpress, GPU frequency may vary dependent on the
- * particular platform. The GPU frequency depends on the LogicTile type.
- *
- * This function is called by kbase_common_device_init to determine
- * which LogicTile type is used by the platform by reading the HBI value
- * of the daughterboard which holds the LogicTile:
- *
- * 0x192 HBI0192 Virtex-5
- * 0x217 HBI0217 Virtex-6
- * 0x247 HBI0247 Virtex-7
- *
- * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ * Get the minimum GPU frequency for the attached logic tile
+ */
+u32 kbase_get_platform_min_freq(void);
+
+/**
+ * Get the maximum GPU frequency for the attached logic tile
  */
-u32 kbase_get_platform_logic_tile_type(void);
+u32 kbase_get_platform_max_freq(void);
 
 #endif                         /* _KBASE_CPU_VEXPRESS_H_ */
index 11c320ecc1f75e81e38f5244ab3b5325785d8924..2b91d72bd93c08007dc314156e612ccb6a2fdd22 100755 (executable)
  */
 #define PLATFORM_FUNCS (NULL)
 
-/** Power model for IPA
- *
- * Attached value: pointer to @ref mali_pa_model_ops
- */
-#define POWER_MODEL_CALLBACKS (NULL)
-
 /**
  * Secure mode switch
  *
index 9bc2985fef0f0a91aa58b99f5f1301efad25dcf4..d269c259149074133fee2b21f3fd681ce2e0b346 100755 (executable)
  */
 #define PLATFORM_FUNCS (NULL)
 
-/** Power model for IPA
- *
- * Attached value: pointer to @ref mali_pa_model_ops
- */
-#define POWER_MODEL_CALLBACKS (NULL)
-
 /**
  * Secure mode switch
  *
index 7e41a438ac539edd8172a86e4c759bb703d71195..643b1f3f39e93ccd524bf1558aa53bb33bdb40c2 100755 (executable)
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -19,14 +19,12 @@ import re
 import sys
 Import('env')
 
+
 if Glob('tests/sconscript'):
        SConscript( 'tests/sconscript' )
 
 mock_test = 0
 
-if env['v'] != '1':
-       env['MAKECOMSTR'] = '[MAKE] ${SOURCE.dir}'
-
 # Fake platform is a transient solution for GPL drivers running in kernel that does not provide configuration via platform data.
 # For such kernels fake_platform_device should be set to 1. For kernels providing platform data fake_platform_device should be set to 0.
 if env['platform_config']=='devicetree':
@@ -129,6 +127,6 @@ if Glob('internal/sconsfrag'):
        execfile('internal/sconsfrag')
        get_internal(env)
 
-env.ProgTarget('kbase', cmd)
+env.KernelObjTarget('kbase', cmd)
 
 env.AppendUnique(BASE=['cutils_linked_list'])
index af92498784f30c75a98a99fde731baa109600e2e..6619c0795bd5b8ee26cd65dc48d03fb34c3fdd52 100755 (executable)
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -144,6 +144,8 @@ static int pl111_modeset_init(struct drm_device *dev)
 
        pl111_connector->connector.encoder = &pl111_encoder->encoder;
 
+       pl111_encoder->encoder.crtc = &priv->pl111_crtc->crtc;
+
        goto finish;
 
 out_config:
index c5011a7d07c12d13be6b9423467c999d0730400d..5c47de7ac84b0ca4a3ff4677b1a629c23dca9626 100755 (executable)
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2013, 2015 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2013, 2015-2016 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -48,5 +48,5 @@ else:
 # need Module.symvers from drm.ko
 #drm_env.Depends('$STATIC_LIB_PATH/pl111_drm.ko', '$STATIC_LIB_PATH/drm.ko')
 
-env.ProgTarget('x11', cmd)
+drm_env.KernelObjTarget('x11', cmd)
 
index d030e7129eee51bd5f13023181df2fb8ea779fe8..a90fa898fc5940f12b3449cbb146219f803f86cc 100755 (executable)
@@ -15,7 +15,7 @@
 
 Import('env')
 
-if 'x11' in env['winsys']:
+if 'x11' in env['winsys'] or 'gbm' in env['winsys']:
        # pl111_drm isn't released so only try to build it if it's there
        if Glob('pl111/sconscript') and (env['platform_config'] == 'vexpress' or env['platform_config'] == 'vexpress_6xvirtex7_10mhz'):
                SConscript('pl111/sconscript')