iommu/arm-smmu: Set SMTNMB_TLBEN in ACR to enable caching of bypass entries
authorNipun Gupta <nipun.gupta@nxp.com>
Fri, 4 Nov 2016 09:55:23 +0000 (15:25 +0530)
committerWill Deacon <will.deacon@arm.com>
Tue, 29 Nov 2016 15:57:41 +0000 (15:57 +0000)
The SMTNMB_TLBEN bit in the Auxiliary Configuration Register (ACR) provides an
option to enable updating of the TLB for bypass transactions caused by
no stream match in the stream match table. This reduces the latency of
subsequent transactions with the same stream-id which bypass the SMMU.
This provides a significant performance benefit for certain networking
workloads.

With this change a substantial performance improvement of ~9% is observed with
the DPDK l3fwd application (http://dpdk.org/doc/guides/sample_app_ug/l3_forward.html)
on NXP's LS2088a platform.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
drivers/iommu/arm-smmu.c

index 2f15ffb6cb0c490c705119ddc10651c10ebb36c9..79d54a6f7f9a3f04f771ecc07e053858b506ad0d 100644 (file)
@@ -247,6 +247,7 @@ enum arm_smmu_s2cr_privcfg {
 #define ARM_MMU500_ACTLR_CPRE          (1 << 1)
 
 #define ARM_MMU500_ACR_CACHE_LOCK      (1 << 26)
+#define ARM_MMU500_ACR_SMTNMB_TLBEN    (1 << 8)
 
 #define CB_PAR_F                       (1 << 0)
 
@@ -1581,16 +1582,22 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
        for (i = 0; i < smmu->num_mapping_groups; ++i)
                arm_smmu_write_sme(smmu, i);
 
-       /*
-        * Before clearing ARM_MMU500_ACTLR_CPRE, need to
-        * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
-        * bit is only present in MMU-500r2 onwards.
-        */
-       reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
-       major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
-       if ((smmu->model == ARM_MMU500) && (major >= 2)) {
+       if (smmu->model == ARM_MMU500) {
+               /*
+                * Before clearing ARM_MMU500_ACTLR_CPRE, need to
+                * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
+                * bit is only present in MMU-500r2 onwards.
+                */
+               reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
+               major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
                reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
-               reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+               if (major >= 2)
+                       reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+               /*
+                * Allow unmatched Stream IDs to allocate bypass
+                * TLB entries for reduced latency.
+                */
+               reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
                writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
        }