x86: voluntary leave_mm before entering ACPI C3
author Venki Pallipadi <venkatesh.pallipadi@intel.com>
Wed, 30 Jan 2008 12:32:01 +0000 (13:32 +0100)
committer Ingo Molnar <mingo@elte.hu>
Wed, 30 Jan 2008 12:32:01 +0000 (13:32 +0100)
Avoid TLB flush IPIs during C3 states by voluntarily calling leave_mm()
before entering C3.

The performance impact of TLB flush on C3 should not be significant with
respect to C3 wakeup latency. Also, CPUs tend to flush TLB in hardware while in
C3 anyway.

On an 8 logical CPU system, running make -j2, the number of tlbflush IPIs goes
down from 40 per second to ~0. Total number of interrupts during the run
of this workload was ~1200 per second, which makes it a ~3% savings in wakeups.

There was no measurable performance or power impact however.

[ akpm@linux-foundation.org: symbol export fixes. ]

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/x86/kernel/smp_32.c
arch/x86/kernel/smp_64.c
drivers/acpi/processor_idle.c
include/asm-ia64/acpi.h
include/asm-x86/acpi.h
include/asm-x86/mmu.h
include/asm-x86/mmu_context_32.h

index 070816ac79e101c93846011dcbc584282870029f..dc0cde9d16fb38088587b664f02b91f878176628 100644 (file)
@@ -256,13 +256,14 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  * We need to reload %cr3 since the page tables may be going
  * away from under us..
  */
-void leave_mm(unsigned long cpu)
+void leave_mm(int cpu)
 {
        if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
                BUG();
        cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
        load_cr3(swapper_pg_dir);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
  *
index 02a6533e8909e153e18b56609747733853883955..2fd74b06db67093cc3b7789534f30d81b6912346 100644 (file)
@@ -69,13 +69,14 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  */
-static inline void leave_mm(int cpu)
+void leave_mm(int cpu)
 {
        if (read_pda(mmu_state) == TLBSTATE_OK)
                BUG();
        cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
        load_cr3(swapper_pg_dir);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
  *
index 2235f4e02d26f46267b512a0d3c4c14badc34526..0721a8183c89bef438ab3e64b6f5fa3af9fdb054 100644 (file)
@@ -534,6 +534,7 @@ static void acpi_processor_idle(void)
                break;
 
        case ACPI_STATE_C3:
+               acpi_unlazy_tlb(smp_processor_id());
                /*
                 * Must be done before busmaster disable as we might
                 * need to access HPET !
@@ -1423,6 +1424,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
                return 0;
        }
 
+       acpi_unlazy_tlb(smp_processor_id());
        /*
         * Must be done before busmaster disable as we might need to
         * access HPET !
index 81bcd5e517898af73ac5b096c8e1eed682968f6d..cd1cc39b5599b639441bbb00b050d40675889a2e 100644 (file)
@@ -127,6 +127,8 @@ extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
 extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
 #endif
 
+#define acpi_unlazy_tlb(x)
+
 #endif /*__KERNEL__*/
 
 #endif /*_ASM_ACPI_H*/
index 2feb0c494be75b4b053c81f9b43de9dcc19e365a..98a9ca266531e5c823f125d72f5445f7474d4a5d 100644 (file)
@@ -27,6 +27,7 @@
 
 #include <asm/numa.h>
 #include <asm/processor.h>
+#include <asm/mmu.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -167,4 +168,6 @@ static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
 }
 #endif
 
+#define acpi_unlazy_tlb(x)     leave_mm(x)
+
 #endif /*__X86_ASM_ACPI_H*/
index 3f922c8e1c881fcba9f779c75648d54225f15481..efa962c388975c916e3dec785e4b09669e764d42 100644 (file)
@@ -20,4 +20,12 @@ typedef struct {
        void *vdso;
 } mm_context_t;
 
+#ifdef CONFIG_SMP
+void leave_mm(int cpu);
+#else
+static inline void leave_mm(int cpu)
+{
+}
+#endif
+
 #endif /* _ASM_X86_MMU_H */
index 7eb0b0b1fb3c3e24899eaa3ba0fa378857f77e62..8198d1cca1f31264dc6b0ee4da90cdd0c13cd323 100644 (file)
@@ -32,8 +32,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #endif
 }
 
-void leave_mm(unsigned long cpu);
-
 static inline void switch_mm(struct mm_struct *prev,
                             struct mm_struct *next,
                             struct task_struct *tsk)