x86: add x2apic_wrmsr_fence() to x2apic flush tlb paths
author Suresh Siddha <suresh.b.siddha@intel.com>
Tue, 17 Mar 2009 18:16:54 +0000 (10:16 -0800)
committer Ingo Molnar <mingo@elte.hu>
Wed, 18 Mar 2009 08:36:14 +0000 (09:36 +0100)
Impact: optimize APIC IPI related barriers

Uncached MMIO accesses for xapic are inherently serializing and hence
we don't need explicit barriers for xapic IPI paths.

x2apic MSR writes/reads don't have serializing semantics and hence need
a serializing instruction or mfence, to make all the previous memory
stores globally visible before the x2apic msr write for IPI.

Add x2apic_wrmsr_fence() to the x2apic specific IPI paths, replacing the
smp_mb() in the flush tlb path.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "steiner@sgi.com" <steiner@sgi.com>
Cc: Nick Piggin <npiggin@suse.de>
LKML-Reference: <1237313814.27006.203.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/include/asm/apic.h
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/apic/x2apic_phys.c
arch/x86/mm/tlb.c

index 6d5b6f0900e12c31a40ed5d37d1cd7eb745499f9..00f5962d82d0f5379ca04f3708c3951dadd80987 100644 (file)
@@ -108,6 +108,16 @@ extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
 #ifdef CONFIG_X86_X2APIC
+/*
+ * Make previous memory operations globally visible before sending the
+ * IPI through x2apic wrmsr. We need a serializing instruction or mfence
+ * for this; the "memory" clobber also stops compiler reordering across it.
+ */
+static inline void x2apic_wrmsr_fence(void)
+{
+       asm volatile("mfence" : : : "memory");
+}
+
 static inline void native_apic_msr_write(u32 reg, u32 v)
 {
        if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
index 8fb87b6dd63330c193890ac3ae5d704eda096ba2..4a903e2f0d179d68d6a30f5afe7c1b0543a45870 100644 (file)
@@ -57,6 +57,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
        unsigned long query_cpu;
        unsigned long flags;
 
+       x2apic_wrmsr_fence();
+
        local_irq_save(flags);
        for_each_cpu(query_cpu, mask) {
                __x2apic_send_IPI_dest(
@@ -73,6 +75,8 @@ static void
        unsigned long query_cpu;
        unsigned long flags;
 
+       x2apic_wrmsr_fence();
+
        local_irq_save(flags);
        for_each_cpu(query_cpu, mask) {
                if (query_cpu == this_cpu)
@@ -90,6 +94,8 @@ static void x2apic_send_IPI_allbutself(int vector)
        unsigned long query_cpu;
        unsigned long flags;
 
+       x2apic_wrmsr_fence();
+
        local_irq_save(flags);
        for_each_online_cpu(query_cpu) {
                if (query_cpu == this_cpu)
index 23625b9f98b28530657ca7d572cf4b7a4a60a378..a284359627e7fe23ed8a84ab00be47e0a24ae9bf 100644 (file)
@@ -58,6 +58,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
        unsigned long query_cpu;
        unsigned long flags;
 
+       x2apic_wrmsr_fence();
+
        local_irq_save(flags);
        for_each_cpu(query_cpu, mask) {
                __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
@@ -73,6 +75,8 @@ static void
        unsigned long query_cpu;
        unsigned long flags;
 
+       x2apic_wrmsr_fence();
+
        local_irq_save(flags);
        for_each_cpu(query_cpu, mask) {
                if (query_cpu != this_cpu)
@@ -89,6 +93,8 @@ static void x2apic_send_IPI_allbutself(int vector)
        unsigned long query_cpu;
        unsigned long flags;
 
+       x2apic_wrmsr_fence();
+
        local_irq_save(flags);
        for_each_online_cpu(query_cpu) {
                if (query_cpu == this_cpu)
index a654d59e448327e640d239698625afaf9944d34e..821e97017e954a8a8e74a607950c583061412d7a 100644 (file)
@@ -186,11 +186,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
        cpumask_andnot(to_cpumask(f->flush_cpumask),
                       cpumask, cpumask_of(smp_processor_id()));
 
-       /*
-        * Make the above memory operations globally visible before
-        * sending the IPI.
-        */
-       smp_mb();
        /*
         * We have to send the IPI only to
         * CPUs affected.