sparc64: remove mm_cpumask clearing to fix kthread_use_mm race

author Nicholas Piggin <npiggin@gmail.com>
Mon, 14 Sep 2020 04:52:18 +0000 (14:52 +1000)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 5 Nov 2020 10:06:53 +0000 (11:06 +0100)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index c50182cd2f64684de47d7408c122677430137be7..98825058e1df0f1fe81555dc96ed8e41daa74e91 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1039,38 +1039,9 @@ void smp_fetch_global_pmu(void)
   * are flush_tlb_*() routines, and these run after flush_cache_*()
   * which performs the flushw.
   *
- * The SMP TLB coherency scheme we use works as follows:
- *
- * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
- *    space has (potentially) executed on, this is the heuristic
- *    we use to avoid doing cross calls.
- *
- *    Also, for flushing from kswapd and also for clones, we
- *    use cpu_vm_mask as the list of cpus to make run the TLB.
- *
- * 2) TLB context numbers are shared globally across all processors
- *    in the system, this allows us to play several games to avoid
- *    cross calls.
- *
- *    One invariant is that when a cpu switches to a process, and
- *    that processes tsk->active_mm->cpu_vm_mask does not have the
- *    current cpu's bit set, that tlb context is flushed locally.
- *
- *    If the address space is non-shared (ie. mm->count == 1) we avoid
- *    cross calls when we want to flush the currently running process's
- *    tlb state.  This is done by clearing all cpu bits except the current
- *    processor's in current->mm->cpu_vm_mask and performing the
- *    flush locally only.  This will force any subsequent cpus which run
- *    this task to flush the context from the local tlb if the process
- *    migrates to another cpu (again).
- *
- * 3) For shared address spaces (threads) and swapping we bite the
- *    bullet for most cases and perform the cross call (but only to
- *    the cpus listed in cpu_vm_mask).
- *
- *    The performance gain from "optimizing" away the cross call for threads is
- *    questionable (in theory the big win for threads is the massive sharing of
- *    address space state across processors).
+ * mm->cpu_vm_mask is a bit mask of which cpus an address
+ * space has (potentially) executed on, this is the heuristic
+ * we use to limit cross calls.
   */
  
  /* This currently is only used by the hugetlb arch pre-fault
@@ -1080,18 +1051,13 @@ void smp_fetch_global_pmu(void)
  void smp_flush_tlb_mm(struct mm_struct *mm)
  {
         u32 ctx = CTX_HWBITS(mm->context);
-       int cpu = get_cpu();
  
-       if (atomic_read(&mm->mm_users) == 1) {
-               cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
-               goto local_flush_and_out;
-       }
+       get_cpu();
  
         smp_cross_call_masked(&xcall_flush_tlb_mm,
                               ctx, 0, 0,
                               mm_cpumask(mm));
  
-local_flush_and_out:
         __flush_tlb_mm(ctx, SECONDARY_CONTEXT);
  
         put_cpu();
@@ -1114,17 +1080,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
  {
         u32 ctx = CTX_HWBITS(mm->context);
         struct tlb_pending_info info;
-       int cpu = get_cpu();
+
+       get_cpu();
  
         info.ctx = ctx;
         info.nr = nr;
         info.vaddrs = vaddrs;
  
-       if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
-               cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
-       else
-               smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
-                                      &info, 1);
+       smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
+                              &info, 1);
  
         __flush_tlb_pending(ctx, nr, vaddrs);
  
@@ -1134,14 +1098,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
  void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
  {
         unsigned long context = CTX_HWBITS(mm->context);
-       int cpu = get_cpu();
  
-       if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
-               cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
-       else
-               smp_cross_call_masked(&xcall_flush_tlb_page,
-                                     context, vaddr, 0,
-                                     mm_cpumask(mm));
+       get_cpu();
+
+       smp_cross_call_masked(&xcall_flush_tlb_page,
+                             context, vaddr, 0,
+                             mm_cpumask(mm));
+
         __flush_tlb_page(context, vaddr);
  
         put_cpu();
author	Nicholas Piggin <npiggin@gmail.com>
	Mon, 14 Sep 2020 04:52:18 +0000 (14:52 +1000)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 5 Nov 2020 10:06:53 +0000 (11:06 +0100)