MIPS: Loongson-3: IRQ balancing for PCI devices
authorHuacai Chen <chenhc@lemote.com>
Thu, 22 Jun 2017 15:06:52 +0000 (23:06 +0800)
committerRalf Baechle <ralf@linux-mips.org>
Thu, 29 Jun 2017 00:42:23 +0000 (02:42 +0200)
IRQ0 (HPET), IRQ1 (Keyboard), IRQ2 (Cascade), IRQ7 (SCI), IRQ8 (RTC)
and IRQ12 (Mouse) are handled by core-0 locally. Other PCI IRQs (3, 4,
5, 6, 14, 15) are balanced by all cores from Node-0. This can improve
I/O performance significantly.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16589/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
arch/mips/loongson64/loongson-3/irq.c
arch/mips/loongson64/loongson-3/smp.c

index 548f759454dce494de9db809a74f56821133efb9..2e6e205ed9b9e9930b6d222079f59eeb5b5d7d4a 100644 (file)
@@ -9,17 +9,32 @@
 
 #include "smp.h"
 
+extern void loongson3_send_irq_by_ipi(int cpu, int irqs);
 unsigned int ht_irq[] = {0, 1, 3, 4, 5, 6, 7, 8, 12, 14, 15};
+unsigned int local_irq = 1<<0 | 1<<1 | 1<<2 | 1<<7 | 1<<8 | 1<<12;
 
 static void ht_irqdispatch(void)
 {
-       unsigned int i, irq;
+       unsigned int i, irq, irq0, irq1;
+       static unsigned int dest_cpu = 0;
 
        irq = LOONGSON_HT1_INT_VECTOR(0);
        LOONGSON_HT1_INT_VECTOR(0) = irq; /* Acknowledge the IRQs */
 
+       irq0 = irq & local_irq;  /* handled by local core */
+       irq1 = irq & ~local_irq; /* balanced by other cores */
+
+       if (dest_cpu == 0 || !cpu_online(dest_cpu))
+               irq0 |= irq1;
+       else
+               loongson3_send_irq_by_ipi(dest_cpu, irq1);
+
+       dest_cpu = dest_cpu + 1;
+       if (dest_cpu >= num_possible_cpus() || cpu_data[dest_cpu].package > 0)
+               dest_cpu = 0;
+
        for (i = 0; i < ARRAY_SIZE(ht_irq); i++) {
-               if (irq & (0x1 << ht_irq[i]))
+               if (irq0 & (0x1 << ht_irq[i]))
                        do_IRQ(ht_irq[i]);
        }
 }
index 1629743ba96aaac9e15f5b2714f8813ae95a1d5c..b7a355c3c40813b45aa29940471bfa7148aa643a 100644 (file)
@@ -254,13 +254,21 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action)
                loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu_logical_map(i)]);
 }
 
+#define IPI_IRQ_OFFSET 6
+
+void loongson3_send_irq_by_ipi(int cpu, int irqs)
+{
+       loongson3_ipi_write32(irqs << IPI_IRQ_OFFSET, ipi_set0_regs[cpu_logical_map(cpu)]);
+}
+
 void loongson3_ipi_interrupt(struct pt_regs *regs)
 {
        int i, cpu = smp_processor_id();
-       unsigned int action, c0count;
+       unsigned int action, c0count, irqs;
 
        /* Load the ipi register to figure out what we're supposed to do */
        action = loongson3_ipi_read32(ipi_status0_regs[cpu_logical_map(cpu)]);
+       irqs = action >> IPI_IRQ_OFFSET;
 
        /* Clear the ipi register to clear the interrupt */
        loongson3_ipi_write32((u32)action, ipi_clear0_regs[cpu_logical_map(cpu)]);
@@ -282,6 +290,14 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
                        core0_c0count[i] = c0count;
                __wbflush(); /* Let others see the result ASAP */
        }
+
+       if (irqs) {
+               int irq;
+               while ((irq = ffs(irqs))) {
+                       do_IRQ(irq-1);
+                       irqs &= ~(1<<(irq-1));
+               }
+       }
 }
 
 #define MAX_LOOPS 800