x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
author Denys Vlasenko <dvlasenk@redhat.com>
Fri, 3 Apr 2015 19:49:13 +0000 (21:49 +0200)
committer Ingo Molnar <mingo@kernel.org>
Wed, 8 Apr 2015 07:02:13 +0000 (09:02 +0200)
Interrupt entry points are handled with the following code;
each 32-byte code block contains seven entry points:

...
[push][jump 22] // 4 bytes
[push][jump 18] // 4 bytes
[push][jump 14] // 4 bytes
[push][jump 10] // 4 bytes
[push][jump  6] // 4 bytes
[push][jump  2] // 4 bytes
[push][jump common_interrupt][padding] // 8 bytes

[push][jump]
[push][jump]
[push][jump]
[push][jump]
[push][jump]
[push][jump]
[push][jump common_interrupt][padding]

[padding_2]
common_interrupt:

And there is a table which holds pointers to every entry point,
IOW: to every push.

With a cold cache, two jumps are still costlier than one, even
though we get the benefit of them residing in the same
cacheline.

This change replaces short jumps with near ones to
'common_interrupt', and pads every push+jump pair to 8 bytes. This
way, each interrupt takes only one jump.

This change replaces ".p2align CONFIG_X86_L1_CACHE_SHIFT" before
the dispatch table with ".align 8" - we do not need anything
stronger than that.

The table of entry addresses (the interrupt[] array) is no
longer necessary: the address of each entry can be easily
calculated as (irq_entries_start + i*8).

   text    data     bss     dec     hex filename
  12546       0       0   12546    3102 entry_64.o.before
  11626       0       0   11626    2d6a entry_64.o

The size decrease is because 1656 bytes of .init.rodata are
gone. That's initdata, though. The resident size does go up a
bit.

Run-tested (32 and 64 bits).

Acked-and-Tested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1428090553-7283-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/hw_irq.h
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/irqinit.c
arch/x86/lguest/boot.c

index 9662290e0b2075ab42608af776abbe4a4219b6fd..e9571ddabc4feb821ae04d47c9d6c3b509178344 100644 (file)
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
-                                   - FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
 #endif
 
 #define VECTOR_UNDEFINED       (-1)
index effa2793febadd05ee7be3b4ae622c2a2d9881e4..02bec0f1d1e19d8acf93089ee30adea9721c64c5 100644 (file)
@@ -723,43 +723,22 @@ END(sysenter_badsys)
 .endm
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-.section .init.rodata,"a"
-ENTRY(interrupt)
-.section .entry.text, "ax"
-       .p2align 5
-       .p2align CONFIG_X86_L1_CACHE_SHIFT
+       .align 8
 ENTRY(irq_entries_start)
        RING0_INT_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-       .balign 32
-  .rept        7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+       pushl_cfi $(~vector+0x80)       /* Note: always in signed byte range */
+    vector=vector+1
+       jmp     common_interrupt
        CFI_ADJUST_CFA_OFFSET -4
-      .endif
-1:     pushl_cfi $(~vector+0x80)       /* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-       jmp 2f
-      .endif
-      .previous
-       .long 1b
-      .section .entry.text, "ax"
-vector=vector+1
-    .endif
-  .endr
-2:     jmp common_interrupt
-.endr
+       .align  8
+    .endr
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
index e4c810395bae95fa6c15af9c02bf927cb7812af3..4ca03c518ab48551c857cd0c72ac65222823402b 100644 (file)
@@ -608,44 +608,23 @@ ENTRY(ret_from_fork)
 END(ret_from_fork)
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-       .section .init.rodata,"a"
-ENTRY(interrupt)
-       .section .entry.text
-       .p2align 5
-       .p2align CONFIG_X86_L1_CACHE_SHIFT
+       .align 8
 ENTRY(irq_entries_start)
        INTR_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-       .balign 32
-  .rept        7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+       pushq_cfi $(~vector+0x80)       /* Note: always in signed byte range */
+    vector=vector+1
+       jmp     common_interrupt
        CFI_ADJUST_CFA_OFFSET -8
-      .endif
-1:     pushq_cfi $(~vector+0x80)       /* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-       jmp 2f
-      .endif
-      .previous
-       .quad 1b
-      .section .entry.text
-vector=vector+1
-    .endif
-  .endr
-2:     jmp common_interrupt
-.endr
+       .align  8
+    .endr
        CFI_ENDPROC
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * Interrupt entry/exit.
  *
index 70e181ea1eac1f2da444482e6714e61b52d5a19e..cd10a64372647c3579ba6717db49c6cd63c6353a 100644 (file)
@@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
 #endif
        for_each_clear_bit_from(i, used_vectors, first_system_vector) {
                /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-               set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+               set_intr_gate(i, irq_entries_start +
+                               8 * (i - FIRST_EXTERNAL_VECTOR));
        }
 #ifdef CONFIG_X86_LOCAL_APIC
        for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
index 8561585ee2c6a7b70fe9bcbfb66774b38ebcae14..717908b16037d45957a0ec69b94c8c7d396bfaa6 100644 (file)
@@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
                /* Some systems map "vectors" to interrupts weirdly.  Not us! */
                __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
                if (i != SYSCALL_VECTOR)
-                       set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+                       set_intr_gate(i, irq_entries_start +
+                                       8 * (i - FIRST_EXTERNAL_VECTOR));
        }
 
        /*