commit
f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant)
introduced a performance regression, because stackptr array is shared by
all cpus, adding cache line ping pongs. (16 cpus share a 64 bytes cache
line)
Fix this using alloc_percpu()
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-By: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
* @stacksize jumps (number of user chains) can possibly be made.
*/
unsigned int stacksize;
- unsigned int *stackptr;
+ unsigned int __percpu *stackptr;
void ***jumpstack;
/* ipt_entry tables: one per CPU */
/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
cpu = smp_processor_id();
table_base = private->entries[cpu];
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
- stackptr = &private->stackptr[cpu];
+ stackptr = per_cpu_ptr(private->stackptr, cpu);
origptr = *stackptr;
e = get_entry(table_base, private->hook_entry[hook]);
cpu = smp_processor_id();
table_base = private->entries[cpu];
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
- stackptr = &private->stackptr[cpu];
+ stackptr = per_cpu_ptr(private->stackptr, cpu);
origptr = *stackptr;
e = get_entry(table_base, private->hook_entry[hook]);
vfree(info->jumpstack);
else
kfree(info->jumpstack);
- if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
- vfree(info->stackptr);
- else
- kfree(info->stackptr);
+
+ free_percpu(info->stackptr);
kfree(info);
}
unsigned int size;
int cpu;
- size = sizeof(unsigned int) * nr_cpu_ids;
- if (size > PAGE_SIZE)
- i->stackptr = vmalloc(size);
- else
- i->stackptr = kmalloc(size, GFP_KERNEL);
+ i->stackptr = alloc_percpu(unsigned int);
if (i->stackptr == NULL)
return -ENOMEM;
- memset(i->stackptr, 0, size);
size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE)