#ifndef _NET_DST_OPS_H
#define _NET_DST_OPS_H
#include <linux/types.h>
+#include <linux/percpu_counter.h>
+#include <linux/cache.h>
struct dst_entry;
struct kmem_cachep;
void (*update_pmtu)(struct dst_entry *dst, u32 mtu);
int (*local_out)(struct sk_buff *skb);
- atomic_t entries;
struct kmem_cache *kmem_cachep;
+
+ struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp;
};
+
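+/*
+ * Fast, approximate read: percpu_counter_read_positive() only looks at
+ * the shared count, so the result may lag the true value by up to the
+ * per-cpu batch on each CPU, but it is never negative.
+ */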
+static inline int dst_entries_get_fast(struct dst_ops *dst)
+{
+ return percpu_counter_read_positive(&dst->pcpuc_entries);
+}
+
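+/*
+ * Accurate read: sums the shared count and every per-cpu delta.  BH is
+ * disabled because the counter is also updated from softirq context.
+ */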
+static inline int dst_entries_get_slow(struct dst_ops *dst)
+{
+ int res;
+
+ local_bh_disable();
+ res = percpu_counter_sum_positive(&dst->pcpuc_entries);
+ local_bh_enable();
+ return res;
+}
+
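+/*
+ * dst entries are created and freed from both process and softirq
+ * context, so updates are done with BH disabled.
+ */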
+static inline void dst_entries_add(struct dst_ops *dst, int val)
+{
+ local_bh_disable();
+ percpu_counter_add(&dst->pcpuc_entries, val);
+ local_bh_enable();
+}
+
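+/*
+ * Allocates the per-cpu storage behind the counter; returns a negative
+ * errno on failure and must be paired with dst_entries_destroy().
+ */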
+static inline int dst_entries_init(struct dst_ops *dst)
+{
+ return percpu_counter_init(&dst->pcpuc_entries, 0);
+}
+
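+/* Releases the per-cpu storage; the counter must no longer be updated. */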
+static inline void dst_entries_destroy(struct dst_ops *dst)
+{
+ percpu_counter_destroy(&dst->pcpuc_entries);
+}
+
#endif
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
.update_pmtu = fake_update_pmtu,
- .entries = ATOMIC_INIT(0),
};
/*
{
int ret;
- ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ ret = dst_entries_init(&fake_dst_ops);
if (ret < 0)
return ret;
+
+ ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ if (ret < 0) {
+ dst_entries_destroy(&fake_dst_ops);
+ return ret;
+ }
#ifdef CONFIG_SYSCTL
brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ dst_entries_destroy(&fake_dst_ops);
return -ENOMEM;
}
#endif
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(brnf_sysctl_header);
#endif
+ dst_entries_destroy(&fake_dst_ops);
}
{
struct dst_entry *dst;
- if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+ if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
if (ops->gc(ops))
return NULL;
}
#if RT_CACHE_DEBUG >= 2
atomic_inc(&dst_total);
#endif
- atomic_inc(&ops->entries);
+ dst_entries_add(ops, 1);
return dst;
}
EXPORT_SYMBOL(dst_alloc);
neigh_release(neigh);
}
- atomic_dec(&dst->ops->entries);
+ dst_entries_add(dst->ops, -1);
if (dst->ops->destroy)
dst->ops->destroy(dst);
.negative_advice = dn_dst_negative_advice,
.link_failure = dn_dst_link_failure,
.update_pmtu = dn_dst_update_pmtu,
- .entries = ATOMIC_INIT(0),
};
static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
dn_dst_ops.kmem_cachep =
kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ dst_entries_init(&dn_dst_ops);
setup_timer(&dn_route_timer, dn_dst_check_expire, 0);
dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
add_timer(&dn_route_timer);
dn_run_flush(0);
proc_net_remove(&init_net, "decnet_cache");
+ dst_entries_destroy(&dn_dst_ops);
}
.link_failure = ipv4_link_failure,
.update_pmtu = ip_rt_update_pmtu,
.local_out = __ip_local_out,
- .entries = ATOMIC_INIT(0),
};
#define ECN_OR_COST(class) TC_PRIO_##class
seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
" %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
- atomic_read(&ipv4_dst_ops.entries),
+ dst_entries_get_slow(&ipv4_dst_ops),
st->in_hit,
st->in_slow_tot,
st->in_slow_mc,
struct rtable *rth, **rthp;
unsigned long now = jiffies;
int goal;
+ int entries = dst_entries_get_fast(&ipv4_dst_ops);
/*
* Garbage collection is pretty expensive,
RT_CACHE_STAT_INC(gc_total);
if (now - last_gc < ip_rt_gc_min_interval &&
- atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
+ entries < ip_rt_max_size) {
RT_CACHE_STAT_INC(gc_ignored);
goto out;
}
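+ /* Committed to a GC pass: switch to the exact (summed) count. */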
+ entries = dst_entries_get_slow(&ipv4_dst_ops);
/* Calculate number of entries, which we want to expire now. */
- goal = atomic_read(&ipv4_dst_ops.entries) -
- (ip_rt_gc_elasticity << rt_hash_log);
+ goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
if (goal <= 0) {
if (equilibrium < ipv4_dst_ops.gc_thresh)
equilibrium = ipv4_dst_ops.gc_thresh;
- goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+ goal = entries - equilibrium;
if (goal > 0) {
equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
- goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+ goal = entries - equilibrium;
}
} else {
/* We are in a dangerous area. Try to reduce the cache really
* aggressively.
*/
goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
- equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
+ equilibrium = entries - goal;
}
if (now - last_gc >= ip_rt_gc_min_interval)
expire >>= 1;
#if RT_CACHE_DEBUG >= 2
printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
- atomic_read(&ipv4_dst_ops.entries), goal, i);
+ dst_entries_get_fast(&ipv4_dst_ops), goal, i);
#endif
- if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+ if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
goto out;
} while (!in_softirq() && time_before_eq(jiffies, now));
- if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+ if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
+ goto out;
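+ /* The fast estimate may be stale; confirm overflow with the exact sum. */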
+ if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
goto out;
if (net_ratelimit())
printk(KERN_WARNING "dst cache overflow\n");
work_done:
expire += ip_rt_gc_min_interval;
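+ /* Try the cheap estimate first; only compute the exact sum when it
+ * does not already show we are below gc_thresh. */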
if (expire > ip_rt_gc_timeout ||
- atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
+ dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
+ dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
expire = ip_rt_gc_timeout;
#if RT_CACHE_DEBUG >= 2
printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
- atomic_read(&ipv4_dst_ops.entries), goal, rover);
+ dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
#endif
out: return 0;
}
.destroy = ipv4_dst_destroy,
.check = ipv4_blackhole_dst_check,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
- .entries = ATOMIC_INIT(0),
};
ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
+ if (dst_entries_init(&ipv4_dst_ops) < 0)
+ panic("IP: failed to allocate ipv4_dst_ops counter\n");
+
+ if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
+ panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
+
rt_hash_table = (struct rt_hash_bucket *)
alloc_large_system_hash("IP route cache",
sizeof(struct rt_hash_bucket),
struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
xfrm4_policy_afinfo.garbage_collect(net);
- return (atomic_read(&ops->entries) > ops->gc_thresh * 2);
+ return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
}
static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
.ifdown = xfrm4_dst_ifdown,
.local_out = __ip_local_out,
.gc_thresh = 1024,
- .entries = ATOMIC_INIT(0),
};
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
* and start cleaning when we're 1/2 full
*/
xfrm4_dst_ops.gc_thresh = rt_max_size/2;
+ dst_entries_init(&xfrm4_dst_ops);
xfrm4_state_init();
xfrm4_policy_init();
.link_failure = ip6_link_failure,
.update_pmtu = ip6_rt_update_pmtu,
.local_out = __ip6_local_out,
- .entries = ATOMIC_INIT(0),
};
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
.destroy = ip6_dst_destroy,
.check = ip6_dst_check,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
- .entries = ATOMIC_INIT(0),
};
static struct rt6_info ip6_null_entry_template = {
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+ int entries;
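+
+ /* The approximate count is enough for the rate-limit check below. */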
+ entries = dst_entries_get_fast(ops);
if (time_after(rt_last_gc + rt_min_interval, now) &&
- atomic_read(&ops->entries) <= rt_max_size)
+ entries <= rt_max_size)
goto out;
net->ipv6.ip6_rt_gc_expire++;
fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
net->ipv6.ip6_rt_last_gc = now;
- if (atomic_read(&ops->entries) < ops->gc_thresh)
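+ /* GC has run; re-read the exact count for the checks below. */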
+ entries = dst_entries_get_slow(ops);
+ if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
- return atomic_read(&ops->entries) > rt_max_size;
+ return entries > rt_max_size;
}
/* Clean host part of a prefix. Not necessary in radix tree,
net->ipv6.rt6_stats->fib_rt_alloc,
net->ipv6.rt6_stats->fib_rt_entries,
net->ipv6.rt6_stats->fib_rt_cache,
- atomic_read(&net->ipv6.ip6_dst_ops.entries),
+ dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
net->ipv6.rt6_stats->fib_discarded_routes);
return 0;
memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
sizeof(net->ipv6.ip6_dst_ops));
+ if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
+ goto out_ip6_dst_ops;
+
net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
sizeof(*net->ipv6.ip6_null_entry),
GFP_KERNEL);
if (!net->ipv6.ip6_null_entry)
- goto out_ip6_dst_ops;
+ goto out_ip6_dst_entries;
net->ipv6.ip6_null_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_null_entry;
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
out_ip6_null_entry:
kfree(net->ipv6.ip6_null_entry);
#endif
+out_ip6_dst_entries:
+ dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
goto out;
}
if (!ip6_dst_ops_template.kmem_cachep)
goto out;
- ret = register_pernet_subsys(&ip6_route_net_ops);
+ ret = dst_entries_init(&ip6_dst_blackhole_ops);
if (ret)
goto out_kmem_cache;
+ ret = register_pernet_subsys(&ip6_route_net_ops);
+ if (ret)
+ goto out_dst_entries;
+
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
/* Registering of the loopback is done before this portion of code,
fib6_gc_cleanup();
out_register_subsys:
unregister_pernet_subsys(&ip6_route_net_ops);
+out_dst_entries:
+ dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
goto out;
struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
xfrm6_policy_afinfo.garbage_collect(net);
- return atomic_read(&ops->entries) > ops->gc_thresh * 2;
+ return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
}
static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
.gc_thresh = 1024,
- .entries = ATOMIC_INIT(0),
};
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
*/
gc_thresh = FIB6_TABLE_HASHSZ * 8;
xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
+ dst_entries_init(&xfrm6_dst_ops);
ret = xfrm6_policy_init();
- if (ret)
+ if (ret) {
+ dst_entries_destroy(&xfrm6_dst_ops);
goto out;
-
+ }
ret = xfrm6_state_init();
if (ret)
goto out_policy;
//xfrm6_input_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
+ dst_entries_destroy(&xfrm6_dst_ops);
}