This patch removes redundant rcu_read_lock()/rcu_read_unlock() pairs
from code paths that already run under rcu_read_lock().
1. IPVS part:
As pointed out by Julian Anastasov, the contexts in which IPVS code
runs are described at:
http://marc.info/?l=netfilter-devel&m=149562884514072&w=2
In summary:
- packet RX/TX: does not need locks because packets come from hooks.
- sync msg RX: backup server uses RCU locks while registering new
connections.
- ip_vs_ctl.c: configuration get/set, RCU locks needed.
- xt_ipvs.c: It is a netfilter match, running from hook context.
As a result, rcu_read_lock() and rcu_read_unlock() can be removed from:
- ip_vs_core.c: all
- ip_vs_ctl.c: only from ip_vs_has_real_service()
- ip_vs_ftp.c: all
- ip_vs_proto_sctp.c: all
- ip_vs_proto_tcp.c: all
- ip_vs_proto_udp.c: all
- ip_vs_xmit.c: all (contains only packet processing)
2. Netfilter part:
There are three types of functions that are guaranteed to run under
rcu_read_lock().
First, functions that are only called via nf_hook():
- nf_conntrack_broadcast_help(), pptp_expectfn(), set_expected_rtp_rtcp().
- tcpmss_reverse_mtu(), tproxy_laddr4(), tproxy_laddr6().
- match_lookup_rt6(), check_hlist(), hashlimit_mt_common().
- xt_osf_match_packet().
Second, functions whose callers already hold rcu_read_lock():
- destroy_conntrack(), ctnetlink_conntrack_event().
- ctnl_timeout_find_get(), nfqnl_nf_hook_drop().
Third, functions that are a mix of the first and second types.
These functions are called via nf_hook() and are also called by
ordinary functions that already hold rcu_read_lock():
- __ctnetlink_glue_build(), ctnetlink_expect_event().
- ctnetlink_proto_size().
The affected files are:
- nf_conntrack_broadcast.c, nf_conntrack_core.c, nf_conntrack_netlink.c.
- nf_conntrack_pptp.c, nf_conntrack_sip.c, nfnetlink_cttimeout.c.
- nfnetlink_queue.c, xt_TCPMSS.c, xt_TPROXY.c, xt_addrtype.c.
- xt_connlimit.c, xt_hashlimit.c, xt_osf.c
Detailed call traces can be found at:
http://marc.info/?l=netfilter-devel&m=149667610710350&w=2
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
if (!pptr)
return NULL;
- rcu_read_lock();
dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
&iph->saddr, pptr[0]);
if (dest) {
pptr[0], pptr[1]);
}
}
- rcu_read_unlock();
return cp;
}
if (dest) {
struct ip_vs_dest_dst *dest_dst;
- rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
- rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
/* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- rcu_read_lock();
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- rcu_read_unlock();
return true;
}
}
- rcu_read_unlock();
return false;
}
* hopefully it will succeed on the retransmitted
* packet.
*/
- rcu_read_lock();
mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start - data,
end - start,
buf, buf_len);
- rcu_read_unlock();
if (mangled) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
-out:
+
return result;
}
}
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
&iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
tx_error:
LeaveFunction(10);
kfree_skb(skb);
- rcu_read_unlock();
return NF_STOLEN;
}
#endif
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
rt = skb_rtable(skb);
tdev = rt->dst.dev;
ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_TUNNEL);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
rt = (struct rt6_info *) skb_dst(skb);
tdev = rt->dst.dev;
ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
ip_send_check(ip_hdr(skb));
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
NULL, ipvsh, 0,
IP_VS_RT_MODE_KNOWN_NH);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
goto out;
- rcu_read_lock();
in_dev = __in_dev_get_rcu(rt->dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
}
} endfor_ifa(in_dev);
}
- rcu_read_unlock();
if (mask == 0)
goto out;
nf_ct_tmpl_free(ct);
return;
}
- rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
- rcu_read_unlock();
-
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
struct nf_conntrack_l4proto *l4proto;
size_t len = 0;
- rcu_read_lock();
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
len += l3proto->nla_size;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
len += l4proto->nla_size;
- rcu_read_unlock();
return len;
}
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
- rcu_read_unlock();
-
nlmsg_end(skb, nlh);
err = nfnetlink_send(skb, net, item->portid, group, item->report,
GFP_ATOMIC);
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
goto nla_put_failure;
- rcu_read_unlock();
return 0;
nla_put_failure:
- rcu_read_unlock();
return -ENOSPC;
}
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
- rcu_read_unlock();
nlmsg_end(skb, nlh);
nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- rcu_read_lock();
nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
nf_nat_pptp_expectfn(ct, exp);
pr_debug("not found\n");
}
}
- rcu_read_unlock();
}
static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
tuple.dst.u3 = *daddr;
tuple.dst.u.udp.port = port;
- rcu_read_lock();
do {
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
goto err1;
}
- if (skip_expect) {
- rcu_read_unlock();
+ if (skip_expect)
return NF_ACCEPT;
- }
rtp_exp = nf_ct_expect_alloc(ct);
if (rtp_exp == NULL)
err2:
nf_ct_expect_put(rtp_exp);
err1:
- rcu_read_unlock();
return ret;
}
{
struct ctnl_timeout *timeout, *matching = NULL;
- rcu_read_lock();
list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
break;
}
err:
- rcu_read_unlock();
return matching;
}
unsigned int instances = 0;
int i;
- rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
instances++;
}
}
- rcu_read_unlock();
return instances;
}
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- rcu_read_lock();
ai = nf_get_afinfo(family);
if (ai != NULL)
ai->route(net, (struct dst_entry **)&rt, &fl, false);
- rcu_read_unlock();
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
return user_laddr;
laddr = 0;
- rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
for_primary_ifa(indev) {
laddr = ifa->ifa_local;
break;
} endfor_ifa(indev);
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
return user_laddr;
laddr = NULL;
- rcu_read_lock();
indev = __in6_dev_get(skb->dev);
if (indev) {
read_lock_bh(&indev->lock);
}
read_unlock_bh(&indev->lock);
}
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
if (dev)
flow.flowi6_oif = dev->ifindex;
- rcu_read_lock();
-
afinfo = nf_get_afinfo(NFPROTO_IPV6);
if (afinfo != NULL) {
const struct nf_ipv6_ops *v6ops;
} else {
route_err = 1;
}
- rcu_read_unlock();
if (route_err)
return XT_ADDRTYPE_UNREACHABLE;
unsigned int length = 0;
*addit = true;
- rcu_read_lock();
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
length++;
}
- rcu_read_unlock();
-
return length;
}
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
goto hotdrop;
- rcu_read_lock_bh();
+ local_bh_disable();
dh = dsthash_find(hinfo, &dst);
if (dh == NULL) {
dh = dsthash_alloc_init(hinfo, &dst, &race);
if (dh == NULL) {
- rcu_read_unlock_bh();
+ local_bh_enable();
goto hotdrop;
} else if (race) {
/* Already got an entry, update expiration timeout */
/* below the limit */
dh->rateinfo.credit -= cost;
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
return !(cfg->mode & XT_HASHLIMIT_INVERT);
}
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
/* default match is underlimit - so over the limit, we need to invert */
return cfg->mode & XT_HASHLIMIT_INVERT;
sizeof(struct tcphdr), optsize, opts);
}
- rcu_read_lock();
list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
int foptsize, optnum;
info->loglevel == XT_OSF_LOGLEVEL_FIRST)
break;
}
- rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),