Revert "Handle 'sk' being NULL in UID-based routing."

[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / route.c
diff --git a/net/ipv4/route.c b/net/ipv4/route.c

index c850647711d711d7ae8078483a14837d0e6ad47a..dd1a3c567af670a577d9da5f2fe28706e3610598 100644 (file)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
  #include <linux/rcupdate.h>
  #include <linux/times.h>
  #include <linux/slab.h>
+#include <linux/jhash.h>
  #include <net/dst.h>
  #include <net/net_namespace.h>
  #include <net/protocol.h>
@@ -464,43 +465,57 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
         return neigh_create(&arp_tbl, pkey, dev);
  }
  
-/*
- * Peer allocation may fail only in serious out-of-memory conditions.  However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+       atomic_t        id;
+       u32             stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes it hard for an attacker
+ * to infer how many packets were sent between two points in time.
   */
-static void ip_select_fb_ident(struct iphdr *iph)
+u32 ip_idents_reserve(u32 hash, int segs)
  {
-       static DEFINE_SPINLOCK(ip_fb_id_lock);
-       static u32 ip_fallback_id;
-       u32 salt;
+       struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+       u32 old = ACCESS_ONCE(bucket->stamp32);
+       u32 now = (u32)jiffies;
+       u32 delta = 0;
+
+       if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) {
+               u64 x = prandom_u32();
+
+               x *= (now - old);
+               delta = (u32)(x >> 32);
+       }
  
-       spin_lock_bh(&ip_fb_id_lock);
-       salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
-       iph->id = htons(salt & 0xFFFF);
-       ip_fallback_id = salt;
-       spin_unlock_bh(&ip_fb_id_lock);
+       return atomic_add_return(segs + delta, &bucket->id) - segs;
  }
+EXPORT_SYMBOL(ip_idents_reserve);
  
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
  {
-       struct net *net = dev_net(dst->dev);
-       struct inet_peer *peer;
+       static u32 ip_idents_hashrnd __read_mostly;
+       static bool hashrnd_initialized = false;
+       u32 hash, id;
  
-       peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
-       if (peer) {
-               iph->id = htons(inet_getid(peer, more));
-               inet_putpeer(peer);
-               return;
+       if (unlikely(!hashrnd_initialized)) {
+               hashrnd_initialized = true;
+               get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
         }
  
-       ip_select_fb_ident(iph);
+       hash = jhash_3words((__force u32)iph->daddr,
+                           (__force u32)iph->saddr,
+                           iph->protocol,
+                           ip_idents_hashrnd);
+       id = ip_idents_reserve(hash, segs);
+       iph->id = htons(id);
  }
  EXPORT_SYMBOL(__ip_select_ident);
  
-static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void __build_flow_key(struct flowi4 *fl4, struct sock *sk,
                              const struct iphdr *iph,
                              int oif, u8 tos,
                              u8 prot, u32 mark, int flow_flags)
@@ -516,11 +531,12 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
         flowi4_init_output(fl4, oif, mark, tos,
                            RT_SCOPE_UNIVERSE, prot,
                            flow_flags,
-                          iph->daddr, iph->saddr, 0, 0);
+                          iph->daddr, iph->saddr, 0, 0,
+                          sock_i_uid(sk));
  }
  
  static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
-                              const struct sock *sk)
+                              struct sock *sk)
  {
         const struct iphdr *iph = ip_hdr(skb);
         int oif = skb->dev->ifindex;
@@ -531,7 +547,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
         __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
  }
  
-static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
+static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk)
  {
         const struct inet_sock *inet = inet_sk(sk);
         const struct ip_options_rcu *inet_opt;
@@ -545,11 +561,12 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
                            RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                            inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
                            inet_sk_flowi_flags(sk),
-                          daddr, inet->inet_saddr, 0, 0);
+                          daddr, inet->inet_saddr, 0, 0,
+                          sock_i_uid(sk));
         rcu_read_unlock();
  }
  
-static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk,
                                  const struct sk_buff *skb)
  {
         if (skb)
@@ -698,8 +715,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
                         goto reject_redirect;
         }
  
-       n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
-       if (n) {
+       n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+       if (!n)
+               n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
+       if (!IS_ERR(n)) {
                 if (!(n->nud_state & NUD_VALID)) {
                         neigh_event_send(n, NULL);
                 } else {
@@ -856,6 +875,10 @@ static int ip_error(struct sk_buff *skb)
         bool send;
         int code;
  
+       /* IP on this device is disabled. */
+       if (!in_dev)
+               goto out;
+
         net = dev_net(rt->dst.dev);
         if (!IN_DEV_FORWARD(in_dev)) {
                 switch (rt->dst.error) {
@@ -956,6 +979,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
         struct flowi4 fl4;
         struct rtable *rt;
  
+       if (!mark)
+               mark = IP4_REPLY_MARK(net, skb->mark);
+
         __build_flow_key(&fl4, NULL, iph, oif,
                          RT_TOS(iph->tos), protocol, mark, flow_flags);
         rt = __ip_route_output_key(net, &fl4);
@@ -973,6 +999,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
         struct rtable *rt;
  
         __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+       if (!fl4.flowi4_mark)
+               fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
+
         rt = __ip_route_output_key(sock_net(sk), &fl4);
         if (!IS_ERR(rt)) {
                 __ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -985,20 +1015,21 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
         const struct iphdr *iph = (const struct iphdr *) skb->data;
         struct flowi4 fl4;
         struct rtable *rt;
-       struct dst_entry *dst;
+       struct dst_entry *odst = NULL;
         bool new = false;
  
         bh_lock_sock(sk);
-       rt = (struct rtable *) __sk_dst_get(sk);
+       odst = sk_dst_get(sk);
  
-       if (sock_owned_by_user(sk) || !rt) {
+       if (sock_owned_by_user(sk) || !odst) {
                 __ipv4_sk_update_pmtu(skb, sk, mtu);
                 goto out;
         }
  
         __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
  
-       if (!__sk_dst_check(sk, 0)) {
+       rt = (struct rtable *)odst;
+       if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
                 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
                 if (IS_ERR(rt))
                         goto out;
@@ -1008,8 +1039,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
  
         __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
  
-       dst = dst_check(&rt->dst, 0);
-       if (!dst) {
+       if (!dst_check(&rt->dst, 0)) {
                 if (new)
                         dst_release(&rt->dst);
  
@@ -1021,10 +1051,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
         }
  
         if (new)
-               __sk_dst_set(sk, &rt->dst);
+               sk_dst_set(sk, &rt->dst);
  
  out:
         bh_unlock_sock(sk);
+       dst_release(odst);
  }
  EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
  
@@ -1478,7 +1509,7 @@ static int __mkroute_input(struct sk_buff *skb,
         struct in_device *out_dev;
         unsigned int flags = 0;
         bool do_cache;
-       u32 itag;
+       u32 itag = 0;
  
         /* get a working reference to the output device */
         out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
@@ -1498,11 +1529,10 @@ static int __mkroute_input(struct sk_buff *skb,
  
         do_cache = res->fi && !itag;
         if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
+           skb->protocol == htons(ETH_P_IP) &&
             (IN_DEV_SHARED_MEDIA(out_dev) ||
-            inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
-               flags |= RTCF_DOREDIRECT;
-               do_cache = false;
-       }
+            inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
+               IPCB(skb)->flags |= IPSKB_DOREDIRECT;
  
         if (skb->protocol != htons(ETH_P_IP)) {
                 /* Not IP (i.e. ARP). Do not create route, if it is
@@ -1768,6 +1798,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
  {
         int res;
  
+       tos &= IPTOS_RT_MASK;
         rcu_read_lock();
  
         /* Multicast recognition logic is moved from route cache to here.
@@ -1857,6 +1888,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
                  */
                 if (fi && res->prefixlen < 4)
                         fi = NULL;
+       } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
+                  (orig_oif != dev_out->ifindex)) {
+               /* For local routes that require a particular output interface
+                * we do not want to cache the result.  Caching the result
+                * causes incorrect behaviour when there are multiple source
+                * addresses on the interface, the end result being that if the
+                * intended recipient is waiting on that interface for the
+                * packet he won't receive it because it will be delivered on
+                * the loopback interface and the IP_PKTINFO ipi_ifindex will
+                * be set to the loopback interface as well.
+                */
+               fi = NULL;
         }
  
         fnhe = NULL;
@@ -2239,6 +2282,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
         r->rtm_flags    = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
         if (rt->rt_flags & RTCF_NOTIFY)
                 r->rtm_flags |= RTM_F_NOTIFY;
+       if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
+               r->rtm_flags |= RTCF_DOREDIRECT;
  
         if (nla_put_be32(skb, RTA_DST, dst))
                 goto nla_put_failure;
@@ -2284,6 +2329,11 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
             nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
                 goto nla_put_failure;
  
+       if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
+           nla_put_u32(skb, RTA_UID,
+                       from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
+               goto nla_put_failure;
+
         error = rt->dst.error;
  
         if (rt_is_input_route(rt)) {
@@ -2292,7 +2342,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
                     IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
                         int err = ipmr_get_route(net, skb,
                                                  fl4->saddr, fl4->daddr,
-                                                r, nowait);
+                                                r, nowait, portid);
+
                         if (err <= 0) {
                                 if (!nowait) {
                                         if (err == 0)
@@ -2333,6 +2384,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
         int err;
         int mark;
         struct sk_buff *skb;
+       kuid_t uid;
  
         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
         if (err < 0)
@@ -2360,6 +2412,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
         dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
         iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
         mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+       if (tb[RTA_UID])
+               uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
+       else
+               uid = (iif ? INVALID_UID : current_uid());
  
         memset(&fl4, 0, sizeof(fl4));
         fl4.daddr = dst;
@@ -2367,6 +2423,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
         fl4.flowi4_tos = rtm->rtm_tos;
         fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
         fl4.flowi4_mark = mark;
+       fl4.flowi4_uid = uid;
  
         if (iif) {
                 struct net_device *dev;
@@ -2655,6 +2712,12 @@ int __init ip_rt_init(void)
  {
         int rc = 0;
  
+       ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+       if (!ip_idents)
+               panic("IP: failed to allocate ip_idents\n");
+
+       prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
  #ifdef CONFIG_IP_ROUTE_CLASSID
         ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
         if (!ip_rt_acct)