net: tcp_memcontrol: simplify linkage between socket and page counter
author Johannes Weiner <hannes@cmpxchg.org>
Thu, 14 Jan 2016 23:21:17 +0000 (15:21 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 15 Jan 2016 00:00:49 +0000 (16:00 -0800)
There won't be any separate counters for socket memory consumed by
protocols other than TCP in the future.  Remove the indirection and link
sockets directly to their owning memory cgroup.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/memcontrol.h
include/net/sock.h
include/net/tcp.h
include/net/tcp_memcontrol.h
mm/memcontrol.c
net/core/sock.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_memcontrol.c
net/ipv4/tcp_output.c
net/ipv6/tcp_ipv6.c

index e4e77bd1dd39ee6fb8ba84e9277f64a350b6e3ba..7c085e4636ba4ee8cc837a107888bf59e7344d04 100644 (file)
@@ -89,16 +89,6 @@ struct cg_proto {
        struct page_counter     memory_allocated;       /* Current allocated memory. */
        int                     memory_pressure;
        bool                    active;
-       /*
-        * memcg field is used to find which memcg we belong directly
-        * Each memcg struct can hold more than one cg_proto, so container_of
-        * won't really cut.
-        *
-        * The elegant solution would be having an inverse function to
-        * proto_cgroup in struct proto, but that means polluting the structure
-        * for everybody, instead of just for memcg users.
-        */
-       struct mem_cgroup       *memcg;
 };
 
 #ifdef CONFIG_MEMCG
@@ -688,15 +678,15 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 struct sock;
 void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
-bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages);
-void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages);
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
-static inline bool mem_cgroup_under_socket_pressure(struct cg_proto *proto)
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
-       return proto->memory_pressure;
+       return memcg->tcp_mem.memory_pressure;
 }
 #else
-static inline bool mem_cgroup_under_pressure(struct cg_proto *proto)
+static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
        return false;
 }
index 94a6c1a740b98aa8765d97af38c86eb57c6be446..be96a8dcbc7449556eaac910417f7f78ba14e48f 100644 (file)
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
 
-struct cgroup;
-struct cgroup_subsys;
-#ifdef CONFIG_NET
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg);
-#else
-static inline
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
-       return 0;
-}
-static inline
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
-}
-#endif
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
@@ -245,7 +229,6 @@ struct sock_common {
        /* public: */
 };
 
-struct cg_proto;
 /**
   *    struct sock - network layer representation of sockets
   *    @__sk_common: shared layout with inet_timewait_sock
@@ -310,7 +293,7 @@ struct cg_proto;
   *    @sk_security: used by security modules
   *    @sk_mark: generic packet mark
   *    @sk_cgrp_data: cgroup data for this cgroup
-  *    @sk_cgrp: this socket's cgroup-specific proto data
+  *    @sk_memcg: this socket's memory cgroup association
   *    @sk_write_pending: a write to stream socket waits to start
   *    @sk_state_change: callback to indicate change in the state of the sock
   *    @sk_data_ready: callback to indicate there is data to be processed
@@ -446,7 +429,7 @@ struct sock {
        void                    *sk_security;
 #endif
        struct sock_cgroup_data sk_cgrp_data;
-       struct cg_proto         *sk_cgrp;
+       struct mem_cgroup       *sk_memcg;
        void                    (*sk_state_change)(struct sock *sk);
        void                    (*sk_data_ready)(struct sock *sk);
        void                    (*sk_write_space)(struct sock *sk);
@@ -1129,8 +1112,8 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
        if (!sk->sk_prot->memory_pressure)
                return false;
 
-       if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-           mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+       if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+           mem_cgroup_under_socket_pressure(sk->sk_memcg))
                return true;
 
        return !!*sk->sk_prot->memory_pressure;
index d9df80deba31a3266b167d1894e7ebca157e6d44..8ea19977ea530bffb57bebed734b977c95b603c5 100644 (file)
@@ -289,8 +289,8 @@ extern int tcp_memory_pressure;
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
-       if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-           mem_cgroup_under_socket_pressure(sk->sk_cgrp))
+       if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+           mem_cgroup_under_socket_pressure(sk->sk_memcg))
                return true;
 
        return tcp_memory_pressure;
index 05b94d9453de8b036f1c275caa268b6f8113eac7..3a17b16ae8aa47c4cb3f242291533d9be86fcd75 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef _TCP_MEMCG_H
 #define _TCP_MEMCG_H
 
-struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
 int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
 void tcp_destroy_cgroup(struct mem_cgroup *memcg);
 #endif /* _TCP_MEMCG_H */
index f5de783860b87330c9326b6c0c5c45e37bd5bd62..eaaa86126277b8230b36ad2108cdbe62b3b33901 100644 (file)
@@ -294,9 +294,6 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 void sock_update_memcg(struct sock *sk)
 {
        struct mem_cgroup *memcg;
-       struct cg_proto *cg_proto;
-
-       BUG_ON(!sk->sk_prot->proto_cgroup);
 
        /* Socket cloning can throw us here with sk_cgrp already
         * filled. It won't however, necessarily happen from
@@ -306,68 +303,58 @@ void sock_update_memcg(struct sock *sk)
         * Respecting the original socket's memcg is a better
         * decision in this case.
         */
-       if (sk->sk_cgrp) {
-               BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg));
-               css_get(&sk->sk_cgrp->memcg->css);
+       if (sk->sk_memcg) {
+               BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
+               css_get(&sk->sk_memcg->css);
                return;
        }
 
        rcu_read_lock();
        memcg = mem_cgroup_from_task(current);
-       cg_proto = sk->sk_prot->proto_cgroup(memcg);
-       if (cg_proto && cg_proto->active &&
-           css_tryget_online(&memcg->css)) {
-               sk->sk_cgrp = cg_proto;
-       }
+       if (memcg != root_mem_cgroup &&
+           memcg->tcp_mem.active &&
+           css_tryget_online(&memcg->css))
+               sk->sk_memcg = memcg;
        rcu_read_unlock();
 }
 EXPORT_SYMBOL(sock_update_memcg);
 
 void sock_release_memcg(struct sock *sk)
 {
-       WARN_ON(!sk->sk_cgrp->memcg);
-       css_put(&sk->sk_cgrp->memcg->css);
-}
-
-struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
-{
-       if (!memcg || mem_cgroup_is_root(memcg))
-               return NULL;
-
-       return &memcg->tcp_mem;
+       WARN_ON(!sk->sk_memcg);
+       css_put(&sk->sk_memcg->css);
 }
-EXPORT_SYMBOL(tcp_proto_cgroup);
 
 /**
  * mem_cgroup_charge_skmem - charge socket memory
- * @proto: proto to charge
+ * @memcg: memcg to charge
  * @nr_pages: number of pages to charge
  *
- * Charges @nr_pages to @proto. Returns %true if the charge fit within
- * @proto's configured limit, %false if the charge had to be forced.
+ * Charges @nr_pages to @memcg. Returns %true if the charge fit within
+ * @memcg's configured limit, %false if the charge had to be forced.
  */
-bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
        struct page_counter *counter;
 
-       if (page_counter_try_charge(&proto->memory_allocated,
+       if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
                                    nr_pages, &counter)) {
-               proto->memory_pressure = 0;
+               memcg->tcp_mem.memory_pressure = 0;
                return true;
        }
-       page_counter_charge(&proto->memory_allocated, nr_pages);
-       proto->memory_pressure = 1;
+       page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
+       memcg->tcp_mem.memory_pressure = 1;
        return false;
 }
 
 /**
  * mem_cgroup_uncharge_skmem - uncharge socket memory
- * @proto - proto to uncharge
+ * @memcg - memcg to uncharge
  * @nr_pages - number of pages to uncharge
  */
-void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages)
+void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-       page_counter_uncharge(&proto->memory_allocated, nr_pages);
+       page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
 }
 
 #endif
@@ -3653,7 +3640,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
        if (ret)
                return ret;
 
-       return mem_cgroup_sockets_init(memcg, ss);
+       return tcp_init_cgroup(memcg, ss);
 }
 
 static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
@@ -3709,7 +3696,7 @@ static void memcg_destroy_kmem(struct mem_cgroup *memcg)
                static_key_slow_dec(&memcg_kmem_enabled_key);
                WARN_ON(page_counter_read(&memcg->kmem));
        }
-       mem_cgroup_sockets_destroy(memcg);
+       tcp_destroy_cgroup(memcg);
 }
 #else
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
index 89ae859d2dc54f2c3502e929637cc1a5925d2e30..3535bffa45f3c943932c61b8d63cde600b41b33c 100644 (file)
@@ -195,44 +195,6 @@ bool sk_net_capable(const struct sock *sk, int cap)
 }
 EXPORT_SYMBOL(sk_net_capable);
 
-
-#ifdef CONFIG_MEMCG_KMEM
-int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
-{
-       struct proto *proto;
-       int ret = 0;
-
-       mutex_lock(&proto_list_mutex);
-       list_for_each_entry(proto, &proto_list, node) {
-               if (proto->init_cgroup) {
-                       ret = proto->init_cgroup(memcg, ss);
-                       if (ret)
-                               goto out;
-               }
-       }
-
-       mutex_unlock(&proto_list_mutex);
-       return ret;
-out:
-       list_for_each_entry_continue_reverse(proto, &proto_list, node)
-               if (proto->destroy_cgroup)
-                       proto->destroy_cgroup(memcg);
-       mutex_unlock(&proto_list_mutex);
-       return ret;
-}
-
-void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
-{
-       struct proto *proto;
-
-       mutex_lock(&proto_list_mutex);
-       list_for_each_entry_reverse(proto, &proto_list, node)
-               if (proto->destroy_cgroup)
-                       proto->destroy_cgroup(memcg);
-       mutex_unlock(&proto_list_mutex);
-}
-#endif
-
 /*
  * Each address family might have different locking rules, so we have
  * one slock key per address family:
@@ -1601,7 +1563,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                sk_set_socket(newsk, NULL);
                newsk->sk_wq = NULL;
 
-               if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+               if (mem_cgroup_sockets_enabled && sk->sk_memcg)
                        sock_update_memcg(newsk);
 
                if (newsk->sk_prot->sockets_allocated)
@@ -2089,8 +2051,8 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 
        allocated = sk_memory_allocated_add(sk, amt);
 
-       if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-           !mem_cgroup_charge_skmem(sk->sk_cgrp, amt))
+       if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+           !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
                goto suppress_allocation;
 
        /* Under limit. */
@@ -2153,8 +2115,8 @@ suppress_allocation:
 
        sk_memory_allocated_sub(sk, amt);
 
-       if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-               mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt);
+       if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+               mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
 
        return 0;
 }
@@ -2171,8 +2133,8 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
        sk_memory_allocated_sub(sk, amount);
        sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
-       if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-               mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount);
+       if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+               mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
 
        if (sk_under_memory_pressure(sk) &&
            (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
index eb39e02899e58e2eb249f177bec36d5f17710e33..c7d1fb50f3818824fda315d0eb695be86bf0a360 100644 (file)
@@ -1819,7 +1819,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 
        sk_sockets_allocated_dec(sk);
 
-       if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+       if (mem_cgroup_sockets_enabled && sk->sk_memcg)
                sock_release_memcg(sk);
 }
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -2343,11 +2343,6 @@ struct proto tcp_prot = {
 #ifdef CONFIG_COMPAT
        .compat_setsockopt      = compat_tcp_setsockopt,
        .compat_getsockopt      = compat_tcp_getsockopt,
-#endif
-#ifdef CONFIG_MEMCG_KMEM
-       .init_cgroup            = tcp_init_cgroup,
-       .destroy_cgroup         = tcp_destroy_cgroup,
-       .proto_cgroup           = tcp_proto_cgroup,
 #endif
        .diag_destroy           = tcp_abort,
 };
index ef4268d12e43d04a75ccd75bd472cf89e8d3b16d..e5078259cbe3b9b6a81ddcdeb504ae03857cc7a6 100644 (file)
@@ -8,60 +8,47 @@
 
 int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+       struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+       struct page_counter *counter_parent = NULL;
        /*
         * The root cgroup does not use page_counters, but rather,
         * rely on the data already collected by the network
         * subsystem
         */
-       struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-       struct page_counter *counter_parent = NULL;
-       struct cg_proto *cg_proto, *parent_cg;
-
-       cg_proto = tcp_prot.proto_cgroup(memcg);
-       if (!cg_proto)
+       if (memcg == root_mem_cgroup)
                return 0;
 
-       cg_proto->memory_pressure = 0;
-       cg_proto->memcg = memcg;
+       memcg->tcp_mem.memory_pressure = 0;
 
-       parent_cg = tcp_prot.proto_cgroup(parent);
-       if (parent_cg)
-               counter_parent = &parent_cg->memory_allocated;
+       if (parent)
+               counter_parent = &parent->tcp_mem.memory_allocated;
 
-       page_counter_init(&cg_proto->memory_allocated, counter_parent);
+       page_counter_init(&memcg->tcp_mem.memory_allocated, counter_parent);
 
        return 0;
 }
-EXPORT_SYMBOL(tcp_init_cgroup);
 
 void tcp_destroy_cgroup(struct mem_cgroup *memcg)
 {
-       struct cg_proto *cg_proto;
-
-       cg_proto = tcp_prot.proto_cgroup(memcg);
-       if (!cg_proto)
+       if (memcg == root_mem_cgroup)
                return;
 
-       if (cg_proto->active)
+       if (memcg->tcp_mem.active)
                static_key_slow_dec(&memcg_socket_limit_enabled);
-
 }
-EXPORT_SYMBOL(tcp_destroy_cgroup);
 
 static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
 {
-       struct cg_proto *cg_proto;
        int ret;
 
-       cg_proto = tcp_prot.proto_cgroup(memcg);
-       if (!cg_proto)
+       if (memcg == root_mem_cgroup)
                return -EINVAL;
 
-       ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
+       ret = page_counter_limit(&memcg->tcp_mem.memory_allocated, nr_pages);
        if (ret)
                return ret;
 
-       if (!cg_proto->active) {
+       if (!memcg->tcp_mem.active) {
                /*
                 * The active flag needs to be written after the static_key
                 * update. This is what guarantees that the socket activation
@@ -79,7 +66,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
                 * patched in yet.
                 */
                static_key_slow_inc(&memcg_socket_limit_enabled);
-               cg_proto->active = true;
+               memcg->tcp_mem.active = true;
        }
 
        return 0;
@@ -123,32 +110,32 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
 static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-       struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
        u64 val;
 
        switch (cft->private) {
        case RES_LIMIT:
-               if (!cg_proto)
-                       return PAGE_COUNTER_MAX;
-               val = cg_proto->memory_allocated.limit;
+               if (memcg == root_mem_cgroup)
+                       val = PAGE_COUNTER_MAX;
+               else
+                       val = memcg->tcp_mem.memory_allocated.limit;
                val *= PAGE_SIZE;
                break;
        case RES_USAGE:
-               if (!cg_proto)
+               if (memcg == root_mem_cgroup)
                        val = atomic_long_read(&tcp_memory_allocated);
                else
-                       val = page_counter_read(&cg_proto->memory_allocated);
+                       val = page_counter_read(&memcg->tcp_mem.memory_allocated);
                val *= PAGE_SIZE;
                break;
        case RES_FAILCNT:
-               if (!cg_proto)
+               if (memcg == root_mem_cgroup)
                        return 0;
-               val = cg_proto->memory_allocated.failcnt;
+               val = memcg->tcp_mem.memory_allocated.failcnt;
                break;
        case RES_MAX_USAGE:
-               if (!cg_proto)
+               if (memcg == root_mem_cgroup)
                        return 0;
-               val = cg_proto->memory_allocated.watermark;
+               val = memcg->tcp_mem.memory_allocated.watermark;
                val *= PAGE_SIZE;
                break;
        default:
@@ -161,19 +148,17 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
                                char *buf, size_t nbytes, loff_t off)
 {
        struct mem_cgroup *memcg;
-       struct cg_proto *cg_proto;
 
        memcg = mem_cgroup_from_css(of_css(of));
-       cg_proto = tcp_prot.proto_cgroup(memcg);
-       if (!cg_proto)
+       if (memcg == root_mem_cgroup)
                return nbytes;
 
        switch (of_cft(of)->private) {
        case RES_MAX_USAGE:
-               page_counter_reset_watermark(&cg_proto->memory_allocated);
+               page_counter_reset_watermark(&memcg->tcp_mem.memory_allocated);
                break;
        case RES_FAILCNT:
-               cg_proto->memory_allocated.failcnt = 0;
+               memcg->tcp_mem.memory_allocated.failcnt = 0;
                break;
        }
 
index 493b48945f0c37cf9d707ae71447d1191164ae2c..fda379cd600d4e033333a37301f3cba4eec0ba7b 100644 (file)
@@ -2821,8 +2821,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
        sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
        sk_memory_allocated_add(sk, amt);
 
-       if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-               mem_cgroup_charge_skmem(sk->sk_cgrp, amt);
+       if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+               mem_cgroup_charge_skmem(sk->sk_memcg, amt);
 }
 
 /* Send a FIN. The caller locks the socket for us.
index db9f1c318afc3a5157c373f5b5d5fdac09f0049d..4ad8edb46f7c523276e6e837e365202d7ca1e7a7 100644 (file)
@@ -1888,9 +1888,6 @@ struct proto tcpv6_prot = {
 #ifdef CONFIG_COMPAT
        .compat_setsockopt      = compat_tcp_setsockopt,
        .compat_getsockopt      = compat_tcp_getsockopt,
-#endif
-#ifdef CONFIG_MEMCG_KMEM
-       .proto_cgroup           = tcp_proto_cgroup,
 #endif
        .clear_sk               = tcp_v6_clear_sk,
        .diag_destroy           = tcp_abort,