devlink: add shared buffer configuration
author Jiri Pirko <jiri@mellanox.com>
Thu, 14 Apr 2016 16:19:13 +0000 (18:19 +0200)
committer David S. Miller <davem@davemloft.net>
Thu, 14 Apr 2016 20:22:03 +0000 (16:22 -0400)
Define userspace API and drivers API for configuration of shared
buffers. Four basic objects are defined:
shared buffer - attributes are size, number of pools and TCs
pool - chunk of shared buffer definition, it has some size and either
       static or dynamic threshold
port pool threshold - to set per-port threshold for each pool
port tc threshold bind - to bind port and TC to specified pool
                         with threshold.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/devlink.h
include/uapi/linux/devlink.h
net/core/devlink.c

index c37d257891d62705ebbe9a18e3709bb31152e8ef..e4c27473ee4f5739040d734bd245f728c2af097f 100644 (file)
@@ -24,6 +24,7 @@ struct devlink_ops;
 struct devlink {
        struct list_head list;
        struct list_head port_list;
+       struct list_head sb_list;
        const struct devlink_ops *ops;
        struct device *dev;
        possible_net_t _net;
@@ -42,6 +43,12 @@ struct devlink_port {
        u32 split_group;
 };
 
+/* Attributes of one shared buffer pool. Filled in by the driver through the
+ * sb_pool_get op and reported to userspace as the SB_POOL_TYPE, SB_POOL_SIZE
+ * and SB_POOL_THRESHOLD_TYPE netlink attributes.
+ */
+struct devlink_sb_pool_info {
+       enum devlink_sb_pool_type pool_type;
+       u32 size;
+       enum devlink_sb_threshold_type threshold_type;
+};
+
 struct devlink_ops {
        size_t priv_size;
        int (*port_type_set)(struct devlink_port *devlink_port,
@@ -49,6 +56,28 @@ struct devlink_ops {
        int (*port_split)(struct devlink *devlink, unsigned int port_index,
                          unsigned int count);
        int (*port_unsplit)(struct devlink *devlink, unsigned int port_index);
+       int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index,
+                          u16 pool_index,
+                          struct devlink_sb_pool_info *pool_info);
+       int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index,
+                          u16 pool_index, u32 size,
+                          enum devlink_sb_threshold_type threshold_type);
+       int (*sb_port_pool_get)(struct devlink_port *devlink_port,
+                               unsigned int sb_index, u16 pool_index,
+                               u32 *p_threshold);
+       int (*sb_port_pool_set)(struct devlink_port *devlink_port,
+                               unsigned int sb_index, u16 pool_index,
+                               u32 threshold);
+       int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port,
+                                  unsigned int sb_index,
+                                  u16 tc_index,
+                                  enum devlink_sb_pool_type pool_type,
+                                  u16 *p_pool_index, u32 *p_threshold);
+       int (*sb_tc_pool_bind_set)(struct devlink_port *devlink_port,
+                                  unsigned int sb_index,
+                                  u16 tc_index,
+                                  enum devlink_sb_pool_type pool_type,
+                                  u16 pool_index, u32 threshold);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
@@ -82,6 +111,11 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
 void devlink_port_type_clear(struct devlink_port *devlink_port);
 void devlink_port_split_set(struct devlink_port *devlink_port,
                            u32 split_group);
+/* Shared buffer registration API for drivers. A buffer is addressed by
+ * sb_index on a given devlink instance and is described by its size and by
+ * the number of ingress/egress pools and ingress/egress TCs.
+ */
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+                       u32 size, u16 ingress_pools_count,
+                       u16 egress_pools_count, u16 ingress_tc_count,
+                       u16 egress_tc_count);
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
 
 #else
 
@@ -135,6 +169,19 @@ static inline void devlink_port_split_set(struct devlink_port *devlink_port,
 {
 }
 
+/* Stub for builds that take the #else branch of this header. Does nothing
+ * and reports success. The parameter list must mirror the real
+ * devlink_sb_register() prototype (separate ingress and egress TC counts),
+ * otherwise callers passing both counts fail to compile against the stub.
+ */
+static inline int devlink_sb_register(struct devlink *devlink,
+                                     unsigned int sb_index, u32 size,
+                                     u16 ingress_pools_count,
+                                     u16 egress_pools_count,
+                                     u16 ingress_tc_count,
+                                     u16 egress_tc_count)
+{
+       return 0;
+}
+
+/* No-op counterpart of devlink_sb_unregister() for the #else branch of this
+ * header.
+ */
+static inline void devlink_sb_unregister(struct devlink *devlink,
+                                        unsigned int sb_index)
+{
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
index c9fee5781eb1df82a045ed72d2afe2cd2381e381..9c1aa5783090e1aca21fddaacf27d86c157e65f1 100644 (file)
@@ -33,6 +33,26 @@ enum devlink_command {
        DEVLINK_CMD_PORT_SPLIT,
        DEVLINK_CMD_PORT_UNSPLIT,
 
+       DEVLINK_CMD_SB_GET,             /* can dump */
+       DEVLINK_CMD_SB_SET,
+       DEVLINK_CMD_SB_NEW,
+       DEVLINK_CMD_SB_DEL,
+
+       DEVLINK_CMD_SB_POOL_GET,        /* can dump */
+       DEVLINK_CMD_SB_POOL_SET,
+       DEVLINK_CMD_SB_POOL_NEW,
+       DEVLINK_CMD_SB_POOL_DEL,
+
+       DEVLINK_CMD_SB_PORT_POOL_GET,   /* can dump */
+       DEVLINK_CMD_SB_PORT_POOL_SET,
+       DEVLINK_CMD_SB_PORT_POOL_NEW,
+       DEVLINK_CMD_SB_PORT_POOL_DEL,
+
+       DEVLINK_CMD_SB_TC_POOL_BIND_GET,        /* can dump */
+       DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+       DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+       DEVLINK_CMD_SB_TC_POOL_BIND_DEL,
+
        /* add new commands above here */
 
        __DEVLINK_CMD_MAX,
@@ -46,6 +66,31 @@ enum devlink_port_type {
        DEVLINK_PORT_TYPE_IB,
 };
 
+/* Pool type: ingress or egress. Carried in DEVLINK_ATTR_SB_POOL_TYPE (u8). */
+enum devlink_sb_pool_type {
+       DEVLINK_SB_POOL_TYPE_INGRESS,
+       DEVLINK_SB_POOL_TYPE_EGRESS,
+};
+
+/* static threshold - limiting the maximum number of bytes.
+ * dynamic threshold - limiting the maximum number of bytes
+ *   based on the currently available free space in the shared buffer pool.
+ *   In this mode, the maximum quota is calculated based
+ *   on the following formula:
+ *     max_quota = alpha / (1 + alpha) * Free_Buffer
+ *   where Free_Buffer is the amount of unoccupied buffer associated with
+ *   the relevant pool.
+ *   The value range which can be passed is 0-20 and serves
+ *   for computation of alpha by following formula:
+ *     alpha = 2 ^ (passed_value - 10)
+ */
+
+/* How a pool's threshold value is interpreted (static or dynamic). Carried
+ * in DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE (u8).
+ */
+enum devlink_sb_threshold_type {
+       DEVLINK_SB_THRESHOLD_TYPE_STATIC,
+       DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC,
+};
+
+/* NOTE(review): presumably the upper end of the 0-20 dynamic threshold value
+ * range; not referenced by the code visible in this change - confirm.
+ */
+#define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20
+
 enum devlink_attr {
        /* don't change the order or add anything between, this is ABI! */
        DEVLINK_ATTR_UNSPEC,
@@ -62,6 +107,18 @@ enum devlink_attr {
        DEVLINK_ATTR_PORT_IBDEV_NAME,           /* string */
        DEVLINK_ATTR_PORT_SPLIT_COUNT,          /* u32 */
        DEVLINK_ATTR_PORT_SPLIT_GROUP,          /* u32 */
+       DEVLINK_ATTR_SB_INDEX,                  /* u32 */
+       DEVLINK_ATTR_SB_SIZE,                   /* u32 */
+       DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,     /* u16 */
+       DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,      /* u16 */
+       DEVLINK_ATTR_SB_INGRESS_TC_COUNT,       /* u16 */
+       DEVLINK_ATTR_SB_EGRESS_TC_COUNT,        /* u16 */
+       DEVLINK_ATTR_SB_POOL_INDEX,             /* u16 */
+       DEVLINK_ATTR_SB_POOL_TYPE,              /* u8 */
+       DEVLINK_ATTR_SB_POOL_SIZE,              /* u32 */
+       DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,    /* u8 */
+       DEVLINK_ATTR_SB_THRESHOLD,              /* u32 */
+       DEVLINK_ATTR_SB_TC_INDEX,               /* u16 */
 
        /* add new attributes above here, update the policy in devlink.c */
 
index b84cf0df4a0eb0d7e79663103287274eea1229ae..aa0b9e1542e7095a04d22692647d94199fddb000 100644 (file)
@@ -119,8 +119,167 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
        return devlink_port_get_from_attrs(devlink, info->attrs);
 }
 
+/* Per-shared-buffer record kept by the devlink core. Entries are linked on
+ * devlink->sb_list and looked up by index; the pool and TC counts bound the
+ * pool and TC indexes accepted from netlink requests.
+ */
+struct devlink_sb {
+       struct list_head list;
+       unsigned int index;
+       u32 size;
+       u16 ingress_pools_count;
+       u16 egress_pools_count;
+       u16 ingress_tc_count;
+       u16 egress_tc_count;
+};
+
+/* Total number of pools in a shared buffer: ingress plus egress. */
+static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
+{
+       u16 total = devlink_sb->ingress_pools_count;
+
+       total += devlink_sb->egress_pools_count;
+       return total;
+}
+
+/* Walk devlink->sb_list for the shared buffer with the given index.
+ * Returns the entry, or NULL when no entry on the list has that index.
+ */
+static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
+                                                 unsigned int sb_index)
+{
+       struct devlink_sb *sb;
+
+       list_for_each_entry(sb, &devlink->sb_list, list) {
+               if (sb->index != sb_index)
+                       continue;
+               return sb;
+       }
+       return NULL;
+}
+
+/* True when a shared buffer with this index is already on the list. */
+static bool devlink_sb_index_exists(struct devlink *devlink,
+                                   unsigned int sb_index)
+{
+       return devlink_sb_get_by_index(devlink, sb_index) != NULL;
+}
+
+/* Resolve the shared buffer named by DEVLINK_ATTR_SB_INDEX.
+ * Returns ERR_PTR(-EINVAL) when the attribute is absent and
+ * ERR_PTR(-ENODEV) when no shared buffer has that index.
+ */
+static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
+                                                   struct nlattr **attrs)
+{
+       struct devlink_sb *sb;
+       u32 sb_index;
+
+       if (!attrs[DEVLINK_ATTR_SB_INDEX])
+               return ERR_PTR(-EINVAL);
+
+       sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
+       sb = devlink_sb_get_by_index(devlink, sb_index);
+       if (sb)
+               return sb;
+
+       return ERR_PTR(-ENODEV);
+}
+
+/* genl_info flavour of devlink_sb_get_from_attrs(). */
+static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
+                                                  struct genl_info *info)
+{
+       struct nlattr **attrs = info->attrs;
+
+       return devlink_sb_get_from_attrs(devlink, attrs);
+}
+
+/* Read DEVLINK_ATTR_SB_POOL_INDEX and check it against the shared buffer's
+ * total pool count. Stores the index in *p_pool_index and returns 0, or
+ * returns -EINVAL when the attribute is missing or the index is out of range.
+ */
+static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
+                                               struct nlattr **attrs,
+                                               u16 *p_pool_index)
+{
+       struct nlattr *attr = attrs[DEVLINK_ATTR_SB_POOL_INDEX];
+       u16 pool_index;
+
+       if (!attr)
+               return -EINVAL;
+
+       pool_index = nla_get_u16(attr);
+       if (pool_index >= devlink_sb_pool_count(devlink_sb))
+               return -EINVAL;
+
+       *p_pool_index = pool_index;
+       return 0;
+}
+
+/* genl_info flavour of devlink_sb_pool_index_get_from_attrs(). */
+static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
+                                              struct genl_info *info,
+                                              u16 *p_pool_index)
+{
+       struct nlattr **attrs = info->attrs;
+
+       return devlink_sb_pool_index_get_from_attrs(devlink_sb, attrs,
+                                                   p_pool_index);
+}
+
+/* Read DEVLINK_ATTR_SB_POOL_TYPE and accept only the ingress and egress
+ * values. Stores the type in *p_pool_type and returns 0, or returns -EINVAL
+ * when the attribute is missing or carries any other value.
+ */
+static int
+devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
+                                   enum devlink_sb_pool_type *p_pool_type)
+{
+       struct nlattr *attr = attrs[DEVLINK_ATTR_SB_POOL_TYPE];
+
+       if (!attr)
+               return -EINVAL;
+
+       switch (nla_get_u8(attr)) {
+       case DEVLINK_SB_POOL_TYPE_INGRESS:
+               *p_pool_type = DEVLINK_SB_POOL_TYPE_INGRESS;
+               return 0;
+       case DEVLINK_SB_POOL_TYPE_EGRESS:
+               *p_pool_type = DEVLINK_SB_POOL_TYPE_EGRESS;
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
+/* genl_info flavour of devlink_sb_pool_type_get_from_attrs(). */
+static int
+devlink_sb_pool_type_get_from_info(struct genl_info *info,
+                                  enum devlink_sb_pool_type *p_pool_type)
+{
+       struct nlattr **attrs = info->attrs;
+
+       return devlink_sb_pool_type_get_from_attrs(attrs, p_pool_type);
+}
+
+/* Read DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE and accept only the static and
+ * dynamic values. Stores the type in *p_th_type and returns 0, or returns
+ * -EINVAL when the attribute is missing or carries any other value.
+ */
+static int
+devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
+                                 enum devlink_sb_threshold_type *p_th_type)
+{
+       struct nlattr *attr = attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE];
+
+       if (!attr)
+               return -EINVAL;
+
+       switch (nla_get_u8(attr)) {
+       case DEVLINK_SB_THRESHOLD_TYPE_STATIC:
+               *p_th_type = DEVLINK_SB_THRESHOLD_TYPE_STATIC;
+               return 0;
+       case DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC:
+               *p_th_type = DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC;
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
+/* genl_info flavour of devlink_sb_th_type_get_from_attrs(). */
+static int
+devlink_sb_th_type_get_from_info(struct genl_info *info,
+                                enum devlink_sb_threshold_type *p_th_type)
+{
+       struct nlattr **attrs = info->attrs;
+
+       return devlink_sb_th_type_get_from_attrs(attrs, p_th_type);
+}
+
+/* Read DEVLINK_ATTR_SB_TC_INDEX and check it against the ingress or egress
+ * TC count of the shared buffer, selected by pool_type. Stores the index in
+ * *p_tc_index and returns 0, or returns -EINVAL when the attribute is missing
+ * or the index is out of range for that pool type.
+ */
+static int
+devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
+                                  struct nlattr **attrs,
+                                  enum devlink_sb_pool_type pool_type,
+                                  u16 *p_tc_index)
+{
+       struct nlattr *attr = attrs[DEVLINK_ATTR_SB_TC_INDEX];
+       u16 tc_index;
+
+       if (!attr)
+               return -EINVAL;
+
+       tc_index = nla_get_u16(attr);
+       switch (pool_type) {
+       case DEVLINK_SB_POOL_TYPE_INGRESS:
+               if (tc_index >= devlink_sb->ingress_tc_count)
+                       return -EINVAL;
+               break;
+       case DEVLINK_SB_POOL_TYPE_EGRESS:
+               if (tc_index >= devlink_sb->egress_tc_count)
+                       return -EINVAL;
+               break;
+       default:
+               /* no range check is applied for any other pool type */
+               break;
+       }
+
+       *p_tc_index = tc_index;
+       return 0;
+}
+
+/* genl_info flavour of devlink_sb_tc_index_get_from_attrs(). */
+static int
+devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
+                                 struct genl_info *info,
+                                 enum devlink_sb_pool_type pool_type,
+                                 u16 *p_tc_index)
+{
+       struct nlattr **attrs = info->attrs;
+
+       return devlink_sb_tc_index_get_from_attrs(devlink_sb, attrs,
+                                                 pool_type, p_tc_index);
+}
+
+/* Bits for genl_ops->internal_flags. devlink_nl_pre_doit() tests them to
+ * decide which objects to resolve from the request before the handler runs;
+ * with NEED_SB set, the shared buffer is stored in info->user_ptr[1].
+ */
 #define DEVLINK_NL_FLAG_NEED_DEVLINK   BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT      BIT(1)
+#define DEVLINK_NL_FLAG_NEED_SB                BIT(2)
 
 static int devlink_nl_pre_doit(const struct genl_ops *ops,
                               struct sk_buff *skb, struct genl_info *info)
@@ -147,6 +306,18 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
                }
                info->user_ptr[0] = devlink_port;
        }
+       if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
+               struct devlink_sb *devlink_sb;
+
+               devlink_sb = devlink_sb_get_from_info(devlink, info);
+               if (IS_ERR(devlink_sb)) {
+                       if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
+                               mutex_unlock(&devlink_port_mutex);
+                       mutex_unlock(&devlink_mutex);
+                       return PTR_ERR(devlink_sb);
+               }
+               info->user_ptr[1] = devlink_sb;
+       }
        return 0;
 }
 
@@ -499,12 +670,675 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
        return devlink_port_unsplit(devlink, port_index);
 }
 
+/* Append one shared buffer message to msg: devlink handle, SB index, size,
+ * ingress/egress pool counts and ingress/egress TC counts, in that order.
+ * Returns 0, or -EMSGSIZE when the header or any attribute cannot be added
+ * (the partially built message is cancelled in that case).
+ */
+static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
+                             struct devlink_sb *devlink_sb,
+                             enum devlink_command cmd, u32 portid,
+                             u32 seq, int flags)
+{
+       void *hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
+                               cmd);
+
+       if (!hdr)
+               return -EMSGSIZE;
+
+       /* || short-circuits, so attributes are emitted strictly in order */
+       if (devlink_nl_put_handle(msg, devlink) ||
+           nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index) ||
+           nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size) ||
+           nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
+                       devlink_sb->ingress_pools_count) ||
+           nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
+                       devlink_sb->egress_pools_count) ||
+           nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
+                       devlink_sb->ingress_tc_count) ||
+           nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
+                       devlink_sb->egress_tc_count)) {
+               genlmsg_cancel(msg, hdr);
+               return -EMSGSIZE;
+       }
+
+       genlmsg_end(msg, hdr);
+       return 0;
+}
+
+/* DEVLINK_CMD_SB_GET handler: reply with a DEVLINK_CMD_SB_NEW message for
+ * the shared buffer stored in info->user_ptr[1].
+ */
+static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
+                                     struct genl_info *info)
+{
+       struct devlink_sb *devlink_sb = info->user_ptr[1];
+       struct devlink *devlink = info->user_ptr[0];
+       struct sk_buff *reply;
+       int err;
+
+       reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!reply)
+               return -ENOMEM;
+
+       err = devlink_nl_sb_fill(reply, devlink, devlink_sb,
+                                DEVLINK_CMD_SB_NEW,
+                                info->snd_portid, info->snd_seq, 0);
+       if (!err)
+               return genlmsg_reply(reply, info);
+
+       nlmsg_free(reply);
+       return err;
+}
+
+/* Dump every shared buffer of every devlink instance in the caller's network
+ * namespace. cb->args[0] holds the number of entries already emitted, so a
+ * continued dump skips that many before filling again.
+ */
+static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
+                                       struct netlink_callback *cb)
+{
+       struct devlink_sb *devlink_sb;
+       struct devlink *devlink;
+       int start = cb->args[0];
+       int idx = 0;
+
+       mutex_lock(&devlink_mutex);
+       list_for_each_entry(devlink, &devlink_list, list) {
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       continue;
+               list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+                       /* entries below 'start' are only counted */
+                       if (idx >= start &&
+                           devlink_nl_sb_fill(msg, devlink, devlink_sb,
+                                              DEVLINK_CMD_SB_NEW,
+                                              NETLINK_CB(cb->skb).portid,
+                                              cb->nlh->nlmsg_seq,
+                                              NLM_F_MULTI))
+                               goto out;
+                       idx++;
+               }
+       }
+out:
+       mutex_unlock(&devlink_mutex);
+
+       cb->args[0] = idx;
+       return msg->len;
+}
+
+/* Append one pool message to msg. The pool is first queried through the
+ * driver's sb_pool_get op, which is called unconditionally - callers check
+ * that the op exists. Emits devlink handle, SB index, pool index, pool type,
+ * pool size and threshold type, in that order.
+ * Returns the op's error, -EMSGSIZE when the header or any attribute cannot
+ * be added, or 0.
+ */
+static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
+                                  struct devlink_sb *devlink_sb,
+                                  u16 pool_index, enum devlink_command cmd,
+                                  u32 portid, u32 seq, int flags)
+{
+       struct devlink_sb_pool_info info;
+       void *hdr;
+       int err;
+
+       err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
+                                       pool_index, &info);
+       if (err)
+               return err;
+
+       hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+       if (!hdr)
+               return -EMSGSIZE;
+
+       /* || short-circuits, so attributes are emitted strictly in order */
+       if (devlink_nl_put_handle(msg, devlink) ||
+           nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index) ||
+           nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index) ||
+           nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, info.pool_type) ||
+           nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, info.size) ||
+           nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
+                      info.threshold_type)) {
+               genlmsg_cancel(msg, hdr);
+               return -EMSGSIZE;
+       }
+
+       genlmsg_end(msg, hdr);
+       return 0;
+}
+
+/* DEVLINK_CMD_SB_POOL_GET handler: validate the requested pool index, then
+ * reply with a DEVLINK_CMD_SB_POOL_NEW message for that pool.
+ * Returns -EOPNOTSUPP when the driver has no sb_pool_get op.
+ */
+static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
+                                          struct genl_info *info)
+{
+       struct devlink_sb *devlink_sb = info->user_ptr[1];
+       struct devlink *devlink = info->user_ptr[0];
+       struct sk_buff *reply;
+       u16 pool_index;
+       int err;
+
+       err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+                                                 &pool_index);
+       if (err)
+               return err;
+
+       if (!devlink->ops || !devlink->ops->sb_pool_get)
+               return -EOPNOTSUPP;
+
+       reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!reply)
+               return -ENOMEM;
+
+       err = devlink_nl_sb_pool_fill(reply, devlink, devlink_sb, pool_index,
+                                     DEVLINK_CMD_SB_POOL_NEW,
+                                     info->snd_portid, info->snd_seq, 0);
+       if (!err)
+               return genlmsg_reply(reply, info);
+
+       nlmsg_free(reply);
+       return err;
+}
+
+/* Dump all pools of one shared buffer. *p_idx is the running entry counter
+ * shared across the whole dump; entries numbered below 'start' are counted
+ * but not emitted. On a fill error the counter is left pointing at the
+ * failed entry and the error is returned.
+ */
+static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+                               struct devlink *devlink,
+                               struct devlink_sb *devlink_sb,
+                               u32 portid, u32 seq)
+{
+       u16 pool_count = devlink_sb_pool_count(devlink_sb);
+       u16 i;
+       int err;
+
+       for (i = 0; i < pool_count; i++, (*p_idx)++) {
+               if (*p_idx < start)
+                       continue;
+               err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, i,
+                                             DEVLINK_CMD_SB_POOL_NEW,
+                                             portid, seq, NLM_F_MULTI);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+/* Dump the pools of every shared buffer of every devlink instance in the
+ * caller's network namespace whose driver implements sb_pool_get.
+ * -EOPNOTSUPP from a shared buffer is ignored; any other error ends the dump.
+ */
+static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
+                                            struct netlink_callback *cb)
+{
+       struct devlink_sb *devlink_sb;
+       struct devlink *devlink;
+       int start = cb->args[0];
+       int idx = 0;
+       int err;
+
+       mutex_lock(&devlink_mutex);
+       list_for_each_entry(devlink, &devlink_list, list) {
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       continue;
+               if (!devlink->ops || !devlink->ops->sb_pool_get)
+                       continue;
+               list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+                       err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
+                                                  devlink_sb,
+                                                  NETLINK_CB(cb->skb).portid,
+                                                  cb->nlh->nlmsg_seq);
+                       if (!err || err == -EOPNOTSUPP)
+                               continue;
+                       goto out;
+               }
+       }
+out:
+       mutex_unlock(&devlink_mutex);
+
+       cb->args[0] = idx;
+       return msg->len;
+}
+
+/* Forward a pool size/threshold-type change to the driver's sb_pool_set op.
+ * Returns -EOPNOTSUPP when the driver does not provide one.
+ */
+static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
+                              u16 pool_index, u32 size,
+                              enum devlink_sb_threshold_type threshold_type)
+{
+       const struct devlink_ops *ops = devlink->ops;
+
+       if (!ops || !ops->sb_pool_set)
+               return -EOPNOTSUPP;
+
+       return ops->sb_pool_set(devlink, sb_index, pool_index,
+                               size, threshold_type);
+}
+
+/* DEVLINK_CMD_SB_POOL_SET handler: requires a valid pool index, a threshold
+ * type and DEVLINK_ATTR_SB_POOL_SIZE, then hands the values to the driver.
+ */
+static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
+                                          struct genl_info *info)
+{
+       struct devlink_sb *devlink_sb = info->user_ptr[1];
+       struct devlink *devlink = info->user_ptr[0];
+       enum devlink_sb_threshold_type th_type;
+       struct nlattr *size_attr;
+       u16 pool_index;
+       int err;
+
+       err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+                                                 &pool_index);
+       if (err)
+               return err;
+
+       err = devlink_sb_th_type_get_from_info(info, &th_type);
+       if (err)
+               return err;
+
+       size_attr = info->attrs[DEVLINK_ATTR_SB_POOL_SIZE];
+       if (!size_attr)
+               return -EINVAL;
+
+       return devlink_sb_pool_set(devlink, devlink_sb->index, pool_index,
+                                  nla_get_u32(size_attr), th_type);
+}
+
+/* Append one port-pool message to msg. The threshold is first queried
+ * through the driver's sb_port_pool_get op, which is called unconditionally -
+ * callers check that the op exists. Emits devlink handle, port index, SB
+ * index, pool index and threshold, in that order.
+ * Returns the op's error, -EMSGSIZE when the header or any attribute cannot
+ * be added, or 0.
+ */
+static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
+                                       struct devlink *devlink,
+                                       struct devlink_port *devlink_port,
+                                       struct devlink_sb *devlink_sb,
+                                       u16 pool_index,
+                                       enum devlink_command cmd,
+                                       u32 portid, u32 seq, int flags)
+{
+       u32 th;
+       void *hdr;
+       int err;
+
+       err = devlink->ops->sb_port_pool_get(devlink_port, devlink_sb->index,
+                                            pool_index, &th);
+       if (err)
+               return err;
+
+       hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+       if (!hdr)
+               return -EMSGSIZE;
+
+       /* || short-circuits, so attributes are emitted strictly in order */
+       if (devlink_nl_put_handle(msg, devlink) ||
+           nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index) ||
+           nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index) ||
+           nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index) ||
+           nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, th)) {
+               genlmsg_cancel(msg, hdr);
+               return -EMSGSIZE;
+       }
+
+       genlmsg_end(msg, hdr);
+       return 0;
+}
+
+/* DEVLINK_CMD_SB_PORT_POOL_GET handler. Here info->user_ptr[0] is the port
+ * and the devlink instance is taken from it. Replies with a
+ * DEVLINK_CMD_SB_PORT_POOL_NEW message for the requested pool, or returns
+ * -EOPNOTSUPP when the driver has no sb_port_pool_get op.
+ */
+static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
+                                               struct genl_info *info)
+{
+       struct devlink_port *devlink_port = info->user_ptr[0];
+       struct devlink_sb *devlink_sb = info->user_ptr[1];
+       struct devlink *devlink = devlink_port->devlink;
+       struct sk_buff *reply;
+       u16 pool_index;
+       int err;
+
+       err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+                                                 &pool_index);
+       if (err)
+               return err;
+
+       if (!devlink->ops || !devlink->ops->sb_port_pool_get)
+               return -EOPNOTSUPP;
+
+       reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!reply)
+               return -ENOMEM;
+
+       err = devlink_nl_sb_port_pool_fill(reply, devlink, devlink_port,
+                                          devlink_sb, pool_index,
+                                          DEVLINK_CMD_SB_PORT_POOL_NEW,
+                                          info->snd_portid, info->snd_seq, 0);
+       if (!err)
+               return genlmsg_reply(reply, info);
+
+       nlmsg_free(reply);
+       return err;
+}
+
+/* Dump every (port, pool) pair of one shared buffer. *p_idx is the running
+ * entry counter shared across the whole dump; entries numbered below 'start'
+ * are counted but not emitted. On a fill error the counter is left pointing
+ * at the failed entry and the error is returned.
+ */
+static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+                                    struct devlink *devlink,
+                                    struct devlink_sb *devlink_sb,
+                                    u32 portid, u32 seq)
+{
+       u16 pool_count = devlink_sb_pool_count(devlink_sb);
+       struct devlink_port *port;
+       u16 i;
+       int err;
+
+       list_for_each_entry(port, &devlink->port_list, list) {
+               for (i = 0; i < pool_count; i++, (*p_idx)++) {
+                       if (*p_idx < start)
+                               continue;
+                       err = devlink_nl_sb_port_pool_fill(msg, devlink, port,
+                                                          devlink_sb, i,
+                                                          DEVLINK_CMD_SB_PORT_POOL_NEW,
+                                                          portid, seq,
+                                                          NLM_F_MULTI);
+                       if (err)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+/* Dump port-pool thresholds for every shared buffer of every devlink
+ * instance in the caller's network namespace whose driver implements
+ * sb_port_pool_get. Takes devlink_mutex, then devlink_port_mutex.
+ * -EOPNOTSUPP from a shared buffer is ignored; any other error ends the dump.
+ */
+static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
+                                                 struct netlink_callback *cb)
+{
+       struct devlink_sb *devlink_sb;
+       struct devlink *devlink;
+       int start = cb->args[0];
+       int idx = 0;
+       int err;
+
+       mutex_lock(&devlink_mutex);
+       mutex_lock(&devlink_port_mutex);
+       list_for_each_entry(devlink, &devlink_list, list) {
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       continue;
+               if (!devlink->ops || !devlink->ops->sb_port_pool_get)
+                       continue;
+               list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+                       err = __sb_port_pool_get_dumpit(msg, start, &idx,
+                                                       devlink, devlink_sb,
+                                                       NETLINK_CB(cb->skb).portid,
+                                                       cb->nlh->nlmsg_seq);
+                       if (!err || err == -EOPNOTSUPP)
+                               continue;
+                       goto out;
+               }
+       }
+out:
+       mutex_unlock(&devlink_port_mutex);
+       mutex_unlock(&devlink_mutex);
+
+       cb->args[0] = idx;
+       return msg->len;
+}
+
+/* Forward a per-port pool threshold change to the driver's sb_port_pool_set
+ * op. Returns -EOPNOTSUPP when the driver does not provide one.
+ */
+static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
+                                   unsigned int sb_index, u16 pool_index,
+                                   u32 threshold)
+{
+       const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+       if (!ops || !ops->sb_port_pool_set)
+               return -EOPNOTSUPP;
+
+       return ops->sb_port_pool_set(devlink_port, sb_index,
+                                    pool_index, threshold);
+}
+
+/* DEVLINK_CMD_SB_PORT_POOL_SET handler: requires a valid pool index and
+ * DEVLINK_ATTR_SB_THRESHOLD, then hands the threshold to the driver.
+ */
+static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+                                               struct genl_info *info)
+{
+       struct devlink_port *devlink_port = info->user_ptr[0];
+       struct devlink_sb *devlink_sb = info->user_ptr[1];
+       struct nlattr *th_attr;
+       u16 pool_index;
+       int err;
+
+       err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+                                                 &pool_index);
+       if (err)
+               return err;
+
+       th_attr = info->attrs[DEVLINK_ATTR_SB_THRESHOLD];
+       if (!th_attr)
+               return -EINVAL;
+
+       return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
+                                       pool_index, nla_get_u32(th_attr));
+}
+
+/* Append one TC-to-pool binding message to msg. The bound pool index and
+ * threshold are first queried through the driver's sb_tc_pool_bind_get op,
+ * which is called unconditionally - the caller checks that the op exists.
+ * Emits devlink handle, port index, SB index, TC index, pool type, pool
+ * index and threshold, in that order.
+ * Returns the op's error, -EMSGSIZE when the header or any attribute cannot
+ * be added, or 0.
+ */
+static int
+devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
+                               struct devlink_port *devlink_port,
+                               struct devlink_sb *devlink_sb, u16 tc_index,
+                               enum devlink_sb_pool_type pool_type,
+                               enum devlink_command cmd,
+                               u32 portid, u32 seq, int flags)
+{
+       u16 bound_pool;
+       u32 th;
+       void *hdr;
+       int err;
+
+       err = devlink->ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
+                                               tc_index, pool_type,
+                                               &bound_pool, &th);
+       if (err)
+               return err;
+
+       hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+       if (!hdr)
+               return -EMSGSIZE;
+
+       /* || short-circuits, so attributes are emitted strictly in order */
+       if (devlink_nl_put_handle(msg, devlink) ||
+           nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index) ||
+           nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index) ||
+           nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index) ||
+           nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type) ||
+           nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, bound_pool) ||
+           nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, th)) {
+               genlmsg_cancel(msg, hdr);
+               return -EMSGSIZE;
+       }
+
+       genlmsg_end(msg, hdr);
+       return 0;
+}
+
+/* doit handler: reply with the pool binding of one TC on one port.
+ *
+ * The port and shared buffer come from info->user_ptr[0] and
+ * info->user_ptr[1]. The pool type and TC index are parsed from the request
+ * attributes before the driver op is checked, so a malformed request fails
+ * with the parser's error rather than -EOPNOTSUPP.
+ */
+static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+                                                  struct genl_info *info)
+{
+       struct devlink_port *devlink_port = info->user_ptr[0];
+       struct devlink_sb *devlink_sb = info->user_ptr[1];
+       struct devlink *devlink = devlink_port->devlink;
+       enum devlink_sb_pool_type type;
+       struct sk_buff *reply;
+       u16 tc;
+       int ret;
+
+       ret = devlink_sb_pool_type_get_from_info(info, &type);
+       if (ret)
+               return ret;
+
+       ret = devlink_sb_tc_index_get_from_info(devlink_sb, info, type, &tc);
+       if (ret)
+               return ret;
+
+       if (!(devlink->ops && devlink->ops->sb_tc_pool_bind_get))
+               return -EOPNOTSUPP;
+
+       reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!reply)
+               return -ENOMEM;
+
+       ret = devlink_nl_sb_tc_pool_bind_fill(reply, devlink, devlink_port,
+                                             devlink_sb, tc, type,
+                                             DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+                                             info->snd_portid,
+                                             info->snd_seq, 0);
+       if (!ret)
+               return genlmsg_reply(reply, info);
+
+       /* Fill failed: the reply was never sent, so free it here. */
+       nlmsg_free(reply);
+       return ret;
+}
+
+/* Dump the TC-to-pool bindings of every port of @devlink for @devlink_sb.
+ *
+ * For each port on devlink->port_list the ingress TCs are visited first,
+ * then the egress TCs. *@p_idx counts the entries visited so far; entries
+ * positioned below @start are counted but not emitted. When a fill fails,
+ * its error is returned and *@p_idx is left pointing at the failed entry.
+ */
+static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+                                       int start, int *p_idx,
+                                       struct devlink *devlink,
+                                       struct devlink_sb *devlink_sb,
+                                       u32 portid, u32 seq)
+{
+       struct devlink_port *port;
+       int ret;
+       u16 tc;
+
+       list_for_each_entry(port, &devlink->port_list, list) {
+               for (tc = 0; tc < devlink_sb->ingress_tc_count; tc++) {
+                       if (*p_idx >= start) {
+                               ret = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+                                                                     port,
+                                                                     devlink_sb,
+                                                                     tc,
+                                                                     DEVLINK_SB_POOL_TYPE_INGRESS,
+                                                                     DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+                                                                     portid, seq,
+                                                                     NLM_F_MULTI);
+                               if (ret)
+                                       return ret;
+                       }
+                       (*p_idx)++;
+               }
+
+               for (tc = 0; tc < devlink_sb->egress_tc_count; tc++) {
+                       if (*p_idx >= start) {
+                               ret = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+                                                                     port,
+                                                                     devlink_sb,
+                                                                     tc,
+                                                                     DEVLINK_SB_POOL_TYPE_EGRESS,
+                                                                     DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+                                                                     portid, seq,
+                                                                     NLM_F_MULTI);
+                               if (ret)
+                                       return ret;
+                       }
+                       (*p_idx)++;
+               }
+       }
+
+       return 0;
+}
+
+/* dumpit handler: dump TC-to-pool bindings of all matching devlinks.
+ *
+ * Walks devlink_list under devlink_mutex and devlink_port_mutex, skipping
+ * instances in a different netns than the requesting socket and instances
+ * without an sb_tc_pool_bind_get op. cb->args[0] carries the resume
+ * position between invocations. Any per-buffer error other than
+ * -EOPNOTSUPP ends the walk early; the return value is always msg->len.
+ */
+static int
+devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+                                         struct netlink_callback *cb)
+{
+       struct devlink_sb *sb;
+       struct devlink *dl;
+       int first = cb->args[0];
+       int pos = 0;
+       int ret;
+
+       mutex_lock(&devlink_mutex);
+       mutex_lock(&devlink_port_mutex);
+       list_for_each_entry(dl, &devlink_list, list) {
+               if (!net_eq(devlink_net(dl), sock_net(msg->sk)))
+                       continue;
+               if (!dl->ops || !dl->ops->sb_tc_pool_bind_get)
+                       continue;
+
+               list_for_each_entry(sb, &dl->sb_list, list) {
+                       ret = __sb_tc_pool_bind_get_dumpit(msg, first, &pos,
+                                                          dl, sb,
+                                                          NETLINK_CB(cb->skb).portid,
+                                                          cb->nlh->nlmsg_seq);
+                       if (ret != 0 && ret != -EOPNOTSUPP)
+                               goto unlock;
+               }
+       }
+unlock:
+       mutex_unlock(&devlink_port_mutex);
+       mutex_unlock(&devlink_mutex);
+
+       cb->args[0] = pos;
+       return msg->len;
+}
+
+/* Forward a TC-to-pool bind request to the driver's sb_tc_pool_bind_set op.
+ *
+ * Returns the op's result, or -EOPNOTSUPP when the devlink instance has no
+ * ops or the op is not implemented.
+ */
+static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
+                                      unsigned int sb_index, u16 tc_index,
+                                      enum devlink_sb_pool_type pool_type,
+                                      u16 pool_index, u32 threshold)
+{
+       const struct devlink_ops *drv_ops = devlink_port->devlink->ops;
+
+       if (!drv_ops || !drv_ops->sb_tc_pool_bind_set)
+               return -EOPNOTSUPP;
+
+       return drv_ops->sb_tc_pool_bind_set(devlink_port, sb_index,
+                                           tc_index, pool_type,
+                                           pool_index, threshold);
+}
+
+/* doit handler: bind a TC of a port to a pool with a threshold.
+ *
+ * The port and shared buffer come from info->user_ptr[0] and
+ * info->user_ptr[1]. Pool type, TC index and pool index are parsed from the
+ * request in that order, and the first parser error is returned. A request
+ * without DEVLINK_ATTR_SB_THRESHOLD fails with -EINVAL.
+ */
+static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+                                                  struct genl_info *info)
+{
+       struct devlink_port *devlink_port = info->user_ptr[0];
+       struct devlink_sb *devlink_sb = info->user_ptr[1];
+       enum devlink_sb_pool_type type;
+       struct nlattr *threshold_attr;
+       u16 pool;
+       u16 tc;
+       int ret;
+
+       ret = devlink_sb_pool_type_get_from_info(info, &type);
+       if (ret)
+               return ret;
+
+       ret = devlink_sb_tc_index_get_from_info(devlink_sb, info, type, &tc);
+       if (ret)
+               return ret;
+
+       ret = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool);
+       if (ret)
+               return ret;
+
+       threshold_attr = info->attrs[DEVLINK_ATTR_SB_THRESHOLD];
+       if (!threshold_attr)
+               return -EINVAL;
+
+       return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
+                                          tc, type, pool,
+                                          nla_get_u32(threshold_attr));
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
        [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
        [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
        [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
        [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
        [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
+       [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 }, /* shared buffer index */
+       [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 }, /* pool index */
+       [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 }, /* enum devlink_sb_pool_type, carried as u8 */
+       [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 }, /* presumably devlink_sb_pool_info.size - confirm */
+       [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, /* presumably devlink_sb_pool_info.threshold_type - confirm */
+       [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, /* threshold value, read with nla_get_u32() */
+       [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, /* traffic class (TC) index */
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -545,6 +1379,66 @@ static const struct genl_ops devlink_nl_ops[] = {
                .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
        },
+       {
+               .cmd = DEVLINK_CMD_SB_GET,
+               .doit = devlink_nl_cmd_sb_get_doit,
+               .dumpit = devlink_nl_cmd_sb_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_POOL_GET,
+               .doit = devlink_nl_cmd_sb_pool_get_doit,
+               .dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_POOL_SET,
+               .doit = devlink_nl_cmd_sb_pool_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+               .doit = devlink_nl_cmd_sb_port_pool_get_doit,
+               .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+               .doit = devlink_nl_cmd_sb_port_pool_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+               .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
+               .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+               .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+       },
 };
 
 /**
@@ -566,6 +1460,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
        devlink->ops = ops;
        devlink_net_set(devlink, &init_net);
        INIT_LIST_HEAD(&devlink->port_list);
+       INIT_LIST_HEAD(&devlink->sb_list);
        return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -721,6 +1616,51 @@ void devlink_port_split_set(struct devlink_port *devlink_port,
 }
 EXPORT_SYMBOL_GPL(devlink_port_split_set);
 
+/**
+ * devlink_sb_register - Register a shared buffer on a devlink instance
+ *
+ * @devlink: devlink instance whose sb_list receives the new entry
+ * @sb_index: index identifying the shared buffer
+ * @size: value stored as the shared buffer size
+ * @ingress_pools_count: number of ingress pools
+ * @egress_pools_count: number of egress pools
+ * @ingress_tc_count: number of ingress TCs
+ * @egress_tc_count: number of egress TCs
+ *
+ * Allocates a zeroed entry, records the given parameters and appends it to
+ * devlink->sb_list, all under devlink_mutex.
+ *
+ * Return: 0 on success, -EEXIST when devlink_sb_index_exists() reports
+ * @sb_index as already present, -ENOMEM when the allocation fails.
+ */
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+                       u32 size, u16 ingress_pools_count,
+                       u16 egress_pools_count, u16 ingress_tc_count,
+                       u16 egress_tc_count)
+{
+       struct devlink_sb *sb;
+       int ret;
+
+       mutex_lock(&devlink_mutex);
+
+       ret = -EEXIST;
+       if (devlink_sb_index_exists(devlink, sb_index))
+               goto out_unlock;
+
+       ret = -ENOMEM;
+       sb = kzalloc(sizeof(*sb), GFP_KERNEL);
+       if (!sb)
+               goto out_unlock;
+
+       sb->index = sb_index;
+       sb->size = size;
+       sb->ingress_pools_count = ingress_pools_count;
+       sb->egress_pools_count = egress_pools_count;
+       sb->ingress_tc_count = ingress_tc_count;
+       sb->egress_tc_count = egress_tc_count;
+       list_add_tail(&sb->list, &devlink->sb_list);
+       ret = 0;
+
+out_unlock:
+       mutex_unlock(&devlink_mutex);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(devlink_sb_register);
+
+/**
+ * devlink_sb_unregister - Unregister a shared buffer from a devlink instance
+ *
+ * @devlink: devlink instance the shared buffer is looked up on
+ * @sb_index: index of the shared buffer to remove
+ *
+ * Looks the entry up with devlink_sb_get_by_index() under devlink_mutex,
+ * unlinks it from its list and frees it. If no entry is found, a warning
+ * is emitted and nothing is unlinked or freed.
+ */
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+       struct devlink_sb *devlink_sb;
+
+       mutex_lock(&devlink_mutex);
+       devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+       /* Warn and skip the unlink: list_del() on a NULL entry would
+        * dereference a NULL pointer.
+        */
+       if (WARN_ON(!devlink_sb))
+               goto unlock;
+       list_del(&devlink_sb->list);
+unlock:
+       mutex_unlock(&devlink_mutex);
+       kfree(devlink_sb); /* kfree(NULL) is a no-op on the warning path */
+}
+EXPORT_SYMBOL_GPL(devlink_sb_unregister);
+
 static int __init devlink_module_init(void)
 {
        return genl_register_family_with_ops_groups(&devlink_nl_family,