IB/core: Make device counter infrastructure dynamic
author Christoph Lameter <cl@linux.com>
Mon, 16 May 2016 17:49:33 +0000 (12:49 -0500)
committer Doug Ledford <dledford@redhat.com>
Thu, 26 May 2016 16:52:51 +0000 (12:52 -0400)
In practice, each RDMA device has a unique set of counters that the
hardware implements.  Having a central set of counters that they must
all adhere to is limiting and causes many useful counters to not be
available.

Therefore we create a dynamic counter registration infrastructure.

The driver must implement a stats structure allocation routine, in
which the driver must place the directory name it wants, a list of
names for all of the counters, an array of u64 counters themselves,
plus a few generic configuration options.

We then implement a core routine to create a sysfs file for each
of the named stats elements, and a core routine to retrieve the
stats when any of the sysfs attribute files are read.

To avoid excessive beating on the stats generation routine in the
drivers, the core code also caches the stats for a short period of
time so that someone attempting to read all of the stats in a
given device's directory will not result in a stats generation
call per file read.

Future work will attempt to standardize just the shared stats
elements, and possibly add a method to get the stats via netlink
in addition to sysfs.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
[ Add caching, make structure names more informative, add i40iw support,
  other significant rewrites from the original patch ]

Documentation/infiniband/sysfs.txt
drivers/infiniband/core/sysfs.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
include/rdma/ib_verbs.h

index 3ecf0c3a133fb6a50cd40b42d0d8b87a2d1c99b9..45bcafe6ff8af9b447c6ac2e4e0bde925d6bb7c9 100644 (file)
@@ -56,6 +56,18 @@ SYSFS FILES
   ports/1/pkeys/10 contains the value at index 10 in port 1's P_Key
   table.
 
+  There is an optional "hw_counters" subdirectory that may be under either
+  the parent device or the port subdirectories or both.  If present,
+  it contains a list of counters provided by the hardware.  They may match
+  some of the counters in the counters directory, but they often include
+  many other counters.  In addition to the various counters, there will
+  be a file named "lifespan" that configures how frequently the core
+  should update the counters when they are being accessed (counters are
+  not updated if they are not being accessed).  The lifespan is in
+  milliseconds and defaults to 10 unless set to something else by the
+  driver.  Users may echo a value from 0 to 10000 to the lifespan file to
+  set the length of time between updates in milliseconds.
+
 MTHCA
 
   The Mellanox HCA driver also creates the files:
index 14606afbfaa8d6c865d28a545c292146ffc1b4bc..5e573bb18660d68f9e5c051ce3c7859ce08dea03 100644 (file)
@@ -56,8 +56,10 @@ struct ib_port {
        struct gid_attr_group *gid_attr_group;
        struct attribute_group gid_group;
        struct attribute_group pkey_group;
-       u8                     port_num;
        struct attribute_group *pma_table;
+       struct attribute_group *hw_stats_ag;
+       struct rdma_hw_stats   *hw_stats;
+       u8                     port_num;
 };
 
 struct port_attribute {
@@ -80,6 +82,18 @@ struct port_table_attribute {
        __be16                  attr_id;
 };
 
+struct hw_stats_attribute {            /* one sysfs file of a "hw_counters" group: a counter or the lifespan file */
+       struct attribute        attr;
+       ssize_t                 (*show)(struct kobject *kobj,
+                                       struct attribute *attr, char *buf);
+       ssize_t                 (*store)(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        const char *buf,
+                                        size_t count);
+       int                     index;          /* slot of rdma_hw_stats->value[] printed by show_hw_stats() */
+       u8                      port_num;       /* 0 selects the device-level stats, non-zero the port's stats */
+};
+
 static ssize_t port_attr_show(struct kobject *kobj,
                              struct attribute *attr, char *buf)
 {
@@ -733,6 +747,212 @@ static struct attribute_group *get_counter_table(struct ib_device *dev,
        return &pma_group;
 }
 
+/*
+ * Refresh the cached counters in @stats through the driver's get_hw_stats
+ * callback, unless the cached copy is still within its lifespan.
+ *
+ * Returns 0 when the cache was fresh or the driver call succeeded, or the
+ * negative value returned by the driver.
+ */
+static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
+                          u8 port_num, int index)
+{
+       int nr_updated;
+
+       /* Cached values have not expired yet: skip the driver call. */
+       if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan))
+               return 0;
+
+       nr_updated = dev->get_hw_stats(dev, stats, port_num, index);
+       if (nr_updated < 0)
+               return nr_updated;
+
+       /* Only a refresh of every counter restarts the lifespan window. */
+       if (nr_updated == stats->num_counters)
+               stats->timestamp = jiffies;
+
+       return 0;
+}
+
+/* Write counter @index of @stats into @buf as a decimal number and newline. */
+static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf)
+{
+       ssize_t len;
+
+       len = sprintf(buf, "%llu\n", stats->value[index]);
+
+       return len;
+}
+
+/*
+ * Read handler shared by the counter files of a "hw_counters" group.
+ * Picks the device-level or per-port stats according to the attribute's
+ * port_num, refreshes them if needed and prints the selected counter.
+ *
+ * Returns the number of bytes written to @buf, or the error reported by
+ * update_hw_stats().
+ */
+static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
+                            char *buf)
+{
+       struct hw_stats_attribute *hsa =
+               container_of(attr, struct hw_stats_attribute, attr);
+       struct rdma_hw_stats *stats;
+       struct ib_device *dev;
+       int ret;
+
+       if (hsa->port_num) {
+               struct ib_port *port = container_of(kobj, struct ib_port, kobj);
+
+               dev = port->ibdev;
+               stats = port->hw_stats;
+       } else {
+               /*
+                * NOTE(review): kobj is cast straight to struct device *
+                * instead of being resolved through its kobj member;
+                * presumably the caller hands in the device pointer here.
+                * Confirm before changing.
+                */
+               dev = container_of((struct device *)kobj,
+                                  struct ib_device, dev);
+               stats = dev->hw_stats;
+       }
+
+       ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
+       if (ret)
+               return ret;
+
+       return print_hw_stat(stats, hsa->index, buf);
+}
+
+/*
+ * Read handler for the "lifespan" file: prints the cache lifespan of the
+ * device-level (port_num == 0) or per-port stats in milliseconds.
+ */
+static ssize_t show_stats_lifespan(struct kobject *kobj,
+                                  struct attribute *attr,
+                                  char *buf)
+{
+       struct hw_stats_attribute *hsa =
+               container_of(attr, struct hw_stats_attribute, attr);
+       struct rdma_hw_stats *stats;
+       int msecs;
+
+       if (hsa->port_num) {
+               struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+
+               stats = p->hw_stats;
+       } else {
+               struct ib_device *dev = container_of((struct device *)kobj,
+                                                    struct ib_device, dev);
+
+               stats = dev->hw_stats;
+       }
+
+       msecs = jiffies_to_msecs(stats->lifespan);
+
+       return sprintf(buf, "%d\n", msecs);
+}
+
+/*
+ * Write handler for the "lifespan" file.  Parses a decimal number of
+ * milliseconds from @buf and stores it, converted to jiffies, as the cache
+ * lifespan of the device-level (port_num == 0) or per-port stats.
+ *
+ * Returns @count on success, the kstrtoint() error for unparsable input,
+ * or -EINVAL when the value is outside 0..10000.
+ */
+static ssize_t set_stats_lifespan(struct kobject *kobj,
+                                 struct attribute *attr,
+                                 const char *buf, size_t count)
+{
+       struct hw_stats_attribute *hsa;
+       struct rdma_hw_stats *stats;
+       /*
+        * Not named "jiffies": that would shadow the kernel's global tick
+        * counter.  unsigned long matches what msecs_to_jiffies() returns.
+        */
+       unsigned long lifespan;
+       int msecs;
+       int ret;
+
+       ret = kstrtoint(buf, 10, &msecs);
+       if (ret)
+               return ret;
+       if (msecs < 0 || msecs > 10000)
+               return -EINVAL;
+       lifespan = msecs_to_jiffies(msecs);
+
+       hsa = container_of(attr, struct hw_stats_attribute, attr);
+       if (!hsa->port_num) {
+               struct ib_device *dev = container_of((struct device *)kobj,
+                                                    struct ib_device, dev);
+               stats = dev->hw_stats;
+       } else {
+               struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+               stats = p->hw_stats;
+       }
+       stats->lifespan = lifespan;
+
+       return count;
+}
+
+/*
+ * Remove @attr_group from @kobj in sysfs, then free every attribute in
+ * its NULL-terminated attrs list and the group itself.
+ */
+static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group)
+{
+       struct attribute **cur;
+
+       sysfs_remove_group(kobj, attr_group);
+
+       cur = attr_group->attrs;
+       while (*cur) {
+               kfree(*cur);
+               cur++;
+       }
+
+       kfree(attr_group);
+}
+
+/*
+ * Allocate the read-only sysfs attribute for counter @index, named @name,
+ * of the device-level (@port_num == 0) or per-port stats.
+ *
+ * Returns the embedded struct attribute, or NULL on allocation failure.
+ * The caller owns the memory and releases it with kfree().
+ */
+static struct attribute *alloc_hsa(int index, u8 port_num, const char *name)
+{
+       struct hw_stats_attribute *hsa;
+
+       /* Zeroed so no field of the embedded attribute is left as garbage. */
+       hsa = kzalloc(sizeof(*hsa), GFP_KERNEL);
+       if (!hsa)
+               return NULL;
+
+       /* Dynamically allocated attributes must be initialised for lockdep. */
+       sysfs_attr_init(&hsa->attr);
+       hsa->attr.name = (char *)name;
+       hsa->attr.mode = S_IRUGO;
+       hsa->show = show_hw_stats;
+       hsa->store = NULL;
+       hsa->index = index;
+       hsa->port_num = port_num;
+
+       return &hsa->attr;
+}
+
+/*
+ * Allocate the owner-writable, world-readable sysfs attribute @name that
+ * exposes the cache lifespan of the device-level (@port_num == 0) or
+ * per-port stats.
+ *
+ * Returns the embedded struct attribute, or NULL on allocation failure.
+ * The caller owns the memory and releases it with kfree().
+ */
+static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num)
+{
+       struct hw_stats_attribute *hsa;
+
+       /* Zeroed so no field of the embedded attribute is left as garbage. */
+       hsa = kzalloc(sizeof(*hsa), GFP_KERNEL);
+       if (!hsa)
+               return NULL;
+
+       /* Dynamically allocated attributes must be initialised for lockdep. */
+       sysfs_attr_init(&hsa->attr);
+       hsa->attr.name = name;
+       hsa->attr.mode = S_IWUSR | S_IRUGO;
+       hsa->show = show_stats_lifespan;
+       hsa->store = set_stats_lifespan;
+       hsa->index = 0;
+       hsa->port_num = port_num;
+
+       return &hsa->attr;
+}
+
+/*
+ * Create the "hw_counters" sysfs group for @port, or for @device itself
+ * when @port is NULL.
+ *
+ * The driver's alloc_hw_stats callback supplies the stats structure; one
+ * read-only file is created per counter name plus a "lifespan" file.  On
+ * success the group and the stats are recorded in @port or @device.  Any
+ * failure is silent: everything allocated here is released and no
+ * hw_counters directory is created.
+ */
+static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
+                          u8 port_num)
+{
+       struct attribute_group *hsag;
+       struct rdma_hw_stats *stats;
+       int i, ret;
+
+       stats = device->alloc_hw_stats(device, port_num);
+
+       if (!stats)
+               return;
+
+       /* hsag does not exist yet, so only stats may be released here. */
+       if (!stats->names || stats->num_counters <= 0)
+               goto err_free_stats;
+
+       /*
+        * Two extra attribute slots: one for the lifespan config entry and
+        * one left NULL to terminate the list, which both the sysfs core
+        * and free_hsag() walk until they hit NULL.
+        */
+       hsag = kzalloc(sizeof(*hsag) +
+                      sizeof(void *) * (stats->num_counters + 2),
+                      GFP_KERNEL);
+       if (!hsag)
+               goto err_free_stats;
+
+       ret = device->get_hw_stats(device, stats, port_num,
+                                  stats->num_counters);
+       /* hsag->attrs is not set up yet, so skip the per-attribute frees. */
+       if (ret != stats->num_counters)
+               goto err_free_hsag;
+
+       stats->timestamp = jiffies;
+
+       hsag->name = "hw_counters";
+       /* The attribute pointer array lives directly behind the group. */
+       hsag->attrs = (void *)hsag + sizeof(*hsag);
+
+       for (i = 0; i < stats->num_counters; i++) {
+               hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]);
+               if (!hsag->attrs[i])
+                       goto err;
+       }
+
+       /* treat an error here as non-fatal */
+       hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
+
+       if (port) {
+               struct kobject *kobj = &port->kobj;
+               ret = sysfs_create_group(kobj, hsag);
+               if (ret)
+                       goto err;
+               port->hw_stats_ag = hsag;
+               port->hw_stats = stats;
+       } else {
+               struct kobject *kobj = &device->dev.kobj;
+               ret = sysfs_create_group(kobj, hsag);
+               if (ret)
+                       goto err;
+               device->hw_stats_ag = hsag;
+               device->hw_stats = stats;
+       }
+
+       return;
+
+err:
+       /* Slots that were never filled are NULL, which kfree() accepts. */
+       for (; i >= 0; i--)
+               kfree(hsag->attrs[i]);
+err_free_hsag:
+       kfree(hsag);
+err_free_stats:
+       kfree(stats);
+       return;
+}
+
 static int add_port(struct ib_device *device, int port_num,
                    int (*port_callback)(struct ib_device *,
                                         u8, struct kobject *))
@@ -835,6 +1055,14 @@ static int add_port(struct ib_device *device, int port_num,
                        goto err_remove_pkey;
        }
 
+       /*
+        * If port == 0, it means we have only one port and the parent
+        * device, not this port device, should be the holder of the
+        * hw_counters
+        */
+       if (device->alloc_hw_stats && port_num)
+               setup_hw_stats(device, p, port_num);
+
        list_add_tail(&p->kobj.entry, &device->port_list);
 
        kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -972,120 +1200,6 @@ static struct device_attribute *ib_class_attributes[] = {
        &dev_attr_node_desc
 };
 
-/* Show a given an attribute in the statistics group */
-static ssize_t show_protocol_stat(const struct device *device,
-                           struct device_attribute *attr, char *buf,
-                           unsigned offset)
-{
-       struct ib_device *dev = container_of(device, struct ib_device, dev);
-       union rdma_protocol_stats stats;
-       ssize_t ret;
-
-       ret = dev->get_protocol_stats(dev, &stats);
-       if (ret)
-               return ret;
-
-       return sprintf(buf, "%llu\n",
-                      (unsigned long long) ((u64 *) &stats)[offset]);
-}
-
-/* generate a read-only iwarp statistics attribute */
-#define IW_STATS_ENTRY(name)                                           \
-static ssize_t show_##name(struct device *device,                      \
-                          struct device_attribute *attr, char *buf)    \
-{                                                                      \
-       return show_protocol_stat(device, attr, buf,                    \
-                                 offsetof(struct iw_protocol_stats, name) / \
-                                 sizeof (u64));                        \
-}                                                                      \
-static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
-
-IW_STATS_ENTRY(ipInReceives);
-IW_STATS_ENTRY(ipInHdrErrors);
-IW_STATS_ENTRY(ipInTooBigErrors);
-IW_STATS_ENTRY(ipInNoRoutes);
-IW_STATS_ENTRY(ipInAddrErrors);
-IW_STATS_ENTRY(ipInUnknownProtos);
-IW_STATS_ENTRY(ipInTruncatedPkts);
-IW_STATS_ENTRY(ipInDiscards);
-IW_STATS_ENTRY(ipInDelivers);
-IW_STATS_ENTRY(ipOutForwDatagrams);
-IW_STATS_ENTRY(ipOutRequests);
-IW_STATS_ENTRY(ipOutDiscards);
-IW_STATS_ENTRY(ipOutNoRoutes);
-IW_STATS_ENTRY(ipReasmTimeout);
-IW_STATS_ENTRY(ipReasmReqds);
-IW_STATS_ENTRY(ipReasmOKs);
-IW_STATS_ENTRY(ipReasmFails);
-IW_STATS_ENTRY(ipFragOKs);
-IW_STATS_ENTRY(ipFragFails);
-IW_STATS_ENTRY(ipFragCreates);
-IW_STATS_ENTRY(ipInMcastPkts);
-IW_STATS_ENTRY(ipOutMcastPkts);
-IW_STATS_ENTRY(ipInBcastPkts);
-IW_STATS_ENTRY(ipOutBcastPkts);
-IW_STATS_ENTRY(tcpRtoAlgorithm);
-IW_STATS_ENTRY(tcpRtoMin);
-IW_STATS_ENTRY(tcpRtoMax);
-IW_STATS_ENTRY(tcpMaxConn);
-IW_STATS_ENTRY(tcpActiveOpens);
-IW_STATS_ENTRY(tcpPassiveOpens);
-IW_STATS_ENTRY(tcpAttemptFails);
-IW_STATS_ENTRY(tcpEstabResets);
-IW_STATS_ENTRY(tcpCurrEstab);
-IW_STATS_ENTRY(tcpInSegs);
-IW_STATS_ENTRY(tcpOutSegs);
-IW_STATS_ENTRY(tcpRetransSegs);
-IW_STATS_ENTRY(tcpInErrs);
-IW_STATS_ENTRY(tcpOutRsts);
-
-static struct attribute *iw_proto_stats_attrs[] = {
-       &dev_attr_ipInReceives.attr,
-       &dev_attr_ipInHdrErrors.attr,
-       &dev_attr_ipInTooBigErrors.attr,
-       &dev_attr_ipInNoRoutes.attr,
-       &dev_attr_ipInAddrErrors.attr,
-       &dev_attr_ipInUnknownProtos.attr,
-       &dev_attr_ipInTruncatedPkts.attr,
-       &dev_attr_ipInDiscards.attr,
-       &dev_attr_ipInDelivers.attr,
-       &dev_attr_ipOutForwDatagrams.attr,
-       &dev_attr_ipOutRequests.attr,
-       &dev_attr_ipOutDiscards.attr,
-       &dev_attr_ipOutNoRoutes.attr,
-       &dev_attr_ipReasmTimeout.attr,
-       &dev_attr_ipReasmReqds.attr,
-       &dev_attr_ipReasmOKs.attr,
-       &dev_attr_ipReasmFails.attr,
-       &dev_attr_ipFragOKs.attr,
-       &dev_attr_ipFragFails.attr,
-       &dev_attr_ipFragCreates.attr,
-       &dev_attr_ipInMcastPkts.attr,
-       &dev_attr_ipOutMcastPkts.attr,
-       &dev_attr_ipInBcastPkts.attr,
-       &dev_attr_ipOutBcastPkts.attr,
-       &dev_attr_tcpRtoAlgorithm.attr,
-       &dev_attr_tcpRtoMin.attr,
-       &dev_attr_tcpRtoMax.attr,
-       &dev_attr_tcpMaxConn.attr,
-       &dev_attr_tcpActiveOpens.attr,
-       &dev_attr_tcpPassiveOpens.attr,
-       &dev_attr_tcpAttemptFails.attr,
-       &dev_attr_tcpEstabResets.attr,
-       &dev_attr_tcpCurrEstab.attr,
-       &dev_attr_tcpInSegs.attr,
-       &dev_attr_tcpOutSegs.attr,
-       &dev_attr_tcpRetransSegs.attr,
-       &dev_attr_tcpInErrs.attr,
-       &dev_attr_tcpOutRsts.attr,
-       NULL
-};
-
-static struct attribute_group iw_stats_group = {
-       .name   = "proto_stats",
-       .attrs  = iw_proto_stats_attrs,
-};
-
 static void free_port_list_attributes(struct ib_device *device)
 {
        struct kobject *p, *t;
@@ -1093,6 +1207,10 @@ static void free_port_list_attributes(struct ib_device *device)
        list_for_each_entry_safe(p, t, &device->port_list, entry) {
                struct ib_port *port = container_of(p, struct ib_port, kobj);
                list_del(&p->entry);
+               if (port->hw_stats) {
+                       kfree(port->hw_stats);
+                       free_hsag(&port->kobj, port->hw_stats_ag);
+               }
                sysfs_remove_group(p, port->pma_table);
                sysfs_remove_group(p, &port->pkey_group);
                sysfs_remove_group(p, &port->gid_group);
@@ -1149,11 +1267,8 @@ int ib_device_register_sysfs(struct ib_device *device,
                }
        }
 
-       if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) {
-               ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group);
-               if (ret)
-                       goto err_put;
-       }
+       if (device->alloc_hw_stats)
+               setup_hw_stats(device, NULL, 0);
 
        return 0;
 
@@ -1169,15 +1284,18 @@ err:
 
 void ib_device_unregister_sysfs(struct ib_device *device)
 {
-       /* Hold kobject until ib_dealloc_device() */
-       struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
        int i;
 
-       if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
-               sysfs_remove_group(kobj_dev, &iw_stats_group);
+       /* Hold kobject until ib_dealloc_device() */
+       kobject_get(&device->dev.kobj);
 
        free_port_list_attributes(device);
 
+       if (device->hw_stats) {
+               kfree(device->hw_stats);
+               free_hsag(&device->dev.kobj, device->hw_stats_ag);
+       }
+
        for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
                device_remove_file(&device->dev, ib_class_attributes[i]);
 
index 47cb927a0dd665bd0dfb0f85508650aa97aaa7b7..bb1a839d4d6d43af7c36f881c2be14145fa5d9d7 100644 (file)
@@ -1218,59 +1218,119 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
                       iwch_dev->rdev.rnic_info.pdev->device);
 }
 
-static int iwch_get_mib(struct ib_device *ibdev,
-                       union rdma_protocol_stats *stats)
+enum counters {        /* indexes shared by names[] and rdma_hw_stats->value[] in this driver */
+       IPINRECEIVES,
+       IPINHDRERRORS,
+       IPINADDRERRORS,
+       IPINUNKNOWNPROTOS,
+       IPINDISCARDS,
+       IPINDELIVERS,
+       IPOUTREQUESTS,
+       IPOUTDISCARDS,
+       IPOUTNOROUTES,
+       IPREASMTIMEOUT,
+       IPREASMREQDS,
+       IPREASMOKS,
+       IPREASMFAILS,
+       TCPACTIVEOPENS,
+       TCPPASSIVEOPENS,
+       TCPATTEMPTFAILS,
+       TCPESTABRESETS,
+       TCPCURRESTAB,
+       TCPINSEGS,
+       TCPOUTSEGS,
+       TCPRETRANSSEGS,
+       TCPINERRS,
+       TCPOUTRSTS,
+       TCPRTOMIN,
+       TCPRTOMAX,
+       NR_COUNTERS     /* counter count; checked against ARRAY_SIZE(names) in iwch_alloc_stats() */
+};
+
+static const char * const names[] = {  /* counter names handed to rdma_alloc_hw_stats_struct(), indexed by enum counters */
+       [IPINRECEIVES] = "ipInReceives",
+       [IPINHDRERRORS] = "ipInHdrErrors",
+       [IPINADDRERRORS] = "ipInAddrErrors",
+       [IPINUNKNOWNPROTOS] = "ipInUnknownProtos",
+       [IPINDISCARDS] = "ipInDiscards",
+       [IPINDELIVERS] = "ipInDelivers",
+       [IPOUTREQUESTS] = "ipOutRequests",
+       [IPOUTDISCARDS] = "ipOutDiscards",
+       [IPOUTNOROUTES] = "ipOutNoRoutes",
+       [IPREASMTIMEOUT] = "ipReasmTimeout",
+       [IPREASMREQDS] = "ipReasmReqds",
+       [IPREASMOKS] = "ipReasmOKs",
+       [IPREASMFAILS] = "ipReasmFails",
+       [TCPACTIVEOPENS] = "tcpActiveOpens",
+       [TCPPASSIVEOPENS] = "tcpPassiveOpens",
+       [TCPATTEMPTFAILS] = "tcpAttemptFails",
+       [TCPESTABRESETS] = "tcpEstabResets",
+       [TCPCURRESTAB] = "tcpCurrEstab",
+       [TCPINSEGS] = "tcpInSegs",
+       [TCPOUTSEGS] = "tcpOutSegs",
+       [TCPRETRANSSEGS] = "tcpRetransSegs",
+       [TCPINERRS] = "tcpInErrs",
+       [TCPOUTRSTS] = "tcpOutRsts",
+       [TCPRTOMIN] = "tcpRtoMin",
+       [TCPRTOMAX] = "tcpRtoMax",
+};
+
+/*
+ * alloc_hw_stats callback: hands the core a stats structure describing
+ * this driver's counters.  Returns NULL for any non-zero @port_num, or
+ * whatever rdma_alloc_hw_stats_struct() returns for the device itself.
+ */
+static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev,
+                                             u8 port_num)
+{
+       struct rdma_hw_stats *stats = NULL;
+
+       BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
+
+       /* Our driver only supports device level stats */
+       if (port_num == 0)
+               stats = rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
+                                                  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+
+       return stats;
+}
+
+/*
+ * get_hw_stats callback: fill @stats with the adapter's MIB counters.
+ * Only device-level stats (@port == 0) are served; @index is not consulted
+ * and every counter is written on each call.
+ *
+ * Returns stats->num_counters on success, or -ENOSYS when @port is
+ * non-zero, @stats is NULL or the RDMA_GET_MIB control call fails.
+ */
+static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+                       u8 port, int index)
 {
        struct iwch_dev *dev;
        struct tp_mib_stats m;
        int ret;
 
+       if (port != 0 || !stats)
+               return -ENOSYS;
+
        PDBG("%s ibdev %p\n", __func__, ibdev);
        dev = to_iwch_dev(ibdev);
        ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
        if (ret)
                return -ENOSYS;
 
-       memset(stats, 0, sizeof *stats);
-       stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) +
-                               m.ipInReceive_lo;
-       stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) +
-                                 m.ipInHdrErrors_lo;
-       stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) +
-                                  m.ipInAddrErrors_lo;
-       stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) +
-                                     m.ipInUnknownProtos_lo;
-       stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) +
-                                m.ipInDiscards_lo;
-       stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) +
-                                m.ipInDelivers_lo;
-       stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) +
-                                 m.ipOutRequests_lo;
-       stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) +
-                                 m.ipOutDiscards_lo;
-       stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) +
-                                 m.ipOutNoRoutes_lo;
-       stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout;
-       stats->iw.ipReasmReqds = (u64) m.ipReasmReqds;
-       stats->iw.ipReasmOKs = (u64) m.ipReasmOKs;
-       stats->iw.ipReasmFails = (u64) m.ipReasmFails;
-       stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens;
-       stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens;
-       stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails;
-       stats->iw.tcpEstabResets = (u64) m.tcpEstabResets;
-       stats->iw.tcpOutRsts = (u64) m.tcpOutRsts;
-       stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab;
-       stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) +
-                             m.tcpInSegs_lo;
-       stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) +
-                              m.tcpOutSegs_lo;
-       stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) +
-                                 m.tcpRetransSeg_lo;
-       stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) +
-                             m.tcpInErrs_lo;
-       stats->iw.tcpRtoMin = (u64) m.tcpRtoMin;
-       stats->iw.tcpRtoMax = (u64) m.tcpRtoMax;
-       return 0;
+       /*
+        * Each counter is loaded from the tp_mib_stats field of the same
+        * name; counters the hardware splits into _hi/_lo halves are
+        * recombined into one 64-bit value.
+        */
+       stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo;
+       stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo;
+       stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo;
+       stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo;
+       stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo;
+       stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo;
+       stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo;
+       stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo;
+       stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo;
+       stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout;
+       stats->value[IPREASMREQDS] = m.ipReasmReqds;
+       stats->value[IPREASMOKS] = m.ipReasmOKs;
+       stats->value[IPREASMFAILS] = m.ipReasmFails;
+       stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens;
+       stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens;
+       stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails;
+       stats->value[TCPESTABRESETS] = m.tcpEstabResets;
+       stats->value[TCPCURRESTAB] = m.tcpCurrEstab;
+       stats->value[TCPINSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo;
+       stats->value[TCPOUTSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo;
+       stats->value[TCPRETRANSSEGS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo;
+       stats->value[TCPINERRS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo;
+       stats->value[TCPOUTRSTS] = m.tcpOutRsts;
+       stats->value[TCPRTOMIN] = m.tcpRtoMin;
+       stats->value[TCPRTOMAX] = m.tcpRtoMax;
+
+       return stats->num_counters;
 }
 
 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
@@ -1373,7 +1433,8 @@ int iwch_register_device(struct iwch_dev *dev)
        dev->ibdev.req_notify_cq = iwch_arm_cq;
        dev->ibdev.post_send = iwch_post_send;
        dev->ibdev.post_recv = iwch_post_receive;
-       dev->ibdev.get_protocol_stats = iwch_get_mib;
+       dev->ibdev.alloc_hw_stats = iwch_alloc_stats;
+       dev->ibdev.get_hw_stats = iwch_get_mib;
        dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
        dev->ibdev.get_port_immutable = iwch_port_immutable;
 
index 7574f394fdac892856f9e7e7f98d3d112dc321b1..dd8a86b726d2a70cf5bbee75e89db437fa33eca7 100644 (file)
@@ -446,20 +446,59 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
                       c4iw_dev->rdev.lldi.pdev->device);
 }
 
+enum counters {        /* indexes shared by names[] and rdma_hw_stats->value[] in this driver */
+       IP4INSEGS,
+       IP4OUTSEGS,
+       IP4RETRANSSEGS,
+       IP4OUTRSTS,
+       IP6INSEGS,
+       IP6OUTSEGS,
+       IP6RETRANSSEGS,
+       IP6OUTRSTS,
+       NR_COUNTERS     /* counter count; checked against ARRAY_SIZE(names) in c4iw_alloc_stats() */
+};
+
+static const char * const names[] = {  /* counter names handed to rdma_alloc_hw_stats_struct(), indexed by enum counters */
+       [IP4INSEGS] = "ip4InSegs",
+       [IP4OUTSEGS] = "ip4OutSegs",
+       [IP4RETRANSSEGS] = "ip4RetransSegs",
+       [IP4OUTRSTS] = "ip4OutRsts",
+       [IP6INSEGS] = "ip6InSegs",
+       [IP6OUTSEGS] = "ip6OutSegs",
+       [IP6RETRANSSEGS] = "ip6RetransSegs",
+       [IP6OUTRSTS] = "ip6OutRsts"
+};
+
+/*
+ * alloc_hw_stats callback: hands the core a stats structure describing
+ * this driver's counters.  Returns NULL for any non-zero @port_num, or
+ * whatever rdma_alloc_hw_stats_struct() returns for the device itself.
+ */
+static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
+                                             u8 port_num)
+{
+       struct rdma_hw_stats *stats = NULL;
+
+       BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
+
+       /* Stats are only allocated for the device (port_num == 0). */
+       if (port_num == 0)
+               stats = rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
+                                                  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+
+       return stats;
+}
+
 static int c4iw_get_mib(struct ib_device *ibdev,
-                       union rdma_protocol_stats *stats)
+                       struct rdma_hw_stats *stats,
+                       u8 port, int index)     /* neither port nor index is consulted in the body */
 {
        struct tp_tcp_stats v4, v6;
        struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev);
 
        cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6);
-       memset(stats, 0, sizeof *stats);
-       stats->iw.tcpInSegs = v4.tcp_in_segs + v6.tcp_in_segs;
-       stats->iw.tcpOutSegs = v4.tcp_out_segs + v6.tcp_out_segs;
-       stats->iw.tcpRetransSegs = v4.tcp_retrans_segs + v6.tcp_retrans_segs;
-       stats->iw.tcpOutRsts = v4.tcp_out_rsts + v6.tcp_out_rsts;
-
-       return 0;
+       stats->value[IP4INSEGS] = v4.tcp_in_segs;       /* v4 and v6 fields now go to separate counters instead of being summed */
+       stats->value[IP4OUTSEGS] = v4.tcp_out_segs;
+       stats->value[IP4RETRANSSEGS] = v4.tcp_retrans_segs;
+       stats->value[IP4OUTRSTS] = v4.tcp_out_rsts;
+       stats->value[IP6INSEGS] = v6.tcp_in_segs;
+       stats->value[IP6OUTSEGS] = v6.tcp_out_segs;
+       stats->value[IP6RETRANSSEGS] = v6.tcp_retrans_segs;
+       stats->value[IP6OUTRSTS] = v6.tcp_out_rsts;
+
+       return stats->num_counters;     /* update_hw_stats() restamps its cache only when this equals num_counters */
 }
 
 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
@@ -562,7 +601,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
        dev->ibdev.req_notify_cq = c4iw_arm_cq;
        dev->ibdev.post_send = c4iw_post_send;
        dev->ibdev.post_recv = c4iw_post_receive;
-       dev->ibdev.get_protocol_stats = c4iw_get_mib;
+       dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
+       dev->ibdev.get_hw_stats = c4iw_get_mib;
        dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
        dev->ibdev.get_port_immutable = c4iw_port_immutable;
        dev->ibdev.drain_sq = c4iw_drain_sq;
index 4a740f7a0519bcac97d35b86844ae844727daeb4..02a735b64208c83f35e00e3dd29b4be1bba5b068 100644 (file)
@@ -2361,58 +2361,130 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
        return 0;
 }
 
+static const char * const i40iw_hw_stat_names[] = {
+       /* 32bit counter names, indexed directly by their enum value */
+       [I40IW_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
+       [I40IW_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
+       [I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
+       [I40IW_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
+       [I40IW_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
+       [I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
+       [I40IW_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs",
+       [I40IW_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors",
+       [I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors",
+       /* 64bit counter names, offset by I40IW_HW_STAT_INDEX_MAX_32 */
+       [I40IW_HW_STAT_INDEX_IP4RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip4InOctets",
+       [I40IW_HW_STAT_INDEX_IP4RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip4InPkts",
+       [I40IW_HW_STAT_INDEX_IP4RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip4InReasmRqd",
+       [I40IW_HW_STAT_INDEX_IP4RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip4InMcastPkts",
+       [I40IW_HW_STAT_INDEX_IP4TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip4OutOctets",
+       [I40IW_HW_STAT_INDEX_IP4TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip4OutPkts",
+       [I40IW_HW_STAT_INDEX_IP4TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip4OutSegRqd",
+       [I40IW_HW_STAT_INDEX_IP4TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip4OutMcastPkts",
+       [I40IW_HW_STAT_INDEX_IP6RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip6InOctets",
+       [I40IW_HW_STAT_INDEX_IP6RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip6InPkts",
+       [I40IW_HW_STAT_INDEX_IP6RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip6InReasmRqd",
+       [I40IW_HW_STAT_INDEX_IP6RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip6InMcastPkts",
+       [I40IW_HW_STAT_INDEX_IP6TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip6OutOctets",
+       [I40IW_HW_STAT_INDEX_IP6TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip6OutPkts",
+       [I40IW_HW_STAT_INDEX_IP6TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip6OutSegRqd",
+       [I40IW_HW_STAT_INDEX_IP6TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "ip6OutMcastPkts",
+       [I40IW_HW_STAT_INDEX_TCPRXSEGS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "tcpInSegs",
+       [I40IW_HW_STAT_INDEX_TCPTXSEG + I40IW_HW_STAT_INDEX_MAX_32] =
+               "tcpOutSegs",
+       [I40IW_HW_STAT_INDEX_RDMARXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "iwInRdmaReads",
+       [I40IW_HW_STAT_INDEX_RDMARXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "iwInRdmaSends",
+       [I40IW_HW_STAT_INDEX_RDMARXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "iwInRdmaWrites",
+       [I40IW_HW_STAT_INDEX_RDMATXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "iwOutRdmaReads",
+       [I40IW_HW_STAT_INDEX_RDMATXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "iwOutRdmaSends",
+       [I40IW_HW_STAT_INDEX_RDMATXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
+               "iwOutRdmaWrites",
+       [I40IW_HW_STAT_INDEX_RDMAVBND + I40IW_HW_STAT_INDEX_MAX_32] =
+               "iwRdmaBnd",
+       [I40IW_HW_STAT_INDEX_RDMAVINV + I40IW_HW_STAT_INDEX_MAX_32] =
+               "iwRdmaInv"
+};
+
 /**
- * i40iw_get_protocol_stats - Populates the rdma_stats structure
- * @ibdev: ib dev struct
- * @stats: iw protocol stats struct
+ * i40iw_alloc_hw_stats - Allocate a hw stats structure
+ * @ibdev: device pointer from stack
+ * @port_num: port number (not used by this driver)
  */
-static int i40iw_get_protocol_stats(struct ib_device *ibdev,
-                                   union rdma_protocol_stats *stats)
+static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
+                                                 u8 port_num)
+{
+       struct i40iw_device *iwdev = to_iwdev(ibdev);
+       struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+       int num_counters = I40IW_HW_STAT_INDEX_MAX_32 +
+               I40IW_HW_STAT_INDEX_MAX_64;
+       unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
+
+       BUILD_BUG_ON(ARRAY_SIZE(i40iw_hw_stat_names) !=
+                    (I40IW_HW_STAT_INDEX_MAX_32 +
+                     I40IW_HW_STAT_INDEX_MAX_64));
+
+       /*
+        * PFs get the default update lifespan, but VFs only update once
+        * per second
+        */
+       if (!dev->is_pf)
+               lifespan = 1000;        /* milliseconds */
+       return rdma_alloc_hw_stats_struct(i40iw_hw_stat_names, num_counters,
+                                         lifespan);
+}
+
+/**
+ * i40iw_get_hw_stats - Populates the rdma_hw_stats structure
+ * @ibdev: device pointer from stack
+ * @stats: stats pointer from stack
+ * @port_num: port number
+ * @index: counter the stack wants updated (unused; all are copied out)
+ */
+static int i40iw_get_hw_stats(struct ib_device *ibdev,
+                             struct rdma_hw_stats *stats,
+                             u8 port_num, int index)
 {
        struct i40iw_device *iwdev = to_iwdev(ibdev);
        struct i40iw_sc_dev *dev = &iwdev->sc_dev;
        struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
        struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
-       struct timespec curr_time;
-       static struct timespec last_rd_time = {0, 0};
        unsigned long flags;
 
-       curr_time = current_kernel_time();
-       memset(stats, 0, sizeof(*stats));
-
        if (dev->is_pf) {
                spin_lock_irqsave(&devstat->stats_lock, flags);
                devstat->ops.iw_hw_stat_read_all(devstat,
                        &devstat->hw_stats);
                spin_unlock_irqrestore(&devstat->stats_lock, flags);
        } else {
-               if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1)
-                       if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
-                               return -ENOSYS;
+               if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
+                       return -ENOSYS;
        }
 
-       stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] +
-                                hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXPKTS];
-       stats->iw.ipInTruncatedPkts = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] +
-                                     hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC];
-       stats->iw.ipInDiscards = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] +
-                                hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD];
-       stats->iw.ipOutNoRoutes = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] +
-                                 hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE];
-       stats->iw.ipReasmReqds = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] +
-                                hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS];
-       stats->iw.ipFragCreates = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] +
-                                 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS];
-       stats->iw.ipInMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] +
-                                 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS];
-       stats->iw.ipOutMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] +
-                                  hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXMCPKTS];
-       stats->iw.tcpOutSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPTXSEG];
-       stats->iw.tcpInSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPRXSEGS];
-       stats->iw.tcpRetransSegs = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_TCPRTXSEG];
-
-       last_rd_time = curr_time;
-       return 0;
+       memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats));
+
+       return stats->num_counters;
 }
 
 /**
@@ -2551,7 +2623,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
        iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
        iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
        iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
-       iwibdev->ibdev.get_protocol_stats = i40iw_get_protocol_stats;
+       iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats;
+       iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats;
        iwibdev->ibdev.query_device = i40iw_query_device;
        iwibdev->ibdev.create_ah = i40iw_create_ah;
        iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
index fc0320c004a306a9007a93d1bc6a6633f7fc971d..432bed510369e7b19a4773ec5a8bb09b8d9bb1e6 100644 (file)
@@ -403,56 +403,55 @@ enum ib_port_speed {
        IB_SPEED_EDR    = 32
 };
 
-struct ib_protocol_stats {
-       /* TBD... */
-};
-
-struct iw_protocol_stats {
-       u64     ipInReceives;
-       u64     ipInHdrErrors;
-       u64     ipInTooBigErrors;
-       u64     ipInNoRoutes;
-       u64     ipInAddrErrors;
-       u64     ipInUnknownProtos;
-       u64     ipInTruncatedPkts;
-       u64     ipInDiscards;
-       u64     ipInDelivers;
-       u64     ipOutForwDatagrams;
-       u64     ipOutRequests;
-       u64     ipOutDiscards;
-       u64     ipOutNoRoutes;
-       u64     ipReasmTimeout;
-       u64     ipReasmReqds;
-       u64     ipReasmOKs;
-       u64     ipReasmFails;
-       u64     ipFragOKs;
-       u64     ipFragFails;
-       u64     ipFragCreates;
-       u64     ipInMcastPkts;
-       u64     ipOutMcastPkts;
-       u64     ipInBcastPkts;
-       u64     ipOutBcastPkts;
-
-       u64     tcpRtoAlgorithm;
-       u64     tcpRtoMin;
-       u64     tcpRtoMax;
-       u64     tcpMaxConn;
-       u64     tcpActiveOpens;
-       u64     tcpPassiveOpens;
-       u64     tcpAttemptFails;
-       u64     tcpEstabResets;
-       u64     tcpCurrEstab;
-       u64     tcpInSegs;
-       u64     tcpOutSegs;
-       u64     tcpRetransSegs;
-       u64     tcpInErrs;
-       u64     tcpOutRsts;
-};
-
-union rdma_protocol_stats {
-       struct ib_protocol_stats        ib;
-       struct iw_protocol_stats        iw;
-};
+/**
+ * struct rdma_hw_stats
+ * @timestamp - Used by the core code to track when the last update was
+ * @lifespan - Used by the core code to determine how old the counters
+ *   should be before being updated again.  Stored in jiffies, defaults
+ *   to 10 milliseconds, drivers can override the default by specifying
+ *   their own value during their allocation routine.
+ * @names - Array of pointers to static names used for the counters in
+ *   the sysfs directory.
+ * @num_counters - How many hardware counters there are.  If names is
+ *   shorter than this number, a kernel oops will result.  Driver authors
+ *   are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@names) < num_counters)
+ *   in their code to prevent this.
+ * @value - Array of u64 counters that are accessed by the sysfs code and
+ *   filled in by the driver's get_hw_stats routine
+ */
+struct rdma_hw_stats {
+       unsigned long   timestamp;
+       unsigned long   lifespan;
+       const char * const *names;
+       int             num_counters;
+       u64             value[];
+};
+
+#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 /* in milliseconds */
+/**
+ * rdma_alloc_hw_stats_struct - Allocate a zeroed rdma_hw_stats for a driver;
+ *   returns NULL if the allocation fails.
+ * @names - Array of static counter names (stored by pointer, not copied)
+ * @num_counters - How many elements in @names and in the value[] array
+ * @lifespan - How many milliseconds between updates (converted to jiffies)
+ */
+static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+               const char * const *names, int num_counters,
+               unsigned long lifespan)
+{
+       struct rdma_hw_stats *stats;
+
+       stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64),
+                       GFP_KERNEL);
+       if (!stats)
+               return NULL;
+       stats->names = names;
+       stats->num_counters = num_counters;
+       stats->lifespan = msecs_to_jiffies(lifespan);
+
+       return stats;
+}
+
 
 /* Define bits for the various functionality this port needs to be supported by
  * the core.
@@ -1707,8 +1706,29 @@ struct ib_device {
 
        struct iw_cm_verbs           *iwcm;
 
-       int                        (*get_protocol_stats)(struct ib_device *device,
-                                                        union rdma_protocol_stats *stats);
+       /**
+        * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
+        *   driver initialized data.  The struct is kfree()'ed by the sysfs
+        *   core when the device is removed.  A lifespan of -1 in the return
+        *   struct tells the core to set a default lifespan.
+        */
+       struct rdma_hw_stats      *(*alloc_hw_stats)(struct ib_device *device,
+                                                    u8 port_num);
+       /**
+        * get_hw_stats - Fill in the counter value(s) in the stats struct.
+        * @index - The index in the value array we wish to have updated, or
+        *   num_counters if we want all stats updated
+        * Return codes -
+        *   < 0 - Error, no counters updated
+        *   index - Updated the single counter pointed to by index
+        *   num_counters - Updated all counters (will reset the timestamp
+        *     and prevent further calls for lifespan milliseconds)
+        * Drivers are allowed to update all counters in lieu of just the
+        *   one given in index at their option
+        */
+       int                        (*get_hw_stats)(struct ib_device *device,
+                                                  struct rdma_hw_stats *stats,
+                                                  u8 port, int index);
        int                        (*query_device)(struct ib_device *device,
                                                   struct ib_device_attr *device_attr,
                                                   struct ib_udata *udata);
@@ -1926,6 +1946,8 @@ struct ib_device {
        u8                           node_type;
        u8                           phys_port_cnt;
        struct ib_device_attr        attrs;
+       struct attribute_group       *hw_stats_ag;
+       struct rdma_hw_stats         *hw_stats;
 
        /**
         * The following mandatory functions are used only at device