mlx4/mlx5: Use dma_wmb/rmb where appropriate
author Alexander Duyck <alexander.h.duyck@redhat.com>
Thu, 9 Apr 2015 01:49:36 +0000 (18:49 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 9 Apr 2015 18:25:25 +0000 (14:25 -0400)
This patch should help to improve the performance of the mlx4 and mlx5
drivers on a number of architectures.  For example, on x86 dma_wmb/rmb
equates to a barrier() call, as the architecture is already strongly
ordered, and on PowerPC the call works out to an lwsync, which is
significantly less expensive than the sync that was being used for wmb.

I placed the new barriers between any spots that seemed to be trying to
order memory/memory reads or writes.  In any spots that involved MMIO I
left the existing wmb in place, as the new barriers cannot order
transactions between coherent and non-coherent memories (see the sketch
below).

v2: Reduced the replacements to just the spots where I could clearly
    identify the usage pattern.
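
To make the intended usage concrete, here is a minimal sketch of both
patterns; the descriptor layout, field names, and helpers (desc,
owner_bit, OWNER_HW_DONE, process_cqe, db_reg) are hypothetical
placeholders rather than the actual mlx4/mlx5 structures:

	/* Producer: publish a descriptor that lives in coherent DMA
	 * memory.  dma_wmb() keeps the ownership write from being
	 * reordered ahead of the payload writes.
	 */
	desc->addr       = cpu_to_be64(dma_addr);
	desc->byte_count = cpu_to_be32(len);
	dma_wmb();
	desc->owner = owner_bit;

	if (send_doorbell) {
		/* The doorbell is an MMIO write to non-coherent
		 * space, so the full wmb() must stay.
		 */
		wmb();
		iowrite32(db_val, db_reg);
	}

	/* Consumer: poll the ownership bit, then read the rest of
	 * the entry.  dma_rmb() keeps the reads in process_cqe()
	 * from being hoisted above the ownership check.
	 */
	while (!(READ_ONCE(cqe->owner) & OWNER_HW_DONE))
		cpu_relax();
	dma_rmb();
	process_cqe(cqe);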

Cc: Amir Vadai <amirv@mellanox.com>
Cc: Ido Shamay <idos@mellanox.com>
Cc: Eli Cohen <eli@mellanox.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 79b1501e7951fc18a97caf3c3f127a50c6c3e256..4fdd3c37e47bf7c7862b9edf569be6f7f38e8dae 100644
@@ -771,7 +771,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                /*
                 * make sure we read the CQE after we read the ownership bit
                 */
-               rmb();
+               dma_rmb();
 
                /* Drop packet on bad receive or bad checksum */
                if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 55f9f5c5344e19a3d8d76b083e0fd73d44b9870b..1783705273d89773c0a462cb28684f2969e55ac4 100644
@@ -416,7 +416,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
                 * make sure we read the CQE after we read the
                 * ownership bit
                 */
-               rmb();
+               dma_rmb();
 
                if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
                             MLX4_CQE_OPCODE_ERROR)) {
@@ -667,7 +667,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
                                       skb_frag_size(&shinfo->frags[0]));
                }
 
-               wmb();
+               dma_wmb();
                inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
        }
 }
@@ -804,7 +804,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 
                        data->addr = cpu_to_be64(dma);
                        data->lkey = ring->mr_key;
-                       wmb();
+                       dma_wmb();
                        data->byte_count = cpu_to_be32(byte_count);
                        --data;
                }
@@ -821,7 +821,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 
                        data->addr = cpu_to_be64(dma);
                        data->lkey = ring->mr_key;
-                       wmb();
+                       dma_wmb();
                        data->byte_count = cpu_to_be32(byte_count);
                }
                /* tx completion can avoid cache line miss for common cases */
@@ -938,7 +938,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
                /* Ensure new descriptor hits memory
                 * before setting ownership of this descriptor to HW
                 */
-               wmb();
+               dma_wmb();
                tx_desc->ctrl.owner_opcode = op_own;
 
                wmb();
@@ -958,7 +958,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
                /* Ensure new descriptor hits memory
                 * before setting ownership of this descriptor to HW
                 */
-               wmb();
+               dma_wmb();
                tx_desc->ctrl.owner_opcode = op_own;
                if (send_doorbell) {
                        wmb();
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 6e70ffee8e87ee2fa9957adeb5994ca346bf155b..190fd624bdfebd6e7b5b9e83f8470b840f8d09cb 100644
@@ -188,7 +188,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
        memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
        s_eqe->slave_id = slave;
        /* ensure all information is written before setting the ownership bit */
-       wmb();
+       dma_wmb();
        s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80;
        ++slave_eq->prod;
 
@@ -473,7 +473,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
                 * Make sure we read EQ entry contents after we've
                 * checked the ownership bit.
                 */
-               rmb();
+               dma_rmb();
 
                switch (eqe->type) {
                case MLX4_EVENT_TYPE_COMP:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index dbf190d9b9adb57c9108a5dcbea8dd80b6e25d5e..58800e4f39585c2fd30d76e8de4a21bfb6f8bf66 100644
@@ -208,7 +208,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
                 * Make sure we read EQ entry contents after we've
                 * checked the ownership bit.
                 */
-               rmb();
+               dma_rmb();
 
                mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
                              eq->eqn, eqe_type_str(eqe->type));