qed: Optimize qed_chain datapath usage
author Mintz, Yuval <Yuval.Mintz@cavium.com>
Tue, 29 Nov 2016 14:47:01 +0000 (16:47 +0200)
committer David S. Miller <davem@davemloft.net>
Wed, 30 Nov 2016 19:32:02 +0000 (14:32 -0500)
The chain structure and functions are widely used by the qed* modules,
both for configuration and datapath.
E.g., qede's Tx has one such chain and its Rx has two.

Currently, the structure's fields which are required for datapath
related functions [produce/consume] are intertwined with fields which
are required only for configuration purposes [init/destroy/etc.].

This patch re-arranges the chain structure so that all the fields which
are required for datapath usage could reside in a single cacheline instead
of the two which are required today.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
include/linux/qed/qed_chain.h

index 5be7b8a25425733d26a7793ab42d10cf07958414..80162ee0391fe0284306adb25de20bdd9f5c4326 100644 (file)
@@ -2283,12 +2283,12 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
 {
        void **pp_virt_addr_tbl = p_chain->pbl.pp_virt_addr_tbl;
        u32 page_cnt = p_chain->page_cnt, i, pbl_size;
-       u8 *p_pbl_virt = p_chain->pbl.p_virt_table;
+       u8 *p_pbl_virt = p_chain->pbl_sp.p_virt_table;
 
        if (!pp_virt_addr_tbl)
                return;
 
-       if (!p_chain->pbl.p_virt_table)
+       if (!p_pbl_virt)
                goto out;
 
        for (i = 0; i < page_cnt; i++) {
@@ -2306,7 +2306,8 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
        pbl_size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE;
        dma_free_coherent(&cdev->pdev->dev,
                          pbl_size,
-                         p_chain->pbl.p_virt_table, p_chain->pbl.p_phys_table);
+                         p_chain->pbl_sp.p_virt_table,
+                         p_chain->pbl_sp.p_phys_table);
 out:
        vfree(p_chain->pbl.pp_virt_addr_tbl);
 }
index 2888eb0628f815446609aa035adf655d3d143720..d0a58282f2a8d3c93747fc892d6e011513d3e420 100644 (file)
@@ -347,11 +347,11 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 
        /* Place EQ address in RAMROD */
        DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr,
-                      p_hwfn->p_eq->chain.pbl.p_phys_table);
+                      p_hwfn->p_eq->chain.pbl_sp.p_phys_table);
        page_cnt = (u8)qed_chain_get_page_cnt(&p_hwfn->p_eq->chain);
        p_ramrod->event_ring_num_pages = page_cnt;
        DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
-                      p_hwfn->p_consq->chain.pbl.p_phys_table);
+                      p_hwfn->p_consq->chain.pbl_sp.p_phys_table);
 
        qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
index 72d88cf3ca25b70fe160fa1d250803f78ee2a1ee..37dfba101c6cd8e5f05ca4fda6db321900dd9ccc 100644 (file)
@@ -56,23 +56,6 @@ struct qed_chain_pbl_u32 {
        u32 cons_page_idx;
 };
 
-struct qed_chain_pbl {
-       /* Base address of a pre-allocated buffer for pbl */
-       dma_addr_t      p_phys_table;
-       void            *p_virt_table;
-
-       /* Table for keeping the virtual addresses of the chain pages,
-        * respectively to the physical addresses in the pbl table.
-        */
-       void **pp_virt_addr_tbl;
-
-       /* Index to current used page by producer/consumer */
-       union {
-               struct qed_chain_pbl_u16 pbl16;
-               struct qed_chain_pbl_u32 pbl32;
-       } u;
-};
-
 struct qed_chain_u16 {
        /* Cyclic index of next element to produce/consme */
        u16 prod_idx;
@@ -86,46 +69,78 @@ struct qed_chain_u32 {
 };
 
 struct qed_chain {
-       void                    *p_virt_addr;
-       dma_addr_t              p_phys_addr;
-       void                    *p_prod_elem;
-       void                    *p_cons_elem;
+       /* fastpath portion of the chain - required for commands such
+        * as produce / consume.
+        */
+       /* Point to next element to produce/consume */
+       void *p_prod_elem;
+       void *p_cons_elem;
+
+       /* Fastpath portions of the PBL [if exists] */
+       struct {
+               /* Table for keeping the virtual addresses of the chain pages,
+                * respectively to the physical addresses in the pbl table.
+                */
+               void **pp_virt_addr_tbl;
 
-       enum qed_chain_mode     mode;
-       enum qed_chain_use_mode intended_use; /* used to produce/consume */
-       enum qed_chain_cnt_type cnt_type;
+               union {
+                       struct qed_chain_pbl_u16 u16;
+                       struct qed_chain_pbl_u32 u32;
+               } c;
+       } pbl;
 
        union {
                struct qed_chain_u16 chain16;
                struct qed_chain_u32 chain32;
        } u;
 
+       /* Capacity counts only usable elements */
+       u32 capacity;
        u32 page_cnt;
 
-       /* Number of elements - capacity is for usable elements only,
-        * while size will contain total number of elements [for entire chain].
+       enum qed_chain_mode mode;
+
+       /* Elements information for fast calculations */
+       u16 elem_per_page;
+       u16 elem_per_page_mask;
+       u16 elem_size;
+       u16 next_page_mask;
+       u16 usable_per_page;
+       u8 elem_unusable;
+
+       u8 cnt_type;
+
+       /* Slowpath of the chain - required for initialization and destruction,
+        * but isn't involved in regular functionality.
         */
-       u32 capacity;
+
+       /* Base address of a pre-allocated buffer for pbl */
+       struct {
+               dma_addr_t p_phys_table;
+               void *p_virt_table;
+       } pbl_sp;
+
+       /* Address of first page of the chain - the address is required
+        * for fastpath operation [consume/produce] but only for the SINGLE
+        * flavour which isn't considered fastpath [== SPQ].
+        */
+       void *p_virt_addr;
+       dma_addr_t p_phys_addr;
+
+       /* Total number of elements [for entire chain] */
        u32 size;
 
-       /* Elements information for fast calculations */
-       u16                     elem_per_page;
-       u16                     elem_per_page_mask;
-       u16                     elem_unusable;
-       u16                     usable_per_page;
-       u16                     elem_size;
-       u16                     next_page_mask;
-       struct qed_chain_pbl    pbl;
+       u8 intended_use;
 };
 
 #define QED_CHAIN_PBL_ENTRY_SIZE        (8)
 #define QED_CHAIN_PAGE_SIZE             (0x1000)
 #define ELEMS_PER_PAGE(elem_size)       (QED_CHAIN_PAGE_SIZE / (elem_size))
 
-#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)     \
-       ((mode == QED_CHAIN_MODE_NEXT_PTR) ?         \
-        (1 + ((sizeof(struct qed_chain_next) - 1) / \
-              (elem_size))) : 0)
+#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)        \
+       (((mode) == QED_CHAIN_MODE_NEXT_PTR) ?           \
+        (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / \
+                  (elem_size))) : 0)
 
 #define USABLE_ELEMS_PER_PAGE(elem_size, mode) \
        ((u32)(ELEMS_PER_PAGE(elem_size) -     \
@@ -186,7 +201,7 @@ static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain)
        return p_chain->usable_per_page;
 }
 
-static inline u16 qed_chain_get_unusable_per_page(struct qed_chain *p_chain)
+static inline u8 qed_chain_get_unusable_per_page(struct qed_chain *p_chain)
 {
        return p_chain->elem_unusable;
 }
@@ -198,7 +213,7 @@ static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain)
 
 static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain)
 {
-       return p_chain->pbl.p_phys_table;
+       return p_chain->pbl_sp.p_phys_table;
 }
 
 /**
@@ -214,10 +229,10 @@ static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain)
 static inline void
 qed_chain_advance_page(struct qed_chain *p_chain,
                       void **p_next_elem, void *idx_to_inc, void *page_to_inc)
-
 {
        struct qed_chain_next *p_next = NULL;
        u32 page_index = 0;
+
        switch (p_chain->mode) {
        case QED_CHAIN_MODE_NEXT_PTR:
                p_next = *p_next_elem;
@@ -305,7 +320,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain)
                if ((p_chain->u.chain16.prod_idx &
                     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
                        p_prod_idx = &p_chain->u.chain16.prod_idx;
-                       p_prod_page_idx = &p_chain->pbl.u.pbl16.prod_page_idx;
+                       p_prod_page_idx = &p_chain->pbl.c.u16.prod_page_idx;
                        qed_chain_advance_page(p_chain, &p_chain->p_prod_elem,
                                               p_prod_idx, p_prod_page_idx);
                }
@@ -314,7 +329,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain)
                if ((p_chain->u.chain32.prod_idx &
                     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
                        p_prod_idx = &p_chain->u.chain32.prod_idx;
-                       p_prod_page_idx = &p_chain->pbl.u.pbl32.prod_page_idx;
+                       p_prod_page_idx = &p_chain->pbl.c.u32.prod_page_idx;
                        qed_chain_advance_page(p_chain, &p_chain->p_prod_elem,
                                               p_prod_idx, p_prod_page_idx);
                }
@@ -378,7 +393,7 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain)
                if ((p_chain->u.chain16.cons_idx &
                     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
                        p_cons_idx = &p_chain->u.chain16.cons_idx;
-                       p_cons_page_idx = &p_chain->pbl.u.pbl16.cons_page_idx;
+                       p_cons_page_idx = &p_chain->pbl.c.u16.cons_page_idx;
                        qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
                                               p_cons_idx, p_cons_page_idx);
                }
@@ -387,8 +402,8 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain)
                if ((p_chain->u.chain32.cons_idx &
                     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
                        p_cons_idx = &p_chain->u.chain32.cons_idx;
-                       p_cons_page_idx = &p_chain->pbl.u.pbl32.cons_page_idx;
-               qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
+                       p_cons_page_idx = &p_chain->pbl.c.u32.cons_page_idx;
+                       qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
                                               p_cons_idx, p_cons_page_idx);
                }
                p_chain->u.chain32.cons_idx++;
@@ -429,25 +444,26 @@ static inline void qed_chain_reset(struct qed_chain *p_chain)
                u32 reset_val = p_chain->page_cnt - 1;
 
                if (is_chain_u16(p_chain)) {
-                       p_chain->pbl.u.pbl16.prod_page_idx = (u16)reset_val;
-                       p_chain->pbl.u.pbl16.cons_page_idx = (u16)reset_val;
+                       p_chain->pbl.c.u16.prod_page_idx = (u16)reset_val;
+                       p_chain->pbl.c.u16.cons_page_idx = (u16)reset_val;
                } else {
-                       p_chain->pbl.u.pbl32.prod_page_idx = reset_val;
-                       p_chain->pbl.u.pbl32.cons_page_idx = reset_val;
+                       p_chain->pbl.c.u32.prod_page_idx = reset_val;
+                       p_chain->pbl.c.u32.cons_page_idx = reset_val;
                }
        }
 
        switch (p_chain->intended_use) {
-       case QED_CHAIN_USE_TO_CONSUME_PRODUCE:
-       case QED_CHAIN_USE_TO_PRODUCE:
-               /* Do nothing */
-               break;
-
        case QED_CHAIN_USE_TO_CONSUME:
                /* produce empty elements */
                for (i = 0; i < p_chain->capacity; i++)
                        qed_chain_recycle_consumed(p_chain);
                break;
+
+       case QED_CHAIN_USE_TO_CONSUME_PRODUCE:
+       case QED_CHAIN_USE_TO_PRODUCE:
+       default:
+               /* Do nothing */
+               break;
        }
 }
 
@@ -473,13 +489,13 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain,
        p_chain->p_virt_addr = NULL;
        p_chain->p_phys_addr = 0;
        p_chain->elem_size      = elem_size;
-       p_chain->intended_use = intended_use;
+       p_chain->intended_use = (u8)intended_use;
        p_chain->mode           = mode;
-       p_chain->cnt_type = cnt_type;
+       p_chain->cnt_type = (u8)cnt_type;
 
-       p_chain->elem_per_page          = ELEMS_PER_PAGE(elem_size);
+       p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size);
        p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode);
-       p_chain->elem_per_page_mask     = p_chain->elem_per_page - 1;
+       p_chain->elem_per_page_mask = p_chain->elem_per_page - 1;
        p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode);
        p_chain->next_page_mask = (p_chain->usable_per_page &
                                   p_chain->elem_per_page_mask);
@@ -488,8 +504,8 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain,
        p_chain->capacity = p_chain->usable_per_page * page_cnt;
        p_chain->size = p_chain->elem_per_page * page_cnt;
 
-       p_chain->pbl.p_phys_table = 0;
-       p_chain->pbl.p_virt_table = NULL;
+       p_chain->pbl_sp.p_phys_table = 0;
+       p_chain->pbl_sp.p_virt_table = NULL;
        p_chain->pbl.pp_virt_addr_tbl = NULL;
 }
 
@@ -530,8 +546,8 @@ static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain,
                                          dma_addr_t p_phys_pbl,
                                          void **pp_virt_addr_tbl)
 {
-       p_chain->pbl.p_phys_table = p_phys_pbl;
-       p_chain->pbl.p_virt_table = p_virt_pbl;
+       p_chain->pbl_sp.p_phys_table = p_phys_pbl;
+       p_chain->pbl_sp.p_virt_table = p_virt_pbl;
        p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl;
 }