#include "iwl-csr.h"
#include "iwl-io.h"
#include "internal.h"
+#include "mvm/fw-api.h"
+
+/*
+ * iwl_pcie_txq_update_byte_tbl - Set up entry in Tx byte-count array
+ */
+static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans *trans,
+ struct iwl_txq *txq, u16 byte_cnt,
+ int num_tbs)
+{
+ struct iwlagn_scd_bc_tbl *scd_bc_tbl;
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ int write_ptr = txq->write_ptr;
+ u8 filled_tfd_size, num_fetch_chunks;
+ u16 len = byte_cnt;
+ __le16 bc_ent;
+
+ scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
+
+ len = DIV_ROUND_UP(len, 4);
+
+ if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX))
+ return;
+
+ filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
+ num_tbs * sizeof(struct iwl_tfh_tb);
+ /*
+ * filled_tfd_size contains the number of filled bytes in the TFD.
+ * Dividing it by 64 will give the number of chunks to fetch
+ * to SRAM- 0 for one chunk, 1 for 2 and so on.
+ * If, for example, TFD contains only 3 TBs then 32 bytes
+ * of the TFD are used, and only one chunk of 64 bytes should
+ * be fetched
+ */
+ num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
+
+ bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
+ scd_bc_tbl[txq->id].tfd_offset[write_ptr] = bc_ent;
+}
+
+/*
+ * iwl_pcie_gen2_txq_inc_wr_ptr - Send new write index to hardware
+ */
+static void iwl_pcie_gen2_txq_inc_wr_ptr(struct iwl_trans *trans,
+ struct iwl_txq *txq)
+{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ u32 reg = 0;
+ int txq_id = txq->id;
+
+ lockdep_assert_held(&txq->lock);
+
+ /*
+ * explicitly wake up the NIC if:
+ * 1. shadow registers aren't enabled
+ * 2. NIC is woken up for CMD regardless of shadow outside this function
+ * 3. there is a chance that the NIC is asleep
+ */
+ if (!trans->cfg->base_params->shadow_reg_enable &&
+ txq_id != trans_pcie->cmd_queue &&
+ test_bit(STATUS_TPOWER_PMI, &trans->status)) {
+ /*
+ * wake up nic if it's powered down ...
+ * uCode will wake up, and interrupt us again, so next
+ * time we'll skip this part.
+ */
+ reg = iwl_read32(trans, CSR_UCODE_DRV_GP1);
+
+ if (reg & CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP) {
+ IWL_DEBUG_INFO(trans,
+ "Tx queue %d requesting wakeup, GP1 = 0x%x\n",
+ txq_id, reg);
+ iwl_set_bit(trans, CSR_GP_CNTRL,
+ CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+ txq->need_update = true;
+ return;
+ }
+ }
+
+ /*
+ * if not in power-save mode, uCode will never sleep when we're
+ * trying to tx (during RFKILL, we're not trying to tx).
+ */
+ IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr);
+ if (!txq->block)
+ iwl_write32(trans, HBUS_TARG_WRPTR,
+ txq->write_ptr | (txq_id << 8));
+}
+
+static inline u8 iwl_pcie_gen2_get_num_tbs(struct iwl_trans *trans, void *_tfd)
+{
+ if (trans->cfg->use_tfh) {
+ struct iwl_tfh_tfd *tfd = _tfd;
+
+ return le16_to_cpu(tfd->num_tbs) & 0x1f;
+ } else {
+ struct iwl_tfd *tfd = _tfd;
+
+ return tfd->num_tbs & 0x1f;
+ }
+}
+
+static inline dma_addr_t iwl_pcie_gen2_tb_get_addr(struct iwl_trans *trans,
+ void *_tfd, u8 idx)
+{
+ if (trans->cfg->use_tfh) {
+ struct iwl_tfh_tfd *tfd = _tfd;
+ struct iwl_tfh_tb *tb = &tfd->tbs[idx];
+
+ return (dma_addr_t)(le64_to_cpu(tb->addr));
+ } else {
+ struct iwl_tfd *tfd = _tfd;
+ struct iwl_tfd_tb *tb = &tfd->tbs[idx];
+ dma_addr_t addr = get_unaligned_le32(&tb->lo);
+ dma_addr_t hi_len;
+
+ if (sizeof(dma_addr_t) <= sizeof(u32))
+ return addr;
+
+ hi_len = le16_to_cpu(tb->hi_n_len) & 0xF;
+
+ /*
+ * shift by 16 twice to avoid warnings on 32-bit
+ * (where this code never runs anyway due to the
+ * if statement above)
+ */
+ return addr | ((hi_len << 16) << 16);
+ }
+}
+
+static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
+ struct iwl_cmd_meta *meta,
+ struct iwl_txq *txq, int index)
+{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ int i, num_tbs;
+ void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
+
+ /* Sanity check on number of chunks */
+ num_tbs = iwl_pcie_gen2_get_num_tbs(trans, tfd);
+
+ if (num_tbs >= trans_pcie->max_tbs) {
+ IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
+ /* @todo issue fatal error, it is quite serious situation */
+ return;
+ }
+
+ /* first TB is never freed - it's the bidirectional DMA data */
+
+ for (i = 1; i < num_tbs; i++) {
+ if (meta->tbs & BIT(i))
+ dma_unmap_page(trans->dev,
+ iwl_pcie_gen2_tb_get_addr(trans, tfd,
+ i),
+ iwl_pcie_gen2_tb_get_addr(trans, tfd,
+ i),
+ DMA_TO_DEVICE);
+ else
+ dma_unmap_single(trans->dev,
+ iwl_pcie_gen2_tb_get_addr(trans, tfd,
+ i),
+ iwl_pcie_gen2_tb_get_addr(trans, tfd,
+ i),
+ DMA_TO_DEVICE);
+ }
+
+ if (trans->cfg->use_tfh) {
+ struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+
+ tfd_fh->num_tbs = 0;
+ } else {
+ struct iwl_tfd *tfd_fh = (void *)tfd;
+
+ tfd_fh->num_tbs = 0;
+ }
+}
+
+static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
+{
+ /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
+ * idx is bounded by n_window
+ */
+ int rd_ptr = txq->read_ptr;
+ int idx = get_cmd_index(txq, rd_ptr);
+
+ lockdep_assert_held(&txq->lock);
+
+ /* We have only q->n_window txq->entries, but we use
+ * TFD_QUEUE_SIZE_MAX tfds
+ */
+ iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr);
+
+ /* free SKB */
+ if (txq->entries) {
+ struct sk_buff *skb;
+
+ skb = txq->entries[idx].skb;
+
+ /* Can be called from irqs-disabled context
+ * If skb is not NULL, it means that the whole queue is being
+ * freed and that the queue is not empty - free the skb
+ */
+ if (skb) {
+ iwl_op_mode_free_skb(trans->op_mode, skb);
+ txq->entries[idx].skb = NULL;
+ }
+ }
+}
+
+static inline void iwl_pcie_gen2_set_tb(struct iwl_trans *trans, void *tfd,
+ u8 idx, dma_addr_t addr, u16 len)
+{
+ if (trans->cfg->use_tfh) {
+ struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+ struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx];
+
+ put_unaligned_le64(addr, &tb->addr);
+ tb->tb_len = cpu_to_le16(len);
+
+ tfd_fh->num_tbs = cpu_to_le16(idx + 1);
+ } else {
+ struct iwl_tfd *tfd_fh = (void *)tfd;
+ struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx];
+
+ u16 hi_n_len = len << 4;
+
+ put_unaligned_le32(addr, &tb->lo);
+ hi_n_len |= iwl_get_dma_hi_addr(addr);
+
+ tb->hi_n_len = cpu_to_le16(hi_n_len);
+
+ tfd_fh->num_tbs = idx + 1;
+ }
+}
+
+int iwl_pcie_gen2_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
+ dma_addr_t addr, u16 len, bool reset)
+{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ void *tfd;
+ u32 num_tbs;
+
+ tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr;
+
+ if (reset)
+ memset(tfd, 0, trans_pcie->tfd_size);
+
+ num_tbs = iwl_pcie_gen2_get_num_tbs(trans, tfd);
+
+ /* Each TFD can point to a maximum max_tbs Tx buffers */
+ if (num_tbs >= trans_pcie->max_tbs) {
+ IWL_ERR(trans, "Error can not send more than %d chunks\n",
+ trans_pcie->max_tbs);
+ return -EINVAL;
+ }
+
+ if (WARN(addr & ~IWL_TX_DMA_MASK,
+ "Unaligned address = %llx\n", (unsigned long long)addr))
+ return -EINVAL;
+
+ iwl_pcie_gen2_set_tb(trans, tfd, num_tbs, addr, len);
+
+ return num_tbs;
+}
+
+static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
+ struct iwl_txq *txq, u8 hdr_len,
+ struct iwl_cmd_meta *out_meta,
+ struct iwl_device_cmd *dev_cmd, u16 tb1_len)
+{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ u16 tb2_len;
+ int i;
+
+ /*
+ * Set up TFD's third entry to point directly to remainder
+ * of skb's head, if any
+ */
+ tb2_len = skb_headlen(skb) - hdr_len;
+
+ if (tb2_len > 0) {
+ dma_addr_t tb2_phys = dma_map_single(trans->dev,
+ skb->data + hdr_len,
+ tb2_len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) {
+ iwl_pcie_gen2_tfd_unmap(trans, out_meta, txq,
+ txq->write_ptr);
+ return -EINVAL;
+ }
+ iwl_pcie_gen2_build_tfd(trans, txq, tb2_phys, tb2_len, false);
+ }
+
+ /* set up the remaining entries to point to the data */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ dma_addr_t tb_phys;
+ int tb_idx;
+
+ if (!skb_frag_size(frag))
+ continue;
+
+ tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
+ skb_frag_size(frag), DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
+ iwl_pcie_gen2_tfd_unmap(trans, out_meta, txq,
+ txq->write_ptr);
+ return -EINVAL;
+ }
+ tb_idx = iwl_pcie_gen2_build_tfd(trans, txq, tb_phys,
+ skb_frag_size(frag), false);
+
+ out_meta->tbs |= BIT(tb_idx);
+ }
+
+ trace_iwlwifi_dev_tx(trans->dev, skb,
+ iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+ trans_pcie->tfd_size,
+ &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
+ skb->data + hdr_len, tb2_len);
+ trace_iwlwifi_dev_tx_data(trans->dev, skb,
+ hdr_len, skb->len - hdr_len);
+ return 0;
+}
+
+#define TX_CMD_FLG_MH_PAD_MSK cpu_to_le32(TX_CMD_FLG_MH_PAD)
+
+int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
+ struct iwl_device_cmd *dev_cmd, int txq_id)
+{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ struct ieee80211_hdr *hdr;
+ struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload;
+ struct iwl_cmd_meta *out_meta;
+ struct iwl_txq *txq;
+ dma_addr_t tb0_phys, tb1_phys, scratch_phys;
+ void *tb1_addr;
+ void *tfd;
+ u16 len, tb1_len;
+ bool wait_write_ptr;
+ __le16 fc;
+ u8 hdr_len;
+ u16 wifi_seq;
+ bool amsdu;
+
+ txq = &trans_pcie->txq[txq_id];
+
+ if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
+ "TX on unused queue %d\n", txq_id))
+ return -EINVAL;
+
+ if (unlikely(trans_pcie->sw_csum_tx &&
+ skb->ip_summed == CHECKSUM_PARTIAL)) {
+ int offs = skb_checksum_start_offset(skb);
+ int csum_offs = offs + skb->csum_offset;
+ __wsum csum;
+
+ if (skb_ensure_writable(skb, csum_offs + sizeof(__sum16)))
+ return -1;
+
+ csum = skb_checksum(skb, offs, skb->len - offs, 0);
+ *(__sum16 *)(skb->data + csum_offs) = csum_fold(csum);
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ if (skb_is_nonlinear(skb) &&
+ skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
+
+ /* mac80211 always puts the full header into the SKB's head,
+ * so there's no need to check if it's readable there
+ */
+ hdr = (struct ieee80211_hdr *)skb->data;
+ fc = hdr->frame_control;
+ hdr_len = ieee80211_hdrlen(fc);
+
+ spin_lock(&txq->lock);
+
+ if (iwl_queue_space(txq) < txq->high_mark) {
+ iwl_stop_queue(trans, txq);
+
+ /* don't put the packet on the ring, if there is no room */
+ if (unlikely(iwl_queue_space(txq) < 3)) {
+ struct iwl_device_cmd **dev_cmd_ptr;
+
+ dev_cmd_ptr = (void *)((u8 *)skb->cb +
+ trans_pcie->dev_cmd_offs);
+
+ *dev_cmd_ptr = dev_cmd;
+ __skb_queue_tail(&txq->overflow_q, skb);
+
+ spin_unlock(&txq->lock);
+ return 0;
+ }
+ }
+
+ /* In AGG mode, the index in the ring must correspond to the WiFi
+ * sequence number. This is a HW requirements to help the SCD to parse
+ * the BA.
+ * Check here that the packets are in the right place on the ring.
+ */
+ wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+ WARN_ONCE(txq->ampdu &&
+ (wifi_seq & 0xff) != txq->write_ptr,
+ "Q: %d WiFi Seq %d tfdNum %d",
+ txq_id, wifi_seq, txq->write_ptr);
+
+ /* Set up driver data for this TFD */
+ txq->entries[txq->write_ptr].skb = skb;
+ txq->entries[txq->write_ptr].cmd = dev_cmd;
+
+ dev_cmd->hdr.sequence =
+ cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
+ INDEX_TO_SEQ(txq->write_ptr)));
+
+ tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr);
+ scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
+ offsetof(struct iwl_tx_cmd, scratch);
+
+ tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
+ tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
+
+ /* Set up first empty entry in queue's array of Tx/cmd buffers */
+ out_meta = &txq->entries[txq->write_ptr].meta;
+ out_meta->flags = 0;
+
+ /*
+ * The second TB (tb1) points to the remainder of the TX command
+ * and the 802.11 header - dword aligned size
+ * (This calculation modifies the TX command, so do it before the
+ * setup of the first TB)
+ */
+ len = sizeof(struct iwl_tx_cmd) + sizeof(struct iwl_cmd_header) +
+ hdr_len - IWL_FIRST_TB_SIZE;
+ /* do not align A-MSDU to dword as the subframe header aligns it */
+ amsdu = ieee80211_is_data_qos(fc) &&
+ (*ieee80211_get_qos_ctl(hdr) &
+ IEEE80211_QOS_CTL_A_MSDU_PRESENT);
+ if (trans_pcie->sw_csum_tx || !amsdu) {
+ tb1_len = ALIGN(len, 4);
+ /* Tell NIC about any 2-byte padding after MAC header */
+ if (tb1_len != len)
+ tx_cmd->tx_flags |= TX_CMD_FLG_MH_PAD_MSK;
+ } else {
+ tb1_len = len;
+ }
+
+ /*
+ * The first TB points to bi-directional DMA data, we'll
+ * memcpy the data into it later.
+ */
+ iwl_pcie_gen2_build_tfd(trans, txq, tb0_phys, IWL_FIRST_TB_SIZE, true);
+
+ /* there must be data left over for TB1 or this code must be changed */
+ BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_FIRST_TB_SIZE);
+
+ /* map the data for TB1 */
+ tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
+ tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
+ goto out_err;
+ iwl_pcie_gen2_build_tfd(trans, txq, tb1_phys, tb1_len, false);
+
+ if (amsdu) {
+ if (unlikely(iwl_fill_data_tbs_amsdu(trans, skb, txq, hdr_len,
+ out_meta, dev_cmd,
+ tb1_len)))
+ goto out_err;
+ } else if (unlikely(iwl_fill_data_tbs(trans, skb, txq, hdr_len,
+ out_meta, dev_cmd, tb1_len))) {
+ goto out_err;
+ }
+
+ /* building the A-MSDU might have changed this data, so memcpy it now */
+ memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
+ IWL_FIRST_TB_SIZE);
+
+ tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
+ /* Set up entry for this TFD in Tx byte-count array */
+ iwl_pcie_gen2_update_byte_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
+ iwl_pcie_gen2_get_num_tbs(trans, tfd));
+
+ wait_write_ptr = ieee80211_has_morefrags(fc);
+
+ /* start timer if queue currently empty */
+ if (txq->read_ptr == txq->write_ptr) {
+ if (txq->wd_timeout) {
+ /*
+ * If the TXQ is active, then set the timer, if not,
+ * set the timer in remainder so that the timer will
+ * be armed with the right value when the station will
+ * wake up.
+ */
+ if (!txq->frozen)
+ mod_timer(&txq->stuck_timer,
+ jiffies + txq->wd_timeout);
+ else
+ txq->frozen_expiry_remainder = txq->wd_timeout;
+ }
+ IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", txq->id);
+ iwl_trans_ref(trans);
+ }
+
+ /* Tell device the write index *just past* this latest filled TFD */
+ txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
+ if (!wait_write_ptr)
+ iwl_pcie_gen2_txq_inc_wr_ptr(trans, txq);
+
+ /*
+ * At this point the frame is "transmitted" successfully
+ * and we will get a TX status notification eventually.
+ */
+ spin_unlock(&txq->lock);
+ return 0;
+out_err:
+ spin_unlock(&txq->lock);
+ return -1;
+}
/*
* iwl_pcie_gen2_txq_unmap - Unmap any remaining DMA mappings and free skb's