static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
{
	u64 *src, *dst;

	src = (u64 *)wqe;
	dst = (u64 *)((u8 *)wq->sq.queue +
		      wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE);
	if (t4_sq_onchip(wq)) {
		len16 = align(len16, 4);

		/* In onchip mode the copy below will be made to WC memory
		 * and could trigger DMA. In offchip mode the copy below only
		 * queues the WQE, DMA cannot start until t4_ring_sq_db
		 * happens */
		mmio_wc_start();
	}

	/* NOTE len16 cannot be large enough to write to the same sq.queue
	 * memory twice in this loop */
	while (len16) {
		*dst++ = *src++;
		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
			dst = (u64 *)wq->sq.queue;
		*dst++ = *src++;
		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
			dst = (u64 *)wq->sq.queue;
		len16--;
	}

	if (t4_sq_onchip(wq))
		mmio_flush_writes();
}
static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
{
	void *src, *dst;
	uintptr_t end;
	int total, len;

	src = &wqe->flits[0];
	dst = &wq->sq.queue->flits[wq->sq.wq_pidx *
	    (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
	if (t4_sq_onchip(wq)) {
		len16 = align(len16, 4);

		/* In onchip mode the copy below will be made to WC memory
		 * and could trigger DMA. In offchip mode the copy below only
		 * queues the WQE, DMA cannot start until t4_ring_sq_db
		 * happens */
		mmio_wc_start();
	}

	/* NOTE len16 cannot be large enough to write to the same sq.queue
	 * memory twice in this loop */
	total = len16 * 16;
	end = (uintptr_t)&wq->sq.queue[wq->sq.size];
	if (__predict_true((uintptr_t)dst + total <= end)) {
		/* Won't wrap around. */
		memcpy(dst, src, total);
	} else {
		len = end - (uintptr_t)dst;
		memcpy(dst, src, len);
		memcpy(wq->sq.queue, src + len, total - len);
	}

	if (t4_sq_onchip(wq))
		mmio_flush_writes();
}
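The memcpy-based version above is behavior-equivalent to the word-by-word loop in the first copy_wr_to_sq(): either the WQE fits before the end of sq.queue, or the copy is split in two at the wrap point. A minimal, self-contained sketch of that split-copy idea, with plain heap buffers and hypothetical names standing in for the driver's queue structures:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define RING_BYTES 64	/* toy queue size, a stand-in for sq.size slots */

/* Copy 'total' bytes of 'src' into 'ring' at byte offset 'pos',
 * splitting the copy in two when it would run past the end. */
static void ring_copy(uint8_t *ring, size_t pos, const void *src, size_t total)
{
	size_t tail = RING_BYTES - pos;	/* bytes left before the wrap point */

	if (total <= tail) {
		memcpy(ring + pos, src, total);	/* fits without wrapping */
	} else {
		memcpy(ring + pos, src, tail);
		memcpy(ring, (const uint8_t *)src + tail, total - tail);
	}
}

int main(void)
{
	uint8_t ring[RING_BYTES] = {0};
	uint8_t wqe[32];
	size_t i;

	for (i = 0; i < sizeof(wqe); i++)
		wqe[i] = (uint8_t)i;

	ring_copy(ring, 48, wqe, sizeof(wqe));	/* 16 bytes fit, 16 wrap */
	assert(memcmp(ring + 48, wqe, 16) == 0);
	assert(memcmp(ring, wqe + 16, 16) == 0);
	return 0;
}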
static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
{
	if (t4_sq_onchip(sq))
		dealloc_oc_sq(rdev, sq);
	else
		dealloc_host_sq(rdev, sq);
}
static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
{
	void *src, *dst;
	uintptr_t end;
	int total, len;

	src = &wqe->flits[0];
	dst = &wq->sq.queue->flits[wq->sq.wq_pidx *
	    (T4_EQ_ENTRY_SIZE / sizeof(__be64))];
	if (t4_sq_onchip(wq)) {
		len16 = align(len16, 4);
		wc_wmb();
	}

	total = len16 * 16;
	end = (uintptr_t)&wq->sq.queue[wq->sq.size];
	if (__predict_true((uintptr_t)dst + total <= end)) {
		/* Won't wrap around. */
		memcpy(dst, src, total);
	} else {
		len = end - (uintptr_t)dst;
		memcpy(dst, src, len);
		memcpy(wq->sq.queue, src + len, total - len);
	}
}
static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
{
	u64 *src, *dst;

	src = (u64 *)wqe;
	dst = (u64 *)((u8 *)wq->sq.queue +
		      wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE);
	if (t4_sq_onchip(wq)) {
		len16 = align(len16, 4);
		wc_wmb();
	}
	while (len16) {
		*dst++ = *src++;
		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
			dst = (u64 *)wq->sq.queue;
		*dst++ = *src++;
		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
			dst = (u64 *)wq->sq.queue;
		len16--;
	}
}
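The main difference between the variants above is the barrier discipline: the first two bracket the copy with mmio_wc_start()/mmio_flush_writes() (rdma-core's barriers for write-combining MMIO), while the older wc_wmb() versions issue a single write-combining barrier before the copy and none after. A rough, self-contained sketch of the bracketing pattern; the fence macros below are portable stand-ins chosen for illustration, not the driver's real barriers:

#include <stdatomic.h>
#include <stdint.h>

/* Hypothetical stand-ins for mmio_wc_start()/mmio_flush_writes();
 * full fences here, purely for illustration. */
#define toy_wc_start()	atomic_thread_fence(memory_order_seq_cst)
#define toy_wc_flush()	atomic_thread_fence(memory_order_seq_cst)

/* Copy a WQE into (notionally) write-combining memory with barriers on
 * both sides, mirroring the structure of the first copy_wr_to_sq(). */
static void toy_wc_copy(volatile uint64_t *wc_dst, const uint64_t *src,
			int words)
{
	int i;

	toy_wc_start();		/* order earlier stores before the WC writes */
	for (i = 0; i < words; i++)
		wc_dst[i] = src[i];
	toy_wc_flush();		/* push the write-combining buffer out */
}

int main(void)
{
	uint64_t buf[4] = {0};
	const uint64_t wqe[4] = {1, 2, 3, 4};

	toy_wc_copy(buf, wqe, 4);
	return buf[3] == 4 ? 0 : 1;
}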
static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
		     struct t4_cq *rcq, struct t4_cq *scq,
		     struct c4iw_dev_ucontext *uctx)
{
	int user = (uctx != &rdev->uctx);
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	struct c4iw_wr_wait wr_wait;
	struct sk_buff *skb;
	int ret;
	int eqsize;

	wq->sq.qid = c4iw_get_qpid(rdev, uctx);
	if (!wq->sq.qid)
		return -ENOMEM;

	wq->rq.qid = c4iw_get_qpid(rdev, uctx);
	if (!wq->rq.qid) {
		ret = -ENOMEM;
		goto free_sq_qid;
	}

	if (!user) {
		wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
				       GFP_KERNEL);
		if (!wq->sq.sw_sq) {
			ret = -ENOMEM;
			goto free_rq_qid;
		}

		wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
				       GFP_KERNEL);
		if (!wq->rq.sw_rq) {
			ret = -ENOMEM;
			goto free_sw_sq;
		}
	}

	/*
	 * RQT must be a power of 2.
	 */
	wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size);
	wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
	if (!wq->rq.rqt_hwaddr) {
		ret = -ENOMEM;
		goto free_sw_rq;
	}

	if (user) {
		ret = alloc_oc_sq(rdev, &wq->sq);
		if (ret)
			goto free_hwaddr;

		ret = alloc_host_sq(rdev, &wq->sq);
		if (ret)
			goto free_sq;
	} else {
		ret = alloc_host_sq(rdev, &wq->sq);
		if (ret)
			goto free_hwaddr;
	}
	memset(wq->sq.queue, 0, wq->sq.memsize);
	dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);

	wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
					  wq->rq.memsize, &(wq->rq.dma_addr),
					  GFP_KERNEL);
	if (!wq->rq.queue) {
		ret = -ENOMEM;
		goto free_sq;
	}
	PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
	     __func__, wq->sq.queue,
	     (unsigned long long)virt_to_phys(wq->sq.queue),
	     wq->rq.queue,
	     (unsigned long long)virt_to_phys(wq->rq.queue));
	memset(wq->rq.queue, 0, wq->rq.memsize);
	dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);

	wq->db = rdev->lldi.db_reg;
	wq->gts = rdev->lldi.gts_reg;
	if (user) {
		wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
			     (wq->sq.qid << rdev->qpshift);
		wq->sq.udb &= PAGE_MASK;
		wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
			     (wq->rq.qid << rdev->qpshift);
		wq->rq.udb &= PAGE_MASK;
	}
	wq->rdev = rdev;
	wq->rq.msn = 1;

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + 2 * sizeof *res;

	skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!skb) {
		ret = -ENOMEM;
		goto free_dma;
	}
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
	memset(res_wr, 0, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP(FW_RI_RES_WR) |
			V_FW_RI_RES_WR_NRES(2) |
			FW_WR_COMPL(1));
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (unsigned long) &wr_wait;
	res = res_wr->res;
	res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
	res->u.sqrq.op = FW_RI_RES_OP_WRITE;

	/*
	 * eqsize is the number of 64B entries plus the status page size.
	 */
	eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES;
	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
		V_FW_RI_RES_WR_HOSTFCMODE(0) |	/* no host cidx updates */
		V_FW_RI_RES_WR_CPRIO(0) |	/* don't keep in chip cache */
		V_FW_RI_RES_WR_PCIECHN(0) |	/* set by uP at ri_init time */
		(t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0) |
		V_FW_RI_RES_WR_IQID(scq->cqid));
	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
		V_FW_RI_RES_WR_DCAEN(0) |
		V_FW_RI_RES_WR_DCACPU(0) |
		V_FW_RI_RES_WR_FBMIN(2) |
		V_FW_RI_RES_WR_FBMAX(2) |
		V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
		V_FW_RI_RES_WR_CIDXFTHRESH(0) |
		V_FW_RI_RES_WR_EQSIZE(eqsize));
	res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
	res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
	res++;
	res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
	res->u.sqrq.op = FW_RI_RES_OP_WRITE;

	/*
	 * eqsize is the number of 64B entries plus the status page size.
	 */
	eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES;
	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
		V_FW_RI_RES_WR_HOSTFCMODE(0) |	/* no host cidx updates */
		V_FW_RI_RES_WR_CPRIO(0) |	/* don't keep in chip cache */
		V_FW_RI_RES_WR_PCIECHN(0) |	/* set by uP at ri_init time */
		V_FW_RI_RES_WR_IQID(rcq->cqid));
	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
		V_FW_RI_RES_WR_DCAEN(0) |
		V_FW_RI_RES_WR_DCACPU(0) |
		V_FW_RI_RES_WR_FBMIN(2) |
		V_FW_RI_RES_WR_FBMAX(2) |
		V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
		V_FW_RI_RES_WR_CIDXFTHRESH(0) |
		V_FW_RI_RES_WR_EQSIZE(eqsize));
	res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
	res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);

	c4iw_init_wr_wait(&wr_wait);

	ret = c4iw_ofld_send(rdev, skb);
	if (ret)
		goto free_dma;
	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
	if (ret)
		goto free_dma;

	PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
	     __func__, wq->sq.qid, wq->rq.qid, wq->db,
	     (unsigned long long)wq->sq.udb,
	     (unsigned long long)wq->rq.udb);

	return 0;
free_dma:
	dma_free_coherent(&(rdev->lldi.pdev->dev),
			  wq->rq.memsize, wq->rq.queue,
			  dma_unmap_addr(&wq->rq, mapping));
free_sq:
	dealloc_sq(rdev, &wq->sq);
free_hwaddr:
	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
free_sw_rq:
	kfree(wq->rq.sw_rq);
free_sw_sq:
	kfree(wq->sq.sw_sq);
free_rq_qid:
	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
free_sq_qid:
	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
	return ret;
}
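create_qp() unwinds its allocations with the kernel's usual reverse-order goto ladder: each label frees exactly what was acquired before the failing step. A miniature of the same pattern, using hypothetical names and plain malloc() in place of the driver's allocators:

#include <errno.h>
#include <stdlib.h>

struct toy_qp {
	void *sq;
	void *rq;
};

/* Each failure jumps to a label that releases everything acquired so far,
 * in reverse order; the success path falls through before the labels. */
static int toy_create_qp(struct toy_qp *qp, size_t sq_bytes, size_t rq_bytes)
{
	int ret;

	qp->sq = malloc(sq_bytes);
	if (!qp->sq)
		return -ENOMEM;	/* nothing to unwind yet */

	qp->rq = malloc(rq_bytes);
	if (!qp->rq) {
		ret = -ENOMEM;
		goto free_sq;	/* undo the SQ allocation */
	}

	return 0;
free_sq:
	free(qp->sq);
	return ret;
}

int main(void)
{
	struct toy_qp qp;
	int ret = toy_create_qp(&qp, 64, 64);

	if (!ret) {
		free(qp.rq);
		free(qp.sq);
	}
	return ret ? 1 : 0;
}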