static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize, &(sq->dma_addr), GFP_KERNEL); if (!sq->queue) return -ENOMEM; sq->phys_addr = virt_to_phys(sq->queue); pci_unmap_addr_set(sq, mapping, sq->dma_addr); return 0; }
static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct b44 *bp = netdev_priv(dev); dma_addr_t mapping; u32 len, entry, ctrl; len = skb->len; spin_lock_irq(&bp->lock); /* This is a hard error, log it. */ if (unlikely(TX_BUFFS_AVAIL(bp) < 1)) { netif_stop_queue(dev); spin_unlock_irq(&bp->lock); printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n", dev->name); return 1; } entry = bp->tx_prod; mapping = pci_map_single(bp->pdev, skb->data, len, PCI_DMA_TODEVICE); bp->tx_buffers[entry].skb = skb; pci_unmap_addr_set(&bp->tx_buffers[entry], mapping, mapping); ctrl = (len & DESC_CTRL_LEN); ctrl |= DESC_CTRL_IOC | DESC_CTRL_SOF | DESC_CTRL_EOF; if (entry == (B44_TX_RING_SIZE - 1)) ctrl |= DESC_CTRL_EOT; bp->tx_ring[entry].ctrl = cpu_to_le32(ctrl); bp->tx_ring[entry].addr = cpu_to_le32((u32) mapping+bp->dma_offset); entry = NEXT_TX(entry); bp->tx_prod = entry; wmb(); bw32(bp, B44_DMATX_PTR, entry * sizeof(struct dma_desc)); if (bp->flags & B44_FLAG_BUGGY_TXPTR) bw32(bp, B44_DMATX_PTR, entry * sizeof(struct dma_desc)); if (bp->flags & B44_FLAG_REORDER_BUG) br32(bp, B44_DMATX_PTR); if (TX_BUFFS_AVAIL(bp) < 1) netif_stop_queue(dev); spin_unlock_irq(&bp->lock); dev->trans_start = jiffies; return 0; }
static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { sq->queue = contigmalloc(sq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (sq->queue) sq->dma_addr = vtophys(sq->queue); else return -ENOMEM; sq->phys_addr = vtophys(sq->queue); pci_unmap_addr_set(sq, mapping, sq->dma_addr); CTR4(KTR_IW_CXGBE, "%s sq %p dma_addr %p phys_addr %p", __func__, sq->queue, sq->dma_addr, sq->phys_addr); return 0; }
/* Works like this. This chip writes a 'struct rx_header" 30 bytes * before the DMA address you give it. So we allocate 30 more bytes * for the RX buffer, DMA map all of it, skb_reserve the 30 bytes, then * point the chip at 30 bytes past where the rx_header will go. */ static int b44_alloc_rx_skb(struct b44 *bp, int src_idx, u32 dest_idx_unmasked) { struct dma_desc *dp; struct ring_info *src_map, *map; struct rx_header *rh; struct sk_buff *skb; dma_addr_t mapping; int dest_idx; u32 ctrl; src_map = NULL; if (src_idx >= 0) src_map = &bp->rx_buffers[src_idx]; dest_idx = dest_idx_unmasked & (B44_RX_RING_SIZE - 1); map = &bp->rx_buffers[dest_idx]; skb = dev_alloc_skb(RX_PKT_BUF_SZ); if (skb == NULL) return -ENOMEM; skb->dev = bp->dev; mapping = pci_map_single(bp->pdev, skb->data, RX_PKT_BUF_SZ, PCI_DMA_FROMDEVICE); skb_reserve(skb, bp->rx_offset); rh = (struct rx_header *) (skb->data - bp->rx_offset); rh->len = 0; rh->flags = 0; map->skb = skb; pci_unmap_addr_set(map, mapping, mapping); if (src_map != NULL) src_map->skb = NULL; ctrl = (DESC_CTRL_LEN & (RX_PKT_BUF_SZ - bp->rx_offset)); if (dest_idx == (B44_RX_RING_SIZE - 1)) ctrl |= DESC_CTRL_EOT; dp = &bp->rx_ring[dest_idx]; dp->ctrl = cpu_to_le32(ctrl); dp->addr = cpu_to_le32((u32) mapping + bp->rx_offset + bp->dma_offset); return RX_PKT_BUF_SZ; }
static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, int msg_size) { u8 *pool_start; pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size, &mq->host_dma, GFP_KERNEL); if (!pool_start) return -ENOMEM; c2_mq_rep_init(mq, 0, /* index (currently unknown) */ q_size, msg_size, pool_start, NULL, /* peer (currently unknown) */ C2_MQ_HOST_TARGET); pci_unmap_addr_set(mq, mapping, mq->host_dma); return 0; }
static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, struct c4iw_dev_ucontext *uctx) { struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; int user = (uctx != &rdev->uctx); struct c4iw_wr_wait wr_wait; int ret; struct sk_buff *skb; cq->cqid = c4iw_get_cqid(rdev, uctx); if (!cq->cqid) { ret = -ENOMEM; goto err1; } if (!user) { cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL); if (!cq->sw_queue) { ret = -ENOMEM; goto err2; } } cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize, &cq->dma_addr, GFP_KERNEL); if (!cq->queue) { ret = -ENOMEM; goto err3; } pci_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, cq->memsize); /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + sizeof *res; skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto err4; } set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( FW_WR_OP(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(1) | FW_WR_COMPL(1)); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_WRITE; res->u.cq.iqid = cpu_to_be32(cq->cqid); res->u.cq.iqandst_to_iqandstindex = cpu_to_be32( V_FW_RI_RES_WR_IQANUS(0) | V_FW_RI_RES_WR_IQANUD(1) | F_FW_RI_RES_WR_IQANDST | V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids)); res->u.cq.iqdroprss_to_iqesize = cpu_to_be16( F_FW_RI_RES_WR_IQDROPRSS | V_FW_RI_RES_WR_IQPCIECH(2) | V_FW_RI_RES_WR_IQINTCNTTHRESH(0) | F_FW_RI_RES_WR_IQO | V_FW_RI_RES_WR_IQESIZE(1)); res->u.cq.iqsize = cpu_to_be16(cq->size); res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); c4iw_init_wr_wait(&wr_wait); ret = c4iw_ofld_send(rdev, skb); if (ret) goto err4; PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait); ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); if (ret) goto err4; cq->gen = 1; cq->gts = rdev->lldi.gts_reg; cq->rdev = rdev; if (user) { cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) + (cq->cqid << rdev->cqshift); cq->ugts &= PAGE_MASK; } return 0; err4: dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue, pci_unmap_addr(cq, mapping)); err3: kfree(cq->sw_queue); err2: c4iw_put_cqid(rdev, cq->cqid, uctx); err1: return ret; }
/* * Allocate and register buffer for WQEs. qp->rq.max, sq.max, * rq.max_gs and sq.max_gs must all be assigned. * mthca_alloc_wqe_buf will calculate rq.wqe_shift and * sq.wqe_shift (as well as send_wqe_offset, is_direct, and * queue) */ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_qp *qp) { int size; int i; int npages, shift; dma_addr_t t; u64 *dma_list = NULL; int err = -ENOMEM; size = sizeof (struct mthca_next_seg) + qp->rq.max_gs * sizeof (struct mthca_data_seg); for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; qp->rq.wqe_shift++) ; /* nothing */ size = sizeof (struct mthca_next_seg) + qp->sq.max_gs * sizeof (struct mthca_data_seg); if (qp->transport == MLX) size += 2 * sizeof (struct mthca_data_seg); else if (qp->transport == UD) size += sizeof (struct mthca_ud_seg); else /* bind seg is as big as atomic + raddr segs */ size += sizeof (struct mthca_bind_seg); for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; qp->sq.wqe_shift++) ; /* nothing */ qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, 1 << qp->sq.wqe_shift); size = PAGE_ALIGN(qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift)); qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64), GFP_KERNEL); if (!qp->wrid) goto err_out; if (size <= MTHCA_MAX_DIRECT_QP_SIZE) { qp->is_direct = 1; npages = 1; shift = get_order(size) + PAGE_SHIFT; if (0) mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n", size, shift); qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t); if (!qp->queue.direct.buf) goto err_out; pci_unmap_addr_set(&qp->queue.direct, mapping, t); memset(qp->queue.direct.buf, 0, size); while (t & ((1 << shift) - 1)) { --shift; npages *= 2; } dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); if (!dma_list) goto err_out_free; for (i = 0; i < npages; ++i) dma_list[i] = t + i * (1 << shift); } else { qp->is_direct = 0; npages = size / PAGE_SIZE; shift = PAGE_SHIFT; if (0) mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages); dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); if (!dma_list) goto err_out; qp->queue.page_list = kmalloc(npages * sizeof *qp->queue.page_list, GFP_KERNEL); if (!qp->queue.page_list) goto err_out; for (i = 0; i < npages; ++i) { qp->queue.page_list[i].buf = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); if (!qp->queue.page_list[i].buf) goto err_out_free; memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE); pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t); dma_list[i] = t; } } err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift, npages, 0, size, MTHCA_MPT_FLAG_LOCAL_WRITE | MTHCA_MPT_FLAG_LOCAL_READ, &qp->mr); if (err) goto err_out_free; kfree(dma_list); return 0; err_out_free: if (qp->is_direct) { pci_free_consistent(dev->pdev, size, qp->queue.direct.buf, pci_unmap_addr(&qp->queue.direct, mapping)); } else for (i = 0; i < npages; ++i) { if (qp->queue.page_list[i].buf) pci_free_consistent(dev->pdev, PAGE_SIZE, qp->queue.page_list[i].buf, pci_unmap_addr(&qp->queue.page_list[i], mapping)); } err_out: kfree(qp->wrid); kfree(dma_list); return err; }
static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) { int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; struct sk_buff *skb; int ret; int eqsize; wq->sq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->sq.qid) return -ENOMEM; wq->rq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->rq.qid) goto err1; if (!user) { wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, GFP_KERNEL); if (!wq->sq.sw_sq) goto err2; wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, GFP_KERNEL); if (!wq->rq.sw_rq) goto err3; } /* * RQT must be a power of 2. */ wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); if (!wq->rq.rqt_hwaddr) goto err4; if (user) { if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq)) goto err5; } else if (alloc_host_sq(rdev, &wq->sq)) goto err5; memset(wq->sq.queue, 0, wq->sq.memsize); pci_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, &(wq->rq.dma_addr), GFP_KERNEL); if (!wq->rq.queue) goto err6; PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", __func__, wq->sq.queue, (unsigned long long)virt_to_phys(wq->sq.queue), wq->rq.queue, (unsigned long long)virt_to_phys(wq->rq.queue)); memset(wq->rq.queue, 0, wq->rq.memsize); pci_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; wq->gts = rdev->lldi.gts_reg; if (user) { wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + (wq->sq.qid << rdev->qpshift); wq->sq.udb &= PAGE_MASK; wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + (wq->rq.qid << rdev->qpshift); wq->rq.udb &= PAGE_MASK; } wq->rdev = rdev; wq->rq.msn = 1; /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto err7; } set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( FW_WR_OP(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(2) | FW_WR_COMPL(1)); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* * eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ (t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0) | V_FW_RI_RES_WR_IQID(scq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); res++; res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* * eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(rcq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); c4iw_init_wr_wait(&wr_wait); ret = c4iw_ofld_send(rdev, skb); if (ret) goto err7; ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); if (ret) goto err7; PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n", __func__, wq->sq.qid, wq->rq.qid, wq->db, (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); return 0; err7: dma_free_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, wq->rq.queue, pci_unmap_addr(&wq->rq, mapping)); err6: dealloc_sq(rdev, &wq->sq); err5: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); err4: kfree(wq->rq.sw_rq); err3: kfree(wq->sq.sw_sq); err2: c4iw_put_qpid(rdev, wq->rq.qid, uctx); err1: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return -ENOMEM; }
int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, int hca_write, struct mthca_mr *mr) { int err = -ENOMEM; int npages, shift; u64 *dma_list = NULL; dma_addr_t t; int i; if (size <= max_direct) { *is_direct = 1; npages = 1; shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, size, &t, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; pci_unmap_addr_set(&buf->direct, mapping, t); memset(buf->direct.buf, 0, size); while (t & ((1 << shift) - 1)) { --shift; npages *= 2; } dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); if (!dma_list) goto err_free; for (i = 0; i < npages; ++i) dma_list[i] = t + i * (1 << shift); } else { *is_direct = 0; npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; shift = PAGE_SHIFT; dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); if (!dma_list) return -ENOMEM; buf->page_list = kmalloc(npages * sizeof *buf->page_list, GFP_KERNEL); if (!buf->page_list) goto err_out; for (i = 0; i < npages; ++i) buf->page_list[i].buf = NULL; for (i = 0; i < npages; ++i) { buf->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, &t, GFP_KERNEL); if (!buf->page_list[i].buf) goto err_free; dma_list[i] = t; pci_unmap_addr_set(&buf->page_list[i], mapping, t); memset(buf->page_list[i].buf, 0, PAGE_SIZE); } } err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift, npages, 0, size, MTHCA_MPT_FLAG_LOCAL_READ | (hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0), mr); if (err) goto err_free; kfree(dma_list); return 0; err_free: mthca_buf_free(dev, size, buf, *is_direct, NULL); err_out: kfree(dma_list); return err; }
static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) { struct adapter *sc = rdev->adap; int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; int ret; int eqsize; struct wrqe *wr; wq->sq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->sq.qid) return -ENOMEM; wq->rq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->rq.qid) goto err1; if (!user) { wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, GFP_KERNEL); if (!wq->sq.sw_sq) goto err2; wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, GFP_KERNEL); if (!wq->rq.sw_rq) goto err3; } /* RQT must be a power of 2. */ wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); if (!wq->rq.rqt_hwaddr) goto err4; if (alloc_host_sq(rdev, &wq->sq)) goto err5; memset(wq->sq.queue, 0, wq->sq.memsize); pci_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); wq->rq.queue = contigmalloc(wq->rq.memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (wq->rq.queue) wq->rq.dma_addr = vtophys(wq->rq.queue); else goto err6; CTR5(KTR_IW_CXGBE, "%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx", __func__, wq->sq.queue, (unsigned long long)vtophys(wq->sq.queue), wq->rq.queue, (unsigned long long)vtophys(wq->rq.queue)); memset(wq->rq.queue, 0, wq->rq.memsize); pci_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = (void *)((unsigned long)rman_get_virtual(sc->regs_res) + MYPF_REG(SGE_PF_KDOORBELL)); wq->gts = (void *)((unsigned long)rman_get_virtual(rdev->adap->regs_res) + MYPF_REG(SGE_PF_GTS)); if (user) { wq->sq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + (wq->sq.qid << rdev->qpshift)); wq->sq.udb &= PAGE_MASK; wq->rq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + (wq->rq.qid << rdev->qpshift)); wq->rq.udb &= PAGE_MASK; } wq->rdev = rdev; wq->rq.msn = 1; /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; wr = alloc_wrqe(wr_len, &sc->sge.mgmtq); if (wr == NULL) return (0); res_wr = wrtod(wr); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( V_FW_WR_OP(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(2) | F_FW_WR_COMPL); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + spg_creds; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(scq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); res++; res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + spg_creds ; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(rcq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); c4iw_init_wr_wait(&wr_wait); t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); if (ret) goto err7; CTR6(KTR_IW_CXGBE, "%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx", __func__, wq->sq.qid, wq->rq.qid, wq->db, (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); return 0; err7: contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF); err6: dealloc_sq(rdev, &wq->sq); err5: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); err4: kfree(wq->rq.sw_rq); err3: kfree(wq->sq.sw_sq); err2: c4iw_put_qpid(rdev, wq->rq.qid, uctx); err1: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return -ENOMEM; }
/* * Called by c2_probe to initialize the RNIC. This principally * involves initalizing the various limits and resouce pools that * comprise the RNIC instance. */ int __devinit c2_rnic_init(struct c2_dev *c2dev) { int err; u32 qsize, msgsize; void *q1_pages; void *q2_pages; void __iomem *mmio_regs; /* Device capabilities */ c2dev->device_cap_flags = (IB_DEVICE_RESIZE_MAX_WR | IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW); /* Allocate the qptr_array */ c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *)); if (!c2dev->qptr_array) { return -ENOMEM; } /* Inialize the qptr_array */ memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *)); c2dev->qptr_array[0] = (void *) &c2dev->req_vq; c2dev->qptr_array[1] = (void *) &c2dev->rep_vq; c2dev->qptr_array[2] = (void *) &c2dev->aeq; /* Initialize data structures */ init_waitqueue_head(&c2dev->req_vq_wo); spin_lock_init(&c2dev->vqlock); spin_lock_init(&c2dev->lock); /* Allocate MQ shared pointer pool for kernel clients. User * mode client pools are hung off the user context */ err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool); if (err) { goto bail0; } /* Allocate shared pointers for Q0, Q1, and Q2 from * the shared pointer pool. */ c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, &c2dev->hint_count_dma, GFP_KERNEL); c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, &c2dev->req_vq.shared_dma, GFP_KERNEL); c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, &c2dev->rep_vq.shared_dma, GFP_KERNEL); c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, &c2dev->aeq.shared_dma, GFP_KERNEL); if (!c2dev->hint_count || !c2dev->req_vq.shared || !c2dev->rep_vq.shared || !c2dev->aeq.shared) { err = -ENOMEM; goto bail1; } mmio_regs = c2dev->kva; /* Initialize the Verbs Request Queue */ c2_mq_req_init(&c2dev->req_vq, 0, be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)), be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)), mmio_regs + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)), mmio_regs + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)), C2_MQ_ADAPTER_TARGET); /* Initialize the Verbs Reply Queue */ qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE)); msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE)); q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize, &c2dev->rep_vq.host_dma, GFP_KERNEL); if (!q1_pages) { err = -ENOMEM; goto bail1; } pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma); pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages, (unsigned long long) c2dev->rep_vq.host_dma); c2_mq_rep_init(&c2dev->rep_vq, 1, qsize, msgsize, q1_pages, mmio_regs + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)), C2_MQ_HOST_TARGET); /* Initialize the Asynchronus Event Queue */ qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE)); msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE)); q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize, &c2dev->aeq.host_dma, GFP_KERNEL); if (!q2_pages) { err = -ENOMEM; goto bail2; } pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma); pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages, (unsigned long long) c2dev->aeq.host_dma); c2_mq_rep_init(&c2dev->aeq, 2, qsize, msgsize, q2_pages, mmio_regs + be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)), C2_MQ_HOST_TARGET); /* Initialize the verbs request allocator */ err = vq_init(c2dev); if (err) goto bail3; /* Enable interrupts on the adapter */ writel(0, c2dev->regs + C2_IDIS); /* create the WR init message */ err = c2_adapter_init(c2dev); if (err) goto bail4; c2dev->init++; /* open an adapter instance */ err = c2_rnic_open(c2dev); if (err) goto bail4; /* Initialize cached the adapter limits */ if (c2_rnic_query(c2dev, &c2dev->props)) goto bail5; /* Initialize the PD pool */ err = c2_init_pd_table(c2dev); if (err) goto bail5; /* Initialize the QP pool */ c2_init_qp_table(c2dev); return 0; bail5: c2_rnic_close(c2dev); bail4: vq_term(c2dev); bail3: dma_free_coherent(&c2dev->pcidev->dev, c2dev->aeq.q_size * c2dev->aeq.msg_size, q2_pages, pci_unmap_addr(&c2dev->aeq, mapping)); bail2: dma_free_coherent(&c2dev->pcidev->dev, c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, q1_pages, pci_unmap_addr(&c2dev->rep_vq, mapping)); bail1: c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); bail0: vfree(c2dev->qptr_array); return err; }