void mthca_free_av(struct mthca_ah *ah)
{
	if (mthca_is_memfree(ah->ibv_ah.context)) {
		free(ah->av);
	} else {
		struct mthca_pd *pd = to_mpd(ah->ibv_ah.pd);
		struct mthca_ah_page *page;
		int i;

		pthread_mutex_lock(&pd->ah_mutex);

		/* Mark this AV's slot free again in the page's bitmap */
		page = ah->page;
		i = ((void *) ah->av - page->buf.buf) / sizeof *ah->av;
		page->free[i / (8 * sizeof (int))] |= 1 << (i % (8 * sizeof (int)));

		/* Last AV on this page: unlink the page from the PD and release it */
		if (!--page->use_cnt) {
			if (page->prev)
				page->prev->next = page->next;
			else
				pd->ah_list = page->next;
			if (page->next)
				page->next->prev = page->prev;

			mthca_dereg_mr(page->mr);
			mthca_free_buf(&page->buf);
			free(page);
		}

		pthread_mutex_unlock(&pd->ah_mutex);
	}
}
s64 mthca_make_profile(struct mthca_dev *dev,
		       struct mthca_profile *request,
		       struct mthca_dev_lim *dev_lim,
		       struct mthca_init_hca_param *init_hca)
{
	struct mthca_resource {
		u64 size;
		u64 start;
		int type;
		int num;
		int log_num;
	};

	u64 mem_base, mem_avail;
	s64 total_size = 0;
	struct mthca_resource *profile;
	struct mthca_resource tmp;
	int i, j;

	profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
	if (!profile)
		return -ENOMEM;

	profile[MTHCA_RES_QP].size   = dev_lim->qpc_entry_sz;
	profile[MTHCA_RES_EEC].size  = dev_lim->eec_entry_sz;
	profile[MTHCA_RES_SRQ].size  = dev_lim->srq_entry_sz;
	profile[MTHCA_RES_CQ].size   = dev_lim->cqc_entry_sz;
	profile[MTHCA_RES_EQP].size  = dev_lim->eqpc_entry_sz;
	profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz;
	profile[MTHCA_RES_EQ].size   = dev_lim->eqc_entry_sz;
	profile[MTHCA_RES_RDB].size  = MTHCA_RDB_ENTRY_SIZE;
	profile[MTHCA_RES_MCG].size  = MTHCA_MGM_ENTRY_SIZE;
	profile[MTHCA_RES_MPT].size  = dev_lim->mpt_entry_sz;
	profile[MTHCA_RES_MTT].size  = dev->limits.mtt_seg_size;
	profile[MTHCA_RES_UAR].size  = dev_lim->uar_scratch_entry_sz;
	profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
	profile[MTHCA_RES_UARC].size = request->uarc_size;

	profile[MTHCA_RES_QP].num   = request->num_qp;
	profile[MTHCA_RES_SRQ].num  = request->num_srq;
	profile[MTHCA_RES_EQP].num  = request->num_qp;
	profile[MTHCA_RES_RDB].num  = request->num_qp * request->rdb_per_qp;
	profile[MTHCA_RES_CQ].num   = request->num_cq;
	profile[MTHCA_RES_EQ].num   = MTHCA_NUM_EQS;
	profile[MTHCA_RES_MCG].num  = request->num_mcg;
	profile[MTHCA_RES_MPT].num  = request->num_mpt;
	profile[MTHCA_RES_MTT].num  = request->num_mtt;
	profile[MTHCA_RES_UAR].num  = request->num_uar;
	profile[MTHCA_RES_UARC].num = request->num_uar;
	profile[MTHCA_RES_UDAV].num = request->num_udav;

	for (i = 0; i < MTHCA_RES_NUM; ++i) {
		profile[i].type     = i;
		profile[i].log_num  = max(ffs(profile[i].num) - 1, 0);
		profile[i].size    *= profile[i].num;
		if (mthca_is_memfree(dev))
			profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
	}

	if (mthca_is
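As a side note on the sizing loop above: the per-resource log_num relies on the requested counts being powers of two, since ffs() returns the 1-based index of the lowest set bit, so ffs(n) - 1 equals log2(n) exactly when n is a power of two. A tiny stand-alone illustration (the counts are made up, not driver defaults):

#include <stdio.h>
#include <strings.h>   /* ffs() */

int main(void)
{
	/* Hypothetical resource counts; the driver's real defaults may differ. */
	int counts[] = { 1, 64, 65536, 0 };

	for (int i = 0; i < 4; ++i) {
		int num     = counts[i];
		int log_num = ffs(num) - 1;   /* -1 when num == 0 */
		if (log_num < 0)
			log_num = 0;          /* same clamp as max(..., 0) above */
		printf("num=%-6d log_num=%d\n", num, log_num);
	}
	return 0;
}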
void __devexit mthca_cleanup_av_table(struct mthca_dev *dev)
{
	if (mthca_is_memfree(dev))
		return;

	if (dev->av_table.av_map)
		iounmap(dev->av_table.av_map);
	pci_pool_destroy(dev->av_table.pool);
	mthca_alloc_cleanup(&dev->av_table.alloc);
}
static int mthca_init_hca(struct mthca_dev *mdev)
{
	u8 status;
	int err;
	struct mthca_adapter adapter;

	if (mthca_is_memfree(mdev))
		err = mthca_init_arbel(mdev);
	else
		err = mthca_init_tavor(mdev);

	if (err)
		return err;

	err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
	if (err) {
		mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
		goto err_close;
	}
	if (status) {
		mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
			  "aborting.\n", status);
		err = -EINVAL;
		goto err_close;
	}

	mdev->eq_table.inta_pin = adapter.inta_pin;
	if (!mthca_is_memfree(mdev))
		mdev->rev_id = adapter.revision_id;
	memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id);

	return 0;

err_close:
	mthca_close_hca(mdev);
	return err;
}
static void mthca_close_hca(struct mthca_dev *mdev)
{
	u8 status;

	mthca_CLOSE_HCA(mdev, 0, &status);

	if (mthca_is_memfree(mdev)) {
		mthca_free_icms(mdev);

		mthca_UNMAP_FA(mdev, &status);
		mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);

		if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
			mthca_DISABLE_LAM(mdev, &status);
	} else
		mthca_SYS_DIS(mdev, &status);
}
int __devinit mthca_init_av_table(struct mthca_dev *dev)
{
	int err;

	if (mthca_is_memfree(dev))
		return 0;

	err = mthca_alloc_init(&dev->av_table.alloc,
			       dev->av_table.num_ddr_avs,
			       dev->av_table.num_ddr_avs - 1,
			       0);
	if (err)
		return err;

	dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev,
					     MTHCA_AV_SIZE,
					     MTHCA_AV_SIZE, 0);
	if (!dev->av_table.pool)
		goto out_free_alloc;

	if (!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
		dev->av_table.av_map = ioremap(pci_resource_start(dev->pdev, 4) +
					       dev->av_table.ddr_av_base -
					       dev->ddr_start,
					       dev->av_table.num_ddr_avs *
					       MTHCA_AV_SIZE);
		if (!dev->av_table.av_map)
			goto out_free_pool;
	} else
		dev->av_table.av_map = NULL;

	return 0;

out_free_pool:
	pci_pool_destroy(dev->av_table.pool);

out_free_alloc:
	mthca_alloc_cleanup(&dev->av_table.alloc);
	return -ENOMEM;
}
int mthca_alloc_av(struct mthca_pd *pd, struct ibv_ah_attr *attr,
		   struct mthca_ah *ah)
{
	if (mthca_is_memfree(pd->ibv_pd.context)) {
		ah->av = malloc(sizeof *ah->av);
		if (!ah->av)
			return -1;
	} else {
		struct mthca_ah_page *page;
		int ps;
		int pp;
		int i, j;

		ps = to_mdev(pd->ibv_pd.context->device)->page_size;
		/* pp: number of bitmap words needed to track the AVs in one page */
		pp = ps / (sizeof *ah->av * 8 * sizeof (int));

		pthread_mutex_lock(&pd->ah_mutex);

		/* Look for a page that still has a free AV slot */
		for (page = pd->ah_list; page; page = page->next)
			if (page->use_cnt < ps / sizeof *ah->av)
				for (i = 0; i < pp; ++i)
					if (page->free[i])
						goto found;

		/* No free slot anywhere: allocate and register a fresh page */
		page = __add_page(pd, ps, pp);
		if (!page) {
			pthread_mutex_unlock(&pd->ah_mutex);
			return -1;
		}

	found:
		++page->use_cnt;

		/* Claim the first free slot in this page's bitmap */
		for (i = 0, j = -1; i < pp; ++i)
			if (page->free[i]) {
				j = ffs(page->free[i]);
				page->free[i] &= ~(1 << (j - 1));
				ah->av = page->buf.buf +
					(i * 8 * sizeof (int) + (j - 1)) * sizeof *ah->av;
				break;
			}

		ah->key  = page->mr->lkey;
		ah->page = page;

		pthread_mutex_unlock(&pd->ah_mutex);
	}

	memset(ah->av, 0, sizeof *ah->av);

	ah->av->port_pd = htonl(pd->pdn | (attr->port_num << 24));
	ah->av->g_slid  = attr->src_path_bits;
	ah->av->dlid    = htons(attr->dlid);
	ah->av->msg_sr  = (3 << 4) | /* 2K message */
		attr->static_rate;
	ah->av->sl_tclass_flowlabel = htonl(attr->sl << 28);
	if (attr->is_global) {
		ah->av->g_slid |= 0x80;
		/* XXX get gid_table length */
		ah->av->gid_index = (attr->port_num - 1) * 32 +
			attr->grh.sgid_index;
		ah->av->hop_limit = attr->grh.hop_limit;
		ah->av->sl_tclass_flowlabel |=
			htonl((attr->grh.traffic_class << 20) |
			      attr->grh.flow_label);
		memcpy(ah->av->dgid, attr->grh.dgid.raw, 16);
	} else {
		/* Arbel workaround -- low byte of GID must be 2 */
		ah->av->dgid[3] = htonl(2);
	}

	return 0;
}
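For context, here is a minimal sketch of how an application would reach this allocation path through the standard libibverbs entry points; the device chosen and the address-handle attribute values are purely illustrative, and in libmthca the provider's create_ah/destroy_ah callbacks end up calling mthca_alloc_av()/mthca_free_av():

#include <stdio.h>
#include <string.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **dev_list = ibv_get_device_list(NULL);
	if (!dev_list || !dev_list[0])
		return 1;

	/* Opening the device triggers the provider's context allocation
	   (mthca_alloc_context() for an mthca HCA). */
	struct ibv_context *ctx = ibv_open_device(dev_list[0]);
	struct ibv_pd *pd = ctx ? ibv_alloc_pd(ctx) : NULL;
	if (!pd)
		return 1;

	struct ibv_ah_attr attr;
	memset(&attr, 0, sizeof attr);
	attr.dlid          = 0x1234;  /* illustrative destination LID */
	attr.sl            = 0;
	attr.src_path_bits = 0;
	attr.static_rate   = 0;
	attr.is_global     = 0;
	attr.port_num      = 1;

	/* ibv_create_ah() dispatches to the provider, which allocates the
	   hardware AV; ibv_destroy_ah() releases it again. */
	struct ibv_ah *ah = ibv_create_ah(pd, &attr);
	if (ah)
		ibv_destroy_ah(ah);

	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(dev_list);
	return 0;
}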
int mthca_create_ah(struct mthca_dev *dev,
		    struct mthca_pd *pd,
		    struct ib_ah_attr *ah_attr,
		    struct mthca_ah *ah)
{
	u32 index = -1;
	struct mthca_av *av = NULL;

	ah->type = MTHCA_AH_PCI_POOL;

	if (mthca_is_memfree(dev)) {
		ah->av = kmalloc(sizeof *ah->av, GFP_ATOMIC);
		if (!ah->av)
			return -ENOMEM;

		ah->type = MTHCA_AH_KMALLOC;
		av       = ah->av;
	} else if (!atomic_read(&pd->sqp_count) &&
		   !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
		index = mthca_alloc(&dev->av_table.alloc);

		/* fall back to allocate in host memory */
		if (index == -1)
			goto on_hca_fail;

		av = kmalloc(sizeof *av, GFP_ATOMIC);
		if (!av)
			goto on_hca_fail;

		ah->type  = MTHCA_AH_ON_HCA;
		ah->avdma = dev->av_table.ddr_av_base +
			index * MTHCA_AV_SIZE;
	}

on_hca_fail:
	if (ah->type == MTHCA_AH_PCI_POOL) {
		ah->av = pci_pool_alloc(dev->av_table.pool,
					SLAB_ATOMIC, &ah->avdma);
		if (!ah->av)
			return -ENOMEM;

		av = ah->av;
	}

	ah->key = pd->ntmr.ibmr.lkey;

	memset(av, 0, MTHCA_AV_SIZE);

	av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
	av->g_slid  = ah_attr->src_path_bits;
	av->dlid    = cpu_to_be16(ah_attr->dlid);
	av->msg_sr  = (3 << 4) | /* 2K message */
		ah_attr->static_rate;
	av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
	if (ah_attr->ah_flags & IB_AH_GRH) {
		av->g_slid |= 0x80;
		av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
			ah_attr->grh.sgid_index;
		av->hop_limit = ah_attr->grh.hop_limit;
		av->sl_tclass_flowlabel |=
			cpu_to_be32((ah_attr->grh.traffic_class << 20) |
				    ah_attr->grh.flow_label);
		memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
	} else {
		/* Arbel workaround -- low byte of GID must be 2 */
		av->dgid[3] = cpu_to_be32(2);
	}

	if (0) {
		int j;

		mthca_dbg(dev, "Created UDAV at %p/%08lx:\n",
			  av, (unsigned long) ah->avdma);
		for (j = 0; j < 8; ++j)
			printk(KERN_DEBUG "  [%2x] %08x\n",
			       j * 4, be32_to_cpu(((__be32 *) av)[j]));
	}

	if (ah->type == MTHCA_AH_ON_HCA) {
		memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE,
			    av, MTHCA_AV_SIZE);
		kfree(av);
	}

	return 0;
}
static struct ibv_context *mthca_alloc_context(struct ibv_device *ibdev, int cmd_fd)
{
	struct mthca_context             *context;
	struct ibv_get_context            cmd;
	struct mthca_alloc_ucontext_resp  resp;
	int                               i;

	context = calloc(1, sizeof *context);
	if (!context)
		return NULL;

	context->ibv_ctx.cmd_fd = cmd_fd;

	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
				&resp.ibv_resp, sizeof resp))
		goto err_free;

	context->num_qps        = resp.qp_tab_size;
	context->qp_table_shift = ffs(context->num_qps) - 1 - MTHCA_QP_TABLE_BITS;
	context->qp_table_mask  = (1 << context->qp_table_shift) - 1;

	/*
	 * Need to set ibv_ctx.device because mthca_is_memfree() will
	 * look at it to figure out the HCA type.
	 */
	context->ibv_ctx.device = ibdev;

	if (mthca_is_memfree(&context->ibv_ctx)) {
		context->db_tab = mthca_alloc_db_tab(resp.uarc_size);
		if (!context->db_tab)
			goto err_free;
	} else
		context->db_tab = NULL;

	pthread_mutex_init(&context->qp_table_mutex, NULL);
	for (i = 0; i < MTHCA_QP_TABLE_SIZE; ++i)
		context->qp_table[i].refcnt = 0;

	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
			    MAP_SHARED, cmd_fd, 0);
	if (context->uar == MAP_FAILED)
		goto err_db_tab;

	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);

	context->pd = mthca_alloc_pd(&context->ibv_ctx);
	if (!context->pd)
		goto err_unmap;

	context->pd->context = &context->ibv_ctx;

	context->ibv_ctx.ops = mthca_ctx_ops;
	if (mthca_is_memfree(&context->ibv_ctx)) {
		context->ibv_ctx.ops.req_notify_cq = mthca_arbel_arm_cq;
		context->ibv_ctx.ops.cq_event      = mthca_arbel_cq_event;
		context->ibv_ctx.ops.post_send     = mthca_arbel_post_send;
		context->ibv_ctx.ops.post_recv     = mthca_arbel_post_recv;
		context->ibv_ctx.ops.post_srq_recv = mthca_arbel_post_srq_recv;
	} else {
		context->ibv_ctx.ops.req_notify_cq = mthca_tavor_arm_cq;
		context->ibv_ctx.ops.cq_event      = NULL;
		context->ibv_ctx.ops.post_send     = mthca_tavor_post_send;
		context->ibv_ctx.ops.post_recv     = mthca_tavor_post_recv;
		context->ibv_ctx.ops.post_srq_recv = mthca_tavor_post_srq_recv;
	}

	return &context->ibv_ctx;

err_unmap:
	munmap(context->uar, to_mdev(ibdev)->page_size);

err_db_tab:
	mthca_free_db_tab(context->db_tab);

err_free:
	free(context);
	return NULL;
}
static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
{
	int err;
	u8 status;

	mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8;

	err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
	if (err) {
		mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
		return err;
	}
	if (status) {
		mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, "
			  "aborting.\n", status);
		return -EINVAL;
	}
	if (dev_lim->min_page_sz > PAGE_SIZE) {
		mthca_err(mdev, "HCA minimum page size of %d bigger than "
			  "kernel PAGE_SIZE of %ld, aborting.\n",
			  dev_lim->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_lim->num_ports > MTHCA_MAX_PORTS) {
		mthca_err(mdev, "HCA has %d ports, but we only support %d, "
			  "aborting.\n", dev_lim->num_ports, MTHCA_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_lim->uar_size > pci_resource_len(mdev->pdev, 2)) {
		mthca_err(mdev, "HCA reported UAR size of 0x%x bigger than "
			  "PCI resource 2 size of 0x%llx, aborting.\n",
			  dev_lim->uar_size,
			  (unsigned long long)pci_resource_len(mdev->pdev, 2));
		return -ENODEV;
	}

	mdev->limits.num_ports          = dev_lim->num_ports;
	mdev->limits.vl_cap             = dev_lim->max_vl;
	mdev->limits.mtu_cap            = dev_lim->max_mtu;
	mdev->limits.gid_table_len      = dev_lim->max_gids;
	mdev->limits.pkey_table_len     = dev_lim->max_pkeys;
	mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
	/*
	 * Need to allow for worst case send WQE overhead and check
	 * whether max_desc_sz imposes a lower limit than max_sg; UD
	 * send has the biggest overhead.
	 */
	mdev->limits.max_sg = min_t(int, dev_lim->max_sg,
				    (dev_lim->max_desc_sz -
				     sizeof (struct mthca_next_seg) -
				     (mthca_is_memfree(mdev) ?
				      sizeof (struct mthca_arbel_ud_seg) :
				      sizeof (struct mthca_tavor_ud_seg))) /
				    sizeof (struct mthca_data_seg));
	mdev->limits.max_wqes           = dev_lim->max_qp_sz;
	mdev->limits.max_qp_init_rdma   = dev_lim->max_requester_per_qp;
	mdev->limits.reserved_qps       = dev_lim->reserved_qps;
	mdev->limits.max_srq_wqes       = dev_lim->max_srq_sz;
	mdev->limits.reserved_srqs      = dev_lim->reserved_srqs;
	mdev->limits.reserved_eecs      = dev_lim->reserved_eecs;
	mdev->limits.max_desc_sz        = dev_lim->max_desc_sz;
	mdev->limits.max_srq_sge        = mthca_max_srq_sge(mdev);
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	mdev->limits.max_cqes           = dev_lim->max_cq_sz - 1;
	mdev->limits.reserved_cqs       = dev_lim->reserved_cqs;
	mdev->limits.reserved_eqs       = dev_lim->reserved_eqs;
	mdev->limits.reserved_mtts      = dev_lim->reserved_mtts;
	mdev->limits.reserved_mrws      = dev_lim->reserved_mrws;
	mdev->limits.reserved_uars      = dev_lim->reserved_uars;
	mdev->limits.reserved_pds       = dev_lim->reserved_pds;
	mdev->limits.port_width_cap     = dev_lim->max_port_width;
	mdev->limits.page_size_cap      = ~(u32) (dev_lim->min_page_sz - 1);
	mdev->limits.flags              = dev_lim->flags;
	/*
	 * For old FW that doesn't return static rate support, use a
	 * value of 0x3 (only static rate values of 0 or 1 are handled),
	 * except on Sinai, where even old FW can handle static rate
	 * values of 2 and 3.
	 */
	if (dev_lim->stat_rate_support)
		mdev->limits.stat_rate_support = dev_lim->stat_rate_support;
	else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
		mdev->limits.stat_rate_support = 0xf;
	else
		mdev->limits.stat_rate_support = 0x3;

	/* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
	   May be doable since hardware supports it for SRQ.

	   IB_DEVICE_N_NOTIFY_CQ is supported by hardware but not by driver.

	   IB_DEVICE_SRQ_RESIZE is supported by hardware but SRQ is not
	   supported by driver. */
	mdev->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
		IB_DEVICE_PORT_ACTIVE_EVENT |
		IB_DEVICE_SYS_IMAGE_GUID |
		IB_DEVICE_RC_RNR_NAK_GEN;

	if (dev_lim->flags & DEV_LIM_FLAG_BAD_PKEY_CNTR)
		mdev->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;

	if (dev_lim->flags & DEV_LIM_FLAG_BAD_QKEY_CNTR)
		mdev->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;

	if (dev_lim->flags & DEV_LIM_FLAG_RAW_MULTI)
		mdev->device_cap_flags |= IB_DEVICE_RAW_MULTI;

	if (dev_lim->flags & DEV_LIM_FLAG_AUTO_PATH_MIG)
		mdev->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;

	if (dev_lim->flags & DEV_LIM_FLAG_UD_AV_PORT_ENFORCE)
		mdev->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;

	if (dev_lim->flags & DEV_LIM_FLAG_SRQ)
		mdev->mthca_flags |= MTHCA_FLAG_SRQ;

	if (mthca_is_memfree(mdev))
		if (dev_lim->flags & DEV_LIM_FLAG_IPOIB_CSUM)
			mdev->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;

	return 0;
}
static struct verbs_context *mthca_alloc_context(struct ibv_device *ibdev,
						 int cmd_fd,
						 void *private_data)
{
	struct mthca_context              *context;
	struct ibv_get_context             cmd;
	struct umthca_alloc_ucontext_resp  resp;
	int                                i;

	context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
					       RDMA_DRIVER_MTHCA);
	if (!context)
		return NULL;

	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
				&resp.ibv_resp, sizeof resp))
		goto err_free;

	context->num_qps        = resp.qp_tab_size;
	context->qp_table_shift = ffs(context->num_qps) - 1 - MTHCA_QP_TABLE_BITS;
	context->qp_table_mask  = (1 << context->qp_table_shift) - 1;

	if (mthca_is_memfree(&context->ibv_ctx.context)) {
		context->db_tab = mthca_alloc_db_tab(resp.uarc_size);
		if (!context->db_tab)
			goto err_free;
	} else
		context->db_tab = NULL;

	pthread_mutex_init(&context->qp_table_mutex, NULL);
	for (i = 0; i < MTHCA_QP_TABLE_SIZE; ++i)
		context->qp_table[i].refcnt = 0;

	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
			    MAP_SHARED, cmd_fd, 0);
	if (context->uar == MAP_FAILED)
		goto err_db_tab;

	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);

	context->pd = mthca_alloc_pd(&context->ibv_ctx.context);
	if (!context->pd)
		goto err_unmap;

	context->pd->context = &context->ibv_ctx.context;

	verbs_set_ops(&context->ibv_ctx, &mthca_ctx_common_ops);
	if (mthca_is_memfree(&context->ibv_ctx.context))
		verbs_set_ops(&context->ibv_ctx, &mthca_ctx_arbel_ops);
	else
		verbs_set_ops(&context->ibv_ctx, &mthca_ctx_tavor_ops);

	return &context->ibv_ctx;

err_unmap:
	munmap(context->uar, to_mdev(ibdev)->page_size);

err_db_tab:
	mthca_free_db_tab(context->db_tab);

err_free:
	verbs_uninit_context(&context->ibv_ctx);
	free(context);
	return NULL;
}