static int psmx_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { struct fi_info *psmx_info; uint32_t cnt = 0; void *dest_addr = NULL; int ep_type = FI_EP_RDM; int av_type = FI_AV_UNSPEC; enum fi_mr_mode mr_mode = FI_MR_SCALABLE; int caps = 0; uint64_t max_tag_value = 0; int err = -FI_ENODATA; FI_INFO(&psmx_prov, FI_LOG_CORE,"\n"); *info = NULL; if (psm_ep_num_devunits(&cnt) || !cnt) { FI_INFO(&psmx_prov, FI_LOG_CORE, "no PSM device is found.\n"); return -FI_ENODATA; } if (node && !(flags & FI_SOURCE)) dest_addr = psmx_resolve_name(node, 0); if (hints) { switch (hints->addr_format) { case FI_FORMAT_UNSPEC: case FI_ADDR_PSMX: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->addr_format=%d, supported=%d,%d.\n", hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX); goto err_out; } if (hints->ep_attr) { switch (hints->ep_attr->type) { case FI_EP_UNSPEC: case FI_EP_DGRAM: case FI_EP_RDM: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->type=%d, supported=%d,%d,%d.\n", hints->ep_attr->type, FI_EP_UNSPEC, FI_EP_DGRAM, FI_EP_RDM); goto err_out; } switch (hints->ep_attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_PSMX: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->protocol=%d, supported=%d %d\n", hints->ep_attr->protocol, FI_PROTO_UNSPEC, FI_PROTO_PSMX); goto err_out; } if (hints->ep_attr->tx_ctx_cnt > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->tx_ctx_cnt=%d, supported=0,1\n", hints->ep_attr->tx_ctx_cnt); goto err_out; } if (hints->ep_attr->rx_ctx_cnt > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->rx_ctx_cnt=%d, supported=0,1\n", hints->ep_attr->rx_ctx_cnt); goto err_out; } } if ((hints->caps & PSMX_CAPS) != hints->caps && (hints->caps & PSMX_CAPS2) != hints->caps) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->caps=0x%llx, supported=0x%llx,0x%llx\n", hints->caps, PSMX_CAPS, PSMX_CAPS2); goto err_out; } if (hints->tx_attr) { if 
((hints->tx_attr->op_flags & PSMX_OP_FLAGS) != hints->tx_attr->op_flags) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx->flags=0x%llx, " "supported=0x%llx\n", hints->tx_attr->op_flags, PSMX_OP_FLAGS); goto err_out; } if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->inject_size=%ld," "supported=%ld.\n", hints->tx_attr->inject_size, PSMX_INJECT_SIZE); goto err_out; } } if (hints->rx_attr && (hints->rx_attr->op_flags & PSMX_OP_FLAGS) != hints->rx_attr->op_flags) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx->flags=0x%llx, supported=0x%llx\n", hints->rx_attr->op_flags, PSMX_OP_FLAGS); goto err_out; } if ((hints->mode & PSMX_MODE) != PSMX_MODE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->mode=0x%llx, required=0x%llx\n", hints->mode, PSMX_MODE); goto err_out; } if (hints->fabric_attr && hints->fabric_attr->name && strncmp(hints->fabric_attr->name, PSMX_FABRIC_NAME, PSMX_FABRIC_NAME_LEN)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->fabric_name=%s, supported=psm\n", hints->fabric_attr->name); goto err_out; } if (hints->domain_attr) { if (hints->domain_attr->name && strncmp(hints->domain_attr->name, PSMX_DOMAIN_NAME, PSMX_DOMAIN_NAME_LEN)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_name=%s, supported=psm\n", hints->domain_attr->name); goto err_out; } switch (hints->domain_attr->av_type) { case FI_AV_UNSPEC: case FI_AV_MAP: case FI_AV_TABLE: av_type = hints->domain_attr->av_type; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->av_type=%d, supported=%d %d %d\n", hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP, FI_AV_TABLE); goto err_out; } switch (hints->domain_attr->mr_mode) { case FI_MR_UNSPEC: break; case FI_MR_BASIC: case FI_MR_SCALABLE: mr_mode = hints->domain_attr->mr_mode; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->mr_mode=%d, supported=%d %d %d\n", hints->domain_attr->mr_mode, FI_MR_UNSPEC, FI_MR_BASIC, FI_MR_SCALABLE); goto err_out; } } if (hints->ep_attr) 
{ if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->max_msg_size=%ld," "supported=%ld.\n", hints->ep_attr->max_msg_size, PSMX_MAX_MSG_SIZE); goto err_out; } max_tag_value = fi_tag_bits(hints->ep_attr->mem_tag_format); } caps = hints->caps; /* TODO: check other fields of hints */ } if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0) goto err_out; psmx_info = fi_allocinfo(); if (!psmx_info) { err = -FI_ENOMEM; goto err_out; } psmx_info->ep_attr->type = ep_type; psmx_info->ep_attr->protocol = FI_PROTO_PSMX; psmx_info->ep_attr->protocol_version = PSM_VERNO; psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE; psmx_info->ep_attr->mem_tag_format = fi_tag_format(max_tag_value); psmx_info->ep_attr->tx_ctx_cnt = 1; psmx_info->ep_attr->rx_ctx_cnt = 1; psmx_info->domain_attr->threading = FI_THREAD_COMPLETION; psmx_info->domain_attr->control_progress = FI_PROGRESS_MANUAL; psmx_info->domain_attr->data_progress = FI_PROGRESS_MANUAL; psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME); psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED; psmx_info->domain_attr->av_type = av_type; psmx_info->domain_attr->mr_mode = mr_mode; psmx_info->domain_attr->mr_key_size = sizeof(uint64_t); psmx_info->domain_attr->cq_data_size = 4; psmx_info->domain_attr->cq_cnt = 65535; psmx_info->domain_attr->ep_cnt = 65535; psmx_info->domain_attr->tx_ctx_cnt = 1; psmx_info->domain_attr->rx_ctx_cnt = 1; psmx_info->domain_attr->max_ep_tx_ctx = 65535; psmx_info->domain_attr->max_ep_rx_ctx = 1; psmx_info->domain_attr->max_ep_stx_ctx = 65535; psmx_info->domain_attr->max_ep_srx_ctx = 0; psmx_info->next = NULL; psmx_info->caps = (hints && hints->caps) ? 
hints->caps : caps; psmx_info->mode = PSMX_MODE; psmx_info->addr_format = FI_ADDR_PSMX; psmx_info->src_addrlen = 0; psmx_info->dest_addrlen = sizeof(psm_epid_t); psmx_info->src_addr = NULL; psmx_info->dest_addr = dest_addr; psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME); psmx_info->fabric_attr->prov_name = strdup(PSMX_PROV_NAME); psmx_info->tx_attr->caps = psmx_info->caps; psmx_info->tx_attr->mode = psmx_info->mode; psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags) ? hints->tx_attr->op_flags : 0; psmx_info->tx_attr->msg_order = FI_ORDER_SAS; psmx_info->tx_attr->comp_order = FI_ORDER_NONE; psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE; psmx_info->tx_attr->size = UINT64_MAX; psmx_info->tx_attr->iov_limit = 1; psmx_info->rx_attr->caps = psmx_info->caps; psmx_info->rx_attr->mode = psmx_info->mode; psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->tx_attr->op_flags) ? hints->tx_attr->op_flags : 0; psmx_info->rx_attr->msg_order = FI_ORDER_SAS; psmx_info->rx_attr->comp_order = FI_ORDER_NONE; psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */ psmx_info->rx_attr->size = UINT64_MAX; psmx_info->rx_attr->iov_limit = 1; *info = psmx_info; return 0; err_out: return err; }
psm_error_t __psm_ep_open_internal(psm_uuid_t const unique_job_key, int *devid_enabled, struct psm_ep_open_opts const *opts_i, psm_mq_t mq, psm_ep_t *epo, psm_epid_t *epido) { psm_ep_t ep = NULL; uint32_t num_units; size_t len; psm_error_t err; psm_epaddr_t epaddr = NULL; char buf[128], *p, *e; union psmi_envvar_val envvar_val; size_t ptl_sizes; struct psm_ep_open_opts opts; ptl_t *amsh_ptl, *ips_ptl, *self_ptl; int i; /* First get the set of default options, we overwrite with the user's * desired values afterwards */ if ((err = psm_ep_open_opts_get_defaults(&opts))) goto fail; if (opts_i != NULL) { if (opts_i->timeout != -1) opts.timeout = opts_i->timeout; if (opts_i->unit != -1) opts.unit = opts_i->unit; if (opts_i->affinity != -1) opts.affinity = opts_i->affinity; if (opts_i->sendbufs_num != -1) opts.sendbufs_num = opts_i->sendbufs_num; if (opts_i->network_pkey != HFI_DEFAULT_P_KEY) opts.network_pkey = opts_i->network_pkey; if (opts_i->port != 0) opts.port = opts_i->port; if (opts_i->outsl != -1) opts.outsl = opts_i->outsl; if (opts_i->service_id) opts.service_id = (uint64_t) opts_i->service_id; if (opts_i->path_res_type != PSM_PATH_RES_NONE) opts.path_res_type = opts_i->path_res_type; if (opts_i->senddesc_num) opts.senddesc_num = opts_i->senddesc_num; if (opts_i->imm_size) opts.imm_size = opts_i->imm_size; } /* Get Service ID from environment */ if (!psmi_getenv("PSM_IB_SERVICE_ID", "HFI Service ID for path resolution", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_ULONG_ULONG, (union psmi_envvar_val)HFI_DEFAULT_SERVICE_ID, &envvar_val)) { opts.service_id = (uint64_t) envvar_val.e_ulonglong; } /* Get Path resolution type from environment Possible choices are: * * NONE : Default same as previous instances. Utilizes static data. * OPP : Use OFED Plus Plus library to do path record queries. * UMAD : Use raw libibumad interface to form and process path records. 
*/ if (!psmi_getenv("PSM_PATH_REC", "Mechanism to query HFI path record (default is no path query)", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_STR, (union psmi_envvar_val)"none", &envvar_val)) { if (!strcasecmp(envvar_val.e_str, "none")) opts.path_res_type = PSM_PATH_RES_NONE; else if (!strcasecmp(envvar_val.e_str, "opp")) opts.path_res_type = PSM_PATH_RES_OPP; else if (!strcasecmp(envvar_val.e_str, "umad")) opts.path_res_type = PSM_PATH_RES_UMAD; else { _HFI_ERROR("Unknown path resolution type %s. " "Disabling use of path record query.\n", envvar_val.e_str); opts.path_res_type = PSM_PATH_RES_NONE; } } /* If a specific unit is set in the environment, use that one. */ if (!psmi_getenv("HFI_UNIT", "Device Unit number (-1 autodetects)", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG, (union psmi_envvar_val)HFI_UNIT_ID_ANY, &envvar_val)) { opts.unit = envvar_val.e_long; } /* Get user specified port number to use. */ if (!psmi_getenv("HFI_PORT", "IB Port number (0 autodetects)", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG, (union psmi_envvar_val)HFI_PORT_NUM_ANY, &envvar_val)) { opts.port = envvar_val.e_long; } /* Get service level from environment, path-query overrides it */ if (!psmi_getenv ("HFI_SL", "HFI outging ServiceLevel number (default 0)", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_LONG, (union psmi_envvar_val)PSMI_SL_DEFAULT, &envvar_val)) { opts.outsl = envvar_val.e_long; } /* Get network key from environment. MVAPICH and other vendor MPIs do not * specify it on ep open and we may require it for vFabrics. * path-query will override it. */ if (!psmi_getenv("PSM_PKEY", "HFI PKey to use for endpoint", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_ULONG, (union psmi_envvar_val)HFI_DEFAULT_P_KEY, &envvar_val)) { opts.network_pkey = (uint64_t) envvar_val.e_ulong; } /* BACKWARDS COMPATIBILITY: Open MPI likes to choose its own PKEY of 0x7FFF. 
That's no longer a valid default, so override it if the client was compiled against PSM v1 */ if (PSMI_VERNO_GET_MAJOR(psmi_verno_client()) < 2 && opts.network_pkey == 0x7FFF) { opts.network_pkey = HFI_DEFAULT_P_KEY; } /* Get number of default send buffers from environment */ if (!psmi_getenv("PSM_NUM_SEND_BUFFERS", "Number of send buffers to allocate [1024]", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT, (union psmi_envvar_val)1024, &envvar_val)) { opts.sendbufs_num = envvar_val.e_uint; } /* Get immediate data size - transfers less than immediate data size do * not consume a send buffer and require just a send descriptor. */ if (!psmi_getenv("PSM_SEND_IMMEDIATE_SIZE", "Immediate data send size not requiring a buffer [128]", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT, (union psmi_envvar_val)128, &envvar_val)) { opts.imm_size = envvar_val.e_uint; } /* Get numner of send descriptors - by default this is 4 times the number * of send buffers - mainly used for short/inlined messages. */ if (!psmi_getenv("PSM_NUM_SEND_DESCRIPTORS", "Number of send descriptors to allocate [4096]", PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT, (union psmi_envvar_val)4096, &envvar_val)) { opts.senddesc_num = envvar_val.e_uint; } if (psmi_device_is_enabled(devid_enabled, PTL_DEVID_IPS)) { if ((err = psm_ep_num_devunits(&num_units)) != PSM_OK) goto fail; } else num_units = 0; /* do some error checking */ if (opts.timeout < -1) { err = psmi_handle_error(NULL, PSM_PARAM_ERR, "Invalid timeout value %lld", (long long)opts.timeout); goto fail; } else if (num_units && (opts.unit < -1 || opts.unit >= (int)num_units)) { err = psmi_handle_error(NULL, PSM_PARAM_ERR, "Invalid Device Unit ID %d (%d units found)", opts.unit, num_units); goto fail; } else if (opts.port < 0 || opts.port > HFI_MAX_PORT) { err = psmi_handle_error(NULL, PSM_PARAM_ERR, "Invalid Device port number %d", opts.port); goto fail; } else if (opts.affinity < 0 || opts.affinity > PSM_EP_OPEN_AFFINITY_FORCE) { err = 
psmi_handle_error(NULL, PSM_PARAM_ERR, "Invalid Affinity option: %d", opts.affinity); goto fail; } else if (opts.outsl < PSMI_SL_MIN || opts.outsl > PSMI_SL_MAX) { err = psmi_handle_error(NULL, PSM_PARAM_ERR, "Invalid SL number: %lld", (unsigned long long)opts.outsl); goto fail; } /* Set environment variable if PSM is not allowed to set affinity */ if (opts.affinity == PSM_EP_OPEN_AFFINITY_SKIP) setenv("HFI_NO_CPUAFFINITY", "1", 1); /* Allocate end point structure storage */ ptl_sizes = (psmi_device_is_enabled(devid_enabled, PTL_DEVID_SELF) ? psmi_ptl_self.sizeof_ptl() : 0) + (psmi_device_is_enabled(devid_enabled, PTL_DEVID_IPS) ? psmi_ptl_ips.sizeof_ptl() : 0) + (psmi_device_is_enabled(devid_enabled, PTL_DEVID_AMSH) ? psmi_ptl_amsh.sizeof_ptl() : 0); if (ptl_sizes == 0) return PSM_EP_NO_DEVICE; ep = (psm_ep_t) psmi_memalign(PSMI_EP_NONE, UNDEFINED, 64, sizeof(struct psm_ep) + ptl_sizes); epaddr = (psm_epaddr_t) psmi_calloc(PSMI_EP_NONE, PER_PEER_ENDPOINT, 1, sizeof(struct psm_epaddr)); if (ep == NULL || epaddr == NULL) { err = psmi_handle_error(NULL, PSM_NO_MEMORY, "Couldn't allocate memory for %s structure", ep == NULL ? "psm_ep" : "psm_epaddr"); goto fail; } /* Copy PTL enabled status */ for (i = 0; i < PTL_MAX_INIT; i++) ep->devid_enabled[i] = devid_enabled[i]; /* Matched Queue initialization. We do this early because we have to * make sure ep->mq exists and is valid before calling ips_do_work. 
*/ ep->mq = mq; /* Get ready for PTL initialization */ memcpy(&ep->uuid, (void *)unique_job_key, sizeof(psm_uuid_t)); ep->epaddr = epaddr; ep->memmode = mq->memmode; ep->hfi_num_sendbufs = opts.sendbufs_num; ep->service_id = opts.service_id; ep->path_res_type = opts.path_res_type; ep->hfi_num_descriptors = opts.senddesc_num; ep->hfi_imm_size = opts.imm_size; ep->errh = psmi_errhandler_global; /* by default use the global one */ ep->ptl_amsh.ep_poll = psmi_poll_noop; ep->ptl_ips.ep_poll = psmi_poll_noop; ep->connections = 0; /* See how many iterations we want to spin before yielding */ psmi_getenv("PSM_YIELD_SPIN_COUNT", "Spin poll iterations before yield", PSMI_ENVVAR_LEVEL_HIDDEN, PSMI_ENVVAR_TYPE_UINT, (union psmi_envvar_val)PSMI_BLOCKUNTIL_POLLS_BEFORE_YIELD, &envvar_val); ep->yield_spin_cnt = envvar_val.e_uint; ptl_sizes = 0; amsh_ptl = ips_ptl = self_ptl = NULL; if (psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) { amsh_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes); ptl_sizes += psmi_ptl_amsh.sizeof_ptl(); } if (psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) { ips_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes); ptl_sizes += psmi_ptl_ips.sizeof_ptl(); } if (psmi_ep_device_is_enabled(ep, PTL_DEVID_SELF)) { self_ptl = (ptl_t *) (ep->ptl_base_data + ptl_sizes); ptl_sizes += psmi_ptl_self.sizeof_ptl(); } if ((err = psmi_ep_open_device(ep, &opts, unique_job_key, &(ep->context), &ep->epid))) goto fail; psmi_assert_always(ep->epid != 0); ep->epaddr->epid = ep->epid; _HFI_VDBG("psmi_ep_open_device() passed\n"); /* Set our new label as soon as we know what it is */ strncpy(buf, psmi_gethostname(), sizeof(buf) - 1); buf[sizeof(buf) - 1] = '\0'; p = buf + strlen(buf); /* If our rank is set, use it. 
If not, use context.subcontext notation */ if (((e = getenv("MPI_RANKID")) != NULL && *e) || ((e = getenv("PSC_MPI_RANK")) != NULL && *e)) len = snprintf(p, sizeof(buf) - strlen(buf), ":%d.", atoi(e)); else len = snprintf(p, sizeof(buf) - strlen(buf), ":%d.%d.", (uint32_t) psm_epid_context(ep->epid), (uint32_t) psmi_epid_subcontext(ep->epid)); *(p + len) = '\0'; ep->context_mylabel = psmi_strdup(ep, buf); if (ep->context_mylabel == NULL) { err = PSM_NO_MEMORY; goto fail; } /* hfi_set_mylabel(ep->context_mylabel); */ if ((err = psmi_epid_set_hostname(psm_epid_nid(ep->epid), buf, 0))) goto fail; _HFI_VDBG("start ptl device init...\n"); if (psmi_ep_device_is_enabled(ep, PTL_DEVID_SELF)) { if ((err = psmi_ptl_self.init(ep, self_ptl, &ep->ptl_self))) goto fail; } if (psmi_ep_device_is_enabled(ep, PTL_DEVID_IPS)) { if ((err = psmi_ptl_ips.init(ep, ips_ptl, &ep->ptl_ips))) goto fail; } /* If we're shm-only, this device is enabled above */ if (psmi_ep_device_is_enabled(ep, PTL_DEVID_AMSH)) { if ((err = psmi_ptl_amsh.init(ep, amsh_ptl, &ep->ptl_amsh))) goto fail; } else { /* We may have pre-attached as part of getting our rank for enabling * shared contexts. */ } _HFI_VDBG("finish ptl device init...\n"); /* * Keep only IPS since only IPS support multi-rail, other devices * are only setup once. IPS device can come to this function again. */ for (i = 0; i < PTL_MAX_INIT; i++) { if (devid_enabled[i] != PTL_DEVID_IPS) { devid_enabled[i] = -1; } } *epido = ep->epid; *epo = ep; return PSM_OK; fail: if (ep != NULL) { if (ep->context.fd != -1) close(ep->context.fd); psmi_free(ep); } if (epaddr != NULL) psmi_free(epaddr); return err; }
static psm_error_t psmi_ep_devlids(uint16_t **lids, uint32_t *num_lids_o, uint64_t my_gid_hi, uint64_t my_gid_lo) { static uint16_t *hfi_lids; static uint32_t nlids; uint32_t num_units; int i; psm_error_t err = PSM_OK; PSMI_ERR_UNLESS_INITIALIZED(NULL); if (hfi_lids == NULL) { if ((err = psm_ep_num_devunits(&num_units))) goto fail; hfi_lids = (uint16_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, num_units * HFI_MAX_PORT, sizeof(uint16_t)); if (hfi_lids == NULL) { err = psmi_handle_error(NULL, PSM_NO_MEMORY, "Couldn't allocate memory for dev_lids structure"); goto fail; } for (i = 0; i < num_units; i++) { int j; for (j = 1; j <= HFI_MAX_PORT; j++) { int lid = hfi_get_port_lid(i, j); int ret; uint64_t gid_hi = 0, gid_lo = 0; if (lid == -1) continue; ret = hfi_get_port_gid(i, j, &gid_hi, &gid_lo); if (ret == -1) continue; else if (my_gid_hi != gid_hi) { _HFI_VDBG("LID %d, unit %d, port %d, " "mismatched GID %llx:%llx and " "%llx:%llx\n", lid, i, j, (unsigned long long)gid_hi, (unsigned long long)gid_lo, (unsigned long long)my_gid_hi, (unsigned long long) my_gid_lo); continue; } _HFI_VDBG("LID %d, unit %d, port %d, " "matching GID %llx:%llx and " "%llx:%llx\n", lid, i, j, (unsigned long long)gid_hi, (unsigned long long)gid_lo, (unsigned long long)my_gid_hi, (unsigned long long)my_gid_lo); hfi_lids[nlids++] = (uint16_t) lid; } } if (nlids == 0) { err = psmi_handle_error(NULL, PSM_EP_DEVICE_FAILURE, "Couldn't get lid&gid from any unit/port"); goto fail; } } *lids = hfi_lids; *num_lids_o = nlids; fail: return err; }
static psm_error_t psmi_ep_multirail(int *num_rails, uint32_t *unit, uint16_t *port) { uint32_t num_units; uint64_t gid_hi, gid_lo; int i, j, ret, count = 0; char *env; psm_error_t err = PSM_OK; uint64_t gidh[HFI_MAX_RAILS][3]; env = getenv("PSM_MULTIRAIL"); if (!env || atoi(env) == 0) { *num_rails = 0; return err; } /* * map is in format: unit:port,unit:port,... */ if ((env = getenv("PSM_MULTIRAIL_MAP"))) { if (sscanf(env, "%d:%d", &i, &j) == 2) { char *comma = strchr(env, ','); unit[count] = i; port[count] = j; count++; while (comma) { if (sscanf(comma, ",%d:%d", &i, &j) != 2) { break; } unit[count] = i; port[count] = j; count++; if (count == HFI_MAX_RAILS) break; comma = strchr(comma + 1, ','); } } *num_rails = count; /* * Check if any of the port is not usable. */ for (i = 0; i < count; i++) { ret = hfi_get_port_lid(unit[i], port[i]); if (ret == -1) { err = psmi_handle_error(NULL, PSM_EP_DEVICE_FAILURE, "Couldn't get lid for unit %d:%d", unit[i], port[i]); return err; } ret = hfi_get_port_gid(unit[i], port[i], &gid_hi, &gid_lo); if (ret == -1) { err = psmi_handle_error(NULL, PSM_EP_DEVICE_FAILURE, "Couldn't get gid for unit %d:%d", unit[i], port[i]); return err; } } return err; } if ((err = psm_ep_num_devunits(&num_units))) { return err; } if (num_units > HFI_MAX_RAILS) { _HFI_INFO ("Found %d units, max %d units are supported, use %d\n", num_units, HFI_MAX_RAILS, HFI_MAX_RAILS); num_units = HFI_MAX_RAILS; } /* * Get all the ports with a valid lid and gid, one per unit. */ for (i = 0; i < num_units; i++) { for (j = 1; j <= HFI_MAX_PORT; j++) { ret = hfi_get_port_lid(i, j); if (ret == -1) continue; ret = hfi_get_port_gid(i, j, &gid_hi, &gid_lo); if (ret == -1) continue; gidh[count][0] = gid_hi; gidh[count][1] = i; gidh[count][2] = j; count++; break; } } /* * Sort all the ports with gidh from small to big. * This is for multiple fabrics, and we use fabric with the * smallest gid to make the master connection. 
*/ qsort(gidh, count, sizeof(uint64_t) * 3, cmpfunc); for (i = 0; i < count; i++) { unit[i] = (uint32_t) gidh[i][1]; port[i] = (uint16_t) (uint32_t) gidh[i][2]; } *num_rails = count; return err; }
/*
 * psmx_getinfo - describe the endpoint configuration offered by the PSM
 * provider.
 *
 * version: requested API version; for 1.5 and later the default mr_mode
 *          starts at 0 instead of FI_MR_SCALABLE.
 * node, service:
 *          with FI_SOURCE set in flags they are parsed into the source
 *          address (unit, port, service); otherwise a non-NULL node is
 *          resolved through the name server into the destination address.
 * hints:   optional; each field examined below must be compatible with what
 *          the provider supports.
 * info:    set to NULL on entry; receives one newly allocated fi_info on
 *          success.
 *
 * Returns 0 on success, -FI_ENOMEM when fi_allocinfo() fails, and
 * -FI_ENODATA for every other failure (unsupported hints, library init
 * failure, no device, failed name resolution, failed src_addr allocation).
 * Paths that leave through err_out free both address buffers.
 */
static int psmx_getinfo(uint32_t version, const char *node, const char *service,
			uint64_t flags, const struct fi_info *hints, struct fi_info **info)
{
	struct fi_info *psmx_info;
	uint32_t cnt = 0;
	psm_epid_t *dest_addr = NULL;
	struct psmx_src_name *src_addr = NULL;
	int ep_type = FI_EP_RDM;
	int av_type = FI_AV_UNSPEC;
	uint64_t mode = FI_CONTEXT;
	enum fi_mr_mode mr_mode = FI_MR_SCALABLE;
	enum fi_threading threading = FI_THREAD_COMPLETION;
	enum fi_progress control_progress = FI_PROGRESS_MANUAL;
	enum fi_progress data_progress = FI_PROGRESS_MANUAL;
	int caps = 0;
	uint64_t max_tag_value = 0;
	int err = -FI_ENODATA;
	int svc0, svc = PSMX_ANY_SERVICE;

	FI_INFO(&psmx_prov, FI_LOG_CORE,"\n");

	*info = NULL;

	/* Perform some quick check first to avoid unnecessary operations */
	if (hints) {
		if (hints->fabric_attr && hints->fabric_attr->name &&
		    strcasecmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->fabric_name=%s, supported=%s\n",
				hints->fabric_attr->name, PSMX_FABRIC_NAME);
			goto err_out;
		}

		if (hints->domain_attr && hints->domain_attr->name &&
		    strcasecmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->domain_name=%s, supported=%s\n",
				hints->domain_attr->name, PSMX_DOMAIN_NAME);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->type) {
			case FI_EP_UNSPEC:
			case FI_EP_DGRAM:
			case FI_EP_RDM:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->type=%d, supported=%d,%d,%d.\n",
					hints->ep_attr->type, FI_EP_UNSPEC,
					FI_EP_DGRAM, FI_EP_RDM);
				goto err_out;
			}
		}

		/* The requested caps must be a subset of PSMX_CAPS or of
		 * PSMX_CAPS2. */
		if ((hints->caps & PSMX_CAPS) != hints->caps &&
		    (hints->caps & PSMX_CAPS2) != hints->caps) {
			uint64_t psmx_caps = PSMX_CAPS;
			uint64_t psmx_caps2 = PSMX_CAPS2;
			PSMX_INFO_DIFF("hints->caps", hints->caps, psmx_caps, FI_TYPE_CAPS);
			PSMX_INFO_DIFF("alternatively, hints->caps", hints->caps,
				       psmx_caps2, FI_TYPE_CAPS);
			goto err_out;
		}
	}

	/* For API 1.5 and later the default is 0 rather than FI_MR_SCALABLE;
	 * the domain_attr hints further down may still select a mode. */
	if (FI_VERSION_GE(version, FI_VERSION(1,5)))
		mr_mode = 0;

	if (psmx_init_lib())
		return -FI_ENODATA;

	if (psmx_compat_lib) {
		/*
		 * native PSM running over TrueScale doesn't have the issue handled
		 * here. it's only present when PSM is supported via the psm2-compat
		 * library, where the PSM functions are just wrappers around the PSM2
		 * counterparts.
		 *
		 * psm2_ep_num_devunits() may wait for 15 seconds before return
		 * when /dev/hfi1_0 is not present. Check the existence of any hfi1
		 * device interface first to avoid this delay. Note that the devices
		 * don't necessarily appear consecutively so we need to check all
		 * possible device names before returning "no device found" error.
		 * This also means if "/dev/hfi1_0" doesn't exist but other devices
		 * exist, we are still going to see the delay; but that's a rare case.
		 */
		glob_t glob_buf;

		/* Fails only when neither the one-digit nor the two-digit
		 * device-name pattern produces a match. */
		if ((glob("/dev/hfi1_[0-9]", 0, NULL, &glob_buf) != 0) &&
		    (glob("/dev/hfi1_[0-9][0-9]", GLOB_APPEND, NULL, &glob_buf) != 0)) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"no hfi1 device is found.\n");
			return -FI_ENODATA;
		}
		globfree(&glob_buf);
	}

	if (psm_ep_num_devunits(&cnt) || !cnt) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"no PSM device is found.\n");
		return -FI_ENODATA;
	}

	/* From here on src_addr is owned by this function until it is
	 * attached to psmx_info; err_out releases it. */
	src_addr = calloc(1, sizeof(*src_addr));
	if (!src_addr) {
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"failed to allocate src addr.\n");
		return -FI_ENODATA;
	}
	src_addr->signature = 0xFFFF;
	src_addr->unit = PSMX_DEFAULT_UNIT;
	src_addr->port = PSMX_DEFAULT_PORT;
	src_addr->service = PSMX_ANY_SERVICE;

	if (flags & FI_SOURCE) {
		/* node is scanned as "<anything>:<unit>:<port>"; fields that
		 * fail to parse keep the defaults assigned above. */
		if (node)
			sscanf(node, "%*[^:]:%" SCNi8 ":%" SCNu8,
			       &src_addr->unit, &src_addr->port);
		if (service)
			sscanf(service, "%" SCNu32, &src_addr->service);
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n",
			node, service, src_addr->unit, src_addr->port, src_addr->service);
	} else if (node) {
		psm_uuid_t uuid;
		psmx_get_uuid(uuid);

		struct util_ns ns = {
			.port = psmx_uuid_to_port(uuid),
			.name_len = sizeof(*dest_addr),
			.service_len = sizeof(svc),
			.service_cmp = psmx_ns_service_cmp,
			.is_service_wildcard = psmx_ns_is_service_wildcard,
		};
		ofi_ns_init(&ns);

		if (service)
			svc = atoi(service);
		/* Keep the requested service for logging; svc is passed by
		 * address to the resolver. */
		svc0 = svc;
		dest_addr = (psm_epid_t *)ofi_ns_resolve_name(&ns, node, &svc);
		if (dest_addr) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"'%s:%u' resolved to <epid=%"PRIu64">:%u\n",
				node, svc0, *dest_addr, svc);
		} else {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"failed to resolve '%s:%u'.\n", node, svc);
			err = -FI_ENODATA;
			goto err_out;
		}
	}

	/* Full validation of the hints; any mismatch leaves through err_out
	 * with err still -FI_ENODATA. */
	if (hints) {
		switch (hints->addr_format) {
		case FI_FORMAT_UNSPEC:
		case FI_ADDR_PSMX:
			break;
		default:
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"hints->addr_format=%d, supported=%d,%d.\n",
				hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX);
			goto err_out;
		}

		if (hints->ep_attr) {
			switch (hints->ep_attr->protocol) {
			case FI_PROTO_UNSPEC:
			case FI_PROTO_PSMX:
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->protocol=%d, supported=%d %d\n",
					hints->ep_attr->protocol,
					FI_PROTO_UNSPEC, FI_PROTO_PSMX);
				goto err_out;
			}

			/* FI_SHARED_CONTEXT is accepted as a tx context count
			 * in addition to 0 and 1. */
			if (hints->ep_attr->tx_ctx_cnt > 1 &&
			    hints->ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->tx_ctx_cnt=%"PRIu64", supported=0,1\n",
					hints->ep_attr->tx_ctx_cnt);
				goto err_out;
			}

			if (hints->ep_attr->rx_ctx_cnt > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->rx_ctx_cnt=%"PRIu64", supported=0,1\n",
					hints->ep_attr->rx_ctx_cnt);
				goto err_out;
			}
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) !=
			    hints->tx_attr->op_flags) {
				uint64_t psmx_op_flags = PSMX_OP_FLAGS;
				PSMX_INFO_DIFF("hints->tx_attr->of_flags",
					       hints->tx_attr->op_flags, psmx_op_flags,
					       FI_TYPE_OP_FLAGS);
				goto err_out;
			}
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%"PRIu64","
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
		}

		if (hints->rx_attr &&
		    (hints->rx_attr->op_flags & PSMX_OP_FLAGS) !=
		     hints->rx_attr->op_flags) {
			uint64_t psmx_op_flags = PSMX_OP_FLAGS;
			PSMX_INFO_DIFF("hints->rx_attr->of_flags",
				       hints->rx_attr->op_flags, psmx_op_flags,
				       FI_TYPE_OP_FLAGS);
			goto err_out;
		}

		/* FI_CONTEXT mode is demanded when FI_TAGGED is requested, or
		 * FI_MSG is requested while psmx_env.am_msg is off; in every
		 * other case no mode bit is reported. */
		if ((hints->caps & FI_TAGGED) ||
		    ((hints->caps & FI_MSG) && !psmx_env.am_msg)) {
			if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) {
				uint64_t psmx_mode = FI_CONTEXT;
				PSMX_INFO_DIFF("hints->mode", hints->mode, psmx_mode,
					       FI_TYPE_MODE);
				goto err_out;
			}
		} else {
			mode = 0;
		}

		if (hints->domain_attr) {
			switch (hints->domain_attr->av_type) {
			case FI_AV_UNSPEC:
			case FI_AV_MAP:
			case FI_AV_TABLE:
				av_type = hints->domain_attr->av_type;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->av_type=%d, supported=%d %d %d\n",
					hints->domain_attr->av_type,
					FI_AV_UNSPEC, FI_AV_MAP, FI_AV_TABLE);
				goto err_out;
			}

			/* Exactly FI_MR_BASIC or exactly FI_MR_SCALABLE is
			 * honored; either one mixed with other bits is
			 * rejected; anything else leaves mr_mode unchanged. */
			if (hints->domain_attr->mr_mode == FI_MR_BASIC) {
				mr_mode = FI_MR_BASIC;
			} else if (hints->domain_attr->mr_mode == FI_MR_SCALABLE) {
				mr_mode = FI_MR_SCALABLE;
			} else if (hints->domain_attr->mr_mode & (FI_MR_BASIC | FI_MR_SCALABLE)) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->mr_mode has FI_MR_BASIC or FI_MR_SCALABLE "
					"combined with other bits\n");
				goto err_out;
			}

			switch (hints->domain_attr->threading) {
			case FI_THREAD_UNSPEC:
				break;
			case FI_THREAD_FID:
			case FI_THREAD_ENDPOINT:
			case FI_THREAD_COMPLETION:
			case FI_THREAD_DOMAIN:
				threading = hints->domain_attr->threading;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n",
					hints->domain_attr->threading, FI_THREAD_UNSPEC,
					FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION,
					FI_THREAD_DOMAIN);
				goto err_out;
			}

			switch (hints->domain_attr->control_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				control_progress = hints->domain_attr->control_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->control_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			switch (hints->domain_attr->data_progress) {
			case FI_PROGRESS_UNSPEC:
				break;
			case FI_PROGRESS_MANUAL:
			case FI_PROGRESS_AUTO:
				data_progress = hints->domain_attr->data_progress;
				break;
			default:
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->data_progress=%d, supported=%d %d %d\n",
					hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC,
					FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO);
				goto err_out;
			}

			if (hints->domain_attr->caps & FI_SHARED_AV) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->domain_attr->caps=%lx, shared AV is unsupported\n",
					hints->domain_attr->caps);
				goto err_out;
			}
		}

		if (hints->ep_attr) {
			if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->ep_attr->max_msg_size=%"PRIu64","
					"supported=%llu.\n",
					hints->ep_attr->max_msg_size,
					PSMX_MAX_MSG_SIZE);
				goto err_out;
			}
			max_tag_value = ofi_max_tag(hints->ep_attr->mem_tag_format);
		}

		if (hints->tx_attr) {
			if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->tx_attr->msg_order) {
				uint64_t psmx_msg_order = PSMX_MSG_ORDER;
				PSMX_INFO_DIFF("hints->tx_attr->msg_order",
					       hints->tx_attr->msg_order, psmx_msg_order,
					       FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->tx_attr->comp_order) {
				uint64_t psmx_comp_order = PSMX_COMP_ORDER;
				PSMX_INFO_DIFF("hints->tx_attr->comp_order",
					       hints->tx_attr->comp_order, psmx_comp_order,
					       FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			/* NOTE(review): inject_size was already checked in
			 * the first tx_attr block above; this repeats it. */
			if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->inject_size=%ld,"
					"supported=%d.\n",
					hints->tx_attr->inject_size,
					PSMX_INJECT_SIZE);
				goto err_out;
			}
			if (hints->tx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->iov_limit);
				goto err_out;
			}
			if (hints->tx_attr->rma_iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->tx_attr->rma_iov_limit=%ld,"
					"supported=1.\n",
					hints->tx_attr->rma_iov_limit);
				goto err_out;
			}
		}

		if (hints->rx_attr) {
			if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) !=
			    hints->rx_attr->msg_order) {
				uint64_t psmx_msg_order = PSMX_MSG_ORDER;
				PSMX_INFO_DIFF("hints->rx_attr->msg_order",
					       hints->rx_attr->msg_order, psmx_msg_order,
					       FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) !=
			    hints->rx_attr->comp_order) {
				uint64_t psmx_comp_order = PSMX_COMP_ORDER;
				PSMX_INFO_DIFF("hints->rx_attr->comp_order",
					       hints->rx_attr->comp_order, psmx_comp_order,
					       FI_TYPE_MSG_ORDER);
				goto err_out;
			}
			if (hints->rx_attr->iov_limit > 1) {
				FI_INFO(&psmx_prov, FI_LOG_CORE,
					"hints->rx_attr->iov_limit=%ld,"
					"supported=1.\n",
					hints->rx_attr->iov_limit);
				goto err_out;
			}
		}

		caps = hints->caps;

		/* TODO: check other fields of hints */
	}

	if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0)
		goto err_out;

	psmx_info = fi_allocinfo();
	if (!psmx_info) {
		err = -FI_ENOMEM;
		goto err_out;
	}

	/* Endpoint attributes */
	psmx_info->ep_attr->type = ep_type;
	psmx_info->ep_attr->protocol = FI_PROTO_PSMX;
	psmx_info->ep_attr->protocol_version = PSM_VERNO;
	psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE;
	psmx_info->ep_attr->max_order_raw_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->max_order_war_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->max_order_waw_size = PSMX_RMA_ORDER_SIZE;
	psmx_info->ep_attr->mem_tag_format = ofi_tag_format(max_tag_value);
	psmx_info->ep_attr->tx_ctx_cnt = 1;
	psmx_info->ep_attr->rx_ctx_cnt = 1;

	/* Domain attributes */
	psmx_info->domain_attr->threading = threading;
	psmx_info->domain_attr->control_progress = control_progress;
	psmx_info->domain_attr->data_progress = data_progress;
	psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME);
	psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED;
	psmx_info->domain_attr->av_type = av_type;
	psmx_info->domain_attr->mr_mode = mr_mode;
	psmx_info->domain_attr->mr_key_size = sizeof(uint64_t);
	psmx_info->domain_attr->cq_data_size = 4;
	psmx_info->domain_attr->cq_cnt = 65535;
	psmx_info->domain_attr->ep_cnt = 65535;
	psmx_info->domain_attr->tx_ctx_cnt = 1;
	psmx_info->domain_attr->rx_ctx_cnt = 1;
	psmx_info->domain_attr->max_ep_tx_ctx = 1;
	psmx_info->domain_attr->max_ep_rx_ctx = 1;
	psmx_info->domain_attr->max_ep_stx_ctx = 65535;
	psmx_info->domain_attr->max_ep_srx_ctx = 0;
	psmx_info->domain_attr->cntr_cnt = 65535;
	psmx_info->domain_attr->mr_iov_limit = 65535;
	psmx_info->domain_attr->caps = PSMX_DOM_CAPS;
	psmx_info->domain_attr->mode = 0;
	psmx_info->domain_attr->mr_cnt = 65535;

	/* Top-level fields; the two address buffers are attached to
	 * psmx_info here. */
	psmx_info->next = NULL;
	/* Echo the caller's caps when given, else the value left in caps by
	 * psmx_reserve_tag_bits(). */
	psmx_info->caps = (hints && hints->caps) ? hints->caps : caps;
	psmx_info->mode = mode;
	psmx_info->addr_format = FI_ADDR_PSMX;
	psmx_info->src_addr = src_addr;
	psmx_info->src_addrlen = sizeof(*src_addr);
	psmx_info->dest_addr = dest_addr;
	psmx_info->dest_addrlen = sizeof(*dest_addr);
	psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME);
	psmx_info->fabric_attr->prov_name = NULL;
	psmx_info->fabric_attr->prov_version = PSMX_VERSION;

	/* Transmit attributes */
	psmx_info->tx_attr->caps = psmx_info->caps;
	psmx_info->tx_attr->mode = psmx_info->mode;
	psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags)
					? hints->tx_attr->op_flags : 0;
	psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE;
	psmx_info->tx_attr->size = UINT64_MAX;
	psmx_info->tx_attr->iov_limit = 1;
	psmx_info->tx_attr->rma_iov_limit = 1;

	/* Receive attributes */
	psmx_info->rx_attr->caps = psmx_info->caps;
	psmx_info->rx_attr->mode = psmx_info->mode;
	psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags)
					? hints->rx_attr->op_flags : 0;
	psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER;
	psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER;
	psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */
	psmx_info->rx_attr->size = UINT64_MAX;
	psmx_info->rx_attr->iov_limit = 1;

	*info = psmx_info;
	return 0;

err_out:
	/* Neither buffer has been attached to an fi_info on this path;
	 * free(NULL) is a no-op for whichever was never allocated. */
	free(dest_addr);
	free(src_addr);

	return err;
}

/*
 * psmx_fini - provider cleanup hook.
 *
 * Decrements psmx_init_count; once it reaches zero with the PSM library
 * initialized, finalizes PSM unless a fabric is still active.
 */
static void psmx_fini(void)
{
	FI_INFO(&psmx_prov, FI_LOG_CORE, "\n");

	if (! --psmx_init_count && psmx_lib_initialized) {
		/* This function is called from a library destructor, which is called
		 * automatically when exit() is called. The call to psm_finalize()
		 * might cause deadlock if the application is terminated with Ctrl-C
		 * -- the application could be inside a PSM call, holding a lock that
		 * psm_finalize() tries to acquire. This can be avoided by only
		 * calling psm_finalize() when PSM is guaranteed to be unused.
		 */
		if (psmx_active_fabric) {
			FI_INFO(&psmx_prov, FI_LOG_CORE,
				"psmx_active_fabric != NULL, skip psm_finalize\n");
		} else {
			psm_finalize();
			psmx_lib_initialized = 0;
		}
	}
}
static int psmx_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { struct fi_info *psmx_info; uint32_t cnt = 0; psm_epid_t *dest_addr = NULL; struct psmx_src_name *src_addr; int ep_type = FI_EP_RDM; int av_type = FI_AV_UNSPEC; uint64_t mode = FI_CONTEXT; enum fi_mr_mode mr_mode = FI_MR_SCALABLE; enum fi_threading threading = FI_THREAD_COMPLETION; enum fi_progress control_progress = FI_PROGRESS_MANUAL; enum fi_progress data_progress = FI_PROGRESS_MANUAL; int caps = 0; uint64_t max_tag_value = 0; int err = -FI_ENODATA; int svc0, svc = PSMX_ANY_SERVICE; FI_INFO(&psmx_prov, FI_LOG_CORE,"\n"); *info = NULL; if (psmx_init_lib()) return -FI_ENODATA; if (psm_ep_num_devunits(&cnt) || !cnt) { FI_INFO(&psmx_prov, FI_LOG_CORE, "no PSM device is found.\n"); return -FI_ENODATA; } psmx_init_env(); src_addr = calloc(1, sizeof(*src_addr)); if (!src_addr) { FI_INFO(&psmx_prov, FI_LOG_CORE, "failed to allocate src addr.\n"); return -FI_ENODATA; } src_addr->unit = PSMX_DEFAULT_UNIT; src_addr->port = PSMX_DEFAULT_PORT; src_addr->service = PSMX_ANY_SERVICE; if (flags & FI_SOURCE) { if (node) sscanf(node, "%*[^:]:%d:%d", &src_addr->unit, &src_addr->port); if (service) sscanf(service, "%d", &src_addr->service); FI_INFO(&psmx_prov, FI_LOG_CORE, "node '%s' service '%s' converted to <unit=%d, port=%d, service=%d>\n", node, service, src_addr->unit, src_addr->port, src_addr->service); } else if (node) { if (service) svc = atoi(service); svc0 = svc; dest_addr = psmx_ns_resolve_name(node, &svc); if (dest_addr) { FI_INFO(&psmx_prov, FI_LOG_CORE, "'%s:%u' resolved to <epid=0x%llx>:%u\n", node, svc0, *dest_addr, svc); } else { FI_INFO(&psmx_prov, FI_LOG_CORE, "failed to resolve '%s:%u'.\n", node, svc); err = -FI_ENODATA; goto err_out; } } if (hints) { switch (hints->addr_format) { case FI_FORMAT_UNSPEC: case FI_ADDR_PSMX: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->addr_format=%d, supported=%d,%d.\n", 
hints->addr_format, FI_FORMAT_UNSPEC, FI_ADDR_PSMX); goto err_out; } if (hints->ep_attr) { switch (hints->ep_attr->type) { case FI_EP_UNSPEC: case FI_EP_DGRAM: case FI_EP_RDM: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->type=%d, supported=%d,%d,%d.\n", hints->ep_attr->type, FI_EP_UNSPEC, FI_EP_DGRAM, FI_EP_RDM); goto err_out; } switch (hints->ep_attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_PSMX: break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->protocol=%d, supported=%d %d\n", hints->ep_attr->protocol, FI_PROTO_UNSPEC, FI_PROTO_PSMX); goto err_out; } if (hints->ep_attr->tx_ctx_cnt > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->tx_ctx_cnt=%d, supported=0,1\n", hints->ep_attr->tx_ctx_cnt); goto err_out; } if (hints->ep_attr->rx_ctx_cnt > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->rx_ctx_cnt=%d, supported=0,1\n", hints->ep_attr->rx_ctx_cnt); goto err_out; } } if ((hints->caps & PSMX_CAPS) != hints->caps && (hints->caps & PSMX_CAPS2) != hints->caps) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->caps=0x%llx, supported=0x%llx,0x%llx\n", hints->caps, PSMX_CAPS, PSMX_CAPS2); goto err_out; } if (hints->tx_attr) { if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) != hints->tx_attr->op_flags) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx->flags=0x%llx, " "supported=0x%llx\n", hints->tx_attr->op_flags, PSMX_OP_FLAGS); goto err_out; } if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->inject_size=%ld," "supported=%ld.\n", hints->tx_attr->inject_size, PSMX_INJECT_SIZE); goto err_out; } } if (hints->rx_attr && (hints->rx_attr->op_flags & PSMX_OP_FLAGS) != hints->rx_attr->op_flags) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx->flags=0x%llx, supported=0x%llx\n", hints->rx_attr->op_flags, PSMX_OP_FLAGS); goto err_out; } if ((hints->caps & FI_TAGGED) || ((hints->caps & FI_MSG) && !psmx_env.am_msg)) { if ((hints->mode & FI_CONTEXT) != FI_CONTEXT) { FI_INFO(&psmx_prov, 
FI_LOG_CORE, "hints->mode=0x%llx, required=0x%llx\n", hints->mode, FI_CONTEXT); goto err_out; } } else { mode = 0; } if (hints->fabric_attr && hints->fabric_attr->name && strcmp(hints->fabric_attr->name, PSMX_FABRIC_NAME)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->fabric_name=%s, supported=psm\n", hints->fabric_attr->name); goto err_out; } if (hints->domain_attr) { if (hints->domain_attr->name && strcmp(hints->domain_attr->name, PSMX_DOMAIN_NAME)) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_name=%s, supported=psm\n", hints->domain_attr->name); goto err_out; } switch (hints->domain_attr->av_type) { case FI_AV_UNSPEC: case FI_AV_MAP: case FI_AV_TABLE: av_type = hints->domain_attr->av_type; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->av_type=%d, supported=%d %d %d\n", hints->domain_attr->av_type, FI_AV_UNSPEC, FI_AV_MAP, FI_AV_TABLE); goto err_out; } switch (hints->domain_attr->mr_mode) { case FI_MR_UNSPEC: break; case FI_MR_BASIC: case FI_MR_SCALABLE: mr_mode = hints->domain_attr->mr_mode; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->mr_mode=%d, supported=%d %d %d\n", hints->domain_attr->mr_mode, FI_MR_UNSPEC, FI_MR_BASIC, FI_MR_SCALABLE); goto err_out; } switch (hints->domain_attr->threading) { case FI_THREAD_UNSPEC: break; case FI_THREAD_FID: case FI_THREAD_ENDPOINT: case FI_THREAD_COMPLETION: case FI_THREAD_DOMAIN: threading = hints->domain_attr->threading; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->threading=%d, supported=%d %d %d %d %d\n", hints->domain_attr->threading, FI_THREAD_UNSPEC, FI_THREAD_FID, FI_THREAD_ENDPOINT, FI_THREAD_COMPLETION, FI_THREAD_DOMAIN); goto err_out; } switch (hints->domain_attr->control_progress) { case FI_PROGRESS_UNSPEC: break; case FI_PROGRESS_MANUAL: case FI_PROGRESS_AUTO: control_progress = hints->domain_attr->control_progress; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->control_progress=%d, supported=%d %d %d\n", 
hints->domain_attr->control_progress, FI_PROGRESS_UNSPEC, FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO); goto err_out; } switch (hints->domain_attr->data_progress) { case FI_PROGRESS_UNSPEC: break; case FI_PROGRESS_MANUAL: case FI_PROGRESS_AUTO: data_progress = hints->domain_attr->data_progress; break; default: FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->data_progress=%d, supported=%d %d %d\n", hints->domain_attr->data_progress, FI_PROGRESS_UNSPEC, FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO); goto err_out; } if (hints->domain_attr->caps & FI_SHARED_AV) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->domain_attr->caps=%lx, shared AV is unsupported\n", hints->domain_attr->caps); goto err_out; } } if (hints->ep_attr) { if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->ep_attr->max_msg_size=%ld," "supported=%ld.\n", hints->ep_attr->max_msg_size, PSMX_MAX_MSG_SIZE); goto err_out; } max_tag_value = fi_tag_bits(hints->ep_attr->mem_tag_format); } if (hints->tx_attr) { if ((hints->tx_attr->msg_order & PSMX_MSG_ORDER) != hints->tx_attr->msg_order) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->msg_order=%lx," "supported=%lx.\n", hints->tx_attr->msg_order, PSMX_MSG_ORDER); goto err_out; } if ((hints->tx_attr->comp_order & PSMX_COMP_ORDER) != hints->tx_attr->comp_order) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->msg_order=%lx," "supported=%lx.\n", hints->tx_attr->comp_order, PSMX_COMP_ORDER); goto err_out; } if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->inject_size=%ld," "supported=%d.\n", hints->tx_attr->inject_size, PSMX_INJECT_SIZE); goto err_out; } if (hints->tx_attr->iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->iov_limit=%ld," "supported=1.\n", hints->tx_attr->iov_limit); goto err_out; } if (hints->tx_attr->rma_iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->tx_attr->rma_iov_limit=%ld," "supported=1.\n", 
hints->tx_attr->rma_iov_limit); goto err_out; } } if (hints->rx_attr) { if ((hints->rx_attr->msg_order & PSMX_MSG_ORDER) != hints->rx_attr->msg_order) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx_attr->msg_order=%lx," "supported=%lx.\n", hints->rx_attr->msg_order, PSMX_MSG_ORDER); goto err_out; } if ((hints->rx_attr->comp_order & PSMX_COMP_ORDER) != hints->rx_attr->comp_order) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx_attr->msg_order=%lx," "supported=%lx.\n", hints->rx_attr->comp_order, PSMX_COMP_ORDER); goto err_out; } if (hints->rx_attr->iov_limit > 1) { FI_INFO(&psmx_prov, FI_LOG_CORE, "hints->rx_attr->iov_limit=%ld," "supported=1.\n", hints->rx_attr->iov_limit); goto err_out; } } caps = hints->caps; /* TODO: check other fields of hints */ } if (psmx_reserve_tag_bits(&caps, &max_tag_value) < 0) goto err_out; psmx_info = fi_allocinfo(); if (!psmx_info) { err = -FI_ENOMEM; goto err_out; } psmx_info->ep_attr->type = ep_type; psmx_info->ep_attr->protocol = FI_PROTO_PSMX; psmx_info->ep_attr->protocol_version = PSM_VERNO; psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE; psmx_info->ep_attr->mem_tag_format = fi_tag_format(max_tag_value); psmx_info->ep_attr->tx_ctx_cnt = 1; psmx_info->ep_attr->rx_ctx_cnt = 1; psmx_info->domain_attr->threading = threading; psmx_info->domain_attr->control_progress = control_progress; psmx_info->domain_attr->data_progress = data_progress; psmx_info->domain_attr->name = strdup(PSMX_DOMAIN_NAME); psmx_info->domain_attr->resource_mgmt = FI_RM_ENABLED; psmx_info->domain_attr->av_type = av_type; psmx_info->domain_attr->mr_mode = mr_mode; psmx_info->domain_attr->mr_key_size = sizeof(uint64_t); psmx_info->domain_attr->cq_data_size = 4; psmx_info->domain_attr->cq_cnt = 65535; psmx_info->domain_attr->ep_cnt = 65535; psmx_info->domain_attr->tx_ctx_cnt = 1; psmx_info->domain_attr->rx_ctx_cnt = 1; psmx_info->domain_attr->max_ep_tx_ctx = 1; psmx_info->domain_attr->max_ep_rx_ctx = 1; psmx_info->domain_attr->max_ep_stx_ctx = 65535; 
psmx_info->domain_attr->max_ep_srx_ctx = 0; psmx_info->domain_attr->cntr_cnt = 65535; psmx_info->domain_attr->mr_iov_limit = 65535; psmx_info->domain_attr->caps = PSMX_DOM_CAPS; psmx_info->domain_attr->mode = 0; psmx_info->next = NULL; psmx_info->caps = (hints && hints->caps) ? hints->caps : caps; psmx_info->mode = mode; psmx_info->addr_format = FI_ADDR_PSMX; psmx_info->src_addr = src_addr; psmx_info->src_addrlen = sizeof(*src_addr); psmx_info->dest_addr = dest_addr; psmx_info->dest_addrlen = sizeof(*dest_addr); psmx_info->fabric_attr->name = strdup(PSMX_FABRIC_NAME); psmx_info->fabric_attr->prov_name = NULL; psmx_info->fabric_attr->prov_version = PSMX_VERSION; psmx_info->tx_attr->caps = psmx_info->caps; psmx_info->tx_attr->mode = psmx_info->mode; psmx_info->tx_attr->op_flags = (hints && hints->tx_attr && hints->tx_attr->op_flags) ? hints->tx_attr->op_flags : 0; psmx_info->tx_attr->msg_order = PSMX_MSG_ORDER; psmx_info->tx_attr->comp_order = PSMX_COMP_ORDER; psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE; psmx_info->tx_attr->size = UINT64_MAX; psmx_info->tx_attr->iov_limit = 1; psmx_info->tx_attr->rma_iov_limit = 1; psmx_info->rx_attr->caps = psmx_info->caps; psmx_info->rx_attr->mode = psmx_info->mode; psmx_info->rx_attr->op_flags = (hints && hints->rx_attr && hints->rx_attr->op_flags) ? hints->rx_attr->op_flags : 0; psmx_info->rx_attr->msg_order = PSMX_MSG_ORDER; psmx_info->rx_attr->comp_order = PSMX_COMP_ORDER; psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */ psmx_info->rx_attr->size = UINT64_MAX; psmx_info->rx_attr->iov_limit = 1; *info = psmx_info; return 0; err_out: free(dest_addr); free(src_addr); return err; }