static int pspsm_open_endpoint(void)
{
	psm_error_t ret;

	if (!pspsm_ep) {
		struct psm_ep_open_opts opts;

		ret = psm_ep_open_opts_get_defaults(&opts);
		if (ret != PSM_OK) goto err;

		ret = psm_ep_open(pspsm_uuid.as_uuid, &opts,
				  &pspsm_ep, &pspsm_epid);
		if (ret != PSM_OK) goto err;

		sendbuf = malloc(pscom.env.readahead);

		pspsm_dprint(2, "pspsm_open_endpoint: OK");
	}
	return 0;

err:
	pspsm_err(psm_error_get_string(ret));
	pspsm_dprint(1, "pspsm_open_endpoint: %s", pspsm_err_str);
	return -1;
}
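For symmetry, a teardown helper would close the endpoint and free the bounce buffer (note the example above never frees sendbuf or checks the malloc result). A minimal sketch, assuming the same pspsm_ep/sendbuf globals and a 2-second grace period; the real pspsm_close_endpoint is not shown in this excerpt:

/* Hypothetical counterpart to pspsm_open_endpoint(): close the PSM
 * endpoint gracefully and release the send bounce buffer. */
static int pspsm_close_endpoint(void)
{
	psm_error_t ret;

	if (pspsm_ep) {
		/* Allow up to 2 seconds for a graceful close (assumption;
		 * the appropriate timeout is implementation-specific). */
		ret = psm_ep_close(pspsm_ep, PSM_EP_CLOSE_GRACEFUL,
				   2 * 1000000000LL);
		if (ret != PSM_OK) {
			/* Fall back to a forced close. */
			psm_ep_close(pspsm_ep, PSM_EP_CLOSE_FORCE, 0);
		}
		pspsm_ep = NULL;

		free(sendbuf);
		sendbuf = NULL;

		pspsm_dprint(2, "pspsm_close_endpoint: OK");
	}
	return 0;
}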
int try_to_open_psm_endpoint(psm_ep_t *ep,	// output endpoint handle
			     psm_epid_t *epid,	// output endpoint identifier
			     int unit)
{
	struct psm_ep_open_opts epopts;
	psm_uuid_t job_uuid;

	// Let PSM assign its default values to the endpoint options.
	psm_ep_open_opts_get_defaults(&epopts);

	// We want a stricter timeout and a specific unit.
	epopts.timeout = 15*1e9;	// 15 second timeout
	epopts.unit = unit;		// We want a specific unit, -1 would let
					// PSM choose the unit for us.

	// We've already set affinity, don't let PSM do so if it wants to.
	if (epopts.affinity == PSM_EP_OPEN_AFFINITY_SET)
		epopts.affinity = PSM_EP_OPEN_AFFINITY_SKIP;

	// ENDPOINT_UUID is set to the same value in the environment of all the
	// processes that wish to communicate over PSM and was generated by
	// the process spawning utility.
	/* c = getenv("ENDPOINT_UUID"); */
	/* if (c && *c) */
	/*	implementor_string_to_16byte_packing(c, job_uuid); */
	/* else { */
	/*	fprintf(stderr, "Can't find UUID for endpoint\n"); */
	/*	return -1; */
	/* } */
	//psm_uuid_generate(job_uuid);
	memset(&job_uuid, 1, sizeof(psm_uuid_t));

	/* int i; */
	/* for (i = 0; i < 16; i++) { */
	/*	printf("%lu\n", job_uuid[i]); */
	/* } */
	printf("%u\n", job_uuid[0]);

	// Assume we don't want to handle errors here.
	psm_ep_open(job_uuid, &epopts, ep, epid);
	return 1;
}
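A caller must initialize the PSM library before any other PSM call. A minimal sketch, assuming the PSM_VERNO_MAJOR/PSM_VERNO_MINOR macros from psm.h and unit 0; error handling is kept as thin as in the example above:

/* Hypothetical caller: initialize PSM, then open an endpoint on unit 0. */
int main(void)
{
	psm_ep_t ep;
	psm_epid_t epid;
	int verno_major = PSM_VERNO_MAJOR;
	int verno_minor = PSM_VERNO_MINOR;

	/* psm_init() must succeed before any other PSM call. */
	if (psm_init(&verno_major, &verno_minor) != PSM_OK)
		return 1;

	if (try_to_open_psm_endpoint(&ep, &epid, 0) != 1)
		return 1;

	/* ... connect to peers and communicate over the endpoint ... */

	psm_ep_close(ep, PSM_EP_CLOSE_GRACEFUL, 5 * 1000000000LL);
	psm_finalize();
	return 0;
}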
int try_open_endpoint_and_initialize_mq(
	psm_ep_t *ep,		// endpoint handle
	psm_epid_t *epid,	// unique endpoint ID
	psm_uuid_t job_uuid,	// unique job uuid, for ep_open
	psm_mq_t *mq,		// MQ handle initialized on endpoint 'ep'
	uint64_t communicator_bits)	// Where we store our communicator or
					// context bits in the 64-bit tag.
{
	// Simplified open, see psm_ep_open documentation for more info.
	psm_ep_open(job_uuid,
		    NULL,	// no options
		    ep, epid);

	// We initialize a matched queue by telling PSM the bits that are
	// order-significant in the tag. Point-to-point ordering will not be
	// maintained between senders where the communicator bits are not the
	// same.
	psm_mq_init(*ep,
		    communicator_bits,
		    NULL,	// no other MQ options
		    0,		// 0 options passed
		    mq);	// newly initialized matched queue
	return 1;
}
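Once the MQ exists, tags carry the communicator bits that were declared order-significant. A hedged sketch of a blocking send and a matching receive, assuming a peer address peer_addr already obtained from a prior psm_ep_connect() call; the tag layout (communicator bits OR'd with a small message tag) is illustrative only:

/* Hypothetical exchange over an initialized MQ. 'peer_addr' is assumed
 * to come from a prior psm_ep_connect() call (not shown). */
int send_and_recv(psm_mq_t mq, psm_epaddr_t peer_addr,
		  uint64_t communicator_bits)
{
	char outbuf[64] = "hello";
	char inbuf[64];
	psm_mq_req_t req;
	psm_mq_status_t status;
	uint64_t tag = communicator_bits | 42;	/* illustrative tag value */

	/* Post the receive first; match the tag exactly (tagsel all-ones). */
	if (psm_mq_irecv(mq, tag, (uint64_t) -1, 0,
			 inbuf, sizeof(inbuf), NULL, &req) != PSM_OK)
		return -1;

	/* Blocking send with the same tag. */
	if (psm_mq_send(mq, peer_addr, 0, tag,
			outbuf, sizeof(outbuf)) != PSM_OK)
		return -1;

	/* Wait for the receive to complete. */
	if (psm_mq_wait(&req, &status) != PSM_OK)
		return -1;

	return 0;
}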
int ompi_mtl_psm_module_init(int local_rank, int num_local_procs)
{
    psm_error_t err;
    psm_ep_t ep;                 /* endpoint handle */
    psm_mq_t mq;
    psm_epid_t epid;             /* unique lid+port identifier */
    psm_uuid_t unique_job_key;
    struct psm_ep_open_opts ep_opt;
    unsigned long long *uu = (unsigned long long *) unique_job_key;
    char *generated_key;
    char env_string[256];

    generated_key = getenv("OMPI_MCA_orte_precondition_transports");
    memset(uu, 0, sizeof(psm_uuid_t));

    if (!generated_key || (strlen(generated_key) != 33) ||
        sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) {
        opal_show_help("help-mtl-psm.txt",
                       "no uuid present", true,
                       generated_key ? "could not be parsed from" :
                                       "not present in",
                       ompi_process_info.nodename);
        return OMPI_ERROR;
    }

    /* Handle our own errors for opening endpoints */
    psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);

    /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
     * contexts correctly. */
    snprintf(env_string, sizeof(env_string), "%d", local_rank);
    setenv("MPI_LOCALRANKID", env_string, 0);
    snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
    setenv("MPI_LOCALNRANKS", env_string, 0);

    /* Setup the endpoint options. */
    bzero((void*) &ep_opt, sizeof(ep_opt));
    ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9;
    ep_opt.unit = ompi_mtl_psm.ib_unit;
    ep_opt.affinity = PSM_EP_OPEN_AFFINITY_SKIP; /* do not let PSM set affinity */
    ep_opt.shm_mbytes = -1;      /* Choose PSM defaults */
    ep_opt.sendbufs_num = -1;    /* Choose PSM defaults */

#if PSM_VERNO >= 0x0101
    ep_opt.network_pkey = ompi_mtl_psm.ib_pkey;
#endif

#if PSM_VERNO >= 0x0107
    ep_opt.port = ompi_mtl_psm.ib_port;
    ep_opt.outsl = ompi_mtl_psm.ib_service_level;
#endif

#if PSM_VERNO >= 0x010d
    ep_opt.service_id = ompi_mtl_psm.ib_service_id;
    ep_opt.path_res_type = ompi_mtl_psm.path_res_type;
#endif

    /* Open PSM endpoint */
    err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
    if (err) {
        opal_show_help("help-mtl-psm.txt",
                       "unable to open endpoint", true,
                       psm_error_get_string(err));
        return OMPI_ERROR;
    }

    /* Future errors are handled by the default error handler */
    psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);

    err = psm_mq_init(ep, 0xffff000000000000ULL, NULL, 0, &mq);
    if (err) {
        opal_show_help("help-mtl-psm.txt",
                       "psm init", true,
                       psm_error_get_string(err));
        return OMPI_ERROR;
    }

    ompi_mtl_psm.ep   = ep;
    ompi_mtl_psm.epid = epid;
    ompi_mtl_psm.mq   = mq;

    if (OMPI_SUCCESS !=
        ompi_modex_send(&mca_mtl_psm_component.super.mtl_version,
                        &ompi_mtl_psm.epid, sizeof(psm_epid_t))) {
        opal_output(0, "Open MPI couldn't send PSM epid to head node process");
        return OMPI_ERROR;
    }

    /* register the psm progress function */
    opal_progress_register(ompi_mtl_psm_progress);

    return OMPI_SUCCESS;
}
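The sscanf() above expects a 33-character key of the form "%016llx-%016llx" (two 16-hex-digit halves joined by '-'). A sketch of how a launcher could seed that variable; the random source here is an assumption, and Open MPI's actual generator lives in the ORTE layer rather than in this helper:

/* Hypothetical generator for OMPI_MCA_orte_precondition_transports:
 * two 64-bit random halves, 16 hex digits each, separated by '-',
 * 33 characters total -- matching the sscanf() parse above. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

static void set_precondition_transports(void)
{
    char key[34];
    uint64_t hi = ((uint64_t) rand() << 32) ^ (uint64_t) rand();
    uint64_t lo = ((uint64_t) rand() << 32) ^ (uint64_t) rand();

    snprintf(key, sizeof(key), "%016llx-%016llx",
             (unsigned long long) hi, (unsigned long long) lo);
    setenv("OMPI_MCA_orte_precondition_transports", key, 1);
}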
static int psmx_domain_init(struct psmx_fid_domain *domain,
			    struct psmx_src_name *src_addr)
{
	struct psmx_fid_fabric *fabric = domain->fabric;
	struct psm_ep_open_opts opts;
	int err;

	psm_ep_open_opts_get_defaults(&opts);

	FI_INFO(&psmx_prov, FI_LOG_CORE,
		"uuid: %s\n", psmx_uuid_to_string(fabric->uuid));

	if (src_addr) {
		opts.unit = src_addr->unit;
		opts.port = src_addr->port;
		FI_INFO(&psmx_prov, FI_LOG_CORE,
			"ep_open_opts: unit=%d port=%u\n", opts.unit, opts.port);
	}

	err = psm_ep_open(fabric->uuid, &opts,
			  &domain->psm_ep, &domain->psm_epid);
	if (err != PSM_OK) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"psm_ep_open returns %d, errno=%d\n", err, errno);
		err = psmx_errno(err);
		goto err_out;
	}

	FI_INFO(&psmx_prov, FI_LOG_CORE,
		"epid: 0x%016lx\n", domain->psm_epid);

	err = psm_mq_init(domain->psm_ep, PSM_MQ_ORDERMASK_ALL,
			  NULL, 0, &domain->psm_mq);
	if (err != PSM_OK) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"psm_mq_init returns %d, errno=%d\n", err, errno);
		err = psmx_errno(err);
		goto err_out_close_ep;
	}

	err = fastlock_init(&domain->mr_lock);
	if (err) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"fastlock_init(mr_lock) returns %d\n", err);
		goto err_out_finalize_mq;
	}

	domain->mr_map = rbtNew(&psmx_key_compare);
	if (!domain->mr_map) {
		FI_WARN(&psmx_prov, FI_LOG_CORE, "rbtNew failed\n");
		goto err_out_destroy_mr_lock;
	}

	domain->mr_reserved_key = 1;

	err = fastlock_init(&domain->poll_lock);
	if (err) {
		FI_WARN(&psmx_prov, FI_LOG_CORE,
			"fastlock_init(poll_lock) returns %d\n", err);
		goto err_out_delete_mr_map;
	}

	/* Set active domain before psmx_domain_enable_ep() installs the
	 * AM handlers to ensure that psmx_active_fabric->active_domain
	 * is always non-NULL inside the handlers. Note that the value of
	 * active_domain becomes NULL again only when the domain is closed.
	 * At that time the AM handlers are gone with the PSM endpoint.
	 */
	fabric->active_domain = domain;

	if (psmx_domain_enable_ep(domain, NULL) < 0)
		goto err_out_reset_active_domain;

	if (domain->progress_thread_enabled)
		psmx_domain_start_progress(domain);

	return 0;

err_out_reset_active_domain:
	fabric->active_domain = NULL;
	fastlock_destroy(&domain->poll_lock);

err_out_delete_mr_map:
	rbtDelete(domain->mr_map);

err_out_destroy_mr_lock:
	fastlock_destroy(&domain->mr_lock);

err_out_finalize_mq:
	psm_mq_finalize(domain->psm_mq);

err_out_close_ep:
	if (psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_GRACEFUL,
			 (int64_t) psmx_env.timeout * 1000000000LL) != PSM_OK)
		psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_FORCE, 0);

err_out:
	return err;
}
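Opening the endpoint only creates the local side; reaching a peer requires resolving its epid to an address. A minimal sketch using psm_ep_connect() against the domain opened above, assuming the peer's epid arrived out of band (e.g. through the provider's address vector); the helper name and 5-second timeout are illustrative:

/* Hypothetical: connect the domain's endpoint to one remote epid.
 * 'peer_epid' is assumed to have been exchanged out of band. */
static int psmx_connect_peer(struct psmx_fid_domain *domain,
			     psm_epid_t peer_epid,
			     psm_epaddr_t *peer_addr)
{
	psm_error_t errs[1];

	if (psm_ep_connect(domain->psm_ep, 1, &peer_epid,
			   NULL,	/* connect all entries in the array */
			   errs, peer_addr,
			   5 * 1000000000LL	/* 5s timeout (assumption) */
			   ) != PSM_OK)
		return -1;

	return 0;
}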