/*
 * Connect con_info to the remote endpoint described by info_msg.
 *
 * The protocol version carried in info_msg is compared against
 * PSPSM_PROTOCOL_VERSION before psm_ep_connect() is attempted.
 * con_info->send_id is taken from info_msg->id as soon as the connect call
 * has returned, independent of its result.
 *
 * Returns 0 on success, -1 on a protocol mismatch or a connect failure. In
 * both failure cases the reason is recorded through pspsm_err().
 */
static int pspsm_con_connect(pspsm_con_info_t *con_info, pspsm_info_msg_t *info_msg)
{
	psm_error_t rc;
	psm_error_t ep_rc; /* error slot handed to psm_ep_connect(); never read here */

	if (memcmp(info_msg->protocol_version, PSPSM_PROTOCOL_VERSION,
		   sizeof(info_msg->protocol_version)) != 0) {
		char msg[80];

		snprintf(msg, sizeof(msg), "protocol error : '%.8s' != '%.8s'",
			 info_msg->protocol_version, PSPSM_PROTOCOL_VERSION);
		pspsm_err(msg);
		pspsm_dprint(1, "pspsm_con_connect: %s", pspsm_err_str);
		return -1;
	}

	rc = psm_ep_connect(pspsm_ep, 1, &info_msg->epid, NULL, &ep_rc,
			    &con_info->epaddr, 0);
	con_info->send_id = info_msg->id;

	if (rc != PSM_OK) {
		pspsm_err(psm_error_get_string(rc));
		pspsm_dprint(1, "pspsm_con_connect: %s", pspsm_err_str);
		return -1;
	}

	pspsm_dprint(2, "pspsm_con_connect: OK");
	pspsm_dprint(2, "sending with %"PRIx64", receiving %"PRIx64,
		     con_info->send_id, con_info->recv_id);
	return 0;
}
static int pspsm_open_endpoint(void) { psm_error_t ret; if (!pspsm_ep){ struct psm_ep_open_opts opts; ret = psm_ep_open_opts_get_defaults(&opts); if (ret != PSM_OK) goto err; ret = psm_ep_open(pspsm_uuid.as_uuid, &opts, &pspsm_ep, &pspsm_epid); if (ret != PSM_OK) goto err; sendbuf = malloc(pscom.env.readahead); pspsm_dprint(2, "pspsm_open_endpoint: OK"); } return 0; err: pspsm_err(psm_error_get_string(ret)); pspsm_dprint(1, "pspsm_open_endpoint: %s", pspsm_err_str); return -1; }
static int pscom_psm_peek() { unsigned read_progress = 0; psm_mq_req_t req; psm_mq_status_t status; psm_error_t ret; do { ret = psm_mq_ipeek(pspsm_mq, &req, /* status */ NULL); if (ret == PSM_MQ_INCOMPLETE) return read_progress; if (ret != PSM_OK) goto err; ret = psm_mq_test(&req, &status); if (ret != PSM_OK) goto err; read_progress += pscom_psm_process(&status); } while (1); err: pspsm_err(psm_error_get_string(ret)); pspsm_dprint(1, "pscom_psm_peek: %s", pspsm_err_str); return read_progress; }
/*
 * Close the PSM endpoint.
 *
 * The real cleanup is currently compiled out (see the note below), so this
 * function is a no-op that always returns 0.
 *
 * The disabled branch is kept for reference. It releases sendbuf whether or
 * not psm_ep_close() succeeds and resets the pointer afterwards, so that
 * re-enabling it neither leaks the buffer on error nor leaves a dangling
 * pointer behind. It returns 0 on success and -1 on failure.
 */
static int pspsm_close_endpoint(void)
{
#if 1
	/*
	 * psm_ep_close() SegFaults. A sleep(1) before sometimes helps,
	 * disabling the cleanup always helps.
	 * (Seen with infinipath-libs-3.2-32129.1162_rhel6_qlc.x86_64)
	 */
	return 0;
#else
	psm_error_t ret;

	if (pspsm_ep) {
		ret = psm_ep_close(pspsm_ep, PSM_EP_CLOSE_GRACEFUL, 0);
		pspsm_ep = NULL;

		/* the endpoint handle is dropped either way, so the buffer goes too */
		free(sendbuf);
		sendbuf = NULL;

		if (ret != PSM_OK) goto err;
		pspsm_dprint(2, "pspsm_close_endpoint: OK");
	}
	return 0;

err:
	pspsm_err(psm_error_get_string(ret));
	pspsm_dprint(1, "pspsm_close_endpoint: %s", pspsm_err_str);
	return -1;
#endif
}
/*
 * Finalize the matched queue if one exists.
 *
 * On success the global handle pspsm_mq is cleared. This keeps a repeated
 * call from finalizing the same queue twice and lets pspsm_init_mq(), which
 * only initializes when pspsm_mq is unset, create a fresh queue later.
 * On failure pspsm_mq is left untouched.
 *
 * Returns 0 on success (or if there is no queue), -1 on failure with the
 * reason recorded through pspsm_err().
 */
int pspsm_finalize_mq(void)
{
	psm_error_t ret;

	if (pspsm_mq) {
		ret = psm_mq_finalize(pspsm_mq);
		if (ret != PSM_OK) goto err;

		pspsm_mq = NULL;
		pspsm_dprint(2, "pspsm_finalize_mq: OK");
	}
	return 0;

err:
	pspsm_err(psm_error_get_string(ret));
	pspsm_dprint(1, "pspsm_finalize_mq: %s", pspsm_err_str);
	return -1;
}
/*
 * Post a non-blocking send of buf[0..len) to the peer of con_info.
 *
 * The request context handed to psm_mq_isend() is the con_info pointer with
 * nr OR-ed into it. *req must be PSM_MQ_REQINVALID on entry (asserted) and
 * receives the new request.
 *
 * Returns 0 when the send was posted, -EPIPE on a PSM error (recorded through
 * pspsm_err()).
 */
static inline int _pspsm_send_buf(pspsm_con_info_t *con_info, char *buf, size_t len,
				  uint64_t tag, psm_mq_req_t *req, unsigned long nr)
{
	uintptr_t tagged_con = (uintptr_t)con_info | nr;
	psm_error_t rc;

	assert(*req == PSM_MQ_REQINVALID);

	rc = psm_mq_isend(pspsm_mq, con_info->epaddr, /* flags */ 0,
			  tag, buf, len, (void *)tagged_con, req);
	if (rc == PSM_OK) {
		return 0;
	}

	pspsm_err(psm_error_get_string(rc));
	pspsm_dprint(1, "_pspsm_send_buf: %s", pspsm_err_str);
	return -EPIPE;
}
/*
 * Create the matched queue on pspsm_ep unless pspsm_mq is already set.
 *
 * Returns 0 on success (or if the queue already exists), -1 on failure with
 * the reason recorded through pspsm_err().
 */
static int pspsm_init_mq(void)
{
	psm_error_t rc;

	if (pspsm_mq) {
		/* already initialized */
		return 0;
	}

	rc = psm_mq_init(pspsm_ep, PSM_MQ_ORDERMASK_ALL, NULL, 0, &pspsm_mq);
	if (rc != PSM_OK) {
		pspsm_err(psm_error_get_string(rc));
		pspsm_dprint(1, "pspsm_init_mq: %s", pspsm_err_str);
		return -1;
	}

	pspsm_dprint(2, "pspsm_init_mq: OK");
	return 0;
}
/*
 * Close the PSM endpoint and release the send staging buffer.
 *
 * Nothing is done when pspsm_ep is unset. Otherwise the endpoint is closed
 * gracefully and pspsm_ep is cleared regardless of the result. sendbuf is
 * freed and reset in both the success and the failure case: the endpoint
 * handle is gone either way, so keeping the buffer would only leak it, and
 * leaving the pointer set would make it dangle.
 *
 * Returns 0 on success (or if no endpoint was open), -1 if psm_ep_close()
 * failed, with the reason recorded through pspsm_err().
 */
static int pspsm_close_endpoint(void)
{
	psm_error_t ret;

	if (pspsm_ep) {
		ret = psm_ep_close(pspsm_ep, PSM_EP_CLOSE_GRACEFUL, 0);
		pspsm_ep = NULL;

		/* free(NULL) is a no-op, so no guard is needed */
		free(sendbuf);
		sendbuf = NULL;

		if (ret != PSM_OK) goto err;
		pspsm_dprint(2, "pspsm_close_endpoint: OK");
	}
	return 0;

err:
	pspsm_err(psm_error_get_string(ret));
	pspsm_dprint(1, "pspsm_close_endpoint: %s", pspsm_err_str);
	return -1;
}
/*
 * Send the two-part iov of con_info (iov[0] and iov[1]), tagged with
 * con_info->send_id | magic.
 *
 * If the combined length fits into pscom.env.readahead, both parts are
 * copied into sendbuf and sent with a single psm_mq_send(). Otherwise one
 * request per non-empty part is posted through _pspsm_send_buf(), using
 * con_info->sreqs[i] and the part index as context tag, and poll_user_inc()
 * is called once per posted request.
 *
 * FIXME (from the original author): returns
 *    0        if the send is complete,
 *   -EAGAIN   if one or more requests were created for it,
 *   -EPIPE    in case of an error.
 */
static int _pspsm_sendv(pspsm_con_info_t *con_info, uint64_t magic)
{
	uint64_t tag = con_info->send_id | magic;
	unsigned int i;
	psm_error_t ret;
	size_t len = con_info->iov[0].iov_len + con_info->iov[1].iov_len;

	if (len <= pscom.env.readahead) {
		pscom_memcpy_from_iov(sendbuf, con_info->iov, len);
		/* we hope that doesn't block - it shouldn't, as the
		 * message is sufficiently small */
		ret = psm_mq_send(pspsm_mq, con_info->epaddr,
				  /* flags */ 0, tag, sendbuf, len);
		if (ret != PSM_OK) goto err;
		return 0;
	}

	for (i = 0; i < 2; i++) {
		if (!con_info->iov[i].iov_len) continue;

		/* NOTE(review): if the second part fails here, a request posted
		 * for the first part is still outstanding when -EPIPE is
		 * returned. */
		if (_pspsm_send_buf(con_info,
				    con_info->iov[i].iov_base,
				    con_info->iov[i].iov_len,
				    tag, &con_info->sreqs[i], i)) {
			return -EPIPE;
		}
		/* inc for each outstanding send request */
		poll_user_inc();
	}
	return -EAGAIN;

err:
	pspsm_err(psm_error_get_string(ret));
	/* report under this function's own name, not _pspsm_send_buf's */
	pspsm_dprint(1, "_pspsm_sendv: %s", pspsm_err_str);
	return -EPIPE;
}
/*
 * Post a non-blocking receive for con_info.
 * (ToDo from the original author: rename to something like "post a receive".)
 *
 * The receive matches con_info->recv_id under the file-level mask and
 * targets con_info->rbuf / con_info->rbuflen. The request context is the
 * con_info pointer with the value 2 OR-ed in. con_info->rreq must be
 * PSM_MQ_REQINVALID on entry (asserted) and receives the new request.
 *
 * Returns -EAGAIN when the receive was posted (FIXME from the original
 * author: should probably not return an error code to indicate success) and
 * -1 on a PSM error, which is recorded through pspsm_err().
 */
static int pspsm_recvlook(pspsm_con_info_t *con_info)
{
	uintptr_t tagged_con = (uintptr_t)con_info | 2;
	uint64_t recv_tag = con_info->recv_id;
	psm_error_t rc;

	assert(con_info->rreq == PSM_MQ_REQINVALID);

	rc = psm_mq_irecv(pspsm_mq, recv_tag, mask, 0 /*flags*/,
			  con_info->rbuf, con_info->rbuflen,
			  (void *)tagged_con, &con_info->rreq);
	if (rc == PSM_OK) {
		return -EAGAIN;
	}

	pspsm_err(psm_error_get_string(rc));
	pspsm_dprint(1, "pspsm_recvlook: %s", pspsm_err_str);
	return -1;
}
static int pspsm_init(void) { static pspsm_init_state_t init_state = PSPSM_INIT_START; int verno_minor = PSM_VERNO_MINOR; int verno_major = PSM_VERNO_MAJOR; psm_error_t ret; if (init_state == PSPSM_INIT_START) { /* Check for an available /dev/ipath */ ret = pspsm_check_dev_ipath(); if (ret != 0) { goto err_dev_ipath; } ret = psm_init(&verno_major, &verno_minor); if (ret != PSM_OK) { goto err_init; } /* * All processes wanting to communicate need to use * the same UUID. * * It is unclear whether there are drawbacks from * simply using the same UUID for groups of processes * that will never communicate. * * On top of a constant fill pattern, we use: * * - PSP_PSM_UNIQ_ID if set and not zero, or * - PMI_ID, if set and not zero - that's not entirely * clean, but a practical solution for MPI apps (as * long as we do not implement communication between * two sets of MPI processes not sharing a * communicator). */ memset(pspsm_uuid.as_uuid, DEFAULT_UUID_PATTERN, sizeof(pspsm_uuid.as_uuid)); if (pscom.env.psm_uniq_id) { pspsm_dprint(2, "seeding PSM UUID with %u", pscom.env.psm_uniq_id); pspsm_uuid.as_uint = pscom.env.psm_uniq_id; } /* Open the endpoint here in init with the hope that every mpi rank call indirect psm_ep_open() before transmitting any data from or to this endpoint. This is to avoid a race condition in libpsm_infinipath. Downside: We consume PSM Contexts even in the case of only local communication. You could use PSP_PSM=0 in this case. */ if (pspsm_open_endpoint()) goto err_ep; if (pspsm_init_mq()) goto err_mq; pspsm_dprint(2, "pspsm_init: OK"); init_state = PSPSM_INIT_DONE; } return init_state; /* 0 = success, -1 = error */ err_dev_ipath: pspsm_dprint(2, "pspsm_init: No \"/dev/ipath\" found. Arch psm is disabled."); goto err_exit; err_init: pspsm_err(psm_error_get_string(ret)); pspsm_dprint(1, "pspsm_init: %s", pspsm_err_str); // Fall through err_ep: err_mq: err_exit: init_state = PSPSM_INIT_FAILED; return init_state; /* 0 = success, -1 = error */ }