/* * rpmemd_fip_cq_thread -- completion queue worker thread */ static void * rpmemd_fip_cq_thread(void *arg) { struct rpmemd_fip *fip = arg; struct fi_cq_err_entry err; const char *str_err; ssize_t sret; int ret = 0; while (!fip->closing) { sret = fi_cq_sread(fip->cq, fip->cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *entry = &fip->cq_entries[i]; RPMEMD_ASSERT(entry->op_context); struct rpmemd_fip_lane *lanep = entry->op_context; /* signal lane about SEND completion */ if (entry->flags & FI_SEND) rpmem_fip_lane_signal(&lanep->lane, FI_SEND); /* add lane to worker's ring buffer */ if (entry->flags & FI_RECV) { ret = rpmemd_fip_worker_push(lanep->worker, lanep); } if (ret) goto err; } } return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEMD_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from completion queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEMD_LOG(ERR, "error reading from completion queue: %s", str_err); err: return (void *)(uintptr_t)ret; }
/*
 * rpmem_fip_process_apm -- (internal) process completion queue entry for APM
 *
 * In APM mode the completion context is the lane itself; wake whoever is
 * waiting on it with the completion flags.
 */
static int
rpmem_fip_process_apm(struct rpmem_fip *fip, void *context, uint64_t flags)
{
	/* signal READ operation completion */
	rpmem_fip_lane_signal((struct rpmem_fip_lane *)context, flags);

	return 0;
}
/* * rpmemd_fip_process_stop_gpspm -- stop processing GPSPM messages */ static int rpmemd_fip_process_stop_gpspm(struct rpmemd_fip *fip) { int lret = 0; /* this stops all worker threads */ fip->closing = 1; /* * Signal all lanes that SEND has been completed. * Some workers may still be waiting for this completion. */ for (unsigned i = 0; i < fip->nlanes; i++) rpmem_fip_lane_signal(&fip->lanes[i].lane, FI_SEND); void *tret; int ret; errno = pthread_join(fip->cq_thread, &tret); if (errno) { RPMEMD_LOG(ERR, "!joining cq thread"); lret = -1; } else { ret = (int)(uintptr_t)tret; if (ret) { RPMEMD_LOG(ERR, "cq thread failed with " "code -- %d", ret); lret = ret; } } free(fip->cq_entries); for (size_t i = 0; i < fip->nthreads; i++) { ret = rpmemd_fip_worker_fini(fip->workers[i]); if (ret) { RPMEMD_LOG(ERR, "worker failed with code -- %d", ret); lret = ret; } } free(fip->workers); return lret; }
/* * rpmem_fip_process_gpspm -- (internal) process completion queue entry for * GPSPM */ static int rpmem_fip_process_gpspm(struct rpmem_fip *fip, void *context, uint64_t flags) { if (flags & FI_RECV) { /* RECV completion */ struct rpmem_fip_msg *resp = context; struct rpmem_msg_persist_resp *msg_resp = rpmem_fip_msg_get_pres(resp); VALGRIND_DO_MAKE_MEM_DEFINED(msg_resp, sizeof(*msg_resp)); if (unlikely(msg_resp->lane >= fip->nlanes)) { RPMEM_LOG(ERR, "lane number received (%lu) is greater " "than maximum lane number (%u)", msg_resp->lane, fip->nlanes - 1); return -1; } struct rpmem_fip_lane *lanep = &fip->lanes.gpspm[msg_resp->lane].lane; /* post RECV buffer immediately */ int ret = rpmem_fip_gpspm_post_resp(fip, resp); if (unlikely(ret)) RPMEM_FI_ERR((int)ret, "MSG send"); rpmem_fip_lane_sigret(lanep, flags, ret); return ret; } struct rpmem_fip_lane *lanep = context; /* SEND completion */ rpmem_fip_lane_signal(lanep, flags); return 0; }
/* * rpmem_fip_process -- (internal) process completion events */ static int rpmem_fip_process(struct rpmem_fip *fip) { ssize_t sret; struct fi_cq_err_entry err; const char *str_err; int ret; struct fi_cq_msg_entry *cq_entries; cq_entries = malloc(fip->cq_size * sizeof(*cq_entries)); if (!cq_entries) { RPMEM_LOG(ERR, "!allocating completion queue buffer"); return -1; } while (!fip->closing) { sret = fi_cq_sread(fip->cq, cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *comp = &cq_entries[i]; /* * If the context is NULL it probably means that * we get an unexpected CQ entry. The CQ is configured * with FI_SELECTIVE_COMPLETION so every inbound or * outbound operation must be issued with FI_COMPLETION * flag and non-NULL context. */ RPMEM_ASSERT(comp->op_context); /* read operation */ if (unlikely(comp->op_context == &fip->rd_lane)) { rpmem_fip_lane_signal(&fip->rd_lane.lane, FI_READ); continue; } /* persist operation */ ret = fip->ops->process(fip, comp->op_context, comp->flags); if (unlikely(ret)) { RPMEM_LOG(ERR, "persist operation failed"); goto err; } } } free(cq_entries); return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEM_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from event queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEM_LOG(ERR, "error reading from completion queue: %s", str_err); err: rpmem_fip_signal_all(fip, ret); free(cq_entries); return ret; }