int sheep_exec_req(const struct node_id *nid, struct sd_req *hdr, void *buf) { struct sd_rsp *rsp = (struct sd_rsp *)hdr; struct sockfd *sfd; int ret; assert(is_worker_thread()); sfd = sockfd_cache_get(nid); if (!sfd) return SD_RES_NETWORK_ERROR; ret = exec_req(sfd->fd, hdr, buf, sheep_need_retry, hdr->epoch, MAX_RETRY_COUNT); if (ret) { sd_dprintf("remote node might have gone away"); sockfd_cache_del(nid, sfd); return SD_RES_NETWORK_ERROR; } ret = rsp->result; if (ret != SD_RES_SUCCESS) sd_eprintf("failed %s", sd_strerror(ret)); sockfd_cache_put(nid, sfd); return ret; }
/* * Wait for all forward requests completion. * * Even if something goes wrong, we have to wait forward requests completion to * avoid interleaved requests. * * Return error code if any one request fails. */ static int wait_forward_request(struct write_info *wi, struct request *req) { int nr_sent, err_ret = SD_RES_SUCCESS, ret, pollret, i, repeat = MAX_RETRY_COUNT; struct pfd_info pi; struct sd_rsp *rsp = &req->rp; again: pfd_info_init(wi, &pi); pollret = poll(pi.pfds, pi.nr, 1000 * POLL_TIMEOUT); if (pollret < 0) { if (errno == EINTR) goto again; panic("%m"); } else if (pollret == 0) { /* * If IO NIC is down, epoch isn't incremented, so we can't retry * for ever. */ if (sheep_need_retry(req->rq.epoch) && repeat) { repeat--; sd_warn("poll timeout %d, disks of some nodes or " "network is busy. Going to poll-wait again", wi->nr_sent); goto again; } nr_sent = wi->nr_sent; /* XXX Blinedly close all the connections */ for (i = 0; i < nr_sent; i++) sockfd_cache_del(wi->ent[i].nid, wi->ent[i].sfd); return SD_RES_NETWORK_ERROR; } nr_sent = wi->nr_sent; for (i = 0; i < nr_sent; i++) if (pi.pfds[i].revents & POLLIN) break; if (i < nr_sent) { int re = pi.pfds[i].revents; sd_debug("%d, revents %x", i, re); if (re & (POLLERR | POLLHUP | POLLNVAL)) { err_ret = SD_RES_NETWORK_ERROR; finish_one_write_err(wi, i); goto finish_write; } if (do_read(pi.pfds[i].fd, rsp, sizeof(*rsp), sheep_need_retry, req->rq.epoch, MAX_RETRY_COUNT)) { sd_err("remote node might have gone away"); err_ret = SD_RES_NETWORK_ERROR; finish_one_write_err(wi, i); goto finish_write; } ret = rsp->result; if (ret != SD_RES_SUCCESS) { sd_err("fail %"PRIx64", %s", req->rq.obj.oid, sd_strerror(ret)); err_ret = ret; } finish_one_write(wi, i); } finish_write: if (wi->nr_sent > 0) goto again; return err_ret; }
/*
 * Retire entry 'i' of 'wi' after a failure: remove its socket from the
 * sockfd cache, then call write_info_update() for that slot.
 */
static inline void finish_one_write_err(struct write_info *wi, int i)
{
	const struct node_id *failed_nid = wi->ent[i].nid;
	struct sockfd *failed_sfd = wi->ent[i].sfd;

	sockfd_cache_del(failed_nid, failed_sfd);
	write_info_update(wi, i);
}