예제 #1
0
파일: sheep.c 프로젝트: eaglesjune/sheepdog
/*
 * Inspect the host resource limits at startup and tune the allocator.
 *
 * Nothing here is fatal: problems with the open-file or core-file limits
 * are only logged, and the function always returns normally.
 */
static void check_host_env(void)
{
	struct rlimit r;

	if (getrlimit(RLIMIT_NOFILE, &r) < 0) {
		sd_err("failed to get nofile %m");
	} else if (r.rlim_cur == 1024) {
		/*
		 * 1024 is default for NOFILE on most distributions, which is
		 * very dangerous to run Sheepdog cluster.
		 */
		sd_warn("Allowed open files 1024 too small, suggested %u",
			SD_RLIM_NOFILE);
	} else if (r.rlim_cur < SD_RLIM_NOFILE) {
		/*
		 * rlim_t is not guaranteed to be unsigned long, so widen it
		 * explicitly to match the conversion specifier.
		 */
		sd_info("Allowed open files %llu, suggested %u",
			(unsigned long long)r.rlim_cur, SD_RLIM_NOFILE);
	}

	if (getrlimit(RLIMIT_CORE, &r) < 0) {
		sd_debug("failed to get core %m");
	} else if (r.rlim_cur < RLIM_INFINITY) {
		sd_debug("Allowed core file size %llu, suggested unlimited",
			 (unsigned long long)r.rlim_cur);
	}

	/*
	 * Disable glibc's dynamic mmap threshold and set it as 512k.
	 *
	 * We have to disable dynamic threshold because of its inefficiency to
	 * release freed memory back to OS. Setting it as 512k practically means
	 * allocation larger than or equal to 512k will use mmap() for malloc()
	 * and munmap() for free(), guaranteeing allocated memory will not be
	 * cached in the glibc's ptmalloc internal pool.
	 *
	 * 512k is not a well tested optimal value for IO request size, I choose
	 * it because it is default value for disk drive that it can transfer at
	 * a time. So default installation of guest will issue at most 512K
	 * sized request.
	 *
	 * mallopt() returns 0 on failure and does not set errno, so no %m here.
	 */
	if (mallopt(M_MMAP_THRESHOLD, 512 * 1024) == 0)
		sd_warn("failed to set mmap threshold");
}
예제 #2
0
/*
 * Wait for all forward requests completion.
 *
 * Even if something goes wrong, we have to wait forward requests completion to
 * avoid interleaved requests.
 *
 * wi:  bookkeeping for the forwarded writes; the loop runs until
 *      wi->nr_sent drops to zero (presumably decremented by
 *      finish_one_write()/finish_one_write_err() - confirm in their
 *      definitions).
 * req: the originating request; req->rp receives each remote response and
 *      req->rq.epoch drives the retry decision.
 *
 * Return SD_RES_SUCCESS if no request failed. Otherwise return
 * SD_RES_NETWORK_ERROR for a poll timeout, socket error or failed read, or
 * the result code of the most recent failing remote response.
 */
static int wait_forward_request(struct write_info *wi, struct request *req)
{
	/*
	 * Conditions poll() may report on their own, without POLLIN. They
	 * must be scanned for too, otherwise such a descriptor is never
	 * finished and the function spins on poll() forever.
	 */
	const int err_events = POLLERR | POLLHUP | POLLNVAL;
	int nr_sent, err_ret = SD_RES_SUCCESS, ret, pollret, i,
	    repeat = MAX_RETRY_COUNT;
	struct pfd_info pi;
	struct sd_rsp *rsp = &req->rp;
again:
	/* Re-initialised on every pass because wi changes as writes finish. */
	pfd_info_init(wi, &pi);
	pollret = poll(pi.pfds, pi.nr, 1000 * POLL_TIMEOUT);
	if (pollret < 0) {
		if (errno == EINTR)
			goto again;

		panic("%m");
	} else if (pollret == 0) {
		/*
		 * If IO NIC is down, epoch isn't incremented, so we can't retry
		 * for ever.
		 */
		if (sheep_need_retry(req->rq.epoch) && repeat) {
			repeat--;
			sd_warn("poll timeout %d, disks of some nodes or "
				"network is busy. Going to poll-wait again",
				wi->nr_sent);
			goto again;
		}

		nr_sent = wi->nr_sent;
		/* XXX Blindly close all the connections */
		for (i = 0; i < nr_sent; i++)
			sockfd_cache_del(wi->ent[i].nid, wi->ent[i].sfd);

		return SD_RES_NETWORK_ERROR;
	}

	/* Pick the first descriptor that is readable or in an error state. */
	nr_sent = wi->nr_sent;
	for (i = 0; i < nr_sent; i++)
		if (pi.pfds[i].revents & (POLLIN | err_events))
			break;
	if (i < nr_sent) {
		int re = pi.pfds[i].revents;
		sd_debug("%d, revents %x", i, re);
		/* An error bit wins over POLLIN: do not try to read. */
		if (re & err_events) {
			err_ret = SD_RES_NETWORK_ERROR;
			finish_one_write_err(wi, i);
			goto finish_write;
		}
		if (do_read(pi.pfds[i].fd, rsp, sizeof(*rsp), sheep_need_retry,
			    req->rq.epoch, MAX_RETRY_COUNT)) {
			sd_err("remote node might have gone away");
			err_ret = SD_RES_NETWORK_ERROR;
			finish_one_write_err(wi, i);
			goto finish_write;
		}

		/* Remember a remote failure but keep draining the others. */
		ret = rsp->result;
		if (ret != SD_RES_SUCCESS) {
			sd_err("fail %"PRIx64", %s", req->rq.obj.oid,
			       sd_strerror(ret));
			err_ret = ret;
		}
		finish_one_write(wi, i);
	}
finish_write:
	if (wi->nr_sent > 0)
		goto again;

	return err_ret;
}