예제 #1
0
파일: plain_store.c 프로젝트: DLag/sheepdog
static int default_read_from_path(uint64_t oid, const char *path,
				  const struct siocb *iocb)
{
	int flags = prepare_iocb(oid, iocb, false), fd,
	    ret = SD_RES_SUCCESS;
	ssize_t size;

	/*
	 * Make sure oid is in the right place because oid might be misplaced
	 * in a wrong place, due to 'shutdown/restart with less disks' or any
	 * bugs. We need call err_to_sderr() to return EIO if disk is broken.
	 *
	 * For stale path, get_store_stale_path already does default_exist job.
	 */
	if (!is_stale_path(path) && !default_exist(oid, iocb->ec_index))
		return err_to_sderr(path, oid, ENOENT);

	fd = open(path, flags);
	if (fd < 0)
		return err_to_sderr(path, oid, errno);

	size = xpread(fd, iocb->buf, iocb->length, iocb->offset);
	if (unlikely(size != iocb->length)) {
		sd_err("failed to read object %"PRIx64", path=%s, offset=%"
		       PRId32", size=%"PRId32", result=%zd, %m", oid, path,
		       iocb->offset, iocb->length, size);
		ret = err_to_sderr(path, oid, errno);
	}
	close(fd);
	return ret;
}
예제 #2
0
int default_write(uint64_t oid, struct siocb *iocb)
{
	int flags = get_open_flags(oid, false), fd, ret = SD_RES_SUCCESS;
	char path[PATH_MAX];
	ssize_t size;

	if (iocb->epoch < sys_epoch()) {
		dprintf("%"PRIu32" sys %"PRIu32"\n", iocb->epoch, sys_epoch());
		return SD_RES_OLD_NODE_VER;
	}

	get_obj_path(oid, path);
	if (iocb->flags & SD_FLAG_CMD_CACHE && is_disk_cache_enabled())
		flags &= ~O_DSYNC;
	fd = open(path, flags, def_fmode);
	if (fd < 0)
		return err_to_sderr(oid, errno);

	size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset);
	if (size != iocb->length) {
		eprintf("failed to write object %"PRIx64", path=%s, offset=%"
			PRId64", size=%"PRId32", result=%zd, %m\n", oid, path,
			iocb->offset, iocb->length, size);
		ret = err_to_sderr(oid, errno);
		goto out;
	}
out:
	close(fd);
	return ret;
}
예제 #3
0
파일: plain_store.c 프로젝트: DLag/sheepdog
int default_write(uint64_t oid, const struct siocb *iocb)
{
	int flags = prepare_iocb(oid, iocb, false), fd,
	    ret = SD_RES_SUCCESS;
	char path[PATH_MAX];
	ssize_t size;
	uint32_t len = iocb->length;
	uint64_t offset = iocb->offset;
	static bool trim_is_supported = true;

	if (iocb->epoch < sys_epoch()) {
		sd_debug("%"PRIu32" sys %"PRIu32, iocb->epoch, sys_epoch());
		return SD_RES_OLD_NODE_VER;
	}

	if (uatomic_is_true(&sys->use_journal) &&
	    unlikely(journal_write_store(oid, iocb->buf, iocb->length,
					 iocb->offset, false))
	    != SD_RES_SUCCESS) {
		sd_err("turn off journaling");
		uatomic_set_false(&sys->use_journal);
		flags |= O_DSYNC;
		sync();
	}

	get_store_path(oid, iocb->ec_index, path);

	/*
	 * Make sure oid is in the right place because oid might be misplaced
	 * in a wrong place, due to 'shutdown/restart with less/more disks' or
	 * any bugs. We need call err_to_sderr() to return EIO if disk is broken
	 */
	if (!default_exist(oid, iocb->ec_index))
		return err_to_sderr(path, oid, ENOENT);

	fd = open(path, flags, sd_def_fmode);
	if (unlikely(fd < 0))
		return err_to_sderr(path, oid, errno);

	if (trim_is_supported && is_sparse_object(oid)) {
		if (default_trim(fd, oid, iocb, &offset, &len) < 0) {
			trim_is_supported = false;
			offset = iocb->offset;
			len = iocb->length;
		}
	}

	size = xpwrite(fd, iocb->buf, len, offset);
	if (unlikely(size != len)) {
		sd_err("failed to write object %"PRIx64", path=%s, offset=%"
		       PRId32", size=%"PRId32", result=%zd, %m", oid, path,
		       iocb->offset, iocb->length, size);
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}
out:
	close(fd);
	return ret;
}
예제 #4
0
int default_create_and_write(uint64_t oid, struct siocb *iocb)
{
	char path[PATH_MAX], tmp_path[PATH_MAX];
	int flags = get_open_flags(oid, true);
	int ret, fd;
	uint32_t len = iocb->length;

	get_obj_path(oid, path);
	get_tmp_obj_path(oid, tmp_path);

	fd = open(tmp_path, flags, def_fmode);
	if (fd < 0) {
		if (errno == EEXIST)
			/* This happens if node membership changes during object
			 * creation; while gateway retries a CREATE request,
			 * recovery process could also recover the object at the
			 * same time.  They should try to write the same date,
			 * so it is okay to simply return success here. */
			dprintf("%s exists\n", tmp_path);
			return SD_RES_SUCCESS;

		eprintf("failed to open %s: %m\n", tmp_path);
		return err_to_sderr(oid, errno);
	}

	if (iocb->offset != 0 || iocb->length != get_objsize(oid)) {
		ret = prealloc(fd, get_objsize(oid));
		if (ret != SD_RES_SUCCESS)
			goto out;
	}

	ret = xpwrite(fd, iocb->buf, len, iocb->offset);
	if (ret != len) {
		eprintf("failed to write object. %m\n");
		ret = err_to_sderr(oid, errno);
		goto out;
	}

	ret = rename(tmp_path, path);
	if (ret < 0) {
		eprintf("failed to rename %s to %s: %m\n", tmp_path, path);
		ret = err_to_sderr(oid, errno);
		goto out;
	}
	dprintf("%"PRIx64"\n", oid);
	ret = SD_RES_SUCCESS;
out:
	if (ret != SD_RES_SUCCESS)
		unlink(tmp_path);
	close(fd);
	return ret;
}
예제 #5
0
파일: farm.c 프로젝트: yaekumo/sheepdog
static int farm_open(uint64_t oid, struct siocb *iocb, int create)
{
    struct strbuf buf = STRBUF_INIT;
    int ret = SD_RES_SUCCESS, fd;
    int flags = def_open_flags;

    if (iocb->epoch < sys->epoch)
        goto out;

    if (is_vdi_obj(oid))
        flags &= ~O_DIRECT;

    if (create)
        flags |= O_CREAT | O_TRUNC;

    strbuf_addstr(&buf, obj_path);
    strbuf_addf(&buf, "%016" PRIx64, oid);
    fd = open(buf.buf, flags, def_fmode);
    if (fd < 0) {
        ret = err_to_sderr(oid, errno);
        goto out;
    }
    iocb->fd = fd;
    ret = SD_RES_SUCCESS;
    if (!(iocb->flags & SD_FLAG_CMD_COW) && create) {
        ret = prealloc(fd, iocb->length);
        if (ret != SD_RES_SUCCESS)
            close(fd);
    }
out:
    strbuf_release(&buf);
    return ret;
}
예제 #6
0
파일: farm.c 프로젝트: yamt/sheepdog
static int farm_write(uint64_t oid, struct siocb *iocb, int create)
{
	int flags = def_open_flags, fd, ret = SD_RES_SUCCESS;
	char path[PATH_MAX];
	ssize_t size;

	if (iocb->epoch < sys_epoch()) {
		dprintf("%"PRIu32" sys %"PRIu32"\n", iocb->epoch, sys_epoch());
		return SD_RES_OLD_NODE_VER;
	}
	if (!is_data_obj(oid))
		flags &= ~O_DIRECT;

	if (create)
		flags |= O_CREAT | O_TRUNC;

	sprintf(path, "%s%016"PRIx64, obj_path, oid);
	fd = open(path, flags, def_fmode);
	if (fd < 0)
		return err_to_sderr(oid, errno);

	if (flock(fd, LOCK_EX) < 0) {
		ret = SD_RES_EIO;
		eprintf("%m\n");
		goto out;
	}
	if (create && !(iocb->flags & SD_FLAG_CMD_COW)) {
		ret = prealloc(fd, get_objsize(oid));
		if (ret != SD_RES_SUCCESS) {
			if (flock(fd, LOCK_UN) < 0) {
				ret = SD_RES_EIO;
				eprintf("%m\n");
				goto out;
			}
			goto out;
		}
	}
	size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset);
	if (flock(fd, LOCK_UN) < 0) {
		ret = SD_RES_EIO;
		eprintf("%m\n");
		goto out;
	}
	if (size != iocb->length) {
		eprintf("%m\n");
		ret = SD_RES_EIO;
		goto out;
	}

	trunk_update_entry(oid);
out:
	close(fd);
	return ret;
}
예제 #7
0
int default_write(uint64_t oid, const struct siocb *iocb)
{
	int flags = get_open_flags(oid, false, iocb->flags), fd,
	    ret = SD_RES_SUCCESS;
	char path[PATH_MAX];
	ssize_t size;

	if (iocb->epoch < sys_epoch()) {
		sd_dprintf("%"PRIu32" sys %"PRIu32"\n",
			iocb->epoch, sys_epoch());
		return SD_RES_OLD_NODE_VER;
	}

	get_obj_path(oid, path);

	if (uatomic_is_true(&sys->use_journal) &&
	    journal_file_write(oid, iocb->buf, iocb->length, iocb->offset,
			       false)
	    != SD_RES_SUCCESS) {
		sd_eprintf("turn off journaling\n");
		uatomic_set_false(&sys->use_journal);
		flags |= O_DSYNC;
		sync();
	}

	fd = open(path, flags, def_fmode);
	if (fd < 0)
		return err_to_sderr(oid, errno);

	size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset);
	if (size != iocb->length) {
		sd_eprintf("failed to write object %"PRIx64", path=%s, offset=%"
			PRId64", size=%"PRId32", result=%zd, %m\n", oid, path,
			iocb->offset, iocb->length, size);
		ret = err_to_sderr(oid, errno);
		goto out;
	}
out:
	close(fd);
	return ret;
}
예제 #8
0
/*
 * Preallocate the whole object to get a better filesystem layout.
 */
int prealloc(int fd, uint32_t size)
{
	int ret = fallocate(fd, 0, 0, size);
	if (ret < 0) {
		if (errno != ENOSYS && errno != EOPNOTSUPP)
			ret = err_to_sderr(0, errno); /* FIXME: set oid */
		else
			ret = write_last_sector(fd, size);
	} else
		ret = SD_RES_SUCCESS;
	return ret;
}
예제 #9
0
int default_write(uint64_t oid, const struct siocb *iocb)
{
	int flags = prepare_iocb(oid, iocb, false), fd,
	    ret = SD_RES_SUCCESS;
	char path[PATH_MAX];
	ssize_t size;

	if (iocb->epoch < sys_epoch()) {
		sd_debug("%"PRIu32" sys %"PRIu32, iocb->epoch, sys_epoch());
		return SD_RES_OLD_NODE_VER;
	}

	if (uatomic_is_true(&sys->use_journal) &&
	    unlikely(journal_write_store(oid, iocb->buf, iocb->length,
					 iocb->offset, false))
	    != SD_RES_SUCCESS) {
		sd_err("turn off journaling");
		uatomic_set_false(&sys->use_journal);
		flags |= O_DSYNC;
		sync();
	}

	get_obj_path(oid, path);

	fd = open(path, flags, sd_def_fmode);
	if (unlikely(fd < 0))
		return err_to_sderr(path, oid, errno);

	size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset);
	if (unlikely(size != iocb->length)) {
		sd_err("failed to write object %"PRIx64", path=%s, offset=%"
		       PRId64", size=%"PRId32", result=%zd, %m", oid, path,
		       iocb->offset, iocb->length, size);
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}
out:
	close(fd);
	return ret;
}
예제 #10
0
static int default_read_from_path(uint64_t oid, char *path,
				  const struct siocb *iocb)
{
	int flags = prepare_iocb(oid, iocb, false), fd,
	    ret = SD_RES_SUCCESS;
	ssize_t size;

	fd = open(path, flags);

	if (fd < 0)
		return err_to_sderr(path, oid, errno);

	size = xpread(fd, iocb->buf, iocb->length, iocb->offset);
	if (unlikely(size != iocb->length)) {
		sd_err("failed to read object %"PRIx64", path=%s, offset=%"
		       PRId64", size=%"PRId32", result=%zd, %m", oid, path,
		       iocb->offset, iocb->length, size);
		ret = err_to_sderr(path, oid, errno);
	}
	close(fd);
	return ret;
}
예제 #11
0
int default_write(uint64_t oid, const struct siocb *iocb)
{
	int flags = prepare_iocb(oid, iocb, false), fd,
	    ret = SD_RES_SUCCESS;
	char path[PATH_MAX];
	ssize_t size;

	if (iocb->epoch < sys_epoch()) {
		sd_debug("%"PRIu32" sys %"PRIu32, iocb->epoch, sys_epoch());
		return SD_RES_OLD_NODE_VER;
	}

	get_store_path(oid, iocb->ec_index, path);

	/*
	 * Make sure oid is in the right place because oid might be misplaced
	 * in a wrong place, due to 'shutdown/restart with less/more disks' or
	 * any bugs. We need call err_to_sderr() to return EIO if disk is broken
	 */
	if (!default_exist(oid, iocb->ec_index))
		return err_to_sderr(path, oid, errno);

	fd = open(path, flags, sd_def_fmode);
	if (unlikely(fd < 0))
		return err_to_sderr(path, oid, errno);

	size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset);
	if (unlikely(size != iocb->length)) {
		sd_err("failed to write object %"PRIx64", path=%s, offset=%"
		       PRId32", size=%"PRId32", result=%zd, %m", oid, path,
		       iocb->offset, iocb->length, size);
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}
out:
	close(fd);
	return ret;
}
예제 #12
0
static int default_read_from_path(uint64_t oid, char *path,
				       struct siocb *iocb)
{
	int flags = get_open_flags(oid, false), fd, ret = SD_RES_SUCCESS;
	ssize_t size;

	fd = open(path, flags);

	if (fd < 0)
		return err_to_sderr(oid, errno);

	size = xpread(fd, iocb->buf, iocb->length, iocb->offset);
	if (size != iocb->length) {
		eprintf("failed to read object %"PRIx64", path=%s, offset=%"
			PRId64", size=%"PRId32", result=%zd, %m\n", oid, path,
			iocb->offset, iocb->length, size);
		ret = err_to_sderr(oid, errno);
	}

	close(fd);

	return ret;
}
예제 #13
0
static int default_read_from_path(uint64_t oid, const char *path,
				  const struct siocb *iocb)
{
	int flags = prepare_iocb(oid, iocb, false), fd,
	    ret = SD_RES_SUCCESS;
	ssize_t size;

	fd = open(path, flags);

	if (fd < 0)
		return err_to_sderr(path, oid, errno);

	if (is_erasure_oid(oid) && iocb->ec_index <= SD_MAX_COPIES) {
		uint8_t idx;

		if (get_erasure_index(path, &idx) < 0) {
			close(fd);
			return err_to_sderr(path, oid, errno);
		}
		/* We pretend NO-OBJ to read old object in the stale dir */
		if (idx != iocb->ec_index) {
			sd_debug("ec_index %d != %d", iocb->ec_index, idx);
			close(fd);
			return SD_RES_NO_OBJ;
		}
	}

	size = xpread(fd, iocb->buf, iocb->length, iocb->offset);
	if (unlikely(size != iocb->length)) {
		sd_err("failed to read object %"PRIx64", path=%s, offset=%"
		       PRId32", size=%"PRId32", result=%zd, %m", oid, path,
		       iocb->offset, iocb->length, size);
		ret = err_to_sderr(path, oid, errno);
	}
	close(fd);
	return ret;
}
예제 #14
0
int default_remove_object(uint64_t oid, uint8_t ec_index)
{
	char path[PATH_MAX];

	get_store_path(oid, ec_index, path);

	if (unlink(path) < 0) {
		if (errno == ENOENT)
			return SD_RES_NO_OBJ;

		return err_to_sderr(path, oid, errno);
	}

	return SD_RES_SUCCESS;
}
예제 #15
0
int default_create_and_write(uint64_t oid, const struct siocb *iocb)
{
	char path[PATH_MAX], tmp_path[PATH_MAX];
	int flags = prepare_iocb(oid, iocb, true);
	int ret, fd;
	uint32_t len = iocb->length;
	bool ec = is_erasure_obj(oid, iocb->copy_policy);
	size_t obj_size;

	sd_debug("%"PRIx64, oid);
	get_obj_path(oid, path, sizeof(path));
	get_tmp_obj_path(oid, tmp_path, sizeof(tmp_path));

	if (uatomic_is_true(&sys->use_journal) &&
	    journal_write_store(oid, iocb->buf, iocb->length,
				iocb->offset, true)
	    != SD_RES_SUCCESS) {
		sd_err("turn off journaling");
		uatomic_set_false(&sys->use_journal);
		flags |= O_DSYNC;
		sync();
	}

	fd = open(tmp_path, flags, sd_def_fmode);
	if (fd < 0) {
		if (errno == EEXIST) {
			/*
			 * This happens if node membership changes during object
			 * creation; while gateway retries a CREATE request,
			 * recovery process could also recover the object at the
			 * same time.  They should try to write the same date,
			 * so it is okay to simply return success here.
			 */
			sd_debug("%s exists", tmp_path);
			return SD_RES_SUCCESS;
		}

		sd_err("failed to open %s: %m", tmp_path);
		return err_to_sderr(path, oid, errno);
	}

	if (ec) {
		uint8_t policy = iocb->copy_policy ?:
			get_vdi_copy_policy(oid_to_vid(oid));
		int d;
		ec_policy_to_dp(policy, &d, NULL);
		obj_size = SD_DATA_OBJ_SIZE / d;
	} else
예제 #16
0
int default_link(uint64_t oid, struct siocb *iocb, uint32_t tgt_epoch)
{
	char path[PATH_MAX], stale_path[PATH_MAX];

	dprintf("try link %"PRIx64" from snapshot with epoch %d\n", oid,
		tgt_epoch);

	get_obj_path(oid, path);
	get_stale_obj_path(oid, tgt_epoch, stale_path);

	if (link(stale_path, path) < 0) {
		eprintf("failed to link from %s to %s, %m\n", stale_path,
			path);
		return err_to_sderr(oid, errno);
	}

	return SD_RES_SUCCESS;
}
예제 #17
0
static int write_last_sector(int fd, uint32_t length)
{
	const int size = SECTOR_SIZE;
	char *buf;
	int ret;
	off_t off = length - size;

	buf = valloc(size);
	if (!buf) {
		eprintf("failed to allocate memory\n");
		return SD_RES_NO_MEM;
	}
	memset(buf, 0, size);

	ret = xpwrite(fd, buf, size, off);
	if (ret != size)
		ret = err_to_sderr(0, errno); /* FIXME: set oid */
	else
		ret = SD_RES_SUCCESS;
	free(buf);

	return ret;
}
예제 #18
0
파일: plain_store.c 프로젝트: DLag/sheepdog
int default_link(uint64_t oid, uint32_t tgt_epoch)
{
	char path[PATH_MAX], stale_path[PATH_MAX];

	sd_debug("try link %"PRIx64" from snapshot with epoch %d", oid,
		 tgt_epoch);

	snprintf(path, PATH_MAX, "%s/%016"PRIx64, md_get_object_dir(oid), oid);
	get_store_stale_path(oid, tgt_epoch, 0, stale_path);

	if (link(stale_path, path) < 0) {
		/*
		 * Recovery thread and main thread might try to recover the
		 * same object and we might get EEXIST in such case.
		 */
		if (errno == EEXIST)
			goto out;

		sd_debug("failed to link from %s to %s, %m", stale_path, path);
		return err_to_sderr(path, oid, errno);
	}
out:
	return SD_RES_SUCCESS;
}
예제 #19
0
파일: plain_store.c 프로젝트: DLag/sheepdog
int default_create_and_write(uint64_t oid, const struct siocb *iocb)
{
	char path[PATH_MAX], tmp_path[PATH_MAX];
	int flags = prepare_iocb(oid, iocb, true);
	int ret, fd;
	uint32_t len = iocb->length;
	size_t obj_size;
	uint64_t offset = iocb->offset;

	sd_debug("%"PRIx64, oid);
	get_store_path(oid, iocb->ec_index, path);
	get_store_tmp_path(oid, iocb->ec_index, tmp_path);

	if (uatomic_is_true(&sys->use_journal) &&
	    journal_write_store(oid, iocb->buf, iocb->length,
				iocb->offset, true)
	    != SD_RES_SUCCESS) {
		sd_err("turn off journaling");
		uatomic_set_false(&sys->use_journal);
		flags |= O_DSYNC;
		sync();
	}

	fd = open(tmp_path, flags, sd_def_fmode);
	if (fd < 0) {
		if (errno == EEXIST) {
			/*
			 * This happens if node membership changes during object
			 * creation; while gateway retries a CREATE request,
			 * recovery process could also recover the object at the
			 * same time.  They should try to write the same date,
			 * so it is okay to simply return success here.
			 */
			sd_debug("%s exists", tmp_path);
			return SD_RES_SUCCESS;
		}

		sd_err("failed to open %s: %m", tmp_path);
		return err_to_sderr(path, oid, errno);
	}

	obj_size = get_store_objsize(oid);

	trim_zero_blocks(iocb->buf, &offset, &len);

	if (offset != 0 || len != get_objsize(oid)) {
		if (is_sparse_object(oid))
			ret = xftruncate(fd, obj_size);
		else
			ret = prealloc(fd, obj_size);
		if (ret < 0) {
			ret = err_to_sderr(path, oid, errno);
			goto out;
		}
	}

	ret = xpwrite(fd, iocb->buf, len, offset);
	if (ret != len) {
		sd_err("failed to write object. %m");
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}

	ret = rename(tmp_path, path);
	if (ret < 0) {
		sd_err("failed to rename %s to %s: %m", tmp_path, path);
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}

	ret = SD_RES_SUCCESS;
	objlist_cache_insert(oid);
out:
	if (ret != SD_RES_SUCCESS)
		unlink(tmp_path);
	close(fd);
	return ret;
}
예제 #20
0
int default_create_and_write(uint64_t oid, const struct siocb *iocb)
{
	char path[PATH_MAX], tmp_path[PATH_MAX];
	int flags = get_open_flags(oid, true, iocb->flags);
	int ret, fd;
	uint32_t len = iocb->length;

	get_obj_path(oid, path);
	get_tmp_obj_path(oid, tmp_path);

	if (uatomic_is_true(&sys->use_journal) &&
	    journal_file_write(oid, iocb->buf, iocb->length, iocb->offset, true)
	    != SD_RES_SUCCESS) {
		sd_eprintf("turn off journaling");
		uatomic_set_false(&sys->use_journal);
		flags |= O_DSYNC;
		sync();
	}

	fd = open(tmp_path, flags, def_fmode);
	if (fd < 0) {
		if (errno == EEXIST) {
			/*
			 * This happens if node membership changes during object
			 * creation; while gateway retries a CREATE request,
			 * recovery process could also recover the object at the
			 * same time.  They should try to write the same date,
			 * so it is okay to simply return success here.
			 */
			sd_dprintf("%s exists", tmp_path);
			return SD_RES_SUCCESS;
		}

		sd_eprintf("failed to open %s: %m", tmp_path);
		return err_to_sderr(oid, errno);
	}

	if (iocb->offset != 0 || iocb->length != get_objsize(oid)) {
		ret = prealloc(fd, get_objsize(oid));
		if (ret < 0) {
			ret = err_to_sderr(oid, errno);
			goto out;
		}
	}

	ret = xpwrite(fd, iocb->buf, len, iocb->offset);
	if (ret != len) {
		sd_eprintf("failed to write object. %m");
		ret = err_to_sderr(oid, errno);
		goto out;
	}

	ret = rename(tmp_path, path);
	if (ret < 0) {
		sd_eprintf("failed to rename %s to %s: %m", tmp_path, path);
		ret = err_to_sderr(oid, errno);
		goto out;
	}
	sd_dprintf("%"PRIx64, oid);
	ret = SD_RES_SUCCESS;
out:
	if (ret != SD_RES_SUCCESS)
		unlink(tmp_path);
	close(fd);
	return ret;
}
예제 #21
0
파일: farm.c 프로젝트: yamt/sheepdog
static int farm_read(uint64_t oid, struct siocb *iocb)
{
	int flags = def_open_flags, fd, ret = SD_RES_SUCCESS;
	uint32_t epoch = sys_epoch();
	char path[PATH_MAX];
	ssize_t size;
	int i;
	void *buffer;

	if (iocb->epoch < epoch) {

		buffer = read_working_object(oid, iocb->offset, iocb->length);
		if (!buffer) {
			/* Here if read the object from the targeted epoch failed,
			 * we need to read from the later epoch, because at some epoch
			 * we doesn't write the object to the snapshot, we assume
			 * it in the current local object directory, but maybe
			 * in the next epoch we removed it from the local directory.
			 * in this case, we should try to retrieve object upwards, since.
			 * when the object is to be removed, it will get written to the
			 * snapshot at later epoch.
			 */
			for (i = iocb->epoch; i < epoch; i++) {
				buffer = retrieve_object_from_snap(oid, i);
				if (buffer)
					break;
			}
		}
		if (!buffer)
			return SD_RES_NO_OBJ;
		memcpy(iocb->buf, buffer, iocb->length);
		free(buffer);

		return SD_RES_SUCCESS;
	}

	if (!is_data_obj(oid))
		flags &= ~O_DIRECT;

	sprintf(path, "%s%016"PRIx64, obj_path, oid);
	fd = open(path, flags);

	if (fd < 0)
		return err_to_sderr(oid, errno);

	if (flock(fd, LOCK_SH) < 0) {
		ret = SD_RES_EIO;
		eprintf("%m\n");
		goto out;
	}
	size = xpread(fd, iocb->buf, iocb->length, iocb->offset);
	if (flock(fd, LOCK_UN) < 0) {
		ret = SD_RES_EIO;
		eprintf("%m\n");
		goto out;
	}
	if (size != iocb->length) {
		ret = SD_RES_EIO;
		goto out;
	}
out:
	close(fd);
	return ret;
}
예제 #22
0
int default_create_and_write(uint64_t oid, const struct siocb *iocb)
{
	char path[PATH_MAX], tmp_path[PATH_MAX];
	int flags = prepare_iocb(oid, iocb, true);
	int ret, fd;
	uint32_t len = iocb->length;
	size_t obj_size;

	sd_debug("%"PRIx64, oid);
	get_store_path(oid, iocb->ec_index, path);
	get_store_tmp_path(oid, iocb->ec_index, tmp_path);
	fd = open(tmp_path, flags, sd_def_fmode);
	if (fd < 0) {
		if (errno == EEXIST) {
			/*
			 * This happens if node membership changes during object
			 * creation; while gateway retries a CREATE request,
			 * recovery process could also recover the object at the
			 * same time.  They should try to write the same date,
			 * so it is okay to simply return success here.
			 */
			sd_debug("%s exists", tmp_path);
			return SD_RES_SUCCESS;
		}

		sd_err("failed to open %s: %m", tmp_path);
		return err_to_sderr(path, oid, errno);
	}

	obj_size = get_store_objsize(oid);
	ret = prealloc(fd, obj_size);
	if (ret < 0) {
	          ret = err_to_sderr(path, oid, errno);
		  goto out;
	}

	ret = xpwrite(fd, iocb->buf, len, iocb->offset);
	if (ret != len) {
		sd_err("failed to write object. %m");
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}

	/*
	 * Modern FS like ext4, xfs defaults to automatic syncing of files after
	 * replace-via-rename and replace-via-truncate operations. So rename
	 * without fsync() is actually safe.
	 */
	ret = rename(tmp_path, path);
	if (ret < 0) {
		sd_err("failed to rename %s to %s: %m", tmp_path, path);
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}

	ret = SD_RES_SUCCESS;
	objlist_cache_insert(oid);
out:
	if (ret != SD_RES_SUCCESS && unlink(tmp_path) != 0)
		sd_err("failed to unlink %s: %m", tmp_path);
	close(fd);
	return ret;
}