Esempio n. 1
0
int trunk_init(void)
{
	DIR *dir;
	struct dirent *d;
	uint64_t oid;

	dir = opendir(obj_path);
	if (!dir)
		return -1;

	while ((d = readdir(dir))) {
		if (!strncmp(d->d_name, ".", 1))
			continue;
		oid = strtoull(d->d_name, NULL, 16);
		if (oid == 0 || oid == ULLONG_MAX)
			continue;
		objlist_cache_insert(oid);
		lookup_trunk_entry(oid, 1);
	}

	omap_tree = RB_ROOT;

	closedir(dir);
	return 0;
}
Esempio n. 2
0
static int init_objlist_and_vdi_bitmap(uint64_t oid, char *wd, uint32_t epoch,
				       void *arg)
{
	int ret;
	objlist_cache_insert(oid);

	if (is_vdi_obj(oid)) {
		sd_debug("found the VDI object %" PRIx64, oid);
		ret = init_vdi_state(oid, wd, epoch);
		if (ret != SD_RES_SUCCESS)
			return ret;
	}
	return SD_RES_SUCCESS;
}
Esempio n. 3
0
static int init_objlist_and_vdi_bitmap(uint64_t oid, void *arg)
{
	int ret;
	objlist_cache_insert(oid);

	if (is_vdi_obj(oid)) {
		vprintf(SDOG_DEBUG, "found the VDI object %" PRIx64 "\n", oid);
		set_bit(oid_to_vid(oid), sys->vdi_inuse);
		ret = init_vdi_copy_number(oid);
		if (ret != SD_RES_SUCCESS)
			return ret;
	}
	return SD_RES_SUCCESS;
}
Esempio n. 4
0
static int init_objlist_and_vdi_bitmap(uint64_t oid, const char *wd,
				       uint32_t epoch, uint8_t ec_index,
				       struct vnode_info *vinfo,
				       void *arg)
{
	int ret = SD_RES_SUCCESS;
	objlist_cache_insert(oid);

	if (is_vdi_obj(oid)) {
		sd_debug("found the VDI object %" PRIx64" epoch %"PRIu32
			 " at %s", oid, epoch, wd);
		atomic_set_bit(oid_to_vid(oid), sys->vdi_inuse);
	}
	return ret;
}
Esempio n. 5
0
static int recover_object_from_replica(uint64_t oid,
				       const struct sd_vnode *vnode,
				       uint32_t epoch, uint32_t tgt_epoch)
{
	struct sd_req hdr;
	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
	unsigned rlen;
	int ret = SD_RES_NO_MEM;
	void *buf = NULL;
	struct siocb iocb = { 0 };

	if (vnode_is_local(vnode)) {
		ret = sd_store->link(oid, tgt_epoch);
		goto out;
	}

	rlen = get_objsize(oid);
	buf = valloc(rlen);
	if (!buf) {
		sd_eprintf("%m");
		goto out;
	}

	sd_init_req(&hdr, SD_OP_READ_PEER);
	hdr.epoch = epoch;
	hdr.flags = SD_FLAG_CMD_RECOVERY;
	hdr.data_length = rlen;
	hdr.obj.oid = oid;
	hdr.obj.tgt_epoch = tgt_epoch;

	ret = sheep_exec_req(&vnode->nid, &hdr, buf);
	if (ret != SD_RES_SUCCESS)
		goto out;
	iocb.epoch = epoch;
	iocb.length = rsp->data_length;
	iocb.offset = rsp->obj.offset;
	iocb.buf = buf;
	ret = sd_store->create_and_write(oid, &iocb);
out:
	if (ret == SD_RES_SUCCESS) {
		sd_dprintf("recovered oid %"PRIx64" from %d to epoch %d", oid,
			tgt_epoch, epoch);
		objlist_cache_insert(oid);
	}
	free(buf);
	return ret;
}
Esempio n. 6
0
static int init_objlist_and_vdi_bitmap(uint64_t oid, const char *wd,
				       uint32_t epoch, uint8_t ec_index,
				       struct vnode_info *vinfo,
				       void *arg)
{
	int ret;
	objlist_cache_insert(oid);

	if (is_vdi_obj(oid)) {
		sd_debug("found the VDI object %" PRIx64" epoch %"PRIu32
			 " at %s", oid, epoch, wd);
		ret = init_vdi_state(oid, wd, epoch);
		if (ret != SD_RES_SUCCESS)
			return ret;
	}
	return SD_RES_SUCCESS;
}
Esempio n. 7
0
int default_create_and_write(uint64_t oid, const struct siocb *iocb)
{
	char path[PATH_MAX], tmp_path[PATH_MAX];
	int flags = prepare_iocb(oid, iocb, true);
	int ret, fd;
	uint32_t len = iocb->length;
	size_t obj_size;
	uint64_t offset = iocb->offset;

	sd_debug("%"PRIx64, oid);
	get_store_path(oid, iocb->ec_index, path);
	get_store_tmp_path(oid, iocb->ec_index, tmp_path);

	if (uatomic_is_true(&sys->use_journal) &&
	    journal_write_store(oid, iocb->buf, iocb->length,
				iocb->offset, true)
	    != SD_RES_SUCCESS) {
		sd_err("turn off journaling");
		uatomic_set_false(&sys->use_journal);
		flags |= O_DSYNC;
		sync();
	}

	fd = open(tmp_path, flags, sd_def_fmode);
	if (fd < 0) {
		if (errno == EEXIST) {
			/*
			 * This happens if node membership changes during object
			 * creation; while gateway retries a CREATE request,
			 * recovery process could also recover the object at the
			 * same time.  They should try to write the same date,
			 * so it is okay to simply return success here.
			 */
			sd_debug("%s exists", tmp_path);
			return SD_RES_SUCCESS;
		}

		sd_err("failed to open %s: %m", tmp_path);
		return err_to_sderr(path, oid, errno);
	}

	obj_size = get_store_objsize(oid);

	trim_zero_blocks(iocb->buf, &offset, &len);

	if (offset != 0 || len != get_objsize(oid)) {
		if (is_sparse_object(oid))
			ret = xftruncate(fd, obj_size);
		else
			ret = prealloc(fd, obj_size);
		if (ret < 0) {
			ret = err_to_sderr(path, oid, errno);
			goto out;
		}
	}

	ret = xpwrite(fd, iocb->buf, len, offset);
	if (ret != len) {
		sd_err("failed to write object. %m");
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}

	ret = rename(tmp_path, path);
	if (ret < 0) {
		sd_err("failed to rename %s to %s: %m", tmp_path, path);
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}

	ret = SD_RES_SUCCESS;
	objlist_cache_insert(oid);
out:
	if (ret != SD_RES_SUCCESS)
		unlink(tmp_path);
	close(fd);
	return ret;
}
Esempio n. 8
0
static int recover_object_from_replica(uint64_t oid,
				       struct sd_vnode *entry,
				       uint32_t epoch, uint32_t tgt_epoch)
{
	struct sd_req hdr;
	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
	char name[128];
	unsigned wlen = 0, rlen;
	int fd, ret = -1;
	void *buf;
	struct siocb iocb = { 0 };

	rlen = get_objsize(oid);

	buf = valloc(rlen);
	if (!buf) {
		eprintf("%m\n");
		goto out;
	}

	if (vnode_is_local(entry)) {
		iocb.epoch = epoch;
		iocb.length = rlen;
		ret = sd_store->link(oid, &iocb, tgt_epoch);
		if (ret == SD_RES_SUCCESS) {
			ret = 0;
			goto done;
		} else {
			ret = -1;
			goto out;
		}
	}

	addr_to_str(name, sizeof(name), entry->nid.addr, 0);
	fd = connect_to(name, entry->nid.port);
	dprintf("%s, %d\n", name, entry->nid.port);
	if (fd < 0) {
		eprintf("failed to connect to %s:%"PRIu32"\n", name, entry->nid.port);
		ret = -1;
		goto out;
	}

	sd_init_req(&hdr, SD_OP_READ_PEER);
	hdr.epoch = epoch;
	hdr.flags = SD_FLAG_CMD_RECOVERY;
	hdr.data_length = rlen;

	hdr.obj.oid = oid;
	hdr.obj.tgt_epoch = tgt_epoch;

	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);

	close(fd);

	if (ret != 0) {
		eprintf("res: %"PRIx32"\n", rsp->result);
		ret = -1;
		goto out;
	}

	rsp = (struct sd_rsp *)&hdr;

	if (rsp->result == SD_RES_SUCCESS) {
		iocb.epoch = epoch;
		iocb.length = rlen;
		iocb.buf = buf;
		ret = sd_store->atomic_put(oid, &iocb);
		if (ret != SD_RES_SUCCESS) {
			ret = -1;
			goto out;
		}
	} else {
		eprintf("failed, res: %"PRIx32"\n", rsp->result);
		ret = rsp->result;
		goto out;
	}
done:
	dprintf("recovered oid %"PRIx64" from %d to epoch %d\n", oid, tgt_epoch, epoch);
out:
	if (ret == SD_RES_SUCCESS)
		objlist_cache_insert(oid);
	free(buf);
	return ret;
}
Esempio n. 9
0
int default_create_and_write(uint64_t oid, const struct siocb *iocb)
{
	char path[PATH_MAX], tmp_path[PATH_MAX];
	int flags = prepare_iocb(oid, iocb, true);
	int ret, fd;
	uint32_t len = iocb->length;
	size_t obj_size;

	sd_debug("%"PRIx64, oid);
	get_store_path(oid, iocb->ec_index, path);
	get_store_tmp_path(oid, iocb->ec_index, tmp_path);
	fd = open(tmp_path, flags, sd_def_fmode);
	if (fd < 0) {
		if (errno == EEXIST) {
			/*
			 * This happens if node membership changes during object
			 * creation; while gateway retries a CREATE request,
			 * recovery process could also recover the object at the
			 * same time.  They should try to write the same date,
			 * so it is okay to simply return success here.
			 */
			sd_debug("%s exists", tmp_path);
			return SD_RES_SUCCESS;
		}

		sd_err("failed to open %s: %m", tmp_path);
		return err_to_sderr(path, oid, errno);
	}

	obj_size = get_store_objsize(oid);
	ret = prealloc(fd, obj_size);
	if (ret < 0) {
	          ret = err_to_sderr(path, oid, errno);
		  goto out;
	}

	ret = xpwrite(fd, iocb->buf, len, iocb->offset);
	if (ret != len) {
		sd_err("failed to write object. %m");
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}

	/*
	 * Modern FS like ext4, xfs defaults to automatic syncing of files after
	 * replace-via-rename and replace-via-truncate operations. So rename
	 * without fsync() is actually safe.
	 */
	ret = rename(tmp_path, path);
	if (ret < 0) {
		sd_err("failed to rename %s to %s: %m", tmp_path, path);
		ret = err_to_sderr(path, oid, errno);
		goto out;
	}

	ret = SD_RES_SUCCESS;
	objlist_cache_insert(oid);
out:
	if (ret != SD_RES_SUCCESS && unlink(tmp_path) != 0)
		sd_err("failed to unlink %s: %m", tmp_path);
	close(fd);
	return ret;
}