Esempio n. 1
0
static void get_data_oid(char *sheep, uint64_t oid, struct sd_obj_rsp *rsp,
			 char *buf, void *data)
{
	struct get_data_oid_info *info = data;
	struct sheepdog_inode *inode = (struct sheepdog_inode *)buf;

	switch (rsp->result) {
	case SD_RES_SUCCESS:
		if (info->success)
			break;
		info->success = 1;
		if (inode->data_vdi_id[info->idx])
			info->data_oid = vid_to_data_oid(inode->data_vdi_id[info->idx], info->idx);
		break;
	case SD_RES_NO_OBJ:
		break;
	case SD_RES_OLD_NODE_VER:
	case SD_RES_NEW_NODE_VER:
		printf("the node list has changed, try again\n");
		break;
	default:
		printf("%s: hit an expected error, %d\n",
		       sheep, rsp->result);
		break;
	}
}
Esempio n. 2
0
static int bnode_do_create(struct kv_bnode *bnode, struct sd_inode *inode,
			   uint32_t idx, bool create)
{
	uint32_t vid = inode->vdi_id;
	uint64_t oid = vid_to_data_oid(vid, idx);
	int ret;

	bnode->oid = oid;
	ret = sd_write_object(oid, (char *)bnode, sizeof(*bnode), 0, create);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to create object, %" PRIx64, oid);
		goto out;
	}
	if (!create)
		goto out;

	sd_inode_set_vid(inode, idx, vid);
	ret = sd_inode_write_vid(inode, idx, vid, vid, 0, false, false);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to update inode, %" PRIx64,
		       vid_to_vdi_oid(vid));
		goto out;
	}
out:
	return ret;
}
Esempio n. 3
0
File: kv.c Progetto: skuanr/sheepdog
static void bucket_iterater(void *data, enum btree_node_type type, void *arg)
{
	struct sd_extent *ext;
	struct bucket_iterater_arg *biarg = arg;
	struct kv_bnode bnode;
	uint64_t oid;
	int ret;

	if (type == BTREE_EXT) {
		ext = (struct sd_extent *)data;
		if (!ext->vdi_id)
			return;

		oid = vid_to_data_oid(ext->vdi_id, ext->idx);
		ret = sd_read_object(oid, (char *)&bnode, sizeof(bnode), 0);
		if (ret != SD_RES_SUCCESS) {
			sd_err("Failed to read data object %"PRIx64, oid);
			return;
		}

		if (bnode.name[0] == 0)
			return;
		if (biarg->cb)
			biarg->cb(bnode.name, biarg->opaque);
		biarg->bucket_count++;
		biarg->object_count += bnode.object_count;
		biarg->bytes_used += bnode.bytes_used;
	}
}
Esempio n. 4
0
static void bucket_iterater(struct sd_index *idx, void *arg, int ignore)
{
	struct bucket_iterater_arg *biarg = arg;
	struct kv_bnode bnode;
	uint64_t oid;
	int ret;

	if (!idx->vdi_id)
		return;

	oid = vid_to_data_oid(idx->vdi_id, idx->idx);
	ret = sd_read_object(oid, (char *)&bnode, sizeof(bnode), 0);
	if (ret != SD_RES_SUCCESS) {
		sd_err("Failed to read data object %"PRIx64, oid);
		return;
	}

	if (bnode.name[0] == 0)
		return;
	if (biarg->cb)
		biarg->cb(bnode.name, biarg->opaque);
	biarg->bucket_count++;
	biarg->object_count += bnode.object_count;
	biarg->bytes_used += bnode.bytes_used;
}
Esempio n. 5
0
File: kv.c Progetto: skuanr/sheepdog
static int onode_do_create(struct kv_onode *onode, struct sd_inode *inode,
			   uint32_t idx, bool create)
{
	uint32_t vid = inode->vdi_id;
	uint64_t oid = vid_to_data_oid(vid, idx), len;
	int ret;

	onode->oid = oid;
	if (onode->inlined)
		len = onode->size;
	else
		len = sizeof(struct onode_extent) * onode->nr_extent;

	ret = sd_write_object(oid, (char *)onode, BLOCK_SIZE + len,
			      0, create);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to create object, %" PRIx64, oid);
		goto out;
	}
	if (!create)
		goto out;

	INODE_SET_VID(inode, idx, vid);
	ret = sd_inode_write_vid(sheep_bnode_writer, inode, idx,
				 vid, vid, 0, false, false);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to update inode, %" PRIx64,
		       vid_to_vdi_oid(vid));
		goto out;
	}
out:
	return ret;
}
Esempio n. 6
0
File: kv.c Progetto: skuanr/sheepdog
static void object_iterater(void *data, enum btree_node_type type, void *arg)
{
	struct sd_extent *ext;
	struct object_iterater_arg *oiarg = arg;
	struct kv_onode *onode = NULL;
	uint64_t oid;
	int ret;

	if (type == BTREE_EXT) {
		ext = (struct sd_extent *)data;
		if (!ext->vdi_id)
			goto out;

		onode = xmalloc(SD_DATA_OBJ_SIZE);
		oid = vid_to_data_oid(ext->vdi_id, ext->idx);
		ret = sd_read_object(oid, (char *)onode, SD_DATA_OBJ_SIZE, 0);
		if (ret != SD_RES_SUCCESS) {
			sd_err("Failed to read data object %"PRIx64, oid);
			goto out;
		}

		if (onode->name[0] == '\0')
			goto out;
		if (oiarg->cb)
			oiarg->cb(onode->name, oiarg->opaque);
		oiarg->count++;
	}
out:
	free(onode);
}
Esempio n. 7
0
static int get_data_oid(char *sheep, uint64_t oid, struct sd_rsp *rsp,
			 char *buf, void *data)
{
	struct get_data_oid_info *info = data;
	struct sheepdog_inode *inode = (struct sheepdog_inode *)buf;

	switch (rsp->result) {
	case SD_RES_SUCCESS:
		if (info->success)
			break;
		info->success = 1;
		if (inode->data_vdi_id[info->idx]) {
			info->data_oid = vid_to_data_oid(inode->data_vdi_id[info->idx], info->idx);
			return 1;
		}
		break;
	case SD_RES_NO_OBJ:
		break;
	case SD_RES_OLD_NODE_VER:
	case SD_RES_NEW_NODE_VER:
		fprintf(stderr, "The node list has changed: please try again\n");
		break;
	default:
		fprintf(stderr, "%s: hit an unexpected error (%d)\n",
		       sheep, rsp->result);
		break;
	}

	return 0;
}
Esempio n. 8
0
File: kv.c Progetto: skuanr/sheepdog
static int onode_create(struct kv_onode *onode, uint32_t bucket_vid)
{
	struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));
	uint32_t tmp_vid, idx;
	uint64_t hval, i;
	int ret;
	bool create = true;

	sys->cdrv->lock(bucket_vid);
	ret = sd_read_object(vid_to_vdi_oid(bucket_vid), (char *)inode,
			       sizeof(*inode), 0);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to read %" PRIx32 " %s", bucket_vid,
		       sd_strerror(ret));
		goto out;
	}

	hval = sd_hash(onode->name, strlen(onode->name));
	for (i = 0; i < MAX_DATA_OBJS; i++) {
		idx = (hval + i) % MAX_DATA_OBJS;
		tmp_vid = INODE_GET_VID(inode, idx);
		if (tmp_vid) {
			uint64_t oid = vid_to_data_oid(bucket_vid, idx);
			char name[SD_MAX_OBJECT_NAME] = { };

			ret = sd_read_object(oid, name, sizeof(name), 0);
			if (ret != SD_RES_SUCCESS)
				goto out;
			if (name[0] == 0) {
				create = false;
				goto create;
			}
		} else
			break;
	}
	if (i == MAX_DATA_OBJS) {
		ret = SD_RES_NO_SPACE;
		goto out;
	}
create:
	ret = onode_do_create(onode, inode, idx, create);
out:
	free(inode);
	sys->cdrv->unlock(bucket_vid);
	return ret;
}
Esempio n. 9
0
File: kv.c Progetto: skuanr/sheepdog
/*
 * Check if object by name exists in a bucket and init 'onode' if it exists.
 *
 * Return SD_RES_SUCCESS if found, SD_RES_NO_OBJ if not found.
 *
 * We check adjacent objects one by one once we get a start index by hashing
 * name. Unallocated slot marks the end of the check window.
 *
 * For e.g, if we are going to check if fish in the following bucket, assume
 * fish hashes to 'sheep', so we compare the name one by one from 'sheep' to
 * 'fish'. '\0' indicates that object was deleted before checking.
 *
 * [ sheep, dog, wolve, '\0', fish, {unallocated}, tiger, ]
 */
static int onode_lookup(struct kv_onode *onode, uint32_t ovid, const char *name)
{
	struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));
	uint32_t tmp_vid, idx;
	uint64_t hval, i;
	int ret;

	sys->cdrv->lock(ovid);
	ret = sd_read_object(vid_to_vdi_oid(ovid), (char *)inode,
			     sizeof(*inode), 0);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to read %" PRIx32 " %s", ovid,
		       sd_strerror(ret));
		goto out;
	}

	hval = sd_hash(name, strlen(name));
	for (i = 0; i < MAX_DATA_OBJS; i++) {
		idx = (hval + i) % MAX_DATA_OBJS;
		tmp_vid = INODE_GET_VID(inode, idx);
		if (tmp_vid) {
			uint64_t oid = vid_to_data_oid(ovid, idx);

			ret = sd_read_object(oid, (char *)onode,
					     sizeof(*onode), 0);
			if (ret != SD_RES_SUCCESS)
				goto out;
			if (strcmp(onode->name, name) == 0)
				break;
		} else {
			ret = SD_RES_NO_OBJ;
			break;
		}
	}
	if (i == MAX_DATA_OBJS) {
		ret = SD_RES_NO_OBJ;
		goto out;
	}
out:
	free(inode);
	sys->cdrv->unlock(ovid);
	return ret;
}
Esempio n. 10
0
File: kv.c Progetto: skuanr/sheepdog
static int vdi_read_write(uint32_t vid, char *data, size_t length,
			  off_t offset, bool is_read)
{
	struct sd_req hdr;
	uint32_t idx = offset / SD_DATA_OBJ_SIZE;
	uint64_t done = 0;
	struct request_iocb *iocb;
	int ret;

	iocb = local_req_init();
	if (!iocb)
		return SD_RES_SYSTEM_ERROR;

	offset %= SD_DATA_OBJ_SIZE;
	while (done < length) {
		size_t len = min(length - done, SD_DATA_OBJ_SIZE - offset);

		if (is_read) {
			sd_init_req(&hdr, SD_OP_READ_OBJ);
		} else {
			sd_init_req(&hdr, SD_OP_CREATE_AND_WRITE_OBJ);
			hdr.flags = SD_FLAG_CMD_WRITE;
		}
		hdr.data_length = len;
		hdr.obj.oid = vid_to_data_oid(vid, idx);
		hdr.obj.offset = offset;

		ret = exec_local_req_async(&hdr, data, iocb);
		if (ret != SD_RES_SUCCESS)
			sd_err("failed to write object %" PRIx64 ", %s",
			       hdr.obj.oid, sd_strerror(ret));

		offset += len;
		if (offset == SD_DATA_OBJ_SIZE) {
			offset = 0;
			idx++;
		}
		done += len;
		data += len;
	}

	return local_req_wait(iocb);
}
Esempio n. 11
0
File: kv.c Progetto: skuanr/sheepdog
static int bnode_lookup(struct kv_bnode *bnode, uint32_t vid, const char *name)
{
	uint64_t hval, i;
	int ret;

	hval = sd_hash(name, strlen(name));
	for (i = 0; i < MAX_DATA_OBJS; i++) {
		uint32_t idx = (hval + i) % MAX_DATA_OBJS;
		uint64_t oid = vid_to_data_oid(vid, idx);

		ret = sd_read_object(oid, (char *)bnode, sizeof(*bnode), 0);
		if (ret != SD_RES_SUCCESS)
			goto out;
		if (strcmp(bnode->name, name) == 0)
			break;
	}

	if (i == MAX_DATA_OBJS)
		ret = SD_RES_NO_OBJ;
out:
	return ret;
}
Esempio n. 12
0
static int vdi_create(int argc, char **argv)
{
	char *vdiname = argv[optind++];
	uint64_t size;
	uint32_t vid;
	uint64_t oid;
	int idx, max_idx, ret;
	struct sheepdog_inode *inode = NULL;
	char *buf = NULL;

	if (!argv[optind]) {
		fprintf(stderr, "please specify the size of vdi\n");
		return EXIT_USAGE;
	}
	ret = parse_option_size(argv[optind], &size);
	if (ret < 0)
		return EXIT_USAGE;
	if (size > SD_MAX_VDI_SIZE) {
		fprintf(stderr, "too big image size, %s\n", argv[optind]);
		return EXIT_USAGE;
	}

	ret = do_vdi_create(vdiname, size, 0, &vid, 0);
	if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc)
		goto out;

	inode = malloc(sizeof(*inode));
	buf = zalloc(SD_DATA_OBJ_SIZE);
	if (!inode || !buf) {
		fprintf(stderr, "oom\n");
		ret = EXIT_SYSFAIL;
		goto out;
	}

	ret = sd_read_object(vid_to_vdi_oid(vid), inode, sizeof(*inode), 0);
	if (ret != SD_RES_SUCCESS) {
		fprintf(stderr, "failed to read a newly created vdi object\n");
		ret = EXIT_FAILURE;
		goto out;
	}
	max_idx = DIV_ROUND_UP(size, SD_DATA_OBJ_SIZE);

	for (idx = 0; idx < max_idx; idx++) {
		oid = vid_to_data_oid(vid, idx);

		ret = sd_write_object(oid, 0, buf, SD_DATA_OBJ_SIZE, 0, 0,
				      inode->nr_copies, 1);
		if (ret != SD_RES_SUCCESS) {
			ret = EXIT_FAILURE;
			goto out;
		}

		inode->data_vdi_id[idx] = vid;
		ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid),
				      SD_INODE_HEADER_SIZE + sizeof(vid) * idx, 0,
				      inode->nr_copies, 0);
		if (ret) {
			ret = EXIT_FAILURE;
			goto out;
		}
	}
	ret = EXIT_SUCCESS;
out:
	free(inode);
	free(buf);
	return ret;
}
Esempio n. 13
0
/*
 * Initialize the data vdi
 *
 * @vid: the vdi where the allocator resides
 */
int oalloc_init(uint32_t vid)
{
	struct strbuf buf = STRBUF_INIT;
	struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));
	struct header hd = {
		.nr_free = 1,
	};
	struct free_desc fd = {
		.start = 1, /* Use first object as the meta object */
		.count = MAX_DATA_OBJS - 1,
	};
	int ret;

	strbuf_add(&buf, &hd, sizeof(hd));
	strbuf_add(&buf, &fd, sizeof(fd));

	ret = sd_read_object(vid_to_vdi_oid(vid), (char *)inode,
			     sizeof(*inode), 0);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to read inode, %" PRIx32", %s", vid,
		       sd_strerror(ret));
		goto out;
	}
	ret = sd_write_object(vid_to_data_oid(vid, 0), buf.buf,
			      buf.len, 0, true);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to create meta object for %" PRIx32", %s", vid,
		       sd_strerror(ret));
		goto out;
	}
	sd_inode_set_vid(inode, 0, vid);
	ret = sd_inode_write_vid(inode, 0, vid, vid, 0, false, false);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to update inode, %" PRIx32", %s", vid,
		       sd_strerror(ret));
		goto out;
	}
out:
	strbuf_release(&buf);
	free(inode);
	return ret;
}

/*
 * Allocate the objects and update the free list.
 *
 * Callers are expected to call oalloc_new_finish() to update the inode bitmap
 * after filling up the data.
 *
 * @vid: the vdi where the allocator resides
 * @start: start index of the objects to allocate
 * @count: number of the objects to allocate
 */
int oalloc_new_prepare(uint32_t vid, uint64_t *start, uint64_t count)
{
	char *meta = xvalloc(SD_DATA_OBJ_SIZE);
	struct header *hd;
	struct free_desc *fd;
	uint64_t oid = vid_to_data_oid(vid, 0), i;
	int ret;

	ret = sd_read_object(oid, meta, SD_DATA_OBJ_SIZE, 0);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to read meta %" PRIx64 ", %s", oid,
		       sd_strerror(ret));
		goto out;
	}

	hd = (struct header *)meta;
	fd = (struct free_desc *)(meta + oalloc_meta_length(hd)) - 1;
	sd_debug("used %"PRIu64", nr_free %"PRIu64, hd->used, hd->nr_free);
	for (i = 0; i < hd->nr_free; i++, fd--) {
		sd_debug("start %"PRIu64", count %"PRIu64, fd->start,
			 fd->count);
		if (fd->count > count)
			break;
	}
	if (i == hd->nr_free) {
		ret = SD_RES_NO_SPACE;
		goto out;
	}

	*start = fd->start;
	fd->start += count;
	fd->count -= count;
	hd->used += count;

	/* Update the meta object */
	ret = sd_write_object(oid, meta, oalloc_meta_length(hd), 0, false);
	if (ret != SD_RES_SUCCESS)
		sd_err("failed to update meta %"PRIx64 ", %s", oid,
		       sd_strerror(ret));
out:
	free(meta);
	return ret;
}

/*
 * Update the inode map of the vid
 *
 * @vid: the vdi where the allocator resides
 * @start: start index of the objects to update
 * @count: number of the objects to update
 */
int oalloc_new_finish(uint32_t vid, uint64_t start, uint64_t count)
{
	struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));
	int ret;

	ret = sd_read_object(vid_to_vdi_oid(vid), (char *)inode,
			     sizeof(*inode), 0);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to read inode, %" PRIx64 ", %s",
		       vid_to_vdi_oid(vid), sd_strerror(ret));
		goto out;
	}

	sd_debug("start %"PRIu64" end %"PRIu64, start, start + count - 1);
	sd_inode_set_vid_range(inode, start, (start + count - 1), vid);

	ret = sd_inode_write(inode, 0, false, false);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to update inode, %" PRIx64", %s",
		       vid_to_vdi_oid(vid), sd_strerror(ret));
		goto out;
	}
out:
	free(inode);
	return ret;
}

static int free_desc_cmp(struct free_desc *a, struct free_desc *b)
{
	return -intcmp(a->start, b->start);
}

static inline int update_and_merge_free_desc(char *meta, uint64_t start,
					     uint64_t count, uint32_t vid)
{
	struct header *hd = (struct header *)meta;
	struct free_desc *tail, *fd = HEADER_TO_FREE_DESC(hd);
	uint64_t i, j;

	/* Try our best to merge it in place, or append it to tail */
	for (i = 0; i < hd->nr_free; i++) {
		if (start + count == fd->start) {
			fd->start = start;
			fd->count += count;
			break;
		} else if(fd->start + fd->count == start) {
			fd->count +=count;
			break;
		}
		fd++;
	}

	if (i == hd->nr_free) {
		if (hd->nr_free >= MAX_FREE_DESC)
			return SD_RES_NO_SPACE;

		tail = (struct free_desc *)(meta + oalloc_meta_length(hd));
		tail->start = start;
		tail->count = count;
		hd->nr_free++;
	}

	hd->used -= count;
	xqsort(HEADER_TO_FREE_DESC(hd), hd->nr_free, free_desc_cmp);

	/* Merge as hard as we can */
	j = hd->nr_free - 1;
	tail = (struct free_desc *)(meta + oalloc_meta_length(hd)) - 1;
	for (i = 0; i < j; i++, tail--) {
		struct free_desc *front = tail - 1;

		sd_debug("start %"PRIu64", count %"PRIu64, tail->start,
			 tail->count);
		if (tail->start + tail->count > front->start)
			sd_emerg("bad free descriptor found at %"PRIx32, vid);
		if (tail->start + tail->count == front->start) {
			front->start = tail->start;
			front->count += tail->count;
			memmove(tail, tail + 1, sizeof(*tail) * i);
			hd->nr_free--;
		}
	}

	return SD_RES_SUCCESS;
}

/*
 * Discard the allocated objects and update the free list of the allocator
 *
 * Caller should check the return value since it might fail.
 *
 * @vid: the vdi where the allocator resides
 * @start: start index of the objects to free
 * @count: number of the objects to free
 */
int oalloc_free(uint32_t vid, uint64_t start, uint64_t count)
{
	char *meta = xvalloc(SD_DATA_OBJ_SIZE);
	struct header *hd;
	uint64_t oid = vid_to_data_oid(vid, 0), i;
	struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));
	int ret;

	ret = sd_read_object(vid_to_vdi_oid(vid), (char *)inode,
			     sizeof(*inode), 0);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to read inode, %" PRIx64 ", %s",
		       vid_to_vdi_oid(vid), sd_strerror(ret));
		goto out;
	}

	sd_debug("discard start %"PRIu64" end %"PRIu64, start,
		 start + count - 1);
	sd_inode_set_vid_range(inode, start, (start + count - 1), 0);

	ret = sd_inode_write(inode, 0, false, false);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to update inode, %" PRIx64", %s",
		       vid_to_vdi_oid(vid), sd_strerror(ret));
		goto out;
	}

	ret = sd_read_object(oid, meta, SD_DATA_OBJ_SIZE, 0);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to read meta %" PRIx64 ", %s", oid,
		       sd_strerror(ret));
		goto out;
	}

	ret = update_and_merge_free_desc(meta, start, count, vid);
	if (ret != SD_RES_SUCCESS)
		goto out;

	/* XXX use aio to speed up remove of objects */
	for (i = 0; i < count; i++) {
		struct sd_req hdr;
		int res;

		sd_init_req(&hdr, SD_OP_REMOVE_OBJ);
		hdr.obj.oid = vid_to_data_oid(vid, start + i);
		res = exec_local_req(&hdr, NULL);
		/*
		 * return the error code if it does not
		 * success or can't find obj.
		 */
		if (res != SD_RES_SUCCESS && res != SD_RES_NO_OBJ)
			ret = res;
	}

	hd = (struct header *)meta;
	ret = sd_write_object(oid, meta, oalloc_meta_length(hd), 0, false);
	if (ret != SD_RES_SUCCESS) {
		sd_err("failed to update meta %"PRIx64 ", %s", oid,
		       sd_strerror(ret));
		goto out;
	}
	sd_debug("used %"PRIu64", nr_free %"PRIu64, hd->used, hd->nr_free);
out:
	free(meta);
	free(inode);
	return ret;
}