Example 1
/*
 * Recover the object by tracking it back through epoch history. That
 * is, the routine tries to recover it from the nodes it has, at
 * least *theoretically*, stayed on according to the consistent hash
 * ring of each epoch.
 */
static int do_recover_object(struct recovery_work *rw)
{
	struct vnode_info *old;
	uint64_t oid = rw->oids[rw->done];
	uint32_t epoch = rw->epoch, tgt_epoch = rw->epoch - 1;
	int nr_copies, ret = -1, i; /* default to failure so we roll back */

	old = grab_vnode_info(rw->old_vinfo);

again:
	sd_dprintf("try recover object %"PRIx64" from epoch %"PRIu32, oid,
		   tgt_epoch);

	/* Let's do a breadth-first search */
	nr_copies = get_obj_copy_number(oid, old->nr_zones);
	for (i = 0; i < nr_copies; i++) {
		const struct sd_vnode *tgt_vnode;

		tgt_vnode = oid_to_vnode(old->vnodes, old->nr_vnodes, oid, i);

		if (is_invalid_vnode(tgt_vnode, rw->cur_vinfo->nodes,
				     rw->cur_vinfo->nr_nodes))
			continue;
		ret = recover_object_from_replica(oid, tgt_vnode,
						  epoch, tgt_epoch);
		if (ret == SD_RES_SUCCESS) {
			/* Succeeded */
			break;
		} else if (ret == SD_RES_OLD_NODE_VER) {
			rw->stop = true;
			goto err;
		} else
			ret = -1;
	}

	/* No luck, roll back to an older configuration and try again */
	if (ret < 0) {
		struct vnode_info *new_old;

rollback:
		tgt_epoch--;
		if (tgt_epoch < 1) {
			sd_eprintf("can not recover oid %"PRIx64, oid);
			ret = -1;
			goto err;
		}

		new_old = get_vnode_info_epoch(tgt_epoch);
		if (!new_old)
			/* Roll back further if this epoch has no valid vnode info */
			goto rollback;

		put_vnode_info(old);
		old = new_old;
		goto again;
	}
err:
	put_vnode_info(old);
	return ret;
}
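
For reference, here is a minimal sketch of how a recovery worker might drive do_recover_object over the queued object list, assuming rw->oids holds rw->count object IDs and rw->done indexes the next one to process (as the code above suggests). The wrapper name recover_object_work and its error handling are illustrative assumptions, not the project's actual callback.

/*
 * Hypothetical driver loop: process one object per iteration and
 * stop early if do_recover_object detects an epoch change.
 */
static void recover_object_work(struct recovery_work *rw)
{
	while (rw->done < rw->count) {
		int ret = do_recover_object(rw);

		if (rw->stop)
			break;	/* epoch changed under us; bail out */
		if (ret < 0)
			sd_eprintf("failed to recover object %"PRIx64,
				   rw->oids[rw->done]);
		rw->done++;
	}
}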
Example 2
int start_recovery(struct vnode_info *cur_vinfo, struct vnode_info *old_vinfo)
{
	struct recovery_work *rw;

	if (node_is_gateway_only())
		return 0;

	rw = zalloc(sizeof(struct recovery_work));
	if (!rw) {
		eprintf("%m\n");
		return -1;
	}

	rw->state = RW_INIT;
	rw->oids = xmalloc(1 << 20); /* FIXME */
	rw->epoch = sys->epoch;
	rw->count = 0;

	rw->cur_vinfo = grab_vnode_info(cur_vinfo);
	rw->old_vinfo = grab_vnode_info(old_vinfo);

	rw->work.fn = prepare_object_list;
	rw->work.done = finish_object_list;

	if (sd_store->begin_recover) {
		struct siocb iocb = { 0 };
		iocb.epoch = rw->epoch;
		sd_store->begin_recover(&iocb);
	}

	if (recovering_work != NULL) {
		/* skip the previous epoch recovery */
		if (next_rw)
			free_recovery_work(next_rw);
		dprintf("recovery skipped\n");
		next_rw = rw;
	} else {
		recovering_work = rw;
		queue_work(sys->recovery_wqueue, &rw->work);
	}

	resume_wait_epoch_requests();

	return 0;
}
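
The next_rw parking slot above implies a complementary hand-off once the in-flight recovery finishes. A minimal sketch of that step under assumed semantics, modeled on the run_next_rw name that Example 3's comment refers to; the details are assumptions rather than the project's actual code.

/*
 * Hypothetical hand-off: when the running recovery completes, the
 * parked work, if any, replaces it and is queued.
 */
static void run_next_rw(struct recovery_work *finished)
{
	/* drop the work that just completed */
	free_recovery_work(finished);

	/* promote the parked work and queue it */
	recovering_work = next_rw;
	next_rw = NULL;
	if (recovering_work)
		queue_work(sys->recovery_wqueue, &recovering_work->work);
}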
Example 3
int start_recovery(struct vnode_info *cur_vinfo, struct vnode_info *old_vinfo)
{
	struct recovery_work *rw;

	if (node_is_gateway_only())
		goto out;

	rw = xzalloc(sizeof(struct recovery_work));
	rw->state = RW_INIT;
	rw->oids = xmalloc(list_buffer_size);
	rw->epoch = sys->epoch;
	rw->count = 0;

	rw->cur_vinfo = grab_vnode_info(cur_vinfo);
	rw->old_vinfo = grab_vnode_info(old_vinfo);

	rw->work.fn = prepare_object_list;
	rw->work.done = finish_object_list;

	if (recovering_work != NULL) {
		/* skip the previous epoch recovery */
		if (next_rw)
			free_recovery_work(next_rw);
		sd_dprintf("recovery skipped");
		next_rw = rw;

		/*
		 * This is necessary to invoke run_next_rw when
		 * recovery work is suspended.
		 */
		resume_suspended_recovery();
	} else {
		recovering_work = rw;
		queue_work(sys->recovery_wqueue, &rw->work);
	}
out:
	wakeup_requests_on_epoch();
	return 0;
}
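
A minimal usage sketch, assuming the caller snapshots the vnode information around a membership change. The handler name on_epoch_change and the surrounding flow are hypothetical; only the start_recovery calling convention is taken from the code above.

/*
 * Hypothetical caller: pass the post-change ring (cur) and the
 * pre-change ring (old) so recovery can locate replicas on the
 * previous layout. start_recovery takes its own references via
 * grab_vnode_info, so the caller drops its own afterwards.
 */
static void on_epoch_change(struct vnode_info *old)
{
	struct vnode_info *cur = get_vnode_info();

	if (start_recovery(cur, old) < 0)
		sd_eprintf("failed to start recovery");

	put_vnode_info(cur);
	put_vnode_info(old);
}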
Example 4
/*
 * Get a reference to the currently active vnode information
 * structure. This must only be called from the main thread.
 */
struct vnode_info *get_vnode_info(void)
{
	assert(current_vnode_info);

	return grab_vnode_info(current_vnode_info);
}
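
The grab/put pairing throughout these examples suggests simple reference counting on vnode_info. A usage sketch under that assumption (grab_vnode_info takes a reference, put_vnode_info drops it); do_work_with_ring is a hypothetical consumer.

static void do_work_with_ring(void)
{
	/* main thread only, per the comment above */
	struct vnode_info *vinfo = get_vnode_info();

	/*
	 * The snapshot stays valid for this holder even if the main
	 * thread installs a newer current_vnode_info meanwhile,
	 * e.g. consult vinfo->vnodes[0..vinfo->nr_vnodes-1].
	 */

	put_vnode_info(vinfo);	/* drop our reference */
}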