Esempio n. 1
0
/*
 * Recover the object from its track in epoch history. That is,
 * the routine will try to recover it from the nodes it has stayed on,
 * at least *theoretically*, on the consistent hash ring.
 *
 * The search starts with the vnode layout in rw->old_vinfo and tries every
 * replica location of the object in turn. If none of them yields the
 * object, the layout of the next older epoch is loaded and the search is
 * repeated.
 *
 * Returns SD_RES_SUCCESS once a replica has been recovered,
 * SD_RES_OLD_NODE_VER (after setting rw->stop) if a replica reports it,
 * or -1 when there is no older epoch (>= 1) left to fall back to.
 */
static int do_recover_object(struct recovery_work *rw)
{
	struct vnode_info *old;
	uint64_t oid = rw->oids[rw->done];
	uint32_t epoch = rw->epoch, tgt_epoch = rw->epoch - 1;
	/*
	 * ret starts out as "failed": if a pass tries no replica at all
	 * (no copies, or every candidate vnode is invalid) the object has
	 * not been recovered, so we must fall through to the rollback path
	 * instead of reading an indeterminate value.
	 */
	int nr_copies, ret = -1, i;

	old = grab_vnode_info(rw->old_vinfo);

again:
	sd_dprintf("try recover object %"PRIx64" from epoch %"PRIu32, oid,
		   tgt_epoch);

	/* Let's do a breadth-first search */
	nr_copies = get_obj_copy_number(oid, old->nr_zones);
	for (i = 0; i < nr_copies; i++) {
		const struct sd_vnode *tgt_vnode;

		tgt_vnode = oid_to_vnode(old->vnodes, old->nr_vnodes, oid, i);

		/* Skip locations rejected against the current node list */
		if (is_invalid_vnode(tgt_vnode, rw->cur_vinfo->nodes,
				     rw->cur_vinfo->nr_nodes))
			continue;
		ret = recover_object_from_replica(oid, tgt_vnode,
						  epoch, tgt_epoch);
		if (ret == SD_RES_SUCCESS) {
			/* Succeed */
			break;
		} else if (SD_RES_OLD_NODE_VER == ret) {
			rw->stop = true;
			goto err;
		} else
			ret = -1;
	}

	/* No luck, roll back to an older configuration and try again */
	if (ret < 0) {
		struct vnode_info *new_old;

rollback:
		/*
		 * tgt_epoch is unsigned: test before decrementing so that a
		 * value of 0 cannot wrap around to UINT32_MAX and send us
		 * through billions of non-existent epochs.
		 */
		if (tgt_epoch <= 1) {
			sd_eprintf("can not recover oid %"PRIx64, oid);
			ret = -1;
			goto err;
		}
		tgt_epoch--;

		new_old = get_vnode_info_epoch(tgt_epoch);
		if (!new_old)
			/* We rollback in case we don't get a valid epoch */
			goto rollback;

		/* Swap the layout we search in for the older one */
		put_vnode_info(old);
		old = new_old;
		goto again;
	}
err:
	put_vnode_info(old);
	return ret;
}
Esempio n. 2
0
/*
 * Tear down a recovery_work: release both vnode info references it holds,
 * then free its oid array and the structure itself.
 */
static void free_recovery_work(struct recovery_work *rw)
{
	struct vnode_info *cur = rw->cur_vinfo;
	struct vnode_info *prev = rw->old_vinfo;

	put_vnode_info(cur);
	put_vnode_info(prev);

	free(rw->oids);
	free(rw);
}
Esempio n. 3
0
/*
 * Start a recovery pass in which the current vnode info is passed as both
 * arguments of start_recovery(), then release the reference obtained here.
 */
static inline void kick_recover(void)
{
	struct vnode_info *cur = get_vnode_info();

	start_recovery(cur, cur);
	put_vnode_info(cur);
}
Esempio n. 4
0
/*
 * Tell whether an object is stale with respect to the current vnode layout.
 *
 * Returns false when the object has a copy count of zero or when one of
 * the vnodes it maps to is local; returns true otherwise.
 */
static bool oid_stale(uint64_t oid)
{
	struct sd_vnode *targets[SD_MAX_COPIES];
	struct vnode_info *vinfo = get_vnode_info();
	int copies = get_obj_copy_number(oid, vinfo->nr_zones);
	/* A zero copy count is never reported as stale */
	bool stale = (copies != 0);
	int idx;

	if (copies) {
		oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid,
			      copies, targets);

		/* One local replica location is enough to keep the object */
		for (idx = 0; idx < copies; idx++) {
			if (vnode_is_local(targets[idx])) {
				stale = false;
				break;
			}
		}
	}

	put_vnode_info(vinfo);
	return stale;
}
Esempio n. 5
0
/*
 * Return the node count recorded in the current vnode info, or 1 when no
 * vnode info is available.
 */
static size_t get_nr_nodes(void)
{
	struct vnode_info *vinfo = get_vnode_info();
	size_t count;

	if (vinfo == NULL)
		count = 1;
	else
		count = vinfo->nr_nodes;

	/* Called unconditionally, exactly as for the non-NULL case */
	put_vnode_info(vinfo);

	return count;
}
Esempio n. 6
0
/*
 * Allocate a new vnode_info, fill it from the node list held in sys and
 * install it as current_vnode_info, releasing the one it replaces.
 *
 * Returns 0 on success, 1 if the allocation failed.
 */
static int update_vnode_info(void)
{
	struct vnode_info *fresh = zalloc(sizeof(*fresh));

	if (!fresh) {
		eprintf("failed to allocate memory\n");
		return 1;
	}

	/* The reference taken here is the one held by current_vnode_info */
	fresh->refcnt = 1;
	fresh->nr_vnodes = nodes_to_vnodes(sys->nodes, sys->nr_nodes,
					   fresh->entries);
	fresh->nr_zones = get_zones_nr_from(sys->nodes, sys->nr_nodes);

	put_vnode_info(current_vnode_info);
	current_vnode_info = fresh;

	return 0;
}
Esempio n. 7
0
/*
 * Apply a join message to the local cluster state.
 *
 * In order, this:
 *  - copies msg->disable_recovery into sys and completes the join via
 *    finish_join() if sys->join_finished is not yet set;
 *  - unless recovery is disabled, replaces current_vnode_info with the
 *    result of alloc_vnode_info(nodes, nr_nodes), remembering the old one;
 *  - updates sys->status according to msg->cluster_status;
 *  - when msg->inc_epoch is set, either bumps the epoch and starts
 *    recovery, or (recovery disabled) calls prepare_recovery();
 *  - releases the remembered vnode info and adds the joined node to the
 *    sockfd cache.
 *
 * msg:      the join message being applied
 * joined:   the node that joined
 * nodes:    node list passed on to the helpers called here
 * nr_nodes: number of entries in nodes
 */
static void update_cluster_info(struct join_message *msg,
				struct sd_node *joined, struct sd_node *nodes,
				size_t nr_nodes)
{
	struct vnode_info *old_vnode_info = NULL;

	eprintf("status = %d, epoch = %d, finished: %d\n", msg->cluster_status,
		msg->epoch, sys->join_finished);

	sys->disable_recovery = msg->disable_recovery;

	if (!sys->join_finished)
		finish_join(msg, joined, nodes, nr_nodes);

	/*
	 * Keep the previous vnode info around: it is handed to
	 * start_recovery() below and released at the end of this function.
	 */
	if (!sys->disable_recovery) {
		old_vnode_info = current_vnode_info;
		current_vnode_info = alloc_vnode_info(nodes, nr_nodes);
	}

	switch (msg->cluster_status) {
	case SD_STATUS_OK:
	case SD_STATUS_HALT:
		switch (sys->status) {
		case SD_STATUS_WAIT_FOR_FORMAT:
			/*
			 * Adopt copy count, flags and creation time from the
			 * message, then continue with the WAIT_FOR_JOIN work.
			 */
			sys->nr_copies = msg->nr_copies;
			sys->flags = msg->cluster_flags;

			set_cluster_copies(sys->nr_copies);
			set_cluster_flags(sys->flags);
			set_cluster_ctime(msg->ctime);
			/*FALLTHROUGH*/
		case SD_STATUS_WAIT_FOR_JOIN:
			get_vdi_bitmap(nodes, nr_nodes);
			break;
		default:
			break;
		}

		sys->status = msg->cluster_status;

		if (msg->inc_epoch) {
			if (!sys->disable_recovery) {
				uatomic_inc(&sys->epoch);
				log_current_epoch();
				clear_exceptional_node_lists();

				/*
				 * No previous vnode info was saved above
				 * (presumably current_vnode_info was NULL),
				 * so obtain one from alloc_old_vnode_info().
				 */
				if (!old_vnode_info) {
					old_vnode_info =
						alloc_old_vnode_info(joined,
							nodes, nr_nodes);
				}

				start_recovery(current_vnode_info,
					       old_vnode_info);
			} else
				prepare_recovery(joined, nodes, nr_nodes);
		}

		/*
		 * This overrides the status taken from the message: with
		 * enough zones the node goes to OK even if msg said HALT.
		 */
		if (have_enough_zones())
			sys->status = SD_STATUS_OK;
		break;
	default:
		sys->status = msg->cluster_status;
		break;
	}

	/*
	 * NOTE(review): old_vnode_info may still be NULL here (e.g. recovery
	 * disabled); this relies on put_vnode_info() accepting NULL - confirm.
	 */
	put_vnode_info(old_vnode_info);

	sockfd_cache_add(&joined->nid);
}
Esempio n. 8
0
/*
 * Install vnode_info as the currently active vnode information structure,
 * releasing the one that was active before.
 *
 * This must only be called from the main thread.
 */
void update_vnode_info(struct vnode_info *vnode_info)
{
	struct vnode_info *previous = current_vnode_info;

	/* Release first, then publish the replacement */
	put_vnode_info(previous);
	current_vnode_info = vnode_info;
}