Beispiel #1
0
/*
 * Run move_object_to_stale_dir over the objects visited by
 * for_each_object_in_wd(), handing it a pointer to the latest epoch as
 * the callback argument.
 *
 * Returns the result of for_each_object_in_wd() unchanged.
 */
int default_purge_obj(void)
{
	uint32_t latest = get_latest_epoch();

	return for_each_object_in_wd(move_object_to_stale_dir, true, &latest);
}
Beispiel #2
0
/*
 * Complete the local side of a join using the join message @msg.
 *
 * - marks the join as finished and adopts the epoch carried in @msg;
 * - if the cluster status in @msg is not SD_STATUS_OK, refreshes the
 *   exceptional node list against the latest local epoch;
 * - on a non-gateway node with no store selected yet, looks up and
 *   initializes the store driver named in @msg and records it in the
 *   config; any failure here panics;
 * - if @msg requests an epoch increment, purges stale objects through the
 *   store driver's purge_obj hook (when one is provided);
 * - registers @nodes[0..@nr_nodes) with the sockfd cache.
 *
 * @joined is not used by this function.
 */
static void finish_join(struct join_message *msg, struct sd_node *joined,
		struct sd_node *nodes, size_t nr_nodes)
{
	sys->join_finished = 1;
	sys->epoch = msg->epoch;

	if (msg->cluster_status != SD_STATUS_OK)
		update_exceptional_node_list(get_latest_epoch(), msg);

	/* We don't need backend for gateway-only node */
	if (!sys->gateway_only && !sd_store && strlen((char *)msg->store)) {
		sd_store = find_store_driver((char *)msg->store);
		if (sd_store) {
			if (sd_store->init(obj_path) != SD_RES_SUCCESS)
				panic("failed to initialize store\n");
			if (set_cluster_store(sd_store->name) != SD_RES_SUCCESS)
				panic("failed to store into config file\n");
		} else {
			panic("backend store %s not supported\n",
			      msg->store);
		}
	}

	/*
	 * We need to purge the stale objects for sheep joining back
	 * after crash.
	 *
	 * sd_store may still be NULL at this point: the lookup above is
	 * skipped when the message carries an empty store name, so it must
	 * be checked before its purge_obj hook is dereferenced.
	 */
	if (msg->inc_epoch && !sys->gateway_only && sd_store &&
	    sd_store->purge_obj &&
	    sd_store->purge_obj() != SD_RES_SUCCESS)
		panic("can't remove stale objects\n");

	sockfd_cache_add_group(nodes, nr_nodes);
}
Beispiel #3
0
/*
 * Complete the local side of a join using the join message @msg.
 *
 * - adopts nr_copies and epoch from @msg;
 * - appends every entry of @nodes[0..@nr_nodes) except @joined to
 *   sys->nodes and re-sorts the list with node_cmp;
 * - if the cluster status is not SD_STATUS_OK, records each leave node
 *   from @msg that is not already on sys->leave_list and that
 *   find_entry_epoch() finds for the latest local epoch;
 * - marks the join as finished;
 * - updates the epoch log when @msg requests an epoch increment and the
 *   cluster status is SD_STATUS_OK or SD_STATUS_HALT;
 * - if no store is selected yet and @msg names one, looks up and
 *   initializes that store driver and records it in the config.
 *
 * Allocation failure, an unsupported store, a failed store init and a
 * failed config write all panic.
 */
static void finish_join(struct join_message *msg, struct sd_node *joined,
		struct sd_node *nodes, size_t nr_nodes)
{
	size_t idx;

	sys->nr_copies = msg->nr_copies;
	sys->epoch = msg->epoch;

	/* add nodes except for the newly joined one */
	for (idx = 0; idx < nr_nodes; idx++) {
		if (node_eq(nodes + idx, joined))
			continue;

		sys->nodes[sys->nr_nodes++] = nodes[idx];
	}
	qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);

	if (msg->cluster_status != SD_STATUS_OK) {
		int nr_leave_nodes;
		uint32_t le;
		int i;

		nr_leave_nodes = msg->nr_leave_nodes;
		le = get_latest_epoch();
		for (i = 0; i < nr_leave_nodes; i++) {
			struct node *n;

			/* skip nodes already listed or unknown to epoch le */
			if (find_entry_list(&msg->leave_nodes[i], &sys->leave_list) ||
			    !find_entry_epoch(&msg->leave_nodes[i], le)) {
				continue;
			}

			n = zalloc(sizeof(*n));
			if (!n)
				panic("failed to allocate memory\n");
			n->ent = msg->leave_nodes[i];
			list_add_tail(&n->list, &sys->leave_list);
		}
	}

	sys->join_finished = 1;

	if ((msg->cluster_status == SD_STATUS_OK ||
	     msg->cluster_status == SD_STATUS_HALT) && msg->inc_epoch)
		update_epoch_log(sys->epoch);

	if (!sd_store && strlen((char *)msg->store)) {
		sd_store = find_store_driver((char *)msg->store);
		if (sd_store) {
			/* a store that failed to initialize must not be used */
			if (sd_store->init(obj_path) != SD_RES_SUCCESS)
				panic("failed to initialize store\n");
			if (set_cluster_store(sd_store->name) != SD_RES_SUCCESS)
				panic("failed to store into config file\n");
		} else {
			panic("backend store %s not supported\n", msg->store);
		}
	}
}
Beispiel #4
0
/*
 * Check a joining node's view (@entries, @nr_entries, @ctime, @epoch)
 * against the local state.
 *
 * Returns SD_RES_SUCCESS without further checks when
 * sys_stat_wait_format() or sys_stat_shutdown() is true, or when
 * @nr_entries is 0.  Otherwise returns:
 *   SD_RES_INVALID_CTIME  - @ctime differs from the local cluster ctime;
 *   SD_RES_OLD_NODE_VER   - @epoch is greater than the latest local epoch;
 *   SD_RES_SUCCESS        - sys_can_recover() is true;
 *   SD_RES_NEW_NODE_VER   - @epoch is smaller than the latest local epoch;
 *   SD_RES_INVALID_EPOCH  - the node list logged for @epoch differs from
 *                           @entries in count or content;
 *   SD_RES_SUCCESS        - everything matches.
 */
static int cluster_sanity_check(struct sd_node *entries,
			     int nr_entries, uint64_t ctime, uint32_t epoch)
{
	struct sd_node logged[SD_MAX_NODES];
	uint32_t latest;
	int nr_logged;

	if (sys_stat_wait_format() || sys_stat_shutdown())
		return SD_RES_SUCCESS;

	/* When the joining node is newly created, we need not check anything. */
	if (nr_entries == 0)
		return SD_RES_SUCCESS;

	if (ctime != get_cluster_ctime())
		return SD_RES_INVALID_CTIME;

	latest = get_latest_epoch();
	if (epoch > latest)
		return SD_RES_OLD_NODE_VER;

	if (sys_can_recover())
		return SD_RES_SUCCESS;

	if (epoch < latest)
		return SD_RES_NEW_NODE_VER;

	/* same epoch on both sides: the logged membership must be identical */
	nr_logged = epoch_log_read_nr(epoch, (char *)logged, sizeof(logged));

	if (nr_entries == nr_logged &&
	    memcmp(entries, logged, sizeof(entries[0]) * nr_entries) == 0)
		return SD_RES_SUCCESS;

	return SD_RES_INVALID_EPOCH;
}
Beispiel #5
0
/*
 * Compare the cluster parameters carried in join message @jm with the
 * local ones.
 *
 * Returns CJ_RES_SUCCESS when all of the following hold, and CJ_RES_FAIL
 * (after logging the reason) as soon as one does not:
 *   - the local nr_copies can be read via get_cluster_copies();
 *   - jm->ctime equals the local cluster ctime;
 *   - jm->epoch is not greater than the latest local epoch;
 *   - jm->nr_copies equals the local nr_copies;
 *   - jm->cluster_flags equals sys->flags.
 */
static int cluster_sanity_check(struct join_message *jm)
{
	uint64_t my_ctime = get_cluster_ctime();
	uint32_t my_epoch = get_latest_epoch();
	uint8_t my_copies;
	int res = CJ_RES_FAIL;

	if (get_cluster_copies(&my_copies)) {
		eprintf("failed to get nr_copies\n");
	} else if (jm->ctime != my_ctime) {
		eprintf("joining node ctime doesn't match: %"
			PRIu64 " vs %" PRIu64 "\n",
			jm->ctime, my_ctime);
	} else if (jm->epoch > my_epoch) {
		eprintf("joining node epoch too large: %"
			PRIu32 " vs %" PRIu32 "\n",
			jm->epoch, my_epoch);
	} else if (jm->nr_copies != my_copies) {
		eprintf("joining node nr_copies doesn't match: %u vs %u\n",
			jm->nr_copies, my_copies);
	} else if (jm->cluster_flags != sys->flags) {
		eprintf("joining node cluster_flags don't match: %u vs %u\n",
			jm->cluster_flags, sys->flags);
	} else {
		/* every parameter agrees with the local configuration */
		res = CJ_RES_SUCCESS;
	}

	return res;
}
Beispiel #6
0
/*
 * Evaluate join message @jm and decide how the join should be answered.
 *
 * Returns:
 *   CJ_RES_JOIN_LATER      - @jm carries no nodes, or jm->epoch is smaller
 *                            than the latest local epoch;
 *   CJ_RES_MASTER_TRANSFER - cluster_sanity_check() failed and jm->epoch
 *                            is greater than sys->epoch;
 *   CJ_RES_FAIL            - the epoch log for jm->epoch cannot be read,
 *                            or its node list differs from jm->nodes in
 *                            count or content (a failing sanity check
 *                            result is also passed through unchanged);
 *   CJ_RES_SUCCESS         - the join is accepted.
 *
 * On success jm->cluster_status is set to SD_STATUS_OK when enough nodes
 * are present, and jm->inc_epoch is additionally set to 1 when that count
 * is only reached by taking failed/delayed nodes into account.
 *
 * @joined is not used by this function.
 */
static int cluster_wait_for_join_check(struct sd_node *joined,
		struct join_message *jm)
{
	struct sd_node local_entries[SD_MAX_NODES];
	int nr, nr_local_entries, nr_failed_entries, nr_delayed_nodes;
	uint32_t local_epoch = get_latest_epoch();
	int ret;

	if (jm->nr_nodes == 0)
		return CJ_RES_JOIN_LATER;

	ret = cluster_sanity_check(jm);
	if (ret != CJ_RES_SUCCESS) {
		if (jm->epoch > sys->epoch) {
			/* epochs are unsigned 32-bit: print them as such */
			eprintf("transfer mastership (%" PRIu32 ", %" PRIu32 ")\n",
				jm->epoch, sys->epoch);
			return CJ_RES_MASTER_TRANSFER;
		}
		return ret;
	}

	nr_local_entries = epoch_log_read(jm->epoch, local_entries,
					  sizeof(local_entries));
	if (nr_local_entries == -1)
		return CJ_RES_FAIL;

	if (jm->epoch < local_epoch) {
		eprintf("joining node epoch too small: %"
			PRIu32 " vs %" PRIu32 "\n",
			jm->epoch, local_epoch);
		return CJ_RES_JOIN_LATER;
	}

	if (jm->nr_nodes != nr_local_entries) {
		eprintf("epoch log entries do not match: %d vs %d\n",
			jm->nr_nodes, nr_local_entries);
		return CJ_RES_FAIL;
	}

	if (memcmp(jm->nodes, local_entries,
		   sizeof(jm->nodes[0]) * jm->nr_nodes) != 0) {
		eprintf("epoch log entries does not match\n");
		return CJ_RES_FAIL;
	}

	/*
	 * Node count to compare against the epoch log: the in-memory node
	 * count plus one (presumably the node that is joining now).
	 */
	if (!current_vnode_info)
		nr = 1;
	else
		nr = current_vnode_info->nr_nodes + 1;

	nr_delayed_nodes = get_nodes_nr_from(&sys->delayed_nodes);

	/*
	 * If we have all members from the last epoch log in the in-memory
	 * node list, and no new nodes joining we can set the cluster live
	 * now without incrementing the epoch.
	 */
	if (nr == nr_local_entries && !nr_delayed_nodes) {
		jm->cluster_status = SD_STATUS_OK;
		return CJ_RES_SUCCESS;
	}

	/*
	 * If we reach the old node count, but some node failed we have to
	 * update the epoch before setting the cluster live.
	 */
	nr_failed_entries = get_nodes_nr_from(&sys->failed_nodes);
	if (nr_local_entries == nr + nr_failed_entries - nr_delayed_nodes) {
		jm->inc_epoch = 1;
		jm->cluster_status = SD_STATUS_OK;
		return CJ_RES_SUCCESS;
	}

	/*
	 * The join was successful, but we don't have enough nodes yet to set
	 * the cluster live.
	 */
	return CJ_RES_SUCCESS;
}