Example #1
0
/*
 * Validate a join request and, when it is acceptable, ask for an epoch
 * increment by setting jm->inc_epoch.
 *
 * Returns CJ_RES_SUCCESS, or whatever non-success code
 * cluster_sanity_check() reported (in which case jm->inc_epoch is left
 * untouched).
 */
static int cluster_running_check(struct join_message *jm)
{
	int res = CJ_RES_SUCCESS;

	/*
	 * A join message that carries no node list has nothing to be
	 * validated against, so the sanity check is skipped for it.
	 */
	if (jm->nr_nodes != 0)
		res = cluster_sanity_check(jm);

	if (res == CJ_RES_SUCCESS)
		jm->inc_epoch = 1;

	return res;
}
Example #2
0
/*
 * Work out which cluster status should be reported for a request coming
 * from node 'from', and whether the epoch has to be incremented.
 *
 * from       - node the request came from; also used for the error log
 * entries    - node list handed to cluster_sanity_check()
 * nr_entries - number of elements in 'entries'
 * ctime      - creation time handed to cluster_sanity_check()
 * epoch      - epoch handed to cluster_sanity_check() and used to pick the
 *              epoch log that is read while waiting for join
 * status     - out: starts as the current system status and is promoted to
 *              SD_STATUS_OK only in the SD_STATUS_WAIT_FOR_JOIN case
 * inc_epoch  - out, may be NULL: cleared first, set to 1 when the caller
 *              has to increment the epoch
 *
 * Returns SD_RES_SUCCESS, the non-zero result of cluster_sanity_check(),
 * SD_RES_NOT_FORMATTED or SD_RES_SHUTDOWN.  Every non-zero result is logged
 * together with the address of 'from'.
 */
static int get_cluster_status(struct sd_node *from,
			      struct sd_node *entries,
			      int nr_entries, uint64_t ctime, uint32_t epoch,
			      uint32_t *status, uint8_t *inc_epoch)
{
	int i, j, ret = SD_RES_SUCCESS;
	int nr, nr_local_entries, nr_leave_entries;
	struct sd_node local_entries[SD_MAX_NODES];
	char str[256];
	uint32_t sys_stat = sys_stat_get();

	*status = sys_stat;
	if (inc_epoch)
		*inc_epoch = 0;

	ret = cluster_sanity_check(entries, nr_entries, ctime, epoch);
	if (ret)
		goto out;

	switch (sys_stat) {
	case SD_STATUS_HALT:
	case SD_STATUS_OK:
		if (inc_epoch)
			*inc_epoch = 1;
		break;
	case SD_STATUS_WAIT_FOR_FORMAT:
		/* Only a node that brings no entries is accepted here. */
		if (nr_entries != 0)
			ret = SD_RES_NOT_FORMATTED;
		break;
	case SD_STATUS_WAIT_FOR_JOIN:
		/* Current members plus the node this request came from. */
		nr = sys->nr_nodes + 1;
		nr_local_entries = epoch_log_read_nr(epoch, (char *)local_entries,
						  sizeof(local_entries));

		if (nr != nr_local_entries) {
			nr_leave_entries = get_nodes_nr_from(&sys->leave_list);
			if (nr_local_entries == nr + nr_leave_entries) {
				/*
				 * Even though some nodes have left, we can
				 * make do without them.  Order the cluster to
				 * do recovery right now.
				 */
				if (inc_epoch)
					*inc_epoch = 1;
				*status = SD_STATUS_OK;
			}
			break;
		}

		/*
		 * The counts match; now make sure the identities do as well.
		 * Every node recorded in the epoch log has to be either
		 * 'from' or a node we already know about.
		 */
		for (i = 0; i < nr_local_entries; i++) {
			if (node_eq(local_entries + i, from))
				continue;
			for (j = 0; j < sys->nr_nodes; j++) {
				if (node_eq(local_entries + i, sys->nodes + j))
					break;
			}
			if (j == sys->nr_nodes)
				break;	/* logged node is still missing */
		}

		/*
		 * Go live only when the scan above ran to completion.  An
		 * early exit means a logged node was not found, so the status
		 * is left as it was.
		 */
		if (i == nr_local_entries)
			*status = SD_STATUS_OK;
		break;
	case SD_STATUS_SHUTDOWN:
		ret = SD_RES_SHUTDOWN;
		break;
	default:
		break;
	}
out:
	if (ret)
		eprintf("%x, %s\n", ret,
			addr_to_str(str, sizeof(str), from->addr, from->port));

	return ret;
}
Example #3
0
/*
 * Decide how to answer a join request, based on the join message and the
 * locally stored epoch log.
 *
 * On success jm->cluster_status is set to SD_STATUS_OK when enough nodes
 * are present, and jm->inc_epoch is additionally set when failed nodes had
 * to be accounted for to reach that number.
 *
 * Returns:
 *   CJ_RES_JOIN_LATER      - the message carries no nodes, or its epoch is
 *                            older than the latest local epoch
 *   CJ_RES_MASTER_TRANSFER - the sanity check failed and the message has a
 *                            newer epoch than sys->epoch
 *   CJ_RES_FAIL            - the epoch log could not be read or differs
 *                            from the node list in the message
 *   CJ_RES_SUCCESS         - the join is accepted
 *   otherwise the non-success result of cluster_sanity_check()
 */
static int cluster_wait_for_join_check(struct sd_node *joined,
		struct join_message *jm)
{
	struct sd_node logged[SD_MAX_NODES];
	int nr_present, nr_logged, nr_failed, nr_delayed;
	uint32_t latest_epoch = get_latest_epoch();
	int res;

	if (jm->nr_nodes == 0)
		return CJ_RES_JOIN_LATER;

	res = cluster_sanity_check(jm);
	if (res != CJ_RES_SUCCESS) {
		/* Without a newer epoch the failure is simply passed on. */
		if (jm->epoch <= sys->epoch)
			return res;

		eprintf("transfer mastership (%d, %d)\n",
			jm->epoch, sys->epoch);
		return CJ_RES_MASTER_TRANSFER;
	}

	nr_logged = epoch_log_read(jm->epoch, logged, sizeof(logged));
	if (nr_logged == -1)
		return CJ_RES_FAIL;

	if (jm->epoch < latest_epoch) {
		eprintf("joining node epoch too small: %"
			PRIu32 " vs %" PRIu32 "\n",
			jm->epoch, latest_epoch);
		return CJ_RES_JOIN_LATER;
	}

	/* The node list in the message has to equal the logged one. */
	if (jm->nr_nodes != nr_logged) {
		eprintf("epoch log entries do not match: %d vs %d\n",
			jm->nr_nodes, nr_logged);
		return CJ_RES_FAIL;
	}
	if (memcmp(jm->nodes, logged,
		   sizeof(jm->nodes[0]) * jm->nr_nodes) != 0) {
		eprintf("epoch log entries does not match\n");
		return CJ_RES_FAIL;
	}

	/* Nodes in the current vnode info (if any) plus one more. */
	nr_present = 1;
	if (current_vnode_info)
		nr_present = current_vnode_info->nr_nodes + 1;

	nr_delayed = get_nodes_nr_from(&sys->delayed_nodes);

	/*
	 * Every member of the last epoch log is present and nobody is
	 * queued as delayed: the cluster can go live with the epoch
	 * unchanged.
	 */
	if (nr_present == nr_logged && !nr_delayed) {
		jm->cluster_status = SD_STATUS_OK;
		return CJ_RES_SUCCESS;
	}

	/*
	 * The old node count is only reached once failed nodes are taken
	 * into account: go live as well, but with an epoch update.  In any
	 * other case the join is still accepted, there are just not enough
	 * nodes yet to set the cluster live.
	 */
	nr_failed = get_nodes_nr_from(&sys->failed_nodes);
	if (nr_logged == nr_present + nr_failed - nr_delayed) {
		jm->inc_epoch = 1;
		jm->cluster_status = SD_STATUS_OK;
	}

	return CJ_RES_SUCCESS;
}