/*
 * Default purge hook: run move_object_to_stale_dir over every object that
 * for_each_object_in_wd() visits, handing it the latest epoch as its argument.
 *
 * Returns whatever for_each_object_in_wd() returns.
 */
int default_purge_obj(void)
{
	uint32_t latest_epoch;

	latest_epoch = get_latest_epoch();

	return for_each_object_in_wd(move_object_to_stale_dir, true,
				     &latest_epoch);
}
static void finish_join(struct join_message *msg, struct sd_node *joined, struct sd_node *nodes, size_t nr_nodes) { sys->join_finished = 1; sys->epoch = msg->epoch; if (msg->cluster_status != SD_STATUS_OK) update_exceptional_node_list(get_latest_epoch(), msg); /* We don't need backend for gateway-only node */ if (!sys->gateway_only && !sd_store && strlen((char *)msg->store)) { sd_store = find_store_driver((char *)msg->store); if (sd_store) { if (sd_store->init(obj_path) != SD_RES_SUCCESS) panic("failed to initialize store\n"); if (set_cluster_store(sd_store->name) != SD_RES_SUCCESS) panic("failed to store into config file\n"); } else panic("backend store %s not supported\n", msg->store); } /* We need to purge the stale objects for sheep joining back * after crash */ if (msg->inc_epoch) if (!sys->gateway_only && sd_store->purge_obj && sd_store->purge_obj() != SD_RES_SUCCESS) panic("can't remove stale objects\n"); sockfd_cache_add_group(nodes, nr_nodes); }
static void finish_join(struct join_message *msg, struct sd_node *joined, struct sd_node *nodes, size_t nr_nodes) { int i; sys->nr_copies = msg->nr_copies; sys->epoch = msg->epoch; /* add nodes execept for newly joined one */ for (i = 0; i < nr_nodes; i++) { if (node_eq(nodes + i, joined)) continue; sys->nodes[sys->nr_nodes++] = nodes[i]; } qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp); if (msg->cluster_status != SD_STATUS_OK) { int nr_leave_nodes; uint32_t le; nr_leave_nodes = msg->nr_leave_nodes; le = get_latest_epoch(); for (i = 0; i < nr_leave_nodes; i++) { struct node *n; if (find_entry_list(&msg->leave_nodes[i], &sys->leave_list) || !find_entry_epoch(&msg->leave_nodes[i], le)) { continue; } n = zalloc(sizeof(*n)); if (!n) panic("failed to allocate memory\n"); n->ent = msg->leave_nodes[i]; list_add_tail(&n->list, &sys->leave_list); } } sys->join_finished = 1; if ((msg->cluster_status == SD_STATUS_OK || msg->cluster_status == SD_STATUS_HALT) && msg->inc_epoch) update_epoch_log(sys->epoch); if (!sd_store && strlen((char *)msg->store)) { sd_store = find_store_driver((char *)msg->store); if (sd_store) { sd_store->init(obj_path); if (set_cluster_store(sd_store->name) != SD_RES_SUCCESS) panic("failed to store into config file\n"); } else panic("backend store %s not supported\n", msg->store); } }
static int cluster_sanity_check(struct sd_node *entries, int nr_entries, uint64_t ctime, uint32_t epoch) { int ret = SD_RES_SUCCESS, nr_local_entries; struct sd_node local_entries[SD_MAX_NODES]; uint32_t lepoch; if (sys_stat_wait_format() || sys_stat_shutdown()) goto out; /* When the joining node is newly created, we need not check anything. */ if (nr_entries == 0) goto out; if (ctime != get_cluster_ctime()) { ret = SD_RES_INVALID_CTIME; goto out; } lepoch = get_latest_epoch(); if (epoch > lepoch) { ret = SD_RES_OLD_NODE_VER; goto out; } if (sys_can_recover()) goto out; if (epoch < lepoch) { ret = SD_RES_NEW_NODE_VER; goto out; } nr_local_entries = epoch_log_read_nr(epoch, (char *)local_entries, sizeof(local_entries)); if (nr_entries != nr_local_entries || memcmp(entries, local_entries, sizeof(entries[0]) * nr_entries) != 0) { ret = SD_RES_INVALID_EPOCH; goto out; } out: return ret; }
static int cluster_sanity_check(struct join_message *jm) { uint64_t local_ctime = get_cluster_ctime(); uint32_t local_epoch = get_latest_epoch(); uint8_t local_nr_copies; if (get_cluster_copies(&local_nr_copies)) { eprintf("failed to get nr_copies\n"); return CJ_RES_FAIL; } if (jm->ctime != local_ctime) { eprintf("joining node ctime doesn't match: %" PRIu64 " vs %" PRIu64 "\n", jm->ctime, local_ctime); return CJ_RES_FAIL; } if (jm->epoch > local_epoch) { eprintf("joining node epoch too large: %" PRIu32 " vs %" PRIu32 "\n", jm->epoch, local_epoch); return CJ_RES_FAIL; } if (jm->nr_copies != local_nr_copies) { eprintf("joining node nr_copies doesn't match: %u vs %u\n", jm->nr_copies, local_nr_copies); return CJ_RES_FAIL; } if (jm->cluster_flags != sys->flags) { eprintf("joining node cluster_flags don't match: %u vs %u\n", jm->cluster_flags, sys->flags); return CJ_RES_FAIL; } return CJ_RES_SUCCESS; }
static int cluster_wait_for_join_check(struct sd_node *joined, struct join_message *jm) { struct sd_node local_entries[SD_MAX_NODES]; int nr, nr_local_entries, nr_failed_entries, nr_delayed_nodes; uint32_t local_epoch = get_latest_epoch(); int ret; if (jm->nr_nodes == 0) return CJ_RES_JOIN_LATER; ret = cluster_sanity_check(jm); if (ret != CJ_RES_SUCCESS) { if (jm->epoch > sys->epoch) { eprintf("transfer mastership (%d, %d)\n", jm->epoch, sys->epoch); return CJ_RES_MASTER_TRANSFER; } return ret; } nr_local_entries = epoch_log_read(jm->epoch, local_entries, sizeof(local_entries)); if (nr_local_entries == -1) return CJ_RES_FAIL; if (jm->epoch < local_epoch) { eprintf("joining node epoch too small: %" PRIu32 " vs %" PRIu32 "\n", jm->epoch, local_epoch); return CJ_RES_JOIN_LATER; } if (jm->nr_nodes != nr_local_entries) { eprintf("epoch log entries do not match: %d vs %d\n", jm->nr_nodes, nr_local_entries); return CJ_RES_FAIL; } if (memcmp(jm->nodes, local_entries, sizeof(jm->nodes[0]) * jm->nr_nodes) != 0) { eprintf("epoch log entries does not match\n"); return CJ_RES_FAIL; } if (!current_vnode_info) nr = 1; else nr = current_vnode_info->nr_nodes + 1; nr_delayed_nodes = get_nodes_nr_from(&sys->delayed_nodes); /* * If we have all members from the last epoch log in the in-memory * node list, and no new nodes joining we can set the cluster live * now without incrementing the epoch. */ if (nr == nr_local_entries && !nr_delayed_nodes) { jm->cluster_status = SD_STATUS_OK; return CJ_RES_SUCCESS; } /* * If we reach the old node count, but some node failed we have to * update the epoch before setting the cluster live. */ nr_failed_entries = get_nodes_nr_from(&sys->failed_nodes); if (nr_local_entries == nr + nr_failed_entries - nr_delayed_nodes) { jm->inc_epoch = 1; jm->cluster_status = SD_STATUS_OK; return CJ_RES_SUCCESS; } /* * The join was successful, but we don't have enough nodes yet to set * the cluster live. */ return CJ_RES_SUCCESS; }