/*
 * check_uuids_between_replicas -- (internal) check if uuids between internally
 *                                 consistent adjacent replicas are consistent
 *
 * Walks every pair of adjacent replicas (r, r + 1); NOTE(review): the REP()
 * macro presumably wraps the index modulo nreplicas, so the last pair compares
 * the last replica with the first — confirm against the macro definition.
 * Marks replicas IS_BROKEN when their first part header is unavailable and
 * IS_INCONSISTENT when the prev/next replica uuid links do not match.
 * Always returns 0; results are reported via set_hs flags only.
 */
static int
check_uuids_between_replicas(struct pool_set *set,
		struct poolset_health_status *set_hs)
{
	for (unsigned r = 0; r < set->nreplicas; ++r) {
		/* skip comparing inconsistent pairs of replicas */
		if (!replica_is_replica_consistent(r, set_hs) ||
				!replica_is_replica_consistent(r + 1, set_hs))
			continue;

		struct pool_replica *rep = REP(set, r);
		struct pool_replica *rep_n = REP(set, r + 1);
		struct replica_health_status *rep_hs = REP(set_hs, r);
		struct replica_health_status *rep_n_hs = REP(set_hs, r + 1);

		/* check adjacent replica uuids for yet unbroken parts */
		unsigned p = replica_find_unbroken_part(r, set_hs);
		unsigned p_n = replica_find_unbroken_part(r + 1, set_hs);

		/* if the first part is broken, cannot compare replica uuids */
		if (p > 0) {
			rep_hs->flags |= IS_BROKEN;
			continue;
		}

		/*
		 * if the first part of the next replica is broken, cannot
		 * compare replica uuids
		 */
		if (p_n > 0) {
			rep_n_hs->flags |= IS_BROKEN;
			continue;
		}

		/*
		 * check if replica uuids are consistent between replicas:
		 * the next replica's prev link must point at this replica's
		 * uuid, and this replica's next link at the next replica's
		 */
		if (uuidcmp(HDR(rep_n, p_n)->prev_repl_uuid,
				HDR(rep, p)->uuid) ||
				uuidcmp(HDR(rep, p)->next_repl_uuid,
				HDR(rep_n, p_n)->uuid)) {
			if (set->nreplicas == 1) {
				/* single replica compared with itself */
				rep_hs->flags |= IS_INCONSISTENT;
			} else {
				if (replica_is_replica_broken(r, set_hs)) {
					rep_hs->flags |= IS_BROKEN;
					continue;
				}

				if (replica_is_replica_broken(r + 1, set_hs)) {
					rep_n_hs->flags |= IS_BROKEN;
					continue;
				}

				/*
				 * two unbroken and internally consistent
				 * adjacent replicas have different adjacent
				 * replica uuids - mark one as inconsistent
				 */
				rep_n_hs->flags |= IS_INCONSISTENT;
				continue;
			}
		}
	}
	return 0;
}
/*
 * replica_is_replica_healthy -- check if replica is unbroken and consistent
 *
 * A replica is considered healthy exactly when it is neither marked broken
 * nor marked inconsistent in the helping health-status structure.
 */
int
replica_is_replica_healthy(unsigned repn,
		struct poolset_health_status *set_hs)
{
	int broken = replica_is_replica_broken(repn, set_hs);
	int consistent = replica_is_replica_consistent(repn, set_hs);

	return !broken && consistent;
}
/* * check_poolset_uuids -- (internal) check if poolset_uuid fields are consistent * among all internally consistent replicas */ static int check_poolset_uuids(struct pool_set *set, struct poolset_health_status *set_hs) { LOG(3, "set %p, set_hs %p", set, set_hs); unsigned r_h = replica_find_healthy_replica(set_hs); if (r_h == UNDEF_REPLICA) { ERR("no healthy replica. Cannot synchronize."); return -1; } uuid_t poolset_uuid; memcpy(poolset_uuid, HDR(REP(set, r_h), 0)->poolset_uuid, POOL_HDR_UUID_LEN); for (unsigned r = 0; r < set->nreplicas; ++r) { /* skip inconsistent replicas */ if (!replica_is_replica_consistent(r, set_hs) || r == r_h) continue; if (check_replica_poolset_uuids(set, r, poolset_uuid, set_hs)) { ERR("inconsistent poolset uuids between replicas %u and" " %u; cannot synchronize", r_h, r); return -1; } } return 0; }
/*
 * find_consistent_replica -- (internal) find a replica number, which is not
 *                            marked as inconsistent in the helping structure
 *
 * Returns the index of the first consistent replica, or UNDEF_REPLICA when
 * every replica is marked inconsistent.
 */
static unsigned
find_consistent_replica(struct poolset_health_status *set_hs)
{
	unsigned nr = set_hs->nreplicas;

	for (unsigned idx = 0; idx < nr; ++idx) {
		/* the first consistent replica wins */
		if (replica_is_replica_consistent(idx, set_hs))
			return idx;
	}

	/* none of the replicas is consistent */
	return UNDEF_REPLICA;
}
/*
 * copy_data_to_broken_parts -- (internal) copy data to all parts created
 *                              in place of the broken ones
 *
 * For every replica that is not healthy, copies the data of each broken part
 * (or, for inconsistent replicas, each part) from the healthy replica.
 * The source is addressed from part 0 of the healthy replica, which is
 * presumably a continuous mapping of the whole pool — TODO confirm against
 * the mapping code. Copy lengths are clamped so no more than poolsize bytes
 * are ever read from the source. Always returns 0.
 */
static int
copy_data_to_broken_parts(struct pool_set *set, unsigned healthy_replica,
		unsigned flags, struct poolset_health_status *set_hs)
{
	size_t poolsize = replica_get_pool_size(set, healthy_replica);

	for (unsigned r = 0; r < set_hs->nreplicas; ++r) {
		/* skip unbroken and consistent replicas */
		if (replica_is_replica_healthy(r, set_hs))
			continue;

		struct pool_replica *rep = REP(set, r);
		struct pool_replica *rep_h = REP(set, healthy_replica);

		for (unsigned p = 0; p < rep->nparts; ++p) {
			/* skip unbroken parts from consistent replicas */
			if (!replica_is_part_broken(r, p, set_hs) &&
					replica_is_replica_consistent(r,
					set_hs))
				continue;

			const struct pool_set_part *part = &rep->part[p];

			/* data offset/length of this part within the pool */
			size_t off = replica_get_part_data_offset(set, r, p);
			size_t len = replica_get_part_data_len(set, r, p);

			/* do not allow copying too much data */
			if (off >= poolsize)
				continue;

			if (off + len > poolsize)
				len = poolsize - off;

			void *src_addr = ADDR_SUM(rep_h->part[0].addr, off);

			/* First part of replica is mapped with header */
			size_t fpoff = (p == 0) ? POOL_HDR_SIZE : 0;

			/* copy all data, unless this is only a dry run */
			if (!is_dry_run(flags)) {
				memcpy(ADDR_SUM(part->addr, fpoff), src_addr,
						len);
				/*
				 * NOTE(review): pmem_msync return value is
				 * ignored here — a failed flush goes
				 * unreported; confirm this is acceptable
				 */
				pmem_msync(ADDR_SUM(part->addr, fpoff), len);
			}
		}
	}
	return 0;
}
/*
 * copy_data_to_broken_parts -- (internal) copy data to all parts created
 *                              in place of the broken ones
 *
 * Remote-aware variant: a broken part on a remote replica is restored with
 * Rpmem_persist (push from local memory to the remote node), and a local
 * broken part whose healthy source is remote is restored with Rpmem_read
 * (pull from the remote node). Purely local copies go through memcpy +
 * pmem_msync. Returns 0 on success, -1 on a failed remote operation.
 */
static int
copy_data_to_broken_parts(struct pool_set *set, unsigned healthy_replica,
		unsigned flags, struct poolset_health_status *set_hs)
{
	/* get pool size from healthy replica */
	size_t poolsize = set->poolsize;

	for (unsigned r = 0; r < set_hs->nreplicas; ++r) {
		/* skip unbroken and consistent replicas */
		if (replica_is_replica_healthy(r, set_hs))
			continue;

		struct pool_replica *rep = REP(set, r);
		struct pool_replica *rep_h = REP(set, healthy_replica);

		for (unsigned p = 0; p < rep->nparts; ++p) {
			/* skip unbroken parts from consistent replicas */
			if (!replica_is_part_broken(r, p, set_hs) &&
					replica_is_replica_consistent(r,
					set_hs))
				continue;

			const struct pool_set_part *part = &rep->part[p];

			size_t off = replica_get_part_data_offset(set, r, p);
			size_t len = replica_get_part_data_len(set, r, p);

			/*
			 * a remote replica is always a single range up to
			 * the end of the pool
			 */
			if (rep->remote)
				len = poolsize - off;

			/* do not allow copying too much data */
			if (off >= poolsize)
				continue;

			/*
			 * First part of replica is mapped
			 * with header
			 */
			size_t fpoff = (p == 0) ? POOL_HDR_SIZE : 0;
			void *dst_addr = ADDR_SUM(part->addr, fpoff);

			if (rep->remote) {
				/*
				 * NOTE(review): remote offsets are shifted by
				 * POOL_HDR_SIZE — presumably the remote pool
				 * is addressed past its header; confirm that
				 * off >= POOL_HDR_SIZE always holds here
				 */
				int ret = Rpmem_persist(rep->remote->rpp,
						off - POOL_HDR_SIZE, len, 0);
				if (ret) {
					LOG(1, "Copying data to remote node "
						"failed -- '%s' on '%s'",
						rep->remote->pool_desc,
						rep->remote->node_addr);
					return -1;
				}
			} else if (rep_h->remote) {
				int ret = Rpmem_read(rep_h->remote->rpp,
						dst_addr,
						off - POOL_HDR_SIZE, len);
				if (ret) {
					LOG(1, "Reading data from remote node "
						"failed -- '%s' on '%s'",
						rep_h->remote->pool_desc,
						rep_h->remote->node_addr);
					return -1;
				}
			} else {
				/* clamp the local copy to the pool size */
				if (off + len > poolsize)
					len = poolsize - off;

				void *src_addr =
					ADDR_SUM(rep_h->part[0].addr, off);

				/* copy all data */
				memcpy(dst_addr, src_addr, len);
				pmem_msync(dst_addr, len);
			}
		}
	}
	return 0;
}
/*
 * check_uuids_between_replicas -- (internal) check if uuids between internally
 *                                 consistent adjacent replicas are consistent
 *
 * For each pair of adjacent replicas (r, next of r) verifies that the
 * prev/next replica-uuid links in their part headers agree, and additionally
 * checks the links across a broken middle replica (r, r_n broken, r_nn).
 * Returns 0 when all comparable links are consistent, -1 otherwise.
 *
 * NOTE(review): the initial consistency check uses r + 1 while the rest of
 * the body uses REPNidx(set_hs, r) — presumably both wrap to the same next
 * index; confirm against the macro definitions.
 */
static int
check_uuids_between_replicas(struct pool_set *set,
		struct poolset_health_status *set_hs)
{
	LOG(3, "set %p, set_hs %p", set, set_hs);

	for (unsigned r = 0; r < set->nreplicas; ++r) {
		/* skip comparing inconsistent pairs of replicas */
		if (!replica_is_replica_consistent(r, set_hs) ||
				!replica_is_replica_consistent(r + 1, set_hs))
			continue;

		struct pool_replica *rep = REP(set, r);
		struct pool_replica *rep_n = REPN(set, r);

		/* get uuids of the two adjacent replicas */
		uuid_t *rep_uuidp = NULL;
		uuid_t *rep_n_uuidp = NULL;
		unsigned r_n = REPNidx(set_hs, r);

		/* a failed lookup leaves the pointer NULL and skips checks */
		if (get_replica_uuid(rep, r, set_hs, &rep_uuidp))
			LOG(2, "cannot get replica uuid, replica %u", r);
		if (get_replica_uuid(rep_n, r_n, set_hs, &rep_n_uuidp))
			LOG(2, "cannot get replica uuid, replica %u", r_n);

		/*
		 * check if replica uuids are consistent between two adjacent
		 * replicas
		 */
		unsigned p = replica_find_unbroken_part(r, set_hs);
		unsigned p_n = replica_find_unbroken_part(r_n, set_hs);

		/* next replica's prev link must point at this replica */
		if (p_n != UNDEF_PART && rep_uuidp != NULL &&
				uuidcmp(*rep_uuidp,
				HDR(rep_n, p_n)->prev_repl_uuid)) {
			ERR(
				"inconsistent replica uuids between replicas %u and %u",
				r, r_n);
			return -1;
		}
		/* this replica's next link must point at the next replica */
		if (p != UNDEF_PART && rep_n_uuidp != NULL &&
				uuidcmp(*rep_n_uuidp,
				HDR(rep, p)->next_repl_uuid)) {
			ERR(
				"inconsistent replica uuids between replicas %u and %u",
				r, r_n);
			return -1;
		}

		/*
		 * check if replica uuids on borders of a broken replica are
		 * consistent
		 */
		unsigned r_nn = REPNidx(set_hs, r_n);
		if (set->nreplicas > 1 && p != UNDEF_PART &&
				replica_is_replica_broken(r_n, set_hs) &&
				replica_is_replica_consistent(r_nn, set_hs)) {
			unsigned p_nn =
				replica_find_unbroken_part(r_nn, set_hs);
			if (p_nn == UNDEF_PART) {
				LOG(2,
					"cannot compare uuids on borders of replica %u",
					r);
				continue;
			}
			struct pool_replica *rep_nn = REP(set, r_nn);

			/*
			 * the links of the two replicas surrounding the
			 * broken one must reference each other directly
			 */
			if (uuidcmp(HDR(rep, p)->next_repl_uuid,
					HDR(rep_nn, p_nn)->prev_repl_uuid)) {
				ERR(
					"inconsistent replica uuids on borders of replica %u",
					r);
				return -1;
			}
		}
	}
	return 0;
}