/* * recreate_broken_parts -- (internal) create parts in place of the broken ones */ static int recreate_broken_parts(struct pool_set *set, struct poolset_health_status *set_hs, unsigned flags) { for (unsigned r = 0; r < set_hs->nreplicas; ++r) { struct pool_replica *broken_r = set->replica[r]; for (unsigned p = 0; p < set_hs->replica[r]->nparts; ++p) { /* skip unbroken parts */ if (!replica_is_part_broken(r, p, set_hs)) continue; /* remove parts from broken replica */ if (!is_dry_run(flags)) { if (replica_remove_part(set, r, p)) { ERR("Cannot remove part"); errno = EINVAL; return -1; } } /* create removed part and open it */ if (util_part_open(&broken_r->part[p], 0, !is_dry_run(flags))) { ERR("Cannot open/create parts"); errno = EINVAL; return -1; } } } return 0; }
/* * copy_data_to_broken_parts -- (internal) copy data to all parts created * in place of the broken ones */ static int copy_data_to_broken_parts(struct pool_set *set, unsigned healthy_replica, unsigned flags, struct poolset_health_status *set_hs) { size_t poolsize = replica_get_pool_size(set, healthy_replica); for (unsigned r = 0; r < set_hs->nreplicas; ++r) { /* skip unbroken and consistent replicas */ if (replica_is_replica_healthy(r, set_hs)) continue; struct pool_replica *rep = REP(set, r); struct pool_replica *rep_h = REP(set, healthy_replica); for (unsigned p = 0; p < rep->nparts; ++p) { /* skip unbroken parts from consistent replicas */ if (!replica_is_part_broken(r, p, set_hs) && replica_is_replica_consistent(r, set_hs)) continue; const struct pool_set_part *part = &rep->part[p]; size_t off = replica_get_part_data_offset(set, r, p); size_t len = replica_get_part_data_len(set, r, p); /* do not allow copying too much data */ if (off >= poolsize) continue; if (off + len > poolsize) len = poolsize - off; void *src_addr = ADDR_SUM(rep_h->part[0].addr, off); /* First part of replica is mapped with header */ size_t fpoff = (p == 0) ? POOL_HDR_SIZE : 0; /* copy all data */ if (!is_dry_run(flags)) { memcpy(ADDR_SUM(part->addr, fpoff), src_addr, len); pmem_msync(ADDR_SUM(part->addr, fpoff), len); } } } return 0; }
/* * check_and_open_poolset_part_files -- (internal) for each part in a poolset * check if the part files are accessible, and if not, mark it as broken * in a helping structure; then open the part file */ static int check_and_open_poolset_part_files(struct pool_set *set, struct poolset_health_status *set_hs, unsigned flags) { LOG(3, "set %p, set_hs %p, flags %u", set, set_hs, flags); for (unsigned r = 0; r < set->nreplicas; ++r) { struct pool_replica *rep = set->replica[r]; struct replica_health_status *rep_hs = set_hs->replica[r]; if (rep->remote) { if (util_replica_open_remote(set, r, 0)) { LOG(1, "cannot open remote replica no %u", r); return -1; } unsigned nlanes = REMOTE_NLANES; int ret = util_poolset_remote_open(rep, r, rep->repsize, 0, rep->part[0].addr, rep->part[0].size, &nlanes); if (ret) rep_hs->flags |= IS_BROKEN; continue; } for (unsigned p = 0; p < rep->nparts; ++p) { if (access(rep->part[p].path, R_OK|W_OK) != 0) { LOG(1, "part file %s is not accessible", rep->part[p].path); errno = 0; rep_hs->part[p] |= IS_BROKEN; if (is_dry_run(flags)) continue; } if (util_part_open(&rep->part[p], 0, 0)) { LOG(1, "opening part %s failed", rep->part[p].path); errno = 0; rep_hs->part[p] |= IS_BROKEN; } } } return 0; }
/* * sync_replica -- synchronize data across replicas within a poolset */ int sync_replica(struct pool_set *set, unsigned flags) { ASSERTne(set, NULL); /* examine poolset's health */ struct poolset_health_status *set_hs = NULL; if (replica_check_poolset_health(set, &set_hs, flags)) { ERR("poolset health check failed"); return -1; } /* check if poolset is broken; if not, nothing to do */ if (replica_is_poolset_healthy(set_hs)) { LOG(1, "Poolset is healthy"); goto OK_close; } /* find one good replica; it will be the source of data */ unsigned healthy_replica = replica_find_healthy_replica(set_hs); if (healthy_replica == UNDEF_REPLICA) { ERR("no healthy replica found"); goto err; } /* in dry-run mode we can stop here */ if (is_dry_run(flags)) { LOG(1, "Sync in dry-run mode finished successfully"); goto OK_close; } /* recreate broken parts */ if (recreate_broken_parts(set, set_hs, flags)) { ERR("recreating broken parts failed"); goto err; } /* open all part files */ if (replica_open_poolset_part_files(set)) { ERR("opening poolset part files failed"); goto err; } /* map all replicas */ if (util_poolset_open(set)) { ERR("opening poolset failed"); goto err; } /* this is required for opening remote pools */ set->poolsize = set_hs->replica[healthy_replica]->pool_size; /* open all remote replicas */ if (open_remote_replicas(set, set_hs)) { ERR("opening remote replicas failed"); goto err; } /* update uuid fields in the set structure with part headers */ if (fill_struct_uuids(set, healthy_replica, set_hs, flags)) { ERR("gathering uuids failed"); goto err; } /* create headers for broken parts */ if (!is_dry_run(flags)) { if (create_headers_for_broken_parts(set, healthy_replica, set_hs)) { ERR("creating headers for broken parts failed"); goto err; } } if (is_dry_run(flags)) goto OK_close; /* update uuids of replicas and parts */ update_uuids(set, set_hs); /* create all remote replicas */ if (create_remote_replicas(set, set_hs)) { ERR("creating remote replicas failed"); goto err; } /* check and copy data if possible */ if (copy_data_to_broken_parts(set, healthy_replica, flags, set_hs)) { ERR("copying data to broken parts failed"); goto err; } /* grant permissions to all created parts */ if (grant_broken_parts_perm(set, healthy_replica, set_hs)) { ERR("granting permissions to broken parts failed"); goto err; } OK_close: replica_free_poolset_health_status(set_hs); return 0; err: replica_free_poolset_health_status(set_hs); return -1; }
/* * pmempool_transform -- alter poolset structure */ int pmempool_transform(const char *poolset_src, const char *poolset_dst, unsigned flags) { LOG(3, "poolset_src %s, poolset_dst %s, flags %u", poolset_src, poolset_dst, flags); ASSERTne(poolset_src, NULL); ASSERTne(poolset_dst, NULL); /* check if the source poolset has correct signature */ if (util_is_poolset_file(poolset_src) != 1) { ERR("source file is not a poolset file"); goto err; } /* check if the destination poolset has correct signature */ if (util_is_poolset_file(poolset_dst) != 1) { ERR("destination file is not a poolset file"); goto err; } /* check if flags are supported */ if (check_flags_transform(flags)) { ERR("unsupported flags"); errno = EINVAL; goto err; } /* open the source poolset file */ int fd_in = util_file_open(poolset_src, NULL, 0, O_RDONLY); if (fd_in < 0) { ERR("cannot open source poolset file"); goto err; } /* parse the source poolset file */ struct pool_set *set_in = NULL; if (util_poolset_parse(&set_in, poolset_src, fd_in)) { ERR("parsing source poolset failed"); close(fd_in); goto err; } close(fd_in); /* open the destination poolset file */ int fd_out = util_file_open(poolset_dst, NULL, 0, O_RDONLY); if (fd_out < 0) { ERR("cannot open destination poolset file"); goto err; } int del = 0; /* parse the destination poolset file */ struct pool_set *set_out = NULL; if (util_poolset_parse(&set_out, poolset_dst, fd_out)) { ERR("parsing destination poolset failed"); close(fd_out); goto err_free_poolin; } close(fd_out); /* check if the source poolset is of a correct type */ if (pool_set_type(set_in) != POOL_TYPE_OBJ) { ERR("source poolset is of a wrong type"); goto err_free_poolout; } /* check if the source poolset is healthy */ struct poolset_health_status *set_in_hs = NULL; if (replica_check_poolset_health(set_in, &set_in_hs, flags)) { ERR("source poolset health check failed"); goto err_free_poolout; } if (!replica_is_poolset_healthy(set_in_hs)) { ERR("source poolset is broken"); replica_free_poolset_health_status(set_in_hs); goto err_free_poolout; } replica_free_poolset_health_status(set_in_hs); del = !is_dry_run(flags); /* transform poolset */ if (replica_transform(set_in, set_out, flags)) { ERR("transformation failed"); goto err_free_poolout; } util_poolset_close(set_in, 0); util_poolset_close(set_out, 0); return 0; err_free_poolout: util_poolset_close(set_out, del); err_free_poolin: util_poolset_close(set_in, 0); err: if (errno == 0) errno = EINVAL; return -1; }
static inline #endif int pmempool_transformU(const char *poolset_src, const char *poolset_dst, unsigned flags) { LOG(3, "poolset_src %s, poolset_dst %s, flags %u", poolset_src, poolset_dst, flags); ASSERTne(poolset_src, NULL); ASSERTne(poolset_dst, NULL); /* check if the source poolset has correct signature */ if (util_is_poolset_file(poolset_src) != 1) { ERR("source file is not a poolset file"); goto err; } /* check if the destination poolset has correct signature */ if (util_is_poolset_file(poolset_dst) != 1) { ERR("destination file is not a poolset file"); goto err; } /* check if flags are supported */ if (check_flags_transform(flags)) { ERR("unsupported flags"); errno = EINVAL; goto err; } /* open the source poolset file */ int fd_in = util_file_open(poolset_src, NULL, 0, O_RDONLY); if (fd_in < 0) { ERR("cannot open source poolset file"); goto err; } /* parse the source poolset file */ struct pool_set *set_in = NULL; if (util_poolset_parse(&set_in, poolset_src, fd_in)) { ERR("parsing source poolset failed"); os_close(fd_in); goto err; } os_close(fd_in); /* open the destination poolset file */ int fd_out = util_file_open(poolset_dst, NULL, 0, O_RDONLY); if (fd_out < 0) { ERR("cannot open destination poolset file"); goto err; } enum del_parts_mode del = DO_NOT_DELETE_PARTS; /* parse the destination poolset file */ struct pool_set *set_out = NULL; if (util_poolset_parse(&set_out, poolset_dst, fd_out)) { ERR("parsing destination poolset failed"); os_close(fd_out); goto err_free_poolin; } os_close(fd_out); /* check if the source poolset is of a correct type */ if (pool_set_type(set_in) != POOL_TYPE_OBJ) { ERR("source poolset is of a wrong type"); goto err_free_poolout; } /* load remote library if needed */ if (set_in->remote && util_remote_load()) { ERR("remote replication not available"); goto err_free_poolout; } if (set_out->remote && util_remote_load()) { ERR("remote replication not available"); goto err_free_poolout; } del = is_dry_run(flags) ? DO_NOT_DELETE_PARTS : DELETE_CREATED_PARTS; /* transform poolset */ if (replica_transform(set_in, set_out, flags)) { ERR("transformation failed"); goto err_free_poolout; } util_poolset_close(set_in, DO_NOT_DELETE_PARTS); util_poolset_close(set_out, DO_NOT_DELETE_PARTS); return 0; err_free_poolout: util_poolset_close(set_out, del); err_free_poolin: util_poolset_close(set_in, DO_NOT_DELETE_PARTS); err: if (errno == 0) errno = EINVAL; return -1; }
/* * sync_replica -- synchronize data across replicas within a poolset */ int replica_sync(struct pool_set *set, struct poolset_health_status *s_hs, unsigned flags) { LOG(3, "set %p, flags %u", set, flags); int ret = 0; struct poolset_health_status *set_hs = NULL; /* check if we already know the poolset health status */ if (s_hs == NULL) { /* validate poolset before checking its health */ if (validate_args(set)) return -1; /* examine poolset's health */ if (replica_check_poolset_health(set, &set_hs, flags)) { ERR("poolset health check failed"); return -1; } /* check if poolset is broken; if not, nothing to do */ if (replica_is_poolset_healthy(set_hs)) { LOG(1, "Poolset is healthy"); goto out; } } else { set_hs = s_hs; } /* find one good replica; it will be the source of data */ unsigned healthy_replica = replica_find_healthy_replica(set_hs); if (healthy_replica == UNDEF_REPLICA) { ERR("no healthy replica found"); ret = -1; goto out; } /* in dry-run mode we can stop here */ if (is_dry_run(flags)) { LOG(1, "Sync in dry-run mode finished successfully"); goto out; } /* recreate broken parts */ if (recreate_broken_parts(set, set_hs, flags)) { ERR("recreating broken parts failed"); ret = -1; goto out; } /* open all part files */ if (replica_open_poolset_part_files(set)) { ERR("opening poolset part files failed"); ret = -1; goto out; } /* map all replicas */ if (util_poolset_open(set)) { ERR("opening poolset failed"); ret = -1; goto out; } /* this is required for opening remote pools */ set->poolsize = set_hs->replica[healthy_replica]->pool_size; /* open all remote replicas */ if (open_remote_replicas(set, set_hs)) { ERR("opening remote replicas failed"); ret = -1; goto out; } /* update uuid fields in the set structure with part headers */ if (fill_struct_uuids(set, healthy_replica, set_hs, flags)) { ERR("gathering uuids failed"); ret = -1; goto out; } /* create headers for broken parts */ if (!is_dry_run(flags)) { if (create_headers_for_broken_parts(set, healthy_replica, set_hs)) { ERR("creating headers for broken parts failed"); ret = -1; goto out; } } if (is_dry_run(flags)) goto out; /* create all remote replicas */ if (create_remote_replicas(set, set_hs, flags)) { ERR("creating remote replicas failed"); ret = -1; goto out; } /* check and copy data if possible */ if (copy_data_to_broken_parts(set, healthy_replica, flags, set_hs)) { ERR("copying data to broken parts failed"); ret = -1; goto out; } /* update uuids of replicas and parts */ if (update_uuids(set, set_hs)) { ERR("updating uuids failed"); ret = -1; goto out; } /* grant permissions to all created parts */ if (grant_created_parts_perm(set, healthy_replica, set_hs)) { ERR("granting permissions to created parts failed"); ret = -1; } out: if (s_hs == NULL) replica_free_poolset_health_status(set_hs); return ret; }
/* * transform_replica -- transforming one poolset into another */ int replica_transform(struct pool_set *set_in, struct pool_set *set_out, unsigned flags) { LOG(3, "set_in %p, set_out %p", set_in, set_out); int ret = 0; /* validate user arguments */ if (validate_args(set_in, set_out)) return -1; /* check if the source poolset is healthy */ struct poolset_health_status *set_in_hs = NULL; if (replica_check_poolset_health(set_in, &set_in_hs, flags)) { ERR("source poolset health check failed"); return -1; } if (!replica_is_poolset_healthy(set_in_hs)) { ERR("source poolset is broken"); ret = -1; errno = EINVAL; goto free_hs_in; } struct poolset_health_status *set_out_hs = NULL; if (replica_create_poolset_health_status(set_out, &set_out_hs)) { ERR("creating poolset health status failed"); ret = -1; goto free_hs_in; } /* check if the poolsets are transformable */ struct poolset_compare_status *set_in_cs = NULL; struct poolset_compare_status *set_out_cs = NULL; if (compare_poolsets(set_in, set_out, &set_in_cs, &set_out_cs)) { ERR("comparing poolsets failed"); ret = -1; goto free_hs_out; } enum transform_op operation = identify_transform_operation(set_in_cs, set_out_cs, set_in_hs, set_out_hs); if (operation == NOT_TRANSFORMABLE) { LOG(1, "poolsets are not transformable"); ret = -1; errno = EINVAL; goto free_cs; } if (operation == RM_HDRS) { if (!is_dry_run(flags) && remove_hdrs(set_in, set_out, set_in_hs, flags)) { ERR("removing headers failed; falling back to the " "input poolset"); if (replica_sync(set_in, set_in_hs, flags | IS_TRANSFORMED)) { LOG(1, "falling back to the input poolset " "failed"); } else { LOG(1, "falling back to the input poolset " "succeeded"); } ret = -1; } goto free_cs; } if (operation == ADD_HDRS) { if (!is_dry_run(flags) && add_hdrs(set_in, set_out, set_in_hs, flags)) { ERR("adding headers failed; falling back to the " "input poolset"); if (replica_sync(set_in, set_in_hs, flags | IS_TRANSFORMED)) { LOG(1, "falling back to the input poolset " "failed"); } else { LOG(1, "falling back to the input poolset " "succeeded"); } ret = -1; } goto free_cs; } if (operation == ADD_REPLICAS) { /* * check if any of the parts that are to be added already exists */ if (do_added_parts_exist(set_out, set_out_hs)) { ERR("some parts being added already exist"); ret = -1; errno = EINVAL; goto free_cs; } } /* signal that sync is called by transform */ if (replica_sync(set_out, set_out_hs, flags | IS_TRANSFORMED)) { ret = -1; goto free_cs; } if (operation == RM_REPLICAS) { if (!is_dry_run(flags) && delete_replicas(set_in, set_in_cs)) ret = -1; } free_cs: Free(set_in_cs); Free(set_out_cs); free_hs_out: replica_free_poolset_health_status(set_out_hs); free_hs_in: replica_free_poolset_health_status(set_in_hs); return ret; }