/*
 * replica_get_pool_size -- find the effective size (mapped) of a pool based
 * on metadata from given replica
 */
size_t
replica_get_pool_size(struct pool_set *set, unsigned repn)
{
	struct pool_set_part *part = &PART(REP(set, repn), 0);
	int should_close_part = 0;
	if (part->fd == -1) {
		if (util_part_open(part, 0, 0))
			return set->poolsize;

		if (util_map_part(part, NULL, sizeof(PMEMobjpool), 0,
				MAP_PRIVATE|MAP_NORESERVE)) {
			util_part_fdclose(part);
			return set->poolsize;
		}
		should_close_part = 1;
	}

	PMEMobjpool *pop = (PMEMobjpool *)part->addr;
	size_t ret = pop->heap_offset + pop->heap_size;

	if (should_close_part) {
		util_unmap_part(part);
		util_part_fdclose(part);
	}

	return ret;
}
/*
 * replica_get_pool_size -- find the effective size (mapped) of a pool based
 * on metadata from given replica
 */
size_t
replica_get_pool_size(struct pool_set *set, unsigned repn)
{
	LOG(3, "set %p, repn %u", set, repn);
	struct pool_set_part *part = &PART(REP(set, repn), 0);
	int should_close_part = 0;
	int should_unmap_part = 0;
	if (part->fd == -1) {
		if (util_part_open(part, 0, 0))
			return set->poolsize;
		should_close_part = 1;
	}

	if (part->addr == NULL) {
		if (util_map_part(part, NULL,
				MMAP_ALIGN_UP(sizeof(PMEMobjpool)), 0,
				MAP_SHARED, 1)) {
			util_part_fdclose(part);
			return set->poolsize;
		}
		should_unmap_part = 1;
	}

	PMEMobjpool *pop = (PMEMobjpool *)part->addr;
	size_t ret = pop->heap_offset + pop->heap_size;

	if (should_unmap_part)
		util_unmap_part(part);
	if (should_close_part)
		util_part_fdclose(part);

	return ret;
}
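/*
 * Hypothetical caller sketch (not part of the library): assuming the pool_set
 * declarations above are in scope and all replicas are local, a sync or
 * transform step could use replica_get_pool_size() to pick the effective pool
 * size as the minimum over all replicas.  Because the function falls back to
 * set->poolsize whenever a replica's metadata cannot be read, the result
 * never exceeds that value.
 */
static size_t
replica_min_pool_size(struct pool_set *set)
{
	size_t size = set->poolsize;

	for (unsigned r = 0; r < set->nreplicas; r++) {
		/* heap_offset + heap_size read from replica r's metadata */
		size_t s = replica_get_pool_size(set, r);
		if (s < size)
			size = s;
	}

	return size;
}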
/*
 * util_replica_close -- (internal) close a memory pool replica
 *
 * This function unmaps all mapped memory regions.
 */
static int
util_replica_close(struct pool_set *set, unsigned repidx)
{
	LOG(3, "set %p repidx %u", set, repidx);
	struct pool_replica *rep = set->replica[repidx];

	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);

	util_unmap_part(&rep->part[0]);

	return 0;
}
/*
 * replica_check_store_size -- (internal) store size from pool descriptor for
 * replica
 */
static int
replica_check_store_size(struct pool_set *set,
		struct poolset_health_status *set_hs, unsigned repn)
{
	LOG(3, "set %p, set_hs %p, repn %u", set, set_hs, repn);
	struct pool_replica *rep = set->replica[repn];
	struct pmemobjpool pop;

	if (rep->remote) {
		memcpy(&pop.hdr, rep->part[0].hdr, sizeof(pop.hdr));
		void *descr = (void *)((uintptr_t)&pop + POOL_HDR_SIZE);
		if (Rpmem_read(rep->remote->rpp, descr, POOL_HDR_SIZE,
				sizeof(pop) - POOL_HDR_SIZE, 0)) {
			return -1;
		}
	} else {
		/* round up map size to Mmap align size */
		if (util_map_part(&rep->part[0], NULL,
				MMAP_ALIGN_UP(sizeof(pop)), 0,
				MAP_SHARED, 1)) {
			return -1;
		}

		memcpy(&pop, rep->part[0].addr, sizeof(pop));

		util_unmap_part(&rep->part[0]);
	}

	void *dscp = (void *)((uintptr_t)&pop + sizeof(pop.hdr));

	if (!util_checksum(dscp, OBJ_DSC_P_SIZE, &pop.checksum, 0, 0)) {
		set_hs->replica[repn]->flags |= IS_BROKEN;
		return 0;
	}

	set_hs->replica[repn]->pool_size = pop.heap_offset + pop.heap_size;
	return 0;
}
/*
 * util_poolset_close -- unmap and close all the parts of the pool set
 *
 * Optionally, it also unlinks the newly created pool set files.
 */
void
util_poolset_close(struct pool_set *set, int del)
{
	LOG(3, "set %p del %d", set, del);

	int oerrno = errno;

	for (unsigned r = 0; r < set->nreplicas; r++) {
		struct pool_replica *rep = set->replica[r];

		/* it's enough to unmap part[0] only */
		util_unmap_part(&rep->part[0]);

		for (unsigned p = 0; p < rep->nparts; p++) {
			if (rep->part[p].fd != -1)
				(void) close(rep->part[p].fd);
			if (del && rep->part[p].created) {
				LOG(4, "unlink %s", rep->part[p].path);
				unlink(rep->part[p].path);
			}
		}
	}

	util_poolset_free(set);
	errno = oerrno;
}
/*
 * replica_check_store_size -- (internal) store size from pool descriptor for
 * replica
 */
static int
replica_check_store_size(struct pool_set *set,
		struct poolset_health_status *set_hs, unsigned r)
{
	struct pool_replica *rep = set->replica[r];
	struct pmemobjpool pop;

	if (rep->remote) {
		memcpy(&pop.hdr, rep->part[0].hdr, sizeof(pop.hdr));
		void *descr = (void *)((uintptr_t)&pop + POOL_HDR_SIZE);
		if (Rpmem_read(rep->remote->rpp, descr, 0,
				sizeof(pop) - POOL_HDR_SIZE)) {
			return -1;
		}
	} else {
		if (util_map_part(&rep->part[0], NULL, sizeof(pop), 0,
				MAP_PRIVATE|MAP_NORESERVE)) {
			return -1;
		}

		memcpy(&pop, rep->part[0].addr, sizeof(pop));

		util_unmap_part(&rep->part[0]);
	}

	void *dscp = (void *)((uintptr_t)&pop + sizeof(pop.hdr));

	if (!util_checksum(dscp, OBJ_DSC_P_SIZE, &pop.checksum, 0)) {
		set_hs->replica[r]->flags |= IS_BROKEN;
		return 0;
	}

	set_hs->replica[r]->pool_size = pop.heap_offset + pop.heap_size;
	return 0;
}
/*
 * util_replica_open -- (internal) open a memory pool replica
 */
static int
util_replica_open(struct pool_set *set, unsigned repidx, int flags,
	size_t hdrsize)
{
	LOG(3, "set %p repidx %u flags %d hdrsize %zu",
		set, repidx, flags, hdrsize);

	struct pool_replica *rep = set->replica[repidx];

	rep->repsize -= (rep->nparts - 1) * hdrsize;

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize); /* XXX - randomize */
	if (addr == NULL) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
				rep->part[0].addr, rep->part[0].size, 0);

	/* map all headers - don't care about the address */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_map_hdr(&rep->part[p], hdrsize, 0, flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}
	}

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = (char *)rep->part[0].addr + mapsize;

	/*
	 * map the remaining parts of the usable pool space
	 * (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, hdrsize,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, hdrsize);

		mapsize += rep->part[p].size;
		addr = (char *)addr + rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	/* calculate pool size - choose the smallest replica size */
	if (rep->repsize < set->poolsize)
		set->poolsize = rep->repsize;

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);
	util_unmap_part(&rep->part[0]);
	errno = oerrno;
	return -1;
}
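/*
 * Minimal standalone sketch (not part of the library) of the reserve-then-
 * overlay mapping pattern that util_replica_open() applies through
 * util_map_part(): the first mmap() reserves one contiguous address range
 * for the whole replica, and each subsequent part is mapped with MAP_FIXED
 * at its offset inside that reservation, so the usable pool space stays
 * contiguous.  The part file name and sizes below are hypothetical, and all
 * of the library's hint selection, Valgrind registration and error handling
 * is omitted.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	size_t partsize = 8 * 1024 * 1024;	/* hypothetical 8 MiB parts */
	size_t repsize = 2 * partsize;		/* two parts, one replica */

	/* reserve one contiguous address range for the whole replica */
	void *base = mmap(NULL, repsize, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
	if (base == MAP_FAILED) {
		perror("mmap reserve");
		return 1;
	}

	/* overlay the second part at a fixed offset inside the reservation */
	int fd = open("part1.obj", O_RDWR);	/* hypothetical part file */
	if (fd != -1) {
		void *part1 = mmap((char *)base + partsize, partsize,
				PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_FIXED, fd, 0);
		if (part1 == MAP_FAILED)
			perror("mmap part #1");
		(void) close(fd);
	}

	(void) munmap(base, repsize);
	return 0;
}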
/*
 * util_replica_create -- (internal) create a new memory pool replica
 */
static int
util_replica_create(struct pool_set *set, unsigned repidx, int flags,
	const char *sig, uint32_t major, uint32_t compat, uint32_t incompat,
	uint32_t ro_compat, const unsigned char *prev_repl_uuid,
	const unsigned char *next_repl_uuid, const unsigned char *arch_flags)
{
	LOG(3, "set %p repidx %u flags %d sig %.8s major %u "
		"compat %#x incompat %#x ro_compat %#x "
		"prev_repl_uuid %p next_repl_uuid %p arch_flags %p",
		set, repidx, flags, sig, major, compat, incompat, ro_compat,
		prev_repl_uuid, next_repl_uuid, arch_flags);

	struct pool_replica *rep = set->replica[repidx];

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize, 0);
	if (addr == MAP_FAILED) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	/* XXX investigate this idea of reserving space on Windows */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
				rep->part[0].addr, rep->part[0].size, 0);

	/* map all headers - don't care about the address */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_map_hdr(&rep->part[p], flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}
	}

	/* create headers, set UUID's */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_header_create(set, repidx, p, sig, major,
				compat, incompat, ro_compat,
				prev_repl_uuid, next_repl_uuid,
				arch_flags) != 0) {
			LOG(2, "header creation failed - part #%d", p);
			goto err;
		}
	}

	/* unmap all headers */
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);

	set->zeroed &= rep->part[0].created;

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = (char *)rep->part[0].addr + mapsize;

	/*
	 * map the remaining parts of the usable pool space (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, POOL_HDR_SIZE,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, POOL_HDR_SIZE);

		mapsize += rep->part[p].size;
		set->zeroed &= rep->part[p].created;
		addr = (char *)addr + rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);
	util_unmap_part(&rep->part[0]);
	errno = oerrno;
	return -1;
}
/*
 * util_replica_create -- (internal) create a new memory pool replica
 */
static int
util_replica_create(struct pool_set *set, unsigned repidx, int flags,
	size_t hdrsize, const char *sig, uint32_t major, uint32_t compat,
	uint32_t incompat, uint32_t ro_compat)
{
	LOG(3, "set %p repidx %u flags %d hdrsize %zu sig %s major %u "
		"compat %#x incompat %#x ro_compat %#x",
		set, repidx, flags, hdrsize, sig, major,
		compat, incompat, ro_compat);

	struct pool_replica *rep = set->replica[repidx];

	rep->repsize -= (rep->nparts - 1) * hdrsize;

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize); /* XXX - randomize */
	if (addr == NULL) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
				rep->part[0].addr, rep->part[0].size, 0);

	/* map all the remaining headers - don't care about the address */
	for (unsigned p = 1; p < rep->nparts; p++) {
		if (util_map_part(&rep->part[p], NULL,
				hdrsize, 0, flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, 0);
	}

	/* create headers, set UUID's */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_header_create(set, repidx, p, sig, major,
				compat, incompat, ro_compat) != 0) {
			LOG(2, "header creation failed - part #%d", p);
			goto err;
		}
	}

	set->zeroed &= rep->part[0].created;

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = (char *)rep->part[0].addr + mapsize;

	/*
	 * unmap headers; map the remaining parts of the usable pool space
	 * (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* unmap header */
		if (util_unmap_part(&rep->part[p]) != 0) {
			LOG(2, "header unmapping failed - part #%d", p);
		}

		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, hdrsize,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, hdrsize);

		mapsize += rep->part[p].size;
		set->zeroed &= rep->part[p].created;
		addr = (char *)addr + rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	/* calculate pool size - choose the smallest replica size */
	if (rep->repsize < set->poolsize)
		set->poolsize = rep->repsize;

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	VALGRIND_REMOVE_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	util_unmap(rep->part[0].addr, rep->part[0].size);
	errno = oerrno;
	return -1;
}