/* * util_replica_open -- (internal) open a memory pool replica */ static int util_replica_open(struct pool_set *set, unsigned repidx, int flags, size_t hdrsize) { LOG(3, "set %p repidx %u flags %d hdrsize %zu\n", set, repidx, flags, hdrsize); struct pool_replica *rep = set->replica[repidx]; rep->repsize -= (rep->nparts - 1) * hdrsize; /* determine a hint address for mmap() */ void *addr = util_map_hint(rep->repsize, 0); if (addr == NULL) { ERR("cannot find a contiguous region of given size"); return -1; } /* map the first part and reserve space for remaining parts */ if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) { LOG(2, "pool mapping failed - part #0"); return -1; } VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size); VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd, rep->part[0].addr, rep->part[0].size, 0); /* map all headers - don't care about the address */ for (unsigned p = 0; p < rep->nparts; p++) { if (util_map_hdr(&rep->part[p], hdrsize, flags) != 0) { LOG(2, "header mapping failed - part #%d", p); goto err; } } size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1); addr = (char *)rep->part[0].addr + mapsize; /* * map the remaining parts of the usable pool space * (4K-aligned) */ for (unsigned p = 1; p < rep->nparts; p++) { /* map data part */ if (util_map_part(&rep->part[p], addr, 0, hdrsize, flags | MAP_FIXED) != 0) { LOG(2, "usable space mapping failed - part #%d", p); goto err; } VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd, rep->part[p].addr, rep->part[p].size, hdrsize); mapsize += rep->part[p].size; addr = (char *)addr + rep->part[p].size; } rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size); ASSERTeq(mapsize, rep->repsize); /* calculate pool size - choose the smallest replica size */ if (rep->repsize < set->poolsize) set->poolsize = rep->repsize; LOG(3, "replica addr %p", rep->part[0].addr); return 0; err: LOG(4, "error clean up"); int oerrno = errno; for (unsigned p = 0; p < rep->nparts; p++) util_unmap_hdr(&rep->part[p]); util_unmap_part(&rep->part[0]); errno = oerrno; return -1; }
/* * util_replica_create -- (internal) create a new memory pool replica */ static int util_replica_create(struct pool_set *set, unsigned repidx, int flags, size_t hdrsize, const char *sig, uint32_t major, uint32_t compat, uint32_t incompat, uint32_t ro_compat) { LOG(3, "set %p repidx %u flags %d hdrsize %zu sig %s major %u " "compat %#x incompat %#x ro_comapt %#x", set, repidx, flags, hdrsize, sig, major, compat, incompat, ro_compat); struct pool_replica *rep = set->replica[repidx]; rep->repsize -= (rep->nparts - 1) * hdrsize; /* determine a hint address for mmap() */ void *addr = util_map_hint(rep->repsize); /* XXX - randomize */ if (addr == NULL) { ERR("cannot find a contiguous region of given size"); return -1; } /* map the first part and reserve space for remaining parts */ if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) { LOG(2, "pool mapping failed - part #0"); return -1; } VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size); VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd, rep->part[0].addr, rep->part[0].size, 0); /* map all the remaining headers - don't care about the address */ for (unsigned p = 1; p < rep->nparts; p++) { if (util_map_part(&rep->part[p], NULL, hdrsize, 0, flags) != 0) { LOG(2, "header mapping failed - part #%d", p); goto err; } VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd, rep->part[p].addr, rep->part[p].size, 0); } /* create headers, set UUID's */ for (unsigned p = 0; p < rep->nparts; p++) { if (util_header_create(set, repidx, p, sig, major, compat, incompat, ro_compat) != 0) { LOG(2, "header creation failed - part #%d", p); goto err; } } set->zeroed &= rep->part[0].created; size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1); addr = rep->part[0].addr + mapsize; /* * unmap headers; map the remaining parts of the usable pool space * (4K-aligned) */ for (unsigned p = 1; p < rep->nparts; p++) { /* unmap header */ if (util_unmap_part(&rep->part[p]) != 0) { LOG(2, "header unmapping failed - part #%d", p); } /* map data part */ if (util_map_part(&rep->part[p], addr, 0, hdrsize, flags | MAP_FIXED) != 0) { LOG(2, "usable space mapping failed - part #%d", p); goto err; } VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd, rep->part[p].addr, rep->part[p].size, hdrsize); mapsize += rep->part[p].size; set->zeroed &= rep->part[p].created; addr += rep->part[p].size; } rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size); ASSERTeq(mapsize, rep->repsize); /* calculate pool size - choose the smallest replica size */ if (rep->repsize < set->poolsize) set->poolsize = rep->repsize; LOG(3, "replica addr %p", rep->part[0].addr); return 0; err: LOG(4, "error clean up"); int oerrno = errno; VALGRIND_REMOVE_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size); util_unmap(rep->part[0].addr, rep->part[0].size); errno = oerrno; return -1; }
static inline #endif void * pmem_map_fileU(const char *path, size_t len, int flags, mode_t mode, size_t *mapped_lenp, int *is_pmemp) { LOG(3, "path \"%s\" size %zu flags %x mode %o mapped_lenp %p " "is_pmemp %p", path, len, flags, mode, mapped_lenp, is_pmemp); int oerrno; int fd; int open_flags = O_RDWR; int delete_on_err = 0; int file_type = util_file_get_type(path); #ifdef _WIN32 open_flags |= O_BINARY; #endif if (file_type == OTHER_ERROR) return NULL; if (flags & ~(PMEM_FILE_ALL_FLAGS)) { ERR("invalid flag specified %x", flags); errno = EINVAL; return NULL; } if (file_type == TYPE_DEVDAX) { if (flags & ~(PMEM_DAX_VALID_FLAGS)) { ERR("flag unsupported for Device DAX %x", flags); errno = EINVAL; return NULL; } else { /* we are ignoring all of the flags */ flags = 0; ssize_t actual_len = util_file_get_size(path); if (actual_len < 0) { ERR("unable to read Device DAX size"); errno = EINVAL; return NULL; } if (len != 0 && len != (size_t)actual_len) { ERR("Device DAX length must be either 0 or " "the exact size of the device: %zu", actual_len); errno = EINVAL; return NULL; } len = 0; } } if (flags & PMEM_FILE_CREATE) { if ((os_off_t)len < 0) { ERR("invalid file length %zu", len); errno = EINVAL; return NULL; } open_flags |= O_CREAT; } if (flags & PMEM_FILE_EXCL) open_flags |= O_EXCL; if ((len != 0) && !(flags & PMEM_FILE_CREATE)) { ERR("non-zero 'len' not allowed without PMEM_FILE_CREATE"); errno = EINVAL; return NULL; } if ((len == 0) && (flags & PMEM_FILE_CREATE)) { ERR("zero 'len' not allowed with PMEM_FILE_CREATE"); errno = EINVAL; return NULL; } if ((flags & PMEM_FILE_TMPFILE) && !(flags & PMEM_FILE_CREATE)) { ERR("PMEM_FILE_TMPFILE not allowed without PMEM_FILE_CREATE"); errno = EINVAL; return NULL; } if (flags & PMEM_FILE_TMPFILE) { if ((fd = util_tmpfile(path, OS_DIR_SEP_STR"pmem.XXXXXX", open_flags & O_EXCL)) < 0) { LOG(2, "failed to create temporary file at \"%s\"", path); return NULL; } } else { if ((fd = os_open(path, open_flags, mode)) < 0) { ERR("!open %s", path); return NULL; } if ((flags & PMEM_FILE_CREATE) && (flags & PMEM_FILE_EXCL)) delete_on_err = 1; } if (flags & PMEM_FILE_CREATE) { /* * Always set length of file to 'len'. * (May either extend or truncate existing file.) */ if (os_ftruncate(fd, (os_off_t)len) != 0) { ERR("!ftruncate"); goto err; } if ((flags & PMEM_FILE_SPARSE) == 0) { if ((errno = os_posix_fallocate(fd, 0, (os_off_t)len)) != 0) { ERR("!posix_fallocate"); goto err; } } } else { ssize_t actual_size = util_file_get_size(path); if (actual_size < 0) { ERR("stat %s: negative size", path); errno = EINVAL; goto err; } len = (size_t)actual_size; } void *addr = pmem_map_register(fd, len, path, file_type == TYPE_DEVDAX); if (addr == NULL) goto err; if (mapped_lenp != NULL) *mapped_lenp = len; if (is_pmemp != NULL) *is_pmemp = pmem_is_pmem(addr, len); LOG(3, "returning %p", addr); VALGRIND_REGISTER_PMEM_MAPPING(addr, len); VALGRIND_REGISTER_PMEM_FILE(fd, addr, len, 0); (void) os_close(fd); return addr; err: oerrno = errno; (void) os_close(fd); if (delete_on_err) (void) os_unlink(path); errno = oerrno; return NULL; }