/*
 * worker -- the work each thread performs
 */
static void *
worker(void *arg)
{
	int *ret = (int *)arg;

	*ret = pmem_is_pmem(Addr, Size);
	return NULL;
}
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem_posix");

	if (argc < 4)
		UT_FATAL("usage: %s op addr len type [op addr len type ...]",
			argv[0]);

	/* insert memory regions to the list */
	int i;
	for (i = 1; i < argc; ) {
		UT_ASSERT(i + 2 < argc);

		errno = 0;
		void *addr = (void *)strtoull(argv[i + 1], NULL, 0);
		UT_ASSERTeq(errno, 0);

		size_t len = strtoull(argv[i + 2], NULL, 0);
		UT_ASSERTeq(errno, 0);

		int ret;

		switch (argv[i][0]) {
		case 'a':
			ret = util_range_register(addr, len, "",
				str2type(argv[i + 3]));
			if (ret != 0)
				UT_OUT("%s", pmem_errormsg());
			i += 4;
			break;
		case 'r':
			ret = util_range_unregister(addr, len);
			UT_ASSERTeq(ret, 0);
			i += 3;
			break;
		case 't':
			UT_OUT("addr %p len %zu is_pmem %d",
				addr, len, pmem_is_pmem(addr, len));
			i += 3;
			break;
		case 'f':
			do_fault_injection_register(addr, len,
				str2type(argv[i + 3]));
			i += 4;
			break;
		case 's':
			do_fault_injection_split(addr, len);
			i += 3;
			break;
		default:
			UT_FATAL("invalid op '%c'", argv[i][0]);
		}
	}

	DONE(NULL);
}
int
main(int argc, char *argv[])
{
	int srcfd;
	int dstfd;
	struct stat stbuf;
	char *pmemaddr;

	if (argc != 3) {
		fprintf(stderr, "usage: %s src-file dst-file\n", argv[0]);
		exit(1);
	}

	/* open src-file */
	if ((srcfd = open(argv[1], O_RDONLY)) < 0) {
		perror(argv[1]);
		exit(1);
	}

	/* create a pmem file */
	if ((dstfd = open(argv[2], O_CREAT|O_EXCL|O_RDWR, 0666)) < 0) {
		perror(argv[2]);
		exit(1);
	}

	/* find the size of the src-file */
	if (fstat(srcfd, &stbuf) < 0) {
		perror("fstat");
		exit(1);
	}

	/* allocate the pmem */
	if ((errno = posix_fallocate(dstfd, 0, stbuf.st_size)) != 0) {
		perror("posix_fallocate");
		exit(1);
	}

	/* memory map it */
	if ((pmemaddr = pmem_map(dstfd)) == NULL) {
		perror("pmem_map");
		exit(1);
	}
	close(dstfd);

	/* determine if range is true pmem, call appropriate copy routine */
	if (pmem_is_pmem(pmemaddr, stbuf.st_size))
		do_copy_to_pmem(pmemaddr, srcfd, stbuf.st_size);
	else
		do_copy_to_non_pmem(pmemaddr, srcfd, stbuf.st_size);

	close(srcfd);
	exit(0);
}
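The open/fallocate/pmem_map sequence above ends with the canonical decision: probe the mapping once with pmem_is_pmem(), then route all later flushes through pmem_persist() or pmem_msync(). A minimal sketch of that decision folded into a helper (flush_range is a hypothetical name; the libpmem calls themselves are real):

#include <libpmem.h>

/* hypothetical helper: flush a dirty range by the fastest safe method */
static void
flush_range(void *addr, size_t len)
{
	if (pmem_is_pmem(addr, len))
		pmem_persist(addr, len);	/* user-space cache flush */
	else
		(void) pmem_msync(addr, len);	/* falls back to msync(2) */
}

In practice callers cache the pmem_is_pmem() result, as the pool code later in this section does, since the probe itself is not free.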
int
main(int argc, char *argv[])
{
	int fd;
	struct stat stbuf;
	char *dest;

	START(argc, argv, "pmem_valgr_simple");

	if (argc != 4)
		FATAL("usage: %s file offset length", argv[0]);

	fd = OPEN(argv[1], O_RDWR);
	int dest_off = atoi(argv[2]);
	size_t bytes = strtoul(argv[3], NULL, 0);

	FSTAT(fd, &stbuf);

	dest = pmem_map(fd);
	if (dest == NULL)
		FATAL("!Could not mmap %s\n", argv[1]);

	/* these will not be made persistent */
	*(int *)dest = 4;

	/* this will be made persistent */
	uint64_t *tmp64dst = (void *)((uintptr_t)dest + 4096);
	*tmp64dst = 50;

	if (pmem_is_pmem(dest, sizeof (*tmp64dst))) {
		pmem_persist(tmp64dst, sizeof (*tmp64dst));
	} else {
		pmem_msync(tmp64dst, sizeof (*tmp64dst));
	}

	uint16_t *tmp16dst = (void *)((uintptr_t)dest + 1024);
	*tmp16dst = 21;
	/* will appear as flushed in valgrind log */
	pmem_flush(tmp16dst, sizeof (*tmp16dst));

	/* shows strange behavior of memset in some cases */
	memset(dest + dest_off, 0, bytes);

	pmem_unmap(dest, stbuf.st_size);

	CLOSE(fd);
	DONE(NULL);
}
PMEMobjpool *
pmemobj_open_mock(const char *fname)
{
	int fd = open(fname, O_RDWR);
	if (fd == -1) {
		OUT("!%s: open", fname);
		return NULL;
	}

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		OUT("!fstat");
		close(fd);
		return NULL;
	}

	ASSERT(stbuf.st_size > PMEMOBJ_POOL_HDR_SIZE);

	void *addr = pmem_map(fd);
	if (!addr) {
		OUT("!%s: pmem_map", fname);
		close(fd);
		return NULL;
	}

	close(fd);

	PMEMobjpool *pop = (PMEMobjpool *)addr;
	VALGRIND_REMOVE_PMEM_MAPPING(addr + sizeof (pop->hdr), 4096);

	pop->addr = addr;
	pop->size = stbuf.st_size;
	pop->is_pmem = pmem_is_pmem(addr, stbuf.st_size);
	pop->rdonly = 0;

	if (pop->is_pmem) {
		pop->persist = pmem_persist;
		pop->flush = pmem_flush;
		pop->drain = pmem_drain;
	} else {
		pop->persist = (persist_fn)pmem_msync;
		pop->flush = (persist_fn)pmem_msync;
		pop->drain = pmem_drain_nop;
	}

	return pop;
}
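The mock resolves the flush strategy once at open time and stores function pointers in the pool, so callers invoke pop->persist() without re-testing pmem_is_pmem() on every store. A sketch of the same dispatch pattern, assuming an illustrative pool struct and persist_fn typedef (note the wrapper: pmem_msync() returns int, so the direct cast used above is technically a type mismatch):

#include <libpmem.h>

typedef void (*persist_fn)(const void *addr, size_t len);

/* pmem_msync() returns int, so wrap it to match persist_fn exactly */
static void
msync_persist(const void *addr, size_t len)
{
	(void) pmem_msync(addr, len);
}

struct pool {			/* illustrative, not the library's struct */
	void *base;
	size_t size;
	persist_fn persist;	/* resolved once at open time */
};

static void
pool_bind_persist(struct pool *p)
{
	p->persist = pmem_is_pmem(p->base, p->size)
		? pmem_persist : msync_persist;
}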
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem");

	if (argc < 2 || argc > 3)
		UT_FATAL("usage: %s file [env]", argv[0]);

	if (argc == 3)
		UT_ASSERTeq(setenv("PMEM_IS_PMEM_FORCE", argv[2], 1), 0);

	int fd = OPEN(argv[1], O_RDWR);
	ut_util_stat_t stbuf;
	FSTAT(fd, &stbuf);

	Size = stbuf.st_size;
	Addr = MMAP(0, stbuf.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

	CLOSE(fd);

	pthread_t threads[NTHREAD];
	int ret[NTHREAD];

	/* kick off NTHREAD threads */
	for (int i = 0; i < NTHREAD; i++)
		PTHREAD_CREATE(&threads[i], NULL, worker, &ret[i]);

	/* wait for all the threads to complete */
	for (int i = 0; i < NTHREAD; i++)
		PTHREAD_JOIN(threads[i], NULL);

	/* verify that all the threads return the same value */
	for (int i = 1; i < NTHREAD; i++)
		UT_ASSERTeq(ret[0], ret[i]);

	UT_OUT("%d", ret[0]);

	UT_ASSERTeq(unsetenv("PMEM_IS_PMEM_FORCE"), 0);
	UT_OUT("%d", pmem_is_pmem(Addr, Size));

	DONE(NULL);
}
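The optional second argument exercises PMEM_IS_PMEM_FORCE, an environment variable libpmem honors as an override: "1" forces pmem_is_pmem() to report true, "0" to report false. A sketch of the same override, assuming (as this test does) a library version that consults the variable on each query rather than only at library load:

#include <stdlib.h>
#include <libpmem.h>

/* illustrative only: addr/len are assumed to describe a live mapping */
static void
demo_force(void *addr, size_t len)
{
	setenv("PMEM_IS_PMEM_FORCE", "1", 1);	/* force "is pmem" */
	(void) pmem_is_pmem(addr, len);		/* reports 1 regardless */

	unsetenv("PMEM_IS_PMEM_FORCE");
	(void) pmem_is_pmem(addr, len);		/* real detection again */
}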
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem_linux");

	if (argc < 3)
		UT_FATAL("usage: %s op addr len [op addr len ...]", argv[0]);

	/* insert memory regions to the list */
	int i;
	for (i = 1; i < argc; i += 3) {
		UT_ASSERT(i + 2 < argc);

		errno = 0;
		void *addr = (void *)strtoull(argv[i + 1], NULL, 0);
		UT_ASSERTeq(errno, 0);

		size_t len = strtoull(argv[i + 2], NULL, 0);
		UT_ASSERTeq(errno, 0);

		int ret;

		switch (argv[i][0]) {
		case 'a':
			ret = util_range_register(addr, len);
			UT_ASSERTeq(ret, 0);
			break;
		case 'r':
			ret = util_range_unregister(addr, len);
			UT_ASSERTeq(ret, 0);
			break;
		case 't':
			UT_OUT("addr %p len %zu is_pmem %d",
				addr, len, pmem_is_pmem(addr, len));
			break;
		default:
			UT_FATAL("invalid op");
		}
	}

	DONE(NULL);
}
/*
 * is_pmem -- checks if given path points to pmem-aware filesystem
 */
static int
is_pmem(const char *path)
{
	int ret;

	void *addr = util_map_tmpfile(path, SIZE, 0);
	if (addr == NULL) {
		fprintf(stderr, "file creation failed\n");
		return -1;
	}

	if (pmem_is_pmem(addr, SIZE))
		ret = 1;
	else
		ret = 0;

	util_unmap(addr, SIZE);

	return ret;
}
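Because pmem_is_pmem() operates on a mapped range rather than a path, probing a filesystem means mapping a scratch file there first, which is exactly what the helper above does. A hedged usage sketch (the mount point is illustrative, and the path argument follows whatever format util_map_tmpfile() expects):

#include <stdio.h>

/* illustrative: probe a mount point before placing a pool there */
static void
report_pmem(const char *dir)
{
	switch (is_pmem(dir)) {
	case 1:
		printf("%s: pmem-aware, pmem_persist() path available\n", dir);
		break;
	case 0:
		printf("%s: regular filesystem, flushes use msync(2)\n", dir);
		break;
	default:
		fprintf(stderr, "%s: probe failed\n", dir);
	}
}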
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem_proc_linux");

	if (argc < 5)
		UT_FATAL("usage: %s nfiles file.. nregions "
				"(addr len)... (addr len)...", argv[0]);

	Nfiles = atoi(argv[1]);
	UT_ASSERT(Nfiles < MAX_FILES);

	for (int i = 0; i < Nfiles; i++) {
		Sfile[i] = argv[2 + i];
	}

	Nregions = atoi(argv[2 + Nfiles]);
	UT_ASSERT(Nregions < MAX_REGIONS);

	/* each region is described by an (addr len) pair of arguments */
	for (int i = 0; i < Nregions; i++) {
		char *str_addr = argv[3 + Nfiles + 2 * i + 0];
		char *str_len = argv[3 + Nfiles + 2 * i + 1];

		Mincore[i].addr = (uintptr_t)strtoull(str_addr, NULL, 16);
		Mincore[i].len = (size_t)strtoull(str_len, NULL, 10);
	}

	for (int arg = 2 + Nfiles + 1 + 2 * Nregions; arg < argc; arg += 2) {
		void *addr;
		size_t len;

		addr = (void *)strtoull(argv[arg], NULL, 16);
		len = (size_t)strtoull(argv[arg + 1], NULL, 10);

		Curfile = 0;

		UT_OUT("addr %p, len %zu: %d", addr, len,
			pmem_is_pmem(addr, len));
	}

	DONE(NULL);
}
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem_proc");

	if (argc < 4 || argc % 2)
		FATAL("usage: %s file addr len [addr len]...", argv[0]);

	Sfile = argv[1];

	for (int arg = 2; arg < argc; arg += 2) {
		void *addr;
		size_t len;

		addr = (void *)strtoull(argv[arg], NULL, 16);
		len = (size_t)strtoull(argv[arg + 1], NULL, 10);

		OUT("addr %p, len %zu: %d", addr, len,
			pmem_is_pmem(addr, len));
	}

	DONE(NULL);
}
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_map_file");

	int fd;
	void *addr;
	size_t mlen;
	size_t *mlenp;
	const char *path;
	unsigned long long len;
	int flags;
	unsigned mode;
	int is_pmem;
	int *is_pmemp;
	int use_mlen;
	int use_is_pmem;
	int err_code;

	if (argc < 8)
		UT_FATAL("usage: %s path len flags mode use_mlen "
			"use_is_pmem err_code...", argv[0]);

	for (int i = 1; i + 6 < argc; i += 7) {
		path = argv[i];
		len = strtoull(argv[i + 1], NULL, 0);
		flags = parse_flags(argv[i + 2]);
		mode = STRTOU(argv[i + 3], NULL, 8);
		use_mlen = atoi(argv[i + 4]);
		use_is_pmem = atoi(argv[i + 5]);
		err_code = parse_err_code(argv[i + 6]);

		mlen = SIZE_MAX;
		if (use_mlen)
			mlenp = &mlen;
		else
			mlenp = NULL;

		if (use_is_pmem)
			is_pmemp = &is_pmem;
		else
			is_pmemp = NULL;

		UT_OUT("%s %llu %s %o %d %d %d", path, len, argv[i + 2],
			mode, use_mlen, use_is_pmem, err_code);

		addr = pmem_map_file(path, len, flags, mode, mlenp, is_pmemp);

		if (err_code != 0) {
			UT_ASSERTeq(errno, err_code);
		}

		if (addr == NULL) {
			UT_OUT("!pmem_map_file");
			continue;
		}

		if (use_mlen) {
			UT_ASSERTne(mlen, SIZE_MAX);
			UT_OUT("mapped_len %zu", mlen);
		} else {
			mlen = len;
		}

		if (addr) {
			/* is_pmem must be true for device DAX */
			int is_pmem_check = pmem_is_pmem(addr, mlen);
			UT_ASSERT(!is_dev_dax || is_pmem_check);

			/* check is_pmem returned from pmem_map_file */
			if (use_is_pmem)
				UT_ASSERTeq(is_pmem, is_pmem_check);

			if ((flags & PMEM_FILE_TMPFILE) == 0 && !is_dev_dax) {
				fd = OPEN(argv[i], O_RDWR);

				if (!use_mlen) {
					os_stat_t stbuf;
					FSTAT(fd, &stbuf);
					mlen = (size_t)stbuf.st_size;
				}

				if (fd != -1) {
					do_check(fd, addr, mlen);
					(void) CLOSE(fd);
				} else {
					UT_OUT("!cannot open file: %s",
						argv[i]);
				}
			} else {
				UT_ASSERTeq(pmem_unmap(addr, mlen), 0);
			}
		}
	}

	DONE(NULL);
}
int
main(int argc, char *argv[])
{
	int srcfd;
	int dstfd;
	char buf[BUF_LEN];
	char *pmemaddr;
	int is_pmem;
	int cc;

	if (argc != 3) {
		fprintf(stderr, "usage: %s src-file dst-file\n", argv[0]);
		exit(1);
	}

	/* open src-file */
	if ((srcfd = open(argv[1], O_RDONLY)) < 0) {
		perror(argv[1]);
		exit(1);
	}

	/* create a pmem file */
	if ((dstfd = open(argv[2], O_CREAT|O_EXCL|O_RDWR, 0666)) < 0) {
		perror(argv[2]);
		exit(1);
	}

	/* allocate the pmem */
	if ((errno = posix_fallocate(dstfd, 0, BUF_LEN)) != 0) {
		perror("posix_fallocate");
		exit(1);
	}

	/* memory map it */
	if ((pmemaddr = pmem_map(dstfd)) == NULL) {
		perror("pmem_map");
		exit(1);
	}
	close(dstfd);

	/* determine if range is true pmem */
	is_pmem = pmem_is_pmem(pmemaddr, BUF_LEN);

	/* read up to BUF_LEN from srcfd */
	if ((cc = read(srcfd, buf, BUF_LEN)) < 0) {
		perror("read");
		exit(1);
	}

	/* write it to the pmem */
	if (is_pmem) {
		pmem_memcpy(pmemaddr, buf, cc);
	} else {
		memcpy(pmemaddr, buf, cc);
		pmem_msync(pmemaddr, cc);
	}

	close(srcfd);
	exit(0);
}
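This example uses the early pmem_memcpy() interface; current libpmem spells the copy-and-flush operation pmem_memcpy_persist(). A sketch of the same branch with that call (store_buf is a hypothetical name):

#include <string.h>
#include <libpmem.h>

/* sketch: one call both copies and makes the data durable on real pmem */
static void
store_buf(char *pmemaddr, int is_pmem, const char *buf, size_t n)
{
	if (is_pmem) {
		pmem_memcpy_persist(pmemaddr, buf, n);
	} else {
		memcpy(pmemaddr, buf, n);
		(void) pmem_msync(pmemaddr, n);
	}
}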
int
main(int argc, char *argv[])
{
	char *path;
	size_t data_size, pool_size;
	int iterations;
	int is_pmem;
	int fd;
	uint8_t *pool;
	double exec_time_pmem_persist, exec_time_pmem_msync;

	if (argc < 4) {
		printf("Usage %s <file_name> <data_size> <iterations>\n",
			argv[0]);
		return 0;
	}

	path = argv[1];
	data_size = atoi(argv[2]);
	iterations = atoi(argv[3]);
	pool_size = data_size * iterations;

	if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRWXU)) < 0) {
		perror("open");
		return -1;
	}

	if ((errno = posix_fallocate(fd, 0, pool_size)) != 0) {
		perror("posix_fallocate");
		goto err;
	}

	if ((pool = pmem_map(fd)) == NULL) {
		perror("pmem_map");
		goto err;
	}

	close(fd);

	/* check if range is true pmem */
	is_pmem = pmem_is_pmem(pool, pool_size);

	if (is_pmem) {
		/* benchmarking pmem_persist */
		exec_time_pmem_persist = benchmark_func(pmem_persist, pool,
			data_size, iterations);
	} else {
		fprintf(stderr, "Notice: pmem_persist is not benchmarked,"
			" because given file (%s) is not in Persistent Memory"
			" aware file system.\n", path);
		exec_time_pmem_persist = 0.0;
	}

	/* benchmarking pmem_msync */
	exec_time_pmem_msync = benchmark_func((persist_fn)pmem_msync, pool,
		data_size, iterations);

	printf("%zu;%e;%zu;%e;%zu;%e\n",
		data_size, exec_time_pmem_persist,
		data_size, exec_time_pmem_msync,
		data_size, exec_time_pmem_persist / exec_time_pmem_msync);

	return 0;

err:
	close(fd);
	unlink(path);
	return -1;
}
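benchmark_func() itself is not shown in the snippet. One plausible shape, under the assumption that it dirties and then flushes each block through the supplied persist function and returns elapsed seconds:

#include <stdint.h>
#include <string.h>
#include <time.h>

typedef void (*persist_fn)(const void *addr, size_t len);

/* sketch of a benchmark_func-style harness: dirty then flush each block */
static double
benchmark_func(persist_fn persist, uint8_t *pool,
		size_t data_size, int iterations)
{
	struct timespec t0, t1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (int i = 0; i < iterations; i++) {
		uint8_t *block = pool + (size_t)i * data_size;
		memset(block, i & 0xff, data_size);	/* dirty the block */
		persist(block, data_size);		/* flush it */
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);

	return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
}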
/*
 * util_replica_open -- (internal) open a memory pool replica
 */
static int
util_replica_open(struct pool_set *set, unsigned repidx, int flags,
	size_t hdrsize)
{
	LOG(3, "set %p repidx %u flags %d hdrsize %zu\n",
		set, repidx, flags, hdrsize);

	struct pool_replica *rep = set->replica[repidx];

	rep->repsize -= (rep->nparts - 1) * hdrsize;

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize); /* XXX - randomize */
	if (addr == NULL) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
		rep->part[0].addr, rep->part[0].size, 0);

	/* map all headers - don't care about the address */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_map_hdr(&rep->part[p], hdrsize, 0, flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}
	}

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = (char *)rep->part[0].addr + mapsize;

	/*
	 * map the remaining parts of the usable pool space
	 * (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, hdrsize,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, hdrsize);

		mapsize += rep->part[p].size;
		addr = (char *)addr + rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	/* calculate pool size - choose the smallest replica size */
	if (rep->repsize < set->poolsize)
		set->poolsize = rep->repsize;

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);
	util_unmap_part(&rep->part[0]);
	errno = oerrno;
	return -1;
}
/*
 * pmemblk_map_common -- (internal) map a block memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * Passing in bsize == 0 means a valid pool header must exist (which
 * will supply the block size).
 */
static PMEMblk *
pmemblk_map_common(int fd, size_t bsize, int rdonly)
{
	LOG(3, "fd %d bsize %zu rdonly %d", fd, bsize, rdonly);

	/* things freed by "goto err" if not NULL */
	void *addr = NULL;
	struct btt *bttp = NULL;
	pthread_mutex_t *locks = NULL;

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMBLK_MIN_POOL) {
		LOG(1, "size %lld smaller than %zu",
			(long long)stbuf.st_size, PMEMBLK_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemblk *pbp = addr;

	struct pool_hdr hdr;
	memcpy(&hdr, &pbp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != BLK_FORMAT_MAJOR) {
			LOG(1, "blk pool version %d (library expects %d)",
				hdr.major, BLK_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		size_t hdr_bsize = le32toh(pbp->bsize);
		if (bsize && bsize != hdr_bsize) {
			LOG(1, "wrong bsize (%zu), pool created with bsize %zu",
				bsize, hdr_bsize);
			errno = EINVAL;
			goto err;
		}
		bsize = hdr_bsize;
		LOG(3, "using block size from header: %zu", bsize);

		int retval = util_feature_check(&hdr, BLK_FORMAT_INCOMPAT,
			BLK_FORMAT_RO_COMPAT, BLK_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new blk memory pool");

		struct pool_hdr *hdrp = &pbp->hdr;

		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(BLK_FORMAT_MAJOR);
		hdrp->compat_features = htole32(BLK_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(BLK_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(BLK_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		pbp->bsize = htole32(bsize);
		libpmem_persist(is_pmem, &pbp->bsize, sizeof (bsize));
	}

	/*
	 * Use some of the memory pool area for run-time info. This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	pbp->addr = addr;
	pbp->size = stbuf.st_size;
	pbp->rdonly = rdonly;
	pbp->is_pmem = is_pmem;
	pbp->data = addr + roundup(sizeof (*pbp), BLK_FORMAT_DATA_ALIGN);
	pbp->datasize = (pbp->addr + pbp->size) - pbp->data;

	LOG(4, "data area %p data size %zu bsize %zu",
		pbp->data, pbp->datasize, bsize);

	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (ncpus < 1)
		ncpus = 1;

	bttp = btt_init(pbp->datasize, (uint32_t)bsize,
		pbp->hdr.uuid, ncpus, pbp, &ns_cb);
	if (bttp == NULL)
		goto err;	/* btt_init set errno, called LOG */

	pbp->bttp = bttp;

	pbp->nlane = btt_nlane(pbp->bttp);
	pbp->next_lane = 0;

	if ((locks = Malloc(pbp->nlane * sizeof (*locks))) == NULL) {
		LOG(1, "!Malloc for lane locks");
		goto err;
	}

	for (int i = 0; i < pbp->nlane; i++)
		if (pthread_mutex_init(&locks[i], NULL) < 0) {
			LOG(1, "!pthread_mutex_init");
			goto err;
		}

	pbp->locks = locks;

#ifdef DEBUG
	/* initialize debug lock */
	if (pthread_mutex_init(&pbp->write_lock, NULL) < 0) {
		LOG(1, "!pthread_mutex_init");
		goto err;
	}
#endif

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the data area should be kept read-only for debug version */
	RANGE_RO(pbp->data, pbp->datasize);

	LOG(3, "pbp %p", pbp);
	return pbp;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	if (locks)
		Free((void *)locks);
	if (bttp)
		btt_fini(bttp);
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
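Both this routine and the pmemlog variant later in this section funnel metadata stores through libpmem_persist(is_pmem, ...), picking the flush mechanism from the flag computed once at map time. The helper's body is not shown here; a plausible sketch consistent with its call sites:

#include <libpmem.h>

/* sketch: persist a range using the strategy chosen at map time */
static void
libpmem_persist(int is_pmem, void *addr, size_t len)
{
	if (is_pmem)
		pmem_persist(addr, len);
	else
		(void) pmem_msync(addr, len);
}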
StorageManager::StorageManager()
    : data_file_address(nullptr),
      is_pmem(false),
      data_file_len(0),
      data_file_offset(0) {
  // Check if we need a data pool
  if (IsBasedOnWriteAheadLogging(peloton_logging_mode) == true ||
      peloton_logging_mode == LOGGING_TYPE_INVALID) {
    return;
  }

  int data_fd;
  std::string data_file_name;
  struct stat data_stat;

  // Initialize file size
  if (peloton_data_file_size != 0)
    data_file_len = peloton_data_file_size * 1024 * 1024;  // MB
  else
    data_file_len = DATA_FILE_LEN;

  // Check for relevant file system
  bool found_file_system = false;

  switch (peloton_logging_mode) {
    // Check for NVM FS for data
    case LOGGING_TYPE_NVM_NVM:
    case LOGGING_TYPE_NVM_HDD:
    case LOGGING_TYPE_NVM_SSD: {
      int status = stat(NVM_DIR, &data_stat);
      if (status == 0 && S_ISDIR(data_stat.st_mode)) {
        data_file_name = std::string(NVM_DIR) + std::string(DATA_FILE_NAME);
        found_file_system = true;
      }
    } break;

    // Check for SSD FS
    case LOGGING_TYPE_SSD_NVM:
    case LOGGING_TYPE_SSD_SSD:
    case LOGGING_TYPE_SSD_HDD: {
      int status = stat(SSD_DIR, &data_stat);
      if (status == 0 && S_ISDIR(data_stat.st_mode)) {
        data_file_name = std::string(SSD_DIR) + std::string(DATA_FILE_NAME);
        found_file_system = true;
      }
    } break;

    // Check for HDD FS
    case LOGGING_TYPE_HDD_NVM:
    case LOGGING_TYPE_HDD_SSD:
    case LOGGING_TYPE_HDD_HDD: {
      int status = stat(HDD_DIR, &data_stat);
      if (status == 0 && S_ISDIR(data_stat.st_mode)) {
        data_file_name = std::string(HDD_DIR) + std::string(DATA_FILE_NAME);
        found_file_system = true;
      }
    } break;

    default:
      break;
  }

  // Fallback to tmp directory if needed
  if (found_file_system == false) {
    int status = stat(TMP_DIR, &data_stat);
    if (status == 0 && S_ISDIR(data_stat.st_mode)) {
      data_file_name = std::string(TMP_DIR) + std::string(DATA_FILE_NAME);
    } else {
      throw Exception("Could not find temp directory : " +
                      std::string(TMP_DIR));
    }
  }

  LOG_INFO("DATA DIR :: %s ", data_file_name.c_str());

  // TODO: std::cout << "Data path :: " << data_file_name << "\n";

  // Create a data file
  if ((data_fd = open(data_file_name.c_str(), O_CREAT | O_RDWR, 0666)) < 0) {
    perror(data_file_name.c_str());
    exit(EXIT_FAILURE);
  }

  // Allocate the data file
  if ((errno = posix_fallocate(data_fd, 0, data_file_len)) != 0) {
    perror("posix_fallocate");
    exit(EXIT_FAILURE);
  }

  // memory map the data file
  if ((data_file_address = reinterpret_cast<char *>(pmem_map(data_fd))) ==
      NULL) {
    perror("pmem_map");
    exit(EXIT_FAILURE);
  }

  // true only if the entire range [addr, addr+len) consists of persistent
  // memory
  is_pmem = pmem_is_pmem(data_file_address, data_file_len);

  // close the pmem file -- it will remain mapped
  close(data_fd);
}
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * If empty flag is set, the file is assumed to be a new memory pool, and
 * a new pool header is created. Otherwise, a valid header must exist.
 */
static PMEMlogpool *
pmemlog_map_common(int fd, size_t poolsize, int rdonly, int empty)
{
	LOG(3, "fd %d poolsize %zu rdonly %d empty %d",
		fd, poolsize, rdonly, empty);

	void *addr;
	if ((addr = util_map(fd, poolsize, rdonly)) == NULL) {
		(void) close(fd);
		return NULL;	/* util_map() set errno, called LOG */
	}

	VALGRIND_REGISTER_PMEM_MAPPING(addr, poolsize);
	VALGRIND_REGISTER_PMEM_FILE(fd, addr, poolsize, 0);

	(void) close(fd);

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, poolsize);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemlog *plp = addr;

	if (!empty) {
		struct pool_hdr hdr;

		memcpy(&hdr, &plp->hdr, sizeof (hdr));

		if (!util_convert_hdr(&hdr)) {
			errno = EINVAL;
			goto err;
		}

		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
			ERR("wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != LOG_FORMAT_MAJOR) {
			ERR("log pool version %d (library expects %d)",
				hdr.major, LOG_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		/* XXX - pools sets / replicas */
		if (memcmp(hdr.uuid, hdr.prev_part_uuid, POOL_HDR_UUID_LEN) ||
		    memcmp(hdr.uuid, hdr.next_part_uuid, POOL_HDR_UUID_LEN) ||
		    memcmp(hdr.uuid, hdr.prev_repl_uuid, POOL_HDR_UUID_LEN) ||
		    memcmp(hdr.uuid, hdr.next_repl_uuid, POOL_HDR_UUID_LEN)) {
			ERR("wrong UUID");
			errno = EINVAL;
			goto err;
		}

		uint64_t hdr_start = le64toh(plp->start_offset);
		uint64_t hdr_end = le64toh(plp->end_offset);
		uint64_t hdr_write = le64toh(plp->write_offset);

		if ((hdr_start != roundup(sizeof (*plp),
					LOG_FORMAT_DATA_ALIGN)) ||
		    (hdr_end != poolsize) || (hdr_start > hdr_end)) {
			ERR("wrong start/end offsets (start: %ju end: %ju), "
				"pool size %zu",
				hdr_start, hdr_end, poolsize);
			errno = EINVAL;
			goto err;
		}

		if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
			ERR("wrong write offset "
				"(start: %ju end: %ju write: %ju)",
				hdr_start, hdr_end, hdr_write);
			errno = EINVAL;
			goto err;
		}

		LOG(3, "start: %ju, end: %ju, write: %ju",
			hdr_start, hdr_end, hdr_write);

		int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
			LOG_FORMAT_RO_COMPAT, LOG_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;
	} else {
		LOG(3, "creating new log memory pool");

		ASSERTeq(rdonly, 0);

		struct pool_hdr *hdrp = &plp->hdr;

		/* check if the pool header is all zero */
		if (!util_is_zeroed(hdrp, sizeof (*hdrp))) {
			ERR("Non-empty file detected");
			errno = EINVAL;
			goto err;
		}

		/* create required metadata first */
		plp->start_offset = htole64(roundup(sizeof (*plp),
						LOG_FORMAT_DATA_ALIGN));
		plp->end_offset = htole64(poolsize);
		plp->write_offset = plp->start_offset;

		/* store non-volatile part of pool's descriptor */
		pmem_msync(&plp->start_offset, 3 * sizeof (uint64_t));

		/* create pool header */
		strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(LOG_FORMAT_MAJOR);
		hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		/* XXX - pools sets / replicas */
		uuid_generate(hdrp->poolset_uuid);
		memcpy(hdrp->prev_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->prev_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		hdrp->crtime = htole64((uint64_t)time(NULL));

		if (util_get_arch_flags(&hdrp->arch_flags)) {
			ERR("Reading architecture flags failed\n");
			errno = EINVAL;
			goto err;
		}

		hdrp->arch_flags.alignment_desc =
			htole64(hdrp->arch_flags.alignment_desc);
		hdrp->arch_flags.e_machine =
			htole16(hdrp->arch_flags.e_machine);

		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(hdrp, sizeof (*hdrp));
	}

	/* remove volatile part of header */
	VALGRIND_REMOVE_PMEM_MAPPING(&plp->addr,
		sizeof (struct pmemlog) -
		sizeof (struct pool_hdr) -
		3 * sizeof (uint64_t));

	/*
	 * Use some of the memory pool area for run-time info. This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	plp->addr = addr;
	plp->size = poolsize;
	plp->rdonly = rdonly;
	plp->is_pmem = is_pmem;

	if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
		ERR("!Malloc for a RW lock");
		goto err;
	}

	if ((errno = pthread_rwlock_init(plp->rwlockp, NULL))) {
		ERR("!pthread_rwlock_init");
		goto err_free;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the rest should be kept read-only (debug version only) */
	RANGE_RO(addr + sizeof (struct pool_hdr),
		poolsize - sizeof (struct pool_hdr));

	LOG(3, "plp %p", plp);
	return plp;

err_free:
	Free((void *)plp->rwlockp);
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	VALGRIND_REMOVE_PMEM_MAPPING(addr, poolsize);
	util_unmap(addr, poolsize);
	errno = oerrno;
	return NULL;
}
/*
 * util_replica_create -- (internal) create a new memory pool replica
 */
static int
util_replica_create(struct pool_set *set, unsigned repidx, int flags,
	const char *sig, uint32_t major, uint32_t compat, uint32_t incompat,
	uint32_t ro_compat, const unsigned char *prev_repl_uuid,
	const unsigned char *next_repl_uuid, const unsigned char *arch_flags)
{
	LOG(3, "set %p repidx %u flags %d sig %.8s major %u "
		"compat %#x incompat %#x ro_compat %#x "
		"prev_repl_uuid %p next_repl_uuid %p arch_flags %p",
		set, repidx, flags, sig, major, compat, incompat, ro_compat,
		prev_repl_uuid, next_repl_uuid, arch_flags);

	struct pool_replica *rep = set->replica[repidx];

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize, 0);
	if (addr == MAP_FAILED) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	/* XXX investigate this idea of reserving space on Windows */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
		rep->part[0].addr, rep->part[0].size, 0);

	/* map all headers - don't care about the address */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_map_hdr(&rep->part[p], flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}
	}

	/* create headers, set UUID's */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_header_create(set, repidx, p, sig, major,
				compat, incompat, ro_compat,
				prev_repl_uuid, next_repl_uuid,
				arch_flags) != 0) {
			LOG(2, "header creation failed - part #%d", p);
			goto err;
		}
	}

	/* unmap all headers */
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);

	set->zeroed &= rep->part[0].created;

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = (char *)rep->part[0].addr + mapsize;

	/*
	 * map the remaining parts of the usable pool space (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, POOL_HDR_SIZE,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, POOL_HDR_SIZE);

		mapsize += rep->part[p].size;
		set->zeroed &= rep->part[p].created;
		addr = (char *)addr + rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);
	util_unmap_part(&rep->part[0]);
	errno = oerrno;
	return -1;
}
/*
 * pool_params_parse -- parse pool type, file size and block size
 */
static int
pool_params_parse(const PMEMpoolcheck *ppc, struct pool_params *params,
	int check)
{
	LOG(3, NULL);

	int is_btt = ppc->args.pool_type == PMEMPOOL_POOL_TYPE_BTT;

	params->type = POOL_TYPE_UNKNOWN;
	params->is_poolset = util_is_poolset_file(ppc->path) == 1;

	int fd = util_file_open(ppc->path, NULL, 0, O_RDONLY);
	if (fd < 0)
		return -1;

	int ret = 0;

	os_stat_t stat_buf;
	ret = os_fstat(fd, &stat_buf);
	if (ret)
		goto out_close;

	ASSERT(stat_buf.st_size >= 0);

	params->mode = stat_buf.st_mode;

	struct pool_set *set;
	void *addr;

	if (params->is_poolset) {
		/*
		 * Need to close the poolset because it will be opened with
		 * flock in the following instructions.
		 */
		os_close(fd);
		fd = -1;

		if (check) {
			if (pool_set_map(ppc->path, &set, 0))
				return -1;
		} else {
			ret = util_poolset_create_set(&set, ppc->path,
				0, 0, true);
			if (ret < 0) {
				LOG(2, "cannot open pool set -- '%s'",
					ppc->path);
				return -1;
			}
			if (set->remote) {
				ERR("poolsets with remote replicas are not "
					"supported");
				return -1;
			}
			if (util_pool_open_nocheck(set,
					POOL_OPEN_IGNORE_BAD_BLOCKS))
				return -1;
		}

		params->size = set->poolsize;
		addr = set->replica[0]->part[0].addr;

		/*
		 * XXX mprotect for device dax with length not aligned to its
		 * page granularity causes SIGBUS on the next page fault.
		 * The length argument of this call should be changed to
		 * set->poolsize once the kernel issue is solved.
		 */
		if (mprotect(addr, set->replica[0]->repsize, PROT_READ) < 0) {
			ERR("!mprotect");
			goto out_unmap;
		}

		params->is_dev_dax = set->replica[0]->part[0].is_dev_dax;
		params->is_pmem = set->replica[0]->is_pmem;
	} else if (is_btt) {
		params->size = (size_t)stat_buf.st_size;
#ifndef _WIN32
		if (params->mode & S_IFBLK)
			if (ioctl(fd, BLKGETSIZE64, &params->size)) {
				ERR("!ioctl");
				goto out_close;
			}
#endif
		addr = NULL;
	} else {
		enum file_type type = util_file_get_type(ppc->path);
		if (type < 0) {
			ret = -1;
			goto out_close;
		}

		ssize_t s = util_file_get_size(ppc->path);
		if (s < 0) {
			ret = -1;
			goto out_close;
		}
		params->size = (size_t)s;

		int map_sync;
		addr = util_map(fd, params->size, MAP_SHARED, 1, 0,
			&map_sync);
		if (addr == NULL) {
			ret = -1;
			goto out_close;
		}

		params->is_dev_dax = type == TYPE_DEVDAX;
		params->is_pmem = params->is_dev_dax || map_sync ||
			pmem_is_pmem(addr, params->size);
	}

	/* stop processing for BTT device */
	if (is_btt) {
		params->type = POOL_TYPE_BTT;
		params->is_part = false;
		goto out_close;
	}

	struct pool_hdr hdr;
	memcpy(&hdr, addr, sizeof(hdr));
	util_convert2h_hdr_nocheck(&hdr);
	pool_params_from_header(params, &hdr);

	if (ppc->args.pool_type != PMEMPOOL_POOL_TYPE_DETECT) {
		enum pool_type declared_type =
			pool_check_type_to_pool_type(ppc->args.pool_type);
		if ((params->type & ~declared_type) != 0) {
			ERR("declared pool type does not match");
			errno = EINVAL;
			ret = 1;
			goto out_unmap;
		}
	}

	if (params->type == POOL_TYPE_BLK) {
		struct pmemblk pbp;
		memcpy(&pbp, addr, sizeof(pbp));
		params->blk.bsize = le32toh(pbp.bsize);
	} else if (params->type == POOL_TYPE_OBJ) {
		struct pmemobjpool *pop = addr;
		memcpy(params->obj.layout, pop->layout, PMEMOBJ_MAX_LAYOUT);
	}

out_unmap:
	if (params->is_poolset) {
		ASSERTeq(fd, -1);
		ASSERTne(addr, NULL);
		util_poolset_close(set, DO_NOT_DELETE_PARTS);
	} else if (!is_btt) {
		ASSERTne(fd, -1);
		ASSERTne(addr, NULL);
		munmap(addr, params->size);
	}
out_close:
	if (fd != -1)
		os_close(fd);
	return ret;
}
static inline
#endif
void *
pmem_map_fileU(const char *path, size_t len, int flags, mode_t mode,
	size_t *mapped_lenp, int *is_pmemp)
{
	LOG(3, "path \"%s\" size %zu flags %x mode %o mapped_lenp %p "
		"is_pmemp %p", path, len, flags, mode, mapped_lenp, is_pmemp);

	int oerrno;
	int fd;
	int open_flags = O_RDWR;
	int delete_on_err = 0;
	int file_type = util_file_get_type(path);
#ifdef _WIN32
	open_flags |= O_BINARY;
#endif

	if (file_type == OTHER_ERROR)
		return NULL;

	if (flags & ~(PMEM_FILE_ALL_FLAGS)) {
		ERR("invalid flag specified %x", flags);
		errno = EINVAL;
		return NULL;
	}

	if (file_type == TYPE_DEVDAX) {
		if (flags & ~(PMEM_DAX_VALID_FLAGS)) {
			ERR("flag unsupported for Device DAX %x", flags);
			errno = EINVAL;
			return NULL;
		} else {
			/* we are ignoring all of the flags */
			flags = 0;
			ssize_t actual_len = util_file_get_size(path);
			if (actual_len < 0) {
				ERR("unable to read Device DAX size");
				errno = EINVAL;
				return NULL;
			}
			if (len != 0 && len != (size_t)actual_len) {
				ERR("Device DAX length must be either 0 or "
					"the exact size of the device: %zu",
					actual_len);
				errno = EINVAL;
				return NULL;
			}
			len = 0;
		}
	}

	if (flags & PMEM_FILE_CREATE) {
		if ((os_off_t)len < 0) {
			ERR("invalid file length %zu", len);
			errno = EINVAL;
			return NULL;
		}
		open_flags |= O_CREAT;
	}

	if (flags & PMEM_FILE_EXCL)
		open_flags |= O_EXCL;

	if ((len != 0) && !(flags & PMEM_FILE_CREATE)) {
		ERR("non-zero 'len' not allowed without PMEM_FILE_CREATE");
		errno = EINVAL;
		return NULL;
	}

	if ((len == 0) && (flags & PMEM_FILE_CREATE)) {
		ERR("zero 'len' not allowed with PMEM_FILE_CREATE");
		errno = EINVAL;
		return NULL;
	}

	if ((flags & PMEM_FILE_TMPFILE) && !(flags & PMEM_FILE_CREATE)) {
		ERR("PMEM_FILE_TMPFILE not allowed without PMEM_FILE_CREATE");
		errno = EINVAL;
		return NULL;
	}

	if (flags & PMEM_FILE_TMPFILE) {
		if ((fd = util_tmpfile(path,
				OS_DIR_SEP_STR"pmem.XXXXXX",
				open_flags & O_EXCL)) < 0) {
			LOG(2, "failed to create temporary file at \"%s\"",
				path);
			return NULL;
		}
	} else {
		if ((fd = os_open(path, open_flags, mode)) < 0) {
			ERR("!open %s", path);
			return NULL;
		}
		if ((flags & PMEM_FILE_CREATE) && (flags & PMEM_FILE_EXCL))
			delete_on_err = 1;
	}

	if (flags & PMEM_FILE_CREATE) {
		/*
		 * Always set length of file to 'len'.
		 * (May either extend or truncate existing file.)
		 */
		if (os_ftruncate(fd, (os_off_t)len) != 0) {
			ERR("!ftruncate");
			goto err;
		}
		if ((flags & PMEM_FILE_SPARSE) == 0) {
			if ((errno = os_posix_fallocate(fd, 0,
					(os_off_t)len)) != 0) {
				ERR("!posix_fallocate");
				goto err;
			}
		}
	} else {
		ssize_t actual_size = util_file_get_size(path);
		if (actual_size < 0) {
			ERR("stat %s: negative size", path);
			errno = EINVAL;
			goto err;
		}

		len = (size_t)actual_size;
	}

	void *addr = pmem_map_register(fd, len, path,
			file_type == TYPE_DEVDAX);
	if (addr == NULL)
		goto err;

	if (mapped_lenp != NULL)
		*mapped_lenp = len;

	if (is_pmemp != NULL)
		*is_pmemp = pmem_is_pmem(addr, len);

	LOG(3, "returning %p", addr);

	VALGRIND_REGISTER_PMEM_MAPPING(addr, len);
	VALGRIND_REGISTER_PMEM_FILE(fd, addr, len, 0);

	(void) os_close(fd);

	return addr;

err:
	oerrno = errno;
	(void) os_close(fd);
	if (delete_on_err)
		(void) os_unlink(path);
	errno = oerrno;
	return NULL;
}
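pmem_map_fileU() is the engine behind the public pmem_map_file(), which hands back the mapping plus the mapped length and pmem-ness through out-parameters, so one call replaces the open/fallocate/pmem_map/pmem_is_pmem sequence of the earlier examples. A hedged usage sketch (the path is illustrative):

#include <stdio.h>
#include <libpmem.h>

int
main(void)
{
	size_t mapped_len;
	int is_pmem;

	/* create a 4 KiB file and map it; the path is illustrative */
	void *addr = pmem_map_file("/mnt/pmem0/example", 4096,
			PMEM_FILE_CREATE, 0666, &mapped_len, &is_pmem);
	if (addr == NULL) {
		perror("pmem_map_file");
		return 1;
	}

	/* ... write to addr; flush with pmem_persist() if is_pmem ... */

	pmem_unmap(addr, mapped_len);
	return 0;
}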
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 */
static PMEMlog *
pmemlog_map_common(int fd, int rdonly)
{
	LOG(3, "fd %d rdonly %d", fd, rdonly);

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMLOG_MIN_POOL) {
		LOG(1, "size %lld smaller than %zu",
			(long long)stbuf.st_size, PMEMLOG_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	void *addr;
	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemlog *plp = addr;

	struct pool_hdr hdr;
	memcpy(&hdr, &plp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != LOG_FORMAT_MAJOR) {
			LOG(1, "log pool version %d (library expects %d)",
				hdr.major, LOG_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		uint64_t hdr_start = le64toh(plp->start_offset);
		uint64_t hdr_end = le64toh(plp->end_offset);
		uint64_t hdr_write = le64toh(plp->write_offset);

		if ((hdr_start != roundup(sizeof (*plp),
					LOG_FORMAT_DATA_ALIGN)) ||
		    (hdr_end != stbuf.st_size) || (hdr_start > hdr_end)) {
			LOG(1, "wrong start/end offsets "
				"(start: %ju end: %ju), pool size %lld",
				hdr_start, hdr_end,
				(long long)stbuf.st_size);
			errno = EINVAL;
			goto err;
		}

		if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
			LOG(1, "wrong write offset "
				"(start: %ju end: %ju write: %ju)",
				hdr_start, hdr_end, hdr_write);
			errno = EINVAL;
			goto err;
		}

		LOG(3, "start: %ju, end: %ju, write: %ju",
			hdr_start, hdr_end, hdr_write);

		int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
			LOG_FORMAT_RO_COMPAT, LOG_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new log memory pool");

		struct pool_hdr *hdrp = &plp->hdr;

		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(LOG_FORMAT_MAJOR);
		hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		plp->start_offset = htole64(roundup(sizeof (*plp),
						LOG_FORMAT_DATA_ALIGN));
		plp->end_offset = htole64(stbuf.st_size);
		plp->write_offset = plp->start_offset;

		/* store non-volatile part of pool's descriptor */
		libpmem_persist(is_pmem, &plp->start_offset,
			3 * sizeof (uint64_t));
	}

	/*
	 * Use some of the memory pool area for run-time info. This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	plp->addr = addr;
	plp->size = stbuf.st_size;
	plp->rdonly = rdonly;
	plp->is_pmem = is_pmem;

	if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
		LOG(1, "!Malloc for a RW lock");
		goto err;
	}

	if (pthread_rwlock_init(plp->rwlockp, NULL)) {
		LOG(1, "!pthread_rwlock_init");
		goto err_free;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the rest should be kept read-only (debug version only) */
	RANGE_RO(addr + sizeof (struct pool_hdr),
		stbuf.st_size - sizeof (struct pool_hdr));

	LOG(3, "plp %p", plp);
	return plp;

err_free:
	Free((void *)plp->rwlockp);
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
/*
 * util_replica_create -- (internal) create a new memory pool replica
 */
static int
util_replica_create(struct pool_set *set, unsigned repidx, int flags,
	size_t hdrsize, const char *sig, uint32_t major, uint32_t compat,
	uint32_t incompat, uint32_t ro_compat)
{
	LOG(3, "set %p repidx %u flags %d hdrsize %zu sig %s major %u "
		"compat %#x incompat %#x ro_compat %#x",
		set, repidx, flags, hdrsize, sig, major,
		compat, incompat, ro_compat);

	struct pool_replica *rep = set->replica[repidx];

	rep->repsize -= (rep->nparts - 1) * hdrsize;

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize); /* XXX - randomize */
	if (addr == NULL) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
		rep->part[0].addr, rep->part[0].size, 0);

	/* map all the remaining headers - don't care about the address */
	for (unsigned p = 1; p < rep->nparts; p++) {
		if (util_map_part(&rep->part[p], NULL,
				hdrsize, 0, flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}
		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, 0);
	}

	/* create headers, set UUID's */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_header_create(set, repidx, p, sig, major,
				compat, incompat, ro_compat) != 0) {
			LOG(2, "header creation failed - part #%d", p);
			goto err;
		}
	}

	set->zeroed &= rep->part[0].created;

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = rep->part[0].addr + mapsize;

	/*
	 * unmap headers; map the remaining parts of the usable pool space
	 * (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* unmap header */
		if (util_unmap_part(&rep->part[p]) != 0) {
			LOG(2, "header unmapping failed - part #%d", p);
		}

		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, hdrsize,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, hdrsize);

		mapsize += rep->part[p].size;
		set->zeroed &= rep->part[p].created;
		addr += rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	/* calculate pool size - choose the smallest replica size */
	if (rep->repsize < set->poolsize)
		set->poolsize = rep->repsize;

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	VALGRIND_REMOVE_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	util_unmap(rep->part[0].addr, rep->part[0].size);
	errno = oerrno;
	return -1;
}