/*
 * util_file_map_whole -- maps the entire file into memory
 *
 * Opens 'path' read/write, determines its size, and maps the whole file.
 * Returns the mapped address, or NULL on failure with errno set by the
 * failing call (errno is preserved across the close()).  The descriptor
 * is closed before returning; the mapping keeps its own file reference.
 *
 * Fix: the original had `if (addr == NULL) goto out;` immediately before
 * the `out:` label -- a dead branch (control fell through to `out` either
 * way).  Removed; `addr` already carries the NULL error value.
 */
void *
util_file_map_whole(const char *path)
{
	int fd;
	int olderrno;
	void *addr = NULL;

	if ((fd = open(path, O_RDWR)) < 0)
		return NULL;

	ssize_t size = util_file_get_size(path);
	if (size < 0)
		goto out;

	/* on failure addr stays NULL, which is also the return value */
	addr = util_map(fd, (size_t)size, 0, 0);

out:
	/* preserve the failing call's errno across close() */
	olderrno = errno;
	(void) close(fd);
	errno = olderrno;

	return addr;
}
/*
 * util_file_zero_whole -- zeroes the entire file
 *
 * Maps the whole file at 'path' and memsets it to zero.  Returns 0 on
 * success, -1 on failure; errno from the failing call is preserved
 * across the close().
 */
int
util_file_zero_whole(const char *path)
{
	int rv = 0;
	int saved_errno;
	int fd = open(path, O_RDWR);

	if (fd < 0)
		return -1;

	ssize_t len = util_file_get_size(path);
	if (len < 0) {
		rv = -1;
		goto cleanup;
	}

	void *base = util_map(fd, (size_t)len, 0, 0);
	if (base == NULL) {
		rv = -1;
		goto cleanup;
	}

	/* zero initialize the entire device */
	memset(base, 0, (size_t)len);
	util_unmap(base, (size_t)len);

cleanup:
	saved_errno = errno;
	(void) close(fd);
	errno = saved_errno;

	return rv;
}
/*
 * util_file_map_whole -- maps the entire file into memory
 *
 * Opens 'path' read/write via os_open(), determines its size, and maps
 * the whole file MAP_SHARED.  Returns the mapped address, or NULL on
 * failure (errno set by the failing call and preserved across close).
 * The descriptor is closed before returning.
 *
 * Fix: removed the dead `goto out` in the mapping-failure branch -- the
 * label followed immediately, so the jump was a no-op.
 */
void *
util_file_map_whole(const char *path)
{
	LOG(3, "path \"%s\"", path);

	int fd;
	int olderrno;
	void *addr = NULL;

	if ((fd = os_open(path, O_RDWR)) < 0) {
		ERR("!open \"%s\"", path);
		return NULL;
	}

	ssize_t size = util_file_get_size(path);
	if (size < 0) {
		LOG(2, "cannot determine file length \"%s\"", path);
		goto out;
	}

	/* addr stays NULL on failure, which is also the error return */
	addr = util_map(fd, (size_t)size, MAP_SHARED, 0, 0, NULL);
	if (addr == NULL)
		LOG(2, "failed to map entire file \"%s\"", path);

out:
	/* preserve the failing call's errno across os_close() */
	olderrno = errno;
	(void) os_close(fd);
	errno = olderrno;

	return addr;
}
/*
 * util_file_zero -- zeroes the specified region of the file
 *
 * Maps the whole file at 'path' and zeroes [off, off + len).  If the
 * requested range runs past end-of-file, len is silently clamped.
 * Returns 0 on success, -1 on failure (errno preserved across close).
 *
 * Fix: 'off' (os_off_t, signed) and 'size' (ssize_t, signed) were
 * passed for unsigned conversions (%ju/%zu) in the LOG calls -- a
 * printf-format mismatch, which is undefined behavior.  Signed values
 * now use %lld with (long long) casts (matching the logging style used
 * elsewhere in this file); %zu arguments are explicitly size_t.
 */
int
util_file_zero(const char *path, os_off_t off, size_t len)
{
	LOG(3, "path \"%s\" off %lld len %zu", path, (long long)off, len);

	int fd;
	int olderrno;
	int ret = 0;

	if ((fd = os_open(path, O_RDWR)) < 0) {
		ERR("!open \"%s\"", path);
		return -1;
	}

	ssize_t size = util_file_get_size(path);
	if (size < 0) {
		LOG(2, "cannot determine file length \"%s\"", path);
		ret = -1;
		goto out;
	}

	/* an offset past EOF is an error; a length past EOF is clamped */
	if (off > size) {
		LOG(2, "offset beyond file length, %lld > %lld",
			(long long)off, (long long)size);
		ret = -1;
		goto out;
	}

	if ((size_t)off + len > (size_t)size) {
		LOG(2, "requested size of write goes beyond the file length, "
			"%zu > %zu", (size_t)off + len, (size_t)size);
		LOG(4, "adjusting len to %zu", (size_t)(size - off));
		len = (size_t)(size - off);
	}

	void *addr = util_map(fd, (size_t)size, MAP_SHARED, 0, 0, NULL);
	if (addr == NULL) {
		LOG(2, "failed to map entire file \"%s\"", path);
		ret = -1;
		goto out;
	}

	/* zero initialize the specified region */
	memset((char *)addr + off, 0, len);

	util_unmap(addr, (size_t)size);

out:
	/* preserve the failing call's errno across os_close() */
	olderrno = errno;
	(void) os_close(fd);
	errno = olderrno;

	return ret;
}
/*
 * libvmmalloc_create -- (internal) create a memory pool in a temp file
 *
 * Creates a temporary file of (page-rounded) 'size' bytes in 'dir',
 * maps it, writes the vmem pool header at the start of the mapping,
 * and hands the area past the header to jemalloc.  Returns the pool
 * handle, or NULL with errno set on failure.
 *
 * NOTE(review): 'Fd' is a file-scope global -- the descriptor stays
 * open for the life of the pool.  On util_map() failure below, Fd is
 * left open; presumably the caller treats this as fatal -- confirm.
 */
static VMEM *
libvmmalloc_create(const char *dir, size_t size)
{
	LOG(3, "dir \"%s\" size %zu", dir, size);

	if (size < VMMALLOC_MIN_POOL) {
		LOG(1, "size %zu smaller than %zu", size, VMMALLOC_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	/* silently enforce multiple of page size */
	size = roundup(size, Pagesize);

	Fd = util_tmpfile(dir, size);
	if (Fd == -1)
		return NULL;

	/* last argument looks like an alignment request (4MB) -- cf.
	 * util_map_tmpfile's req_align parameter; confirm */
	void *addr;
	if ((addr = util_map(Fd, size, 0, 4 << 20)) == NULL)
		return NULL;

	/* store opaque info at beginning of mapped area */
	struct vmem *vmp = addr;
	memset(&vmp->hdr, '\0', sizeof (vmp->hdr));
	memcpy(vmp->hdr.signature, VMEM_HDR_SIG, POOL_HDR_SIG_LEN);
	vmp->addr = addr;
	vmp->size = size;
	vmp->caller_mapped = 0;

	/* Prepare pool for jemalloc -- the header area is excluded */
	if (je_vmem_pool_create((void *)((uintptr_t)addr + Header_size),
			size - Header_size, 1) == NULL) {
		LOG(1, "vmem pool creation failed");
		util_unmap(vmp->addr, vmp->size);
		return NULL;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use.  It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	LOG(3, "vmp %p", vmp);
	return vmp;
}
/*
 * pmem_map -- map the entire file for read/write access
 *
 * Maps the file referred to by 'fd' (sized via fstat) and registers the
 * mapping with Valgrind's pmem tracking.  Returns the mapped address,
 * or NULL on failure (errno set by fstat or util_map).
 */
void *
pmem_map(int fd)
{
	LOG(3, "fd %d", fd);

	struct stat st;

	if (fstat(fd, &st) < 0) {
		ERR("!fstat");
		return NULL;
	}

	void *base = util_map(fd, st.st_size, 0);
	if (base == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	LOG(3, "returning %p", base);

	VALGRIND_REGISTER_PMEM_MAPPING(base, st.st_size);
	VALGRIND_REGISTER_PMEM_FILE(fd, base, st.st_size, 0);

	return base;
}
/*
 * util_map_tmpfile -- reserve space in an unlinked file and memory-map it
 *
 * size must be multiple of page size.
 *
 * Creates an unlinked temp file in 'dir', preallocates 'size' bytes,
 * and maps it with the requested alignment.  On success the descriptor
 * is closed (the mapping keeps the file alive) and the address is
 * returned; on failure returns NULL with errno preserved across close.
 */
void *
util_map_tmpfile(const char *dir, size_t size, size_t req_align)
{
	int saved_errno;

	/* size must be representable as off_t for posix_fallocate() */
	if (((off_t)size) < 0) {
		ERR("invalid size (%zu) for off_t", size);
		errno = EFBIG;
		return NULL;
	}

	int fd = util_tmpfile(dir, "/vmem.XXXXXX");
	if (fd == -1) {
		LOG(2, "cannot create temporary file in dir %s", dir);
		goto fail;
	}

	/* posix_fallocate() returns the error code rather than setting errno */
	errno = posix_fallocate(fd, 0, (off_t)size);
	if (errno != 0) {
		ERR("!posix_fallocate");
		goto fail;
	}

	void *addr = util_map(fd, size, 0, req_align);
	if (addr != NULL) {
		(void) close(fd);
		return addr;
	}

	LOG(2, "cannot mmap temporary file");

fail:
	saved_errno = errno;
	if (fd != -1)
		(void) close(fd);
	errno = saved_errno;
	return NULL;
}
/* * pool_parse_params -- parse pool type, file size and block size */ static int pool_params_parse(const PMEMpoolcheck *ppc, struct pool_params *params, int check) { LOG(3, NULL); int is_btt = ppc->args.pool_type == PMEMPOOL_POOL_TYPE_BTT; params->type = POOL_TYPE_UNKNOWN; params->is_poolset = util_is_poolset_file(ppc->path) == 1; int fd = util_file_open(ppc->path, NULL, 0, O_RDONLY); if (fd < 0) return -1; int ret = 0; os_stat_t stat_buf; ret = os_fstat(fd, &stat_buf); if (ret) goto out_close; ASSERT(stat_buf.st_size >= 0); params->mode = stat_buf.st_mode; struct pool_set *set; void *addr; if (params->is_poolset) { /* * Need to close the poolset because it will be opened with * flock in the following instructions. */ os_close(fd); fd = -1; if (check) { if (pool_set_map(ppc->path, &set, 0)) return -1; } else { ret = util_poolset_create_set(&set, ppc->path, 0, 0); if (ret < 0) { LOG(2, "cannot open pool set -- '%s'", ppc->path); return -1; } if (set->remote) { ERR("poolsets with remote replicas are not " "supported"); return -1; } if (util_pool_open_nocheck(set, 0)) return -1; } params->size = set->poolsize; addr = set->replica[0]->part[0].addr; /* * XXX mprotect for device dax with length not aligned to its * page granularity causes SIGBUS on the next page fault. * The length argument of this call should be changed to * set->poolsize once the kernel issue is solved. 
*/ if (mprotect(addr, set->replica[0]->repsize, PROT_READ) < 0) { ERR("!mprotect"); goto out_unmap; } params->is_dev_dax = set->replica[0]->part[0].is_dev_dax; } else if (is_btt) { params->size = (size_t)stat_buf.st_size; #ifndef _WIN32 if (params->mode & S_IFBLK) if (ioctl(fd, BLKGETSIZE64, ¶ms->size)) { ERR("!ioctl"); goto out_close; } #endif addr = NULL; } else { ssize_t s = util_file_get_size(ppc->path); if (s < 0) { ret = -1; goto out_close; } params->size = (size_t)s; addr = util_map(fd, params->size, MAP_SHARED, 1, 0); if (addr == NULL) { ret = -1; goto out_close; } params->is_dev_dax = util_file_is_device_dax(ppc->path); } /* stop processing for BTT device */ if (is_btt) { params->type = POOL_TYPE_BTT; params->is_part = false; goto out_close; } struct pool_hdr hdr; memcpy(&hdr, addr, sizeof(hdr)); util_convert2h_hdr_nocheck(&hdr); pool_params_from_header(params, &hdr); if (ppc->args.pool_type != PMEMPOOL_POOL_TYPE_DETECT) { enum pool_type declared_type = pool_check_type_to_pool_type(ppc->args.pool_type); if ((params->type & ~declared_type) != 0) { ERR("declared pool type does not match"); ret = 1; goto out_unmap; } } if (params->type == POOL_TYPE_BLK) { struct pmemblk pbp; memcpy(&pbp, addr, sizeof(pbp)); params->blk.bsize = le32toh(pbp.bsize); } else if (params->type == POOL_TYPE_OBJ) { struct pmemobjpool *pop = addr; memcpy(params->obj.layout, pop->layout, PMEMOBJ_MAX_LAYOUT); } out_unmap: if (params->is_poolset) { ASSERTeq(fd, -1); ASSERTne(addr, NULL); util_poolset_close(set, DO_NOT_DELETE_PARTS); } else if (!is_btt) { ASSERTne(fd, -1); ASSERTne(addr, NULL); munmap(addr, params->size); } out_close: if (fd != -1) os_close(fd); return ret; }
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * If empty flag is set, the file is assumed to be a new memory pool, and
 * a new pool header is created.  Otherwise, a valid header must exist.
 *
 * Takes ownership of 'fd' (closed once the mapping is established).
 * Returns the pool handle, or NULL with errno set.
 */
static PMEMlogpool *
pmemlog_map_common(int fd, size_t poolsize, int rdonly, int empty)
{
	LOG(3, "fd %d poolsize %zu rdonly %d empty %d",
			fd, poolsize, rdonly, empty);

	void *addr;
	if ((addr = util_map(fd, poolsize, rdonly)) == NULL) {
		(void) close(fd);
		return NULL;	/* util_map() set errno, called LOG */
	}

	VALGRIND_REGISTER_PMEM_MAPPING(addr, poolsize);
	VALGRIND_REGISTER_PMEM_FILE(fd, addr, poolsize, 0);

	/* the mapping keeps its own reference; fd is no longer needed */
	(void) close(fd);

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, poolsize);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemlog *plp = addr;

	if (!empty) {
		struct pool_hdr hdr;

		/* copy the header aside before validating/converting it */
		memcpy(&hdr, &plp->hdr, sizeof (hdr));

		if (!util_convert_hdr(&hdr)) {
			errno = EINVAL;
			goto err;
		}

		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
			ERR("wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != LOG_FORMAT_MAJOR) {
			ERR("log pool version %d (library expects %d)",
				hdr.major, LOG_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		/* XXX - pools sets / replicas */
		/* single-part pool: all part/replica UUIDs must equal uuid */
		if (memcmp(hdr.uuid, hdr.prev_part_uuid,
				POOL_HDR_UUID_LEN) ||
			memcmp(hdr.uuid, hdr.next_part_uuid,
				POOL_HDR_UUID_LEN) ||
			memcmp(hdr.uuid, hdr.prev_repl_uuid,
				POOL_HDR_UUID_LEN) ||
			memcmp(hdr.uuid, hdr.next_repl_uuid,
				POOL_HDR_UUID_LEN)) {
			ERR("wrong UUID");
			errno = EINVAL;
			goto err;
		}

		uint64_t hdr_start = le64toh(plp->start_offset);
		uint64_t hdr_end = le64toh(plp->end_offset);
		uint64_t hdr_write = le64toh(plp->write_offset);

		/* the log descriptor must be internally consistent */
		if ((hdr_start != roundup(sizeof (*plp),
					LOG_FORMAT_DATA_ALIGN)) ||
			(hdr_end != poolsize) || (hdr_start > hdr_end)) {
			ERR("wrong start/end offsets "
				"(start: %ju end: %ju), "
				"pool size %zu",
				hdr_start, hdr_end, poolsize);
			errno = EINVAL;
			goto err;
		}

		if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
			ERR("wrong write offset "
				"(start: %ju end: %ju write: %ju)",
				hdr_start, hdr_end, hdr_write);
			errno = EINVAL;
			goto err;
		}

		LOG(3, "start: %ju, end: %ju, write: %ju",
			hdr_start, hdr_end, hdr_write);

		int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
				LOG_FORMAT_RO_COMPAT,
				LOG_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;	/* unknown ro-compat feature */
	} else {
		LOG(3, "creating new log memory pool");

		ASSERTeq(rdonly, 0);

		struct pool_hdr *hdrp = &plp->hdr;

		/* check if the pool header is all zero */
		if (!util_is_zeroed(hdrp, sizeof (*hdrp))) {
			ERR("Non-empty file detected");
			errno = EINVAL;
			goto err;
		}

		/* create required metadata first */
		plp->start_offset = htole64(roundup(sizeof (*plp),
						LOG_FORMAT_DATA_ALIGN));
		plp->end_offset = htole64(poolsize);
		plp->write_offset = plp->start_offset;

		/* store non-volatile part of pool's descriptor */
		pmem_msync(&plp->start_offset, 3 * sizeof (uint64_t));

		/* create pool header */
		strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(LOG_FORMAT_MAJOR);
		hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		/* XXX - pools sets / replicas */
		uuid_generate(hdrp->poolset_uuid);
		memcpy(hdrp->prev_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->prev_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		memcpy(hdrp->next_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
		hdrp->crtime = htole64((uint64_t)time(NULL));

		if (util_get_arch_flags(&hdrp->arch_flags)) {
			ERR("Reading architecture flags failed\n");
			errno = EINVAL;
			goto err;
		}

		/* convert arch flags to on-media (little-endian) order */
		hdrp->arch_flags.alignment_desc =
			htole64(hdrp->arch_flags.alignment_desc);
		hdrp->arch_flags.e_machine =
			htole16(hdrp->arch_flags.e_machine);

		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);

		/* store pool's header */
		pmem_msync(hdrp, sizeof (*hdrp));
	}

	/* remove volatile part of header */
	VALGRIND_REMOVE_PMEM_MAPPING(&plp->addr,
			sizeof (struct pmemlog) -
			sizeof (struct pool_hdr) -
			3 * sizeof (uint64_t));

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	plp->addr = addr;
	plp->size = poolsize;
	plp->rdonly = rdonly;
	plp->is_pmem = is_pmem;

	if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
		ERR("!Malloc for a RW lock");
		goto err;
	}

	if ((errno = pthread_rwlock_init(plp->rwlockp, NULL))) {
		ERR("!pthread_rwlock_init");
		goto err_free;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use.  It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the rest should be kept read-only (debug version only) */
	RANGE_RO(addr + sizeof (struct pool_hdr),
			poolsize - sizeof (struct pool_hdr));

	LOG(3, "plp %p", plp);
	return plp;

err_free:
	Free((void *)plp->rwlockp);
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	VALGRIND_REMOVE_PMEM_MAPPING(addr, poolsize);
	util_unmap(addr, poolsize);
	errno = oerrno;
	return NULL;
}
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * If no valid header is found the file is initialized as a new pool
 * (unless rdonly is set, which is then an error).  Returns the pool
 * handle, or NULL with errno set; 'fd' is NOT closed here.
 */
static PMEMlog *
pmemlog_map_common(int fd, int rdonly)
{
	LOG(3, "fd %d rdonly %d", fd, rdonly);

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMLOG_MIN_POOL) {
		LOG(1, "size %lld smaller than %zu",
			(long long)stbuf.st_size, PMEMLOG_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	void *addr;
	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemlog *plp = addr;

	/* copy the header aside before validating/converting it */
	struct pool_hdr hdr;
	memcpy(&hdr, &plp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != LOG_FORMAT_MAJOR) {
			LOG(1, "log pool version %d (library expects %d)",
				hdr.major, LOG_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		uint64_t hdr_start = le64toh(plp->start_offset);
		uint64_t hdr_end = le64toh(plp->end_offset);
		uint64_t hdr_write = le64toh(plp->write_offset);

		/* the log descriptor must be internally consistent */
		if ((hdr_start != roundup(sizeof (*plp),
					LOG_FORMAT_DATA_ALIGN)) ||
			(hdr_end != stbuf.st_size) ||
			(hdr_start > hdr_end)) {
			LOG(1, "wrong start/end offsets "
				"(start: %ju end: %ju), "
				"pool size %lld",
				hdr_start, hdr_end,
				(long long)stbuf.st_size);
			errno = EINVAL;
			goto err;
		}

		if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
			LOG(1, "wrong write offset "
				"(start: %ju end: %ju write: %ju)",
				hdr_start, hdr_end, hdr_write);
			errno = EINVAL;
			goto err;
		}

		LOG(3, "start: %ju, end: %ju, write: %ju",
			hdr_start, hdr_end, hdr_write);

		int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
				LOG_FORMAT_RO_COMPAT,
				LOG_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;	/* unknown ro-compat feature */
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new log memory pool");

		struct pool_hdr *hdrp = &plp->hdr;
		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(LOG_FORMAT_MAJOR);
		hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		plp->start_offset = htole64(roundup(sizeof (*plp),
						LOG_FORMAT_DATA_ALIGN));
		plp->end_offset = htole64(stbuf.st_size);
		plp->write_offset = plp->start_offset;

		/* store non-volatile part of pool's descriptor */
		libpmem_persist(is_pmem, &plp->start_offset,
			3 * sizeof (uint64_t));
	}

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	plp->addr = addr;
	plp->size = stbuf.st_size;
	plp->rdonly = rdonly;
	plp->is_pmem = is_pmem;

	if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
		LOG(1, "!Malloc for a RW lock");
		goto err;
	}

	if (pthread_rwlock_init(plp->rwlockp, NULL)) {
		LOG(1, "!pthread_rwlock_init");
		goto err_free;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use.  It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the rest should be kept read-only (debug version only) */
	RANGE_RO(addr + sizeof (struct pool_hdr),
			stbuf.st_size - sizeof (struct pool_hdr));

	LOG(3, "plp %p", plp);
	return plp;

err_free:
	Free((void *)plp->rwlockp);
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
/* * pmemblk_map_common -- (internal) map a block memory pool * * This routine does all the work, but takes a rdonly flag so internal * calls can map a read-only pool if required. * * Passing in bsize == 0 means a valid pool header must exist (which * will supply the block size). */ static PMEMblk * pmemblk_map_common(int fd, size_t bsize, int rdonly) { LOG(3, "fd %d bsize %zu rdonly %d", fd, bsize, rdonly); /* things free by "goto err" if not NULL */ void *addr = NULL; struct btt *bttp = NULL; pthread_mutex_t *locks = NULL; struct stat stbuf; if (fstat(fd, &stbuf) < 0) { LOG(1, "!fstat"); return NULL; } if (stbuf.st_size < PMEMBLK_MIN_POOL) { LOG(1, "size %zu smaller than %zu", stbuf.st_size, PMEMBLK_MIN_POOL); errno = EINVAL; return NULL; } if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL) return NULL; /* util_map() set errno, called LOG */ /* check if the mapped region is located in persistent memory */ int is_pmem = pmem_is_pmem(addr, stbuf.st_size); /* opaque info lives at the beginning of mapped memory pool */ struct pmemblk *pbp = addr; struct pool_hdr hdr; memcpy(&hdr, &pbp->hdr, sizeof (hdr)); if (util_convert_hdr(&hdr)) { /* * valid header found */ if (strncmp(hdr.signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN)) { LOG(1, "wrong pool type: \"%s\"", hdr.signature); errno = EINVAL; goto err; } if (hdr.major != BLK_FORMAT_MAJOR) { LOG(1, "blk pool version %d (library expects %d)", hdr.major, BLK_FORMAT_MAJOR); errno = EINVAL; goto err; } size_t hdr_bsize = le32toh(pbp->bsize); if (bsize && bsize != hdr_bsize) { LOG(1, "wrong bsize (%zu), pool created with bsize %zu", bsize, hdr_bsize); errno = EINVAL; goto err; } bsize = hdr_bsize; LOG(3, "using block size from header: %zu", bsize); int retval = util_feature_check(&hdr, BLK_FORMAT_INCOMPAT, BLK_FORMAT_RO_COMPAT, BLK_FORMAT_COMPAT); if (retval < 0) goto err; else if (retval == 0) rdonly = 1; } else { /* * no valid header was found */ if (rdonly) { LOG(1, "read-only and no header found"); errno = EROFS; goto 
err; } LOG(3, "creating new blk memory pool"); struct pool_hdr *hdrp = &pbp->hdr; memset(hdrp, '\0', sizeof (*hdrp)); strncpy(hdrp->signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN); hdrp->major = htole32(BLK_FORMAT_MAJOR); hdrp->compat_features = htole32(BLK_FORMAT_COMPAT); hdrp->incompat_features = htole32(BLK_FORMAT_INCOMPAT); hdrp->ro_compat_features = htole32(BLK_FORMAT_RO_COMPAT); uuid_generate(hdrp->uuid); hdrp->crtime = htole64((uint64_t)time(NULL)); util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1); hdrp->checksum = htole64(hdrp->checksum); /* store pool's header */ libpmem_persist(is_pmem, hdrp, sizeof (*hdrp)); /* create rest of required metadata */ pbp->bsize = htole32(bsize); libpmem_persist(is_pmem, &pbp->bsize, sizeof (bsize)); } /* * Use some of the memory pool area for run-time info. This * run-time state is never loaded from the file, it is always * created here, so no need to worry about byte-order. */ pbp->addr = addr; pbp->size = stbuf.st_size; pbp->rdonly = rdonly; pbp->is_pmem = is_pmem; pbp->data = addr + roundup(sizeof (*pbp), BLK_FORMAT_DATA_ALIGN); pbp->datasize = (pbp->addr + pbp->size) - pbp->data; LOG(4, "data area %p data size %zu bsize %zu", pbp->data, pbp->datasize, bsize); int ncpus = sysconf(_SC_NPROCESSORS_ONLN); if (ncpus < 1) ncpus = 1; bttp = btt_init(pbp->datasize, (uint32_t)bsize, pbp->hdr.uuid, ncpus, pbp, &ns_cb); if (bttp == NULL) goto err; /* btt_init set errno, called LOG */ pbp->bttp = bttp; pbp->nlane = btt_nlane(pbp->bttp); pbp->next_lane = 0; if ((locks = Malloc(pbp->nlane * sizeof (*locks))) == NULL) { LOG(1, "!Malloc for lane locks"); goto err; } for (int i = 0; i < pbp->nlane; i++) if (pthread_mutex_init(&locks[i], NULL) < 0) { LOG(1, "!pthread_mutex_init"); goto err; } pbp->locks = locks; #ifdef DEBUG /* initialize debug lock */ if (pthread_mutex_init(&pbp->write_lock, NULL) < 0) { LOG(1, "!pthread_mutex_init"); goto err; } #endif /* * If possible, turn off all permissions on the pool header page. 
* * The prototype PMFS doesn't allow this when large pages are in * use not it is not considered an error if this fails. */ util_range_none(addr, sizeof (struct pool_hdr)); /* the data area should be kept read-only for debug version */ RANGE_RO(pbp->data, pbp->datasize); LOG(3, "pbp %p", pbp); return pbp; err: LOG(4, "error clean up"); int oerrno = errno; if (locks) Free((void *)locks); if (bttp) btt_fini(bttp); util_unmap(addr, stbuf.st_size); errno = oerrno; return NULL; }