/*
 * pmemblk_runtime_init -- (internal) initialize block memory pool runtime data
 */
static int
pmemblk_runtime_init(PMEMblkpool *pbp, size_t bsize, int rdonly, int is_pmem)
{
	LOG(3, "pbp %p bsize %zu rdonly %d is_pmem %d",
			pbp, bsize, rdonly, is_pmem);

	/* remove volatile part of header */
	VALGRIND_REMOVE_PMEM_MAPPING(&pbp->addr,
			sizeof (struct pmemblk) -
			sizeof (struct pool_hdr) -
			sizeof (pbp->bsize) -
			sizeof (pbp->is_zeroed));

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	pbp->rdonly = rdonly;
	pbp->is_pmem = is_pmem;
	pbp->data = (char *)pbp->addr +
			roundup(sizeof (*pbp), BLK_FORMAT_DATA_ALIGN);
	ASSERT(((char *)pbp->addr + pbp->size) >= (char *)pbp->data);
	pbp->datasize = (size_t)
			(((char *)pbp->addr + pbp->size) - (char *)pbp->data);

	LOG(4, "data area %p data size %zu bsize %zu",
			pbp->data, pbp->datasize, bsize);

	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (ncpus < 1)
		ncpus = 1;

	ns_cb.ns_is_zeroed = pbp->is_zeroed;

	/* things freed by "goto err" if not NULL */
	struct btt *bttp = NULL;
	pthread_mutex_t *locks = NULL;

	bttp = btt_init(pbp->datasize, (uint32_t)bsize, pbp->hdr.poolset_uuid,
			(unsigned)ncpus * 2, pbp, &ns_cb);

	if (bttp == NULL)
		goto err;	/* btt_init set errno, called LOG */

	pbp->bttp = bttp;

	pbp->nlane = btt_nlane(pbp->bttp);
	pbp->next_lane = 0;
	if ((locks = Malloc(pbp->nlane * sizeof (*locks))) == NULL) {
		ERR("!Malloc for lane locks");
		goto err;
	}

	for (unsigned i = 0; i < pbp->nlane; i++)
		util_mutex_init(&locks[i], NULL);

	pbp->locks = locks;

#ifdef DEBUG
	/* initialize debug lock */
	util_mutex_init(&pbp->write_lock, NULL);
#endif

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use.  It is not considered an error if this fails.
	 */
	util_range_none(pbp->addr, sizeof (struct pool_hdr));

	/* the data area should be kept read-only for debug version */
	RANGE_RO(pbp->data, pbp->datasize);

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	if (locks)
		Free((void *)locks);
	if (bttp)
		btt_fini(bttp);
	errno = oerrno;
	return -1;
}
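/*
 * Example (not part of the library): one plausible way the nlane, next_lane
 * and locks fields initialized above could be used to spread callers across
 * BTT lanes.  The round-robin selection via __sync_fetch_and_add() and the
 * helper names are illustrative assumptions, not necessarily how the library
 * actually picks lanes; the sketch relies on the internal struct pmemblk
 * fields shown in pmemblk_runtime_init().
 */
static unsigned
example_lane_enter(PMEMblkpool *pbp)
{
	/* pick a lane round-robin, then serialize on that lane's lock */
	unsigned lane = __sync_fetch_and_add(&pbp->next_lane, 1) % pbp->nlane;

	pthread_mutex_lock(&pbp->locks[lane]);
	return lane;
}

static void
example_lane_exit(PMEMblkpool *pbp, unsigned lane)
{
	pthread_mutex_unlock(&pbp->locks[lane]);
}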
/*
 * pmemlog_append -- add data to a log memory pool
 */
int
pmemlog_append(PMEMlogpool *plp, const void *buf, size_t count)
{
	int ret = 0;

	LOG(3, "plp %p buf %p count %zu", plp, buf, count);

	if (plp->rdonly) {
		ERR("can't append to read-only log");
		errno = EROFS;
		return -1;
	}

	if ((errno = pthread_rwlock_wrlock(plp->rwlockp))) {
		ERR("!pthread_rwlock_wrlock");
		return -1;
	}

	/* get the current values */
	uint64_t end_offset = le64toh(plp->end_offset);
	uint64_t write_offset = le64toh(plp->write_offset);

	if (write_offset >= end_offset) {
		/* no space left */
		errno = ENOSPC;
		ERR("!pmemlog_append");
		ret = -1;
	} else {
		/* make sure we don't write past the available space */
		if (count > (end_offset - write_offset)) {
			errno = ENOSPC;
			ERR("!pmemlog_append");
			ret = -1;
		} else {
			char *data = plp->addr;

			/*
			 * unprotect the log space range,
			 * where the new data will be stored
			 * (debug version only)
			 */
			RANGE_RW(&data[write_offset], count);

			if (plp->is_pmem)
				pmem_memcpy_nodrain(&data[write_offset],
						buf, count);
			else
				memcpy(&data[write_offset], buf, count);

			/* protect the log space range (debug version only) */
			RANGE_RO(&data[write_offset], count);

			write_offset += count;
		}
	}

	/* persist the data and the metadata only if there was no error */
	if (ret == 0)
		pmemlog_persist(plp, write_offset);

	int oerrno = errno;
	if ((errno = pthread_rwlock_unlock(plp->rwlockp)))
		ERR("!pthread_rwlock_unlock");
	errno = oerrno;

	return ret;
}
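/*
 * Example (not part of the library): a minimal caller-side sketch of
 * pmemlog_append(), assuming the path-based pmemlog_create()/pmemlog_open()
 * entry points of the public libpmemlog API.  The pool path and mode are
 * illustrative assumptions only.
 */
#include <libpmemlog.h>
#include <string.h>
#include <stdio.h>

static int
example_log_append(void)
{
	/* hypothetical pool file; create it, or fall back to opening it */
	PMEMlogpool *plp = pmemlog_create("/mnt/pmem/log.pool",
			PMEMLOG_MIN_POOL, 0666);
	if (plp == NULL)
		plp = pmemlog_open("/mnt/pmem/log.pool");
	if (plp == NULL) {
		perror("pmemlog_create/open");
		return -1;
	}

	const char line[] = "hello, persistent log\n";
	if (pmemlog_append(plp, line, strlen(line)) < 0)
		perror("pmemlog_append");	/* e.g. ENOSPC or EROFS */

	pmemlog_close(plp);
	return 0;
}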
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 */
static PMEMlog *
pmemlog_map_common(int fd, int rdonly)
{
	LOG(3, "fd %d rdonly %d", fd, rdonly);

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMLOG_MIN_POOL) {
		LOG(1, "size %lld smaller than %zu",
				(long long)stbuf.st_size, PMEMLOG_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	void *addr;
	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemlog *plp = addr;

	struct pool_hdr hdr;
	memcpy(&hdr, &plp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != LOG_FORMAT_MAJOR) {
			LOG(1, "log pool version %d (library expects %d)",
					hdr.major, LOG_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		uint64_t hdr_start = le64toh(plp->start_offset);
		uint64_t hdr_end = le64toh(plp->end_offset);
		uint64_t hdr_write = le64toh(plp->write_offset);

		if ((hdr_start != roundup(sizeof (*plp),
					LOG_FORMAT_DATA_ALIGN)) ||
				(hdr_end != stbuf.st_size) ||
				(hdr_start > hdr_end)) {
			LOG(1, "wrong start/end offsets "
				"(start: %ju end: %ju), pool size %lld",
				hdr_start, hdr_end, (long long)stbuf.st_size);
			errno = EINVAL;
			goto err;
		}

		if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
			LOG(1, "wrong write offset "
				"(start: %ju end: %ju write: %ju)",
				hdr_start, hdr_end, hdr_write);
			errno = EINVAL;
			goto err;
		}

		LOG(3, "start: %ju, end: %ju, write: %ju",
				hdr_start, hdr_end, hdr_write);

		int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
				LOG_FORMAT_RO_COMPAT, LOG_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new log memory pool");

		struct pool_hdr *hdrp = &plp->hdr;

		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(LOG_FORMAT_MAJOR);
		hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		plp->start_offset = htole64(roundup(sizeof (*plp),
						LOG_FORMAT_DATA_ALIGN));
		plp->end_offset = htole64(stbuf.st_size);
		plp->write_offset = plp->start_offset;

		/* store non-volatile part of pool's descriptor */
		libpmem_persist(is_pmem, &plp->start_offset,
					3 * sizeof (uint64_t));
	}

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	plp->addr = addr;
	plp->size = stbuf.st_size;
	plp->rdonly = rdonly;
	plp->is_pmem = is_pmem;

	if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
		LOG(1, "!Malloc for a RW lock");
		goto err;
	}

	if (pthread_rwlock_init(plp->rwlockp, NULL)) {
		LOG(1, "!pthread_rwlock_init");
		goto err_free;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use.  It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the rest should be kept read-only (debug version only) */
	RANGE_RO(addr + sizeof (struct pool_hdr),
			stbuf.st_size - sizeof (struct pool_hdr));

	LOG(3, "plp %p", plp);
	return plp;

err_free:
	Free((void *)plp->rwlockp);
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
/*
 * pmemlog_appendv -- add gathered data to a log memory pool
 */
int
pmemlog_appendv(PMEMlog *plp, const struct iovec *iov, int iovcnt)
{
	LOG(3, "plp %p iovec %p iovcnt %d", plp, iov, iovcnt);

	int ret = 0;	/* success */
	int i;

	if (plp->rdonly) {
		LOG(1, "can't append to read-only log");
		errno = EROFS;
		return -1;
	}

	if (pthread_rwlock_wrlock(plp->rwlockp)) {
		LOG(1, "!pthread_rwlock_wrlock");
		return -1;
	}

	/* get the current values */
	uint64_t end_offset = le64toh(plp->end_offset);
	uint64_t write_offset = le64toh(plp->write_offset);

	if (write_offset >= end_offset) {
		/* no space left */
		errno = ENOSPC;
		ret = -1;
	} else {
		char *data = plp->addr;
		uint64_t count = 0;
		char *buf;

		/* calculate required space */
		for (i = 0; i < iovcnt; ++i)
			count += iov[i].iov_len;

		/* check if there is enough free space */
		if (count > (end_offset - write_offset)) {
			errno = ENOSPC;
			ret = -1;
		} else {
			/* append the data */
			for (i = 0; i < iovcnt; ++i) {
				buf = iov[i].iov_base;
				count = iov[i].iov_len;

				/*
				 * unprotect the log space range,
				 * where the new data will be stored
				 * (debug version only)
				 */
				RANGE_RW(&data[write_offset], count);

				memcpy(&data[write_offset], buf, count);

				/*
				 * protect the log space range
				 * (debug version only)
				 */
				RANGE_RO(&data[write_offset], count);

				write_offset += count;
			}
		}
	}

	/* persist the data and the metadata only if there was no error */
	if (ret == 0)
		pmemlog_persist(plp, write_offset);

	int oerrno = errno;
	if (pthread_rwlock_unlock(plp->rwlockp))
		LOG(1, "!pthread_rwlock_unlock");
	errno = oerrno;

	return ret;
}
/*
 * write_layout -- (internal) write out the initial btt metadata layout
 *
 * Called with write == 1 only once in the life time of a btt namespace, when
 * the first write happens.  The caller of this routine is responsible for
 * locking out multiple threads.  This routine doesn't read anything -- by the
 * time it is called, it is known there's no layout in the namespace and a new
 * layout should be written.
 *
 * Calling with write == 0 tells this routine to do the calculations for
 * bttp->narena and bttp->nlba, but not write out any metadata.
 *
 * If successful, sets bttp->layout to 1 and returns 0.  Otherwise -1
 * is returned and errno is set, and bttp->layout remains 0 so that
 * later attempts to write will try again to create the layout.
 */
static int
write_layout(struct btt *bttp, int lane, int write)
{
	LOG(3, "bttp %p lane %d write %d", bttp, lane, write);

	ASSERT(bttp->rawsize >= BTT_MIN_SIZE);
	ASSERT(bttp->nfree);

	/*
	 * The number of arenas is the number of full arenas of
	 * size BTT_MAX_ARENA that fit into rawsize and then, if
	 * the remainder is at least BTT_MIN_SIZE in size, then
	 * that adds one more arena.
	 */
	bttp->narena = bttp->rawsize / BTT_MAX_ARENA;
	if (bttp->rawsize % BTT_MAX_ARENA >= BTT_MIN_SIZE)
		bttp->narena++;
	LOG(4, "narena %u", bttp->narena);

	int flog_size = bttp->nfree * 2 * sizeof (struct btt_flog);
	flog_size = roundup(flog_size, BTT_ALIGNMENT);

	uint32_t internal_lbasize = bttp->lbasize;
	if (internal_lbasize < BTT_MIN_LBA)
		internal_lbasize = BTT_MIN_LBA;
	internal_lbasize =
		roundup(internal_lbasize, BTT_INTERNAL_LBA_ALIGNMENT);
	LOG(4, "adjusted internal_lbasize %u", internal_lbasize);

	uint64_t total_nlba = 0;
	uint64_t rawsize = bttp->rawsize;
	int arena_num = 0;
	off_t arena_off = 0;

	/*
	 * for each arena...
	 */
	while (rawsize >= BTT_MIN_SIZE) {
		LOG(4, "layout arena %u", arena_num);

		uint64_t arena_rawsize = rawsize;
		if (arena_rawsize > BTT_MAX_ARENA) {
			arena_rawsize = BTT_MAX_ARENA;
		}
		rawsize -= arena_rawsize;
		arena_num++;

		uint64_t arena_datasize = arena_rawsize;
		arena_datasize -= 2 * sizeof (struct btt_info);
		arena_datasize -= flog_size;

		/* allow for map alignment padding */
		uint64_t internal_nlba = (arena_datasize - BTT_ALIGNMENT) /
			(internal_lbasize + BTT_MAP_ENTRY_SIZE);
		uint64_t external_nlba = internal_nlba - bttp->nfree;

		LOG(4, "internal_nlba %zu external_nlba %zu",
				internal_nlba, external_nlba);

		total_nlba += external_nlba;

		/*
		 * The rest of the loop body calculates metadata structures
		 * and lays them out for this arena.  So only continue if
		 * the write flag is set.
		 */
		if (!write)
			continue;

		uint64_t mapsize = roundup(external_nlba * BTT_MAP_ENTRY_SIZE,
							BTT_ALIGNMENT);
		arena_datasize -= mapsize;
		ASSERT(arena_datasize / internal_lbasize >= internal_nlba);

		/*
		 * Calculate offsets for the BTT info block.  These are
		 * all relative to the beginning of the arena.
		 */
		uint64_t nextoff;
		if (rawsize)
			nextoff = arena_rawsize;
		else
			nextoff = 0;
		uint64_t infooff = arena_rawsize - sizeof (struct btt_info);
		uint64_t flogoff = infooff - flog_size;
		uint64_t mapoff = flogoff - mapsize;
		uint64_t dataoff = sizeof (struct btt_info);

		LOG(4, "nextoff 0x%016lx", nextoff);
		LOG(4, "dataoff 0x%016lx", dataoff);
		LOG(4, "mapoff 0x%016lx", mapoff);
		LOG(4, "flogoff 0x%016lx", flogoff);
		LOG(4, "infooff 0x%016lx", infooff);

		ASSERTeq(arena_datasize, mapoff - dataoff);

		/* write out the initial map, identity style */
		off_t map_entry_off = arena_off + mapoff;
		uint32_t *mapp = NULL;
		int mlen = 0;
		int next_index = 0;
		int remaining = 0;
		for (int i = 0; i < external_nlba; i++) {
			if (remaining == 0) {
				/* flush previous mapped area */
				if (mapp != NULL) {
					/*
					 * Protect the memory again
					 * (debug version only).
					 * If (mapp != NULL) it had to be
					 * unprotected earlier.
					 */
					RANGE_RO(mapp, mlen);
					(*bttp->ns_cbp->nssync)(bttp->ns,
						lane, mapp, mlen);
				}
				/* request a mapping of remaining map area */
				mlen = (*bttp->ns_cbp->nsmap)(bttp->ns,
					lane, (void **)&mapp,
					(external_nlba - i) *
					sizeof (uint32_t), map_entry_off);

				if (mlen < 0)
					return -1;

				/* unprotect the memory (debug version only) */
				RANGE_RW(mapp, mlen);

				remaining = mlen;
				next_index = 0;
			}

			mapp[next_index++] = htole32(i | BTT_MAP_ENTRY_ZERO);
			remaining -= sizeof (uint32_t);
		}

		/* protect the memory again (debug version only) */
		RANGE_RO(mapp, mlen);

		/* flush previous mapped area */
		if (mapp != NULL)
			(*bttp->ns_cbp->nssync)(bttp->ns, lane, mapp, mlen);

		/* write out the initial flog */
		off_t flog_entry_off = arena_off + flogoff;
		uint32_t next_free_lba = external_nlba;
		for (int i = 0; i < bttp->nfree; i++) {
			struct btt_flog flog;
			flog.lba = 0;
			flog.old_map = flog.new_map =
				htole32(next_free_lba | BTT_MAP_ENTRY_ZERO);
			flog.seq = htole32(1);

			/*
			 * Write both btt_flog structs in the pair, writing
			 * the second one as all zeros.
			 */
			LOG(6, "flog[%d] entry off %zu initial %u + zero = %u",
					i, flog_entry_off, next_free_lba,
					next_free_lba | BTT_MAP_ENTRY_ZERO);
			if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &flog,
					sizeof (flog), flog_entry_off) < 0)
				return -1;
			flog_entry_off += sizeof (flog);

			LOG(6, "flog[%d] entry off %zu zeros",
					i, flog_entry_off);
			if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &Zflog,
					sizeof (Zflog), flog_entry_off) < 0)
				return -1;
			flog_entry_off += sizeof (flog);

			next_free_lba++;
		}

		/*
		 * Construct the BTT info block and write it out
		 * at both the beginning and end of the arena.
		 */
		struct btt_info info;
		memset(&info, '\0', sizeof (info));
		memcpy(info.sig, Sig, BTTINFO_SIG_LEN);
		memcpy(info.parent_uuid, bttp->parent_uuid, BTTINFO_UUID_LEN);
		info.major = htole16(BTTINFO_MAJOR_VERSION);
		info.minor = htole16(BTTINFO_MINOR_VERSION);
		info.external_lbasize = htole32(bttp->lbasize);
		info.external_nlba = htole32(external_nlba);
		info.internal_lbasize = htole32(internal_lbasize);
		info.internal_nlba = htole32(internal_nlba);
		info.nfree = htole32(bttp->nfree);
		info.infosize = htole32(sizeof (info));
		info.nextoff = htole64(nextoff);
		info.dataoff = htole64(dataoff);
		info.mapoff = htole64(mapoff);
		info.flogoff = htole64(flogoff);
		info.infooff = htole64(infooff);

		util_checksum(&info, sizeof (info), &info.checksum, 1);

		/* primary copy at the start of the arena... */
		if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &info,
				sizeof (info), arena_off) < 0)
			return -1;

		/* ...and the backup copy at the end of the arena */
		if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, &info,
				sizeof (info), arena_off + infooff) < 0)
			return -1;

		arena_off += nextoff;
	}

	ASSERTeq(bttp->narena, arena_num);

	bttp->nlba = total_nlba;

	if (write) {
		/*
		 * The layout is written now, so load up the arenas.
		 */
		return read_arenas(bttp, lane, bttp->narena);
	}

	return 0;
}
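/*
 * Example (not part of the library): a standalone sketch of the arena-count
 * calculation performed at the top of write_layout().  The EX_* constants
 * below are illustrative assumptions, not the real BTT limits; with these
 * values a namespace of 1.5 * EX_BTT_MAX_ARENA bytes yields 2 arenas.
 */
#include <stdint.h>

#define EX_BTT_MAX_ARENA	(1ULL << 39)	/* assumed per-arena cap */
#define EX_BTT_MIN_SIZE		(1ULL << 24)	/* assumed minimum arena */

static unsigned
example_narena(uint64_t rawsize)
{
	/* full arenas of EX_BTT_MAX_ARENA bytes each... */
	unsigned narena = rawsize / EX_BTT_MAX_ARENA;

	/* ...plus one more if the remainder is still a usable arena */
	if (rawsize % EX_BTT_MAX_ARENA >= EX_BTT_MIN_SIZE)
		narena++;

	return narena;
}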
/*
 * pmemlog_appendv -- add gathered data to a log memory pool
 */
int
pmemlog_appendv(PMEMlogpool *plp, const struct iovec *iov, int iovcnt)
{
	LOG(3, "plp %p iovec %p iovcnt %d", plp, iov, iovcnt);

	int ret = 0;	/* success */
	int i;

	ASSERT(iovcnt > 0);

	if (plp->rdonly) {
		ERR("can't append to read-only log");
		errno = EROFS;
		return -1;
	}

	if ((errno = pthread_rwlock_wrlock(plp->rwlockp))) {
		ERR("!pthread_rwlock_wrlock");
		return -1;
	}

	/* get the current values */
	uint64_t end_offset = le64toh(plp->end_offset);
	uint64_t write_offset = le64toh(plp->write_offset);

	if (write_offset >= end_offset) {
		/* no space left */
		errno = ENOSPC;
		ERR("!pmemlog_appendv");
		ret = -1;
		goto end;
	}

	char *data = plp->addr;
	uint64_t count = 0;
	char *buf;

	/* calculate required space */
	for (i = 0; i < iovcnt; ++i)
		count += iov[i].iov_len;

	/* check if there is enough free space */
	if (count > (end_offset - write_offset)) {
		errno = ENOSPC;
		ret = -1;
		goto end;
	}

	/* append the data */
	for (i = 0; i < iovcnt; ++i) {
		buf = iov[i].iov_base;
		count = iov[i].iov_len;

		/*
		 * unprotect the log space range, where the new data
		 * will be stored (debug version only)
		 */
		RANGE_RW(&data[write_offset], count, plp->is_dax);

		if (plp->is_pmem)
			pmem_memcpy_nodrain(&data[write_offset], buf, count);
		else
			memcpy(&data[write_offset], buf, count);

		/* protect the log space range (debug version only) */
		RANGE_RO(&data[write_offset], count, plp->is_dax);

		write_offset += count;
	}

	/* persist the data and the metadata */
	pmemlog_persist(plp, write_offset);

end:
	util_rwlock_unlock(plp->rwlockp);

	return ret;
}
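/*
 * Example (not part of the library): a caller-side sketch of gathering two
 * buffers into a single append with pmemlog_appendv().  Pool creation is
 * assumed to follow the earlier pmemlog_create()/pmemlog_open() sketch;
 * the record contents here are placeholders.
 */
#include <libpmemlog.h>
#include <sys/uio.h>
#include <string.h>
#include <stdio.h>

static int
example_log_appendv(PMEMlogpool *plp)
{
	const char hdr[] = "rec:";
	const char payload[] = "payload bytes\n";

	struct iovec iov[2];
	iov[0].iov_base = (void *)hdr;
	iov[0].iov_len = strlen(hdr);
	iov[1].iov_base = (void *)payload;
	iov[1].iov_len = strlen(payload);

	/* both chunks land contiguously, then data and metadata persist */
	if (pmemlog_appendv(plp, iov, 2) < 0) {
		perror("pmemlog_appendv");	/* ENOSPC if the log is full */
		return -1;
	}
	return 0;
}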
/*
 * pmemblk_map_common -- (internal) map a block memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * Passing in bsize == 0 means a valid pool header must exist (which
 * will supply the block size).
 */
static PMEMblk *
pmemblk_map_common(int fd, size_t bsize, int rdonly)
{
	LOG(3, "fd %d bsize %zu rdonly %d", fd, bsize, rdonly);

	/* things freed by "goto err" if not NULL */
	void *addr = NULL;
	struct btt *bttp = NULL;
	pthread_mutex_t *locks = NULL;

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMBLK_MIN_POOL) {
		LOG(1, "size %lld smaller than %zu",
				(long long)stbuf.st_size, PMEMBLK_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemblk *pbp = addr;

	struct pool_hdr hdr;
	memcpy(&hdr, &pbp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != BLK_FORMAT_MAJOR) {
			LOG(1, "blk pool version %d (library expects %d)",
					hdr.major, BLK_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		size_t hdr_bsize = le32toh(pbp->bsize);
		if (bsize && bsize != hdr_bsize) {
			LOG(1, "wrong bsize (%zu), pool created with bsize %zu",
					bsize, hdr_bsize);
			errno = EINVAL;
			goto err;
		}
		bsize = hdr_bsize;
		LOG(3, "using block size from header: %zu", bsize);

		int retval = util_feature_check(&hdr, BLK_FORMAT_INCOMPAT,
				BLK_FORMAT_RO_COMPAT, BLK_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new blk memory pool");

		struct pool_hdr *hdrp = &pbp->hdr;

		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(BLK_FORMAT_MAJOR);
		hdrp->compat_features = htole32(BLK_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(BLK_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(BLK_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		pbp->bsize = htole32(bsize);
		libpmem_persist(is_pmem, &pbp->bsize, sizeof (bsize));
	}

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	pbp->addr = addr;
	pbp->size = stbuf.st_size;
	pbp->rdonly = rdonly;
	pbp->is_pmem = is_pmem;
	pbp->data = addr + roundup(sizeof (*pbp), BLK_FORMAT_DATA_ALIGN);
	pbp->datasize = (pbp->addr + pbp->size) - pbp->data;

	LOG(4, "data area %p data size %zu bsize %zu",
			pbp->data, pbp->datasize, bsize);

	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (ncpus < 1)
		ncpus = 1;

	bttp = btt_init(pbp->datasize, (uint32_t)bsize, pbp->hdr.uuid,
			ncpus, pbp, &ns_cb);

	if (bttp == NULL)
		goto err;	/* btt_init set errno, called LOG */

	pbp->bttp = bttp;

	pbp->nlane = btt_nlane(pbp->bttp);
	pbp->next_lane = 0;
	if ((locks = Malloc(pbp->nlane * sizeof (*locks))) == NULL) {
		LOG(1, "!Malloc for lane locks");
		goto err;
	}

	for (int i = 0; i < pbp->nlane; i++)
		if ((errno = pthread_mutex_init(&locks[i], NULL))) {
			LOG(1, "!pthread_mutex_init");
			goto err;
		}

	pbp->locks = locks;

#ifdef DEBUG
	/* initialize debug lock */
	if ((errno = pthread_mutex_init(&pbp->write_lock, NULL))) {
		LOG(1, "!pthread_mutex_init");
		goto err;
	}
#endif

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use.  It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the data area should be kept read-only for debug version */
	RANGE_RO(pbp->data, pbp->datasize);

	LOG(3, "pbp %p", pbp);
	return pbp;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	if (locks)
		Free((void *)locks);
	if (bttp)
		btt_fini(bttp);
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
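/*
 * Example (not part of the library): a caller-side sketch of the block pool
 * mapping described above, assuming the path-based pmemblk_create()/
 * pmemblk_open() entry points of the public libpmemblk API.  Opening with
 * bsize == 0 asks the library to take the block size from the existing pool
 * header, matching the bsize == 0 convention documented for
 * pmemblk_map_common().  The pool path and block size are illustrative
 * assumptions.
 */
#include <libpmemblk.h>
#include <string.h>
#include <stdio.h>

#define EXAMPLE_PATH	"/mnt/pmem/blk.pool"
#define EXAMPLE_BSIZE	512

static int
example_blk_roundtrip(void)
{
	/* create a new pool, or fall back to opening an existing one */
	PMEMblkpool *pbp = pmemblk_create(EXAMPLE_PATH, EXAMPLE_BSIZE,
			PMEMBLK_MIN_POOL, 0666);
	if (pbp == NULL)
		pbp = pmemblk_open(EXAMPLE_PATH, 0);	/* bsize from header */
	if (pbp == NULL) {
		perror("pmemblk_create/open");
		return -1;
	}

	char buf[EXAMPLE_BSIZE];
	memset(buf, 0xab, sizeof (buf));

	/* write then read back block 0; both go through a BTT lane */
	if (pmemblk_write(pbp, buf, 0) < 0 ||
	    pmemblk_read(pbp, buf, 0) < 0)
		perror("pmemblk_write/read");

	printf("pool holds %zu blocks of %zu bytes\n",
			pmemblk_nblock(pbp), pmemblk_bsize(pbp));

	pmemblk_close(pbp);
	return 0;
}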