Example #1
File: file.c Project: ChandKV/nvml
/*
 * util_file_pread -- reads from a file with an offset
 */
ssize_t
util_file_pread(const char *path, void *buffer, size_t size,
	off_t offset)
{
	if (!util_file_is_device_dax(path)) {
		int fd = util_file_open(path, NULL, 0, O_RDONLY);
		if (fd < 0)
			return -1;

		ssize_t read_len = pread(fd, buffer, size, offset);
		int olderrno = errno;
		(void) close(fd);
		errno = olderrno;
		return read_len;
	}

	ssize_t file_size = util_file_get_size(path);
	if (file_size < 0)
		return -1;

	size_t max_size = (size_t)(file_size - offset);
	if (size > max_size) {
		LOG(1, "Requested size of read goes beyond the mapped memory");
		size = max_size;
	}

	void *addr = util_file_map_whole(path);
	if (addr == NULL)
		return -1;

	memcpy(buffer, ADDR_SUM(addr, offset), size);
	util_unmap(addr, (size_t)file_size);
	return (ssize_t)size;
}
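A minimal caller sketch for the helper above; the path is hypothetical and the declaration of util_file_pread() is assumed to be in scope:

#include <stdio.h>
#include <sys/types.h>

static int
dump_pool_start(const char *path)
{
	char buf[4096];

	/* read the first 4 KiB; the helper handles Device DAX transparently */
	ssize_t n = util_file_pread(path, buf, sizeof(buf), 0);
	if (n < 0) {
		perror("util_file_pread");	/* errno preserved across close() */
		return -1;
	}

	printf("read %zd bytes from \"%s\"\n", n, path);
	return 0;
}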
Example #2
File: file.c Project: ChandKV/nvml
/*
 * util_file_zero_whole -- zeroes the entire file
 */
int
util_file_zero_whole(const char *path)
{
	int fd;
	int olderrno;
	int ret = 0;

	if ((fd = open(path, O_RDWR)) < 0)
		return -1;

	ssize_t size = util_file_get_size(path);
	if (size < 0) {
		ret = -1;
		goto out;
	}

	void *addr = util_map(fd, (size_t)size, 0, 0);
	if (addr == NULL) {
		ret = -1;
		goto out;
	}

	/* zero initialize the entire file */
	memset(addr, 0, (size_t)size);

	util_unmap(addr, (size_t)size);

out:
	olderrno = errno;
	(void) close(fd);
	errno = olderrno;

	return ret;
}
Example #3
File: set.c Project: jebtang/nvml
/*
 * util_pool_create -- create a new memory pool (set or a single file)
 *
 * On success returns 0 and a pointer to a newly allocated structure
 * containing the info of all the parts of the pool set and replicas.
 */
int
util_pool_create(struct pool_set **setp, const char *path, size_t poolsize,
	size_t minsize, size_t hdrsize, const char *sig,
	uint32_t major, uint32_t compat, uint32_t incompat, uint32_t ro_compat)
{
	LOG(3, "setp %p path %s poolsize %zu minsize %zu "
		"hdrsize %zu sig %s major %u "
		"compat %#x incompat %#x ro_comapt %#x",
		setp, path, poolsize, minsize, hdrsize,
		sig, major, compat, incompat, ro_compat);

	int flags = MAP_SHARED;

	int ret = util_poolset_create(setp, path, poolsize, minsize);
	if (ret < 0) {
		LOG(2, "cannot create pool set");
		return -1;
	}

	struct pool_set *set = *setp;

	ASSERT(set->nreplicas > 0);

	set->zeroed = 1;
	set->poolsize = SIZE_MAX;

	/* generate pool set UUID */
	uuid_generate(set->uuid);

	/* generate UUID's for all the parts */
	for (unsigned r = 0; r < set->nreplicas; r++) {
		struct pool_replica *rep = set->replica[r];
		for (unsigned i = 0; i < rep->nparts; i++)
			uuid_generate(rep->part[i].uuid);
	}

	for (unsigned r = 0; r < set->nreplicas; r++) {
		if (util_replica_create(set, r, flags, hdrsize, sig,
				major, compat, incompat, ro_compat) != 0) {
			LOG(2, "replica creation failed");
			goto err;
		}
	}

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	for (unsigned r = 0; r < set->nreplicas; r++) {
		struct pool_replica *rep = set->replica[r];
		VALGRIND_REMOVE_PMEM_MAPPING(rep->part[0].addr,
						rep->part[0].size);
		util_unmap(rep->part[0].addr, rep->part[0].size);
	}
	util_poolset_close(set, 1);
	errno = oerrno;
	return -1;
}
Example #4
File: vmem.c Project: mdalecki/nvml
/*
 * vmem_pool_delete -- delete a memory pool
 */
void
vmem_pool_delete(VMEM *vmp)
{
	LOG(3, "vmp %p", vmp);

	je_vmem_pool_delete((pool_t *)((uintptr_t)vmp + Header_size));

	if (vmp->caller_mapped == 0)
		util_unmap(vmp->addr, vmp->size);
}
Example #5
/*
 * pmemlog_unmap -- unmap a log memory pool
 */
void
pmemlog_unmap(PMEMlog *plp)
{
	LOG(3, "plp %p", plp);

	if (pthread_rwlock_destroy(plp->rwlockp))
		LOG(1, "!pthread_rwlock_destroy");
	Free((void *)plp->rwlockp);
	util_unmap(plp->addr, plp->size);
}
Example #6
File: pmem.c Project: jxy859/nvml
/*
 * pmem_unmap -- unmap the specified region
 */
int
pmem_unmap(void *addr, size_t len)
{
	LOG(3, "addr %p len %zu", addr, len);

	int ret = util_unmap(addr, len);

	VALGRIND_REMOVE_PMEM_MAPPING(addr, len);

	return ret;
}
Example #7
File: pmem.c Project: ldorau/nvml
/*
 * pmem_unmap -- unmap the specified region
 */
int
pmem_unmap(void *addr, size_t len)
{
	LOG(3, "addr %p len %zu", addr, len);

#ifndef _WIN32
	util_range_unregister(addr, len);
#endif
	VALGRIND_REMOVE_PMEM_MAPPING(addr, len);
	return util_unmap(addr, len);
}
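For callers outside the library, pmem_unmap() pairs with the mapping call; a minimal sketch assuming the public libpmem pmem_map_file() API (path and size are hypothetical):

#include <libpmem.h>
#include <stdio.h>

int
main(void)
{
	size_t mapped_len;
	int is_pmem;

	/* create (if needed) and map a 4 MiB pool file */
	void *addr = pmem_map_file("/mnt/pmem/example", 4 << 20,
			PMEM_FILE_CREATE, 0666, &mapped_len, &is_pmem);
	if (addr == NULL) {
		perror("pmem_map_file");
		return 1;
	}

	/* ... persistent writes via pmem_memcpy_persist() etc. ... */

	/* releases the mapping and its pmem registration */
	if (pmem_unmap(addr, mapped_len) != 0)
		perror("pmem_unmap");

	return 0;
}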
Example #8
File: file.c Project: krzycz/nvml
/*
 * util_file_zero -- zeroes the specified region of the file
 */
int
util_file_zero(const char *path, os_off_t off, size_t len)
{
	LOG(3, "path \"%s\" off %ju len %zu", path, off, len);

	int fd;
	int olderrno;
	int ret = 0;

	if ((fd = os_open(path, O_RDWR)) < 0) {
		ERR("!open \"%s\"", path);
		return -1;
	}

	ssize_t size = util_file_get_size(path);
	if (size < 0) {
		LOG(2, "cannot determine file length \"%s\"", path);
		ret = -1;
		goto out;
	}

	if (off > size) {
		LOG(2, "offset beyond file length, %ju > %ju", off, size);
		ret = -1;
		goto out;
	}

	if ((size_t)off + len > (size_t)size) {
		LOG(2, "requested size of write goes beyond the file length, "
					"%zu > %zu", (size_t)off + len, size);
		LOG(4, "adjusting len to %zu", size - off);
		len = (size_t)(size - off);
	}

	void *addr = util_map(fd, (size_t)size, MAP_SHARED, 0, 0, NULL);
	if (addr == NULL) {
		LOG(2, "failed to map entire file \"%s\"", path);
		ret = -1;
		goto out;
	}

	/* zero initialize the specified region */
	memset((char *)addr + off, 0, len);

	util_unmap(addr, (size_t)size);

out:
	olderrno = errno;
	(void) os_close(fd);
	errno = olderrno;

	return ret;
}
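A hedged caller sketch for util_file_zero(): zero only a header-sized region at the start of a file (the region size is hypothetical; the declaration above is assumed to be visible):

#include <stdio.h>

static int
wipe_header(const char *path)
{
	/* zero the first 8 KiB, e.g. to invalidate a stale pool header */
	if (util_file_zero(path, 0, 8192) < 0) {
		perror("util_file_zero");
		return -1;
	}
	return 0;
}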
Example #9
File: log.c Project: harrybaa/nvml
/*
 * pmemlog_close -- close a log memory pool
 */
void
pmemlog_close(PMEMlogpool *plp)
{
    LOG(3, "plp %p", plp);

    if ((errno = pthread_rwlock_destroy(plp->rwlockp)))
        ERR("!pthread_rwlock_destroy");
    Free((void *)plp->rwlockp);

    VALGRIND_REMOVE_PMEM_MAPPING(plp->addr, plp->size);
    util_unmap(plp->addr, plp->size);
}
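The close path above pairs with the public libpmemlog lifecycle; a minimal create/append/close sketch, assuming the standard pmemlog_create() and pmemlog_append() API (the path is hypothetical):

#include <libpmemlog.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	PMEMlogpool *plp = pmemlog_create("/mnt/pmem/logpool",
			PMEMLOG_MIN_POOL, 0666);
	if (plp == NULL) {
		perror("pmemlog_create");
		return 1;
	}

	const char *msg = "hello, log\n";
	if (pmemlog_append(plp, msg, strlen(msg)) < 0)
		perror("pmemlog_append");

	/* pmemlog_close() destroys the lock and unmaps the pool */
	pmemlog_close(plp);
	return 0;
}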
Example #10
/*
 * libvmmalloc_create -- (internal) create a memory pool in a temp file
 */
static VMEM *
libvmmalloc_create(const char *dir, size_t size)
{
	LOG(3, "dir \"%s\" size %zu", dir, size);

	if (size < VMMALLOC_MIN_POOL) {
		LOG(1, "size %zu smaller than %zu", size, VMMALLOC_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	/* silently enforce multiple of page size */
	size = roundup(size, Pagesize);

	Fd = util_tmpfile(dir, size);
	if (Fd == -1)
		return NULL;

	void *addr;
	if ((addr = util_map(Fd, size, 0, 4 << 20)) == NULL)
		return NULL;

	/* store opaque info at beginning of mapped area */
	struct vmem *vmp = addr;
	memset(&vmp->hdr, '\0', sizeof (vmp->hdr));
	memcpy(vmp->hdr.signature, VMEM_HDR_SIG, POOL_HDR_SIG_LEN);
	vmp->addr = addr;
	vmp->size = size;
	vmp->caller_mapped = 0;

	/* Prepare pool for jemalloc */
	if (je_vmem_pool_create((void *)((uintptr_t)addr + Header_size),
			size - Header_size, 1) == NULL) {
		LOG(1, "vmem pool creation failed");
		util_unmap(vmp->addr, vmp->size);
		return NULL;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	LOG(3, "vmp %p", vmp);
	return vmp;
}
Example #11
File: file.c Project: krzycz/nvml
/*
 * util_file_pread -- reads from a file with an offset
 */
ssize_t
util_file_pread(const char *path, void *buffer, size_t size,
	os_off_t offset)
{
	LOG(3, "path \"%s\" buffer %p size %zu offset %ju",
			path, buffer, size, offset);

	enum file_type type = util_file_get_type(path);
	if (type < 0)
		return -1;

	if (type == TYPE_NORMAL) {
		int fd = util_file_open(path, NULL, 0, O_RDONLY);
		if (fd < 0) {
			LOG(2, "failed to open file \"%s\"", path);
			return -1;
		}

		ssize_t read_len = pread(fd, buffer, size, offset);
		int olderrno = errno;
		(void) os_close(fd);
		errno = olderrno;
		return read_len;
	}

	ssize_t file_size = util_file_get_size(path);
	if (file_size < 0) {
		LOG(2, "cannot determine file length \"%s\"", path);
		return -1;
	}

	size_t max_size = (size_t)(file_size - offset);
	if (size > max_size) {
		LOG(2, "requested size of read goes beyond the file length, "
			"%zu > %zu", size, max_size);
		LOG(4, "adjusting size to %zu", max_size);
		size = max_size;
	}

	void *addr = util_file_map_whole(path);
	if (addr == NULL) {
		LOG(2, "failed to map entire file \"%s\"", path);
		return -1;
	}

	memcpy(buffer, ADDR_SUM(addr, offset), size);
	util_unmap(addr, (size_t)file_size);
	return (ssize_t)size;
}
Example #12
/*
 * vmem_delete -- delete a memory pool
 */
void
vmem_delete(VMEM *vmp)
{
	LOG(3, "vmp %p", vmp);

	int ret = je_vmem_pool_delete((pool_t *)((uintptr_t)vmp + Header_size));
	if (ret != 0) {
		ERR("invalid pool handle: %p", vmp);
		errno = EINVAL;
		return;
	}

	util_range_rw(vmp->addr, sizeof (struct pool_hdr));

	if (vmp->caller_mapped == 0)
		util_unmap(vmp->addr, vmp->size);
}
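vmem_delete() above is the teardown half of the volatile-pool API; a short sketch using the later libvmem naming (vmem_create/vmem_malloc/vmem_free), with a hypothetical directory:

#include <libvmem.h>
#include <stdio.h>

int
main(void)
{
	VMEM *vmp = vmem_create("/tmp", VMEM_MIN_POOL);
	if (vmp == NULL) {
		perror("vmem_create");
		return 1;
	}

	int *p = vmem_malloc(vmp, sizeof(*p));
	if (p != NULL) {
		*p = 42;
		vmem_free(vmp, p);
	}

	/* unmaps the pool unless the caller supplied the memory */
	vmem_delete(vmp);
	return 0;
}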
Example #13
File: blk.c Project: mdalecki/nvml
/*
 * pmemblk_unmap -- unmap a block memory pool
 */
void
pmemblk_unmap(PMEMblk *pbp)
{
	LOG(3, "pbp %p", pbp);

	btt_fini(pbp->bttp);
	if (pbp->locks) {
		for (int i = 0; i < pbp->nlane; i++)
			pthread_mutex_destroy(&pbp->locks[i]);
		Free((void *)pbp->locks);
	}

#ifdef DEBUG
	/* destroy debug lock */
	pthread_mutex_destroy(&pbp->write_lock);
#endif

	util_unmap(pbp->addr, pbp->size);
}
Example #14
/*
 * is_pmem -- checks if given path points to pmem-aware filesystem
 */
static int
is_pmem(const char *path)
{
	int ret;

	void *addr = util_map_tmpfile(path, SIZE, 0);
	if (addr == NULL) {
		fprintf(stderr, "file creation failed\n");
		return -1;
	}

	if (pmem_is_pmem(addr, SIZE))
		ret = 1;
	else
		ret = 0;

	util_unmap(addr, SIZE);

	return ret;
}
Example #15
File: blk.c Project: xguo/nvml
/*
 * pmemblk_close -- close a block memory pool
 */
void
pmemblk_close(PMEMblkpool *pbp)
{
	LOG(3, "pbp %p", pbp);

	btt_fini(pbp->bttp);
	if (pbp->locks) {
		for (unsigned i = 0; i < pbp->nlane; i++)
			pthread_mutex_destroy(&pbp->locks[i]);
		Free((void *)pbp->locks);
	}

#ifdef DEBUG
	/* destroy debug lock */
	pthread_mutex_destroy(&pbp->write_lock);
#endif

	VALGRIND_REMOVE_PMEM_MAPPING(pbp->addr, pbp->size);
	util_unmap(pbp->addr, pbp->size);
}
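pmemblk_close() above pairs with pool creation and block I/O in the public libpmemblk API; a minimal sketch assuming pmemblk_create() and pmemblk_write() (path, block size and contents are hypothetical):

#include <libpmemblk.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* create a pool of 1 KiB blocks */
	PMEMblkpool *pbp = pmemblk_create("/mnt/pmem/blkpool", 1024,
			PMEMBLK_MIN_POOL, 0666);
	if (pbp == NULL) {
		perror("pmemblk_create");
		return 1;
	}

	char buf[1024];
	memset(buf, 'x', sizeof(buf));
	if (pmemblk_write(pbp, buf, 0) < 0)	/* write block #0 */
		perror("pmemblk_write");

	/* tears down the BTT, lane locks and the mapping */
	pmemblk_close(pbp);
	return 0;
}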
Example #16
File: obj.c Project: jxy859/nvml
/*
 * pmemobj_check -- transactional memory pool consistency check
 */
int
pmemobj_check(const char *path, const char *layout)
{
	LOG(3, "path %s layout %s", path, layout);

	PMEMobjpool *pop = pmemobj_open_common(path, layout, 1, 0);
	if (pop == NULL)
		return -1;	/* errno set by pmemobj_open_common() */

	int consistent = 1;

	/*
	 * For replicated pools, basic consistency check is performed
	 * in pmemobj_open_common().
	 */
	if (pop->replica == NULL)
		consistent = pmemobj_check_basic(pop);

	if (consistent && (errno = pmemobj_boot(pop)) != 0) {
		LOG(3, "!pmemobj_boot");
		consistent = 0;
	}

	if (consistent) {
		pmemobj_cleanup(pop);
	} else {
		/* unmap all the replicas */
		PMEMobjpool *rep;
		do {
			rep = pop->replica;
			VALGRIND_REMOVE_PMEM_MAPPING(pop->addr, pop->size);
			util_unmap(pop->addr, pop->size);
			pop = rep;
		} while (pop);
	}

	if (consistent)
		LOG(4, "pool consistency check OK");

	return consistent;
}
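A caller sketch for pmemobj_check() based on the return convention shown above (1 consistent, 0 inconsistent, -1 error); path and layout name are hypothetical:

#include <libpmemobj.h>
#include <stdio.h>

int
main(void)
{
	int rc = pmemobj_check("/mnt/pmem/objpool", "my_layout");
	if (rc < 0)
		perror("pmemobj_check");	/* errno set by the open/boot path */
	else if (rc == 0)
		fprintf(stderr, "pool is inconsistent\n");
	else
		printf("pool consistency check OK\n");

	return rc == 1 ? 0 : 1;
}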
Example #17
File: obj.c Project: jxy859/nvml
/*
 * pmemobj_cleanup -- (internal) cleanup the pool and unmap
 */
static void
pmemobj_cleanup(PMEMobjpool *pop)
{
	LOG(3, "pop %p", pop);

	if ((errno = heap_cleanup(pop)) != 0)
		ERR("!heap_cleanup");

	if ((errno = lane_cleanup(pop)) != 0)
		ERR("!lane_cleanup");

	VALGRIND_DO_DESTROY_MEMPOOL(pop);

	/* unmap all the replicas */
	PMEMobjpool *rep;
	do {
		rep = pop->replica;
		VALGRIND_REMOVE_PMEM_MAPPING(pop->addr, pop->size);
		util_unmap(pop->addr, pop->size);
		pop = rep;
	} while (pop);
}
Example #18
File: set.c Project: jebtang/nvml
/*
 * util_pool_open -- open a memory pool (set or a single file)
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 */
int
util_pool_open(struct pool_set **setp, const char *path, int rdonly,
	size_t minsize, size_t hdrsize, const char *sig,
	uint32_t major, uint32_t compat, uint32_t incompat, uint32_t ro_compat)
{
	LOG(3, "setp %p path %s rdonly %d minsize %zu "
		"hdrsize %zu sig %s major %u "
		"compat %#x incompat %#x ro_comapt %#x",
		setp, path, rdonly, minsize, hdrsize,
		sig, major, compat, incompat, ro_compat);

	int flags = rdonly ? MAP_PRIVATE|MAP_NORESERVE : MAP_SHARED;

	int ret = util_poolset_open(setp, path, minsize);
	if (ret < 0) {
		LOG(2, "cannot open pool set");
		return -1;
	}

	struct pool_set *set = *setp;

	ASSERT(set->nreplicas > 0);

	set->rdonly = 0;
	set->poolsize = SIZE_MAX;

	for (unsigned r = 0; r < set->nreplicas; r++) {
		if (util_replica_open(set, r, flags, hdrsize, sig,
				major, compat, incompat, ro_compat) != 0) {
			LOG(2, "replica open failed");
			goto err;
		}
	}

	/* check replicas linkage */
	for (unsigned r = 0; r < set->nreplicas; r++) {
		if (memcmp(HDR(REP(set, r - 1), 0)->uuid,
					HDR(REP(set, r), 0)->prev_repl_uuid,
					POOL_HDR_UUID_LEN) ||
		    memcmp(HDR(REP(set, r + 1), 0)->uuid,
					HDR(REP(set, r), 0)->next_repl_uuid,
					POOL_HDR_UUID_LEN)) {
			ERR("wrong replica UUID");
			errno = EINVAL;
			goto err;
		}
	}

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	for (unsigned r = 0; r < set->nreplicas; r++) {
		struct pool_replica *rep = set->replica[r];
		VALGRIND_REMOVE_PMEM_MAPPING(rep->part[0].addr,
						rep->part[0].size);
		util_unmap(rep->part[0].addr, rep->part[0].size);
	}
	util_poolset_close(set, 0);
	errno = oerrno;
	return -1;
}
Example #19
File: blk.c Project: mdalecki/nvml
/*
 * pmemblk_map_common -- (internal) map a block memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * Passing in bsize == 0 means a valid pool header must exist (which
 * will supply the block size).
 */
static PMEMblk *
pmemblk_map_common(int fd, size_t bsize, int rdonly)
{
	LOG(3, "fd %d bsize %zu rdonly %d", fd, bsize, rdonly);

	/* things free by "goto err" if not NULL */
	void *addr = NULL;
	struct btt *bttp = NULL;
	pthread_mutex_t *locks = NULL;

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMBLK_MIN_POOL) {
		LOG(1, "size %zu smaller than %zu",
				stbuf.st_size, PMEMBLK_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemblk *pbp = addr;

	struct pool_hdr hdr;
	memcpy(&hdr, &pbp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);

			errno = EINVAL;
			goto err;
		}

		if (hdr.major != BLK_FORMAT_MAJOR) {
			LOG(1, "blk pool version %d (library expects %d)",
				hdr.major, BLK_FORMAT_MAJOR);

			errno = EINVAL;
			goto err;
		}

		size_t hdr_bsize = le32toh(pbp->bsize);
		if (bsize && bsize != hdr_bsize) {
			LOG(1, "wrong bsize (%zu), pool created with bsize %zu",
					bsize, hdr_bsize);
			errno = EINVAL;
			goto err;
		}
		bsize = hdr_bsize;
		LOG(3, "using block size from header: %zu", bsize);

		int retval = util_feature_check(&hdr, BLK_FORMAT_INCOMPAT,
							BLK_FORMAT_RO_COMPAT,
							BLK_FORMAT_COMPAT);
		if (retval < 0)
		    goto err;
		else if (retval == 0)
		    rdonly = 1;
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new blk memory pool");

		struct pool_hdr *hdrp = &pbp->hdr;

		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(BLK_FORMAT_MAJOR);
		hdrp->compat_features = htole32(BLK_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(BLK_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(BLK_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		pbp->bsize = htole32(bsize);
		libpmem_persist(is_pmem, &pbp->bsize, sizeof (bsize));
	}

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	pbp->addr = addr;
	pbp->size = stbuf.st_size;
	pbp->rdonly = rdonly;
	pbp->is_pmem = is_pmem;
	pbp->data = addr + roundup(sizeof (*pbp), BLK_FORMAT_DATA_ALIGN);
	pbp->datasize = (pbp->addr + pbp->size) - pbp->data;

	LOG(4, "data area %p data size %zu bsize %zu",
		pbp->data, pbp->datasize, bsize);

	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (ncpus < 1)
		ncpus = 1;

	bttp = btt_init(pbp->datasize, (uint32_t)bsize, pbp->hdr.uuid,
			ncpus, pbp, &ns_cb);

	if (bttp == NULL)
		goto err;	/* btt_init set errno, called LOG */

	pbp->bttp = bttp;

	pbp->nlane = btt_nlane(pbp->bttp);
	pbp->next_lane = 0;
	if ((locks = Malloc(pbp->nlane * sizeof (*locks))) == NULL) {
		LOG(1, "!Malloc for lane locks");
		goto err;
	}

	for (int i = 0; i < pbp->nlane; i++)
		if (pthread_mutex_init(&locks[i], NULL) < 0) {
			LOG(1, "!pthread_mutex_init");
			goto err;
		}

	pbp->locks = locks;

#ifdef DEBUG
	/* initialize debug lock */
	if (pthread_mutex_init(&pbp->write_lock, NULL) < 0) {
		LOG(1, "!pthread_mutex_init");
		goto err;
	}
#endif

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the data area should be kept read-only for debug version */
	RANGE_RO(pbp->data, pbp->datasize);

	LOG(3, "pbp %p", pbp);
	return pbp;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	if (locks)
		Free((void *)locks);
	if (bttp)
		btt_fini(bttp);
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
Example #20
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 */
static PMEMlog *
pmemlog_map_common(int fd, int rdonly)
{
	LOG(3, "fd %d rdonly %d", fd, rdonly);

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMLOG_MIN_POOL) {
		LOG(1, "size %lld smaller than %zu",
				(long long)stbuf.st_size, PMEMLOG_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	void *addr;
	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemlog *plp = addr;
	struct pool_hdr hdr;

	memcpy(&hdr, &plp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != LOG_FORMAT_MAJOR) {
			LOG(1, "log pool version %d (library expects %d)",
				hdr.major, LOG_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		uint64_t hdr_start = le64toh(plp->start_offset);
		uint64_t hdr_end = le64toh(plp->end_offset);
		uint64_t hdr_write = le64toh(plp->write_offset);

		if ((hdr_start != roundup(sizeof (*plp),
					LOG_FORMAT_DATA_ALIGN)) ||
			(hdr_end != stbuf.st_size) || (hdr_start > hdr_end)) {
			LOG(1, "wrong start/end offsets (start: %ju end: %ju), "
				"pool size %lld",
				hdr_start, hdr_end, (long long)stbuf.st_size);
			errno = EINVAL;
			goto err;
		}

		if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
			LOG(1, "wrong write offset "
				"(start: %ju end: %ju write: %ju)",
				hdr_start, hdr_end, hdr_write);
			errno = EINVAL;
			goto err;
		}

		LOG(3, "start: %ju, end: %ju, write: %ju",
			hdr_start, hdr_end, hdr_write);

		int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
							LOG_FORMAT_RO_COMPAT,
							LOG_FORMAT_COMPAT);
		if (retval < 0)
			goto err;
		else if (retval == 0)
			rdonly = 1;
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new log memory pool");

		struct pool_hdr *hdrp = &plp->hdr;

		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(LOG_FORMAT_MAJOR);
		hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		plp->start_offset = htole64(roundup(sizeof (*plp),
						LOG_FORMAT_DATA_ALIGN));
		plp->end_offset = htole64(stbuf.st_size);
		plp->write_offset = plp->start_offset;

		/* store non-volatile part of pool's descriptor */
		libpmem_persist(is_pmem, &plp->start_offset,
							3 * sizeof (uint64_t));
	}

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	plp->addr = addr;
	plp->size = stbuf.st_size;
	plp->rdonly = rdonly;
	plp->is_pmem = is_pmem;

	if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
		LOG(1, "!Malloc for a RW lock");
		goto err;
	}

	if (pthread_rwlock_init(plp->rwlockp, NULL)) {
		LOG(1, "!pthread_rwlock_init");
		goto err_free;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the rest should be kept read-only (debug version only) */
	RANGE_RO(addr + sizeof (struct pool_hdr),
			stbuf.st_size - sizeof (struct pool_hdr));

	LOG(3, "plp %p", plp);
	return plp;

err_free:
	Free((void *)plp->rwlockp);
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
Example #21
File: set.c Project: jebtang/nvml
/*
 * util_replica_create -- (internal) create a new memory pool replica
 */
static int
util_replica_create(struct pool_set *set, unsigned repidx, int flags,
	size_t hdrsize, const char *sig,
	uint32_t major, uint32_t compat, uint32_t incompat, uint32_t ro_compat)
{
	LOG(3, "set %p repidx %u flags %d hdrsize %zu sig %s major %u "
		"compat %#x incompat %#x ro_comapt %#x",
		set, repidx, flags, hdrsize, sig, major,
		compat, incompat, ro_compat);

	struct pool_replica *rep = set->replica[repidx];

	rep->repsize -= (rep->nparts - 1) * hdrsize;

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize); /* XXX - randomize */
	if (addr == NULL) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
				rep->part[0].addr, rep->part[0].size, 0);

	/* map all the remaining headers - don't care about the address */
	for (unsigned p = 1; p < rep->nparts; p++) {
		if (util_map_part(&rep->part[p], NULL,
				hdrsize, 0, flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, 0);
	}

	/* create headers, set UUID's */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_header_create(set, repidx, p, sig, major,
				compat, incompat, ro_compat) != 0) {
			LOG(2, "header creation failed - part #%d", p);
			goto err;
		}
	}

	set->zeroed &= rep->part[0].created;

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = rep->part[0].addr + mapsize;

	/*
	 * unmap headers; map the remaining parts of the usable pool space
	 * (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* unmap header */
		if (util_unmap_part(&rep->part[p]) != 0) {
			LOG(2, "header unmapping failed - part #%d", p);
		}

		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, hdrsize,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, hdrsize);

		mapsize += rep->part[p].size;
		set->zeroed &= rep->part[p].created;
		addr += rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	/* calculate pool size - choose the smallest replica size */
	if (rep->repsize < set->poolsize)
		set->poolsize = rep->repsize;

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	VALGRIND_REMOVE_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	util_unmap(rep->part[0].addr, rep->part[0].size);
	errno = oerrno;
	return -1;
}
Example #22
File: log.c Project: harrybaa/nvml
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * If empty flag is set, the file is assumed to be a new memory pool, and
 * a new pool header is created.  Otherwise, a valid header must exist.
 */
static PMEMlogpool *
pmemlog_map_common(int fd, size_t poolsize, int rdonly, int empty)
{
    LOG(3, "fd %d poolsize %zu rdonly %d empty %d",
        fd, poolsize, rdonly, empty);

    void *addr;
    if ((addr = util_map(fd, poolsize, rdonly)) == NULL) {
        (void) close(fd);
        return NULL;	/* util_map() set errno, called LOG */
    }

    VALGRIND_REGISTER_PMEM_MAPPING(addr, poolsize);
    VALGRIND_REGISTER_PMEM_FILE(fd, addr, poolsize, 0);

    (void) close(fd);

    /* check if the mapped region is located in persistent memory */
    int is_pmem = pmem_is_pmem(addr, poolsize);

    /* opaque info lives at the beginning of mapped memory pool */
    struct pmemlog *plp = addr;

    if (!empty) {
        struct pool_hdr hdr;

        memcpy(&hdr, &plp->hdr, sizeof (hdr));

        if (!util_convert_hdr(&hdr)) {
            errno = EINVAL;
            goto err;
        }

        /*
         * valid header found
         */
        if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
            ERR("wrong pool type: \"%s\"", hdr.signature);
            errno = EINVAL;
            goto err;
        }

        if (hdr.major != LOG_FORMAT_MAJOR) {
            ERR("log pool version %d (library expects %d)",
                hdr.major, LOG_FORMAT_MAJOR);
            errno = EINVAL;
            goto err;
        }

        /* XXX - pools sets / replicas */
        if (memcmp(hdr.uuid, hdr.prev_part_uuid, POOL_HDR_UUID_LEN) ||
                memcmp(hdr.uuid, hdr.next_part_uuid, POOL_HDR_UUID_LEN) ||
                memcmp(hdr.uuid, hdr.prev_repl_uuid, POOL_HDR_UUID_LEN) ||
                memcmp(hdr.uuid, hdr.next_repl_uuid, POOL_HDR_UUID_LEN)) {
            ERR("wrong UUID");
            errno = EINVAL;
            goto err;
        }

        uint64_t hdr_start = le64toh(plp->start_offset);
        uint64_t hdr_end = le64toh(plp->end_offset);
        uint64_t hdr_write = le64toh(plp->write_offset);

        if ((hdr_start != roundup(sizeof (*plp),
                                  LOG_FORMAT_DATA_ALIGN)) ||
                (hdr_end != poolsize) || (hdr_start > hdr_end)) {
            ERR("wrong start/end offsets (start: %ju end: %ju), "
                "pool size %zu",
                hdr_start, hdr_end, poolsize);
            errno = EINVAL;
            goto err;
        }

        if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
            ERR("wrong write offset "
                "(start: %ju end: %ju write: %ju)",
                hdr_start, hdr_end, hdr_write);
            errno = EINVAL;
            goto err;
        }

        LOG(3, "start: %ju, end: %ju, write: %ju",
            hdr_start, hdr_end, hdr_write);

        int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
                                        LOG_FORMAT_RO_COMPAT,
                                        LOG_FORMAT_COMPAT);
        if (retval < 0)
            goto err;
        else if (retval == 0)
            rdonly = 1;
    } else {
        LOG(3, "creating new log memory pool");

        ASSERTeq(rdonly, 0);

        struct pool_hdr *hdrp = &plp->hdr;

        /* check if the pool header is all zero */
        if (!util_is_zeroed(hdrp, sizeof (*hdrp))) {
            ERR("Non-empty file detected");
            errno = EINVAL;
            goto err;
        }

        /* create required metadata first */
        plp->start_offset = htole64(roundup(sizeof (*plp),
                                            LOG_FORMAT_DATA_ALIGN));
        plp->end_offset = htole64(poolsize);
        plp->write_offset = plp->start_offset;

        /* store non-volatile part of pool's descriptor */
        pmem_msync(&plp->start_offset, 3 * sizeof (uint64_t));

        /* create pool header */
        strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
        hdrp->major = htole32(LOG_FORMAT_MAJOR);
        hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
        hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
        hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
        uuid_generate(hdrp->uuid);
        /* XXX - pools sets / replicas */
        uuid_generate(hdrp->poolset_uuid);
        memcpy(hdrp->prev_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
        memcpy(hdrp->next_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
        memcpy(hdrp->prev_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
        memcpy(hdrp->next_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
        hdrp->crtime = htole64((uint64_t)time(NULL));

        if (util_get_arch_flags(&hdrp->arch_flags)) {
            ERR("Reading architecture flags failed\n");
            errno = EINVAL;
            goto err;
        }

        hdrp->arch_flags.alignment_desc =
            htole64(hdrp->arch_flags.alignment_desc);
        hdrp->arch_flags.e_machine =
            htole16(hdrp->arch_flags.e_machine);

        util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);

        /* store pool's header */
        pmem_msync(hdrp, sizeof (*hdrp));
    }

    /* remove volatile part of header */
    VALGRIND_REMOVE_PMEM_MAPPING(&plp->addr,
                                 sizeof (struct pmemlog) -
                                 sizeof (struct pool_hdr) -
                                 3 * sizeof (uint64_t));

    /*
     * Use some of the memory pool area for run-time info.  This
     * run-time state is never loaded from the file, it is always
     * created here, so no need to worry about byte-order.
     */
    plp->addr = addr;
    plp->size = poolsize;
    plp->rdonly = rdonly;
    plp->is_pmem = is_pmem;

    if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
        ERR("!Malloc for a RW lock");
        goto err;
    }

    if ((errno = pthread_rwlock_init(plp->rwlockp, NULL))) {
        ERR("!pthread_rwlock_init");
        goto err_free;
    }

    /*
     * If possible, turn off all permissions on the pool header page.
     *
     * The prototype PMFS doesn't allow this when large pages are in
     * use. It is not considered an error if this fails.
     */
    util_range_none(addr, sizeof (struct pool_hdr));

    /* the rest should be kept read-only (debug version only) */
    RANGE_RO(addr + sizeof (struct pool_hdr),
             poolsize - sizeof (struct pool_hdr));

    LOG(3, "plp %p", plp);
    return plp;

err_free:
    Free((void *)plp->rwlockp);
err:
    LOG(4, "error clean up");
    int oerrno = errno;
    VALGRIND_REMOVE_PMEM_MAPPING(addr, poolsize);
    util_unmap(addr, poolsize);
    errno = oerrno;
    return NULL;
}