Esempio n. 1
0
/*
 * worker -- the work each thread performs
 */
static void *
worker(void *arg)
{
    int *ret = (int *)arg;
    *ret =  pmem_is_pmem(Addr, Size);
    return NULL;
}
Esempio n. 2
0
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem_posix");

	if (argc < 4)
		UT_FATAL("usage: %s op addr len type [op addr len type ...]",
				argv[0]);

	/* insert memory regions to the list */
	int i;
	for (i = 1; i < argc; ) {
		UT_ASSERT(i + 2 < argc);

		errno = 0;
		void *addr = (void *)strtoull(argv[i + 1], NULL, 0);
		UT_ASSERTeq(errno, 0);

		size_t len = strtoull(argv[i + 2], NULL, 0);
		UT_ASSERTeq(errno, 0);

		int ret;

		switch (argv[i][0]) {
		case 'a':
			ret = util_range_register(addr, len, "",
					str2type(argv[i + 3]));
			if (ret != 0)
				UT_OUT("%s", pmem_errormsg());
			i += 4;
			break;
		case 'r':
			ret = util_range_unregister(addr, len);
			UT_ASSERTeq(ret, 0);
			i += 3;
			break;
		case 't':
			UT_OUT("addr %p len %zu is_pmem %d",
					addr, len, pmem_is_pmem(addr, len));
			i += 3;
			break;
		case 'f':
			do_fault_injection_register(addr, len,
					str2type(argv[i + 3]));
			i += 4;
			break;
		case 's':
			do_fault_injection_split(addr, len);
			i += 3;
			break;
		default:
			FATAL("invalid op '%c'", argv[i][0]);
		}
	}

	DONE(NULL);
}
Esempio n. 3
0
int
main(int argc, char *argv[])
{
	int srcfd;
	int dstfd;
	struct stat stbuf;
	char *pmemaddr;

	if (argc != 3) {
		fprintf(stderr, "usage: %s src-file dst-file\n", argv[0]);
		exit(1);
	}

	/* open src-file */
	if ((srcfd = open(argv[1], O_RDONLY)) < 0) {
		perror(argv[1]);
		exit(1);
	}

	/* create a pmem file */
	if ((dstfd = open(argv[2], O_CREAT|O_EXCL|O_RDWR, 0666)) < 0) {
		perror(argv[2]);
		exit(1);
	}

	/* find the size of the src-file */
	if (fstat(srcfd, &stbuf) < 0) {
		perror("fstat");
		exit(1);
	}

	/* allocate the pmem */
	if ((errno = posix_fallocate(dstfd, 0, stbuf.st_size)) != 0) {
		perror("posix_fallocate");
		exit(1);
	}

	/* memory map it */
	if ((pmemaddr = pmem_map(dstfd)) == NULL) {
		perror("pmem_map");
		exit(1);
	}
	close(dstfd);

	/* determine if range is true pmem, call appropriate copy routine */
	if (pmem_is_pmem(pmemaddr, stbuf.st_size))
		do_copy_to_pmem(pmemaddr, srcfd, stbuf.st_size);
	else
		do_copy_to_non_pmem(pmemaddr, srcfd, stbuf.st_size);

	close(srcfd);

	exit(0);
}
Esempio n. 4
0
int
main(int argc, char *argv[])
{
	int fd;
	struct stat stbuf;
	char *dest;

	START(argc, argv, "pmem_valgr_simple");

	if (argc != 4)
		FATAL("usage: %s file offset length", argv[0]);

	fd = OPEN(argv[1], O_RDWR);
	int dest_off = atoi(argv[2]);
	size_t bytes = strtoul(argv[3], NULL, 0);

	FSTAT(fd, &stbuf);

	dest = pmem_map(fd);
	if (dest == NULL)
		FATAL("!Could not mmap %s\n", argv[1]);

	/* these will not be made persistent */
	*(int *)dest = 4;

	/* this will be made persistent */
	uint64_t *tmp64dst = (void *)((uintptr_t)dest + 4096);
	*tmp64dst = 50;

	if (pmem_is_pmem(dest, sizeof (*tmp64dst))) {
		pmem_persist(tmp64dst, sizeof (*tmp64dst));
	} else {
		pmem_msync(tmp64dst, sizeof (*tmp64dst));
	}

	uint16_t *tmp16dst = (void *)((uintptr_t)dest + 1024);
	*tmp16dst = 21;
	/* will appear as flushed in valgrind log */
	pmem_flush(tmp16dst, sizeof (*tmp16dst));

	/* shows strange behavior of memset in some cases */
	memset(dest + dest_off, 0, bytes);

	pmem_unmap(dest, stbuf.st_size);

	CLOSE(fd);

	DONE(NULL);
}
Esempio n. 5
0
PMEMobjpool *
pmemobj_open_mock(const char *fname)
{
	int fd = open(fname, O_RDWR);
	if (fd == -1) {
		OUT("!%s: open", fname);

		return NULL;
	}

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		OUT("!fstat");

		return NULL;
	}

	ASSERT(stbuf.st_size > PMEMOBJ_POOL_HDR_SIZE);

	void *addr = pmem_map(fd);
	if (!addr) {
		OUT("!%s: pmem_map", fname);

		return NULL;
	}

	close(fd);

	PMEMobjpool *pop = (PMEMobjpool *)addr;
	VALGRIND_REMOVE_PMEM_MAPPING(addr + sizeof (pop->hdr), 4096);
	pop->addr = addr;
	pop->size = stbuf.st_size;
	pop->is_pmem = pmem_is_pmem(addr, stbuf.st_size);
	pop->rdonly = 0;

	if (pop->is_pmem) {
		pop->persist = pmem_persist;
		pop->flush = pmem_flush;
		pop->drain = pmem_drain;
	} else {
		pop->persist = (persist_fn)pmem_msync;
		pop->flush = (persist_fn)pmem_msync;
		pop->drain = pmem_drain_nop;
	}

	return pop;
}
Esempio n. 6
0
int
main(int argc, char *argv[])
{
    START(argc, argv, "pmem_is_pmem");

    if (argc <  2 || argc > 3)
        UT_FATAL("usage: %s file [env]", argv[0]);

    if (argc == 3)
        UT_ASSERTeq(setenv("PMEM_IS_PMEM_FORCE", argv[2], 1), 0);

    int fd = OPEN(argv[1], O_RDWR);

    ut_util_stat_t stbuf;
    FSTAT(fd, &stbuf);

    Size = stbuf.st_size;
    Addr = MMAP(0, stbuf.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

    CLOSE(fd);

    pthread_t threads[NTHREAD];
    int ret[NTHREAD];

    /* kick off NTHREAD threads */
    for (int i = 0; i < NTHREAD; i++)
        PTHREAD_CREATE(&threads[i], NULL, worker, &ret[i]);

    /* wait for all the threads to complete */
    for (int i = 0; i < NTHREAD; i++)
        PTHREAD_JOIN(threads[i], NULL);

    /* verify that all the threads return the same value */
    for (int i = 1; i < NTHREAD; i++)
        UT_ASSERTeq(ret[0], ret[i]);

    UT_OUT("%d", ret[0]);

    UT_ASSERTeq(unsetenv("PMEM_IS_PMEM_FORCE"), 0);

    UT_OUT("%d", pmem_is_pmem(Addr, Size));

    DONE(NULL);
}
Esempio n. 7
0
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem_linux");

	if (argc < 3)
		UT_FATAL("usage: %s op addr len [op addr len ...]",
				argv[0]);

	/* insert memory regions to the list */
	int i;
	for (i = 1; i < argc; i += 3) {
		UT_ASSERT(i + 2 < argc);

		errno = 0;
		void *addr = (void *)strtoull(argv[i + 1], NULL, 0);
		UT_ASSERTeq(errno, 0);

		size_t len = strtoull(argv[i + 2], NULL, 0);
		UT_ASSERTeq(errno, 0);

		int ret;

		switch (argv[i][0]) {
		case 'a':
			ret = util_range_register(addr, len);
			UT_ASSERTeq(ret, 0);
			break;
		case 'r':
			ret = util_range_unregister(addr, len);
			UT_ASSERTeq(ret, 0);
			break;
		case 't':
			UT_OUT("addr %p len %zu is_pmem %d",
					addr, len, pmem_is_pmem(addr, len));
			break;
		default:
			FATAL("invalid op");
		}
	}

	DONE(NULL);
}
Esempio n. 8
0
/*
 * is_pmem -- checks if given path points to pmem-aware filesystem
 */
static int
is_pmem(const char *path)
{
	int ret;

	void *addr = util_map_tmpfile(path, SIZE, 0);
	if (addr == NULL) {
		fprintf(stderr, "file creation failed\n");
		return -1;
	}

	if (pmem_is_pmem(addr, SIZE))
		ret = 1;
	else
		ret = 0;

	util_unmap(addr, SIZE);

	return ret;
}
Esempio n. 9
0
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem_proc_linux");

	if (argc < 5)
		UT_FATAL("usage: %s nfiles file.. nregions "
			"(addr len)... (addr len)...", argv[0]);

	Nfiles = atoi(argv[1]);
	UT_ASSERT(Nfiles < MAX_FILES);

	for (int i = 0; i < Nfiles; i++) {
		Sfile[i] = argv[2 + i];
	}

	Nregions = atoi(argv[2 + Nfiles]);
	UT_ASSERT(Nregions < MAX_REGIONS);
	for (int i = 0; i < Nregions; i += 2) {
		char *str_addr = argv[3 + Nfiles + i + 0];
		char *str_len  = argv[3 + Nfiles + i + 1];

		Mincore[i].addr = (uintptr_t)strtoull(str_addr, NULL, 16);
		Mincore[i].len = (size_t)strtoull(str_len, NULL, 10);
	}

	for (int arg = 2 + Nfiles + 1 + 2 * Nregions; arg < argc; arg += 2) {
		void *addr;
		size_t len;

		addr = (void *)strtoull(argv[arg], NULL, 16);
		len = (size_t)strtoull(argv[arg + 1], NULL, 10);

		Curfile = 0;

		UT_OUT("addr %p, len %zu: %d", addr, len,
				pmem_is_pmem(addr, len));
	}

	DONE(NULL);
}
Esempio n. 10
0
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_is_pmem_proc");

	if (argc < 4 || argc % 2)
		FATAL("usage: %s file addr len [addr len]...", argv[0]);

	Sfile = argv[1];

	for (int arg = 2; arg < argc; arg += 2) {
		void *addr;
		size_t len;

		addr = (void *)strtoull(argv[arg], NULL, 16);
		len = (size_t)strtoull(argv[arg + 1], NULL, 10);
		OUT("addr %p, len %zu: %d", addr, len, pmem_is_pmem(addr, len));
	}

	DONE(NULL);
}
Esempio n. 11
0
int
main(int argc, char *argv[])
{
	START(argc, argv, "pmem_map_file");

	int fd;
	void *addr;
	size_t mlen;
	size_t *mlenp;
	const char *path;
	unsigned long long len;
	int flags;
	unsigned mode;
	int is_pmem;
	int *is_pmemp;
	int use_mlen;
	int use_is_pmem;
	int err_code;

	if (argc < 8)
		UT_FATAL("usage: %s path len flags mode use_mlen "
				"use_is_pmem err_code...", argv[0]);

	for (int i = 1; i + 6 < argc; i += 7) {
		path = argv[i];
		len = strtoull(argv[i + 1], NULL, 0);
		flags = parse_flags(argv[i + 2]);
		mode = STRTOU(argv[i + 3], NULL, 8);
		use_mlen = atoi(argv[i + 4]);
		use_is_pmem = atoi(argv[i + 5]);
		err_code = parse_err_code(argv[i + 6]);

		mlen = SIZE_MAX;
		if (use_mlen)
			mlenp = &mlen;
		else
			mlenp = NULL;

		if (use_is_pmem)
			is_pmemp = &is_pmem;
		else
			is_pmemp = NULL;

		UT_OUT("%s %lld %s %o %d %d %d",
			path, len, argv[i + 2], mode, use_mlen,
			use_is_pmem, err_code);

		addr = pmem_map_file(path, len, flags, mode, mlenp, is_pmemp);

		if (err_code != 0) {
			UT_ASSERTeq(errno, err_code);
		}

		if (addr == NULL) {
			UT_OUT("!pmem_map_file");
			continue;
		}

		if (use_mlen) {
			UT_ASSERTne(mlen, SIZE_MAX);
			UT_OUT("mapped_len %zu", mlen);
		} else {
			mlen = len;
		}

		if (addr) {
			/* is_pmem must be true for device DAX */
			int is_pmem_check = pmem_is_pmem(addr, mlen);
			UT_ASSERT(!is_dev_dax || is_pmem_check);

			/* check is_pmem returned from pmem_map_file */
			if (use_is_pmem)
				UT_ASSERTeq(is_pmem, is_pmem_check);

			if ((flags & PMEM_FILE_TMPFILE) == 0 && !is_dev_dax) {
				fd = OPEN(argv[i], O_RDWR);

				if (!use_mlen) {
					os_stat_t stbuf;
					FSTAT(fd, &stbuf);
					mlen = (size_t)stbuf.st_size;
				}

				if (fd != -1) {
					do_check(fd, addr, mlen);
					(void) CLOSE(fd);
				} else {
					UT_OUT("!cannot open file: %s",
							argv[i]);
				}
			} else {
				UT_ASSERTeq(pmem_unmap(addr, mlen), 0);
			}
		}
	}

	DONE(NULL);
}
Esempio n. 12
0
int
main(int argc, char *argv[])
{
	int srcfd;
	int dstfd;
	char buf[BUF_LEN];
	char *pmemaddr;
	int is_pmem;
	int cc;

	if (argc != 3) {
		fprintf(stderr, "usage: %s src-file dst-file\n", argv[0]);
		exit(1);
	}

	/* open src-file */
	if ((srcfd = open(argv[1], O_RDONLY)) < 0) {
		perror(argv[1]);
		exit(1);
	}

	/* create a pmem file */
	if ((dstfd = open(argv[2], O_CREAT|O_EXCL|O_RDWR, 0666)) < 0) {
		perror(argv[2]);
		exit(1);
	}

	/* allocate the pmem */
	if ((errno = posix_fallocate(dstfd, 0, BUF_LEN)) != 0) {
		perror("posix_fallocate");
		exit(1);
	}

	/* memory map it */
	if ((pmemaddr = pmem_map(dstfd)) == NULL) {
		perror("pmem_map");
		exit(1);
	}
	close(dstfd);

	/* determine if range is true pmem */
	is_pmem = pmem_is_pmem(pmemaddr, BUF_LEN);

	/* read up to BUF_LEN from srcfd */
	if ((cc = read(srcfd, buf, BUF_LEN)) < 0) {
		perror("read");
		exit(1);
	}

	/* write it to the pmem */
	if (is_pmem) {
		pmem_memcpy(pmemaddr, buf, cc);
	} else {
		memcpy(pmemaddr, buf, cc);
		pmem_msync(pmemaddr, cc);
	}

	close(srcfd);

	exit(0);
}
Esempio n. 13
0
int
main(int argc, char *argv[])
{
	char *path;
	size_t data_size, pool_size;
	int iterations;
	int is_pmem;
	int fd;
	uint8_t *pool;
	double exec_time_pmem_persist, exec_time_pmem_msync;

	if (argc < 4) {
		printf("Usage %s <file_name> <data_size> <iterations>\n",
			argv[0]);
		return 0;
	}

	path = argv[1];
	data_size = atoi(argv[2]);
	iterations = atoi(argv[3]);
	pool_size = data_size * iterations;

	if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRWXU)) < 0) {
		perror("open");
		return -1;
	}

	if ((errno = posix_fallocate(fd, 0, pool_size)) != 0) {
		perror("posix_fallocate");
		goto err;
	}

	if ((pool = pmem_map(fd)) == NULL) {
		perror("pmem_map");
		goto err;
	}
	close(fd);

	/* check if range is true pmem */
	is_pmem = pmem_is_pmem(pool, pool_size);
	if (is_pmem) {
		/* benchmarking pmem_persist */
		exec_time_pmem_persist = benchmark_func(pmem_persist, pool,
						data_size, iterations);
	} else {
		fprintf(stderr, "Notice: pmem_persist is not benchmarked,"
			" because given file (%s) is not in Persistent Memory"
			" aware file system.\n", path);
		exec_time_pmem_persist = 0.0;
	}

	/* benchmarking pmem_msync */
	exec_time_pmem_msync = benchmark_func((persist_fn)pmem_msync, pool,
						data_size, iterations);

	printf("%zu;%e;%zu;%e;%zu;%e\n",
		data_size, exec_time_pmem_persist,
		data_size, exec_time_pmem_msync,
		data_size, exec_time_pmem_persist / exec_time_pmem_msync);

	return 0;

err:
	close(fd);
	unlink(path);
	return -1;
}
Esempio n. 14
0
File: set.c Progetto: tgockel/nvml
/*
 * util_replica_open -- (internal) open a memory pool replica
 */
static int
util_replica_open(struct pool_set *set, unsigned repidx, int flags,
	size_t hdrsize)
{
	LOG(3, "set %p repidx %u flags %d hdrsize %zu\n",
		set, repidx, flags, hdrsize);

	struct pool_replica *rep = set->replica[repidx];

	rep->repsize -= (rep->nparts - 1) * hdrsize;

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize); /* XXX - randomize */
	if (addr == NULL) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
				rep->part[0].addr, rep->part[0].size, 0);

	/* map all headers - don't care about the address */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_map_hdr(&rep->part[p],
				hdrsize, 0, flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}
	}

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = (char *)rep->part[0].addr + mapsize;

	/*
	 * map the remaining parts of the usable pool space
	 * (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, hdrsize,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, hdrsize);

		mapsize += rep->part[p].size;
		addr = (char *)addr + rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	/* calculate pool size - choose the smallest replica size */
	if (rep->repsize < set->poolsize)
		set->poolsize = rep->repsize;

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);
	util_unmap_part(&rep->part[0]);
	errno = oerrno;
	return -1;
}
Esempio n. 15
0
File: blk.c Progetto: mdalecki/nvml
/*
 * pmemblk_map_common -- (internal) map a block memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * Passing in bsize == 0 means a valid pool header must exist (which
 * will supply the block size).
 */
static PMEMblk *
pmemblk_map_common(int fd, size_t bsize, int rdonly)
{
	LOG(3, "fd %d bsize %zu rdonly %d", fd, bsize, rdonly);

	/* things free by "goto err" if not NULL */
	void *addr = NULL;
	struct btt *bttp = NULL;
	pthread_mutex_t *locks = NULL;

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMBLK_MIN_POOL) {
		LOG(1, "size %zu smaller than %zu",
				stbuf.st_size, PMEMBLK_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemblk *pbp = addr;

	struct pool_hdr hdr;
	memcpy(&hdr, &pbp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);

			errno = EINVAL;
			goto err;
		}

		if (hdr.major != BLK_FORMAT_MAJOR) {
			LOG(1, "blk pool version %d (library expects %d)",
				hdr.major, BLK_FORMAT_MAJOR);

			errno = EINVAL;
			goto err;
		}

		size_t hdr_bsize = le32toh(pbp->bsize);
		if (bsize && bsize != hdr_bsize) {
			LOG(1, "wrong bsize (%zu), pool created with bsize %zu",
					bsize, hdr_bsize);
			errno = EINVAL;
			goto err;
		}
		bsize = hdr_bsize;
		LOG(3, "using block size from header: %zu", bsize);

		int retval = util_feature_check(&hdr, BLK_FORMAT_INCOMPAT,
							BLK_FORMAT_RO_COMPAT,
							BLK_FORMAT_COMPAT);
		if (retval < 0)
		    goto err;
		else if (retval == 0)
		    rdonly = 1;
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new blk memory pool");

		struct pool_hdr *hdrp = &pbp->hdr;

		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, BLK_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(BLK_FORMAT_MAJOR);
		hdrp->compat_features = htole32(BLK_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(BLK_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(BLK_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		pbp->bsize = htole32(bsize);
		libpmem_persist(is_pmem, &pbp->bsize, sizeof (bsize));
	}

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	pbp->addr = addr;
	pbp->size = stbuf.st_size;
	pbp->rdonly = rdonly;
	pbp->is_pmem = is_pmem;
	pbp->data = addr + roundup(sizeof (*pbp), BLK_FORMAT_DATA_ALIGN);
	pbp->datasize = (pbp->addr + pbp->size) - pbp->data;

	LOG(4, "data area %p data size %zu bsize %zu",
		pbp->data, pbp->datasize, bsize);

	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (ncpus < 1)
		ncpus = 1;

	bttp = btt_init(pbp->datasize, (uint32_t)bsize, pbp->hdr.uuid,
			ncpus, pbp, &ns_cb);

	if (bttp == NULL)
		goto err;	/* btt_init set errno, called LOG */

	pbp->bttp = bttp;

	pbp->nlane = btt_nlane(pbp->bttp);
	pbp->next_lane = 0;
	if ((locks = Malloc(pbp->nlane * sizeof (*locks))) == NULL) {
		LOG(1, "!Malloc for lane locks");
		goto err;
	}

	for (int i = 0; i < pbp->nlane; i++)
		if (pthread_mutex_init(&locks[i], NULL) < 0) {
			LOG(1, "!pthread_mutex_init");
			goto err;
		}

	pbp->locks = locks;

#ifdef DEBUG
	/* initialize debug lock */
	if (pthread_mutex_init(&pbp->write_lock, NULL) < 0) {
		LOG(1, "!pthread_mutex_init");
		goto err;
	}
#endif

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use not it is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the data area should be kept read-only for debug version */
	RANGE_RO(pbp->data, pbp->datasize);

	LOG(3, "pbp %p", pbp);
	return pbp;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	if (locks)
		Free((void *)locks);
	if (bttp)
		btt_fini(bttp);
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
Esempio n. 16
0
StorageManager::StorageManager()
: data_file_address(nullptr),
  is_pmem(false),
  data_file_len(0),
  data_file_offset(0) {
  // Check if we need a data pool
  if (IsBasedOnWriteAheadLogging(peloton_logging_mode) == true ||
      peloton_logging_mode == LOGGING_TYPE_INVALID) {
    return;
  }

  int data_fd;
  std::string data_file_name;
  struct stat data_stat;

  // Initialize file size
  if (peloton_data_file_size != 0)
    data_file_len = peloton_data_file_size * 1024 * 1024;  // MB
  else
    data_file_len = DATA_FILE_LEN;

  // Check for relevant file system
  bool found_file_system = false;

  switch (peloton_logging_mode) {
    // Check for NVM FS for data
    case LOGGING_TYPE_NVM_NVM:
    case LOGGING_TYPE_NVM_HDD:
    case LOGGING_TYPE_NVM_SSD: {
      int status = stat(NVM_DIR, &data_stat);
      if (status == 0 && S_ISDIR(data_stat.st_mode)) {
        data_file_name = std::string(NVM_DIR) + std::string(DATA_FILE_NAME);
        found_file_system = true;
      }

    } break;

    // Check for SSD FS
    case LOGGING_TYPE_SSD_NVM:
    case LOGGING_TYPE_SSD_SSD:
    case LOGGING_TYPE_SSD_HDD: {
      int status = stat(SSD_DIR, &data_stat);
      if (status == 0 && S_ISDIR(data_stat.st_mode)) {
        data_file_name = std::string(SSD_DIR) + std::string(DATA_FILE_NAME);
        found_file_system = true;
      }

    } break;

    // Check for HDD FS
    case LOGGING_TYPE_HDD_NVM:
    case LOGGING_TYPE_HDD_SSD:
    case LOGGING_TYPE_HDD_HDD: {
      int status = stat(HDD_DIR, &data_stat);
      if (status == 0 && S_ISDIR(data_stat.st_mode)) {
        data_file_name = std::string(HDD_DIR) + std::string(DATA_FILE_NAME);
        found_file_system = true;
      }

    } break;

    default:
      break;
  }

  // Fallback to tmp directory if needed
  if (found_file_system == false) {
    int status = stat(TMP_DIR, &data_stat);
    if (status == 0 && S_ISDIR(data_stat.st_mode)) {
      data_file_name = std::string(TMP_DIR) + std::string(DATA_FILE_NAME);
    }
    else {
      throw Exception("Could not find temp directory : " + std::string(TMP_DIR));
    }
  }

  LOG_INFO("DATA DIR :: %s ", data_file_name.c_str());

  // TODO:
  std::cout << "Data path :: " << data_file_name << "\n";

  // Create a data file
  if ((data_fd = open(data_file_name.c_str(), O_CREAT | O_RDWR, 0666)) < 0) {
    perror(data_file_name.c_str());
    exit(EXIT_FAILURE);
  }

  // Allocate the data file
  if ((errno = posix_fallocate(data_fd, 0, data_file_len)) != 0) {
    perror("posix_fallocate");
    exit(EXIT_FAILURE);
  }

  // memory map the data file
  if ((data_file_address = reinterpret_cast<char *>(pmem_map(data_fd))) ==
      NULL) {
    perror("pmem_map");
    exit(EXIT_FAILURE);
  }

  // true only if the entire range [addr, addr+len) consists of persistent
  // memory
  is_pmem = pmem_is_pmem(data_file_address, data_file_len);

  // close the pmem file -- it will remain mapped
  close(data_fd);
}
Esempio n. 17
0
File: log.c Progetto: harrybaa/nvml
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 *
 * If empty flag is set, the file is assumed to be a new memory pool, and
 * a new pool header is created.  Otherwise, a valid header must exist.
 */
static PMEMlogpool *
pmemlog_map_common(int fd, size_t poolsize, int rdonly, int empty)
{
    LOG(3, "fd %d poolsize %zu rdonly %d empty %d",
        fd, poolsize, rdonly, empty);

    void *addr;
    if ((addr = util_map(fd, poolsize, rdonly)) == NULL) {
        (void) close(fd);
        return NULL;	/* util_map() set errno, called LOG */
    }

    VALGRIND_REGISTER_PMEM_MAPPING(addr, poolsize);
    VALGRIND_REGISTER_PMEM_FILE(fd, addr, poolsize, 0);

    (void) close(fd);

    /* check if the mapped region is located in persistent memory */
    int is_pmem = pmem_is_pmem(addr, poolsize);

    /* opaque info lives at the beginning of mapped memory pool */
    struct pmemlog *plp = addr;

    if (!empty) {
        struct pool_hdr hdr;

        memcpy(&hdr, &plp->hdr, sizeof (hdr));

        if (!util_convert_hdr(&hdr)) {
            errno = EINVAL;
            goto err;
        }

        /*
         * valid header found
         */
        if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
            ERR("wrong pool type: \"%s\"", hdr.signature);
            errno = EINVAL;
            goto err;
        }

        if (hdr.major != LOG_FORMAT_MAJOR) {
            ERR("log pool version %d (library expects %d)",
                hdr.major, LOG_FORMAT_MAJOR);
            errno = EINVAL;
            goto err;
        }

        /* XXX - pools sets / replicas */
        if (memcmp(hdr.uuid, hdr.prev_part_uuid, POOL_HDR_UUID_LEN) ||
                memcmp(hdr.uuid, hdr.next_part_uuid, POOL_HDR_UUID_LEN) ||
                memcmp(hdr.uuid, hdr.prev_repl_uuid, POOL_HDR_UUID_LEN) ||
                memcmp(hdr.uuid, hdr.next_repl_uuid, POOL_HDR_UUID_LEN)) {
            ERR("wrong UUID");
            errno = EINVAL;
            goto err;
        }

        uint64_t hdr_start = le64toh(plp->start_offset);
        uint64_t hdr_end = le64toh(plp->end_offset);
        uint64_t hdr_write = le64toh(plp->write_offset);

        if ((hdr_start != roundup(sizeof (*plp),
                                  LOG_FORMAT_DATA_ALIGN)) ||
                (hdr_end != poolsize) || (hdr_start > hdr_end)) {
            ERR("wrong start/end offsets (start: %ju end: %ju), "
                "pool size %zu",
                hdr_start, hdr_end, poolsize);
            errno = EINVAL;
            goto err;
        }

        if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
            ERR("wrong write offset "
                "(start: %ju end: %ju write: %ju)",
                hdr_start, hdr_end, hdr_write);
            errno = EINVAL;
            goto err;
        }

        LOG(3, "start: %ju, end: %ju, write: %ju",
            hdr_start, hdr_end, hdr_write);

        int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
                                        LOG_FORMAT_RO_COMPAT,
                                        LOG_FORMAT_COMPAT);
        if (retval < 0)
            goto err;
        else if (retval == 0)
            rdonly = 1;
    } else {
        LOG(3, "creating new log memory pool");

        ASSERTeq(rdonly, 0);

        struct pool_hdr *hdrp = &plp->hdr;

        /* check if the pool header is all zero */
        if (!util_is_zeroed(hdrp, sizeof (*hdrp))) {
            ERR("Non-empty file detected");
            errno = EINVAL;
            goto err;
        }

        /* create required metadata first */
        plp->start_offset = htole64(roundup(sizeof (*plp),
                                            LOG_FORMAT_DATA_ALIGN));
        plp->end_offset = htole64(poolsize);
        plp->write_offset = plp->start_offset;

        /* store non-volatile part of pool's descriptor */
        pmem_msync(&plp->start_offset, 3 * sizeof (uint64_t));

        /* create pool header */
        strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
        hdrp->major = htole32(LOG_FORMAT_MAJOR);
        hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
        hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
        hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
        uuid_generate(hdrp->uuid);
        /* XXX - pools sets / replicas */
        uuid_generate(hdrp->poolset_uuid);
        memcpy(hdrp->prev_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
        memcpy(hdrp->next_part_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
        memcpy(hdrp->prev_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
        memcpy(hdrp->next_repl_uuid, hdrp->uuid, POOL_HDR_UUID_LEN);
        hdrp->crtime = htole64((uint64_t)time(NULL));

        if (util_get_arch_flags(&hdrp->arch_flags)) {
            ERR("Reading architecture flags failed\n");
            errno = EINVAL;
            goto err;
        }

        hdrp->arch_flags.alignment_desc =
            htole64(hdrp->arch_flags.alignment_desc);
        hdrp->arch_flags.e_machine =
            htole16(hdrp->arch_flags.e_machine);

        util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);

        /* store pool's header */
        pmem_msync(hdrp, sizeof (*hdrp));
    }

    /* remove volatile part of header */
    VALGRIND_REMOVE_PMEM_MAPPING(&plp->addr,
                                 sizeof (struct pmemlog) -
                                 sizeof (struct pool_hdr) -
                                 3 * sizeof (uint64_t));

    /*
     * Use some of the memory pool area for run-time info.  This
     * run-time state is never loaded from the file, it is always
     * created here, so no need to worry about byte-order.
     */
    plp->addr = addr;
    plp->size = poolsize;
    plp->rdonly = rdonly;
    plp->is_pmem = is_pmem;

    if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
        ERR("!Malloc for a RW lock");
        goto err;
    }

    if ((errno = pthread_rwlock_init(plp->rwlockp, NULL))) {
        ERR("!pthread_rwlock_init");
        goto err_free;
    }

    /*
     * If possible, turn off all permissions on the pool header page.
     *
     * The prototype PMFS doesn't allow this when large pages are in
     * use. It is not considered an error if this fails.
     */
    util_range_none(addr, sizeof (struct pool_hdr));

    /* the rest should be kept read-only (debug version only) */
    RANGE_RO(addr + sizeof (struct pool_hdr),
             poolsize - sizeof (struct pool_hdr));

    LOG(3, "plp %p", plp);
    return plp;

err_free:
    Free((void *)plp->rwlockp);
err:
    LOG(4, "error clean up");
    int oerrno = errno;
    VALGRIND_REMOVE_PMEM_MAPPING(addr, poolsize);
    util_unmap(addr, poolsize);
    errno = oerrno;
    return NULL;
}
Esempio n. 18
0
File: set.c Progetto: bgbhpe/nvml
/*
 * util_replica_create -- (internal) create a new memory pool replica
 */
static int
util_replica_create(struct pool_set *set, unsigned repidx, int flags,
	const char *sig, uint32_t major, uint32_t compat, uint32_t incompat,
	uint32_t ro_compat, const unsigned char *prev_repl_uuid,
	const unsigned char *next_repl_uuid, const unsigned char *arch_flags)
{
	LOG(3, "set %p repidx %u flags %d sig %.8s major %u "
		"compat %#x incompat %#x ro_comapt %#x"
		"prev_repl_uuid %p next_repl_uuid %p arch_flags %p",
		set, repidx, flags, sig, major,
		compat, incompat, ro_compat,
		prev_repl_uuid, next_repl_uuid, arch_flags);

	struct pool_replica *rep = set->replica[repidx];

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize, 0);
	if (addr == MAP_FAILED) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	/* XXX investigate this idea of reserving space on Windows */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
				rep->part[0].addr, rep->part[0].size, 0);

	/* map all headers - don't care about the address */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_map_hdr(&rep->part[p], flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}
	}

	/* create headers, set UUID's */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_header_create(set, repidx, p, sig, major,
				compat, incompat, ro_compat,
				prev_repl_uuid, next_repl_uuid,
				arch_flags) != 0) {
			LOG(2, "header creation failed - part #%d", p);
			goto err;
		}
	}

	/* unmap all headers */
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);

	set->zeroed &= rep->part[0].created;

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = (char *)rep->part[0].addr + mapsize;

	/*
	 * map the remaining parts of the usable pool space (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, POOL_HDR_SIZE,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, POOL_HDR_SIZE);

		mapsize += rep->part[p].size;
		set->zeroed &= rep->part[p].created;
		addr = (char *)addr + rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	for (unsigned p = 0; p < rep->nparts; p++)
		util_unmap_hdr(&rep->part[p]);
	util_unmap_part(&rep->part[0]);
	errno = oerrno;
	return -1;
}
Esempio n. 19
0
File: pool.c Progetto: krzycz/nvml
/*
 * pool_parse_params -- parse pool type, file size and block size
 */
static int
pool_params_parse(const PMEMpoolcheck *ppc, struct pool_params *params,
	int check)
{
	LOG(3, NULL);
	int is_btt = ppc->args.pool_type == PMEMPOOL_POOL_TYPE_BTT;

	params->type = POOL_TYPE_UNKNOWN;
	params->is_poolset = util_is_poolset_file(ppc->path) == 1;

	int fd = util_file_open(ppc->path, NULL, 0, O_RDONLY);
	if (fd < 0)
		return -1;

	int ret = 0;

	os_stat_t stat_buf;
	ret = os_fstat(fd, &stat_buf);
	if (ret)
		goto out_close;

	ASSERT(stat_buf.st_size >= 0);

	params->mode = stat_buf.st_mode;

	struct pool_set *set;
	void *addr;
	if (params->is_poolset) {
		/*
		 * Need to close the poolset because it will be opened with
		 * flock in the following instructions.
		 */
		os_close(fd);
		fd = -1;

		if (check) {
			if (pool_set_map(ppc->path, &set, 0))
				return -1;
		} else {
			ret = util_poolset_create_set(&set, ppc->path,
				0, 0, true);
			if (ret < 0) {
				LOG(2, "cannot open pool set -- '%s'",
					ppc->path);
				return -1;
			}
			if (set->remote) {
				ERR("poolsets with remote replicas are not "
					"supported");
				return -1;
			}
			if (util_pool_open_nocheck(set,
						POOL_OPEN_IGNORE_BAD_BLOCKS))
				return -1;
		}

		params->size = set->poolsize;
		addr = set->replica[0]->part[0].addr;

		/*
		 * XXX mprotect for device dax with length not aligned to its
		 * page granularity causes SIGBUS on the next page fault.
		 * The length argument of this call should be changed to
		 * set->poolsize once the kernel issue is solved.
		 */
		if (mprotect(addr, set->replica[0]->repsize,
			PROT_READ) < 0) {
			ERR("!mprotect");
			goto out_unmap;
		}
		params->is_dev_dax = set->replica[0]->part[0].is_dev_dax;
		params->is_pmem = set->replica[0]->is_pmem;
	} else if (is_btt) {
		params->size = (size_t)stat_buf.st_size;
#ifndef _WIN32
		if (params->mode & S_IFBLK)
			if (ioctl(fd, BLKGETSIZE64, &params->size)) {
				ERR("!ioctl");
				goto out_close;
			}
#endif
		addr = NULL;
	} else {
		enum file_type type = util_file_get_type(ppc->path);
		if (type < 0) {
			ret = -1;
			goto out_close;
		}

		ssize_t s = util_file_get_size(ppc->path);
		if (s < 0) {
			ret = -1;
			goto out_close;
		}
		params->size = (size_t)s;
		int map_sync;
		addr = util_map(fd, params->size, MAP_SHARED, 1, 0, &map_sync);
		if (addr == NULL) {
			ret = -1;
			goto out_close;
		}
		params->is_dev_dax = type == TYPE_DEVDAX;
		params->is_pmem = params->is_dev_dax || map_sync ||
			pmem_is_pmem(addr, params->size);
	}

	/* stop processing for BTT device */
	if (is_btt) {
		params->type = POOL_TYPE_BTT;
		params->is_part = false;
		goto out_close;
	}

	struct pool_hdr hdr;
	memcpy(&hdr, addr, sizeof(hdr));
	util_convert2h_hdr_nocheck(&hdr);
	pool_params_from_header(params, &hdr);

	if (ppc->args.pool_type != PMEMPOOL_POOL_TYPE_DETECT) {
		enum pool_type declared_type =
			pool_check_type_to_pool_type(ppc->args.pool_type);
		if ((params->type & ~declared_type) != 0) {
			ERR("declared pool type does not match");
			errno = EINVAL;
			ret = 1;
			goto out_unmap;
		}
	}

	if (params->type == POOL_TYPE_BLK) {
		struct pmemblk pbp;
		memcpy(&pbp, addr, sizeof(pbp));
		params->blk.bsize = le32toh(pbp.bsize);
	} else if (params->type == POOL_TYPE_OBJ) {
		struct pmemobjpool *pop = addr;
		memcpy(params->obj.layout, pop->layout,
			PMEMOBJ_MAX_LAYOUT);
	}

out_unmap:
	if (params->is_poolset) {
		ASSERTeq(fd, -1);
		ASSERTne(addr, NULL);
		util_poolset_close(set, DO_NOT_DELETE_PARTS);
	} else if (!is_btt) {
		ASSERTne(fd, -1);
		ASSERTne(addr, NULL);
		munmap(addr, params->size);
	}
out_close:
	if (fd != -1)
		os_close(fd);
	return ret;
}
Esempio n. 20
0
File: pmem.c Progetto: ldorau/nvml
static inline
#endif
void *
pmem_map_fileU(const char *path, size_t len, int flags,
	mode_t mode, size_t *mapped_lenp, int *is_pmemp)
{
	LOG(3, "path \"%s\" size %zu flags %x mode %o mapped_lenp %p "
		"is_pmemp %p", path, len, flags, mode, mapped_lenp, is_pmemp);

	int oerrno;
	int fd;
	int open_flags = O_RDWR;
	int delete_on_err = 0;
	int file_type = util_file_get_type(path);
#ifdef _WIN32
	open_flags |= O_BINARY;
#endif

	if (file_type == OTHER_ERROR)
		return NULL;

	if (flags & ~(PMEM_FILE_ALL_FLAGS)) {
		ERR("invalid flag specified %x", flags);
		errno = EINVAL;
		return NULL;
	}

	if (file_type == TYPE_DEVDAX) {
		if (flags & ~(PMEM_DAX_VALID_FLAGS)) {
			ERR("flag unsupported for Device DAX %x", flags);
			errno = EINVAL;
			return NULL;
		} else {
			/* we are ignoring all of the flags */
			flags = 0;
			ssize_t actual_len = util_file_get_size(path);
			if (actual_len < 0) {
				ERR("unable to read Device DAX size");
				errno = EINVAL;
				return NULL;
			}
			if (len != 0 && len != (size_t)actual_len) {
				ERR("Device DAX length must be either 0 or "
					"the exact size of the device: %zu",
					actual_len);
				errno = EINVAL;
				return NULL;
			}
			len = 0;
		}
	}

	if (flags & PMEM_FILE_CREATE) {
		if ((os_off_t)len < 0) {
			ERR("invalid file length %zu", len);
			errno = EINVAL;
			return NULL;
		}
		open_flags |= O_CREAT;
	}

	if (flags & PMEM_FILE_EXCL)
		open_flags |= O_EXCL;

	if ((len != 0) && !(flags & PMEM_FILE_CREATE)) {
		ERR("non-zero 'len' not allowed without PMEM_FILE_CREATE");
		errno = EINVAL;
		return NULL;
	}

	if ((len == 0) && (flags & PMEM_FILE_CREATE)) {
		ERR("zero 'len' not allowed with PMEM_FILE_CREATE");
		errno = EINVAL;
		return NULL;
	}

	if ((flags & PMEM_FILE_TMPFILE) && !(flags & PMEM_FILE_CREATE)) {
		ERR("PMEM_FILE_TMPFILE not allowed without PMEM_FILE_CREATE");
		errno = EINVAL;
		return NULL;
	}

	if (flags & PMEM_FILE_TMPFILE) {
		if ((fd = util_tmpfile(path,
					OS_DIR_SEP_STR"pmem.XXXXXX",
					open_flags & O_EXCL)) < 0) {
			LOG(2, "failed to create temporary file at \"%s\"",
				path);
			return NULL;
		}
	} else {
		if ((fd = os_open(path, open_flags, mode)) < 0) {
			ERR("!open %s", path);
			return NULL;
		}
		if ((flags & PMEM_FILE_CREATE) && (flags & PMEM_FILE_EXCL))
			delete_on_err = 1;
	}

	if (flags & PMEM_FILE_CREATE) {
		/*
		 * Always set length of file to 'len'.
		 * (May either extend or truncate existing file.)
		 */
		if (os_ftruncate(fd, (os_off_t)len) != 0) {
			ERR("!ftruncate");
			goto err;
		}
		if ((flags & PMEM_FILE_SPARSE) == 0) {
			if ((errno = os_posix_fallocate(fd, 0,
							(os_off_t)len)) != 0) {
				ERR("!posix_fallocate");
				goto err;
			}
		}
	} else {
		ssize_t actual_size = util_file_get_size(path);
		if (actual_size < 0) {
			ERR("stat %s: negative size", path);
			errno = EINVAL;
			goto err;
		}

		len = (size_t)actual_size;
	}

	void *addr = pmem_map_register(fd, len, path, file_type == TYPE_DEVDAX);
	if (addr == NULL)
		goto err;

	if (mapped_lenp != NULL)
		*mapped_lenp = len;

	if (is_pmemp != NULL)
		*is_pmemp = pmem_is_pmem(addr, len);

	LOG(3, "returning %p", addr);

	VALGRIND_REGISTER_PMEM_MAPPING(addr, len);
	VALGRIND_REGISTER_PMEM_FILE(fd, addr, len, 0);

	(void) os_close(fd);

	return addr;
err:
	oerrno = errno;
	(void) os_close(fd);
	if (delete_on_err)
		(void) os_unlink(path);
	errno = oerrno;
	return NULL;
}
Esempio n. 21
0
/*
 * pmemlog_map_common -- (internal) map a log memory pool
 *
 * This routine does all the work, but takes a rdonly flag so internal
 * calls can map a read-only pool if required.
 */
static PMEMlog *
pmemlog_map_common(int fd, int rdonly)
{
	LOG(3, "fd %d rdonly %d", fd, rdonly);

	struct stat stbuf;
	if (fstat(fd, &stbuf) < 0) {
		LOG(1, "!fstat");
		return NULL;
	}

	if (stbuf.st_size < PMEMLOG_MIN_POOL) {
		LOG(1, "size %lld smaller than %zu",
				(long long)stbuf.st_size, PMEMLOG_MIN_POOL);
		errno = EINVAL;
		return NULL;
	}

	void *addr;
	if ((addr = util_map(fd, stbuf.st_size, rdonly)) == NULL)
		return NULL;	/* util_map() set errno, called LOG */

	/* check if the mapped region is located in persistent memory */
	int is_pmem = pmem_is_pmem(addr, stbuf.st_size);

	/* opaque info lives at the beginning of mapped memory pool */
	struct pmemlog *plp = addr;
	struct pool_hdr hdr;

	memcpy(&hdr, &plp->hdr, sizeof (hdr));

	if (util_convert_hdr(&hdr)) {
		/*
		 * valid header found
		 */
		if (strncmp(hdr.signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN)) {
			LOG(1, "wrong pool type: \"%s\"", hdr.signature);
			errno = EINVAL;
			goto err;
		}

		if (hdr.major != LOG_FORMAT_MAJOR) {
			LOG(1, "log pool version %d (library expects %d)",
				hdr.major, LOG_FORMAT_MAJOR);
			errno = EINVAL;
			goto err;
		}

		uint64_t hdr_start = le64toh(plp->start_offset);
		uint64_t hdr_end = le64toh(plp->end_offset);
		uint64_t hdr_write = le64toh(plp->write_offset);

		if ((hdr_start != roundup(sizeof (*plp),
					LOG_FORMAT_DATA_ALIGN)) ||
			(hdr_end != stbuf.st_size) || (hdr_start > hdr_end)) {
			LOG(1, "wrong start/end offsets (start: %ju end: %ju), "
				"pool size %lld",
				hdr_start, hdr_end, (long long)stbuf.st_size);
			errno = EINVAL;
			goto err;
		}

		if ((hdr_write > hdr_end) || (hdr_write < hdr_start)) {
			LOG(1, "wrong write offset "
				"(start: %ju end: %ju write: %ju)",
				hdr_start, hdr_end, hdr_write);
			errno = EINVAL;
			goto err;
		}

		LOG(3, "start: %ju, end: %ju, write: %ju",
			hdr_start, hdr_end, hdr_write);

		int retval = util_feature_check(&hdr, LOG_FORMAT_INCOMPAT,
							LOG_FORMAT_RO_COMPAT,
							LOG_FORMAT_COMPAT);
		if (retval < 0)
		    goto err;
		else if (retval == 0)
		    rdonly = 1;
	} else {
		/*
		 * no valid header was found
		 */
		if (rdonly) {
			LOG(1, "read-only and no header found");
			errno = EROFS;
			goto err;
		}
		LOG(3, "creating new log memory pool");

		struct pool_hdr *hdrp = &plp->hdr;

		memset(hdrp, '\0', sizeof (*hdrp));
		strncpy(hdrp->signature, LOG_HDR_SIG, POOL_HDR_SIG_LEN);
		hdrp->major = htole32(LOG_FORMAT_MAJOR);
		hdrp->compat_features = htole32(LOG_FORMAT_COMPAT);
		hdrp->incompat_features = htole32(LOG_FORMAT_INCOMPAT);
		hdrp->ro_compat_features = htole32(LOG_FORMAT_RO_COMPAT);
		uuid_generate(hdrp->uuid);
		hdrp->crtime = htole64((uint64_t)time(NULL));
		util_checksum(hdrp, sizeof (*hdrp), &hdrp->checksum, 1);
		hdrp->checksum = htole64(hdrp->checksum);

		/* store pool's header */
		libpmem_persist(is_pmem, hdrp, sizeof (*hdrp));

		/* create rest of required metadata */
		plp->start_offset = htole64(roundup(sizeof (*plp),
						LOG_FORMAT_DATA_ALIGN));
		plp->end_offset = htole64(stbuf.st_size);
		plp->write_offset = plp->start_offset;

		/* store non-volatile part of pool's descriptor */
		libpmem_persist(is_pmem, &plp->start_offset,
							3 * sizeof (uint64_t));
	}

	/*
	 * Use some of the memory pool area for run-time info.  This
	 * run-time state is never loaded from the file, it is always
	 * created here, so no need to worry about byte-order.
	 */
	plp->addr = addr;
	plp->size = stbuf.st_size;
	plp->rdonly = rdonly;
	plp->is_pmem = is_pmem;

	if ((plp->rwlockp = Malloc(sizeof (*plp->rwlockp))) == NULL) {
		LOG(1, "!Malloc for a RW lock");
		goto err;
	}

	if (pthread_rwlock_init(plp->rwlockp, NULL)) {
		LOG(1, "!pthread_rwlock_init");
		goto err_free;
	}

	/*
	 * If possible, turn off all permissions on the pool header page.
	 *
	 * The prototype PMFS doesn't allow this when large pages are in
	 * use. It is not considered an error if this fails.
	 */
	util_range_none(addr, sizeof (struct pool_hdr));

	/* the rest should be kept read-only (debug version only) */
	RANGE_RO(addr + sizeof (struct pool_hdr),
			stbuf.st_size - sizeof (struct pool_hdr));

	LOG(3, "plp %p", plp);
	return plp;

err_free:
	Free((void *)plp->rwlockp);
err:
	LOG(4, "error clean up");
	int oerrno = errno;
	util_unmap(addr, stbuf.st_size);
	errno = oerrno;
	return NULL;
}
Esempio n. 22
0
File: set.c Progetto: jebtang/nvml
/*
 * util_replica_create -- (internal) create a new memory pool replica
 */
static int
util_replica_create(struct pool_set *set, unsigned repidx, int flags,
	size_t hdrsize, const char *sig,
	uint32_t major, uint32_t compat, uint32_t incompat, uint32_t ro_compat)
{
	LOG(3, "set %p repidx %u flags %d hdrsize %zu sig %s major %u "
		"compat %#x incompat %#x ro_comapt %#x",
		set, repidx, flags, hdrsize, sig, major,
		compat, incompat, ro_compat);

	struct pool_replica *rep = set->replica[repidx];

	rep->repsize -= (rep->nparts - 1) * hdrsize;

	/* determine a hint address for mmap() */
	void *addr = util_map_hint(rep->repsize); /* XXX - randomize */
	if (addr == NULL) {
		ERR("cannot find a contiguous region of given size");
		return -1;
	}

	/* map the first part and reserve space for remaining parts */
	if (util_map_part(&rep->part[0], addr, rep->repsize, 0, flags) != 0) {
		LOG(2, "pool mapping failed - part #0");
		return -1;
	}

	VALGRIND_REGISTER_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	VALGRIND_REGISTER_PMEM_FILE(rep->part[0].fd,
				rep->part[0].addr, rep->part[0].size, 0);

	/* map all the remaining headers - don't care about the address */
	for (unsigned p = 1; p < rep->nparts; p++) {
		if (util_map_part(&rep->part[p], NULL,
				hdrsize, 0, flags) != 0) {
			LOG(2, "header mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, 0);
	}

	/* create headers, set UUID's */
	for (unsigned p = 0; p < rep->nparts; p++) {
		if (util_header_create(set, repidx, p, sig, major,
				compat, incompat, ro_compat) != 0) {
			LOG(2, "header creation failed - part #%d", p);
			goto err;
		}
	}

	set->zeroed &= rep->part[0].created;

	size_t mapsize = rep->part[0].filesize & ~(Pagesize - 1);
	addr = rep->part[0].addr + mapsize;

	/*
	 * unmap headers; map the remaining parts of the usable pool space
	 * (4K-aligned)
	 */
	for (unsigned p = 1; p < rep->nparts; p++) {
		/* unmap header */
		if (util_unmap_part(&rep->part[p]) != 0) {
			LOG(2, "header unmapping failed - part #%d", p);
		}

		/* map data part */
		if (util_map_part(&rep->part[p], addr, 0, hdrsize,
				flags | MAP_FIXED) != 0) {
			LOG(2, "usable space mapping failed - part #%d", p);
			goto err;
		}

		VALGRIND_REGISTER_PMEM_FILE(rep->part[p].fd,
			rep->part[p].addr, rep->part[p].size, hdrsize);

		mapsize += rep->part[p].size;
		set->zeroed &= rep->part[p].created;
		addr += rep->part[p].size;
	}

	rep->is_pmem = pmem_is_pmem(rep->part[0].addr, rep->part[0].size);

	ASSERTeq(mapsize, rep->repsize);

	/* calculate pool size - choose the smallest replica size */
	if (rep->repsize < set->poolsize)
		set->poolsize = rep->repsize;

	LOG(3, "replica addr %p", rep->part[0].addr);

	return 0;

err:
	LOG(4, "error clean up");
	int oerrno = errno;
	VALGRIND_REMOVE_PMEM_MAPPING(rep->part[0].addr, rep->part[0].size);
	util_unmap(rep->part[0].addr, rep->part[0].size);
	errno = oerrno;
	return -1;
}