Esempio n. 1
0
/*
 * Return the memory node descriptor for the calling thread.
 *
 * The descriptor is first looked up in thread-specific storage.  On a
 * miss, the ID from psc_memnode_getid() indexes the global psc_memnodes
 * array (under psc_memnodes_lock); the node is allocated and registered
 * there if no thread has created it yet.  The result is then cached in
 * thread-specific storage so later calls from this thread skip the
 * lock, regardless of which thread created the node.
 */
struct psc_memnode *
psc_memnode_get(void)
{
	struct psc_memnode *pmn, **pmnv;
	int memnid, rc;

	pmn = pthread_getspecific(psc_memnodes_key);
	if (pmn)
		return (pmn);

	memnid = psc_memnode_getid();
	spinlock(&psc_memnodes_lock);
	if (psc_dynarray_ensurelen(&psc_memnodes, memnid + 1) == -1)
		psc_fatalx("ensurelen");
	pmnv = psc_dynarray_get_mutable(&psc_memnodes);
	pmn = pmnv[memnid];
	if (pmn == NULL) {
		pmn = psc_alloc(sizeof(*pmn), PAF_NOLOG);
		INIT_SPINLOCK(&pmn->pmn_lock);
		psc_dynarray_init(&pmn->pmn_keys);
		psc_dynarray_setpos(&psc_memnodes, memnid, pmn);
	}
	freelock(&psc_memnodes_lock);

	/*
	 * Cache the node for this thread even when another thread
	 * created it; otherwise every call from this thread would fall
	 * through to the locked lookup above.
	 */
	rc = pthread_setspecific(psc_memnodes_key, pmn);
	if (rc)
		psc_fatalx("pthread_setspecific: %s",
		    strerror(rc));
	return (pmn);
}
Esempio n. 2
0
/*
 * Register a logging subsystem.
 * @ssid: ID the caller expects the subsystem to receive; it must equal
 *	the number of subsystems registered so far or the call is fatal.
 * @name: subsystem name; the pointer itself is stored, not a copy.
 *
 * The log level comes from the PSC_LOG_LEVEL_<name> environment
 * variable when set, otherwise from the global level (PSS_TMP is forced
 * to PLL_DEBUG).  When PSC_SYSLOG_<name> or PSC_SYSLOG is set, syslog
 * is opened on first use and the subsystem's pfl_syslog slot is set.
 */
void
psc_subsys_register(int ssid, const char *name)
{
	static int syslog_opened;
	char envname[BUFSIZ], *p;
	struct psc_subsys *ss;
	int nss;

	nss = psc_dynarray_len(&psc_subsystems);
	ss = psc_alloc(sizeof(*ss), PAF_NOLOG);
	ss->pss_name = name;

	/* Per-subsystem log level override. */
	snprintf(envname, sizeof(envname), "PSC_LOG_LEVEL_%s", name);
	p = getenv(envname);
	if (p == NULL) {
		ss->pss_loglevel = psc_log_getlevel_global();
		if (ssid == PSS_TMP)
			ss->pss_loglevel = PLL_DEBUG;
	} else {
		ss->pss_loglevel = psc_loglevel_fromstr(p);
		if (ss->pss_loglevel == PNLOGLEVELS)
			psc_fatalx("invalid %s value", name);
	}

	/* Per-subsystem or global syslog request. */
	snprintf(envname, sizeof(envname), "PSC_SYSLOG_%s", name);
	if (getenv(envname) != NULL || getenv("PSC_SYSLOG") != NULL) {
		if (!syslog_opened) {
			extern const char *__progname;
			static char idbuf[32];
			const char *ident;

			syslog_opened = 1;
			ident = __progname;

			/* Optional identity template; 'n' expands to the program name. */
			p = getenv("PFL_SYSLOG_IDENT");
			if (p != NULL) {
				ident = idbuf;
				(void)FMTSTR(idbuf, sizeof(idbuf), p,
				    FMTSTRCASE('n', "s", __progname)
				);
			}
			openlog(ident, LOG_CONS | LOG_NDELAY | LOG_PID,
			    LOG_DAEMON);
		}

		/* Grow the flag array just far enough to hold this subsystem. */
		pfl_syslog = psc_realloc(pfl_syslog,
		    sizeof(*pfl_syslog) * (nss + 1), PAF_NOLOG);
		pfl_syslog[nss] = 1;
	}

	/* IDs are positional: the new entry lands at index nss. */
	if (nss != ssid)
		psc_fatalx("bad ID %d for subsys %s [want %d], "
		    "check order", ssid, name, nss);
	psc_dynarray_add(&psc_subsystems, ss);
}
Esempio n. 3
0
/*
 * Look up the IPv4 address assigned to a network interface.
 * @ifa0: interface address list to search.  When NULL the routine
 *	aborts: the ioctl(2) fallback below is deliberately disabled.
 * @ifname: name of the interface to look for.
 * @sa: value-result; receives the address on success.
 * Returns 1 when an AF_INET address for @ifname was copied into @sa,
 * 0 otherwise.
 */
int
pflnet_getifaddr(const struct ifaddrs *ifa0, const char *ifname,
    union pfl_sockaddr *sa)
{
	const struct ifaddrs *ifa;
	struct ifreq ifr;
	int rc, s;

	if (ifa0) {
		for (ifa = ifa0; ifa; ifa = ifa->ifa_next) {
			/* entries without an address carry a NULL ifa_addr */
			if (ifa->ifa_addr == NULL)
				continue;
			if (strcmp(ifa->ifa_name, ifname) == 0 &&
			    ifa->ifa_addr->sa_family == AF_INET) {
				memcpy(&sa->sa, ifa->ifa_addr,
				    sizeof(sa->sin));
				return (1);
			}
		}
	} else {
		/* This path is marked broken; nothing below is reached. */
		psc_fatalx("broke");
		strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
		s = socket(AF_INET, SOCK_DGRAM, 0);
		if (s == -1)
			psc_fatal("socket");

		rc = ioctl(s, SIOCGIFADDR, &ifr);
		if (rc == -1)
			psc_fatal("ioctl SIOCGIFADDR");
		close(s);

		/* XXX the address in ifr is never copied into @sa */
	}
	return (0);
}
Esempio n. 4
0
/*
 * Fetch the item stored at a given index of a dynamic array.
 * @pda: dynamic array to read.
 * @pos: index of the item; must be non-negative and less than the
 *	array's current length, otherwise the call is fatal.
 */
void *
psc_dynarray_getpos(const struct psc_dynarray *pda, int pos)
{
	psc_assert(pos >= 0);

	/* reject anything at or past the current length */
	if (psc_dynarray_len(pda) <= pos)
		psc_fatalx("out of bounds array access");

	return (pda->pda_items[pos]);
}
Esempio n. 5
0
void
psc_memnode_init(void)
{
	int rc;

	rc = pthread_key_create(&psc_memnodes_key, NULL);
	if (rc)
		psc_fatalx("pthread_key_create: %s", strerror(rc));
}
Esempio n. 6
0
/*
 * Store an item at a given index of a dynamic array.
 * @pda: dynamic array to modify.
 * @pos: index to store at; must be non-negative and within the
 *	allocated capacity (pda_nalloc), otherwise the call is fatal.
 * @p: item to store.
 *
 * If @pos lies at or beyond the current end position, the end position
 * is advanced to just past @pos.
 */
void
psc_dynarray_setpos(struct psc_dynarray *pda, int pos, void *p)
{
	int end = pos + 1;

	psc_assert(pos >= 0);
	if (pda->pda_nalloc <= pos)
		psc_fatalx("out of bounds array access");

	pda->pda_items[pos] = p;

	/* extend the in-use range to cover the slot just written */
	if (pda->pda_pos < end)
		pda->pda_pos = end;
}
Esempio n. 7
0
/*
 * Store a value in a memory node's key table.
 * @pmn: memory node whose key table is updated.
 * @pos: slot index; the table is grown to hold it if necessary.
 * @val: value to store.
 *
 * The node lock is acquired via reqlock() and restored to its prior
 * state with ureqlock() on the way out.
 */
void
psc_memnode_setkey(struct psc_memnode *pmn, int pos, void *val)
{
	int waslocked = reqlock(&pmn->pmn_lock);

	if (psc_dynarray_ensurelen(&pmn->pmn_keys, pos + 1) == -1)
		psc_fatalx("ensurelen");
	psc_dynarray_setpos(&pmn->pmn_keys, pos, val);

	ureqlock(&pmn->pmn_lock, waslocked);
}
Esempio n. 8
0
/*
 * Locate an item in a dynamic array and remove it.
 * @pda: dynamic array to remove from.
 * @item: item to remove; it is fatal for the item to be absent.
 * Returns the index the item occupied before removal.
 * Notes: removal itself is delegated to psc_dynarray_removepos(); per
 *	the original note here, the last element of the dynarray is
 *	swapped into the slot opened up by the removal.
 */
int
psc_dynarray_removeitem(struct psc_dynarray *pda, const void *item)
{
	int pos = psc_dynarray_finditem(pda, item);

	if (pos == -1)
		psc_fatalx("element not found");

	psc_dynarray_removepos(pda, pos);
	return (pos);
}
Esempio n. 9
0
/*
 * Change the log level of one subsystem or of all of them.
 * @ssid: subsystem ID, or PSS_ALL to update every registered subsystem.
 * @newlevel: level to assign; must be in [0, PNLOGLEVELS).
 *
 * An out-of-range level or subsystem ID is fatal.
 */
void
psc_log_setlevel_ss(int ssid, int newlevel)
{
	struct psc_subsys **subsys;
	int n, nss;

	if (newlevel < 0 || newlevel >= PNLOGLEVELS)
		psc_fatalx("log level out of bounds (%d, max %d)",
		    newlevel, PNLOGLEVELS);

	nss = psc_dynarray_len(&psc_subsystems);
	subsys = psc_dynarray_get(&psc_subsystems);

	if (ssid == PSS_ALL) {
		for (n = 0; n < nss; n++)
			subsys[n]->pss_loglevel = newlevel;
		return;
	}

	if (ssid < 0 || ssid >= nss)
		psc_fatalx("subsystem out of bounds (%d, max %d)", ssid,
		    nss);
	else
		subsys[ssid]->pss_loglevel = newlevel;
}
Esempio n. 10
0
/*
 * Unmount the kstat file system at _PATH_KSTAT by running umount
 * through system(3).
 *
 * Failure to build the command line is fatal; failure to launch the
 * command is only logged as a warning.
 */
void
slm_unmount_kstat(void)
{
	char cmd[BUFSIZ];
	int len;

	len = snprintf(cmd, sizeof(cmd), "umount %s", _PATH_KSTAT);
	if (len == -1)
		psc_fatal("snprintf: umount %s", _PATH_KSTAT);
	/* a return at or past the buffer size means the command was cut short */
	if ((int)sizeof(cmd) <= len)
		psc_fatalx("snprintf: umount %s: too long", _PATH_KSTAT);

	if (system(cmd) == -1)
		psclog_warn("system(%s)", cmd);
}
Esempio n. 11
0
/*
 * Unmount a file system by running umount through system(3), falling
 * back to a forced (-f) and then a lazy (-l) unmount if the previous
 * attempt fails.
 * @mp: mount point path.
 *
 * Failure to build the command line is fatal; failure to launch the
 * command is only logged as a warning.
 *
 * NOTE(review): @mp is placed inside single quotes in a shell command;
 * a path containing a single quote would break out of the quoting.
 */
void
unmount(const char *mp)
{
	char cmd[BUFSIZ];
	int len;

	/* XXX do not let this hang */
	len = snprintf(cmd, sizeof(cmd),
	    "umount '%s' || umount -f '%s' || umount -l '%s'",
	    mp, mp, mp);
	if (len == -1)
		psc_fatal("snprintf: umount %s", mp);
	/* a return at or past the buffer size means the command was cut short */
	if (sizeof(cmd) <= (size_t)len)
		psc_fatalx("snprintf: umount %s: too long", mp);

	if (system(cmd) == -1)
		psclog_warn("system(%s)", cmd);
}
Esempio n. 12
0
/*
 * Sanity check a bmap replica state table: at least one of the first
 * @nr replicas must be in the VALID, GARBAGE_QUEUED, GARBAGE_SCHED,
 * TRUNC_QUEUED, or TRUNC_SCHED state, otherwise the call is fatal.
 * @repls: replica state table.
 * @nr: number of replicas to examine; must be in (0, SL_MAX_REPLICAS].
 */
void
mds_brepls_check(uint8_t *repls, int nr)
{
	int n, bitoff, st;

	psc_assert(nr > 0 && nr <= SL_MAX_REPLICAS);

	bitoff = 0;
	for (n = 0; n < nr; n++) {
		st = SL_REPL_GET_BMAP_IOS_STAT(repls, bitoff);
		if (st == BREPLST_VALID ||
		    st == BREPLST_GARBAGE_QUEUED ||
		    st == BREPLST_GARBAGE_SCHED ||
		    st == BREPLST_TRUNC_QUEUED ||
		    st == BREPLST_TRUNC_SCHED)
			return;
		/* each replica occupies SL_BITS_PER_REPLICA bits */
		bitoff += SL_BITS_PER_REPLICA;
	}
	psc_fatalx("no valid replica states exist");
}
Esempio n. 13
0
/*
 * Set up the slab buffer pool and start the reaper thread.
 *
 * The number of pool entries is the configured slab cache size divided
 * by SLASH_SLVR_SIZE; the same count is passed for all three sizing
 * arguments of psc_poolmaster_init().  A configured size below
 * SLAB_CACHE_MIN is fatal.
 */
void
slab_cache_init(void)
{
	size_t nslabs;

	psc_assert(SLASH_SLVR_SIZE <= LNET_MTU);

	if (slcfg_local->cfg_slab_cache_size < SLAB_CACHE_MIN)
		psc_fatalx("invalid slab_cache_size setting; "
		    "minimum allowed is %zu", SLAB_CACHE_MIN);

	nslabs = slcfg_local->cfg_slab_cache_size / SLASH_SLVR_SIZE;

	psc_poolmaster_init(&slab_poolmaster, struct slab,
	    slb_mgmt_lentry, PPMF_AUTO,
	    nslabs, nslabs, nslabs,
	    slab_cache_reap, "slab", NULL);
	slab_pool = psc_poolmaster_getmgr(&slab_poolmaster);

	/* reaper thread for the pool */
	pscthr_init(SLITHRT_BREAP, slibreapthr_main, 0, "slibreapthr");
}
Esempio n. 14
0
/**
 * _pfl_rsx_newreq - Create a new request and associate it with the import.
 * @imp: import portal on which to create the request.
 * @version: version of communication protocol of channel.
 * @op: operation ID of command to send.
 * @rqp: value-result of pointer to RPC request.
 * @nqlens: number of request buffers.
 * @qlens: lengths of request buffers; qlens[0] is always read.
 * @nplens: number of reply buffers.
 * @plens: lengths of reply buffers.
 * @mq0p: value-result; address of a pointer that receives the start of
 *	the first request buffer (set to NULL on failure).
 *
 * Returns 0 on success or -ENOMEM when the request cannot be prepared.
 * Failure to obtain the first request buffer is fatal.
 */
int
_pfl_rsx_newreq(struct pscrpc_import *imp, int version, int op,
    struct pscrpc_request **rqp, int nqlens, int *qlens,
    int nplens, int *plens, void *mq0p)
{
	struct pscrpc_request *rq;
	void **mqp = mq0p;

	*mqp = NULL;

	rq = pscrpc_prep_req(imp, version, op, nqlens, qlens, NULL);
	*rqp = rq;
	if (rq == NULL)
		return (-ENOMEM);

	/* Hand the caller the first request buffer. */
	*mqp = pscrpc_msg_buf(rq->rq_reqmsg, 0, qlens[0]);
	if (*mqp == NULL)
		psc_fatalx("pscrpc_msg_buf");

	/* Size the reply up front so asynchronous RPCs work, too. */
	rq->rq_replen = pscrpc_msg_size(nplens, plens);
	return (0);
}
Esempio n. 15
0
/*
 * Bring up the ZFS/FUSE backend: unmount any stale kstat mount, create
 * the new-file-system notification pipe and register it for polling,
 * create the FUSE session, start the kstat thread, and initialize
 * libzfs.  slm_unmount_kstat() is registered to run at exit.
 *
 * Returns the result of libzfs_init_fusesocket() if it is non-zero,
 * otherwise the result of libzfs_init().
 *
 * NOTE(review): the result of fuse_lowlevel_new() is not checked here.
 */
int
zfsslash2_init(void)
{
	extern struct fuse_lowlevel_ops pscfs_fuse_ops;
	extern struct fuse_session *fuse_session;
	extern struct pollfd pscfs_fds[];
	extern int newfs_fd[2], pscfs_nfds;
	extern char *fuse_mount_options;
	struct pscfs_args args = PSCFS_ARGS_INIT(0, NULL);
	int rc;

	/* clear out any kstat mount left behind */
	slm_unmount_kstat();

	if (pipe(newfs_fd) == -1)
		psc_fatal("pipe");

	/* the read end of the pipe is the only descriptor polled so far */
	pscfs_fds[0].fd = newfs_fd[0];
	pscfs_fds[0].events = POLLIN;
	pscfs_nfds = 1;

	fuse_session = fuse_lowlevel_new(&args.pfa_av, &pscfs_fuse_ops,
	    sizeof(pscfs_fuse_ops), NULL);

	pscthr_init(SLMTHRT_ZFS_KSTAT, slmzfskstatmthr_main, NULL, 0,
	    "slmzfskstatmthr");

	fuse_mount_options = "";
	rc = libzfs_init_fusesocket();
	if (!rc)
		rc = libzfs_init();
	atexit(slm_unmount_kstat);
	return (rc);
}
Esempio n. 16
0
/*
 * Register a list cache for external access.
 * @plc: the list cache to register.
 * @name: printf(3) format of name for list.
 * @ap: variable argument list for printf(3) name argument.
 *
 * The formatted name is stored in plc_name; a formatting error or a
 * name that does not fit (including its terminator) is fatal.  Two
 * opstats ("listcache.<name>.adds" and "listcache.<name>.removes") are
 * created and the cache is inserted into psc_listcaches in sorted
 * order.  Both the global list lock and the cache lock are held for
 * the duration.
 */
void
lc_vregister(struct psc_listcache *plc, const char *name, va_list ap)
{
	int rc;

	PLL_LOCK(&psc_listcaches);
	LIST_CACHE_LOCK(plc);

	rc = vsnprintf(plc->plc_name, sizeof(plc->plc_name), name, ap);
	if (rc == -1)
		psc_fatal("vsnprintf");
	/*
	 * vsnprintf(3) returns the length excluding the terminator, so
	 * a return equal to the buffer size already means truncation.
	 */
	if (rc >= (int)sizeof(plc->plc_name))
		psc_fatalx("plc_name is too large (%s)", name);

	plc->plc_nseen = pfl_opstat_initf(OPSTF_BASE10,
	    "listcache.%s.adds", plc->plc_name);
	plc->plc_st_removes = pfl_opstat_initf(OPSTF_BASE10,
	    "listcache.%s.removes", plc->plc_name);
	pll_add_sorted(&psc_listcaches, plc, lc_cmp);

	LIST_CACHE_ULOCK(plc);
	PLL_ULOCK(&psc_listcaches);
}
Esempio n. 17
0
/**
 * pjournal_dump - Dump the contents of a journal file.
 * @fn: journal filename to query.
 *
 * The header is validated (magic, version, checksum, file size, and
 * entry count vs. read size) and printed, then the entries are scanned
 * in batches of pjh_readsize.  Whether each valid entry is printed in
 * full is controlled by the `verbose' variable declared outside this
 * routine; it is not a parameter.  A summary is always printed.
 *
 * Each time mds restarts, it writes log entries starting from the very
 * first slot of the log.  Anyway, the function dumps all log entries,
 * some of them may be from previous incarnations of the MDS.
 *
 * Scanning stops at the first never-used (PJE_FORMAT) entry, which is
 * taken to mean the rest of the journal is unused, and also at the
 * first entry whose checksum does not match.
 */
void
pjournal_dump(const char *fn)
{
	int i, ntotal, nmagic, nchksum, nformat, ndump, first = 1;
	uint32_t slot, highest_slot = -1, lowest_slot = -1;
	uint64_t chksum, highest_xid = 0, lowest_xid = 0;
	struct psc_journal_enthdr *pje;
	struct psc_journal_hdr *pjh;
	struct psc_journal *pj;
	struct stat statbuf;
	unsigned char *jbuf;
	ssize_t nb, pjhlen;
	time_t ts;

	ntotal = nmagic = nchksum = nformat = ndump = 0;

	pj = PSCALLOC(sizeof(*pj));

	strlcpy(pj->pj_name, pfl_basename(fn), sizeof(pj->pj_name));

	pj->pj_fd = open(fn, O_RDWR | O_DIRECT);
	if (pj->pj_fd == -1)
		psc_fatal("failed to open journal %s", fn);
	if (fstat(pj->pj_fd, &statbuf) == -1)
		psc_fatal("failed to stat journal %s", fn);

	/*
	 * O_DIRECT may impose alignment restrictions so align the
	 * buffer and perform I/O in multiples of file system block
	 * size.
	 */
	pjhlen = PSC_ALIGN(sizeof(*pjh), statbuf.st_blksize);
	pjh = psc_alloc(pjhlen, PAF_PAGEALIGN);
	nb = pread(pj->pj_fd, pjh, pjhlen, 0);
	if (nb != pjhlen)
		psc_fatal("failed to read journal header");

	pj->pj_hdr = pjh;
	if (pjh->pjh_magic != PJH_MAGIC)
		psc_fatalx("journal header has a bad magic number "
		    "%#"PRIx64, pjh->pjh_magic);

	if (pjh->pjh_version != PJH_VERSION)
		psc_fatalx("journal header has an invalid version "
		    "number %d", pjh->pjh_version);

	/* The header checksum covers everything before pjh_chksum. */
	psc_crc64_init(&chksum);
	psc_crc64_add(&chksum, pjh, offsetof(struct psc_journal_hdr,
	    pjh_chksum));
	psc_crc64_fini(&chksum);

	if (pjh->pjh_chksum != chksum)
		psc_fatalx("journal header has an invalid checksum "
		    "value %"PSCPRIxCRC64" vs %"PSCPRIxCRC64,
		    pjh->pjh_chksum, chksum);

	if (S_ISREG(statbuf.st_mode) && statbuf.st_size !=
	    (off_t)(pjhlen + pjh->pjh_nents * PJ_PJESZ(pj)))
		psc_fatalx("size of the journal log %"PSCPRIdOFFT" does "
		    "not match specs in its header", statbuf.st_size);

	/* A zero read size would divide by zero below and never advance. */
	if (pjh->pjh_readsize == 0)
		psc_fatalx("journal header has a zero readsize");

	if (pjh->pjh_nents % pjh->pjh_readsize)
		psc_fatalx("number of entries %d is not a multiple of the "
		    "readsize %d", pjh->pjh_nents, pjh->pjh_readsize);

	ts = pjh->pjh_timestamp;

	printf("%s:\n"
	    "  version: %u\n"
	    "  entry size: %u\n"
	    "  number of entries: %u\n"
	    "  batch read size: %u\n"
	    "  entry start offset: %"PRId64"\n"
	    "  format time: %s"
	    "  uuid: %"PRIx64"\n"
	    "  %4s  %3s %4s %4s %s\n",
	    fn, pjh->pjh_version, PJ_PJESZ(pj), pjh->pjh_nents,
	    pjh->pjh_readsize, pjh->pjh_start_off,
	    ctime(&ts), pjh->pjh_fsuuid,
	    "idx", "typ", "xid", "txg", "details");

	jbuf = psc_alloc(PJ_PJESZ(pj) * pj->pj_hdr->pjh_readsize,
	    PAF_PAGEALIGN);
	for (slot = 0; slot < pjh->pjh_nents;
	    slot += pjh->pjh_readsize) {
		nb = pread(pj->pj_fd, jbuf, PJ_PJESZ(pj) *
		    pjh->pjh_readsize, PJ_GETENTOFF(pj, slot));
		if (nb != PJ_PJESZ(pj) * pjh->pjh_readsize)
			warn("failed to read %d log entries at slot %d",
			    pjh->pjh_readsize, slot);

		for (i = 0; i < pjh->pjh_readsize; i++) {
			ntotal++;
			pje = (void *)&jbuf[PJ_PJESZ(pj) * i];
			if (pje->pje_magic != PJE_MAGIC) {
				nmagic++;
				warnx("journal slot %d has a bad magic "
				    "number", slot + i);
				continue;
			}

			/*
			 * If we hit a new entry that is never used, we
			 * assume that the rest of the journal is never
			 * used.
			 */
			if (pje->pje_type == PJE_FORMAT) {
				nformat = nformat + pjh->pjh_nents -
				    (slot + i);
				goto done;
			}

			/* Entry checksum covers the header prefix and the payload. */
			psc_crc64_init(&chksum);
			psc_crc64_add(&chksum, pje, offsetof(
			    struct psc_journal_enthdr, pje_chksum));
			psc_crc64_add(&chksum, pje->pje_data,
			    pje->pje_len);
			psc_crc64_fini(&chksum);

			if (pje->pje_chksum != chksum) {
				nchksum++;
				warnx("journal slot %d has a corrupt "
				    "checksum", slot + i);
				goto done;
			}
			ndump++;
			if (verbose)
				pjournal_dump_entry(slot + i, pje);

			/* Track the slots holding the extreme transaction IDs. */
			if (first) {
				first = 0;
				highest_xid = lowest_xid = pje->pje_xid;
				highest_slot = lowest_slot = slot + i;
				continue;
			}
			if (highest_xid < pje->pje_xid) {
				highest_xid = pje->pje_xid;
				highest_slot = slot + i;
			}
			if (lowest_xid > pje->pje_xid) {
				lowest_xid = pje->pje_xid;
				lowest_slot = slot + i;
			}
		}

	}

 done:
	if (close(pj->pj_fd) == -1)
		printf("failed closing journal %s\n", fn);

	/*
	 * PJ_PJESZ() is used with pj (and its header) above, so release
	 * the entry buffer first, then the header, then pj itself.
	 */
	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(pj) * pjh->pjh_readsize);
	psc_free(pjh, PAF_PAGEALIGN, pjhlen);
	PSCFREE(pj);

	printf("----------------------------------------------\n"
	    "%8d slot(s) scanned\n"
	    "%8d in use\n"
	    "%8d formatted\n"
	    "%8d bad magic\n"
	    "%8d bad checksum(s)\n"
	    "lowest transaction ID=%#"PRIx64" (slot=%d)\n"
	    "highest transaction ID=%#"PRIx64" (slot=%d)\n",
	    ntotal, ndump, nformat, nmagic, nchksum,
	    lowest_xid, lowest_slot,
	    highest_xid, highest_slot);
}
Esempio n. 18
0
/*
 * Initialize an on-disk journal.
 * @fn: file path to store journal.
 * @nents: number of entries journal may contain if non-zero.
 * @entsz: size of a journal entry.
 * @rs: read size; must be non-zero and divide the number of entries.
 * @uuid: file system UUID recorded in the journal header.
 * @block_dev: non-zero when @fn is a block device, in which case the
 *	entry count is bounded by the device size and the `verbose'
 *	variable (declared outside this routine) is switched on.
 * Returns the number of entries created.
 */
uint32_t
sl_journal_format(const char *fn, uint32_t nents, uint32_t entsz,
    uint32_t rs, uint64_t uuid, int block_dev)
{
	uint32_t i, slot, max_nents;
	struct psc_journal_enthdr *pje;
	struct psc_journal pj;
	struct stat stb;
	unsigned char *jbuf;
	size_t numblocks;
	ssize_t nb;
	int fd;

	/*
	 * @rs is used as a divisor below; reject zero before the file
	 * is opened (and truncated).
	 */
	if (rs == 0)
		psc_fatalx("readsize must be non-zero");

	memset(&pj, 0, sizeof(pj));

	fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		psc_fatal("%s", fn);

	if (fstat(fd, &stb) == -1)
		psc_fatal("stat %s", fn);

	/*
	 * If the user does not specify nents, either use default or
	 * based on the block device size.
	 */
	if (nents == 0 && !block_dev)
		nents = SLJ_MDS_JNENTS;

	if (block_dev) {
		if (ioctl(fd, BLKGETSIZE, &numblocks) == -1)
			err(1, "BLKGETSIZE: %s", fn);

		/* show progress, it is going to be a while */
		verbose = 1;

		/* deal with large disks */
		max_nents = MIN(numblocks, SLJ_MDS_MAX_JNENTS);

		/* leave room on both ends */
		max_nents -= stb.st_blksize / SLJ_MDS_ENTSIZE + 16;

		/* efficiency */
		max_nents = (max_nents / rs) * rs;
		if (nents)
			nents = MIN(nents, max_nents);
		else
			nents = max_nents;
	}

	if (nents % rs)
		psc_fatalx("number of slots (%u) should be a multiple of "
		    "readsize (%u)", nents, rs);

	pj.pj_fd = fd;
	pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize));

	pj.pj_hdr->pjh_entsz = entsz;
	pj.pj_hdr->pjh_nents = nents;
	pj.pj_hdr->pjh_version = PJH_VERSION;
	pj.pj_hdr->pjh_readsize = rs;
	pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize);
	pj.pj_hdr->pjh_magic = PJH_MAGIC;
	pj.pj_hdr->pjh_timestamp = time(NULL);
	pj.pj_hdr->pjh_fsuuid = uuid;

	/* The header checksum covers everything before pjh_chksum. */
	psc_crc64_init(&pj.pj_hdr->pjh_chksum);
	psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr,
	    offsetof(struct psc_journal_hdr, pjh_chksum));
	psc_crc64_fini(&pj.pj_hdr->pjh_chksum);

	nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0);
	if ((size_t)nb != pj.pj_hdr->pjh_iolen)
		psc_fatalx("failed to write journal header: %s",
		    nb == -1 ? strerror(errno) : "short write");

	/* Build one batch of @rs blank (PJE_FORMAT) entries to stamp out. */
	nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize;
	jbuf = psc_alloc(nb, PAF_PAGEALIGN);
	for (i = 0; i < rs; i++) {
		pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i);
		pje->pje_magic = PJE_MAGIC;
		pje->pje_type = PJE_FORMAT;
		pje->pje_xid = PJE_XID_NONE;
		pje->pje_len = 0;

		psc_crc64_init(&pje->pje_chksum);
		psc_crc64_add(&pje->pje_chksum, pje,
		    offsetof(struct psc_journal_enthdr, pje_chksum));
		psc_crc64_add(&pje->pje_chksum, pje->pje_data,
		    pje->pje_len);
		psc_crc64_fini(&pje->pje_chksum);
	}

	/* From here on `i' counts progress dots on the current line. */
	i = 0;
	/* XXX use an option to write only one entry in fast create mode */
	for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) {
		nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs,
		    PJ_GETENTOFF(&pj, slot));
		if ((size_t)nb != PJ_PJESZ(&pj) * rs)
			psc_fatal("failed to write slot %u (%zd)",
			    slot, nb);
		if (verbose && slot % 262144 == 0) {
			printf(".");
			fflush(stdout);
			fsync(pj.pj_fd);
			if (++i == 80) {
				printf("\n");
				i = 0;
			}
		}
	}
	if (verbose && i)
		printf("\n");
	if (close(fd) == -1)
		psc_fatal("failed to close journal");

	/* PJ_PJESZ() is used with pj (and its header): free the header last. */
	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs);
	PSCFREE(pj.pj_hdr);

	return (nents);
}
Esempio n. 19
0
/*
 * Determine, via a routing socket, the name of the interface that
 * would be used to reach a destination address.
 * @sa: destination address.
 * @ifn: value-result buffer of IFNAMSIZ bytes for the interface name.
 *
 * An RTM_GET request carrying the destination and an empty AF_LINK
 * address is written to a PF_ROUTE socket; the reply's address list is
 * then walked for the RTA_IFP entry.  Any failure, including no
 * interface being reported, is fatal.
 */
__static void
pflnet_getifnfordst_rtsock(const struct sockaddr *sa, char ifn[IFNAMSIZ])
{
	struct {
		struct rt_msghdr	rtm;
#define RT_SPACE 512
		unsigned char		buf[RT_SPACE];
	} m;
	struct rt_msghdr *rtm = &m.rtm;
	union pfl_sockaddr psa, *psap;
	unsigned char *p = m.buf;
	ssize_t len, rc;
	size_t nlen;
	pid_t pid;
	int j, s;

	memset(&m, 0, sizeof(m));

#ifdef HAVE_SA_LEN
#define ADDSOCKADDR(p, sa)						\
	do {								\
		memcpy((p), (sa), (sa)->sa_len);			\
		(p) += SOCKADDR_GETLEN(sa);					\
	} while (0)
#else
#define ADDSOCKADDR(p, sa)						\
	do {								\
		memcpy((p), (sa), sizeof(*(sa)));			\
		(p) += SOCKADDR_GETLEN(sa);					\
	} while (0)
#endif

	/* Request payload: destination followed by an empty link address. */
	ADDSOCKADDR(p, sa);

	memset(&psa, 0, sizeof(psa));
	psa.sdl.sdl_family = AF_LINK;
#ifdef HAVE_SA_LEN
	psa.sdl.sdl_len = sizeof(psa.sdl);
#endif
	ADDSOCKADDR(p, &psa.sa);

	rtm->rtm_type = RTM_GET;
	rtm->rtm_flags = RTF_STATIC | RTF_UP | RTF_HOST | RTF_GATEWAY;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_addrs = RTA_DST | RTA_IFP;
#ifdef HAVE_RTM_HDRLEN
	rtm->rtm_hdrlen = sizeof(m.rtm);
#endif
	rtm->rtm_msglen = len = p - (unsigned char *)&m;

	s = socket(PF_ROUTE, SOCK_RAW, 0);
	if (s == -1)
		psc_fatal("socket");

	rc = write(s, &m, len);
	if (rc == -1)
		psc_fatal("writing to routing socket");
	if (rc != len)
		psc_fatalx("writing to routing socket: short write");

	/* Skip messages that are not the reply to our request. */
	pid = getpid();
	do {
		rc = read(s, &m, sizeof(m));
	} while (rc > 0 && (rtm->rtm_version != RTM_VERSION ||
	    rtm->rtm_seq || rtm->rtm_pid != pid));

	if (rc == -1)
		psc_fatal("read from routing socket");

	if (rtm->rtm_version != RTM_VERSION)
		psc_fatalx("routing message version mismatch; has=%d got=%d",
		    RTM_VERSION, rtm->rtm_version);
	if (rtm->rtm_errno)
		psc_fatalx("routing error: %s", strerror(rtm->rtm_errno));
	if (rtm->rtm_msglen > rc)
		psc_fatalx("routing message too large");

	close(s);

	/*
	 * Walk the addresses in the reply in rtm_addrs bit order,
	 * clearing each bit as its address is consumed.
	 */
	p = m.buf;
	for (; rtm->rtm_addrs; rtm->rtm_addrs &= ~(1 << j),
	    p += SOCKADDR_GETLEN(&psap->sa)) {
		j = ffs(rtm->rtm_addrs) - 1;
		psap = (void *)p;
		switch (1 << j) {
		case RTA_IFP:
			if (psap->sdl.sdl_family == AF_LINK &&
			    psap->sdl.sdl_nlen) {
				/*
				 * sdl_data holds sdl_nlen bytes of name
				 * followed by the link-layer address
				 * with no terminator, so copy exactly
				 * the name (bounded by the buffer).
				 */
				nlen = psap->sdl.sdl_nlen;
				if (nlen > IFNAMSIZ - 1)
					nlen = IFNAMSIZ - 1;
				memcpy(ifn, psap->sdl.sdl_data, nlen);
				ifn[nlen] = '\0';
				return;
			}
			break;
		}
	}

	psc_fatalx("interface message not received");
}
Esempio n. 20
0
/*
 * Look up, add, or delete an I/O system (IOS) ID in an inode's replica
 * table.
 * @vfsid: passed through to mds_inodes_odsync() when the table changes.
 * @ih: inode handle whose replica table is searched/modified.
 * @ios: IOS ID to look for.
 * @flag: one of IOSV_LOOKUPF_ADD, IOSV_LOOKUPF_DEL, or
 *	IOSV_LOOKUPF_LOOKUP; any other value is fatal.
 *
 * Return the index of the given IOS ID or a negative error code on failure.
 *
 * Error codes visible here: -ENOSPC when adding to a full table,
 * -SLERR_RES_BADTYPE when @ios does not resolve to a resource passing
 * RES_ISFS(), -ENOENT when the ID is absent and @flag is not ADD, and
 * whatever mds_inox_ensure_loaded() or mds_inodes_odsync() return on
 * failure.
 *
 * The first SL_DEF_REPLICAS entries are in the inode itself
 * (ino_repls); the remainder are in the extra inode block (inox_repls),
 * which is loaded on demand.
 */
int
_mds_repl_ios_lookup(int vfsid, struct slash_inode_handle *ih,
    sl_ios_id_t ios, int flag)
{
	int locked, rc;
	struct slm_inox_od *ix = NULL;
	struct sl_resource *res;
	struct fidc_membh *f;
	sl_replica_t *repl;
	uint32_t i, j, nr;
	char buf[LINE_MAX];

	/* Account for the kind of operation; reject unknown flags. */
	switch (flag) {
	    case IOSV_LOOKUPF_ADD:
		OPSTAT_INCR("replicate-add");
		break;
	    case IOSV_LOOKUPF_DEL:
		OPSTAT_INCR("replicate-del");
		break;
	    case IOSV_LOOKUPF_LOOKUP:
		OPSTAT_INCR("replicate-lookup");
		break;
	    default:
		psc_fatalx("Invalid IOS lookup flag %d", flag);
	}

	/*
	 * Can I assume that IOS ID are non-zeros.  If so, I can use
	 * zero to mark a free slot.  See sl_global_id_build().
	 */
	/*
	 * NOTE(review): nr and repl are read before INOH_RLOCK() is
	 * taken below -- confirm callers make this safe.
	 */
	f = inoh_2_fcmh(ih);
	nr = ih->inoh_ino.ino_nrepls;
	repl = ih->inoh_ino.ino_repls;
	locked = INOH_RLOCK(ih);

	psc_assert(nr <= SL_MAX_REPLICAS);
	if (nr == SL_MAX_REPLICAS && flag == IOSV_LOOKUPF_ADD) {
		DEBUG_INOH(PLL_WARN, ih, buf, "too many replicas");
		PFL_GOTOERR(out, rc = -ENOSPC);
	}

	res = libsl_id2res(ios);
	if (res == NULL || !RES_ISFS(res))
		PFL_GOTOERR(out, rc = -SLERR_RES_BADTYPE);

	/*
	 * 09/29/2016: Hit SLERR_SHORTIO in the function. Need more investigation.
	 */

	/*
	 * Return ENOENT by default for IOSV_LOOKUPF_DEL & IOSV_LOOKUPF_LOOKUP.
	 */
	rc = -ENOENT;

	/*
	 * Search the existing replicas to see if the given IOS is
	 * already there.
	 *
	 * The following code can step through zero IOS IDs just fine.
	 *
	 * i is the index over the whole table; j is the index into the
	 * array currently pointed to by repl (inode or extras).
	 */
	for (i = 0, j = 0; i < nr; i++, j++) {
		if (i == SL_DEF_REPLICAS) {
			/*
			 * The first few replicas are in the inode
			 * itself, the rest are in the extra inode
			 * block.
			 */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			ix = ih->inoh_extras;
			repl = ix->inox_repls;
			j = 0;
		}

		DEBUG_INOH(PLL_DEBUG, ih, buf, "is rep[%u](=%u) == %u ?",
		    j, repl[j].bs_id, ios);

		if (repl[j].bs_id == ios) {
			/*
			 * Luckily, this code is only called by mds_repl_delrq()
			 * for directories.
			 *
			 * Make sure that the logic works for at least the following
			 * edge cases:
			 *
			 *    (1) There is only one item in the basic array.
			 *    (2) There is only one item in the extra array.
			 *    (3) The number of items is SL_DEF_REPLICAS.
			 *    (4) The number of items is SL_MAX_REPLICAS.
			 */
			if (flag == IOSV_LOOKUPF_DEL) {
				/*
				 * Compact the array if the IOS is not the last
				 * one. The last one will be either overwritten
				 * or zeroed.  Note that we might move extra
				 * garbage at the end if the total number is less
				 * than SL_DEF_REPLICAS.
				 */
				if (i < SL_DEF_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1],
					    (SL_DEF_REPLICAS - j - 1) *
					    sizeof(*repl));
				}
				/*
				 * All items in the basic array, zero the last
				 * one and we are done.
				 */
				if (nr <= SL_DEF_REPLICAS) {
					repl[nr-1].bs_id = 0;
					goto syncit;
				}
				/*
				 * Now we know we have more than SL_DEF_REPLICAS
				 * items.  However, if we are in the basic array,
				 * we have not read the extra array yet. In this
				 * case, we should also move the first item from
				 * the extra array to the last one in the basic
				 * array (overwrite).
				 */
				if (i < SL_DEF_REPLICAS) {
					rc = mds_inox_ensure_loaded(ih);
					if (rc)
						goto out;
					ix = ih->inoh_extras;

					repl[SL_DEF_REPLICAS - 1].bs_id =
					    ix->inox_repls[0].bs_id;

					repl = ix->inox_repls;
					j = 0;
				}
				/*
				 * Compact the extra array unless the IOS is
				 * the last one, which will be zeroed.
				 */
				if (i < SL_MAX_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1],
					    (SL_INOX_NREPLICAS - j - 1) * 
					    sizeof(*repl));
				}

				/* repl points at the extra array here; clear its last used slot. */
				repl[nr-SL_DEF_REPLICAS-1].bs_id = 0;
 syncit:
				/* Record the shrunken count and write the inode out. */
				ih->inoh_ino.ino_nrepls = nr - 1;
				rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
				if (rc)
					goto out;
			}
			/* XXX EEXIST for IOSV_LOOKUPF_ADD? */
			/* Found: report the table-wide index (pre-removal for DEL). */
			rc = i; 
			goto out;
		}
	}

	/* It doesn't exist; add to inode replica table if requested. */
	if (flag == IOSV_LOOKUPF_ADD) {

		/* paranoid */
		psc_assert(i == nr);
		if (nr >= SL_DEF_REPLICAS) {
			/* be careful with the case of nr = SL_DEF_REPLICAS */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			repl = ih->inoh_extras->inox_repls;
			j = i - SL_DEF_REPLICAS;

		} else {
			repl = ih->inoh_ino.ino_repls;
			j = i;
		}

		/* Append at the first free slot of whichever array applies. */
		repl[j].bs_id = ios;

		DEBUG_INOH(PLL_DIAG, ih, buf, "add IOS(%u) at idx %d", ios, i);

		ih->inoh_ino.ino_nrepls = nr + 1;
		rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
		if (!rc)
			rc = i;
	}

 out:
	/* Restore the inode handle lock to the state found on entry. */
	INOH_URLOCK(ih, locked);
	return (rc);
}
Esempio n. 21
0
/*
 * Self-test for the variable-sized bitmap (psc_vbitmap) API.
 *
 * Takes no options or arguments.  Each stage exercises one group of
 * operations (next, setrange/unsetrange, clearall, resize, nfree,
 * invert, setval_range, ...) and aborts on the first mismatch; the
 * program exits with status 0 when every check passes.
 */
int
main(int argc, char *argv[])
{
	struct psc_vbitmap *vb, vba = VBITMAP_INIT_AUTO;
	int i, c, u, t;
	size_t elem, j;

	pfl_init();
	progname = argv[0];
	while ((c = getopt(argc, argv, "")) != -1)
		switch (c) {
		default:
			usage();
		}

	argc -= optind;
	if (argc)
		usage();

	/* An auto-initialized bitmap must hand out positions in order. */
	for (i = 0; i < 79; i++)
		if (psc_vbitmap_next(&vba, &j) != 1)
			psc_fatalx("psc_vbitmap_next failed with auto");
		else if (j != (size_t)i)
			psc_fatalx("elem %d is not supposed to be %zu", i, j);

	if ((vb = psc_vbitmap_new(213)) == NULL)
		psc_fatal("psc_vbitmap_new");

	/*
	 * setrange: 9 bits starting at 13.  Note that `i' deliberately
	 * carries over from one loop to the next in these sequences.
	 */
	psc_vbitmap_setrange(vb, 13, 9);
	psc_vbitmap_printbin1(vb);
	for (i = 0; i < 13; i++)
		psc_assert(psc_vbitmap_get(vb, i) == 0);
	for (j = 0; j < 9; j++, i++)
		psc_assert(psc_vbitmap_get(vb, i) == 1);
	for (j = 0; j < 25; j++, i++)
		psc_assert(psc_vbitmap_get(vb, i) == 0);

	psc_vbitmap_clearall(vb);
	for (i = 0; i < 213; i++)
		psc_assert(psc_vbitmap_get(vb, i) == 0);

	/* setrange: 3 bits starting at 25. */
	psc_vbitmap_setrange(vb, 25, 3);
	for (i = 0; i < 25; i++)
		psc_assert(psc_vbitmap_get(vb, i) == 0);
	for (j = 0; j < 3; j++, i++)
		psc_assert(psc_vbitmap_get(vb, i) == 1);
	for (j = 0; j < 25; j++, i++)
		psc_assert(psc_vbitmap_get(vb, i) == 0);

	psc_vbitmap_clearall(vb);
	for (i = 0; i < 213; i++)
		psc_assert(psc_vbitmap_get(vb, i) == 0);

	/* Claim every slot; one more request must then fail. */
	for (i = 0; i < 213; i++)
		if (!psc_vbitmap_next(vb, &elem))
			psc_fatalx("out of elements at pos %d", i);

	if (psc_vbitmap_next(vb, &elem))
		psc_fatalx("an unexpected extra unused elem was found; pos=%zu", elem);

	psc_vbitmap_getstats(vb, &u, &t);
	if (u != 213 || t != 213)
		psc_fatalx("wrong size, got %d,%d want %d", u, t, 213);

	/* unsetrange: release 2 bits starting at 13. */
	psc_vbitmap_unsetrange(vb, 13, 2);
	for (i = 0; i < 13; i++)
		psc_assert(psc_vbitmap_get(vb, i) == 1);
	for (j = 0; j < 2; j++, i++)
		psc_assert(psc_vbitmap_get(vb, i) == 0);
	for (j = 0; j < 25; j++, i++)
		psc_assert(psc_vbitmap_get(vb, i) == 1);

	if (psc_vbitmap_resize(vb, NELEM) == -1)
		psc_fatal("psc_vbitmap_resize");

	psc_assert(psc_vbitmap_getsize(vb) == NELEM);

	/*
	 * fill up bitmap: 213 slots were claimed and 2 released above,
	 * leaving NELEM - 211 free after the resize
	 */
	for (i = 0; i < NELEM - 211; i++)
		if (!psc_vbitmap_next(vb, &elem))
			psc_fatalx("out of elements at iter %d", i);

	/* try one past end of filled bitmap */
	if (psc_vbitmap_next(vb, &elem))
		psc_fatalx("an unexpected extra unused elem was found; pos=%zu", elem);

	/* free some slots */
	for (i = 0, elem = 0; elem < NELEM; i++, elem += NELEM / 10)
		psc_vbitmap_unset(vb, elem);

	/* The free count must survive a double inversion. */
	t = psc_vbitmap_nfree(vb);
	if (t != i)
		psc_fatalx("wrong number of free elements; has=%d want=%d", t, i);
	psc_vbitmap_invert(vb);
	t = psc_vbitmap_nfree(vb);
	if (t != NELEM - i)
		psc_fatalx("wrong number of inverted elements; has=%d want=%d",
		    t, NELEM - i);
	psc_vbitmap_invert(vb);
	t = psc_vbitmap_nfree(vb);
	if (t != i)
		psc_fatalx("wrong number of original elements; has=%d want=%d", t, i);

	/* try to re-grab the freed slots */
	for (i = 0; i <= 10; i++)
		if (!psc_vbitmap_next(vb, &elem))
			psc_fatalx("out of elements, request %d/%d", i, 10);

	/* try one past end of filled bitmap */
	if (psc_vbitmap_next(vb, &elem))
		psc_fatalx("an unexpected extra unused elem was found; pos=%zu", elem);

	/* Whole-range set/clear and the range/empty/full predicates. */
	psc_vbitmap_setval_range(vb, 0, NELEM, 0);
	psc_assert(pfl_vbitmap_israngeset(vb, 0, 581, 371));
	psc_assert(pfl_vbitmap_israngeset(vb, 1, 581, 371) == 0);
	psc_assert(pfl_vbitmap_israngeset(vb, 0, 581, 1));
	psc_assert(pfl_vbitmap_israngeset(vb, 1, 581, 1) == 0);
	psc_assert(pfl_vbitmap_isempty(vb));

	psc_vbitmap_setval_range(vb, 0, NELEM, 1);
	psc_assert(pfl_vbitmap_israngeset(vb, 1, 581, 371));
	psc_assert(pfl_vbitmap_israngeset(vb, 0, 581, 371) == 0);
	psc_assert(pfl_vbitmap_israngeset(vb, 1, 581, 1));
	psc_assert(pfl_vbitmap_israngeset(vb, 0, 581, 1) == 0);
	psc_assert(psc_vbitmap_isfull(vb));

	psc_vbitmap_free(vb);

	/* Grow an empty bitmap one bit at a time, alternating values. */
	if ((vb = psc_vbitmap_new(0)) == NULL)
		psc_fatal("psc_vbitmap_new");
	for (i = 1; i < 101; i++) {
		if (psc_vbitmap_resize(vb, i) == -1)
			psc_fatal("psc_vbitmap_resize");
		psc_vbitmap_setval(vb, i - 1, i % 2);
		psc_assert(psc_vbitmap_get(vb, i - 1) == i % 2);
	}

	exit(0);
}
Esempio n. 22
0
/*
 * Apply a translation matrix of residency states to one replica slot
 * of a bmap.
 * @b: bmap; its lock must already be held.
 * @tract: translation actions, indexed by the current state, giving
 *	the new state to assign; an entry of -1 leaves the state alone.
 *	May be NULL when nothing is to be modified.
 * @retifset: return values, indexed in the same manner as @tract; a
 *	non-zero entry for the current state becomes the return value.
 *	May be NULL.
 * @flags: behavioral flags (REPL_WALKF_SCIRCUIT is not permitted
 *	together with @tract or with a NULL @scircuit).
 * @off: bit offset into the bmap residency table for the IOS being
 *	changed/queried.
 * @scircuit: value-result for batch operations; cleared on entry and
 *	set to 1 when a short circuit is triggered.  May be NULL.
 * @cbf: optional callback invoked with the replica index and its
 *	current state.
 * @cbarg: argument passed through to @cbf.
 *
 * Returns retifset[state] when that entry is non-zero, otherwise 0.
 */
int
_mds_repl_bmap_apply(struct bmap *b, const int *tract,
    const int *retifset, int flags, int off, int *scircuit,
    brepl_walkcb_t cbf, void *cbarg)
{
	struct bmap_mds_info *bmi = bmap_2_bmi(b);
	struct timeval t_start, t_end, t_wait;
	int st, rc = 0;

	BMAP_LOCK_ENSURE(b);
	if (tract != NULL) {
		/*
		 * The caller must set the flag if modifications are
		 * made.  Time how long we wait for it to clear.
		 */
		PFL_GETTIMEVAL(&t_start);
		bmap_wait_locked(b, b->bcm_flags & BMAPF_REPLMODWR);
		PFL_GETTIMEVAL(&t_end);
		timersub(&t_end, &t_start, &t_wait);
		OPSTAT_ADD("bmap-wait-usecs",
		    t_wait.tv_sec * 1000000 + t_wait.tv_usec);

		/* keep a copy of the table as it was before any change */
		memcpy(bmi->bmi_orepls, bmi->bmi_repls,
		    sizeof(bmi->bmi_orepls));
		psc_assert((flags & REPL_WALKF_SCIRCUIT) == 0);
	}

	if (scircuit != NULL)
		*scircuit = 0;
	else
		psc_assert((flags & REPL_WALKF_SCIRCUIT) == 0);

	/* current state of the replica at this bit offset */
	st = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_repls, off);

	if (st >= NBREPLST)
		psc_fatalx("corrupt bmap, val = %d, bno = %d, fid="SLPRI_FID,
			 st, b->bcm_bmapno, fcmh_2_fid(b->bcm_fcmh));

	/* callback can also be used to track if we did make any changes */
	if (cbf != NULL)
		cbf(b, off / SL_BITS_PER_REPLICA, st, cbarg);

	/* return-value lookup comes first; it may end the call early */
	if (retifset != NULL && retifset[st]) {
		rc = retifset[st];
		if (flags & REPL_WALKF_SCIRCUIT) {
			*scircuit = 1;
			return (rc);
		}
	}

	/* translations are applied only after retifset was consulted */
	if (tract != NULL && tract[st] != -1) {
		DEBUG_BMAPOD(PLL_DEBUG, b, "before modification");
		SL_REPL_SET_BMAP_IOS_STAT(bmi->bmi_repls, off,
		    tract[st]);
		DEBUG_BMAPOD(PLL_DEBUG, b, "after modification");
	}

	return (rc);
}
Esempio n. 23
0
/*
 * Self-test for the psc_atomic16/32/64 operations.
 *
 * Options:
 *   -i n   value stored in niter (parsed with atoi)
 *   -n n   number of threads to start, stored in nthr (parsed with atoi)
 *
 * A fixed single-threaded sequence of checks runs first, then nthr
 * threads are started on startf() and synchronized with the main
 * thread through a barrier.  The TEST, TEST1, and TEST1V macros are
 * defined outside this block; presumably each applies the named
 * operation and compares against the trailing expected value(s) --
 * confirm against their definitions.
 */
int
main(int argc, char *argv[])
{
    struct thr *thr;
    pthread_t pthr;
    int c, rc, i;

    pfl_init();
    progname = argv[0];
    while ((c = getopt(argc, argv, "i:n:")) != -1)
        switch (c) {
        case 'i':
            niter = atoi(optarg);
            break;
        case 'n':
            nthr = atoi(optarg);
            break;
        default:
            usage();
        }
    /* no positional arguments are accepted */
    argc -= optind;
    if (argc)
        usage();

    /* 64-bit checks with values that do not fit in 32 bits */
    psc_assert(psc_atomic64_read(&v64) == UINT64_C(100000000000));
    TEST(psc_atomic64, set, &v64, &v64, UINT64_C(2000000000000), UINT64_C(2000000000000));
    TEST(psc_atomic64, add, &v64, &v64, 15, UINT64_C(2000000000015));
    TEST(psc_atomic64, sub, &v64, &v64, 9, UINT64_C(2000000000006));
    TEST1(psc_atomic64, inc, &v64, UINT64_C(2000000000007));
    TEST1(psc_atomic64, dec, &v64, UINT64_C(2000000000006));

    /* the same sequence is repeated for each width: 16, 32, then 64 bits */
    psc_atomic16_set(&v16, 2);
    TEST(psc_atomic16, set, &v16, &v16, 200, 200);
    TEST(psc_atomic16, add, &v16, &v16, 15, 215);
    TEST(psc_atomic16, sub, &v16, &v16, 9, 206);
    TEST1(psc_atomic16, inc, &v16, 207);
    TEST1(psc_atomic16, dec, &v16, 206);
    TEST1V(psc_atomic16, dec_and_test0, &v16, 205, 0);
    TEST(psc_atomic16, set, &v16, &v16, 1, 1);
    TEST1V(psc_atomic16, dec_and_test0, &v16, 0, 1);
    TEST(psc_atomic16, setmask, &v16, &v16, 0x75, 0x75);
    TEST(psc_atomic16, clearmask, &v16, &v16, 0x41, 0x34);
    TEST(psc_atomic16, set, &v16, &v16, 0, 0);

    psc_atomic32_set(&v32, 2);
    TEST(psc_atomic32, set, &v32, &v32, 200, 200);
    TEST(psc_atomic32, add, &v32, &v32, 15, 215);
    TEST(psc_atomic32, sub, &v32, &v32, 9, 206);
    TEST1(psc_atomic32, inc, &v32, 207);
    TEST1(psc_atomic32, dec, &v32, 206);
    TEST1V(psc_atomic32, dec_and_test0, &v32, 205, 0);
    TEST(psc_atomic32, set, &v32, &v32, 1, 1);
    TEST1V(psc_atomic32, dec_and_test0, &v32, 0, 1);
    TEST(psc_atomic32, setmask, &v32, &v32, 0x75, 0x75);
    TEST(psc_atomic32, clearmask, &v32, &v32, 0x41, 0x34);
    TEST(psc_atomic32, set, &v32, &v32, 0, 0);

    psc_atomic64_set(&v64, 2);
    TEST(psc_atomic64, set, &v64, &v64, 200, 200);
    TEST(psc_atomic64, add, &v64, &v64, 15, 215);
    TEST(psc_atomic64, sub, &v64, &v64, 9, 206);
    TEST1(psc_atomic64, inc, &v64, 207);
    TEST1(psc_atomic64, dec, &v64, 206);
    TEST1V(psc_atomic64, dec_and_test0, &v64, 205, 0);
    TEST(psc_atomic64, set, &v64, &v64, 1, 1);
    TEST1V(psc_atomic64, dec_and_test0, &v64, 0, 1);
    TEST(psc_atomic64, setmask, &v64, &v64, 0x75, 0x75);
    TEST(psc_atomic64, clearmask, &v64, &v64, 0x41, 0x34);
    TEST(psc_atomic64, set, &v64, &v64, 0, 0);

    /* v16 was left at 0 above: bump it to 1 and take it back down */
    TEST1(psc_atomic16, inc, &v16, 1);
    TEST1V(psc_atomic16, dec_and_test0, &v16, 0, 1);

    /* the barrier counts the nthr workers plus this thread */
    rc = pthread_barrier_init(&barrier, NULL, nthr + 1);
    if (rc)
        psc_fatalx("pthread_barrier_init: %s", strerror(rc));
    for (i = 0; i < nthr; i++) {
        /* each worker gets its own descriptor recording its position */
        thr = PSCALLOC(sizeof(*thr));
        thr->pos = i;
        rc = pthread_create(&pthr, NULL, startf, thr);
        if (rc)
            psc_fatalx("pthread_create: %s", strerror(rc));
    }
    /*
     * Two rendezvous with the workers -- presumably one at the start
     * and one at the end of their run; confirm against startf().
     */
    pthread_barrier_wait(&barrier);
    pthread_barrier_wait(&barrier);
    exit(0);
}
Esempio n. 24
0
__static void
pflnet_getifnfordst_rtnetlink(const struct sockaddr *sa,
    char ifn[IFNAMSIZ])
{
	struct {
		struct nlmsghdr	nmh;
		struct rtmsg	rtm;
#define RT_SPACE 8192
		unsigned char	buf[RT_SPACE];
	} rq;
	const struct sockaddr_in *sin;
	struct nlmsghdr *nmh;
	struct rtattr *rta;
	struct rtmsg *rtm;
	ssize_t rc, rca;
	int s, ifidx;
	size_t nb;

	sin = (void *)sa;

	s = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (s == -1)
		psc_fatal("socket");

	memset(&rq, 0, sizeof(rq));
	rq.nmh.nlmsg_len = NLMSG_SPACE(sizeof(rq.rtm)) +
	    RTA_LENGTH(sizeof(sin->sin_addr));
	rq.nmh.nlmsg_flags = NLM_F_REQUEST;
	rq.nmh.nlmsg_type = RTM_GETROUTE;

	rq.rtm.rtm_family = sa->sa_family;
	rq.rtm.rtm_protocol = RTPROT_UNSPEC;
	rq.rtm.rtm_table = RT_TABLE_MAIN;
	/* # bits filled in target addr */
	rq.rtm.rtm_dst_len = sizeof(sin->sin_addr) * NBBY;
	rq.rtm.rtm_scope = RT_SCOPE_LINK;

	rta = (void *)((char *)&rq + NLMSG_SPACE(sizeof(rq.rtm)));
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(sizeof(sin->sin_addr));
	memcpy(RTA_DATA(rta), &sin->sin_addr,
	    sizeof(sin->sin_addr));

	errno = 0;
	rc = write(s, &rq, rq.nmh.nlmsg_len);
	if (rc != (ssize_t)rq.nmh.nlmsg_len)
		psc_fatal("routing socket length mismatch");

	rc = read(s, &rq, sizeof(rq));
	if (rc == -1)
		psc_fatal("routing socket read");
	close(s);

	switch (rq.nmh.nlmsg_type) {
	case NLMSG_ERROR: {
		struct nlmsgerr *nlerr;

		nlerr = NLMSG_DATA(&rq.nmh);
		psc_fatalx("netlink: %s", strerror(nlerr->error));
	    }
	case NLMSG_DONE:
		psc_fatalx("netlink: unexpected EOF");
	}

	nmh = &rq.nmh;
	nb = rc;
	for (; NLMSG_OK(nmh, nb); nmh = NLMSG_NEXT(nmh, nb)) {
		rtm = NLMSG_DATA(nmh);

		if (rtm->rtm_table != RT_TABLE_MAIN)
			continue;

		rta = RTM_RTA(rtm);
		rca = RTM_PAYLOAD(nmh);

		for (; RTA_OK(rta, rca); rta = RTA_NEXT(rta, rca)) {
			switch (rta->rta_type) {
			case RTA_OIF:
				memcpy(&ifidx, RTA_DATA(rta),
				    sizeof(ifidx));
				pflnet_getifname(ifidx, ifn);
				return;
			}
		}
	}
	psc_fatalx("no route for addr");
}
Esempio n. 25
0
/**
 * pjournal_format - Initialize an on-disk journal.
 * @fn: file path to store journal.
 * @nents: number of entries journal may contain.
 * @entsz: size of a journal entry.
 * Returns 0 on success, errno on error.
 */
void
pjournal_format(const char *fn, uint32_t nents, uint32_t entsz,
    uint32_t rs, uint64_t uuid)
{
	struct psc_journal_enthdr *pje;
	struct psc_journal pj;
	struct stat stb;
	unsigned char *jbuf;
	uint32_t i, j, slot;
	int rc, fd;
	ssize_t nb;

	if (nents % rs)
		psc_fatalx("number of slots (%u) should be a multiple of "
		    "readsize (%u)", nents, rs);

	memset(&pj, 0, sizeof(struct psc_journal));

	rc = 0;
	fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		psc_fatal("%s", fn);

	if (fstat(fd, &stb) == -1)
		psc_fatal("stat %s", fn);

	pj.pj_fd = fd;
	pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize));

	pj.pj_hdr->pjh_entsz = entsz;
	pj.pj_hdr->pjh_nents = nents;
	pj.pj_hdr->pjh_version = PJH_VERSION;
	pj.pj_hdr->pjh_readsize = rs;
	pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize);
	pj.pj_hdr->pjh_magic = PJH_MAGIC;
	pj.pj_hdr->pjh_timestamp = time(NULL);
	pj.pj_hdr->pjh_fsuuid = uuid;

	psc_crc64_init(&pj.pj_hdr->pjh_chksum);
	psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr,
	    offsetof(struct psc_journal_hdr, pjh_chksum));
	psc_crc64_fini(&pj.pj_hdr->pjh_chksum);

	nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0);
	if ((size_t)nb != pj.pj_hdr->pjh_iolen)
		psc_fatalx("failed to write journal header: %s",
		    nb == -1 ? strerror(errno) : "short write");

	nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize;
	jbuf = psc_alloc(nb, PAF_PAGEALIGN);
	for (i = 0; i < rs; i++) {
		pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i);
		pje->pje_magic = PJE_MAGIC;
		pje->pje_type = PJE_FORMAT;
		pje->pje_xid = PJE_XID_NONE;
		pje->pje_len = 0;

		psc_crc64_init(&pje->pje_chksum);
		psc_crc64_add(&pje->pje_chksum, pje,
		    offsetof(struct psc_journal_enthdr, pje_chksum));
		psc_crc64_add(&pje->pje_chksum, pje->pje_data,
		    pje->pje_len);
		psc_crc64_fini(&pje->pje_chksum);
	}

	j = 0;
	/* XXX use an option to write only one entry in fast create mode */
	for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) {
		nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs,
		    PJ_GETENTOFF(&pj, slot));
		if ((size_t)nb != PJ_PJESZ(&pj) * rs)
			psc_fatal("failed to write slot %u (%zd)",
			    slot, nb);
		if (verbose && slot % 262144 == 0) {
			printf(".");
			fflush(stdout);
			fsync(pj.pj_fd);
			if (++j == 80) {
				printf("\n");
				j = 0;
			}
		}
	}
	if (verbose && j)
		printf("\n");
	if (close(fd) == -1)
		psc_fatal("failed to close journal");
	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs);
	psclog_info("journal %s formatted: %d slots, %d readsize, error=%d",
	    fn, nents, rs, rc);
}
Esempio n. 26
0
/*
 * Test driver for the psc_vbitmap API.  Accepts no options or
 * arguments.  Each step below performs an operation on a bitmap and
 * verifies the result; a failed check is reported through
 * psc_fatal()/psc_fatalx()/pfl_assert().
 */
int
main(int argc, char *argv[])
{
	struct psc_vbitmap *vb, vba = VBITMAP_INIT_AUTO;
	size_t elem, j, cap, len, off;
	int i, c, u, t;

	pfl_init();
	while ((c = getopt(argc, argv, "")) != -1)
		switch (c) {
		default:
			usage();
		}

	argc -= optind;
	if (argc)
		usage();

	/* auto-initialized bitmap must hand out positions 0..78 in order */
	for (i = 0; i < 79; i++)
		if (psc_vbitmap_next(&vba, &j) != 1)
			psc_fatalx("psc_vbitmap_next failed with auto");
		else if (j != (size_t)i)
			psc_fatalx("elem %d is not supposed to be %zu", i, j);

	if ((vb = psc_vbitmap_new(213)) == NULL)
		psc_fatal("psc_vbitmap_new");

	/* setrange: bits before and after the range must stay clear */
	psc_vbitmap_setrange(vb, 13, 9);
	psc_vbitmap_printbin1(vb);
	for (i = 0; i < 13; i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 0);
	for (j = 0; j < 9; j++, i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 1);
	for (j = 0; j < 25; j++, i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 0);

	psc_vbitmap_clearall(vb);
	for (i = 0; i < 213; i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 0);

	psc_vbitmap_setrange(vb, 25, 3);
	for (i = 0; i < 25; i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 0);
	for (j = 0; j < 3; j++, i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 1);
	for (j = 0; j < 25; j++, i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 0);

	psc_vbitmap_clearall(vb);
	for (i = 0; i < 213; i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 0);

	/* claim every slot, then confirm nothing is left */
	for (i = 0; i < 213; i++)
		if (!psc_vbitmap_next(vb, &elem))
			psc_fatalx("out of elements at pos %d", i);

	if (psc_vbitmap_next(vb, &elem))
		psc_fatalx("an unexpected extra unused elem was found; pos=%zu", elem);

	psc_vbitmap_getstats(vb, &u, &t);
	if (u != 213 || t != 213)
		psc_fatalx("wrong size, got %d,%d want %d", u, t, 213);

	/* unsetrange: only the two bits in the range may be cleared */
	psc_vbitmap_unsetrange(vb, 13, 2);
	for (i = 0; i < 13; i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 1);
	for (j = 0; j < 2; j++, i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 0);
	for (j = 0; j < 25; j++, i++)
		pfl_assert(psc_vbitmap_get(vb, i) == 1);

	if (psc_vbitmap_resize(vb, NELEM) == -1)
		psc_fatal("psc_vbitmap_resize");

	pfl_assert(psc_vbitmap_getsize(vb) == NELEM);

	/* fill up bitmap */
	for (i = 0; i < NELEM - 211; i++)
		if (!psc_vbitmap_next(vb, &elem))
			psc_fatalx("out of elements at iter %d", i);

	/* try one past end of filled bitmap */
	if (psc_vbitmap_next(vb, &elem))
		psc_fatalx("an unexpected extra unused elem was found; pos=%zu", elem);

	/* free some slots */
	for (i = 0, elem = 0; elem < NELEM; i++, elem += NELEM / 10)
		psc_vbitmap_unset(vb, elem);

	/* i now holds the number of slots freed above */
	t = psc_vbitmap_nfree(vb);
	if (t != i)
		psc_fatalx("wrong number of free elements; has=%d want=%d", t, i);
	psc_vbitmap_invert(vb);
	t = psc_vbitmap_nfree(vb);
	if (t != NELEM - i)
		psc_fatalx("wrong number of inverted elements; has=%d want=%d",
		    t, NELEM - i);
	psc_vbitmap_invert(vb);
	t = psc_vbitmap_nfree(vb);
	if (t != i)
		psc_fatalx("wrong number of original elements; has=%d want=%d", t, i);

	/* try to re-grab the freed slots */
	for (i = 0; i <= 10; i++)
		if (!psc_vbitmap_next(vb, &elem))
			psc_fatalx("out of elements, request %d/%d", i, 10);

	/* try one past end of filled bitmap */
	if (psc_vbitmap_next(vb, &elem))
		psc_fatalx("an unexpected extra unused elem was found; pos=%zu", elem);

	/* setval_range across the whole bitmap, both values */
	psc_vbitmap_setval_range(vb, 0, NELEM, 0);
	CHECK_RANGE(vb, 0, 581, 371);
	CHECK_RANGE(vb, 0, 581, 1);
	pfl_assert(pfl_vbitmap_isempty(vb));

	psc_vbitmap_setval_range(vb, 0, NELEM, 1);
	CHECK_RANGE(vb, 1, 581, 371);
	CHECK_RANGE(vb, 1, 581, 1);
	pfl_assert(psc_vbitmap_isfull(vb));

	psc_vbitmap_free(vb);

	/* zero-length PVBF_AUTO bitmap grown by explicit resize */
	vb = psc_vbitmap_newf(0, PVBF_AUTO);
	pfl_assert(vb);
	pfl_assert(pfl_vbitmap_isempty(vb));
	pfl_assert(psc_vbitmap_getsize(vb) == 0);
	pfl_assert(psc_vbitmap_resize(vb, 6) == 0);
	pfl_assert(psc_vbitmap_getsize(vb) == 6);
	pfl_assert(pfl_vbitmap_isempty(vb));
	psc_vbitmap_free(vb);

	vb = psc_vbitmap_newf(0, PVBF_AUTO);
	cap = psc_vbitmap_getsize(vb);
	off = 0;
	len = 6;
	if (off + len > cap)
		/* a failed resize would invalidate the checks that follow */
		if (psc_vbitmap_resize(vb, off + len) == -1)
			psc_fatal("psc_vbitmap_resize");
	psc_vbitmap_setrange(vb, off, len);
	ENSURE(vb, "111111");
	CHECK_RANGE(vb, 1, 2, 4);

	psc_vbitmap_clearall(vb);
	pfl_assert(psc_vbitmap_setval_range(vb, 2, 4, 1) == 0);
	CHECK_RANGE(vb, 1, 2, 4);
	ENSURE(vb, "001111");
	psc_vbitmap_free(vb);

	/* grow one bit at a time, alternating the value of the new bit */
	vb = psc_vbitmap_new(0);
	for (i = 1; i < 101; i++) {
		if (psc_vbitmap_resize(vb, i) == -1)
			psc_fatal("psc_vbitmap_resize");
		psc_vbitmap_setval(vb, i - 1, i % 2);
		pfl_assert(psc_vbitmap_get(vb, i - 1) == i % 2);
	}
	psc_vbitmap_free(vb);

	/*
	 * For every capacity below 100, set each interior range and
	 * verify the zeros-ones-zeros layout around it.
	 */
	for (cap = 0; cap < 100; cap++) {
		for (off = 1; off < cap; off++) {
			for (len = 1; off + len < cap; len++) {
				size_t last;

				last = cap - off - len;
				vb = psc_vbitmap_new(cap);
				psc_vbitmap_setrange(vb, off, len);
				ENSURE(vb, "%0*d%*d%0*d", (int)off, 0,
				    (int)len, 1, (int)last, 0);
				CHECK_RANGE(vb, 0, 0, off);
				CHECK_RANGE(vb, 1, off, len);
				CHECK_RANGE(vb, 0, off+len, last);
				psc_vbitmap_free(vb);
			}
		}
	}

	vb = psc_vbitmap_new(8200);
	pfl_assert(pfl_vbitmap_isempty(vb));
	CHECK_RANGE(vb, 0, 8, 8192);
	psc_vbitmap_free(vb);

	/* abbreviated string form */
	vb = psc_vbitmap_new(16);
	ENSURE_ABBR(vb, "0:8,00000000");
	psc_vbitmap_free(vb);

	vb = psc_vbitmap_new(40);
	psc_vbitmap_setval_range(vb, 0, 10, 0);
	psc_vbitmap_setval_range(vb, 10, 10, 1);
	ENSURE(vb, "0000000000111111111100000000000000000000");
	ENSURE_ABBR(vb, "0:10,1:10,0:12,00000000");
	psc_vbitmap_free(vb);

	/* range queries around a single set bit at the last position */
	vb = psc_vbitmap_new(16);
	ENSURE(vb, "0000000000000000");
	pfl_assert(pfl_vbitmap_isempty(vb));
	CHECK_RANGE(vb, 0, 8, 8);
	CHECK_RANGE(vb, 0, 9, 7);
	psc_vbitmap_setval(vb, 15, 1);
	ENSURE(vb, "0000000000000001");
	CHECK_RANGE(vb, 0, 9, 6);
	CHECK_RANGE(vb, 1, 15, 1);
	pfl_assert(pfl_vbitmap_israngeset(vb, 0, 8, 8) == 0);
	pfl_assert(pfl_vbitmap_israngeset(vb, 1, 8, 8) == 0);
	psc_vbitmap_clearall(vb);
	pfl_assert(pfl_vbitmap_isempty(vb));
	psc_vbitmap_free(vb);

	exit(0);
}
Esempio n. 27
0
/*
 * Journal utility entry point.  Options parsed below:
 *	-b file		journal file path (otherwise built from the data
 *			directory and SL_FN_OPJOURNAL)
 *	-D dir		data directory
 *	-f		format the journal (requires -u)
 *	-n nentries	number of journal entries, decimal, 1..INT_MAX
 *	-q		dump the journal via pjournal_dump()
 *	-u fsuuid	file system UUID, hexadecimal, nonzero
 *	-v		verbose
 * Without -f or -q, usage() is invoked.
 */
int
main(int argc, char *argv[])
{
	ssize_t nents = SLJ_MDS_JNENTS;
	char *endp, fn[PATH_MAX];
	uint64_t uuid = 0;
	long l;
	/* getopt() returns int; a plain char may be unsigned and never == -1 */
	int c;

	pfl_init();
	sl_subsys_register();

	fn[0] = '\0';
	progname = argv[0];
	while ((c = getopt(argc, argv, "b:D:fn:qu:v")) != -1)
		switch (c) {
		case 'b':
			strlcpy(fn, optarg, sizeof(fn));
			break;
		case 'D':
			datadir = optarg;
			break;
		case 'f':
			format = 1;
			break;
		case 'n':
			endp = NULL;
			l = strtol(optarg, &endp, 10);
			/* the INT_MAX bound also rejects a saturated LONG_MAX */
			if (l <= 0 || l > INT_MAX ||
			    endp == optarg || *endp)
				errx(1, "invalid -n nentries: %s",
				    optarg);
			nents = (ssize_t)l;
			break;
		case 'q':
			query = 1;
			break;
		case 'u':
			endp = NULL;
			/* strtoull() signals overflow only through errno */
			errno = 0;
			uuid = (uint64_t)strtoull(optarg, &endp, 16);
			if (endp == optarg || *endp || errno == ERANGE)
				errx(1, "invalid -u fsuuid: %s",
				    optarg);
			break;
		case 'v':
			verbose = 1;
			break;
		default:
			usage();
		}
	argc -= optind;
	if (argc)
		usage();

	/* No explicit file: make sure the data directory exists and use it. */
	if (fn[0] == '\0') {
		if (mkdir(datadir, 0700) == -1)
			if (errno != EEXIST)
				err(1, "mkdir: %s", datadir);

		xmkfn(fn, "%s/%s", datadir, SL_FN_OPJOURNAL);
	}

	if (format) {
		if (!uuid)
			psc_fatalx("no fsuuid specified");
		pjournal_format(fn, nents, SLJ_MDS_ENTSIZE,
		    SLJ_MDS_READSZ, uuid);
		if (verbose)
			/* nents is a signed ssize_t, hence %zd */
			warnx("created log file %s with %zd %d-byte entries "
			      "(uuid=%"PRIx64")",
			      fn, nents, SLJ_MDS_ENTSIZE, uuid);
	} else if (query)
		pjournal_dump(fn);
	else
		usage();
	exit(0);
}
Esempio n. 28
0
/**
 * pflnet_rtexists_rtnetlink - Determine whether the main routing table
 *	has a route matching a destination, by dumping routes over an
 *	rtnetlink socket and comparing each one against the address.
 * @sa: destination address; its address bytes are read through a
 *	struct sockaddr_in view.
 *
 * Returns 1 when a matching route is found and 0 when the dump ends
 * without a match.  Socket, I/O and netlink errors are reported through
 * psc_fatal()/psc_fatalx().
 */
__static int
pflnet_rtexists_rtnetlink(const struct sockaddr *sa)
{
	struct {
		struct nlmsghdr	nmh;
		struct rtmsg	rtm;
#define RT_SPACE 8192
		unsigned char	buf[RT_SPACE];
		/* not referenced below; kept so sizeof(rq) is unchanged */
		struct rtattr	rta;
		struct nlmsgerr	nlerr;
	} rq;
	const struct sockaddr_in *sin = (const void *)sa;
	in_addr_t cmpaddr, rtdst, zero = 0;
	struct nlmsghdr *nmh;
	struct rtattr *rta;
	struct rtmsg *rtm;
	ssize_t rc, rca;
	int rv = 0, s;
	size_t nb;

	s = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (s == -1)
		psc_fatal("socket");

	memset(&rq, 0, sizeof(rq));
	rq.nmh.nlmsg_len = NLMSG_SPACE(sizeof(rq.rtm));
	rq.nmh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	rq.nmh.nlmsg_type = RTM_GETROUTE;

	rq.rtm.rtm_family = sa->sa_family;
	rq.rtm.rtm_protocol = RTPROT_UNSPEC;
	rq.rtm.rtm_table = RT_TABLE_MAIN;
	rq.rtm.rtm_scope = RT_SCOPE_LINK;

	/*
	 * Stage the destination attribute right after the aligned rtmsg,
	 * i.e. inside rq.buf.  The offset must be taken from the start
	 * of rq: basing it on a member at the tail of the struct places
	 * the stores beyond the end of rq.
	 *
	 * NOTE(review): nlmsg_len above covers only the header and
	 * rtmsg, so this attribute lies outside the bytes handed to
	 * write(); matching is done on the dump results below.
	 */
	rta = (void *)((char *)&rq + NLMSG_SPACE(sizeof(rq.rtm)));
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(sizeof(sin->sin_addr));
	memcpy(RTA_DATA(rta), &sin->sin_addr,
	    sizeof(sin->sin_addr));

	errno = 0;
	rc = write(s, &rq, rq.nmh.nlmsg_len);
	if (rc != (ssize_t)rq.nmh.nlmsg_len)
		psc_fatal("routing socket length mismatch");

	/* Each read() may carry several messages of the multipart dump. */
	for (;;) {
		rc = read(s, &rq, sizeof(rq));
		if (rc == -1)
			psc_fatal("routing socket read");

		nmh = &rq.nmh;
		nb = rc;
		for (; NLMSG_OK(nmh, nb); nmh = NLMSG_NEXT(nmh, nb)) {
			/*
			 * Check the type of every message, not just the
			 * first in the datagram: the terminating
			 * NLMSG_DONE may trail route messages, and
			 * missing it would leave us blocked in read().
			 */
			switch (nmh->nlmsg_type) {
			case NLMSG_ERROR: {
				struct nlmsgerr *nlerr;

				nlerr = NLMSG_DATA(nmh);
				/* netlink reports errors as negated errno */
				psc_fatalx("netlink: %s",
				    strerror(-nlerr->error));
			    }
			case NLMSG_DONE:
				goto out;
			}

			rtm = NLMSG_DATA(nmh);

			if (rtm->rtm_table != RT_TABLE_MAIN)
				continue;

			rta = RTM_RTA(rtm);
			rca = RTM_PAYLOAD(nmh);

			for (; RTA_OK(rta, rca);
			    rta = RTA_NEXT(rta, rca)) {
				switch (rta->rta_type) {
				case RTA_GATEWAY:
					cmpaddr = sin->sin_addr.s_addr;
					if (zero == cmpaddr) {
						rv = 1;
						goto out;
					}
					break;
				case RTA_DST:
					cmpaddr = sin->sin_addr.s_addr;

					/*
					 * Overwrite the bits of our address
					 * past the route's prefix length
					 * with zeros before comparing.
					 */
					pfl_bitstr_copy(&cmpaddr,
					    rtm->rtm_dst_len, &zero, 0,
					    sizeof(zero) * NBBY -
					    rtm->rtm_dst_len);

					/* copy out rather than cast-deref */
					memcpy(&rtdst, RTA_DATA(rta),
					    sizeof(rtdst));
					if (cmpaddr == rtdst) {
						rv = 1;
						goto out;
					}
					break;
				}
			}
		}
	}
 out:
	close(s);
	return (rv);
}