Exemple #1
0
/*
 * Initialize the MDS-side private state attached to a resource.
 * @r: resource being configured.
 *
 * The mutex and wait queue of the per-resource info are set up first;
 * then either a peer-info (for MDS resources) or an IOS-info (for
 * everything else) structure is allocated and hung off rpmi_info.
 */
void
slcfg_init_res(struct sl_resource *r)
{
	struct resprof_mds_info *info = res2rpmi(r);

	psc_mutex_init(&info->rpmi_mutex);
	psc_waitq_init(&info->rpmi_waitq, "rpmi");

	if (r->res_type == SLREST_MDS) {
		struct sl_mds_peerinfo *peer;

		peer = PSCALLOC(sizeof(*peer));
		info->rpmi_info = peer;
		peer->sp_flags = SPF_NEED_JRNL_INIT;
		pfl_meter_init(&peer->sp_batchmeter, 0, "nsupd-%s",
		    r->res_name);
		return;
	}

	/* Any non-MDS resource gets I/O server bookkeeping. */
	{
		struct sl_mds_iosinfo *ios;

		ios = PSCALLOC(sizeof(*ios));
		info->rpmi_info = ios;
		ios->si_flags = SIF_NEED_JRNL_INIT;
		if (RES_ISFS(r))
			pfl_meter_init(&ios->si_batchmeter, 0,
			    "reclaim-%s", r->res_name);
		if (r->res_flags & RESF_DISABLE_BIA)
			ios->si_flags |= SIF_DISABLE_LEASE;
	}
}
Exemple #2
0
/*
 * Look up an opstat by its formatted name, creating it when missing.
 * @flags: OPSTF_* flags stored in a newly created opstat; OPSTF_EXCL
 *	asserts that no opstat with this name exists yet.
 * @namefmt: printf(3)-style format of the name, followed by its
 *	arguments.
 * Returns the existing or newly registered opstat.
 *
 * The name is formatted into the shared pfl_opstat_name scratch
 * buffer, so the whole operation runs under pfl_opstats_lock.
 */
struct pfl_opstat *
pfl_opstat_initf(int flags, const char *namefmt, ...)
{
	char *scratch = pfl_opstat_name;
	struct pfl_opstat *st;
	va_list args;
	int idx, namesz;

	spinlock(&pfl_opstats_lock);

	va_start(args, namefmt);
	namesz = vsnprintf(scratch, 128, namefmt, args) + 1;
	va_end(args);

	/* (gdb) p ((struct pfl_opstat *)pfl_opstats.pda_items[74]).opst_name */
	idx = psc_dynarray_bsearch(&pfl_opstats, scratch,
	    _pfl_opstat_cmp);
	if (idx < psc_dynarray_len(&pfl_opstats)) {
		st = psc_dynarray_getpos(&pfl_opstats, idx);
		if (strcmp(scratch, st->opst_name) == 0) {
			pfl_assert((flags & OPSTF_EXCL) == 0);
			freelock(&pfl_opstats_lock);
			return (st);
		}
	}

	/* Not registered yet: insert a new entry at the search position. */
	pfl_opstats_sum++;
	st = PSCALLOC(sizeof(*st) + namesz);
	strlcpy(st->opst_name, scratch, 128);
	st->opst_flags = flags;
	psc_dynarray_splice(&pfl_opstats, idx, 0, &st, 1);
	freelock(&pfl_opstats_lock);
	return (st);
}
Exemple #3
0
/*
 * Fetch the POSIX access ACL of an fcmh through the getxattr path.
 * @pfr: file system request the lookup is performed for.
 * @pcr: credentials used for the lookup.
 * @f: file whose ACL_EA_ACCESS attribute is read.
 * Returns the value produced by pfl_acl_from_xattr(), or NULL when the
 * attribute could not be retrieved.
 *
 * A small stack buffer is tried first; when slc_getxattr() answers
 * ERANGE the attribute is re-read into a heap buffer of the reported
 * size.
 */
acl_t
slc_acl_get_fcmh(struct pscfs_req *pfr, const struct pscfs_creds *pcr,
    struct fidc_membh *f)
{
	char stackbuf[64] = { 0 };
	void *xattr = stackbuf;
	size_t xattrlen = 0;
	ssize_t rc;
	acl_t acl;

	rc = slc_getxattr(pfr, pcr, ACL_EA_ACCESS, stackbuf,
	    sizeof(stackbuf), f, &xattrlen);
	if (rc == ERANGE) {
		xattr = PSCALLOC(xattrlen);
		rc = slc_getxattr(pfr, pcr, ACL_EA_ACCESS, xattr,
		    xattrlen, f, &xattrlen);
		if (rc) {
			PSCFREE(xattr);
			return (NULL);
		}
	} else if (rc != 0)
		return (NULL);

	acl = pfl_acl_from_xattr(xattr, xattrlen);

	/* Only the heap fallback needs releasing. */
	if (xattr != stackbuf)
		PSCFREE(xattr);
	return (acl);
}
Exemple #4
0
/*
 * Handle a NAMESPACE_UPDATE request from another MDS.
 * @rq: incoming RPC request.
 * Returns the value stored in the reply's rc field.
 *
 * The request announces `count' update entries packed into `size'
 * bytes, which are pulled from the peer with a bulk transfer and then
 * applied one by one.
 */
int
slm_rmm_handle_namespace_update(struct pscrpc_request *rq)
{
	struct srt_update_entry *entryp;
	struct srm_update_req *mq;
	struct srm_update_rep *mp;
	struct sl_mds_peerinfo *p;
	struct sl_resource *res;
	struct sl_site *site;
	struct iovec iov;
	int i, len, count;

	SL_RSX_ALLOCREP(rq, mq, mp);

	/* Reject empty batches and buffers larger than LNET_MTU. */
	count = mq->count;
	if (count <= 0 || mq->size > LNET_MTU) {
		mp->rc = -EINVAL;
		return (mp->rc);
	}

	/* Sink buffer for the bulk transfer; released at `out'. */
	iov.iov_len = mq->size;
	iov.iov_base = PSCALLOC(mq->size);

	mp->rc = slrpc_bulkserver(rq, BULK_GET_SINK, SRMM_BULK_PORTAL,
	    &iov, 1);
	if (mp->rc)
		goto out;

	/* Search for the peer information by the given site ID. */
	site = libsl_siteid2site(mq->siteid);
	p = NULL;
	if (site)
		SITE_FOREACH_RES(site, res, i)
			if (res->res_type == SLREST_MDS) {
				p = res2rpmi(res)->rpmi_info;
				break;
			}
	/* `p' is only used to verify that the site has an MDS resource. */
	if (p == NULL) {
		psclog_info("fail to find site ID %d", mq->siteid);
		PFL_GOTOERR(out, mp->rc = -EINVAL);
	}

	/*
	 * Iterate through the namespace update buffer and apply updates.
	 * If we fail to apply an update, we still report success to our
	 * peer because reporting an error does not help our cause.
	 *
	 * NOTE(review): the walk trusts `count' and each entry's length;
	 * nothing here checks that entryp stays within the mq->size bytes
	 * that were received -- confirm the peer is trusted or add a
	 * bounds check.
	 */
	entryp = iov.iov_base;
	for (i = 0; i < count; i++) {
		slm_rmm_apply_update(entryp);
		len = UPDATE_ENTRY_LEN(entryp);
		entryp = PSC_AGP(entryp, len);
	}
	zfsslash2_wait_synced(0);

 out:
	PSCFREE(iov.iov_base);
	return (mp->rc);
}
Exemple #5
0
/*
 * Allocate a buffer large enough to hold one item of table @t.
 * @t: on-disk table whose header supplies the item size.
 * @pp: value-result; receives the newly allocated buffer.
 */
void
pfl_odt_allocitem(struct pfl_odt *t, void **pp)
{
	*pp = PSCALLOC(t->odt_hdr->odth_itemsz);
}
Exemple #6
0
/*
 * Duplicate a region of memory into a freshly allocated buffer.
 * @p: start of the region to copy.
 * @len: number of bytes to copy.
 * Returns the new buffer obtained from PSCALLOC().
 */
void *
pfl_memdup(const void *p, size_t len)
{
    void *copy = PSCALLOC(len);

    /* memcpy(3) hands back its destination. */
    return (memcpy(copy, p, len));
}
Exemple #7
0
/*
 * Load a file system module from a shared object and invoke its
 * "pscfs_module_load" entry point.
 * @path: path of the shared object handed to dlopen(3).
 * @opts: module options; passed to pflfs_module_init() and duplicated
 *	into the returned module.
 * @errbuf: buffer that receives a description of the failure.
 * @errlen: size of @errbuf in bytes.
 * Returns the newly allocated module, or NULL on failure (in which case
 * @errbuf holds a message).
 */
struct wok_module *
mod_load(const char *path, const char *opts, char *errbuf,
    size_t errlen)
{
	int (*loadf)(struct pscfs *);
	struct wok_module *wm;
	void *h;
	int rc;

	h = dlopen(path, RTLD_NOW);
	if (h == NULL) {
		/* Bound the write by the caller's buffer, not LINE_MAX. */
		snprintf(errbuf, errlen, "%s\n", dlerror());
		/* The message may contain '%'; never use it as a format. */
		fprintf(stderr, "%s", errbuf);
		return (NULL);
	}

	loadf = dlsym(h, "pscfs_module_load");
	if (loadf == NULL) {
		dlclose(h);
		snprintf(errbuf, errlen,
		    "symbol pscfs_module_load undefined.\n");
		fprintf(stderr, "%s", errbuf);
		return (NULL);
	}

	wm = PSCALLOC(sizeof(*wm));
	wm->wm_path = pfl_strdup(path);
	wm->wm_handle = h;
	wm->wm_opts = pfl_strdup(opts);
	wm->wm_module.pf_private = wm;
	pflfs_module_init(&wm->wm_module, opts);
	rc = loadf(&wm->wm_module);

	/*
	 * XXX XXX XXX
	 * This is a complete hack but this flush somehow avoids a bunch
	 * of zeroes from ending up in the log...
	 * XXX XXX XXX
	 */
	fflush(stderr);

	if (rc) {
		wm->wm_module.pf_handle_destroy = NULL;
		pflfs_module_destroy(&wm->wm_module);

		dlclose(h);
		/* Release both duplicated strings, not just the path. */
		PSCFREE(wm->wm_opts);
		PSCFREE(wm->wm_path);
		PSCFREE(wm);
		psclog_warnx("module failed to load: rc=%d module=%s",
		    rc, path);
		strlcpy(errbuf, strerror(rc), errlen);
		return (NULL);
	}
	return (wm);
}
Exemple #8
0
/*
 * Open an existing on-disk table and register it in pfl_odtables.
 * @tp: value-result; receives the in-memory table handle.
 * @odtops: backend operations; copied into the table and used to open
 *	the backing store.
 * @oflg: flags forwarded to the backend open routine.
 * @fn: name of the backing file forwarded to the backend.
 * @fmt: printf(3)-style format of the table's name, followed by its
 *	arguments.
 *
 * The header CRC is verified with an assertion after the backend open.
 */
void
pfl_odt_load(struct pfl_odt **tp, struct pfl_odt_ops *odtops, int oflg,
    const char *fn, const char *fmt, ...)
{
	struct pfl_odt_hdr *h;
	struct pfl_odt *t;
	uint64_t crc;
	va_list ap;

	t = PSCALLOC(sizeof(*t));
	*tp = t;
	t->odt_ops = *odtops;
	INIT_SPINLOCK(&t->odt_lock);
	INIT_PSC_LISTENTRY(&t->odt_lentry);

	/* Name the table, then derive its read/write counters from it. */
	va_start(ap, fmt);
	vsnprintf(t->odt_name, sizeof(t->odt_name), fmt, ap);
	va_end(ap);

	t->odt_iostats.rd = pfl_opstat_init("odt-%s-rd", t->odt_name);
	t->odt_iostats.wr = pfl_opstat_init("odt-%s-wr", t->odt_name);

	h = PSCALLOC(sizeof(*h));
	t->odt_hdr = h;

	/* pfl_odt_open() and slm_odt_open() */
	odtops->odtop_open(t, fn, oflg);

	/* The CRC covers the header up to, but excluding, odth_crc. */
	psc_crc64_calc(&crc, t->odt_hdr,
	    sizeof(*h) - sizeof(h->odth_crc));
	pfl_assert(h->odth_crc == crc);

	t->odt_bitmap = psc_vbitmap_newf(h->odth_nitems, PVBF_AUTO);
	pfl_assert(t->odt_bitmap);

	/*
	 * Slot zero is marked as taken up front so that it is easy to
	 * tell whether a lease has been assigned.
	 */
	psc_vbitmap_set(t->odt_bitmap, 0);

	PFLOG_ODT(PLL_DIAG, t, "loaded");

	pll_add(&pfl_odtables, t);
}
Exemple #9
0
/*
 * Take a slab descriptor from slab_pool and give it a backing buffer of
 * SLASH_SLVR_SIZE bytes.
 * Returns the prepared slab.
 */
struct slab *
slab_alloc(void)
{
	struct slab *s = psc_pool_get(slab_pool);

	s->slb_base = PSCALLOC(SLASH_SLVR_SIZE);
	INIT_LISTENTRY(&s->slb_mgmt_lentry);
	return (s);
}
Exemple #10
0
/*
 * Allocator smoke test: exercises PSCALLOC/psc_realloc/PSCFREE and the
 * psc_alloc/psc_free variants taking PAF_PAGEALIGN and PAF_LOCK.
 * No options or arguments are accepted.
 */
int
main(int argc, char *argv[])
{
	size_t len;
	void *mem;

	pfl_init();
	if (getopt(argc, argv, "") != -1)
		usage();
	argc -= optind;
	if (argc)
		usage();

	/* Grow, shrink to zero, regrow, and shrink again before freeing. */
	mem = PSCALLOC(213);
	mem = psc_realloc(mem, 65536, 0);
	mem = psc_realloc(mem, 0, 0);
	mem = psc_realloc(mem, 128, 0);
	mem = psc_realloc(mem, 0, 0);
	PSCFREE(mem);

	/* Plain allocate/free round trip. */
	mem = PSCALLOC(128);
	PSCFREE(mem);

	/* Page-aligned allocation. */
	mem = psc_alloc(24, PAF_PAGEALIGN);
	psc_free(mem, PAF_PAGEALIGN);

	/* Single growth step. */
	mem = PSCALLOC(24);
	mem = psc_realloc(mem, 128, 0);
	PSCFREE(mem);

	/* PAF_LOCK allocation; the free call is given the size. */
	mem = psc_alloc(8, PAF_LOCK);
	*(uint64_t *)mem = 0;
	psc_free(mem, PAF_LOCK, (size_t)8);

	/* Both flags combined. */
	len = 1024;
	mem = psc_alloc(len, PAF_LOCK | PAF_PAGEALIGN);
	memset(mem, 0, len);
	psc_free(mem, PAF_LOCK | PAF_PAGEALIGN, len);

	exit(0);
}
Exemple #11
0
/*
 * Create a new on-disk table and write out all of its slots.
 * @fn: path of the backing file.
 * @nitems: number of slots in the table.
 * @itemsz: size of the payload of one slot.
 * @overwrite: forwarded to the backend's creation routine.
 * @startoff: stored in the header as odth_start.
 * @pad: extra bytes added to every slot.
 * @tflg: table options stored in the header.
 * Returns 0 on success or the error reported by the backend's creation
 * routine.
 */
int
pfl_odt_create(const char *fn, int64_t nitems, size_t itemsz,
    int overwrite, size_t startoff, size_t pad, int tflg)
{
	int rc;
	int64_t	item;
	struct pfl_odt_slotftr f;
	struct pfl_odt_hdr *h;
	struct pfl_odt *t;

	t = PSCALLOC(sizeof(*t));
	t->odt_ops = pfl_odtops;
	INIT_SPINLOCK(&t->odt_lock);
	snprintf(t->odt_name, sizeof(t->odt_name), "%s", pfl_basename(fn));

	t->odt_iostats.rd = pfl_opstat_init("odt-%s-rd", t->odt_name);
	t->odt_iostats.wr = pfl_opstat_init("odt-%s-wr", t->odt_name);

	h = PSCALLOC(sizeof(*h));
	memset(h, 0, sizeof(*h));
	h->odth_nitems = nitems;
	h->odth_itemsz = itemsz;
	h->odth_slotsz = itemsz + pad + sizeof(f);
	h->odth_options = tflg;
	h->odth_start = startoff;
	t->odt_hdr = h;
	/* The CRC covers the header up to, but excluding, odth_crc. */
	psc_crc64_calc(&h->odth_crc, h, sizeof(*h) - sizeof(h->odth_crc));

	/* pfl_odt_new() and slm_odt_new() */
	rc = t->odt_ops.odtop_new(t, fn, overwrite);
	if (rc) {
		/*
		 * Nothing was handed out yet, so drop the in-memory
		 * state instead of leaking it.
		 * NOTE(review): pfl_odt_release() is avoided here on the
		 * assumption that it expects a successfully created
		 * table -- confirm.
		 */
		PSCFREE(h);
		PSCFREE(t);
		return (rc);
	}

	for (item = 0; item < nitems; item++)
		_pfl_odt_doput(t, item, NULL, &f, 0);

	PFLOG_ODT(PLL_DIAG, t, "created");

	pfl_odt_release(t);
	return (0);
}
Exemple #12
0
/*
 * Create a tree node carrying @data and link it into the children of
 * @ptn with psclist_add().
 * @ptn: parent node.
 * @data: payload stored in the new node.
 * Returns the new child node.
 */
struct psc_streenode *
psc_stree_addchild(struct psc_streenode *ptn, void *data)
{
	struct psc_streenode *node = PSCALLOC(sizeof(*node));

	node->ptn_data = data;
	INIT_PSC_LISTENTRY(&node->ptn_sibling);
	INIT_PSCLIST_HEAD(&node->ptn_children);

	psclist_add(&node->ptn_sibling, &ptn->ptn_children);
	return (node);
}
Exemple #13
0
/**
 * pflnet_getifaddrs - Acquire list of network interface addresses.
 * @ifap: value-result base of addresses array, must be when finished.
 */
int
pflnet_getifaddrs(struct ifaddrs **ifap)
{
#ifdef HAVE_GETIFADDRS
	return (getifaddrs(ifap));
#else
	int nifs, rc, s, n;
	struct ifconf ifc;
	struct ifreq *ifr;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s == -1)
		psc_fatal("socket");

	ifc.ifc_buf = NULL;
	rc = ioctl(s, SIOCGIFCONF, &ifc);
	if (rc == -1)
		psc_fatal("ioctl SIOCGIFCONF");

	/*
	 * If an interface is being added while we are fetching,
	 * there is no way to determine that we didn't get them
	 * all with this API.
	 */
	ifc.ifc_buf = PSCALLOC(ifc.ifc_len);
	rc = ioctl(s, SIOCGIFCONF, &ifc);
	if (rc == -1)
		psc_fatal("ioctl SIOCGIFCONF");

	close(s);

	nifs = ifc.ifc_len / sizeof(*ifr);
	*ifap = PSCALLOC(sizeof(**ifap) * nifs);

	ifr = (void *)ifc.ifc_buf;
	for (n = 0; n < nifs; n+++, ifr++)
		memcpy(*ifap + n, &ifr->ifr_addr, sizeof(**ifap));
#endif
	return (0);
}
Exemple #14
0
/*
 * Read slot @n of an on-disk table into freshly allocated buffers.
 * @t: table to read from.
 * @n: slot index; asserted not to exceed the last slot.
 * @pp: optional address of a pointer that receives the item buffer.
 * @fp: optional value-result that receives the slot footer buffer.
 *
 * Whichever of @pp and @fp is NULL is skipped and passed on to the
 * backend read routine as NULL.
 */
void
pfl_odt_getslot(struct pfl_odt *t, int64_t n,
    void *pp, struct pfl_odt_slotftr **fp)
{
	struct pfl_odt_hdr *h = t->odt_hdr;
	struct pfl_odt_slotftr *ftr = NULL;
	void **itemp = pp, *item = NULL;

	pfl_assert(n <= h->odth_nitems - 1);

	if (itemp)
		item = *itemp = PSCALLOC(h->odth_itemsz);
	if (fp)
		ftr = *fp = PSCALLOC(sizeof(**fp));

	/* pfl_odt_read or slm_odt_read */
	t->odt_ops.odtop_read(t, n, item, ftr);

	pfl_opstat_add(t->odt_iostats.rd, h->odth_slotsz);

	ODT_STAT_INCR(t, read);
}
Exemple #15
0
/*
 * Create a tree node carrying @data and insert it among the children
 * of @ptn using psclist_add_sorted().
 * @ptn: parent node.
 * @data: payload stored in the new node.
 * @cmpf: comparator handed to psclist_add_sorted().
 * @offset: offset handed to psclist_add_sorted().
 * Returns the new child node.
 */
struct psc_streenode *
psc_stree_addchild_sorted(struct psc_streenode *ptn, void *data,
    int (*cmpf)(const void *, const void *), off_t offset)
{
	struct psc_streenode *node = PSCALLOC(sizeof(*node));

	node->ptn_data = data;
	INIT_PSC_LISTENTRY(&node->ptn_sibling);
	INIT_PSCLIST_HEAD(&node->ptn_children);

	psclist_add_sorted(&ptn->ptn_children, &node->ptn_sibling,
	    cmpf, offset);
	return (node);
}
Exemple #16
0
/*
 * Append a filter pattern to a list.
 * @da: list the new pattern is pushed onto.
 * @s: when @type is non-zero, the pattern itself; otherwise a full rule
 *	of the form "RULE[,MODIFIERS] PATTERN", which is split in place.
 * @type: FPT_* type of the rule, or 0 to parse it from @s.
 *
 * The stored pattern points into @s, so @s must outlive the list entry.
 * A rule with an unknown name or without a pattern is fatal.
 */
void
push_filter(struct psc_dynarray *da, char *s, int type)
{
	struct {
		const char	*name;
		const char	*abbr;
		int		 type;
	} *ty, types[] = {
		{ "clear",	"!", FPT_CLEAR },
		{ "dir-merge",	":", FPT_DIRMERGE },
		{ "exclude",	"-", FPT_EXCL },
		{ "hide",	"H", FPT_HIDE },
		{ "include",	"+", FPT_INCL },
		{ "merge",	".", FPT_MERGE },
		{ "protect",	"P", FPT_PROTECT },
		{ "risk",	"R", FPT_RISK },
		{ "show",	"S", FPT_SHOW }
	};
	struct filterpattern *fp;
	char *sty, *sep;
	int n;

	fp = PSCALLOC(sizeof(*fp));
	if (type) {
		fp->fp_type = type;
		fp->fp_pat = s;
	} else {
		/* Find the end of the leading rule token. */
		for (sty = s; *s && !isspace((unsigned char)*s); s++)
			;
		if (*s == '\0')
			psync_fatal("invalid format");

		/*
		 * Terminate the rule token so it can be compared on its
		 * own and so the modifier search below cannot wander
		 * into the pattern.
		 */
		*s++ = '\0';
		while (isspace((unsigned char)*s))
			s++;
		if (*s == '\0')
			psync_fatal("invalid format");

		/* Modifiers after a comma are cut off and ignored. */
		sep = strchr(sty, ',');
		if (sep)
			*sep = '\0';

		for (n = 0, ty = types; n < nitems(types); ty++, n++)
			if (strcmp(ty->name, sty) == 0 ||
			    strcmp(ty->abbr, sty) == 0)
				break;
		if (n == nitems(types))
			psync_fatal("invalid format");
		fp->fp_type = ty->type;
		fp->fp_pat = s;
	}
	push(da, fp);
}
Exemple #17
0
/*
 * Set up a graduated set of opstats, one per bucket.
 * @og: graduated opstat structure to fill in.
 * @flags: OPSTF_* flags; without OPSTF_BASE10 the bounds shown in the
 *	labels are rewritten by _pfl_opstats_base2_suffix().
 * @buckets: ascending lower bounds of the buckets; the first must be 0.
 * @nbuckets: number of entries in @buckets.
 * @fmt: opstat name format; it is handed to pfl_opstat_initf() together
 *	with the label built for each bucket.
 */
void
pfl_opstats_grad_init(struct pfl_opstats_grad *og, int flags,
    int64_t *buckets, int nbuckets, const char *fmt, ...)
{
	const char *lo_sfx = "", *hi_sfx = "";
	int base2 = !(flags & OPSTF_BASE10);
	struct pfl_opstat_bucket *bkt;
	int64_t lo, hi;
	char label[16];
	int i, n;

	og->og_buckets = PSCALLOC(nbuckets * sizeof(og->og_buckets[0]));
	og->og_nbuckets = nbuckets;

	for (i = 0; i < nbuckets; i++) {
		bkt = &og->og_buckets[i];

		if (i)
			pfl_assert(buckets[i - 1] < buckets[i]);
		else
			pfl_assert(buckets[i] == 0);

		lo = buckets[i];
		if (base2)
			lo_sfx = _pfl_opstats_base2_suffix(&lo);

		if (i < nbuckets - 1) {
			/* Interior bucket: label shows both bounds. */
			hi = buckets[i + 1];
			if (base2)
				hi_sfx = _pfl_opstats_base2_suffix(&hi);

			n = snprintf(label, sizeof(label),
			    "%d:%"PRId64"%.1s-<%"PRId64"%.1s", i,
			    lo, lo_sfx, hi, hi_sfx);
		} else {
			/* Last bucket is open-ended. */
			n = snprintf(label, sizeof(label),
			    "%d:>=%"PRId64"%.1s", i, lo, lo_sfx);
		}
		if (n == -1)
			psc_fatal("snprintf");

		bkt->ob_lower_bound = buckets[i];
		bkt->ob_opst = pfl_opstat_initf(flags | OPSTF_BASE10,
		    fmt, label);
	}
}
Exemple #18
0
__static int
pflnet_rtexists_sysctl(const struct sockaddr *sa)
{
	union {
		struct rt_msghdr *rtm;
		char *ch;
		void *p;
	} u;
	union pfl_sockaddr_ptr s, os;
	int rc = 0, mib[6];
	char *buf = NULL;
	size_t len;

	os.cp = sa;

	mib[0] = CTL_NET;
	mib[1] = PF_ROUTE;
	mib[2] = 0;		/* protocol */
	mib[3] = AF_INET;
	mib[4] = NET_RT_DUMP;
	mib[5] = 0;		/* no flags */
	if (sysctl(mib, nitems(mib), NULL, &len, NULL, 0) == -1)
		psc_fatal("route-sysctl-estimate");
	if (len) {
		buf = PSCALLOC(len);
		if (sysctl(mib, nitems(mib), buf, &len, NULL, 0) == -1)
			psc_fatal("actual retrieval of routing table");
	}

	for (u.p = buf; u.ch && u.ch < buf + len;
	    u.ch += u.rtm->rtm_msglen) {
		if (u.rtm->rtm_version != RTM_VERSION)
			continue;
		s.p = u.rtm + 1;

		if (s.s->sin.sin_addr.s_addr ==
		    os.s->sin.sin_addr.s_addr) {
			rc = 1;
			break;
		}
	}

	PSCFREE(buf);

	return (rc);
}
Exemple #19
0
/*
 * Format a string into a newly allocated buffer.
 * @p: value-result; receives the allocated string.
 * @fmt: printf(3)-style format.
 * @ap: arguments for @fmt.
 * Returns the size of the allocation, i.e. the formatted length plus
 * one for the terminating NUL.
 */
int
pfl_vasprintf(char **p, const char *fmt, va_list ap)
{
	va_list ap2;
	int sz;

	/* Keep a second copy: the sizing pass consumes `ap'. */
	va_copy(ap2, ap);

	sz = vsnprintf(NULL, 0, fmt, ap);
	psc_assert(sz != -1);
	sz += 1;

	*p = PSCALLOC(sz);
	vsnprintf(*p, sz, fmt, ap2);
	va_end(ap2);

	return (sz);
}
Exemple #20
0
/*
 * Register a custom error code and its description.
 * @code: error number.
 * @str: description; duplicated into the table entry.
 *
 * Registering a code that is already present is allowed as long as the
 * description is the same; this is enforced with assertions.
 */
void
pfl_register_errno(int code, const char *str)
{
	struct pfl_errno *e;
	uint64_t q = code;

	e = psc_hashtbl_search(&pfl_errno_hashtbl, &q);
	if (e == NULL) {
		e = PSCALLOC(sizeof(*e));
		e->code = q;
		e->str = pfl_strdup(str);
		psc_hashent_init(&pfl_errno_hashtbl, e);
		psc_hashtbl_add_item(&pfl_errno_hashtbl, e);
		return;
	}

	/* Already known: it must describe the very same error. */
	pfl_assert(e->code == q);
	pfl_assert(strcmp(e->str, str) == 0);
}
Exemple #21
0
/*
 * Convert an inode stored in an old on-disk format to the current one.
 * @vfsid: file system the inode lives in.
 * @ih: handle of the inode being converted.
 * @old_version: index into sl_ino_compat_table selecting the readers
 *	for the old format.
 * Returns 0 on success or the first error encountered.
 *
 * The converted structures are first dumped into a temporary
 * "<fid>.update" file in the tmp directory and then written to the
 * inode's own meta file; the temporary file is unlinked afterwards.
 */
int
mds_inode_update(int vfsid, struct slash_inode_handle *ih,
    int old_version)
{
	char fn[NAME_MAX + 1];
	struct sl_ino_compat *sic;
	struct fidc_membh *f;
	struct srt_stat sstb;
	void *h = NULL, *th;
	int rc;

	/* Read the inode with the reader matching the old version. */
	sic = &sl_ino_compat_table[old_version];
	rc = sic->sic_read_ino(ih);
	if (rc)
		return (rc);
	DEBUG_INOH(PLL_INFO, ih, "updating old inode (v %d)",
	    old_version);

	/* Create (or truncate) the temporary update file; `h' is its handle. */
	f = inoh_2_fcmh(ih);
	snprintf(fn, sizeof(fn), "%016"PRIx64".update", fcmh_2_fid(f));
	rc = mdsio_opencreatef(vfsid, mds_tmpdir_inum[vfsid],
	    &rootcreds, O_RDWR | O_CREAT | O_TRUNC,
	    MDSIO_OPENCRF_NOLINK, 0644, fn, NULL, NULL, &h, NULL, NULL,
	    0);
	if (rc)
		PFL_GOTOERR(out, rc);

	psc_assert(ih->inoh_extras == NULL);
	ih->inoh_extras = PSCALLOC(INOX_SZ);

	/* convert old structures into new into temp file */
	rc = sic->sic_read_inox(ih);
	if (rc)
		PFL_GOTOERR(out, rc);

	/*
	 * Temporarily point the inode's file handle at the temp file for
	 * the dump, then restore the original handle (`th') regardless of
	 * the outcome.
	 */
	th = inoh_2_mfhp(ih)->fh;
	inoh_2_mfhp(ih)->fh = h;
	rc = mds_inode_dump(vfsid, sic, ih, th);
	inoh_2_mfhp(ih)->fh = th;
	if (rc)
		PFL_GOTOERR(out, rc);

	/* move new structures to inode meta file */
	memset(&sstb, 0, sizeof(sstb));
	rc = mdsio_setattr(vfsid, 0, &sstb, SL_SETATTRF_METASIZE,
	    &rootcreds, NULL, th, NULL);
	if (rc)
		PFL_GOTOERR(out, rc);

//	mdsio_rename(mds_tmpdir_inum, NULL, fn, &rootcreds, NULL);
	rc = mds_inode_dump(vfsid, NULL, ih, h);
	if (rc)
		PFL_GOTOERR(out, rc);

	/* Conversion finished; the temporary file is no longer needed. */
	mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn,
	    &rootcreds, NULL, NULL);

 out:
	if (h)
		mdsio_release(vfsid, &rootcreds, h);
	if (rc) {
		/* Best-effort removal of the temporary file on failure. */
		mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn,
		    &rootcreds, NULL, NULL);
		DEBUG_INOH(PLL_ERROR, ih, "error updating old inode "
		    "rc=%d", rc);
	}
	return (rc);
}
Exemple #22
0
/**
 * pjournal_format - Initialize an on-disk journal.
 * @fn: file path to store journal.
 * @nents: number of entries journal may contain; must be a multiple of
 *	@rs.
 * @entsz: size of a journal entry.
 * @rs: read size, i.e. number of entries written per batch; must be
 *	non-zero.
 * @uuid: file system UUID recorded in the journal header.
 *
 * Nothing is returned; every failure is reported through
 * psc_fatal()/psc_fatalx().
 */
void
pjournal_format(const char *fn, uint32_t nents, uint32_t entsz,
    uint32_t rs, uint64_t uuid)
{
	struct psc_journal_enthdr *pje;
	struct psc_journal pj;
	struct stat stb;
	unsigned char *jbuf;
	uint32_t i, j, slot;
	int rc, fd;
	ssize_t nb;

	/* Guard the modulo below against division by zero. */
	if (rs == 0)
		psc_fatalx("readsize must be non-zero");

	if (nents % rs)
		psc_fatalx("number of slots (%u) should be a multiple of "
		    "readsize (%u)", nents, rs);

	memset(&pj, 0, sizeof(struct psc_journal));

	rc = 0;
	fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		psc_fatal("%s", fn);

	if (fstat(fd, &stb) == -1)
		psc_fatal("stat %s", fn);

	/* The header occupies a whole number of file system blocks. */
	pj.pj_fd = fd;
	pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize));

	pj.pj_hdr->pjh_entsz = entsz;
	pj.pj_hdr->pjh_nents = nents;
	pj.pj_hdr->pjh_version = PJH_VERSION;
	pj.pj_hdr->pjh_readsize = rs;
	pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize);
	pj.pj_hdr->pjh_magic = PJH_MAGIC;
	pj.pj_hdr->pjh_timestamp = time(NULL);
	pj.pj_hdr->pjh_fsuuid = uuid;

	/* Checksum covers the header fields preceding pjh_chksum. */
	psc_crc64_init(&pj.pj_hdr->pjh_chksum);
	psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr,
	    offsetof(struct psc_journal_hdr, pjh_chksum));
	psc_crc64_fini(&pj.pj_hdr->pjh_chksum);

	nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0);
	if ((size_t)nb != pj.pj_hdr->pjh_iolen)
		psc_fatalx("failed to write journal header: %s",
		    nb == -1 ? strerror(errno) : "short write");

	/* Build one batch of `rs' formatted entries to be reused below. */
	nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize;
	jbuf = psc_alloc(nb, PAF_PAGEALIGN);
	for (i = 0; i < rs; i++) {
		pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i);
		pje->pje_magic = PJE_MAGIC;
		pje->pje_type = PJE_FORMAT;
		pje->pje_xid = PJE_XID_NONE;
		pje->pje_len = 0;

		psc_crc64_init(&pje->pje_chksum);
		psc_crc64_add(&pje->pje_chksum, pje,
		    offsetof(struct psc_journal_enthdr, pje_chksum));
		psc_crc64_add(&pje->pje_chksum, pje->pje_data,
		    pje->pje_len);
		psc_crc64_fini(&pje->pje_chksum);
	}

	j = 0;
	/* XXX use an option to write only one entry in fast create mode */
	for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) {
		nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs,
		    PJ_GETENTOFF(&pj, slot));
		if ((size_t)nb != PJ_PJESZ(&pj) * rs)
			psc_fatal("failed to write slot %u (%zd)",
			    slot, nb);
		/* Progress dots, 80 per line, when running verbosely. */
		if (verbose && slot % 262144 == 0) {
			printf(".");
			fflush(stdout);
			fsync(pj.pj_fd);
			if (++j == 80) {
				printf("\n");
				j = 0;
			}
		}
	}
	if (verbose && j)
		printf("\n");
	if (close(fd) == -1)
		psc_fatal("failed to close journal");
	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs);
	/* Release the header last; the size computation above uses `pj'. */
	PSCFREE(pj.pj_hdr);
	psclog_info("journal %s formatted: %d slots, %d readsize, error=%d",
	    fn, nents, rs, rc);
}
Exemple #23
0
/**
 * pjournal_dump - Dump the contents of a journal file.
 * @fn: journal filename to query.
 * @verbose: whether to report stats summary or full dump.
 *
 * Each time mds restarts, it writes log entries starting from the very
 * first slot of the log.  Anyway, the function dumps all log entries,
 * some of them may be from previous incarnations of the MDS.
 */
void
pjournal_dump(const char *fn)
{
	int i, ntotal, nmagic, nchksum, nformat, ndump, first = 1;
	uint32_t slot, highest_slot = -1, lowest_slot = -1;
	uint64_t chksum, highest_xid = 0, lowest_xid = 0;
	struct psc_journal_enthdr *pje;
	struct psc_journal_hdr *pjh;
	struct psc_journal *pj;
	struct stat statbuf;
	unsigned char *jbuf;
	ssize_t nb, pjhlen;
	time_t ts;

	ntotal = nmagic = nchksum = nformat = ndump = 0;

	pj = PSCALLOC(sizeof(*pj));

	strlcpy(pj->pj_name, pfl_basename(fn), sizeof(pj->pj_name));

	pj->pj_fd = open(fn, O_RDWR | O_DIRECT);
	if (pj->pj_fd == -1)
		psc_fatal("failed to open journal %s", fn);
	if (fstat(pj->pj_fd, &statbuf) == -1)
		psc_fatal("failed to stat journal %s", fn);

	/*
	 * O_DIRECT may impose alignment restrictions so align the
	 * buffer and perform I/O in multiples of file system block
	 * size.
	 */
	pjhlen = PSC_ALIGN(sizeof(*pjh), statbuf.st_blksize);
	pjh = psc_alloc(pjhlen, PAF_PAGEALIGN);
	nb = pread(pj->pj_fd, pjh, pjhlen, 0);
	if (nb != pjhlen)
		psc_fatal("failed to read journal header");

	pj->pj_hdr = pjh;
	if (pjh->pjh_magic != PJH_MAGIC)
		psc_fatalx("journal header has a bad magic number "
		    "%#"PRIx64, pjh->pjh_magic);

	if (pjh->pjh_version != PJH_VERSION)
		psc_fatalx("journal header has an invalid version "
		    "number %d", pjh->pjh_version);

	psc_crc64_init(&chksum);
	psc_crc64_add(&chksum, pjh, offsetof(struct psc_journal_hdr,
	    pjh_chksum));
	psc_crc64_fini(&chksum);

	if (pjh->pjh_chksum != chksum)
		psc_fatalx("journal header has an invalid checksum "
		    "value %"PSCPRIxCRC64" vs %"PSCPRIxCRC64,
		    pjh->pjh_chksum, chksum);

	if (S_ISREG(statbuf.st_mode) && statbuf.st_size !=
	    (off_t)(pjhlen + pjh->pjh_nents * PJ_PJESZ(pj)))
		psc_fatalx("size of the journal log %"PSCPRIdOFFT"d does "
		    "not match specs in its header", statbuf.st_size);

	if (pjh->pjh_nents % pjh->pjh_readsize)
		psc_fatalx("number of entries %d is not a multiple of the "
		    "readsize %d", pjh->pjh_nents, pjh->pjh_readsize);

	ts = pjh->pjh_timestamp;

	printf("%s:\n"
	    "  version: %u\n"
	    "  entry size: %u\n"
	    "  number of entries: %u\n"
	    "  batch read size: %u\n"
	    "  entry start offset: %"PRId64"\n"
	    "  format time: %s"
	    "  uuid: %"PRIx64"\n"
	    "  %4s  %3s %4s %4s %s\n",
	    fn, pjh->pjh_version, PJ_PJESZ(pj), pjh->pjh_nents,
	    pjh->pjh_readsize, pjh->pjh_start_off,
	    ctime(&ts), pjh->pjh_fsuuid,
	    "idx", "typ", "xid", "txg", "details");

	jbuf = psc_alloc(PJ_PJESZ(pj) * pj->pj_hdr->pjh_readsize,
	    PAF_PAGEALIGN);
	for (slot = 0; slot < pjh->pjh_nents;
	    slot += pjh->pjh_readsize) {
		nb = pread(pj->pj_fd, jbuf, PJ_PJESZ(pj) *
		    pjh->pjh_readsize, PJ_GETENTOFF(pj, slot));
		if (nb != PJ_PJESZ(pj) * pjh->pjh_readsize)
			warn("failed to read %d log entries at slot %d",
			    pjh->pjh_readsize, slot);

		for (i = 0; i < pjh->pjh_readsize; i++) {
			ntotal++;
			pje = (void *)&jbuf[PJ_PJESZ(pj) * i];
			if (pje->pje_magic != PJE_MAGIC) {
				nmagic++;
				warnx("journal slot %d has a bad magic"
				    "number", slot + i);
				continue;
			}

			/*
			 * If we hit a new entry that is never used, we
			 * assume that the rest of the journal is never
			 * used.
			 */
			if (pje->pje_type == PJE_FORMAT) {
				nformat = nformat + pjh->pjh_nents -
				    (slot + i);
				goto done;
			}

			psc_crc64_init(&chksum);
			psc_crc64_add(&chksum, pje, offsetof(
			    struct psc_journal_enthdr, pje_chksum));
			psc_crc64_add(&chksum, pje->pje_data,
			    pje->pje_len);
			psc_crc64_fini(&chksum);

			if (pje->pje_chksum != chksum) {
				nchksum++;
				warnx("journal slot %d has a corrupt "
				    "checksum", slot + i);
				goto done;
			}
			ndump++;
			if (verbose)
				pjournal_dump_entry(slot + i, pje);
			if (first) {
				first = 0;
				highest_xid = lowest_xid = pje->pje_xid;
				highest_slot = lowest_slot = slot + i;
				continue;
			}
			if (highest_xid < pje->pje_xid) {
				highest_xid = pje->pje_xid;
				highest_slot = slot + i;
			}
			if (lowest_xid > pje->pje_xid) {
				lowest_xid = pje->pje_xid;
				lowest_slot = slot + i;
			}
		}

	}

 done:
	if (close(pj->pj_fd) == -1)
		printf("failed closing journal %s", fn);

	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(pj));
	PSCFREE(pj);

	printf("----------------------------------------------\n"
	    "%8d slot(s) scanned\n"
	    "%8d in use\n"
	    "%8d formatted\n"
	    "%8d bad magic\n"
	    "%8d bad checksum(s)\n"
	    "lowest transaction ID=%#"PRIx64" (slot=%d)\n"
	    "highest transaction ID=%#"PRIx64" (slot=%d)\n",
	    ntotal, ndump, nformat, nmagic, nchksum,
	    lowest_xid, lowest_slot,
	    highest_xid, highest_slot);
}
/*
 * Atomic operations test driver.
 *
 * Options: -i sets `niter' and -n sets `nthr'; no other arguments are
 * accepted.  After a fixed sequence of single-threaded checks on the
 * 16-, 32-, and 64-bit atomics, `nthr' threads running startf() are
 * created and synchronized with the main thread through two barrier
 * waits.
 *
 * NOTE(review): the TEST/TEST1/TEST1V macros are defined elsewhere;
 * presumably the trailing argument(s) are the expected results --
 * confirm against their definitions.
 */
int
main(int argc, char *argv[])
{
    struct thr *thr;
    pthread_t pthr;
    int c, rc, i;

    pfl_init();
    progname = argv[0];
    while ((c = getopt(argc, argv, "i:n:")) != -1)
        switch (c) {
        case 'i':
            niter = atoi(optarg);
            break;
        case 'n':
            nthr = atoi(optarg);
            break;
        default:
            usage();
        }
    argc -= optind;
    if (argc)
        usage();

    /* 64-bit checks with values that do not fit in 32 bits. */
    psc_assert(psc_atomic64_read(&v64) == UINT64_C(100000000000));
    TEST(psc_atomic64, set, &v64, &v64, UINT64_C(2000000000000), UINT64_C(2000000000000));
    TEST(psc_atomic64, add, &v64, &v64, 15, UINT64_C(2000000000015));
    TEST(psc_atomic64, sub, &v64, &v64, 9, UINT64_C(2000000000006));
    TEST1(psc_atomic64, inc, &v64, UINT64_C(2000000000007));
    TEST1(psc_atomic64, dec, &v64, UINT64_C(2000000000006));

    /* The same operation sequence is run for each atomic width. */
    psc_atomic16_set(&v16, 2);
    TEST(psc_atomic16, set, &v16, &v16, 200, 200);
    TEST(psc_atomic16, add, &v16, &v16, 15, 215);
    TEST(psc_atomic16, sub, &v16, &v16, 9, 206);
    TEST1(psc_atomic16, inc, &v16, 207);
    TEST1(psc_atomic16, dec, &v16, 206);
    TEST1V(psc_atomic16, dec_and_test0, &v16, 205, 0);
    TEST(psc_atomic16, set, &v16, &v16, 1, 1);
    TEST1V(psc_atomic16, dec_and_test0, &v16, 0, 1);
    TEST(psc_atomic16, setmask, &v16, &v16, 0x75, 0x75);
    TEST(psc_atomic16, clearmask, &v16, &v16, 0x41, 0x34);
    TEST(psc_atomic16, set, &v16, &v16, 0, 0);

    psc_atomic32_set(&v32, 2);
    TEST(psc_atomic32, set, &v32, &v32, 200, 200);
    TEST(psc_atomic32, add, &v32, &v32, 15, 215);
    TEST(psc_atomic32, sub, &v32, &v32, 9, 206);
    TEST1(psc_atomic32, inc, &v32, 207);
    TEST1(psc_atomic32, dec, &v32, 206);
    TEST1V(psc_atomic32, dec_and_test0, &v32, 205, 0);
    TEST(psc_atomic32, set, &v32, &v32, 1, 1);
    TEST1V(psc_atomic32, dec_and_test0, &v32, 0, 1);
    TEST(psc_atomic32, setmask, &v32, &v32, 0x75, 0x75);
    TEST(psc_atomic32, clearmask, &v32, &v32, 0x41, 0x34);
    TEST(psc_atomic32, set, &v32, &v32, 0, 0);

    psc_atomic64_set(&v64, 2);
    TEST(psc_atomic64, set, &v64, &v64, 200, 200);
    TEST(psc_atomic64, add, &v64, &v64, 15, 215);
    TEST(psc_atomic64, sub, &v64, &v64, 9, 206);
    TEST1(psc_atomic64, inc, &v64, 207);
    TEST1(psc_atomic64, dec, &v64, 206);
    TEST1V(psc_atomic64, dec_and_test0, &v64, 205, 0);
    TEST(psc_atomic64, set, &v64, &v64, 1, 1);
    TEST1V(psc_atomic64, dec_and_test0, &v64, 0, 1);
    TEST(psc_atomic64, setmask, &v64, &v64, 0x75, 0x75);
    TEST(psc_atomic64, clearmask, &v64, &v64, 0x41, 0x34);
    TEST(psc_atomic64, set, &v64, &v64, 0, 0);

    TEST1(psc_atomic16, inc, &v16, 1);
    TEST1V(psc_atomic16, dec_and_test0, &v16, 0, 1);

    /* The barrier counts the worker threads plus this thread. */
    rc = pthread_barrier_init(&barrier, NULL, nthr + 1);
    if (rc)
        psc_fatalx("pthread_barrier_init: %s", strerror(rc));
    for (i = 0; i < nthr; i++) {
        /* Each worker gets its own argument holding its index. */
        thr = PSCALLOC(sizeof(*thr));
        thr->pos = i;
        rc = pthread_create(&pthr, NULL, startf, thr);
        if (rc)
            psc_fatalx("pthread_create: %s", strerror(rc));
    }
    pthread_barrier_wait(&barrier);
    pthread_barrier_wait(&barrier);
    exit(0);
}
Exemple #25
0
/*
 * Traverse a file hierarchy and perform an operation on each file
 * system entry.
 * @fn: file root.
 * @flags: behavioral flags (PFL_FILEWALKF_*).
 * @cmpf: optional dirent comparator for ordering.
 * @cbf: callback to invoke on each file.
 * @arg: optional argument to supply to callback.
 * Returns 0, or the first non-zero value returned by the callback,
 * which also stops the traversal.
 * Notes: the callback will be invoked with a fully resolved absolute
 *	path name unless the file in question is a symbolic link.
 *
 * Without PFL_FILEWALKF_RECURSIVE only @fn itself is visited, using an
 * FTSENT synthesized from lstat(2).
 */
int
pfl_filewalk(const char *fn, int flags, void *cmpf, int (*cbf)(FTSENT *,
    void *), void *arg)
{
	char * const pathv[] = { (char *)fn, NULL };
	int rc = 0, f_flags = 0;
	struct stat stb;
	FTSENT *f;
	FTS *fp;

	if (flags & PFL_FILEWALKF_RECURSIVE) {
		if (flags & PFL_FILEWALKF_NOSTAT)
			f_flags |= FTS_NOSTAT;
		if (flags & PFL_FILEWALKF_NOCHDIR)
			f_flags |= FTS_NOCHDIR;
		fp = pfl_fts_open(pathv, f_flags | FTS_COMFOLLOW |
		    FTS_PHYSICAL, cmpf);
		if (fp == NULL)
			psc_fatal("fts_open %s", fn);
		while ((f = pfl_fts_read(fp)) != NULL) {
			switch (f->fts_info) {
			case FTS_NS:
				psclog_warnx("%s: %s", f->fts_path,
				    strerror(f->fts_errno));
				break;
			case FTS_F:
			case FTS_D:
			case FTS_SL:
				/* Report the entry at hand, not the root. */
				if (flags & PFL_FILEWALKF_VERBOSE)
					warnx("processing %s%s",
					    f->fts_path,
					    f->fts_info == FTS_D ?
					    "/" : "");
				/* FALLTHROUGH */
			case FTS_DP:
				rc = cbf(f, arg);
				if (rc) {
					pfl_fts_close(fp);
					return (rc);
				}
				break;
			default:
				if (f->fts_errno == 0)
					f->fts_errno = EOPNOTSUPP;
				psclog_warnx("%s: %s", f->fts_path,
				    strerror(f->fts_errno));
				break;
			}
		}
		pfl_fts_close(fp);
	} else {
		const char *basefn;
		size_t baselen;

		if (lstat(fn, &stb) == -1)
			err(1, "%s", fn);
		basefn = pfl_basename(fn);
		baselen = strlen(basefn);

		/* Extra room for the name appended to the entry. */
		f = PSCALLOC(sizeof(*f) + baselen);
		f->fts_accpath = (char *)fn;
		f->fts_path = (char *)fn;
		f->fts_pathlen = strlen(fn);
		strlcpy(f->fts_name, basefn, baselen + 1);
		f->fts_namelen = baselen;
		f->fts_ino = stb.st_ino;
		f->fts_statp = &stb;
		switch (stb.st_mode & S_IFMT) {
		case S_IFDIR: f->fts_info = FTS_D; break;
		case S_IFREG: f->fts_info = FTS_F; break;
		case S_IFLNK: f->fts_info = FTS_SL; break;
		case S_IFBLK: f->fts_info = FTS_DEFAULT; break;
		default:
			/* The callback is still invoked after the warning. */
			psclog_warnx("%s: %s", fn,
			    strerror(EOPNOTSUPP));
			break;
		}
		rc = cbf(f, arg);
		PSCFREE(f);
	}
	return (rc);
}
Exemple #26
0
/*
 * Look for a leftover "<fid>.update" file from an inode conversion and,
 * when a valid one is found, finish applying it to the inode.
 * @vfsid: file system the inode lives in.
 * @ih: handle of the inode being checked.
 * @rc: value-result; receives 0 or the first error encountered.
 * Returns 1 once the temporary file has been found, read, and its CRC
 * verified; 0 otherwise.
 */
int
mds_inode_update_interrupted(int vfsid, struct slash_inode_handle *ih,
    int *rc)
{
	char fn[NAME_MAX + 1];
	struct srt_stat sstb;
	struct iovec iovs[2];
	uint64_t crc, od_crc;
	void *h = NULL, *th;
	mdsio_fid_t inum;
	int exists = 0;
	size_t nb;

	/* Remember the inode's own handle; it is restored at `out'. */
	th = inoh_2_mfh(ih);

	snprintf(fn, sizeof(fn), "%016"PRIx64".update",
	    inoh_2_fid(ih));

	*rc = mdsio_lookup(vfsid, mds_tmpdir_inum[vfsid], fn, &inum,
	    &rootcreds, NULL);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	*rc = mdsio_opencreatef(vfsid, inum, &rootcreds, O_RDONLY,
	    MDSIO_OPENCRF_NOLINK, 0644, NULL, NULL, NULL, &h, NULL,
	    NULL, 0);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	/* Read the inode and the CRC stored right after it in one call. */
	iovs[0].iov_base = &ih->inoh_ino;
	iovs[0].iov_len = sizeof(ih->inoh_ino);
	iovs[1].iov_base = &od_crc;
	iovs[1].iov_len = sizeof(od_crc);
	*rc = mdsio_preadv(vfsid, &rootcreds, iovs, nitems(iovs), &nb, 0, h);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	psc_crc64_calc(&crc, &ih->inoh_ino, sizeof(ih->inoh_ino));
	if (crc != od_crc) {
		*rc = PFLERR_BADCRC;
		PFL_GOTOERR(out, *rc);
	}

	exists = 1;

	psc_assert(ih->inoh_extras == NULL);
	ih->inoh_extras = PSCALLOC(INOX_SZ);

	/* Load the extras through the temporary file's handle. */
	inoh_2_mfh(ih) = h;
	*rc = mds_inox_ensure_loaded(ih);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	inoh_2_mfh(ih) = th;

	memset(&sstb, 0, sizeof(sstb));
	*rc = mdsio_setattr(vfsid, 0, &sstb, SL_SETATTRF_METASIZE,
	    &rootcreds, NULL, th, NULL);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	*rc = mds_inode_dump(vfsid, NULL, ih, h);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn,
	    &rootcreds, NULL, NULL);

 out:
	if (h)
		mdsio_release(vfsid, &rootcreds, h);
	/* Any failure, including a failed lookup, also attempts the unlink. */
	if (*rc)
		mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn,
		    &rootcreds, NULL, NULL);
	/* Undo the handle swap on every path. */
	inoh_2_mfh(ih) = th;
	return (exists);
}
/*
 * Per-slot callback: optionally remembers the receipt and prints the
 * slot contents.
 * @data: contents of the slot.
 * @r: receipt describing the slot.
 * @arg: address of the table pointer (struct pfl_odt **).
 *
 * While `num_free' is non-zero a copy of the receipt is appended to
 * `rcpts'.  Printing happens only when `show' is set: according to
 * `fmt' when one is given, as text when the first ten bytes are
 * printable, and as a hex dump of `elem_size' bytes otherwise.
 */
void
visit(__unusedx void *data, struct pfl_odt_receipt *r,
    void *arg)
{
	char buf[LINE_MAX], *p = data;
	struct pfl_odt **t = arg;
	static int shown_hdr;
	union {
		int	*d;
		int64_t	*q;
		void	*p;
	} u;
	size_t i;

	if (num_free) {
		struct pfl_odt_receipt *rdup;

		rdup = PSCALLOC(sizeof(*rdup));
		memcpy(rdup, r, sizeof(*r));
		psc_dynarray_add(&rcpts, rdup);
		num_free--;
	}

	if (!show)
		return;

	/* The table summary is printed once, before the first slot. */
	if (!shown_hdr) {
		struct pfl_odt_hdr *h;

		h = (*t)->odt_hdr;
		printf("nelems\t%u\n", h->odth_nelems);
		printf("elemsz\t%u\n", h->odth_objsz);
		printf("%7s %16s data\n", "slot", "crc");
		shown_hdr = 1;
	}

	printf("%7zd %16"PRIx64" ", r->odtr_elem, r->odtr_crc);

	if (fmt) {
		/* Each conversion consumes one value from the slot data. */
		(void)FMTSTR(buf, sizeof(buf), fmt,
		    FMTSTRCASE('d', "d",	(u.p = p, p += sizeof(int),	*u.d))
		    FMTSTRCASE('u', "u",	(u.p = p, p += sizeof(int),	*u.d))
		    FMTSTRCASE('x', "x",	(u.p = p, p += sizeof(int),	*u.d))
		    FMTSTRCASE('q', PRId64,	(u.p = p, p += sizeof(int64_t),	*u.q))
		    FMTSTRCASE('Q', PRIu64,	(u.p = p, p += sizeof(int64_t),	*u.q))
		    FMTSTRCASE('X', PRIx64,	(u.p = p, p += sizeof(int64_t),	*u.q))
		);
		printf("%s\n", buf);
		return;
	}

	/*
	 * If the first 10 characters aren't ASCII, don't display as
	 * such.  The <ctype.h> classifiers require an unsigned char
	 * value, so convert before testing.
	 */
	for (i = 0, p = data; i < 10 && p; i++, p++)
		if (!isspace((unsigned char)*p) &&
		    !isgraph((unsigned char)*p))
			goto skip;
	if (i != 10)
		goto skip;
	printf("%s\n", (char *)data);
	return;

 skip:
	/* Print each byte as two hex digits without sign extension. */
	for (i = 0, p = data; i < elem_size; p++, i++)
		printf("%02x", (unsigned char)*p);
	printf("\n");
}
Exemple #28
0
/*
 * Initialize an on-disk journal.
 * @fn: file path to store journal.
 * @nents: number of entries journal may contain if non-zero.
 * @entsz: size of a journal entry.
 * @rs: read size, in entries; must be non-zero and must evenly divide
 *	the final number of entries.
 * @uuid: value recorded in the journal header's pjh_fsuuid field.
 * @block_dev: non-zero if @fn is a block device, in which case the
 *	device size caps (or, when @nents is zero, determines) the
 *	number of entries.
 * Returns the number of entries created.
 *
 * Any failure is fatal: the routine does not return on error.
 */
uint32_t
sl_journal_format(const char *fn, uint32_t nents, uint32_t entsz,
    uint32_t rs, uint64_t uuid, int block_dev)
{
	uint32_t i, slot, max_nents, reserve;
	struct psc_journal_enthdr *pje;
	struct psc_journal pj;
	struct stat stb;
	unsigned char *jbuf;
	unsigned long numblocks;	/* BLKGETSIZE stores an unsigned long */
	ssize_t nb;
	int fd;

	/*
	 * The read size is used as a divisor below; reject zero before
	 * the target is opened (and truncated).
	 */
	if (rs == 0)
		psc_fatalx("readsize must be non-zero");

	memset(&pj, 0, sizeof(pj));

	fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		psc_fatal("%s", fn);

	if (fstat(fd, &stb) == -1)
		psc_fatal("stat %s", fn);

	/*
	 * If the user does not specify nents, either use default or
	 * based on the block device size.
	 */
	if (nents == 0 && !block_dev)
		nents = SLJ_MDS_JNENTS;

	if (block_dev) {
		if (ioctl(fd, BLKGETSIZE, &numblocks) == -1)
			err(1, "BLKGETSIZE: %s", fn);

		/* show progress, it is going to be a while */
		verbose = 1;

		/* deal with large disks */
		max_nents = MIN(numblocks, SLJ_MDS_MAX_JNENTS);

		/*
		 * leave room on both ends; refuse devices too small to
		 * hold the reserve, as the unsigned subtraction would
		 * otherwise wrap around to a huge entry count
		 */
		reserve = stb.st_blksize / SLJ_MDS_ENTSIZE + 16;
		if (max_nents <= reserve)
			psc_fatalx("%s: block device is too small to "
			    "hold a journal", fn);
		max_nents -= reserve;

		/* efficiency */
		max_nents = (max_nents / rs) * rs;
		if (max_nents == 0)
			psc_fatalx("%s: block device is too small to "
			    "hold a journal", fn);
		if (nents)
			nents = MIN(nents, max_nents);
		else
			nents = max_nents;
	}

	if (nents % rs)
		psc_fatalx("number of slots (%u) should be a multiple of "
		    "readsize (%u)", nents, rs);

	/* Build the header in a buffer padded to the I/O block size. */
	pj.pj_fd = fd;
	pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize));

	pj.pj_hdr->pjh_entsz = entsz;
	pj.pj_hdr->pjh_nents = nents;
	pj.pj_hdr->pjh_version = PJH_VERSION;
	pj.pj_hdr->pjh_readsize = rs;
	pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize);
	pj.pj_hdr->pjh_magic = PJH_MAGIC;
	pj.pj_hdr->pjh_timestamp = time(NULL);
	pj.pj_hdr->pjh_fsuuid = uuid;

	/* The checksum covers every header field preceding pjh_chksum. */
	psc_crc64_init(&pj.pj_hdr->pjh_chksum);
	psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr,
	    offsetof(struct psc_journal_hdr, pjh_chksum));
	psc_crc64_fini(&pj.pj_hdr->pjh_chksum);

	nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0);
	if ((size_t)nb != pj.pj_hdr->pjh_iolen)
		psc_fatalx("failed to write journal header: %s",
		    nb == -1 ? strerror(errno) : "short write");

	/*
	 * Prepare one read-size worth of identical, empty "format"
	 * entries; the same buffer is written repeatedly below.
	 */
	nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize;
	jbuf = psc_alloc(nb, PAF_PAGEALIGN);
	for (i = 0; i < rs; i++) {
		pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i);
		pje->pje_magic = PJE_MAGIC;
		pje->pje_type = PJE_FORMAT;
		pje->pje_xid = PJE_XID_NONE;
		pje->pje_len = 0;

		psc_crc64_init(&pje->pje_chksum);
		psc_crc64_add(&pje->pje_chksum, pje,
		    offsetof(struct psc_journal_enthdr, pje_chksum));
		psc_crc64_add(&pje->pje_chksum, pje->pje_data,
		    pje->pje_len);
		psc_crc64_fini(&pje->pje_chksum);
	}

	/* `i' now counts progress dots printed on the current line. */
	i = 0;
	/* XXX use an option to write only one entry in fast create mode */
	for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) {
		nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs,
		    PJ_GETENTOFF(&pj, slot));
		if ((size_t)nb != PJ_PJESZ(&pj) * rs)
			psc_fatal("failed to write slot %u (%zd)",
			    slot, nb);
		if (verbose && slot % 262144 == 0) {
			printf(".");
			fflush(stdout);
			fsync(pj.pj_fd);
			if (++i == 80) {
				printf("\n");
				i = 0;
			}
		}
	}
	if (verbose && i)
		printf("\n");
	if (close(fd) == -1)
		psc_fatal("failed to close journal");
	/* PJ_PJESZ() reads pj.pj_hdr, so the header must still be valid. */
	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs);

	return (nents);
}