Ejemplo n.º 1
0
/*
 * Remove an arbitrary item from a heap.
 * @ph: heap to remove from; must hold at least one item.
 * @p: item to remove; must be non-NULL.  Its heap entry, found
 *	ph_entoff bytes into the item, records the slot it occupies.
 *
 * The vacated slot is refilled with the tail item, which is then moved
 * up or down until heap order holds again.
 */
void
pfl_heap_remove(struct pfl_heap *ph, void *p)
{
	struct pfl_heap_entry *phe, *che, *pare;
	void *c, *minc, *par;
	int idx, i;

	psc_assert(ph->ph_nitems > 0);

	psc_assert(p);
	phe = PSC_AGP(p, ph->ph_entoff);
	idx = phe->phe_idx;
	ph->ph_nitems--;

	/* Dropping the tail item leaves nothing to reposition. */
	if (idx == ph->ph_nitems)
		return;

	/* Refill the vacated slot with the former tail item. */
	p = ph->ph_base[idx] = ph->ph_base[ph->ph_nitems];
	phe = PSC_AGP(p, ph->ph_entoff);
	phe->phe_idx = idx;

	/*
	 * bubble up: the tail item may come from a different subtree
	 * than the removed item, so it can sort before its new parent.
	 */
	while (phe->phe_idx > 0) {
		par = ph->ph_base[(phe->phe_idx - 1) / 2];
		if (ph->ph_cmpf(par, p) != 1)
			break;
		pare = PSC_AGP(par, ph->ph_entoff);
		_pfl_heap_swap(ph, pare, phe);
	}

	/* bubble down */
	for (;;) {
		/* Pick the smallest of the item and its (up to two) children. */
		for (minc = p, idx = phe->phe_idx * 2 + 1, i = 0;
		    i < 2 && idx < ph->ph_nitems; idx++, i++) {
			c = ph->ph_base[idx];
			if (ph->ph_cmpf(c, minc) == -1)
				minc = c;
		}
		if (minc == p)
			break;
		che = PSC_AGP(minc, ph->ph_entoff);
		_pfl_heap_swap(ph, phe, che);
	}
}
Ejemplo n.º 2
0
/*
 * Print every entry of an in-memory reclaim log and count the entries
 * whose xid is lower than that of the entry preceding them.
 * @buf: log contents; the first entry is a header carrying the
 *	RECLAIM_MAGIC_* values.
 * @size: length of @buf in bytes.
 *
 * Exits with status 1 when the header is truncated or does not carry
 * the expected magic values.
 */
void
dump_reclaim_log(void *buf, int size)
{
	int i, count, order = 0;
	struct srt_reclaim_entry *entryp;
	uint64_t xid = 0;

	entryp = buf;
	/* The magic check reads one whole entry; make sure it is there. */
	if (size < (int)sizeof(struct srt_reclaim_entry) ||
	    entryp->xid != RECLAIM_MAGIC_VER ||
	    entryp->fg.fg_fid != RECLAIM_MAGIC_FID ||
	    entryp->fg.fg_gen != RECLAIM_MAGIC_GEN) {
		fprintf(stderr, "Reclaim log corrupted, invalid header.\n");
		exit(1);
	}

	/* The header occupies the first slot and is not listed. */
	count = size / sizeof(struct srt_reclaim_entry);
	count--;
	entryp = PSC_AGP(entryp, sizeof(struct srt_reclaim_entry));
	printf("   The entry size is %d bytes, total # of entries is %d\n\n",
	    (int)sizeof(struct srt_reclaim_entry), count);

	for (i = 0; i < count; i++) {
		if (entryp->xid < xid) {
			/* Out-of-order entries are flagged with a '*'. */
			order++;
			printf("%4d:   xid = %"PRId64", fg = "SLPRI_FG" * \n",
			    i, entryp->xid, SLPRI_FG_ARGS(&entryp->fg));
		} else
			printf("%4d:   xid = %"PRId64", fg = "SLPRI_FG"\n",
			    i, entryp->xid, SLPRI_FG_ARGS(&entryp->fg));
		/* Remember this xid so the next entry is compared to it. */
		xid = entryp->xid;
		entryp = PSC_AGP(entryp, sizeof(struct srt_reclaim_entry));
	}
	printf("\n   Total number of out-of-order entries: %d\n", order);
}
Ejemplo n.º 3
0
/*
 * Insert an item into a heap.
 * @ph: heap to insert into.
 * @c: item to insert; must be non-NULL.  Its heap entry lives
 *	ph_entoff bytes into the item.
 */
void
pfl_heap_add(struct pfl_heap *ph, void *c)
{
	struct pfl_heap_entry *child, *parent;
	size_t newcap;
	void *up;

	psc_assert(c);
	child = PSC_AGP(c, ph->ph_entoff);

	/* Slot array is full: double it, starting from 8 slots. */
	if (ph->ph_nitems == ph->ph_nalloc) {
		newcap = MAX(8, 2 * ph->ph_nalloc);
		ph->ph_base = psc_realloc(ph->ph_base,
		    newcap * sizeof(void *), 0);
		ph->ph_nalloc = newcap;
	}

	/* Place the new item in the tail slot. */
	child->phe_idx = ph->ph_nitems++;
	ph->ph_base[child->phe_idx] = c;

	/*
	 * Bubble up: keep trading places with the parent for as long as
	 * the comparator returns 1 for (parent, item).
	 */
	for (;;) {
		if (child->phe_idx <= 0)
			break;
		up = ph->ph_base[(child->phe_idx - 1) / 2];
		if (ph->ph_cmpf(up, c) != 1)
			break;
		parent = PSC_AGP(up, ph->ph_entoff);
		_pfl_heap_swap(ph, parent, child);
	}
}
Ejemplo n.º 4
0
/*
 * Handle a NAMESPACE_UPDATE request from another MDS.
 */
int
slm_rmm_handle_namespace_update(struct pscrpc_request *rq)
{
	struct srt_update_entry *entryp;
	struct srm_update_req *mq;
	struct srm_update_rep *mp;
	struct sl_mds_peerinfo *p;
	struct sl_resource *res;
	struct sl_site *site;
	struct iovec iov;
	int i, len, count;

	SL_RSX_ALLOCREP(rq, mq, mp);

	count = mq->count;
	if (count <= 0 || mq->size > LNET_MTU) {
		mp->rc = -EINVAL;
		return (mp->rc);
	}

	iov.iov_len = mq->size;
	iov.iov_base = PSCALLOC(mq->size);

	mp->rc = slrpc_bulkserver(rq, BULK_GET_SINK, SRMM_BULK_PORTAL,
	    &iov, 1);
	if (mp->rc)
		goto out;

	/* Search for the peer information by the given site ID. */
	site = libsl_siteid2site(mq->siteid);
	p = NULL;
	if (site)
		SITE_FOREACH_RES(site, res, i)
			if (res->res_type == SLREST_MDS) {
				p = res2rpmi(res)->rpmi_info;
				break;
			}
	if (p == NULL) {
		psclog_info("fail to find site ID %d", mq->siteid);
		PFL_GOTOERR(out, mp->rc = -EINVAL);
	}

	/*
	 * Iterate through the namespace update buffer and apply updates.
	 * If we fail to apply an update, we still report success to our
	 * peer because reporting an error does not help our cause.
	 */
	entryp = iov.iov_base;
	for (i = 0; i < count; i++) {
		slm_rmm_apply_update(entryp);
		len = UPDATE_ENTRY_LEN(entryp);
		entryp = PSC_AGP(entryp, len);
	}
	zfsslash2_wait_synced(0);

 out:
	PSCFREE(iov.iov_base);
	return (mp->rc);
}
Ejemplo n.º 5
0
/*
 * Return the item stored in a given heap slot without removing it.
 * @ph: heap to inspect.
 * @idx: slot index.
 *
 * Returns NULL when @idx is negative or not below the number of items
 * in the heap.  Asserts that the item's own heap entry records the
 * same slot index.
 */
void *
pfl_heap_peekidx(struct pfl_heap *ph, int idx)
{
	struct pfl_heap_entry *phe;
	void *p;

	/* A negative index would read before the start of ph_base. */
	if (idx < 0 || idx >= ph->ph_nitems)
		return (NULL);

	p = ph->ph_base[idx];
	phe = PSC_AGP(p, ph->ph_entoff);
	psc_assert(phe->phe_idx == idx);
	return (p);
}
Ejemplo n.º 6
0
/*
 * Check whether a memory region is filled entirely with one byte value.
 * @buf: start of the region.
 * @val: fill value; as with memset(3), only its low-order byte
 *	(converted to unsigned char) is used.
 * @len: length of the region in bytes.
 *
 * Returns 1 when every byte equals @val (or when @len is zero) and 0
 * as soon as a differing byte is found.
 */
int
pfl_memchk(const void *buf, int val, size_t len)
{
    const unsigned char *p = buf;
    const unsigned char *ep = p + len;
    const unsigned char want = (unsigned char)val;

    /*
     * Compare byte by byte: this is correct for any alignment of
     * @buf and for any fill value, not just zero.
     */
    for (; p < ep; p++)
        if (*p != want)
            return (0);
    return (1);
}
Ejemplo n.º 7
0
/*
 * Build an ACL from its extended-attribute representation.
 * @buf: xattr value: a struct acl_ea_header followed by an array of
 *	struct acl_ea_entry, with fields stored little-endian.
 * @size: length of @buf in bytes.
 *
 * Returns the new ACL, or NULL with errno set when the buffer is too
 * short, has an unexpected version, is not a whole number of entries,
 * or when the ACL or one of its entries cannot be set up.
 */
acl_t
pfl_acl_from_xattr(const void *buf, size_t size)
{
	int i, entries, save_errno;
	const struct acl_ea_header *h = buf;
	const struct acl_ea_entry *xe = PSC_AGP(h + 1, 0);
	unsigned int xperms;
	acl_permset_t permset;
	acl_entry_t e;
	acl_tag_t tag;
	acl_t a;

	if (size < sizeof(*h)) {
		errno = EINVAL;
		return (NULL);
	}
	if (le32toh(h->version) != ACL_EA_VERSION) {
		errno = EINVAL;
		return (NULL);
	}
	/* What follows the header must be a whole number of entries. */
	size -= sizeof(*h);
	if (size % sizeof(*xe)) {
		errno = EINVAL;
		return (NULL);
	}
	entries = size / sizeof(*xe);

	a = acl_init(entries);
	if (a == NULL)
		return (NULL);
	for (i = 0; i < entries; i++, xe++) {
		/* Without an entry or its permset nothing below is valid. */
		if (acl_create_entry(&a, &e) == -1) {
			save_errno = errno;
			goto fail;
		}
		if (acl_get_permset(e, &permset) == -1) {
			save_errno = errno;
			psclog_error("get_permset");
			goto fail;
		}
		acl_clear_perms(permset);

		xperms = le16toh(xe->perm);

		if (xperms & ACL_READ)
			acl_add_perm(permset, ACL_READ);
		if (xperms & ACL_WRITE)
			acl_add_perm(permset, ACL_WRITE);
		if (xperms & ACL_EXECUTE)
			acl_add_perm(permset, ACL_EXECUTE);
		if (acl_set_permset(e, permset) == -1)
			psclog_error("set_permset");

		acl_set_tag_type(e, tag = le16toh(xe->tag));

		/* Only user and group entries carry a qualifier (an ID). */
		switch (tag) {
		case ACL_USER: {
			uid_t uid = le32toh(xe->id);

			acl_set_qualifier(e, &uid);
			break;
		    }
		case ACL_GROUP: {
			gid_t gid = le32toh(xe->id);

			acl_set_qualifier(e, &gid);
			break;
		    }
		default:
			break;
		}
	}
	return (a);

 fail:
	/* Release the partial ACL, keeping the errno of the failure. */
	acl_free(a);
	errno = save_errno;
	return (NULL);
}
Ejemplo n.º 8
0
/**
 * pjournal_format - Initialize an on-disk journal.
 * @fn: file path to store journal.
 * @nents: number of entries journal may contain.
 * @entsz: size of a journal entry.
 * Returns 0 on success, errno on error.
 */
void
pjournal_format(const char *fn, uint32_t nents, uint32_t entsz,
    uint32_t rs, uint64_t uuid)
{
	struct psc_journal_enthdr *pje;
	struct psc_journal pj;
	struct stat stb;
	unsigned char *jbuf;
	uint32_t i, j, slot;
	int rc, fd;
	ssize_t nb;

	if (nents % rs)
		psc_fatalx("number of slots (%u) should be a multiple of "
		    "readsize (%u)", nents, rs);

	memset(&pj, 0, sizeof(struct psc_journal));

	rc = 0;
	fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		psc_fatal("%s", fn);

	if (fstat(fd, &stb) == -1)
		psc_fatal("stat %s", fn);

	pj.pj_fd = fd;
	pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize));

	pj.pj_hdr->pjh_entsz = entsz;
	pj.pj_hdr->pjh_nents = nents;
	pj.pj_hdr->pjh_version = PJH_VERSION;
	pj.pj_hdr->pjh_readsize = rs;
	pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize);
	pj.pj_hdr->pjh_magic = PJH_MAGIC;
	pj.pj_hdr->pjh_timestamp = time(NULL);
	pj.pj_hdr->pjh_fsuuid = uuid;

	psc_crc64_init(&pj.pj_hdr->pjh_chksum);
	psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr,
	    offsetof(struct psc_journal_hdr, pjh_chksum));
	psc_crc64_fini(&pj.pj_hdr->pjh_chksum);

	nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0);
	if ((size_t)nb != pj.pj_hdr->pjh_iolen)
		psc_fatalx("failed to write journal header: %s",
		    nb == -1 ? strerror(errno) : "short write");

	nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize;
	jbuf = psc_alloc(nb, PAF_PAGEALIGN);
	for (i = 0; i < rs; i++) {
		pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i);
		pje->pje_magic = PJE_MAGIC;
		pje->pje_type = PJE_FORMAT;
		pje->pje_xid = PJE_XID_NONE;
		pje->pje_len = 0;

		psc_crc64_init(&pje->pje_chksum);
		psc_crc64_add(&pje->pje_chksum, pje,
		    offsetof(struct psc_journal_enthdr, pje_chksum));
		psc_crc64_add(&pje->pje_chksum, pje->pje_data,
		    pje->pje_len);
		psc_crc64_fini(&pje->pje_chksum);
	}

	j = 0;
	/* XXX use an option to write only one entry in fast create mode */
	for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) {
		nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs,
		    PJ_GETENTOFF(&pj, slot));
		if ((size_t)nb != PJ_PJESZ(&pj) * rs)
			psc_fatal("failed to write slot %u (%zd)",
			    slot, nb);
		if (verbose && slot % 262144 == 0) {
			printf(".");
			fflush(stdout);
			fsync(pj.pj_fd);
			if (++j == 80) {
				printf("\n");
				j = 0;
			}
		}
	}
	if (verbose && j)
		printf("\n");
	if (close(fd) == -1)
		psc_fatal("failed to close journal");
	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs);
	psclog_info("journal %s formatted: %d slots, %d readsize, error=%d",
	    fn, nents, rs, rc);
}
Ejemplo n.º 9
0
/*
 * Initialize an on-disk journal.
 * @fn: file path to store journal; created or truncated.
 * @nents: number of entries journal may contain if non-zero.  When
 *	zero, SLJ_MDS_JNENTS is used for a regular file and a value
 *	derived from the device size for a block device.
 * @entsz: size of a journal entry.
 * @rs: read size; must be non-zero and must divide the final number
 *	of entries.
 * @uuid: file system UUID recorded in the journal header.
 * @block_dev: non-zero when @fn is a block device.
 * Returns the number of entries created.
 */
uint32_t
sl_journal_format(const char *fn, uint32_t nents, uint32_t entsz,
    uint32_t rs, uint64_t uuid, int block_dev)
{
	uint32_t i, slot, max_nents, reserve;
	struct psc_journal_enthdr *pje;
	struct psc_journal pj;
	struct stat stb;
	unsigned char *jbuf;
	size_t numblocks;
	ssize_t nb;
	int fd;

	/*
	 * @rs is used as a divisor below; reject zero before the
	 * target is opened and truncated.
	 */
	if (rs == 0)
		psc_fatalx("readsize must be non-zero");

	memset(&pj, 0, sizeof(pj));

	fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		psc_fatal("%s", fn);

	if (fstat(fd, &stb) == -1)
		psc_fatal("stat %s", fn);

	/*
	 * If the user does not specify nents, either use default or
	 * based on the block device size.
	 */
	if (nents == 0 && !block_dev)
		nents = SLJ_MDS_JNENTS;

	if (block_dev) {
		if (ioctl(fd, BLKGETSIZE, &numblocks) == -1)
			err(1, "BLKGETSIZE: %s", fn);

		/* show progress, it is going to be a while */
		verbose = 1;

		/* deal with large disks */
		max_nents = MIN(numblocks, SLJ_MDS_MAX_JNENTS);

		/*
		 * leave room on both ends; a device smaller than the
		 * reserve would make the unsigned subtraction wrap.
		 */
		reserve = stb.st_blksize / SLJ_MDS_ENTSIZE + 16;
		if (max_nents < reserve)
			psc_fatalx("%s: device too small for a journal",
			    fn);
		max_nents -= reserve;

		/* efficiency */
		max_nents = (max_nents / rs) * rs;
		if (nents)
			nents = MIN(nents, max_nents);
		else
			nents = max_nents;
	}

	if (nents % rs)
		psc_fatalx("number of slots (%u) should be a multiple of "
		    "readsize (%u)", nents, rs);

	/* The header is padded out to a multiple of the block size. */
	pj.pj_fd = fd;
	pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize));

	pj.pj_hdr->pjh_entsz = entsz;
	pj.pj_hdr->pjh_nents = nents;
	pj.pj_hdr->pjh_version = PJH_VERSION;
	pj.pj_hdr->pjh_readsize = rs;
	pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize);
	pj.pj_hdr->pjh_magic = PJH_MAGIC;
	pj.pj_hdr->pjh_timestamp = time(NULL);
	pj.pj_hdr->pjh_fsuuid = uuid;

	/* The checksum covers every header field that precedes it. */
	psc_crc64_init(&pj.pj_hdr->pjh_chksum);
	psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr,
	    offsetof(struct psc_journal_hdr, pjh_chksum));
	psc_crc64_fini(&pj.pj_hdr->pjh_chksum);

	nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0);
	if ((size_t)nb != pj.pj_hdr->pjh_iolen)
		psc_fatalx("failed to write journal header: %s",
		    nb == -1 ? strerror(errno) : "short write");

	/* Build one batch of @rs empty "format" entries... */
	nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize;
	jbuf = psc_alloc(nb, PAF_PAGEALIGN);
	for (i = 0; i < rs; i++) {
		pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i);
		pje->pje_magic = PJE_MAGIC;
		pje->pje_type = PJE_FORMAT;
		pje->pje_xid = PJE_XID_NONE;
		pje->pje_len = 0;

		psc_crc64_init(&pje->pje_chksum);
		psc_crc64_add(&pje->pje_chksum, pje,
		    offsetof(struct psc_journal_enthdr, pje_chksum));
		psc_crc64_add(&pje->pje_chksum, pje->pje_data,
		    pje->pje_len);
		psc_crc64_fini(&pje->pje_chksum);
	}

	/* ...and stamp that batch over every group of @rs slots. */
	i = 0;
	/* XXX use an option to write only one entry in fast create mode */
	for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) {
		nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs,
		    PJ_GETENTOFF(&pj, slot));
		if ((size_t)nb != PJ_PJESZ(&pj) * rs)
			psc_fatal("failed to write slot %u (%zd)",
			    slot, nb);
		/* Progress dots, wrapped at 80 per line. */
		if (verbose && slot % 262144 == 0) {
			printf(".");
			fflush(stdout);
			fsync(pj.pj_fd);
			if (++i == 80) {
				printf("\n");
				i = 0;
			}
		}
	}
	if (verbose && i)
		printf("\n");
	if (close(fd) == -1)
		psc_fatal("failed to close journal");
	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs);

	return (nents);
}