Exemple #1
0
/**
 * pjournal_dump - Dump the contents of a journal file.
 * @fn: journal filename to query.
 * @verbose: whether to report stats summary or full dump.
 *
 * Each time mds restarts, it writes log entries starting from the very
 * first slot of the log.  Anyway, the function dumps all log entries,
 * some of them may be from previous incarnations of the MDS.
 */
void
pjournal_dump(const char *fn)
{
	int i, ntotal, nmagic, nchksum, nformat, ndump, first = 1;
	uint32_t slot, highest_slot = -1, lowest_slot = -1;
	uint64_t chksum, highest_xid = 0, lowest_xid = 0;
	struct psc_journal_enthdr *pje;
	struct psc_journal_hdr *pjh;
	struct psc_journal *pj;
	struct stat statbuf;
	unsigned char *jbuf;
	ssize_t nb, pjhlen;
	time_t ts;

	ntotal = nmagic = nchksum = nformat = ndump = 0;

	pj = PSCALLOC(sizeof(*pj));

	strlcpy(pj->pj_name, pfl_basename(fn), sizeof(pj->pj_name));

	pj->pj_fd = open(fn, O_RDWR | O_DIRECT);
	if (pj->pj_fd == -1)
		psc_fatal("failed to open journal %s", fn);
	if (fstat(pj->pj_fd, &statbuf) == -1)
		psc_fatal("failed to stat journal %s", fn);

	/*
	 * O_DIRECT may impose alignment restrictions so align the
	 * buffer and perform I/O in multiples of file system block
	 * size.
	 */
	pjhlen = PSC_ALIGN(sizeof(*pjh), statbuf.st_blksize);
	pjh = psc_alloc(pjhlen, PAF_PAGEALIGN);
	nb = pread(pj->pj_fd, pjh, pjhlen, 0);
	if (nb != pjhlen)
		psc_fatal("failed to read journal header");

	pj->pj_hdr = pjh;
	if (pjh->pjh_magic != PJH_MAGIC)
		psc_fatalx("journal header has a bad magic number "
		    "%#"PRIx64, pjh->pjh_magic);

	if (pjh->pjh_version != PJH_VERSION)
		psc_fatalx("journal header has an invalid version "
		    "number %d", pjh->pjh_version);

	psc_crc64_init(&chksum);
	psc_crc64_add(&chksum, pjh, offsetof(struct psc_journal_hdr,
	    pjh_chksum));
	psc_crc64_fini(&chksum);

	if (pjh->pjh_chksum != chksum)
		psc_fatalx("journal header has an invalid checksum "
		    "value %"PSCPRIxCRC64" vs %"PSCPRIxCRC64,
		    pjh->pjh_chksum, chksum);

	if (S_ISREG(statbuf.st_mode) && statbuf.st_size !=
	    (off_t)(pjhlen + pjh->pjh_nents * PJ_PJESZ(pj)))
		psc_fatalx("size of the journal log %"PSCPRIdOFFT"d does "
		    "not match specs in its header", statbuf.st_size);

	if (pjh->pjh_nents % pjh->pjh_readsize)
		psc_fatalx("number of entries %d is not a multiple of the "
		    "readsize %d", pjh->pjh_nents, pjh->pjh_readsize);

	ts = pjh->pjh_timestamp;

	printf("%s:\n"
	    "  version: %u\n"
	    "  entry size: %u\n"
	    "  number of entries: %u\n"
	    "  batch read size: %u\n"
	    "  entry start offset: %"PRId64"\n"
	    "  format time: %s"
	    "  uuid: %"PRIx64"\n"
	    "  %4s  %3s %4s %4s %s\n",
	    fn, pjh->pjh_version, PJ_PJESZ(pj), pjh->pjh_nents,
	    pjh->pjh_readsize, pjh->pjh_start_off,
	    ctime(&ts), pjh->pjh_fsuuid,
	    "idx", "typ", "xid", "txg", "details");

	jbuf = psc_alloc(PJ_PJESZ(pj) * pj->pj_hdr->pjh_readsize,
	    PAF_PAGEALIGN);
	for (slot = 0; slot < pjh->pjh_nents;
	    slot += pjh->pjh_readsize) {
		nb = pread(pj->pj_fd, jbuf, PJ_PJESZ(pj) *
		    pjh->pjh_readsize, PJ_GETENTOFF(pj, slot));
		if (nb != PJ_PJESZ(pj) * pjh->pjh_readsize)
			warn("failed to read %d log entries at slot %d",
			    pjh->pjh_readsize, slot);

		for (i = 0; i < pjh->pjh_readsize; i++) {
			ntotal++;
			pje = (void *)&jbuf[PJ_PJESZ(pj) * i];
			if (pje->pje_magic != PJE_MAGIC) {
				nmagic++;
				warnx("journal slot %d has a bad magic"
				    "number", slot + i);
				continue;
			}

			/*
			 * If we hit a new entry that is never used, we
			 * assume that the rest of the journal is never
			 * used.
			 */
			if (pje->pje_type == PJE_FORMAT) {
				nformat = nformat + pjh->pjh_nents -
				    (slot + i);
				goto done;
			}

			psc_crc64_init(&chksum);
			psc_crc64_add(&chksum, pje, offsetof(
			    struct psc_journal_enthdr, pje_chksum));
			psc_crc64_add(&chksum, pje->pje_data,
			    pje->pje_len);
			psc_crc64_fini(&chksum);

			if (pje->pje_chksum != chksum) {
				nchksum++;
				warnx("journal slot %d has a corrupt "
				    "checksum", slot + i);
				goto done;
			}
			ndump++;
			if (verbose)
				pjournal_dump_entry(slot + i, pje);
			if (first) {
				first = 0;
				highest_xid = lowest_xid = pje->pje_xid;
				highest_slot = lowest_slot = slot + i;
				continue;
			}
			if (highest_xid < pje->pje_xid) {
				highest_xid = pje->pje_xid;
				highest_slot = slot + i;
			}
			if (lowest_xid > pje->pje_xid) {
				lowest_xid = pje->pje_xid;
				lowest_slot = slot + i;
			}
		}

	}

 done:
	if (close(pj->pj_fd) == -1)
		printf("failed closing journal %s", fn);

	psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(pj));
	PSCFREE(pj);

	printf("----------------------------------------------\n"
	    "%8d slot(s) scanned\n"
	    "%8d in use\n"
	    "%8d formatted\n"
	    "%8d bad magic\n"
	    "%8d bad checksum(s)\n"
	    "lowest transaction ID=%#"PRIx64" (slot=%d)\n"
	    "highest transaction ID=%#"PRIx64" (slot=%d)\n",
	    ntotal, ndump, nformat, nmagic, nchksum,
	    lowest_xid, lowest_slot,
	    highest_xid, highest_slot);
}
Exemple #2
0
/**
 * pjournal_format - Initialize an on-disk journal.
 * @fn: file path to store journal.
 * @nents: number of entries journal may contain.
 * @entsz: size of a journal entry.
 * @rs: read size, i.e. the number of entries written per batch; must
 *	be non-zero and @nents must be a multiple of it.
 * @uuid: value stored in the header's pjh_fsuuid field.
 *
 * The file is created (or truncated), a checksummed header is written
 * at offset 0 and every slot is filled with a checksummed PJE_FORMAT
 * entry.  Nothing is returned; failures are reported through
 * psc_fatal()/psc_fatalx().
 */
void
pjournal_format(const char *fn, uint32_t nents, uint32_t entsz,
    uint32_t rs, uint64_t uuid)
{
	struct psc_journal_enthdr *pje;
	struct psc_journal pj;
	struct stat stb;
	unsigned char *jbuf;
	uint32_t i, j, slot;
	size_t batchlen;
	int rc, fd;
	ssize_t nb;

	/* Reject rs == 0 first: it is used as a divisor and loop stride. */
	if (rs == 0)
		psc_fatalx("readsize must be non-zero");

	if (nents % rs)
		psc_fatalx("number of slots (%u) should be a multiple of "
		    "readsize (%u)", nents, rs);

	memset(&pj, 0, sizeof(struct psc_journal));

	/* Always 0: every failure below is fatal; only used for logging. */
	rc = 0;
	fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		psc_fatal("%s", fn);

	if (fstat(fd, &stb) == -1)
		psc_fatal("stat %s", fn);

	/* The header occupies a whole number of file system blocks. */
	pj.pj_fd = fd;
	pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize));

	pj.pj_hdr->pjh_entsz = entsz;
	pj.pj_hdr->pjh_nents = nents;
	pj.pj_hdr->pjh_version = PJH_VERSION;
	pj.pj_hdr->pjh_readsize = rs;
	pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize);
	pj.pj_hdr->pjh_magic = PJH_MAGIC;
	pj.pj_hdr->pjh_timestamp = time(NULL);
	pj.pj_hdr->pjh_fsuuid = uuid;

	/* The checksum covers every header field preceding pjh_chksum. */
	psc_crc64_init(&pj.pj_hdr->pjh_chksum);
	psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr,
	    offsetof(struct psc_journal_hdr, pjh_chksum));
	psc_crc64_fini(&pj.pj_hdr->pjh_chksum);

	nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0);
	if ((size_t)nb != pj.pj_hdr->pjh_iolen)
		psc_fatalx("failed to write journal header: %s",
		    nb == -1 ? strerror(errno) : "short write");

	/*
	 * Build one batch of rs identical "formatted" entries; the
	 * same buffer is then written over every batch of slots.
	 * Widen before multiplying so the length cannot wrap at 32 bits.
	 */
	batchlen = (size_t)PJ_PJESZ(&pj) * rs;
	jbuf = psc_alloc(batchlen, PAF_PAGEALIGN);
	for (i = 0; i < rs; i++) {
		pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i);
		pje->pje_magic = PJE_MAGIC;
		pje->pje_type = PJE_FORMAT;
		pje->pje_xid = PJE_XID_NONE;
		pje->pje_len = 0;

		psc_crc64_init(&pje->pje_chksum);
		psc_crc64_add(&pje->pje_chksum, pje,
		    offsetof(struct psc_journal_enthdr, pje_chksum));
		psc_crc64_add(&pje->pje_chksum, pje->pje_data,
		    pje->pje_len);
		psc_crc64_fini(&pje->pje_chksum);
	}

	/* j counts progress dots on the current output line. */
	j = 0;
	/* XXX use an option to write only one entry in fast create mode */
	for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) {
		nb = pwrite(pj.pj_fd, jbuf, batchlen,
		    PJ_GETENTOFF(&pj, slot));
		if ((size_t)nb != batchlen)
			psc_fatal("failed to write slot %u (%zd)",
			    slot, nb);
		if (verbose && slot % 262144 == 0) {
			printf(".");
			fflush(stdout);
			fsync(pj.pj_fd);
			if (++j == 80) {
				printf("\n");
				j = 0;
			}
		}
	}
	if (verbose && j)
		printf("\n");
	if (close(fd) == -1)
		psc_fatal("failed to close journal");
	psc_free(jbuf, PAF_PAGEALIGN, batchlen);
	/* The header buffer is no longer needed once the file is closed. */
	PSCFREE(pj.pj_hdr);
	psclog_info("journal %s formatted: %d slots, %d readsize, error=%d",
	    fn, nents, rs, rc);
}
Exemple #3
0
/*
 * Initialize an on-disk journal.
 * @fn: file path to store journal.
 * @nents: number of entries journal may contain if non-zero.
 * @entsz: size of a journal entry.
 * @rs: read size, i.e. the number of entries written per batch; must
 *	be non-zero and the final entry count must be a multiple of it.
 * @uuid: value stored in the header's pjh_fsuuid field.
 * @block_dev: non-zero if @fn is a block device; the entry count is
 *	then capped by (or, when @nents is zero, derived from) the
 *	size reported by BLKGETSIZE, and progress output is enabled.
 * Returns the number of entries created.
 */
uint32_t
sl_journal_format(const char *fn, uint32_t nents, uint32_t entsz,
    uint32_t rs, uint64_t uuid, int block_dev)
{
	uint32_t i, slot, max_nents, reserve;
	struct psc_journal_enthdr *pje;
	struct psc_journal pj;
	struct stat stb;
	unsigned char *jbuf;
	unsigned long numblocks;
	size_t batchlen;
	ssize_t nb;
	int fd;

	/*
	 * Reject rs == 0 before touching the file: it is used as a
	 * divisor and as the loop stride below.
	 */
	if (rs == 0)
		psc_fatalx("readsize must be non-zero");

	memset(&pj, 0, sizeof(pj));

	fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		psc_fatal("%s", fn);

	if (fstat(fd, &stb) == -1)
		psc_fatal("stat %s", fn);

	/*
	 * If the user does not specify nents, either use default or
	 * based on the block device size.
	 */
	if (nents == 0 && !block_dev)
		nents = SLJ_MDS_JNENTS;

	if (block_dev) {
		if (ioctl(fd, BLKGETSIZE, &numblocks) == -1)
			err(1, "BLKGETSIZE: %s", fn);

		/* show progress, it is going to be a while */
		verbose = 1;

		/* deal with large disks */
		max_nents = MIN(numblocks, SLJ_MDS_MAX_JNENTS);

		/*
		 * Leave room on both ends.  Check first so a small
		 * device cannot wrap the unsigned subtraction into a
		 * huge entry count, and so at least one full batch of
		 * rs entries fits.
		 */
		reserve = stb.st_blksize / SLJ_MDS_ENTSIZE + 16;
		if (max_nents <= reserve || (max_nents - reserve) / rs == 0)
			psc_fatalx("%s: block device is too small to "
			    "hold a journal", fn);
		max_nents -= reserve;

		/* efficiency */
		max_nents = (max_nents / rs) * rs;
		if (nents)
			nents = MIN(nents, max_nents);
		else
			nents = max_nents;
	}

	if (nents % rs)
		psc_fatalx("number of slots (%u) should be a multiple of "
		    "readsize (%u)", nents, rs);

	/* The header occupies a whole number of file system blocks. */
	pj.pj_fd = fd;
	pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize));

	pj.pj_hdr->pjh_entsz = entsz;
	pj.pj_hdr->pjh_nents = nents;
	pj.pj_hdr->pjh_version = PJH_VERSION;
	pj.pj_hdr->pjh_readsize = rs;
	pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr),
	    stb.st_blksize);
	pj.pj_hdr->pjh_magic = PJH_MAGIC;
	pj.pj_hdr->pjh_timestamp = time(NULL);
	pj.pj_hdr->pjh_fsuuid = uuid;

	/* The checksum covers every header field preceding pjh_chksum. */
	psc_crc64_init(&pj.pj_hdr->pjh_chksum);
	psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr,
	    offsetof(struct psc_journal_hdr, pjh_chksum));
	psc_crc64_fini(&pj.pj_hdr->pjh_chksum);

	nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0);
	if ((size_t)nb != pj.pj_hdr->pjh_iolen)
		psc_fatalx("failed to write journal header: %s",
		    nb == -1 ? strerror(errno) : "short write");

	/*
	 * Build one batch of rs identical "formatted" entries; the
	 * same buffer is then written over every batch of slots.
	 * Widen before multiplying so the length cannot wrap at 32 bits.
	 */
	batchlen = (size_t)PJ_PJESZ(&pj) * rs;
	jbuf = psc_alloc(batchlen, PAF_PAGEALIGN);
	for (i = 0; i < rs; i++) {
		pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i);
		pje->pje_magic = PJE_MAGIC;
		pje->pje_type = PJE_FORMAT;
		pje->pje_xid = PJE_XID_NONE;
		pje->pje_len = 0;

		psc_crc64_init(&pje->pje_chksum);
		psc_crc64_add(&pje->pje_chksum, pje,
		    offsetof(struct psc_journal_enthdr, pje_chksum));
		psc_crc64_add(&pje->pje_chksum, pje->pje_data,
		    pje->pje_len);
		psc_crc64_fini(&pje->pje_chksum);
	}

	/* i is reused to count progress dots on the current line. */
	i = 0;
	/* XXX use an option to write only one entry in fast create mode */
	for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) {
		nb = pwrite(pj.pj_fd, jbuf, batchlen,
		    PJ_GETENTOFF(&pj, slot));
		if ((size_t)nb != batchlen)
			psc_fatal("failed to write slot %u (%zd)",
			    slot, nb);
		if (verbose && slot % 262144 == 0) {
			printf(".");
			fflush(stdout);
			fsync(pj.pj_fd);
			if (++i == 80) {
				printf("\n");
				i = 0;
			}
		}
	}
	if (verbose && i)
		printf("\n");
	if (close(fd) == -1)
		psc_fatal("failed to close journal");
	psc_free(jbuf, PAF_PAGEALIGN, batchlen);
	/* The header buffer is no longer needed once the file is closed. */
	PSCFREE(pj.pj_hdr);

	return (nents);
}