/** * pjournal_dump - Dump the contents of a journal file. * @fn: journal filename to query. * @verbose: whether to report stats summary or full dump. * * Each time mds restarts, it writes log entries starting from the very * first slot of the log. Anyway, the function dumps all log entries, * some of them may be from previous incarnations of the MDS. */ void pjournal_dump(const char *fn) { int i, ntotal, nmagic, nchksum, nformat, ndump, first = 1; uint32_t slot, highest_slot = -1, lowest_slot = -1; uint64_t chksum, highest_xid = 0, lowest_xid = 0; struct psc_journal_enthdr *pje; struct psc_journal_hdr *pjh; struct psc_journal *pj; struct stat statbuf; unsigned char *jbuf; ssize_t nb, pjhlen; time_t ts; ntotal = nmagic = nchksum = nformat = ndump = 0; pj = PSCALLOC(sizeof(*pj)); strlcpy(pj->pj_name, pfl_basename(fn), sizeof(pj->pj_name)); pj->pj_fd = open(fn, O_RDWR | O_DIRECT); if (pj->pj_fd == -1) psc_fatal("failed to open journal %s", fn); if (fstat(pj->pj_fd, &statbuf) == -1) psc_fatal("failed to stat journal %s", fn); /* * O_DIRECT may impose alignment restrictions so align the * buffer and perform I/O in multiples of file system block * size. */ pjhlen = PSC_ALIGN(sizeof(*pjh), statbuf.st_blksize); pjh = psc_alloc(pjhlen, PAF_PAGEALIGN); nb = pread(pj->pj_fd, pjh, pjhlen, 0); if (nb != pjhlen) psc_fatal("failed to read journal header"); pj->pj_hdr = pjh; if (pjh->pjh_magic != PJH_MAGIC) psc_fatalx("journal header has a bad magic number " "%#"PRIx64, pjh->pjh_magic); if (pjh->pjh_version != PJH_VERSION) psc_fatalx("journal header has an invalid version " "number %d", pjh->pjh_version); psc_crc64_init(&chksum); psc_crc64_add(&chksum, pjh, offsetof(struct psc_journal_hdr, pjh_chksum)); psc_crc64_fini(&chksum); if (pjh->pjh_chksum != chksum) psc_fatalx("journal header has an invalid checksum " "value %"PSCPRIxCRC64" vs %"PSCPRIxCRC64, pjh->pjh_chksum, chksum); if (S_ISREG(statbuf.st_mode) && statbuf.st_size != (off_t)(pjhlen + pjh->pjh_nents * PJ_PJESZ(pj))) psc_fatalx("size of the journal log %"PSCPRIdOFFT"d does " "not match specs in its header", statbuf.st_size); if (pjh->pjh_nents % pjh->pjh_readsize) psc_fatalx("number of entries %d is not a multiple of the " "readsize %d", pjh->pjh_nents, pjh->pjh_readsize); ts = pjh->pjh_timestamp; printf("%s:\n" " version: %u\n" " entry size: %u\n" " number of entries: %u\n" " batch read size: %u\n" " entry start offset: %"PRId64"\n" " format time: %s" " uuid: %"PRIx64"\n" " %4s %3s %4s %4s %s\n", fn, pjh->pjh_version, PJ_PJESZ(pj), pjh->pjh_nents, pjh->pjh_readsize, pjh->pjh_start_off, ctime(&ts), pjh->pjh_fsuuid, "idx", "typ", "xid", "txg", "details"); jbuf = psc_alloc(PJ_PJESZ(pj) * pj->pj_hdr->pjh_readsize, PAF_PAGEALIGN); for (slot = 0; slot < pjh->pjh_nents; slot += pjh->pjh_readsize) { nb = pread(pj->pj_fd, jbuf, PJ_PJESZ(pj) * pjh->pjh_readsize, PJ_GETENTOFF(pj, slot)); if (nb != PJ_PJESZ(pj) * pjh->pjh_readsize) warn("failed to read %d log entries at slot %d", pjh->pjh_readsize, slot); for (i = 0; i < pjh->pjh_readsize; i++) { ntotal++; pje = (void *)&jbuf[PJ_PJESZ(pj) * i]; if (pje->pje_magic != PJE_MAGIC) { nmagic++; warnx("journal slot %d has a bad magic" "number", slot + i); continue; } /* * If we hit a new entry that is never used, we * assume that the rest of the journal is never * used. */ if (pje->pje_type == PJE_FORMAT) { nformat = nformat + pjh->pjh_nents - (slot + i); goto done; } psc_crc64_init(&chksum); psc_crc64_add(&chksum, pje, offsetof( struct psc_journal_enthdr, pje_chksum)); psc_crc64_add(&chksum, pje->pje_data, pje->pje_len); psc_crc64_fini(&chksum); if (pje->pje_chksum != chksum) { nchksum++; warnx("journal slot %d has a corrupt " "checksum", slot + i); goto done; } ndump++; if (verbose) pjournal_dump_entry(slot + i, pje); if (first) { first = 0; highest_xid = lowest_xid = pje->pje_xid; highest_slot = lowest_slot = slot + i; continue; } if (highest_xid < pje->pje_xid) { highest_xid = pje->pje_xid; highest_slot = slot + i; } if (lowest_xid > pje->pje_xid) { lowest_xid = pje->pje_xid; lowest_slot = slot + i; } } } done: if (close(pj->pj_fd) == -1) printf("failed closing journal %s", fn); psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(pj)); PSCFREE(pj); printf("----------------------------------------------\n" "%8d slot(s) scanned\n" "%8d in use\n" "%8d formatted\n" "%8d bad magic\n" "%8d bad checksum(s)\n" "lowest transaction ID=%#"PRIx64" (slot=%d)\n" "highest transaction ID=%#"PRIx64" (slot=%d)\n", ntotal, ndump, nformat, nmagic, nchksum, lowest_xid, lowest_slot, highest_xid, highest_slot); }
/** * pjournal_format - Initialize an on-disk journal. * @fn: file path to store journal. * @nents: number of entries journal may contain. * @entsz: size of a journal entry. * Returns 0 on success, errno on error. */ void pjournal_format(const char *fn, uint32_t nents, uint32_t entsz, uint32_t rs, uint64_t uuid) { struct psc_journal_enthdr *pje; struct psc_journal pj; struct stat stb; unsigned char *jbuf; uint32_t i, j, slot; int rc, fd; ssize_t nb; if (nents % rs) psc_fatalx("number of slots (%u) should be a multiple of " "readsize (%u)", nents, rs); memset(&pj, 0, sizeof(struct psc_journal)); rc = 0; fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd == -1) psc_fatal("%s", fn); if (fstat(fd, &stb) == -1) psc_fatal("stat %s", fn); pj.pj_fd = fd; pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize)); pj.pj_hdr->pjh_entsz = entsz; pj.pj_hdr->pjh_nents = nents; pj.pj_hdr->pjh_version = PJH_VERSION; pj.pj_hdr->pjh_readsize = rs; pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize); pj.pj_hdr->pjh_magic = PJH_MAGIC; pj.pj_hdr->pjh_timestamp = time(NULL); pj.pj_hdr->pjh_fsuuid = uuid; psc_crc64_init(&pj.pj_hdr->pjh_chksum); psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr, offsetof(struct psc_journal_hdr, pjh_chksum)); psc_crc64_fini(&pj.pj_hdr->pjh_chksum); nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0); if ((size_t)nb != pj.pj_hdr->pjh_iolen) psc_fatalx("failed to write journal header: %s", nb == -1 ? strerror(errno) : "short write"); nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize; jbuf = psc_alloc(nb, PAF_PAGEALIGN); for (i = 0; i < rs; i++) { pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i); pje->pje_magic = PJE_MAGIC; pje->pje_type = PJE_FORMAT; pje->pje_xid = PJE_XID_NONE; pje->pje_len = 0; psc_crc64_init(&pje->pje_chksum); psc_crc64_add(&pje->pje_chksum, pje, offsetof(struct psc_journal_enthdr, pje_chksum)); psc_crc64_add(&pje->pje_chksum, pje->pje_data, pje->pje_len); psc_crc64_fini(&pje->pje_chksum); } j = 0; /* XXX use an option to write only one entry in fast create mode */ for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) { nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs, PJ_GETENTOFF(&pj, slot)); if ((size_t)nb != PJ_PJESZ(&pj) * rs) psc_fatal("failed to write slot %u (%zd)", slot, nb); if (verbose && slot % 262144 == 0) { printf("."); fflush(stdout); fsync(pj.pj_fd); if (++j == 80) { printf("\n"); j = 0; } } } if (verbose && j) printf("\n"); if (close(fd) == -1) psc_fatal("failed to close journal"); psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs); psclog_info("journal %s formatted: %d slots, %d readsize, error=%d", fn, nents, rs, rc); }
/* * Initialize an on-disk journal. * @fn: file path to store journal. * @nents: number of entries journal may contain if non-zero. * @entsz: size of a journal entry. * @rs: read size. * Returns the number of entries created. */ uint32_t sl_journal_format(const char *fn, uint32_t nents, uint32_t entsz, uint32_t rs, uint64_t uuid, int block_dev) { uint32_t i, slot, max_nents; struct psc_journal_enthdr *pje; struct psc_journal pj; struct stat stb; unsigned char *jbuf; size_t numblocks; ssize_t nb; int fd; memset(&pj, 0, sizeof(pj)); fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd == -1) psc_fatal("%s", fn); if (fstat(fd, &stb) == -1) psc_fatal("stat %s", fn); /* * If the user does not specify nents, either use default or * based on the block device size. */ if (nents == 0 && !block_dev) nents = SLJ_MDS_JNENTS; if (block_dev) { if (ioctl(fd, BLKGETSIZE, &numblocks) == -1) err(1, "BLKGETSIZE: %s", fn); /* show progress, it is going to be a while */ verbose = 1; /* deal with large disks */ max_nents = MIN(numblocks, SLJ_MDS_MAX_JNENTS); /* leave room on both ends */ max_nents -= stb.st_blksize / SLJ_MDS_ENTSIZE + 16; /* efficiency */ max_nents = (max_nents / rs) * rs; if (nents) nents = MIN(nents, max_nents); else nents = max_nents; } if (nents % rs) psc_fatalx("number of slots (%u) should be a multiple of " "readsize (%u)", nents, rs); pj.pj_fd = fd; pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize)); pj.pj_hdr->pjh_entsz = entsz; pj.pj_hdr->pjh_nents = nents; pj.pj_hdr->pjh_version = PJH_VERSION; pj.pj_hdr->pjh_readsize = rs; pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize); pj.pj_hdr->pjh_magic = PJH_MAGIC; pj.pj_hdr->pjh_timestamp = time(NULL); pj.pj_hdr->pjh_fsuuid = uuid; psc_crc64_init(&pj.pj_hdr->pjh_chksum); psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr, offsetof(struct psc_journal_hdr, pjh_chksum)); psc_crc64_fini(&pj.pj_hdr->pjh_chksum); nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0); if ((size_t)nb != pj.pj_hdr->pjh_iolen) psc_fatalx("failed to write journal header: %s", nb == -1 ? strerror(errno) : "short write"); nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize; jbuf = psc_alloc(nb, PAF_PAGEALIGN); for (i = 0; i < rs; i++) { pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i); pje->pje_magic = PJE_MAGIC; pje->pje_type = PJE_FORMAT; pje->pje_xid = PJE_XID_NONE; pje->pje_len = 0; psc_crc64_init(&pje->pje_chksum); psc_crc64_add(&pje->pje_chksum, pje, offsetof(struct psc_journal_enthdr, pje_chksum)); psc_crc64_add(&pje->pje_chksum, pje->pje_data, pje->pje_len); psc_crc64_fini(&pje->pje_chksum); } i = 0; /* XXX use an option to write only one entry in fast create mode */ for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) { nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs, PJ_GETENTOFF(&pj, slot)); if ((size_t)nb != PJ_PJESZ(&pj) * rs) psc_fatal("failed to write slot %u (%zd)", slot, nb); if (verbose && slot % 262144 == 0) { printf("."); fflush(stdout); fsync(pj.pj_fd); if (++i == 80) { printf("\n"); i = 0; } } } if (verbose && i) printf("\n"); if (close(fd) == -1) psc_fatal("failed to close journal"); psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs); return (nents); }