/*
 * Remove an item from a heap, wherever in the heap it currently sits.
 * @ph: heap to remove from; must hold at least one item.
 * @p: item to remove.  Its embedded struct pfl_heap_entry, found
 *	ph->ph_entoff bytes into the item, records the slot it occupies.
 *
 * The tail item is moved into the vacated slot and then sifted until
 * heap order holds again.  The vacated slot need not be an ancestor of
 * the tail slot, so the replacement may be smaller than its new parent;
 * it therefore has to be sifted up as well as down.
 */
void
pfl_heap_remove(struct pfl_heap *ph, void *p)
{
	struct pfl_heap_entry *phe, *che;
	void *c, *minc;
	int idx, i;

	psc_assert(ph->ph_nitems > 0);
	psc_assert(p);

	phe = PSC_AGP(p, ph->ph_entoff);
	idx = phe->phe_idx;
	ph->ph_nitems--;

	/* The tail item itself is going away: nothing to relocate. */
	if (idx == ph->ph_nitems)
		return;

	/* Plug the hole with the tail item. */
	p = ph->ph_base[idx] = ph->ph_base[ph->ph_nitems];
	phe = PSC_AGP(p, ph->ph_entoff);
	phe->phe_idx = idx;

	/*
	 * bubble up: swap with the parent while the parent compares
	 * greater (comparator result 1, as tested in pfl_heap_add()).
	 */
	while (phe->phe_idx > 0) {
		c = ph->ph_base[(phe->phe_idx - 1) / 2];
		if (ph->ph_cmpf(c, p) != 1)
			break;
		che = PSC_AGP(c, ph->ph_entoff);
		_pfl_heap_swap(ph, che, phe);
	}

	/* bubble down: swap with the smallest child until neither is smaller */
	for (;;) {
		minc = p;
		idx = phe->phe_idx * 2 + 1;
		for (i = 0; i < 2 && idx < ph->ph_nitems; idx++, i++) {
			c = ph->ph_base[idx];
			if (ph->ph_cmpf(c, minc) == -1)
				minc = c;
		}
		if (minc == p)
			break;
		che = PSC_AGP(minc, ph->ph_entoff);
		_pfl_heap_swap(ph, phe, che);
	}
}
void dump_reclaim_log(void *buf, int size) { int i, count, order = 0; struct srt_reclaim_entry *entryp; uint64_t xid = 0; count = size / sizeof(struct srt_reclaim_entry); entryp = buf; if (entryp->xid != RECLAIM_MAGIC_VER || entryp->fg.fg_fid != RECLAIM_MAGIC_FID || entryp->fg.fg_gen != RECLAIM_MAGIC_GEN) { fprintf(stderr, "Reclaim log corrupted, invalid header.\n"); exit(1); } count--; entryp = PSC_AGP(entryp, sizeof(struct srt_reclaim_entry)); printf(" The entry size is %d bytes, total # of entries is %d\n\n", (int)sizeof(struct srt_reclaim_entry), count); for (i = 0; i < count; i++) { if (entryp->xid < xid) { order++; printf("%4d: xid = %"PRId64", fg = "SLPRI_FG" * \n", i, entryp->xid, SLPRI_FG_ARGS(&entryp->fg)); } else printf("%4d: xid = %"PRId64", fg = "SLPRI_FG"\n", i, entryp->xid, SLPRI_FG_ARGS(&entryp->fg)); entryp = PSC_AGP(entryp, sizeof(struct srt_reclaim_entry)); } printf("\n Total number of out-of-order entries: %d\n", order); }
void pfl_heap_add(struct pfl_heap *ph, void *c) { struct pfl_heap_entry *che, *phe; size_t nalloc; void *p; psc_assert(c); che = PSC_AGP(c, ph->ph_entoff); if (ph->ph_nitems == ph->ph_nalloc) { nalloc = MAX(8, 2 * ph->ph_nalloc); ph->ph_base = psc_realloc(ph->ph_base, nalloc * sizeof(void *), 0); ph->ph_nalloc = nalloc; } ph->ph_base[che->phe_idx = ph->ph_nitems++] = c; /* bubble up */ while (che->phe_idx > 0) { p = ph->ph_base[(che->phe_idx - 1) / 2]; if (ph->ph_cmpf(p, c) != 1) break; phe = PSC_AGP(p, ph->ph_entoff); _pfl_heap_swap(ph, phe, che); } }
/* * Handle a NAMESPACE_UPDATE request from another MDS. */ int slm_rmm_handle_namespace_update(struct pscrpc_request *rq) { struct srt_update_entry *entryp; struct srm_update_req *mq; struct srm_update_rep *mp; struct sl_mds_peerinfo *p; struct sl_resource *res; struct sl_site *site; struct iovec iov; int i, len, count; SL_RSX_ALLOCREP(rq, mq, mp); count = mq->count; if (count <= 0 || mq->size > LNET_MTU) { mp->rc = -EINVAL; return (mp->rc); } iov.iov_len = mq->size; iov.iov_base = PSCALLOC(mq->size); mp->rc = slrpc_bulkserver(rq, BULK_GET_SINK, SRMM_BULK_PORTAL, &iov, 1); if (mp->rc) goto out; /* Search for the peer information by the given site ID. */ site = libsl_siteid2site(mq->siteid); p = NULL; if (site) SITE_FOREACH_RES(site, res, i) if (res->res_type == SLREST_MDS) { p = res2rpmi(res)->rpmi_info; break; } if (p == NULL) { psclog_info("fail to find site ID %d", mq->siteid); PFL_GOTOERR(out, mp->rc = -EINVAL); } /* * Iterate through the namespace update buffer and apply updates. * If we fail to apply an update, we still report success to our * peer because reporting an error does not help our cause. */ entryp = iov.iov_base; for (i = 0; i < count; i++) { slm_rmm_apply_update(entryp); len = UPDATE_ENTRY_LEN(entryp); entryp = PSC_AGP(entryp, len); } zfsslash2_wait_synced(0); out: PSCFREE(iov.iov_base); return (mp->rc); }
/*
 * Return the item stored in a given slot of a heap without removing it.
 * @ph: heap to look in.
 * @idx: slot number, counted from 0.
 *
 * Returns NULL when @idx is negative or not below the number of items
 * in the heap.  Asserts that the item's embedded entry agrees about
 * which slot it occupies.
 */
void *
pfl_heap_peekidx(struct pfl_heap *ph, int idx)
{
	struct pfl_heap_entry *phe;
	void *p;

	/* Reject both ends: a negative index would read before ph_base. */
	if (idx < 0 || idx >= ph->ph_nitems)
		return (NULL);

	p = ph->ph_base[idx];
	phe = PSC_AGP(p, ph->ph_entoff);
	psc_assert(phe->phe_idx == idx);
	return (p);
}
/*
 * Check whether every byte of a memory region has the same value.
 * @buf: start of the region.
 * @val: byte value to look for; converted to unsigned char, the same
 *	way memset(3) treats its fill argument, so 0xff and -1 both mean
 *	"all bits set".
 * @len: number of bytes to examine.
 *
 * Returns 1 if all @len bytes equal @val (trivially so when @len is 0)
 * and 0 as soon as a differing byte is found.  @buf may have any
 * alignment; it is only ever read one byte at a time.
 */
int
pfl_memchk(const void *buf, int val, size_t len)
{
	enum { CHUNK = 64 };
	const unsigned char *p = buf;
	const unsigned char byte = (unsigned char)val;
	unsigned char diff;
	size_t i;

	/*
	 * Whole chunks: fold every mismatch in the chunk into a single
	 * accumulator and test it once, so the inner loop has no early
	 * exit and is straightforward for the compiler to optimize.
	 */
	for (; len >= CHUNK; p += CHUNK, len -= CHUNK) {
		diff = 0;
		for (i = 0; i < CHUNK; i++)
			diff |= (unsigned char)(p[i] ^ byte);
		if (diff)
			return (0);
	}

	/* Whatever is left over, byte by byte. */
	for (i = 0; i < len; i++)
		if (p[i] != byte)
			return (0);
	return (1);
}
acl_t pfl_acl_from_xattr(const void *buf, size_t size) { int i, entries; const struct acl_ea_header *h = buf; const struct acl_ea_entry *xe = PSC_AGP(h + 1, 0); unsigned int xperms; acl_permset_t permset; acl_entry_t e; acl_tag_t tag; acl_t a; if (size < sizeof(*h)) { errno = EINVAL; return (NULL); } if (le32toh(h->version) != ACL_EA_VERSION) { errno = EINVAL; return (NULL); } size -= sizeof(*h); if (size % sizeof(*xe)) { errno = EINVAL; return (NULL); } entries = size / sizeof(*xe); a = acl_init(entries); if (a == NULL) return (NULL); for (i = 0; i < entries; i++, xe++) { acl_create_entry(&a, &e); if (acl_get_permset(e, &permset) == -1) psclog_error("get_permset"); acl_clear_perms(permset); xperms = le16toh(xe->perm); if (xperms & ACL_READ) acl_add_perm(permset, ACL_READ); if (xperms & ACL_WRITE) acl_add_perm(permset, ACL_WRITE); if (xperms & ACL_EXECUTE) acl_add_perm(permset, ACL_EXECUTE); if (acl_set_permset(e, permset) == -1) psclog_error("set_permset"); acl_set_tag_type(e, tag = le16toh(xe->tag)); switch (tag) { case ACL_USER: { uid_t uid = le32toh(xe->id); acl_set_qualifier(e, &uid); break; } case ACL_GROUP: { gid_t gid = le32toh(xe->id); acl_set_qualifier(e, &gid); break; } } } return (a); }
/** * pjournal_format - Initialize an on-disk journal. * @fn: file path to store journal. * @nents: number of entries journal may contain. * @entsz: size of a journal entry. * Returns 0 on success, errno on error. */ void pjournal_format(const char *fn, uint32_t nents, uint32_t entsz, uint32_t rs, uint64_t uuid) { struct psc_journal_enthdr *pje; struct psc_journal pj; struct stat stb; unsigned char *jbuf; uint32_t i, j, slot; int rc, fd; ssize_t nb; if (nents % rs) psc_fatalx("number of slots (%u) should be a multiple of " "readsize (%u)", nents, rs); memset(&pj, 0, sizeof(struct psc_journal)); rc = 0; fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd == -1) psc_fatal("%s", fn); if (fstat(fd, &stb) == -1) psc_fatal("stat %s", fn); pj.pj_fd = fd; pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize)); pj.pj_hdr->pjh_entsz = entsz; pj.pj_hdr->pjh_nents = nents; pj.pj_hdr->pjh_version = PJH_VERSION; pj.pj_hdr->pjh_readsize = rs; pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize); pj.pj_hdr->pjh_magic = PJH_MAGIC; pj.pj_hdr->pjh_timestamp = time(NULL); pj.pj_hdr->pjh_fsuuid = uuid; psc_crc64_init(&pj.pj_hdr->pjh_chksum); psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr, offsetof(struct psc_journal_hdr, pjh_chksum)); psc_crc64_fini(&pj.pj_hdr->pjh_chksum); nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0); if ((size_t)nb != pj.pj_hdr->pjh_iolen) psc_fatalx("failed to write journal header: %s", nb == -1 ? 
strerror(errno) : "short write"); nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize; jbuf = psc_alloc(nb, PAF_PAGEALIGN); for (i = 0; i < rs; i++) { pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i); pje->pje_magic = PJE_MAGIC; pje->pje_type = PJE_FORMAT; pje->pje_xid = PJE_XID_NONE; pje->pje_len = 0; psc_crc64_init(&pje->pje_chksum); psc_crc64_add(&pje->pje_chksum, pje, offsetof(struct psc_journal_enthdr, pje_chksum)); psc_crc64_add(&pje->pje_chksum, pje->pje_data, pje->pje_len); psc_crc64_fini(&pje->pje_chksum); } j = 0; /* XXX use an option to write only one entry in fast create mode */ for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) { nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs, PJ_GETENTOFF(&pj, slot)); if ((size_t)nb != PJ_PJESZ(&pj) * rs) psc_fatal("failed to write slot %u (%zd)", slot, nb); if (verbose && slot % 262144 == 0) { printf("."); fflush(stdout); fsync(pj.pj_fd); if (++j == 80) { printf("\n"); j = 0; } } } if (verbose && j) printf("\n"); if (close(fd) == -1) psc_fatal("failed to close journal"); psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs); psclog_info("journal %s formatted: %d slots, %d readsize, error=%d", fn, nents, rs, rc); }
/* * Initialize an on-disk journal. * @fn: file path to store journal. * @nents: number of entries journal may contain if non-zero. * @entsz: size of a journal entry. * @rs: read size. * Returns the number of entries created. */ uint32_t sl_journal_format(const char *fn, uint32_t nents, uint32_t entsz, uint32_t rs, uint64_t uuid, int block_dev) { uint32_t i, slot, max_nents; struct psc_journal_enthdr *pje; struct psc_journal pj; struct stat stb; unsigned char *jbuf; size_t numblocks; ssize_t nb; int fd; memset(&pj, 0, sizeof(pj)); fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd == -1) psc_fatal("%s", fn); if (fstat(fd, &stb) == -1) psc_fatal("stat %s", fn); /* * If the user does not specify nents, either use default or * based on the block device size. */ if (nents == 0 && !block_dev) nents = SLJ_MDS_JNENTS; if (block_dev) { if (ioctl(fd, BLKGETSIZE, &numblocks) == -1) err(1, "BLKGETSIZE: %s", fn); /* show progress, it is going to be a while */ verbose = 1; /* deal with large disks */ max_nents = MIN(numblocks, SLJ_MDS_MAX_JNENTS); /* leave room on both ends */ max_nents -= stb.st_blksize / SLJ_MDS_ENTSIZE + 16; /* efficiency */ max_nents = (max_nents / rs) * rs; if (nents) nents = MIN(nents, max_nents); else nents = max_nents; } if (nents % rs) psc_fatalx("number of slots (%u) should be a multiple of " "readsize (%u)", nents, rs); pj.pj_fd = fd; pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize)); pj.pj_hdr->pjh_entsz = entsz; pj.pj_hdr->pjh_nents = nents; pj.pj_hdr->pjh_version = PJH_VERSION; pj.pj_hdr->pjh_readsize = rs; pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize); pj.pj_hdr->pjh_magic = PJH_MAGIC; pj.pj_hdr->pjh_timestamp = time(NULL); pj.pj_hdr->pjh_fsuuid = uuid; psc_crc64_init(&pj.pj_hdr->pjh_chksum); psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr, offsetof(struct psc_journal_hdr, pjh_chksum)); psc_crc64_fini(&pj.pj_hdr->pjh_chksum); nb = pwrite(pj.pj_fd, pj.pj_hdr, 
pj.pj_hdr->pjh_iolen, 0); if ((size_t)nb != pj.pj_hdr->pjh_iolen) psc_fatalx("failed to write journal header: %s", nb == -1 ? strerror(errno) : "short write"); nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize; jbuf = psc_alloc(nb, PAF_PAGEALIGN); for (i = 0; i < rs; i++) { pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i); pje->pje_magic = PJE_MAGIC; pje->pje_type = PJE_FORMAT; pje->pje_xid = PJE_XID_NONE; pje->pje_len = 0; psc_crc64_init(&pje->pje_chksum); psc_crc64_add(&pje->pje_chksum, pje, offsetof(struct psc_journal_enthdr, pje_chksum)); psc_crc64_add(&pje->pje_chksum, pje->pje_data, pje->pje_len); psc_crc64_fini(&pje->pje_chksum); } i = 0; /* XXX use an option to write only one entry in fast create mode */ for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) { nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs, PJ_GETENTOFF(&pj, slot)); if ((size_t)nb != PJ_PJESZ(&pj) * rs) psc_fatal("failed to write slot %u (%zd)", slot, nb); if (verbose && slot % 262144 == 0) { printf("."); fflush(stdout); fsync(pj.pj_fd); if (++i == 80) { printf("\n"); i = 0; } } } if (verbose && i) printf("\n"); if (close(fd) == -1) psc_fatal("failed to close journal"); psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs); return (nents); }