void slcfg_init_res(struct sl_resource *r) { struct resprof_mds_info *rpmi; struct sl_mds_peerinfo *sp; struct sl_mds_iosinfo *si; rpmi = res2rpmi(r); psc_mutex_init(&rpmi->rpmi_mutex); psc_waitq_init(&rpmi->rpmi_waitq, "rpmi"); if (r->res_type == SLREST_MDS) { rpmi->rpmi_info = sp = PSCALLOC(sizeof(*sp)); sp->sp_flags = SPF_NEED_JRNL_INIT; pfl_meter_init(&sp->sp_batchmeter, 0, "nsupd-%s", r->res_name); } else { rpmi->rpmi_info = si = PSCALLOC(sizeof(*si)); si->si_flags = SIF_NEED_JRNL_INIT; if (RES_ISFS(r)) pfl_meter_init(&si->si_batchmeter, 0, "reclaim-%s", r->res_name); if (r->res_flags & RESF_DISABLE_BIA) si->si_flags |= SIF_DISABLE_LEASE; } }
struct pfl_opstat * pfl_opstat_initf(int flags, const char *namefmt, ...) { struct pfl_opstat *opst; int sz, pos; va_list ap; char *name = pfl_opstat_name; spinlock(&pfl_opstats_lock); va_start(ap, namefmt); sz = vsnprintf(name, 128, namefmt, ap) + 1; va_end(ap); /* (gdb) p ((struct pfl_opstat *)pfl_opstats.pda_items[74]).opst_name */ pos = psc_dynarray_bsearch(&pfl_opstats, name, _pfl_opstat_cmp); if (pos < psc_dynarray_len(&pfl_opstats)) { opst = psc_dynarray_getpos(&pfl_opstats, pos); if (strcmp(name, opst->opst_name) == 0) { pfl_assert((flags & OPSTF_EXCL) == 0); freelock(&pfl_opstats_lock); return (opst); } } pfl_opstats_sum++; opst = PSCALLOC(sizeof(*opst) + sz); strlcpy(opst->opst_name, name, 128); opst->opst_flags = flags; psc_dynarray_splice(&pfl_opstats, pos, 0, &opst, 1); freelock(&pfl_opstats_lock); return (opst); }
/*
 * Fetch the POSIX access ACL of an fcmh by reading its ACL_EA_ACCESS
 * extended attribute through slc_getxattr().
 *
 * A 64-byte stack buffer is tried first.  If that attempt reports
 * ERANGE, the lookup is repeated with a heap buffer of the size the
 * first attempt reported.  Any other failure yields NULL.
 *
 * Returns the ACL produced by pfl_acl_from_xattr(), or NULL on error.
 */
acl_t
slc_acl_get_fcmh(struct pscfs_req *pfr, const struct pscfs_creds *pcr,
    struct fidc_membh *f)
{
	char stackbuf[64] = { 0 };
	size_t retsz = 0;
	void *heapbuf;
	ssize_t rc;
	acl_t acl;

	rc = slc_getxattr(pfr, pcr, ACL_EA_ACCESS, stackbuf,
	    sizeof(stackbuf), f, &retsz);
	if (rc == 0)
		return (pfl_acl_from_xattr(stackbuf, retsz));
	if (rc != ERANGE)
		return (NULL);

	/* Value did not fit: retry with a buffer of the reported size. */
	heapbuf = PSCALLOC(retsz);
	rc = slc_getxattr(pfr, pcr, ACL_EA_ACCESS, heapbuf, retsz, f,
	    &retsz);
	if (rc) {
		PSCFREE(heapbuf);
		return (NULL);
	}
	acl = pfl_acl_from_xattr(heapbuf, retsz);
	PSCFREE(heapbuf);
	return (acl);
}
/* * Handle a NAMESPACE_UPDATE request from another MDS. */ int slm_rmm_handle_namespace_update(struct pscrpc_request *rq) { struct srt_update_entry *entryp; struct srm_update_req *mq; struct srm_update_rep *mp; struct sl_mds_peerinfo *p; struct sl_resource *res; struct sl_site *site; struct iovec iov; int i, len, count; SL_RSX_ALLOCREP(rq, mq, mp); count = mq->count; if (count <= 0 || mq->size > LNET_MTU) { mp->rc = -EINVAL; return (mp->rc); } iov.iov_len = mq->size; iov.iov_base = PSCALLOC(mq->size); mp->rc = slrpc_bulkserver(rq, BULK_GET_SINK, SRMM_BULK_PORTAL, &iov, 1); if (mp->rc) goto out; /* Search for the peer information by the given site ID. */ site = libsl_siteid2site(mq->siteid); p = NULL; if (site) SITE_FOREACH_RES(site, res, i) if (res->res_type == SLREST_MDS) { p = res2rpmi(res)->rpmi_info; break; } if (p == NULL) { psclog_info("fail to find site ID %d", mq->siteid); PFL_GOTOERR(out, mp->rc = -EINVAL); } /* * Iterate through the namespace update buffer and apply updates. * If we fail to apply an update, we still report success to our * peer because reporting an error does not help our cause. */ entryp = iov.iov_base; for (i = 0; i < count; i++) { slm_rmm_apply_update(entryp); len = UPDATE_ENTRY_LEN(entryp); entryp = PSC_AGP(entryp, len); } zfsslash2_wait_synced(0); out: PSCFREE(iov.iov_base); return (mp->rc); }
/*
 * Allocate a buffer large enough to hold one item of the table.
 *
 * @t: on-disk table whose header supplies the item size.
 * @pp: value-result; receives the newly allocated buffer.
 */
void
pfl_odt_allocitem(struct pfl_odt *t, void **pp)
{
	*pp = PSCALLOC(t->odt_hdr->odth_itemsz);
}
/*
 * Duplicate a region of memory into a freshly allocated buffer.
 *
 * @p: source region.
 * @len: number of bytes to copy.
 * Returns the new buffer holding a copy of the @len bytes at @p.
 */
void *
pfl_memdup(const void *p, size_t len)
{
	void *copy = PSCALLOC(len);

	/* memcpy(3) returns its destination argument. */
	return (memcpy(copy, p, len));
}
struct wok_module * mod_load(const char *path, const char *opts, char *errbuf, size_t errlen) { int (*loadf)(struct pscfs *); struct wok_module *wm; void *h; int rc; h = dlopen(path, RTLD_NOW); if (h == NULL) { snprintf(errbuf, LINE_MAX, "%s\n", dlerror()); fprintf(stderr, errbuf); return (NULL); } loadf = dlsym(h, "pscfs_module_load"); if (loadf == NULL) { dlclose(h); snprintf(errbuf, LINE_MAX, "symbol pscfs_module_load undefined.\n"); fprintf(stderr, errbuf); return (NULL); } wm = PSCALLOC(sizeof(*wm)); wm->wm_path = pfl_strdup(path); wm->wm_handle = h; wm->wm_opts = pfl_strdup(opts); wm->wm_module.pf_private = wm; pflfs_module_init(&wm->wm_module, opts); rc = loadf(&wm->wm_module); /* * XXX XXX XXX * This is a complete hack but this flush somehow avoids a bunch * of zeroes from ending up in the log... * XXX XXX XXX */ fflush(stderr); if (rc) { wm->wm_module.pf_handle_destroy = NULL; pflfs_module_destroy(&wm->wm_module); dlclose(h); PSCFREE(wm->wm_path); PSCFREE(wm); psclog_warnx("module failed to load: rc=%d module=%s", rc, path); strlcpy(errbuf, strerror(rc), errlen); return (NULL); } return (wm); }
void pfl_odt_load(struct pfl_odt **tp, struct pfl_odt_ops *odtops, int oflg, const char *fn, const char *fmt, ...) { struct pfl_odt_hdr *h; struct pfl_odt *t; uint64_t crc; va_list ap; *tp = t = PSCALLOC(sizeof(*t)); t->odt_ops = *odtops; INIT_SPINLOCK(&t->odt_lock); INIT_PSC_LISTENTRY(&t->odt_lentry); va_start(ap, fmt); vsnprintf(t->odt_name, sizeof(t->odt_name), fmt, ap); va_end(ap); t->odt_iostats.rd = pfl_opstat_init("odt-%s-rd", t->odt_name); t->odt_iostats.wr = pfl_opstat_init("odt-%s-wr", t->odt_name); h = t->odt_hdr = PSCALLOC(sizeof(*h)); /* pfl_odt_open() and slm_odt_open() */ odtops->odtop_open(t, fn, oflg); psc_crc64_calc(&crc, t->odt_hdr, sizeof(*t->odt_hdr) - sizeof(t->odt_hdr->odth_crc)); pfl_assert(h->odth_crc == crc); t->odt_bitmap = psc_vbitmap_newf(h->odth_nitems, PVBF_AUTO); pfl_assert(t->odt_bitmap); /* * Skip the first slot, so that we can detect whether we have * assigned a lease easily. */ psc_vbitmap_set(t->odt_bitmap, 0); PFLOG_ODT(PLL_DIAG, t, "loaded"); pll_add(&pfl_odtables, t); }
/*
 * Obtain a slab from slab_pool and attach a freshly allocated
 * SLASH_SLVR_SIZE-byte data buffer to it.
 *
 * Returns the slab with its management list entry initialized.
 */
struct slab *
slab_alloc(void)
{
	struct slab *s = psc_pool_get(slab_pool);

	s->slb_base = PSCALLOC(SLASH_SLVR_SIZE);
	INIT_LISTENTRY(&s->slb_mgmt_lentry);
	return (s);
}
int main(int argc, char *argv[]) { size_t sz; void *p; pfl_init(); if (getopt(argc, argv, "") != -1) usage(); argc -= optind; if (argc) usage(); p = PSCALLOC(213); p = psc_realloc(p, 65536, 0); p = psc_realloc(p, 0, 0); p = psc_realloc(p, 128, 0); p = psc_realloc(p, 0, 0); PSCFREE(p); p = PSCALLOC(128); PSCFREE(p); p = psc_alloc(24, PAF_PAGEALIGN); psc_free(p, PAF_PAGEALIGN); p = PSCALLOC(24); p = psc_realloc(p, 128, 0); PSCFREE(p); p = psc_alloc(8, PAF_LOCK); *(uint64_t *)p = 0; psc_free(p, PAF_LOCK, (size_t)8); sz = 1024; p = psc_alloc(sz, PAF_LOCK | PAF_PAGEALIGN); memset(p, 0, sz); psc_free(p, PAF_LOCK | PAF_PAGEALIGN, sz); exit(0); }
int pfl_odt_create(const char *fn, int64_t nitems, size_t itemsz, int overwrite, size_t startoff, size_t pad, int tflg) { int rc; int64_t item; struct pfl_odt_slotftr f; struct pfl_odt_hdr *h; struct pfl_odt *t; t = PSCALLOC(sizeof(*t)); t->odt_ops = pfl_odtops; INIT_SPINLOCK(&t->odt_lock); snprintf(t->odt_name, sizeof(t->odt_name), "%s", pfl_basename(fn)); t->odt_iostats.rd = pfl_opstat_init("odt-%s-rd", t->odt_name); t->odt_iostats.wr = pfl_opstat_init("odt-%s-wr", t->odt_name); h = PSCALLOC(sizeof(*h)); memset(h, 0, sizeof(*h)); h->odth_nitems = nitems; h->odth_itemsz = itemsz; h->odth_slotsz = itemsz + pad + sizeof(f); h->odth_options = tflg; h->odth_start = startoff; t->odt_hdr = h; psc_crc64_calc(&h->odth_crc, h, sizeof(*h) - sizeof(h->odth_crc)); /* pfl_odt_new() and slm_odt_new() */ rc = t->odt_ops.odtop_new(t, fn, overwrite); if (rc) return (rc); for (item = 0; item < nitems; item++) _pfl_odt_doput(t, item, NULL, &f, 0); PFLOG_ODT(PLL_DIAG, t, "created"); pfl_odt_release(t); return (0); }
/*
 * Create a tree node carrying @data and link it into the children
 * list of @ptn.
 *
 * Returns the newly created child node.
 */
struct psc_streenode *
psc_stree_addchild(struct psc_streenode *ptn, void *data)
{
	struct psc_streenode *node = PSCALLOC(sizeof(*node));

	INIT_PSCLIST_HEAD(&node->ptn_children);
	INIT_PSC_LISTENTRY(&node->ptn_sibling);
	node->ptn_data = data;

	psclist_add(&node->ptn_sibling, &ptn->ptn_children);
	return (node);
}
/** * pflnet_getifaddrs - Acquire list of network interface addresses. * @ifap: value-result base of addresses array, must be when finished. */ int pflnet_getifaddrs(struct ifaddrs **ifap) { #ifdef HAVE_GETIFADDRS return (getifaddrs(ifap)); #else int nifs, rc, s, n; struct ifconf ifc; struct ifreq *ifr; s = socket(AF_INET, SOCK_DGRAM, 0); if (s == -1) psc_fatal("socket"); ifc.ifc_buf = NULL; rc = ioctl(s, SIOCGIFCONF, &ifc); if (rc == -1) psc_fatal("ioctl SIOCGIFCONF"); /* * If an interface is being added while we are fetching, * there is no way to determine that we didn't get them * all with this API. */ ifc.ifc_buf = PSCALLOC(ifc.ifc_len); rc = ioctl(s, SIOCGIFCONF, &ifc); if (rc == -1) psc_fatal("ioctl SIOCGIFCONF"); close(s); nifs = ifc.ifc_len / sizeof(*ifr); *ifap = PSCALLOC(sizeof(**ifap) * nifs); ifr = (void *)ifc.ifc_buf; for (n = 0; n < nifs; n+++, ifr++) memcpy(*ifap + n, &ifr->ifr_addr, sizeof(**ifap)); #endif return (0); }
void pfl_odt_getslot(struct pfl_odt *t, int64_t n, void *pp, struct pfl_odt_slotftr **fp) { struct pfl_odt_hdr *h; void **p = (void **)pp; h = t->odt_hdr; pfl_assert(n <= h->odth_nitems - 1); if (p) *p = PSCALLOC(h->odth_itemsz); if (fp) *fp = PSCALLOC(sizeof(**fp)); /* pfl_odt_read or slm_odt_read */ t->odt_ops.odtop_read(t, n, p ? *p : NULL, fp ? *fp : NULL); pfl_opstat_add(t->odt_iostats.rd, h->odth_slotsz); ODT_STAT_INCR(t, read); }
/*
 * Create a tree node carrying @data and insert it into the children
 * list of @ptn using psclist_add_sorted().
 *
 * @cmpf: comparison routine passed through to the sorted insert.
 * @offset: offset argument passed through to the sorted insert.
 * Returns the newly created child node.
 */
struct psc_streenode *
psc_stree_addchild_sorted(struct psc_streenode *ptn, void *data,
    int (*cmpf)(const void *, const void *), off_t offset)
{
	struct psc_streenode *node = PSCALLOC(sizeof(*node));

	INIT_PSCLIST_HEAD(&node->ptn_children);
	INIT_PSC_LISTENTRY(&node->ptn_sibling);
	node->ptn_data = data;

	psclist_add_sorted(&ptn->ptn_children, &node->ptn_sibling, cmpf,
	    offset);
	return (node);
}
void push_filter(struct psc_dynarray *da, char *s, int type) { struct { const char *name; const char *abbr; int type; } *ty, types[] = { { "clear", "!", FPT_CLEAR }, { "dir-merge", ":", FPT_DIRMERGE }, { "exclude", "-", FPT_EXCL }, { "hide", "H", FPT_HIDE }, { "include", "+", FPT_INCL }, { "merge", ".", FPT_MERGE }, { "protect", "P", FPT_PROTECT }, { "risk", "R", FPT_RISK }, { "show", "S", FPT_SHOW } }; struct filterpattern *fp; char *sty, *sep; int n; fp = PSCALLOC(sizeof(*fp)); if (type) { fp->fp_type = type; fp->fp_pat = s; } else { for (sty = s; *s && !isspace(*s); s++) ; while (isspace(*s)) s++; if (*s == '\0') psync_fatal("invalid format"); sep = strchr(sty, ','); if (sep) *sep = '\0'; for (n = 0, ty = types; n < nitems(types); ty++, n++) if (strcmp(ty->name, sty) == 0 || strcmp(ty->abbr, sty) == 0) break; if (n == nitems(types)) psync_fatal("invalid format"); fp->fp_type = ty->type; fp->fp_pat = s; } push(da, fp); }
void pfl_opstats_grad_init(struct pfl_opstats_grad *og, int flags, int64_t *buckets, int nbuckets, const char *fmt, ...) { const char *lower_suffix = "", *upper_suffix = ""; int64_t lower_bound, upper_bound; struct pfl_opstat_bucket *ob; char label[16]; int rc, i; og->og_buckets = PSCALLOC(nbuckets * sizeof(og->og_buckets[0])); og->og_nbuckets = nbuckets; for (i = 0, ob = og->og_buckets; i < nbuckets; i++, ob++) { if (i) pfl_assert(buckets[i - 1] < buckets[i]); else pfl_assert(buckets[i] == 0); lower_bound = buckets[i]; if (!(flags & OPSTF_BASE10)) lower_suffix = _pfl_opstats_base2_suffix( &lower_bound); if (i == nbuckets - 1) { rc = snprintf(label, sizeof(label), "%d:>=%"PRId64"%.1s", i, lower_bound, lower_suffix); } else { upper_bound = buckets[i + 1]; if (!(flags & OPSTF_BASE10)) upper_suffix = _pfl_opstats_base2_suffix( &upper_bound); rc = snprintf(label, sizeof(label), "%d:%"PRId64"%.1s-<%"PRId64"%.1s", i, lower_bound, lower_suffix, upper_bound, upper_suffix); } if (rc == -1) psc_fatal("snprintf"); ob->ob_lower_bound = buckets[i]; ob->ob_opst = pfl_opstat_initf(flags | OPSTF_BASE10, fmt, label); } }
__static int pflnet_rtexists_sysctl(const struct sockaddr *sa) { union { struct rt_msghdr *rtm; char *ch; void *p; } u; union pfl_sockaddr_ptr s, os; int rc = 0, mib[6]; char *buf = NULL; size_t len; os.cp = sa; mib[0] = CTL_NET; mib[1] = PF_ROUTE; mib[2] = 0; /* protocol */ mib[3] = AF_INET; mib[4] = NET_RT_DUMP; mib[5] = 0; /* no flags */ if (sysctl(mib, nitems(mib), NULL, &len, NULL, 0) == -1) psc_fatal("route-sysctl-estimate"); if (len) { buf = PSCALLOC(len); if (sysctl(mib, nitems(mib), buf, &len, NULL, 0) == -1) psc_fatal("actual retrieval of routing table"); } for (u.p = buf; u.ch && u.ch < buf + len; u.ch += u.rtm->rtm_msglen) { if (u.rtm->rtm_version != RTM_VERSION) continue; s.p = u.rtm + 1; if (s.s->sin.sin_addr.s_addr == os.s->sin.sin_addr.s_addr) { rc = 1; break; } } PSCFREE(buf); return (rc); }
/*
 * Format a string into a newly allocated buffer.
 *
 * The output is measured with a first vsnprintf(3) pass and produced
 * by a second pass over a copy of the argument list.
 *
 * @p: value-result; receives the allocated string.
 * @fmt: printf(3)-style format.
 * @ap: arguments for @fmt.
 * Returns the size of the allocated buffer, i.e. the formatted
 * length plus one for the terminating NUL.
 */
int
pfl_vasprintf(char **p, const char *fmt, va_list ap)
{
	va_list ap2;
	int sz;

	/* Duplicate the list first; the measuring pass consumes `ap'. */
	va_copy(ap2, ap);

	sz = vsnprintf(NULL, 0, fmt, ap);
	psc_assert(sz != -1);

	*p = PSCALLOC(sz + 1);
	vsnprintf(*p, sz + 1, fmt, ap2);
	va_end(ap2);

	return (sz + 1);
}
/*
 * Register a description string for an error code.
 *
 * If the code is already present in pfl_errno_hashtbl, the existing
 * entry is asserted to carry the same code and string and nothing is
 * added.  Otherwise a new entry holding a copy of @str is inserted.
 *
 * @code: error code.
 * @str: description of the error.
 */
void
pfl_register_errno(int code, const char *str)
{
	struct pfl_errno *e;
	uint64_t q = code;

	e = psc_hashtbl_search(&pfl_errno_hashtbl, &q);
	if (e == NULL) {
		e = PSCALLOC(sizeof(*e));
		e->code = q;
		e->str = pfl_strdup(str);
		psc_hashent_init(&pfl_errno_hashtbl, e);
		psc_hashtbl_add_item(&pfl_errno_hashtbl, e);
		return;
	}

	/* A repeated registration must agree with the first one. */
	pfl_assert(e->code == q);
	pfl_assert(strcmp(e->str, str) == 0);
}
int mds_inode_update(int vfsid, struct slash_inode_handle *ih, int old_version) { char fn[NAME_MAX + 1]; struct sl_ino_compat *sic; struct fidc_membh *f; struct srt_stat sstb; void *h = NULL, *th; int rc; sic = &sl_ino_compat_table[old_version]; rc = sic->sic_read_ino(ih); if (rc) return (rc); DEBUG_INOH(PLL_INFO, ih, "updating old inode (v %d)", old_version); f = inoh_2_fcmh(ih); snprintf(fn, sizeof(fn), "%016"PRIx64".update", fcmh_2_fid(f)); rc = mdsio_opencreatef(vfsid, mds_tmpdir_inum[vfsid], &rootcreds, O_RDWR | O_CREAT | O_TRUNC, MDSIO_OPENCRF_NOLINK, 0644, fn, NULL, NULL, &h, NULL, NULL, 0); if (rc) PFL_GOTOERR(out, rc); psc_assert(ih->inoh_extras == NULL); ih->inoh_extras = PSCALLOC(INOX_SZ); /* convert old structures into new into temp file */ rc = sic->sic_read_inox(ih); if (rc) PFL_GOTOERR(out, rc); th = inoh_2_mfhp(ih)->fh; inoh_2_mfhp(ih)->fh = h; rc = mds_inode_dump(vfsid, sic, ih, th); inoh_2_mfhp(ih)->fh = th; if (rc) PFL_GOTOERR(out, rc); /* move new structures to inode meta file */ memset(&sstb, 0, sizeof(sstb)); rc = mdsio_setattr(vfsid, 0, &sstb, SL_SETATTRF_METASIZE, &rootcreds, NULL, th, NULL); if (rc) PFL_GOTOERR(out, rc); // mdsio_rename(mds_tmpdir_inum, NULL, fn, &rootcreds, NULL); rc = mds_inode_dump(vfsid, NULL, ih, h); if (rc) PFL_GOTOERR(out, rc); mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds, NULL, NULL); out: if (h) mdsio_release(vfsid, &rootcreds, h); if (rc) { mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds, NULL, NULL); DEBUG_INOH(PLL_ERROR, ih, "error updating old inode " "rc=%d", rc); } return (rc); }
/** * pjournal_format - Initialize an on-disk journal. * @fn: file path to store journal. * @nents: number of entries journal may contain. * @entsz: size of a journal entry. * Returns 0 on success, errno on error. */ void pjournal_format(const char *fn, uint32_t nents, uint32_t entsz, uint32_t rs, uint64_t uuid) { struct psc_journal_enthdr *pje; struct psc_journal pj; struct stat stb; unsigned char *jbuf; uint32_t i, j, slot; int rc, fd; ssize_t nb; if (nents % rs) psc_fatalx("number of slots (%u) should be a multiple of " "readsize (%u)", nents, rs); memset(&pj, 0, sizeof(struct psc_journal)); rc = 0; fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd == -1) psc_fatal("%s", fn); if (fstat(fd, &stb) == -1) psc_fatal("stat %s", fn); pj.pj_fd = fd; pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize)); pj.pj_hdr->pjh_entsz = entsz; pj.pj_hdr->pjh_nents = nents; pj.pj_hdr->pjh_version = PJH_VERSION; pj.pj_hdr->pjh_readsize = rs; pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize); pj.pj_hdr->pjh_magic = PJH_MAGIC; pj.pj_hdr->pjh_timestamp = time(NULL); pj.pj_hdr->pjh_fsuuid = uuid; psc_crc64_init(&pj.pj_hdr->pjh_chksum); psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr, offsetof(struct psc_journal_hdr, pjh_chksum)); psc_crc64_fini(&pj.pj_hdr->pjh_chksum); nb = pwrite(pj.pj_fd, pj.pj_hdr, pj.pj_hdr->pjh_iolen, 0); if ((size_t)nb != pj.pj_hdr->pjh_iolen) psc_fatalx("failed to write journal header: %s", nb == -1 ? 
strerror(errno) : "short write"); nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize; jbuf = psc_alloc(nb, PAF_PAGEALIGN); for (i = 0; i < rs; i++) { pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i); pje->pje_magic = PJE_MAGIC; pje->pje_type = PJE_FORMAT; pje->pje_xid = PJE_XID_NONE; pje->pje_len = 0; psc_crc64_init(&pje->pje_chksum); psc_crc64_add(&pje->pje_chksum, pje, offsetof(struct psc_journal_enthdr, pje_chksum)); psc_crc64_add(&pje->pje_chksum, pje->pje_data, pje->pje_len); psc_crc64_fini(&pje->pje_chksum); } j = 0; /* XXX use an option to write only one entry in fast create mode */ for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) { nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs, PJ_GETENTOFF(&pj, slot)); if ((size_t)nb != PJ_PJESZ(&pj) * rs) psc_fatal("failed to write slot %u (%zd)", slot, nb); if (verbose && slot % 262144 == 0) { printf("."); fflush(stdout); fsync(pj.pj_fd); if (++j == 80) { printf("\n"); j = 0; } } } if (verbose && j) printf("\n"); if (close(fd) == -1) psc_fatal("failed to close journal"); psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs); psclog_info("journal %s formatted: %d slots, %d readsize, error=%d", fn, nents, rs, rc); }
/** * pjournal_dump - Dump the contents of a journal file. * @fn: journal filename to query. * @verbose: whether to report stats summary or full dump. * * Each time mds restarts, it writes log entries starting from the very * first slot of the log. Anyway, the function dumps all log entries, * some of them may be from previous incarnations of the MDS. */ void pjournal_dump(const char *fn) { int i, ntotal, nmagic, nchksum, nformat, ndump, first = 1; uint32_t slot, highest_slot = -1, lowest_slot = -1; uint64_t chksum, highest_xid = 0, lowest_xid = 0; struct psc_journal_enthdr *pje; struct psc_journal_hdr *pjh; struct psc_journal *pj; struct stat statbuf; unsigned char *jbuf; ssize_t nb, pjhlen; time_t ts; ntotal = nmagic = nchksum = nformat = ndump = 0; pj = PSCALLOC(sizeof(*pj)); strlcpy(pj->pj_name, pfl_basename(fn), sizeof(pj->pj_name)); pj->pj_fd = open(fn, O_RDWR | O_DIRECT); if (pj->pj_fd == -1) psc_fatal("failed to open journal %s", fn); if (fstat(pj->pj_fd, &statbuf) == -1) psc_fatal("failed to stat journal %s", fn); /* * O_DIRECT may impose alignment restrictions so align the * buffer and perform I/O in multiples of file system block * size. 
*/ pjhlen = PSC_ALIGN(sizeof(*pjh), statbuf.st_blksize); pjh = psc_alloc(pjhlen, PAF_PAGEALIGN); nb = pread(pj->pj_fd, pjh, pjhlen, 0); if (nb != pjhlen) psc_fatal("failed to read journal header"); pj->pj_hdr = pjh; if (pjh->pjh_magic != PJH_MAGIC) psc_fatalx("journal header has a bad magic number " "%#"PRIx64, pjh->pjh_magic); if (pjh->pjh_version != PJH_VERSION) psc_fatalx("journal header has an invalid version " "number %d", pjh->pjh_version); psc_crc64_init(&chksum); psc_crc64_add(&chksum, pjh, offsetof(struct psc_journal_hdr, pjh_chksum)); psc_crc64_fini(&chksum); if (pjh->pjh_chksum != chksum) psc_fatalx("journal header has an invalid checksum " "value %"PSCPRIxCRC64" vs %"PSCPRIxCRC64, pjh->pjh_chksum, chksum); if (S_ISREG(statbuf.st_mode) && statbuf.st_size != (off_t)(pjhlen + pjh->pjh_nents * PJ_PJESZ(pj))) psc_fatalx("size of the journal log %"PSCPRIdOFFT"d does " "not match specs in its header", statbuf.st_size); if (pjh->pjh_nents % pjh->pjh_readsize) psc_fatalx("number of entries %d is not a multiple of the " "readsize %d", pjh->pjh_nents, pjh->pjh_readsize); ts = pjh->pjh_timestamp; printf("%s:\n" " version: %u\n" " entry size: %u\n" " number of entries: %u\n" " batch read size: %u\n" " entry start offset: %"PRId64"\n" " format time: %s" " uuid: %"PRIx64"\n" " %4s %3s %4s %4s %s\n", fn, pjh->pjh_version, PJ_PJESZ(pj), pjh->pjh_nents, pjh->pjh_readsize, pjh->pjh_start_off, ctime(&ts), pjh->pjh_fsuuid, "idx", "typ", "xid", "txg", "details"); jbuf = psc_alloc(PJ_PJESZ(pj) * pj->pj_hdr->pjh_readsize, PAF_PAGEALIGN); for (slot = 0; slot < pjh->pjh_nents; slot += pjh->pjh_readsize) { nb = pread(pj->pj_fd, jbuf, PJ_PJESZ(pj) * pjh->pjh_readsize, PJ_GETENTOFF(pj, slot)); if (nb != PJ_PJESZ(pj) * pjh->pjh_readsize) warn("failed to read %d log entries at slot %d", pjh->pjh_readsize, slot); for (i = 0; i < pjh->pjh_readsize; i++) { ntotal++; pje = (void *)&jbuf[PJ_PJESZ(pj) * i]; if (pje->pje_magic != PJE_MAGIC) { nmagic++; warnx("journal slot %d has a bad 
magic" "number", slot + i); continue; } /* * If we hit a new entry that is never used, we * assume that the rest of the journal is never * used. */ if (pje->pje_type == PJE_FORMAT) { nformat = nformat + pjh->pjh_nents - (slot + i); goto done; } psc_crc64_init(&chksum); psc_crc64_add(&chksum, pje, offsetof( struct psc_journal_enthdr, pje_chksum)); psc_crc64_add(&chksum, pje->pje_data, pje->pje_len); psc_crc64_fini(&chksum); if (pje->pje_chksum != chksum) { nchksum++; warnx("journal slot %d has a corrupt " "checksum", slot + i); goto done; } ndump++; if (verbose) pjournal_dump_entry(slot + i, pje); if (first) { first = 0; highest_xid = lowest_xid = pje->pje_xid; highest_slot = lowest_slot = slot + i; continue; } if (highest_xid < pje->pje_xid) { highest_xid = pje->pje_xid; highest_slot = slot + i; } if (lowest_xid > pje->pje_xid) { lowest_xid = pje->pje_xid; lowest_slot = slot + i; } } } done: if (close(pj->pj_fd) == -1) printf("failed closing journal %s", fn); psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(pj)); PSCFREE(pj); printf("----------------------------------------------\n" "%8d slot(s) scanned\n" "%8d in use\n" "%8d formatted\n" "%8d bad magic\n" "%8d bad checksum(s)\n" "lowest transaction ID=%#"PRIx64" (slot=%d)\n" "highest transaction ID=%#"PRIx64" (slot=%d)\n", ntotal, ndump, nformat, nmagic, nchksum, lowest_xid, lowest_slot, highest_xid, highest_slot); }
/*
 * Test driver for the psc_atomic16/32/64 operations.
 *
 * A fixed sequence of TEST/TEST1/TEST1V checks is run first on the
 * global atomics v64, v16 and v32; each check starts from the value
 * left behind by the previous one, so their order matters.  After
 * that, nthr threads running startf() are created and this thread
 * waits on a barrier twice before exiting.
 *
 * Options: -i sets niter, -n sets nthr.  No other arguments are
 * accepted.
 */
int
main(int argc, char *argv[])
{
	struct thr *thr;
	pthread_t pthr;
	int c, rc, i;

	pfl_init();
	progname = argv[0];
	while ((c = getopt(argc, argv, "i:n:")) != -1)
		switch (c) {
		case 'i':
			niter = atoi(optarg);
			break;
		case 'n':
			nthr = atoi(optarg);
			break;
		default:
			usage();
		}
	argc -= optind;
	if (argc)
		usage();

	/* v64 is expected to start out with this value. */
	psc_assert(psc_atomic64_read(&v64) == UINT64_C(100000000000));
	/* 64-bit operations on values that do not fit in 32 bits. */
	TEST(psc_atomic64, set, &v64, &v64, UINT64_C(2000000000000),
	    UINT64_C(2000000000000));
	TEST(psc_atomic64, add, &v64, &v64, 15, UINT64_C(2000000000015));
	TEST(psc_atomic64, sub, &v64, &v64, 9, UINT64_C(2000000000006));
	TEST1(psc_atomic64, inc, &v64, UINT64_C(2000000000007));
	TEST1(psc_atomic64, dec, &v64, UINT64_C(2000000000006));

	/* The same sequence of small-value checks runs for each width. */
	psc_atomic16_set(&v16, 2);
	TEST(psc_atomic16, set, &v16, &v16, 200, 200);
	TEST(psc_atomic16, add, &v16, &v16, 15, 215);
	TEST(psc_atomic16, sub, &v16, &v16, 9, 206);
	TEST1(psc_atomic16, inc, &v16, 207);
	TEST1(psc_atomic16, dec, &v16, 206);
	TEST1V(psc_atomic16, dec_and_test0, &v16, 205, 0);
	TEST(psc_atomic16, set, &v16, &v16, 1, 1);
	TEST1V(psc_atomic16, dec_and_test0, &v16, 0, 1);
	TEST(psc_atomic16, setmask, &v16, &v16, 0x75, 0x75);
	TEST(psc_atomic16, clearmask, &v16, &v16, 0x41, 0x34);
	TEST(psc_atomic16, set, &v16, &v16, 0, 0);

	psc_atomic32_set(&v32, 2);
	TEST(psc_atomic32, set, &v32, &v32, 200, 200);
	TEST(psc_atomic32, add, &v32, &v32, 15, 215);
	TEST(psc_atomic32, sub, &v32, &v32, 9, 206);
	TEST1(psc_atomic32, inc, &v32, 207);
	TEST1(psc_atomic32, dec, &v32, 206);
	TEST1V(psc_atomic32, dec_and_test0, &v32, 205, 0);
	TEST(psc_atomic32, set, &v32, &v32, 1, 1);
	TEST1V(psc_atomic32, dec_and_test0, &v32, 0, 1);
	TEST(psc_atomic32, setmask, &v32, &v32, 0x75, 0x75);
	TEST(psc_atomic32, clearmask, &v32, &v32, 0x41, 0x34);
	TEST(psc_atomic32, set, &v32, &v32, 0, 0);

	psc_atomic64_set(&v64, 2);
	TEST(psc_atomic64, set, &v64, &v64, 200, 200);
	TEST(psc_atomic64, add, &v64, &v64, 15, 215);
	TEST(psc_atomic64, sub, &v64, &v64, 9, 206);
	TEST1(psc_atomic64, inc, &v64, 207);
	TEST1(psc_atomic64, dec, &v64, 206);
	TEST1V(psc_atomic64, dec_and_test0, &v64, 205, 0);
	TEST(psc_atomic64, set, &v64, &v64, 1, 1);
	TEST1V(psc_atomic64, dec_and_test0, &v64, 0, 1);
	TEST(psc_atomic64, setmask, &v64, &v64, 0x75, 0x75);
	TEST(psc_atomic64, clearmask, &v64, &v64, 0x41, 0x34);
	TEST(psc_atomic64, set, &v64, &v64, 0, 0);

	TEST1(psc_atomic16, inc, &v16, 1);
	TEST1V(psc_atomic16, dec_and_test0, &v16, 0, 1);

	/* The barrier is sized for the nthr workers plus this thread. */
	rc = pthread_barrier_init(&barrier, NULL, nthr + 1);
	if (rc)
		psc_fatalx("pthread_barrier_init: %s", strerror(rc));
	for (i = 0; i < nthr; i++) {
		/* Each worker gets its own record; it is not freed here. */
		thr = PSCALLOC(sizeof(*thr));
		thr->pos = i;
		rc = pthread_create(&pthr, NULL, startf, thr);
		if (rc)
			psc_fatalx("pthread_create: %s", strerror(rc));
	}
	/*
	 * NOTE(review): presumably the first wait releases the workers
	 * and the second waits for them to finish -- confirm in startf().
	 */
	pthread_barrier_wait(&barrier);
	pthread_barrier_wait(&barrier);
	exit(0);
}
/* * Traverse a file hierarchy and perform an operation on each file * system entry. * @fn: file root. * @flags: behavorial flags. * @cmpf: optional dirent comparator for ordering. * @cbf: callback to invoke on each file. * @arg: optional argument to supply to callback. * Notes: the callback will be invoked with a fully resolved absolute * path name unless the file in question is a symbolic link. */ int pfl_filewalk(const char *fn, int flags, void *cmpf, int (*cbf)(FTSENT *, void *), void *arg) { char * const pathv[] = { (char *)fn, NULL }; int rc = 0, f_flags = 0; struct stat stb; FTSENT *f; FTS *fp; if (flags & PFL_FILEWALKF_RECURSIVE) { if (flags & PFL_FILEWALKF_NOSTAT) f_flags |= FTS_NOSTAT; if (flags & PFL_FILEWALKF_NOCHDIR) f_flags |= FTS_NOCHDIR; fp = pfl_fts_open(pathv, f_flags | FTS_COMFOLLOW | FTS_PHYSICAL, cmpf); if (fp == NULL) psc_fatal("fts_open %s", fn); while ((f = pfl_fts_read(fp)) != NULL) { switch (f->fts_info) { case FTS_NS: psclog_warnx("%s: %s", f->fts_path, strerror(f->fts_errno)); break; case FTS_F: case FTS_D: case FTS_SL: if (flags & PFL_FILEWALKF_VERBOSE) warnx("processing %s%s", fn, f->fts_info == FTS_D ? 
"/" : ""); case FTS_DP: rc = cbf(f, arg); if (rc) { pfl_fts_close(fp); return (rc); } break; default: if (f->fts_errno == 0) f->fts_errno = EOPNOTSUPP; psclog_warnx("%s: %s", f->fts_path, strerror(f->fts_errno)); break; } } pfl_fts_close(fp); } else { const char *basefn; size_t baselen; if (lstat(fn, &stb) == -1) err(1, "%s", fn); basefn = pfl_basename(fn); baselen = strlen(basefn); f = PSCALLOC(sizeof(*f) + baselen); f->fts_accpath = (char *)fn; f->fts_path = (char *)fn; f->fts_pathlen = strlen(fn); strlcpy(f->fts_name, basefn, baselen + 1); f->fts_namelen = baselen; f->fts_ino = stb.st_ino; f->fts_statp = &stb; switch (stb.st_mode & S_IFMT) { case S_IFDIR: f->fts_info = FTS_D; break; case S_IFREG: f->fts_info = FTS_F; break; case S_IFLNK: f->fts_info = FTS_SL; break; case S_IFBLK: f->fts_info = FTS_DEFAULT; break; default: psclog_warnx("%s: %s", fn, strerror(EOPNOTSUPP)); break; } rc = cbf(f, arg); PSCFREE(f); } return (rc); }
/*
 * Look for a "<fid>.update" file in the MDS temporary directory and,
 * if one with a valid inode CRC is found, use its contents to finish
 * writing the inode's meta file.
 *
 * @vfsid: file system the inode lives in.
 * @ih: inode handle; its inode structure is overwritten with the
 *	contents read from the update file.
 * @rc: value-result; receives the result of the last operation
 *	attempted (0 when every step succeeded).
 *
 * Returns 1 if an update file with a matching CRC was found, 0
 * otherwise.
 */
int
mds_inode_update_interrupted(int vfsid, struct slash_inode_handle *ih,
    int *rc)
{
	char fn[NAME_MAX + 1];
	struct srt_stat sstb;
	struct iovec iovs[2];
	uint64_t crc, od_crc;
	void *h = NULL, *th;
	mdsio_fid_t inum;
	int exists = 0;
	size_t nb;

	/* Remember the inode's own file handle; it is restored on exit. */
	th = inoh_2_mfh(ih);

	snprintf(fn, sizeof(fn), "%016"PRIx64".update",
	    inoh_2_fid(ih));

	*rc = mdsio_lookup(vfsid, mds_tmpdir_inum[vfsid], fn, &inum,
	    &rootcreds, NULL);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	*rc = mdsio_opencreatef(vfsid, inum, &rootcreds, O_RDONLY,
	    MDSIO_OPENCRF_NOLINK, 0644, NULL, NULL, NULL, &h, NULL, NULL,
	    0);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	/* Read the inode and the CRC stored after it in a single call. */
	iovs[0].iov_base = &ih->inoh_ino;
	iovs[0].iov_len = sizeof(ih->inoh_ino);
	iovs[1].iov_base = &od_crc;
	iovs[1].iov_len = sizeof(od_crc);
	*rc = mdsio_preadv(vfsid, &rootcreds, iovs, nitems(iovs), &nb, 0,
	    h);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	psc_crc64_calc(&crc, &ih->inoh_ino, sizeof(ih->inoh_ino));
	if (crc != od_crc) {
		*rc = PFLERR_BADCRC;
		PFL_GOTOERR(out, *rc);
	}

	/* From here on a usable update file is known to exist. */
	exists = 1;

	psc_assert(ih->inoh_extras == NULL);
	ih->inoh_extras = PSCALLOC(INOX_SZ);

	/* Load the extras through the update file's handle. */
	inoh_2_mfh(ih) = h;
	*rc = mds_inox_ensure_loaded(ih);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	inoh_2_mfh(ih) = th;

	memset(&sstb, 0, sizeof(sstb));
	*rc = mdsio_setattr(vfsid, 0, &sstb, SL_SETATTRF_METASIZE,
	    &rootcreds, NULL, th, NULL);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	*rc = mds_inode_dump(vfsid, NULL, ih, h);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds,
	    NULL, NULL);

 out:
	if (h)
		mdsio_release(vfsid, &rootcreds, h);
	/* On any failure the update file is removed as well. */
	if (*rc)
		mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn,
		    &rootcreds, NULL, NULL);
	/* Always leave the handle pointing at the original file. */
	inoh_2_mfh(ih) = th;
	return (exists);
}
/*
 * Per-slot callback: optionally record the slot's receipt and print
 * the slot's contents.
 *
 * @data: contents of the slot.
 * @r: receipt describing the slot.
 * @arg: pointer to the table pointer (struct pfl_odt **).
 *
 * While num_free is non-zero, a copy of the receipt is appended to
 * rcpts and num_free is decremented.  Nothing is printed unless
 * `show' is set.  When a format string `fmt' is given, the data is
 * decoded field by field according to it; otherwise it is printed as
 * text if its first 10 bytes are printable, and as a hex dump of
 * elem_size bytes if not.
 */
void
visit(__unusedx void *data, struct pfl_odt_receipt *r, void *arg)
{
	char buf[LINE_MAX], *p = data;
	struct pfl_odt **t = arg;
	static int shown_hdr;
	union {
		int	*d;
		int64_t	*q;
		void	*p;
	} u;
	size_t i;

	if (num_free) {
		struct pfl_odt_receipt *rdup;

		rdup = PSCALLOC(sizeof(*rdup));
		memcpy(rdup, r, sizeof(*r));
		psc_dynarray_add(&rcpts, rdup);
		num_free--;
	}

	if (!show)
		return;

	/* Print the table summary and column header only once. */
	if (!shown_hdr) {
		struct pfl_odt_hdr *h;

		h = (*t)->odt_hdr;
		printf("nelems\t%u\n", h->odth_nelems);
		printf("elemsz\t%u\n", h->odth_objsz);
		printf("%7s %16s data\n", "slot", "crc");
		shown_hdr = 1;
	}

	printf("%7zd %16"PRIx64" ", r->odtr_elem, r->odtr_crc);

	if (fmt) {
		/* Each conversion consumes one field and advances `p'. */
		(void)FMTSTR(buf, sizeof(buf), fmt,
		    FMTSTRCASE('d', "d", (u.p = p, p += sizeof(int), *u.d))
		    FMTSTRCASE('u', "u", (u.p = p, p += sizeof(int), *u.d))
		    FMTSTRCASE('x', "x", (u.p = p, p += sizeof(int), *u.d))
		    FMTSTRCASE('q', PRId64, (u.p = p, p += sizeof(int64_t), *u.q))
		    FMTSTRCASE('Q', PRIu64, (u.p = p, p += sizeof(int64_t), *u.q))
		    FMTSTRCASE('X', PRIx64, (u.p = p, p += sizeof(int64_t), *u.q))
		);
		printf("%s\n", buf);
		return;
	}

	/*
	 * If the first 10 characters aren't ASCII, don't display as
	 * such.  The scan stops at a NUL, so shorter strings fall
	 * through to the hex dump as well.
	 */
	for (i = 0, p = data; i < 10 && *p; i++, p++)
		if (!isspace((unsigned char)*p) &&
		    !isgraph((unsigned char)*p))
			goto skip;
	if (i != 10)
		goto skip;
	printf("%s\n", (char *)data);
	return;

 skip:
	/* Print bytes as unsigned so values >= 0x80 stay two digits. */
	for (i = 0, p = data; i < elem_size; p++, i++)
		printf("%02x", (unsigned char)*p);
	printf("\n");
}
/* * Initialize an on-disk journal. * @fn: file path to store journal. * @nents: number of entries journal may contain if non-zero. * @entsz: size of a journal entry. * @rs: read size. * Returns the number of entries created. */ uint32_t sl_journal_format(const char *fn, uint32_t nents, uint32_t entsz, uint32_t rs, uint64_t uuid, int block_dev) { uint32_t i, slot, max_nents; struct psc_journal_enthdr *pje; struct psc_journal pj; struct stat stb; unsigned char *jbuf; size_t numblocks; ssize_t nb; int fd; memset(&pj, 0, sizeof(pj)); fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd == -1) psc_fatal("%s", fn); if (fstat(fd, &stb) == -1) psc_fatal("stat %s", fn); /* * If the user does not specify nents, either use default or * based on the block device size. */ if (nents == 0 && !block_dev) nents = SLJ_MDS_JNENTS; if (block_dev) { if (ioctl(fd, BLKGETSIZE, &numblocks) == -1) err(1, "BLKGETSIZE: %s", fn); /* show progress, it is going to be a while */ verbose = 1; /* deal with large disks */ max_nents = MIN(numblocks, SLJ_MDS_MAX_JNENTS); /* leave room on both ends */ max_nents -= stb.st_blksize / SLJ_MDS_ENTSIZE + 16; /* efficiency */ max_nents = (max_nents / rs) * rs; if (nents) nents = MIN(nents, max_nents); else nents = max_nents; } if (nents % rs) psc_fatalx("number of slots (%u) should be a multiple of " "readsize (%u)", nents, rs); pj.pj_fd = fd; pj.pj_hdr = PSCALLOC(PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize)); pj.pj_hdr->pjh_entsz = entsz; pj.pj_hdr->pjh_nents = nents; pj.pj_hdr->pjh_version = PJH_VERSION; pj.pj_hdr->pjh_readsize = rs; pj.pj_hdr->pjh_iolen = PSC_ALIGN(sizeof(struct psc_journal_hdr), stb.st_blksize); pj.pj_hdr->pjh_magic = PJH_MAGIC; pj.pj_hdr->pjh_timestamp = time(NULL); pj.pj_hdr->pjh_fsuuid = uuid; psc_crc64_init(&pj.pj_hdr->pjh_chksum); psc_crc64_add(&pj.pj_hdr->pjh_chksum, pj.pj_hdr, offsetof(struct psc_journal_hdr, pjh_chksum)); psc_crc64_fini(&pj.pj_hdr->pjh_chksum); nb = pwrite(pj.pj_fd, pj.pj_hdr, 
pj.pj_hdr->pjh_iolen, 0); if ((size_t)nb != pj.pj_hdr->pjh_iolen) psc_fatalx("failed to write journal header: %s", nb == -1 ? strerror(errno) : "short write"); nb = PJ_PJESZ(&pj) * pj.pj_hdr->pjh_readsize; jbuf = psc_alloc(nb, PAF_PAGEALIGN); for (i = 0; i < rs; i++) { pje = PSC_AGP(jbuf, PJ_PJESZ(&pj) * i); pje->pje_magic = PJE_MAGIC; pje->pje_type = PJE_FORMAT; pje->pje_xid = PJE_XID_NONE; pje->pje_len = 0; psc_crc64_init(&pje->pje_chksum); psc_crc64_add(&pje->pje_chksum, pje, offsetof(struct psc_journal_enthdr, pje_chksum)); psc_crc64_add(&pje->pje_chksum, pje->pje_data, pje->pje_len); psc_crc64_fini(&pje->pje_chksum); } i = 0; /* XXX use an option to write only one entry in fast create mode */ for (slot = 0; slot < pj.pj_hdr->pjh_nents; slot += rs) { nb = pwrite(pj.pj_fd, jbuf, PJ_PJESZ(&pj) * rs, PJ_GETENTOFF(&pj, slot)); if ((size_t)nb != PJ_PJESZ(&pj) * rs) psc_fatal("failed to write slot %u (%zd)", slot, nb); if (verbose && slot % 262144 == 0) { printf("."); fflush(stdout); fsync(pj.pj_fd); if (++i == 80) { printf("\n"); i = 0; } } } if (verbose && i) printf("\n"); if (close(fd) == -1) psc_fatal("failed to close journal"); psc_free(jbuf, PAF_PAGEALIGN, PJ_PJESZ(&pj) * rs); return (nents); }