/* * Handle a BMAPCHWRMODE request to upgrade a client bmap lease from * READ-only to READ+WRITE. * @rq: RPC request. */ int slm_rmc_handle_bmap_chwrmode(struct pscrpc_request *rq) { struct bmap_mds_lease *bml = NULL; struct srm_bmap_chwrmode_req *mq; struct srm_bmap_chwrmode_rep *mp; struct fidc_membh *f = NULL; struct bmapc_memb *b = NULL; struct bmap_mds_info *bmi; SL_RSX_ALLOCREP(rq, mq, mp); mp->rc = -slm_fcmh_get(&mq->sbd.sbd_fg, &f); if (mp->rc) PFL_GOTOERR(out, mp->rc); mp->rc = bmap_lookup(f, mq->sbd.sbd_bmapno, &b); if (mp->rc) PFL_GOTOERR(out, mp->rc); bmi = bmap_2_bmi(b); bml = mds_bmap_getbml(b, mq->sbd.sbd_seq, mq->sbd.sbd_nid, mq->sbd.sbd_pid); if (bml == NULL) PFL_GOTOERR(out, mp->rc = -EINVAL); mp->rc = mds_bmap_bml_chwrmode(bml, mq->prefios[0]); if (mp->rc == -PFLERR_ALREADY) mp->rc = 0; else if (mp->rc) PFL_GOTOERR(out, mp->rc); mp->sbd = mq->sbd; mp->sbd.sbd_seq = bml->bml_seq; mp->sbd.sbd_key = bmi->bmi_assign->odtr_crc; psc_assert(bmi->bmi_wr_ion); mp->sbd.sbd_ios = rmmi2resm(bmi->bmi_wr_ion)->resm_res_id; out: if (bml) mds_bmap_bml_release(bml); if (b) bmap_op_done(b); if (f) fcmh_op_done(f); return (0); }
/*
 * Handle a LOOKUP request: resolve a name within a parent directory.
 * @rq: RPC request.
 *
 * NOTE(review): this chunk is truncated — the function continues past
 * the visible text (the "out:" label referenced below and the final
 * return are not in view here).
 */
int slm_rmc_handle_lookup(struct pscrpc_request *rq)
{
	struct fidc_membh *p = NULL;
	struct srm_lookup_req *mq;
	struct srm_lookup_rep *mp;
	int vfsid;

	SL_RSX_ALLOCREP(rq, mq, mp);
	mp->rc = slfid_to_vfsid(mq->pfg.fg_fid, &vfsid);
	if (mp->rc)
		PFL_GOTOERR(out, mp->rc);
	mp->rc = -slm_fcmh_get(&mq->pfg, &p);
	if (mp->rc)
		PFL_GOTOERR(out, mp->rc);

	/* Defensive NUL-termination: the name arrives from the wire. */
	mq->name[sizeof(mq->name) - 1] = '\0';
	psclog_diag("lookup: pfid="SLPRI_FID" name=%s", fcmh_2_mfid(p), mq->name);

	/* The internal metadata directory is never exposed to clients. */
	if (fcmh_2_mfid(p) == SLFID_ROOT &&
	    strcmp(mq->name, SL_RPATH_META_DIR) == 0)
		PFL_GOTOERR(out, mp->rc = -EINVAL);

	/*
	 * Under a global mount, names directly under the root are site
	 * names; synthesize a root FID carrying the matching site ID
	 * instead of consulting the backend.
	 */
	if (mq->pfg.fg_fid == SLFID_ROOT && use_global_mount) {
		uint64_t fid;
		struct sl_site *site;

		mp->rc = -ENOENT;
		CONF_LOCK();
		CONF_FOREACH_SITE(site) {
			if (strcmp(mq->name, site->site_name) != 0)
				continue;
			fid = SLFID_ROOT;
			FID_SET_SITEID(fid, site->site_id);
			mp->xattrsize = 0;
			mp->attr.sst_fg.fg_fid = fid;
			/* generation 2 — presumably a fixed convention
			 * for synthesized site roots; confirm against
			 * the client side. */
			mp->attr.sst_fg.fg_gen = 2;
			slm_root_attributes(&mp->attr);
			mp->rc = 0;
			break;
		}
		CONF_ULOCK();
		goto out;
	}
/* * Handle a NAMESPACE_UPDATE request from another MDS. */ int slm_rmm_handle_namespace_update(struct pscrpc_request *rq) { struct srt_update_entry *entryp; struct srm_update_req *mq; struct srm_update_rep *mp; struct sl_mds_peerinfo *p; struct sl_resource *res; struct sl_site *site; struct iovec iov; int i, len, count; SL_RSX_ALLOCREP(rq, mq, mp); count = mq->count; if (count <= 0 || mq->size > LNET_MTU) { mp->rc = -EINVAL; return (mp->rc); } iov.iov_len = mq->size; iov.iov_base = PSCALLOC(mq->size); mp->rc = slrpc_bulkserver(rq, BULK_GET_SINK, SRMM_BULK_PORTAL, &iov, 1); if (mp->rc) goto out; /* Search for the peer information by the given site ID. */ site = libsl_siteid2site(mq->siteid); p = NULL; if (site) SITE_FOREACH_RES(site, res, i) if (res->res_type == SLREST_MDS) { p = res2rpmi(res)->rpmi_info; break; } if (p == NULL) { psclog_info("fail to find site ID %d", mq->siteid); PFL_GOTOERR(out, mp->rc = -EINVAL); } /* * Iterate through the namespace update buffer and apply updates. * If we fail to apply an update, we still report success to our * peer because reporting an error does not help our cause. */ entryp = iov.iov_base; for (i = 0; i < count; i++) { slm_rmm_apply_update(entryp); len = UPDATE_ENTRY_LEN(entryp); entryp = PSC_AGP(entryp, len); } zfsslash2_wait_synced(0); out: PSCFREE(iov.iov_base); return (mp->rc); }
int slm_rmc_handle_getattr(struct pscrpc_request *rq) { const struct srm_getattr_req *mq; struct srm_getattr_rep *mp; struct fidc_membh *f; int vfsid; SL_RSX_ALLOCREP(rq, mq, mp); psclog_diag("pfid="SLPRI_FID, mq->fg.fg_fid); if (mq->fg.fg_fid == SLFID_ROOT && use_global_mount) { mp->attr.sst_fg.fg_fid = SLFID_ROOT; mp->attr.sst_fg.fg_gen = FGEN_ANY-1; slm_root_attributes(&mp->attr); return (0); } mp->rc = -slm_fcmh_get(&mq->fg, &f); if (mp->rc) PFL_GOTOERR(out, mp->rc); mp->rc = slfid_to_vfsid(mq->fg.fg_fid, &vfsid); if (mp->rc) PFL_GOTOERR(out, mp->rc); mp->xattrsize = mdsio_hasxattrs(vfsid, &rootcreds, fcmh_2_mfid(f)); FCMH_LOCK(f); mp->attr = f->fcmh_sstb; out: if (f) fcmh_op_done(f); return (0); }
int slm_rmc_handle_link(struct pscrpc_request *rq) { struct fidc_membh *p = NULL, *c = NULL; struct srm_link_req *mq; struct srm_link_rep *mp; int vfsid; SL_RSX_ALLOCREP(rq, mq, mp); mp->rc = slfid_to_vfsid(mq->fg.fg_fid, &vfsid); if (mp->rc) PFL_GOTOERR(out, mp->rc); mp->rc = -slm_fcmh_get(&mq->pfg, &p); if (mp->rc) PFL_GOTOERR(out, mp->rc); mp->rc = -slm_fcmh_get(&mq->fg, &c); if (mp->rc) PFL_GOTOERR(out, mp->rc); mq->name[sizeof(mq->name) - 1] = '\0'; mds_reserve_slot(1); mp->rc = mdsio_link(vfsid, fcmh_2_mfid(c), fcmh_2_mfid(p), mq->name, &rootcreds, mdslog_namespace); mds_unreserve_slot(1); mdsio_fcmh_refreshattr(c, &mp->cattr); mdsio_fcmh_refreshattr(p, &mp->pattr); out: if (c) fcmh_op_done(c); if (p) fcmh_op_done(p); return (0); }
int slm_rmc_handle_getbmap(struct pscrpc_request *rq) { const struct srm_leasebmap_req *mq; struct srm_leasebmap_rep *mp; struct fidc_membh *f; int rc = 0; SL_RSX_ALLOCREP(rq, mq, mp); if (mq->rw == SL_WRITE) OPSTAT_INCR("getbmap-lease-write"); else if (mq->rw == SL_READ) OPSTAT_INCR("getbmap-lease-read"); else { mp->rc = -EINVAL; return (0); } mp->rc = -slm_fcmh_get(&mq->fg, &f); if (mp->rc) return (0); mp->flags = mq->flags; mp->rc = mds_bmap_load_cli(f, mq->bmapno, mq->flags, mq->rw, mq->prefios[0], &mp->sbd, rq->rq_export, mp->repls, 0); if (mp->rc) PFL_GOTOERR(out, mp->rc); if (mp->flags & SRM_LEASEBMAPF_GETINODE) slm_pack_inode(f, &mp->ino); out: fcmh_op_done(f); return (rc ? rc : mp->rc); }
/*
 * Remove a replication request / mark replicas for garbage collection
 * over a range of bmaps of a file (or adjust a directory's replica
 * table).
 * @fgp: FID+generation of the target file.
 * @bmapno: first bmap number to process.
 * @nbmaps: in: number of bmaps to process ((sl_bmapno_t)-1 = wildcard
 *	meaning "all"); out: number of bmaps actually processed.
 * @iosv: list of I/O systems whose replicas are being removed.
 * @nios: number of entries in @iosv.
 * Returns 0 on success or a negative error code.
 */
int mds_repl_delrq(const struct sl_fidgen *fgp, sl_bmapno_t bmapno, sl_bmapno_t *nbmaps, sl_replica_t *iosv, int nios)
{
	int tract[NBREPLST], rc, iosidx[SL_MAX_REPLICAS], flags;
	sl_bmapno_t nbmaps_processed = 0;
	struct slm_repl_valid replv;
	struct fidc_membh *f = NULL;
	struct bmap *b;

	if (nios < 1 || nios > SL_MAX_REPLICAS || *nbmaps == 0)
		return (-EINVAL);
	rc = slm_fcmh_get(fgp, &f);
	if (rc)
		return (-rc);
	/*
	 * NOTE(review): the lock taken here is not explicitly dropped
	 * in this function; presumably fcmh_op_done() at out: releases
	 * it — confirm against its definition.
	 */
	FCMH_LOCK(f);
	/*
	 * Directories only carry a replica table, so the lookup itself
	 * performs the deletion; for regular files we only need the
	 * replica indexes to drive the per-bmap state walk below.
	 */
	if (fcmh_isdir(f))
		flags = IOSV_LOOKUPF_DEL;
	else
		flags = IOSV_LOOKUPF_LOOKUP;
	/* Find replica IOS indexes. */
	rc = -_mds_repl_iosv_lookup(current_vfsid, fcmh_2_inoh(f), iosv, iosidx, nios, flags);
	/* For directories we are done (successfully or not). */
	if (fcmh_isdir(f) || rc)
		PFL_GOTOERR(out, rc);
	replv.nios = nios;
	replv.idx = iosidx;
	/* Transition active/valid replica states to garbage. */
	brepls_init(tract, -1);
	tract[BREPLST_REPL_QUEUED] = BREPLST_GARBAGE_QUEUED;
	tract[BREPLST_REPL_SCHED] = BREPLST_GARBAGE_QUEUED;
	tract[BREPLST_VALID] = BREPLST_GARBAGE_QUEUED;
	/*
	 * Wildcards shouldn't result in errors on zero-length files.
	 * For an explicit (non-wildcard) request, prime rc with an
	 * error so that a range that never enters the loop reports
	 * SLERR_BMAP_INVALID; a successful first iteration overwrites
	 * it.
	 */
	if (*nbmaps != (sl_bmapno_t)-1)
		rc = -SLERR_BMAP_INVALID;
	/*
	 * The following loop will bail out on the very first error.
	 * However, its previous action, if any, has already taken
	 * effect.
	 */
	for (; *nbmaps && bmapno < fcmh_nvalidbmaps(f); bmapno++, --*nbmaps, nbmaps_processed++) {
		/* Cap the amount of work per call. */
		if (nbmaps_processed >= SLM_REPLRQ_NBMAPS_MAX)
			PFL_GOTOERR(out, rc = -PFLERR_WOULDBLOCK);
		rc = -bmap_get(f, bmapno, SL_WRITE, &b);
		if (rc)
			PFL_GOTOERR(out, rc);
		/*
		 * Before blindly doing the transition, we have to check
		 * to ensure this operation would retain at least one
		 * valid replica.
		 */
		replv.n = 0;
		mds_repl_bmap_walkcb(b, NULL, NULL, 0, slm_repl_countvalid_cb, &replv);
		flags = 0;
		if (replv.n == 0)
			rc = -SLERR_LASTREPL;
		else {
			rc = _mds_repl_bmap_walk(b, tract, NULL, 0, iosidx, nios, slm_repl_delrq_cb, &flags);
			psc_assert(!rc);
			/* Persist only if the walk changed any state. */
			if (flags & FLAG_DIRTY)
				rc = mds_bmap_write_logrepls(b);
		}
		bmap_op_done_type(b, BMAP_OPCNT_LOOKUP);
		if (rc)
			PFL_GOTOERR(out, rc);
	}
 out:
	if (f)
		fcmh_op_done(f);
	/* Report back how many bmaps were actually handled. */
	*nbmaps = nbmaps_processed;
	return (rc);
}
/*
 * Handle a request to do replication from a client.  May also
 * reinitialize some parameters of the replication, such as priority, if
 * the request already exists in the system.
 * @fgp: FID+generation of the target file.
 * @bmapno: first bmap number to process.
 * @nbmaps: in: number of bmaps to process ((sl_bmapno_t)-1 = wildcard
 *	meaning "all"); out: number of bmaps actually processed.
 * @iosv: list of I/O systems to replicate to.
 * @nios: number of entries in @iosv.
 * @sys_prio/@usr_prio: replication priorities (-1 = unset; see
 *	parse_replrq()).
 * Returns 0 on success or a negative error code.
 */
int
mds_repl_addrq(const struct sl_fidgen *fgp, sl_bmapno_t bmapno, sl_bmapno_t *nbmaps, sl_replica_t *iosv, int nios, int sys_prio, int usr_prio)
{
	int tract[NBREPLST], ret_hasvalid[NBREPLST];
	int iosidx[SL_MAX_REPLICAS], rc, flags;
	sl_bmapno_t nbmaps_processed = 0;
	struct fidc_membh *f = NULL;
	struct bmap *b;

	/* Perform sanity checks on request. */
	if (nios < 1 || nios > SL_MAX_REPLICAS || *nbmaps == 0)
		return (-EINVAL);

	rc = slm_fcmh_get(fgp, &f);
	if (rc)
		return (-rc);

	/* Only files and directories can carry replication state. */
	if (!fcmh_isdir(f) && !fcmh_isreg(f))
		PFL_GOTOERR(out, rc = -PFLERR_NOTSUP);

	/* Lookup replica(s)' indexes in our replica table. */
	rc = -mds_repl_iosv_lookup_add(current_vfsid, fcmh_2_inoh(f),
	    iosv, iosidx, nios);
	if (rc)
		PFL_GOTOERR(out, rc);

	/*
	 * If we are modifying a directory, we are done as just the
	 * replica table needs to be updated.
	 */
	if (fcmh_isdir(f))
		PFL_GOTOERR(out, 0);

	/*
	 * Setup structure to ensure at least one VALID replica exists.
	 */
	brepls_init(ret_hasvalid, 0);
	ret_hasvalid[BREPLST_VALID] = 1;

	/*
	 * Setup transitions to enqueue a replication.
	 */
	brepls_init(tract, -1);
	tract[BREPLST_INVALID] = BREPLST_REPL_QUEUED;
	tract[BREPLST_GARBAGE_SCHED] = BREPLST_REPL_QUEUED;
	tract[BREPLST_GARBAGE_QUEUED] = BREPLST_REPL_QUEUED;

	/*
	 * Wildcards shouldn't result in errors on zero-length files:
	 * for an explicit request, prime rc with an error so a range
	 * that never enters the loop reports SLERR_BMAP_INVALID.
	 */
	if (*nbmaps != (sl_bmapno_t)-1)
		rc = -SLERR_BMAP_INVALID;

	for (; *nbmaps && bmapno < fcmh_nvalidbmaps(f); bmapno++, --*nbmaps, nbmaps_processed++) {
		/* Cap the amount of work per call. */
		if (nbmaps_processed >= SLM_REPLRQ_NBMAPS_MAX) {
			rc = -PFLERR_WOULDBLOCK;
			break;
		}

		rc = -bmap_get(f, bmapno, SL_WRITE, &b);
		if (rc)
			PFL_GOTOERR(out, rc);

		/*
		 * If no VALID replicas exist, the bmap must be
		 * uninitialized/all zeroes; skip it.
		 */
		if (mds_repl_bmap_walk_all(b, NULL, ret_hasvalid, REPL_WALKF_SCIRCUIT) == 0) {
			bmap_op_done(b);
			continue;
		}

		/*
		 * We do not follow the standard "retifset" API here
		 * because we need to preserve DIRTY if it gets set
		 * instead of some other state getting returned.
		 */
		flags = 0;
		_mds_repl_bmap_walk(b, tract, NULL, 0, iosidx, nios, slm_repl_addrq_cb, &flags);

		/* both default to -1 in parse_replrq() */
		bmap_2_bmi(b)->bmi_sys_prio = sys_prio;
		bmap_2_bmi(b)->bmi_usr_prio = usr_prio;
		if (flags & FLAG_DIRTY)
			mds_bmap_write_logrepls(b);
		else if (sys_prio != -1 || usr_prio != -1)
			/* Only a priority change: lighter-weight write. */
			slm_repl_upd_write(b, 0);
		bmap_op_done_type(b, BMAP_OPCNT_LOOKUP);

		if (flags & FLAG_REPLICA_STATE_INVALID) {
			/* See pfl_register_errno() */
			rc = -SLERR_REPLICA_STATE_INVALID;
			break;
		}
	}

 out:
	if (f)
		fcmh_op_done(f);
	/* Report back how many bmaps were actually handled. */
	*nbmaps = nbmaps_processed;
	return (rc);
}
/*
 * Return the index of the given IOS ID or a negative error code on failure.
 * @vfsid: virtual file system ID.
 * @ih: inode handle whose replica table is searched/modified.
 * @ios: I/O system ID to look up.
 * @flag: IOSV_LOOKUPF_ADD / _DEL / _LOOKUP — add the IOS if missing,
 *	delete it if present, or just find it.
 *
 * The replica table is split: the first SL_DEF_REPLICAS slots live in
 * the inode proper, the remainder (up to SL_MAX_REPLICAS) in an extra
 * inode block that is lazily loaded.  Throughout the scan, i is the
 * logical index and j the index within whichever array "repl"
 * currently points at.
 */
int _mds_repl_ios_lookup(int vfsid, struct slash_inode_handle *ih, sl_ios_id_t ios, int flag)
{
	int locked, rc;
	struct slm_inox_od *ix = NULL;
	struct sl_resource *res;
	struct fidc_membh *f;
	sl_replica_t *repl;
	uint32_t i, j, nr;
	char buf[LINE_MAX];

	switch (flag) {
	case IOSV_LOOKUPF_ADD:
		OPSTAT_INCR("replicate-add");
		break;
	case IOSV_LOOKUPF_DEL:
		OPSTAT_INCR("replicate-del");
		break;
	case IOSV_LOOKUPF_LOOKUP:
		OPSTAT_INCR("replicate-lookup");
		break;
	default:
		psc_fatalx("Invalid IOS lookup flag %d", flag);
	}

	/*
	 * Can I assume that IOS ID are non-zeros.  If so, I can use
	 * zero to mark a free slot.  See sl_global_id_build().
	 */
	f = inoh_2_fcmh(ih);
	nr = ih->inoh_ino.ino_nrepls;
	repl = ih->inoh_ino.ino_repls;
	locked = INOH_RLOCK(ih);

	psc_assert(nr <= SL_MAX_REPLICAS);
	if (nr == SL_MAX_REPLICAS && flag == IOSV_LOOKUPF_ADD) {
		DEBUG_INOH(PLL_WARN, ih, buf, "too many replicas");
		PFL_GOTOERR(out, rc = -ENOSPC);
	}

	/* The target must be a real filesystem-type resource. */
	res = libsl_id2res(ios);
	if (res == NULL || !RES_ISFS(res))
		PFL_GOTOERR(out, rc = -SLERR_RES_BADTYPE);

	/*
	 * 09/29/2016: Hit SLERR_SHORTIO in the function. Need more
	 * investigation.
	 */

	/*
	 * Return ENOENT by default for IOSV_LOOKUPF_DEL &
	 * IOSV_LOOKUPF_LOOKUP.
	 */
	rc = -ENOENT;

	/*
	 * Search the existing replicas to see if the given IOS is
	 * already there.
	 *
	 * The following code can step through zero IOS IDs just fine.
	 */
	for (i = 0, j = 0; i < nr; i++, j++) {
		if (i == SL_DEF_REPLICAS) {
			/*
			 * The first few replicas are in the inode
			 * itself, the rest are in the extra inode
			 * block.
			 */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			ix = ih->inoh_extras;
			repl = ix->inox_repls;
			j = 0;
		}
		DEBUG_INOH(PLL_DEBUG, ih, buf, "is rep[%u](=%u) == %u ?", j, repl[j].bs_id, ios);
		if (repl[j].bs_id == ios) {
			/*
			 * Luckily, this code is only called by
			 * mds_repl_delrq() for directories.
			 *
			 * Make sure that the logic works for at least
			 * the following edge cases:
			 *
			 *   (1) There is only one item in the basic array.
			 *   (2) There is only one item in the extra array.
			 *   (3) The number of items is SL_DEF_REPLICAS.
			 *   (4) The number of items is SL_MAX_REPLICAS.
			 */
			if (flag == IOSV_LOOKUPF_DEL) {
				/*
				 * Compact the array if the IOS is not the last
				 * one. The last one will be either overwritten
				 * or zeroed.  Note that we might move extra
				 * garbage at the end if the total number is less
				 * than SL_DEF_REPLICAS.
				 */
				if (i < SL_DEF_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1], (SL_DEF_REPLICAS - j - 1) * sizeof(*repl));
				}
				/*
				 * All items in the basic array, zero the last
				 * one and we are done.
				 */
				if (nr <= SL_DEF_REPLICAS) {
					repl[nr-1].bs_id = 0;
					goto syncit;
				}
				/*
				 * Now we know we have more than SL_DEF_REPLICAS
				 * items.  However, if we are in the basic array,
				 * we have not read the extra array yet.  In this
				 * case, we should also move the first item from
				 * the extra array to the last one in the basic
				 * array (overwrite).
				 */
				if (i < SL_DEF_REPLICAS) {
					rc = mds_inox_ensure_loaded(ih);
					if (rc)
						goto out;
					ix = ih->inoh_extras;
					repl[SL_DEF_REPLICAS - 1].bs_id = ix->inox_repls[0].bs_id;
					repl = ix->inox_repls;
					j = 0;
				}
				/*
				 * Compact the extra array unless the IOS is
				 * the last one, which will be zeroed.
				 */
				if (i < SL_MAX_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1], (SL_INOX_NREPLICAS - j - 1) * sizeof(*repl));
				}
				repl[nr-SL_DEF_REPLICAS-1].bs_id = 0;
 syncit:
				/* Persist the shrunken replica table. */
				ih->inoh_ino.ino_nrepls = nr - 1;
				rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
				if (rc)
					goto out;
			}
			/* XXX EEXIST for IOSV_LOOKUPF_ADD? */
			rc = i;
			goto out;
		}
	}

	/* It doesn't exist; add to inode replica table if requested. */
	if (flag == IOSV_LOOKUPF_ADD) {
		/* paranoid */
		psc_assert(i == nr);
		if (nr >= SL_DEF_REPLICAS) {
			/* be careful with the case of nr = SL_DEF_REPLICAS */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			repl = ih->inoh_extras->inox_repls;
			j = i - SL_DEF_REPLICAS;
		} else {
			repl = ih->inoh_ino.ino_repls;
			j = i;
		}
		repl[j].bs_id = ios;
		DEBUG_INOH(PLL_DIAG, ih, buf, "add IOS(%u) at idx %d", ios, i);
		/* Persist the grown replica table. */
		ih->inoh_ino.ino_nrepls = nr + 1;
		rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
		if (!rc)
			rc = i;
	}
 out:
	INOH_URLOCK(ih, locked);
	return (rc);
}
/*
 * Recover from an inode-format update that was interrupted: if a
 * "<fid>.update" temp file exists and its contents pass the CRC check,
 * replay the final steps of the conversion (dump the inode back into
 * the real metafile and remove the temp file).
 * @vfsid: virtual file system ID.
 * @ih: inode handle being recovered.
 * @rc: out: operation status (0 on success or when no temp file
 *	exists; an error code otherwise).
 * Returns nonzero ("exists") iff a valid interrupted-update temp file
 * was found, regardless of whether the replay then succeeded.
 */
int mds_inode_update_interrupted(int vfsid, struct slash_inode_handle *ih, int *rc)
{
	char fn[NAME_MAX + 1];
	struct srt_stat sstb;
	struct iovec iovs[2];
	uint64_t crc, od_crc;
	void *h = NULL, *th;
	mdsio_fid_t inum;
	int exists = 0;
	size_t nb;

	/* Remember the real metafile handle; restored before return. */
	th = inoh_2_mfh(ih);
	snprintf(fn, sizeof(fn), "%016"PRIx64".update", inoh_2_fid(ih));
	/* No temp file means no interrupted update — nothing to do. */
	*rc = mdsio_lookup(vfsid, mds_tmpdir_inum[vfsid], fn, &inum, &rootcreds, NULL);
	if (*rc)
		PFL_GOTOERR(out, *rc);
	*rc = mdsio_opencreatef(vfsid, inum, &rootcreds, O_RDONLY, MDSIO_OPENCRF_NOLINK, 0644, NULL, NULL, NULL, &h, NULL, NULL, 0);
	if (*rc)
		PFL_GOTOERR(out, *rc);
	/* Read the saved inode followed by its on-disk CRC. */
	iovs[0].iov_base = &ih->inoh_ino;
	iovs[0].iov_len = sizeof(ih->inoh_ino);
	iovs[1].iov_base = &od_crc;
	iovs[1].iov_len = sizeof(od_crc);
	*rc = mdsio_preadv(vfsid, &rootcreds, iovs, nitems(iovs), &nb, 0, h);
	if (*rc)
		PFL_GOTOERR(out, *rc);
	/* Reject a temp file that was itself partially written. */
	psc_crc64_calc(&crc, &ih->inoh_ino, sizeof(ih->inoh_ino));
	if (crc != od_crc) {
		*rc = PFLERR_BADCRC;
		PFL_GOTOERR(out, *rc);
	}
	exists = 1;
	psc_assert(ih->inoh_extras == NULL);
	ih->inoh_extras = PSCALLOC(INOX_SZ);
	/*
	 * Temporarily point the handle at the temp file so the extras
	 * are loaded from it, then restore the real metafile handle.
	 */
	inoh_2_mfh(ih) = h;
	*rc = mds_inox_ensure_loaded(ih);
	if (*rc)
		PFL_GOTOERR(out, *rc);
	inoh_2_mfh(ih) = th;
	/* Truncate the real metafile before rewriting it. */
	memset(&sstb, 0, sizeof(sstb));
	*rc = mdsio_setattr(vfsid, 0, &sstb, SL_SETATTRF_METASIZE, &rootcreds, NULL, th, NULL);
	if (*rc)
		PFL_GOTOERR(out, *rc);
	*rc = mds_inode_dump(vfsid, NULL, ih, h);
	if (*rc)
		PFL_GOTOERR(out, *rc);
	/* Success: the temp file is no longer needed. */
	mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds, NULL, NULL);
 out:
	if (h)
		mdsio_release(vfsid, &rootcreds, h);
	/* On failure, also discard the (bad or half-replayed) temp file. */
	if (*rc)
		mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds, NULL, NULL);
	/* Always leave the handle pointing at the real metafile. */
	inoh_2_mfh(ih) = th;
	return (exists);
}
/*
 * Convert an on-disk inode from an old format version to the current
 * one, staging the converted data through a "<fid>.update" temp file
 * so an interrupted conversion can be recovered by
 * mds_inode_update_interrupted().
 * @vfsid: virtual file system ID.
 * @ih: inode handle to convert.
 * @old_version: format version to convert from (index into
 *	sl_ino_compat_table).
 * Returns 0 on success or an error code.
 */
int mds_inode_update(int vfsid, struct slash_inode_handle *ih, int old_version)
{
	char fn[NAME_MAX + 1];
	struct sl_ino_compat *sic;
	struct fidc_membh *f;
	struct srt_stat sstb;
	void *h = NULL, *th;
	int rc;

	/* Read the inode using the old format's reader. */
	sic = &sl_ino_compat_table[old_version];
	rc = sic->sic_read_ino(ih);
	if (rc)
		return (rc);
	DEBUG_INOH(PLL_INFO, ih, "updating old inode (v %d)", old_version);
	f = inoh_2_fcmh(ih);
	snprintf(fn, sizeof(fn), "%016"PRIx64".update", fcmh_2_fid(f));
	/* Stage the conversion in a temp file in the tmp directory. */
	rc = mdsio_opencreatef(vfsid, mds_tmpdir_inum[vfsid], &rootcreds, O_RDWR | O_CREAT | O_TRUNC, MDSIO_OPENCRF_NOLINK, 0644, fn, NULL, NULL, &h, NULL, NULL, 0);
	if (rc)
		PFL_GOTOERR(out, rc);
	psc_assert(ih->inoh_extras == NULL);
	ih->inoh_extras = PSCALLOC(INOX_SZ);
	/* convert old structures into new into temp file */
	rc = sic->sic_read_inox(ih);
	if (rc)
		PFL_GOTOERR(out, rc);
	/*
	 * Swap the handle's file to the temp file for the dump, then
	 * restore it; th is the real metafile handle throughout.
	 */
	th = inoh_2_mfhp(ih)->fh;
	inoh_2_mfhp(ih)->fh = h;
	rc = mds_inode_dump(vfsid, sic, ih, th);
	inoh_2_mfhp(ih)->fh = th;
	if (rc)
		PFL_GOTOERR(out, rc);
	/* move new structures to inode meta file */
	memset(&sstb, 0, sizeof(sstb));
	rc = mdsio_setattr(vfsid, 0, &sstb, SL_SETATTRF_METASIZE, &rootcreds, NULL, th, NULL);
	if (rc)
		PFL_GOTOERR(out, rc);
//	mdsio_rename(mds_tmpdir_inum, NULL, fn, &rootcreds, NULL);
	rc = mds_inode_dump(vfsid, NULL, ih, h);
	if (rc)
		PFL_GOTOERR(out, rc);
	/* Conversion complete: discard the staging file. */
	mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds, NULL, NULL);
 out:
	if (h)
		mdsio_release(vfsid, &rootcreds, h);
	if (rc) {
		/* On failure, remove the staging file as well. */
		mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds, NULL, NULL);
		DEBUG_INOH(PLL_ERROR, ih, "error updating old inode " "rc=%d", rc);
	}
	return (rc);
}