/** * _slm_fcmh_endow - "Endow" or apply inheritance to a new directory * entry from its parent directory replica layout. * Note: the bulk of this is empty until we have a place to store such * info in the SLASH2 metafile. */ int _slm_fcmh_endow(int vfsid, struct fidc_membh *p, struct fidc_membh *c, int wr) { sl_replica_t repls[SL_MAX_REPLICAS]; int nr, rc = 0; uint32_t pol; FCMH_LOCK(p); pol = fcmh_2_ino(p)->ino_replpol; nr = fcmh_2_nrepls(p); memcpy(repls, fcmh_2_ino(p)->ino_repls, sizeof(repls[0]) * SL_DEF_REPLICAS); if (nr > SL_DEF_REPLICAS) { mds_inox_ensure_loaded(fcmh_2_inoh(p)); memcpy(&repls[SL_DEF_REPLICAS], fcmh_2_inox(p)->inox_repls, sizeof(repls[0]) * SL_INOX_NREPLICAS); } FCMH_ULOCK(p); FCMH_WAIT_BUSY(c); fcmh_2_replpol(c) = pol; fcmh_2_ino(c)->ino_nrepls = nr; memcpy(fcmh_2_ino(c)->ino_repls, repls, sizeof(repls[0]) * SL_DEF_REPLICAS); if (nr > SL_DEF_REPLICAS) { mds_inox_ensure_loaded(fcmh_2_inoh(c)); memcpy(fcmh_2_inox(c)->inox_repls, &repls[SL_DEF_REPLICAS], sizeof(repls[0]) * SL_INOX_NREPLICAS); } if (wr) mds_inodes_odsync(vfsid, c, mdslog_ino_repls); FCMH_UNBUSY(c); return (rc); }
void slm_pack_inode(struct fidc_membh *f, struct srt_inode *in) { struct slash_inode_handle *ih; int rc; ih = fcmh_2_inoh(f); in->newreplpol = ih->inoh_ino.ino_replpol; in->nrepls = ih->inoh_ino.ino_nrepls; memcpy(in->reptbl, &ih->inoh_ino.ino_repls, sizeof(ih->inoh_ino.ino_repls)); if (in->nrepls > SL_DEF_REPLICAS) { rc = mds_inox_ensure_loaded(ih); if (!rc) memcpy(&in->reptbl[SL_DEF_REPLICAS], &ih->inoh_extras->inox_repls, sizeof(ih->inoh_extras->inox_repls)); } }
/*
 * Compare the old (bmi_orepls) and new (bmi_repls) per-replica states of
 * a bmap and apply the resulting changes to the "upsch" table: rows are
 * inserted via slm_upsch_insert(), deleted, or updated depending on how
 * each replica's state changed.
 *
 * @b: bmap whose replica states are examined.
 * @rel: if non-zero, finish by locking the bmap, clearing
 *	BMAPF_REPLMODWR, waking waiters and calling
 *	bmap_op_done_type(b, BMAP_OPCNT_WORK).
 *
 * The bmap's system and user priorities are read once at entry and
 * reset to -1 before returning.
 */
void
slm_repl_upd_write(struct bmap *b, int rel)
{
	/*
	 * Three work lists, one per kind of table operation.  They are
	 * filled through PUSH_IOS() in the classification loop and
	 * drained by the three loops that follow it.
	 */
	struct {
		sl_replica_t iosv[SL_MAX_REPLICAS];
		char *stat[SL_MAX_REPLICAS];
		unsigned nios;
	} add, del, chg;
	int off, vold, vnew, sprio, uprio, rc;
	struct sl_mds_iosinfo *si;
	struct bmap_mds_info *bmi;
	struct fidc_membh *f;
	struct sl_resource *r;
	sl_ios_id_t resid;
	unsigned n, nrepls;

	bmi = bmap_2_bmi(b);
	f = b->bcm_fcmh;
	sprio = bmi->bmi_sys_prio;
	uprio = bmi->bmi_usr_prio;

	add.nios = 0;
	del.nios = 0;
	chg.nios = 0;

	/* Classify the state transition of every replica of the file. */
	nrepls = fcmh_2_nrepls(f);
	for (n = 0, off = 0; n < nrepls; n++, off += SL_BITS_PER_REPLICA) {
		/*
		 * Replicas from index SL_DEF_REPLICAS onward need the
		 * inode extras.
		 * NOTE(review): the return value is ignored here, so a
		 * failed load is not detected before fcmh_2_repl() runs.
		 */
		if (n == SL_DEF_REPLICAS)
			mds_inox_ensure_loaded(fcmh_2_inoh(f));

		resid = fcmh_2_repl(f, n);
		vold = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_orepls, off);
		vnew = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_repls, off);

		/* Fall back to slm_null_iosinfo for an unknown resource. */
		r = libsl_id2res(resid);
		si = r ? res2iosinfo(r) : &slm_null_iosinfo;

		/* No state change for this replica: nothing to record. */
		if (vold == vnew)
			;

		/* Work was added. */
		else if ((vold != BREPLST_REPL_SCHED &&
		    vold != BREPLST_GARBAGE_QUEUED &&
		    vold != BREPLST_GARBAGE_SCHED &&
		    vnew == BREPLST_REPL_QUEUED) ||
		    (vold != BREPLST_GARBAGE_SCHED &&
		     vnew == BREPLST_GARBAGE_QUEUED &&
		     (si->si_flags & SIF_PRECLAIM_NOTSUP) == 0)) {
			OPSTAT_INCR("repl-work-add");
			PUSH_IOS(b, &add, resid, NULL);
		}

		/* Work has finished. */
		else if ((vold == BREPLST_REPL_QUEUED ||
		     vold == BREPLST_REPL_SCHED ||
		     vold == BREPLST_TRUNC_SCHED ||
		     vold == BREPLST_TRUNC_QUEUED ||
		     vold == BREPLST_GARBAGE_SCHED ||
		     vold == BREPLST_VALID) &&
		    (((si->si_flags & SIF_PRECLAIM_NOTSUP) &&
		      vnew == BREPLST_GARBAGE_QUEUED) ||
		     vnew == BREPLST_VALID ||
		     vnew == BREPLST_INVALID)) {
			OPSTAT_INCR("repl-work-del");
			PUSH_IOS(b, &del, resid, NULL);
		}

		/*
		 * Work that was previously scheduled failed so
		 * requeue it.
		 */
		else if (vold == BREPLST_REPL_SCHED ||
		    vold == BREPLST_GARBAGE_SCHED ||
		    vold == BREPLST_TRUNC_SCHED)
			PUSH_IOS(b, &chg, resid, "Q");

		/* Work was scheduled. */
		else if (vnew == BREPLST_REPL_SCHED ||
		    vnew == BREPLST_GARBAGE_SCHED ||
		    vnew == BREPLST_TRUNC_SCHED)
			PUSH_IOS(b, &chg, resid, "S");

		/*
		 * Work was reprioritized.
		 * NOTE(review): because vold == vnew is handled first,
		 * this branch is only reached when the state changed but
		 * matched none of the cases above; a pure priority change
		 * with an unchanged state never gets here.
		 */
		else if (sprio != -1 || uprio != -1)
			PUSH_IOS(b, &chg, resid, NULL);
	}

	/* New work: insert rows.  A failure is logged but not propagated. */
	for (n = 0; n < add.nios; n++) {
		rc = slm_upsch_insert(b, add.iosv[n].bs_id, sprio, uprio);
		if (!rc)
			continue;
		psclog_warnx("upsch insert failed: bno = %d, "
		    "fid=%"PRId64", ios= %d, rc = %d",
		    b->bcm_bmapno, bmap_2_fid(b), add.iosv[n].bs_id, rc);
	}

	/* Finished work: remove the matching rows, one statement per IOS. */
	for (n = 0; n < del.nios; n++) {
		spinlock(&slm_upsch_lock);
		dbdo(NULL, NULL,
		    " DELETE FROM upsch"
		    " WHERE resid = ?"
		    " AND fid = ?"
		    " AND bno = ?",
		    SQLITE_INTEGER, del.iosv[n].bs_id,
		    SQLITE_INTEGER64, bmap_2_fid(b),
		    SQLITE_INTEGER, b->bcm_bmapno);
		freelock(&slm_upsch_lock);
	}

	/*
	 * Changed work: update status and/or priorities.  A NULL status or
	 * a priority of -1 is bound as SQL NULL, so IFNULL() keeps the
	 * column's current value.
	 */
	for (n = 0; n < chg.nios; n++) {
		spinlock(&slm_upsch_lock);
		dbdo(NULL, NULL,
		    " UPDATE upsch"
		    " SET status = IFNULL(?, status),"
		    " sys_prio = IFNULL(?, sys_prio),"
		    " usr_prio = IFNULL(?, usr_prio)"
		    " WHERE resid = ?"
		    " AND fid = ?"
		    " AND bno = ?",
		    chg.stat[n] ? SQLITE_TEXT : SQLITE_NULL,
		    chg.stat[n] ? chg.stat[n] : 0,
		    sprio == -1 ? SQLITE_NULL : SQLITE_INTEGER,
		    sprio == -1 ? 0 : sprio,
		    uprio == -1 ? SQLITE_NULL : SQLITE_INTEGER,
		    uprio == -1 ? 0 : uprio,
		    SQLITE_INTEGER, chg.iosv[n].bs_id,
		    SQLITE_INTEGER64, bmap_2_fid(b),
		    SQLITE_INTEGER, b->bcm_bmapno);
		freelock(&slm_upsch_lock);
	}

	/* The pending priorities have been consumed; mark them unset. */
	bmap_2_bmi(b)->bmi_sys_prio = -1;
	bmap_2_bmi(b)->bmi_usr_prio = -1;

	if (rel) {
		BMAP_LOCK(b);
		b->bcm_flags &= ~BMAPF_REPLMODWR;
		bmap_wake_locked(b);
		bmap_op_done_type(b, BMAP_OPCNT_WORK);
	}
}
/*
 * Look up, add or delete an IOS in an inode's replica table.
 *
 * @vfsid: file system ID, passed to mds_inodes_odsync() when the table
 *	is modified.
 * @ih: inode handle whose replica table is examined.
 * @ios: IOS ID to search for.
 * @flag: one of IOSV_LOOKUPF_LOOKUP, IOSV_LOOKUPF_ADD or
 *	IOSV_LOOKUPF_DEL; any other value is fatal.
 *
 * Return the index of the given IOS ID or a negative error code on
 * failure:
 *	-ENOENT			the IOS is not in the table (LOOKUP and DEL).
 *	-ENOSPC			the table is full (ADD).
 *	-SLERR_RES_BADTYPE	the ID is unknown or fails RES_ISFS().
 * A failure of mds_inox_ensure_loaded() or mds_inodes_odsync() is
 * returned as reported by those functions.
 *
 * For ADD, the index of an already present IOS is returned without
 * modifying the table.
 */
int
_mds_repl_ios_lookup(int vfsid, struct slash_inode_handle *ih,
    sl_ios_id_t ios, int flag)
{
	int locked, rc;
	struct slm_inox_od *ix = NULL;
	struct sl_resource *res;
	struct fidc_membh *f;
	sl_replica_t *repl;
	uint32_t i, j, nr;
	char buf[LINE_MAX];

	switch (flag) {
	case IOSV_LOOKUPF_ADD:
		OPSTAT_INCR("replicate-add");
		break;
	case IOSV_LOOKUPF_DEL:
		OPSTAT_INCR("replicate-del");
		break;
	case IOSV_LOOKUPF_LOOKUP:
		OPSTAT_INCR("replicate-lookup");
		break;
	default:
		psc_fatalx("Invalid IOS lookup flag %d", flag);
	}

	/*
	 * Can I assume that IOS ID are non-zeros.  If so, I can use
	 * zero to mark a free slot.  See sl_global_id_build().
	 */
	f = inoh_2_fcmh(ih);

	/*
	 * Take the lock before sampling the replica count: this function
	 * itself changes ino_nrepls while holding it, so a count read
	 * beforehand could be stale by the time the table is walked.
	 */
	locked = INOH_RLOCK(ih);
	nr = ih->inoh_ino.ino_nrepls;
	repl = ih->inoh_ino.ino_repls;

	psc_assert(nr <= SL_MAX_REPLICAS);
	if (nr == SL_MAX_REPLICAS && flag == IOSV_LOOKUPF_ADD) {
		DEBUG_INOH(PLL_WARN, ih, buf, "too many replicas");
		PFL_GOTOERR(out, rc = -ENOSPC);
	}

	res = libsl_id2res(ios);
	if (res == NULL || !RES_ISFS(res))
		PFL_GOTOERR(out, rc = -SLERR_RES_BADTYPE);

	/*
	 * 09/29/2016: Hit SLERR_SHORTIO in the function.  Need more
	 * investigation.
	 */

	/*
	 * Return ENOENT by default for IOSV_LOOKUPF_DEL &
	 * IOSV_LOOKUPF_LOOKUP.
	 */
	rc = -ENOENT;

	/*
	 * Search the existing replicas to see if the given IOS is
	 * already there.
	 *
	 * The following code can step through zero IOS IDs just fine.
	 */
	for (i = 0, j = 0; i < nr; i++, j++) {
		if (i == SL_DEF_REPLICAS) {
			/*
			 * The first few replicas are in the inode
			 * itself, the rest are in the extra inode
			 * block.
			 */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			/*
			 * The successful load just set rc to zero.  Put
			 * the "not found" default back, otherwise a miss
			 * would be reported as a hit at index 0.
			 */
			rc = -ENOENT;
			ix = ih->inoh_extras;
			repl = ix->inox_repls;
			j = 0;
		}

		DEBUG_INOH(PLL_DEBUG, ih, buf, "is rep[%u](=%u) == %u ?",
		    j, repl[j].bs_id, ios);

		if (repl[j].bs_id == ios) {
			/*
			 * Luckily, this code is only called by
			 * mds_repl_delrq() for directories.
			 *
			 * Make sure that the logic works for at least
			 * the following edge cases:
			 *
			 *    (1) There is only one item in the basic array.
			 *    (2) There is only one item in the extra array.
			 *    (3) The number of items is SL_DEF_REPLICAS.
			 *    (4) The number of items is SL_MAX_REPLICAS.
			 */
			if (flag == IOSV_LOOKUPF_DEL) {
				/*
				 * If the match is in the basic array but
				 * entries spill into the extra array, load
				 * the extras before modifying anything so
				 * that a load failure leaves the table
				 * untouched.
				 */
				if (i < SL_DEF_REPLICAS &&
				    nr > SL_DEF_REPLICAS) {
					rc = mds_inox_ensure_loaded(ih);
					if (rc)
						goto out;
					ix = ih->inoh_extras;
				}

				/*
				 * Compact the array if the IOS is not the
				 * last one.  The last one will be either
				 * overwritten or zeroed.  Note that we might
				 * move extra garbage at the end if the total
				 * number is less than SL_DEF_REPLICAS.
				 */
				if (i < SL_DEF_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1],
					    (SL_DEF_REPLICAS - j - 1) *
					    sizeof(*repl));
				}

				/*
				 * All items in the basic array, zero the
				 * last one and we are done.
				 */
				if (nr <= SL_DEF_REPLICAS) {
					repl[nr - 1].bs_id = 0;
					goto syncit;
				}

				/*
				 * Now we know we have more than
				 * SL_DEF_REPLICAS items.  If the match was
				 * in the basic array, move the first item of
				 * the extra array into the last slot of the
				 * basic array (overwrite), then continue
				 * compacting from the start of the extras.
				 */
				if (i < SL_DEF_REPLICAS) {
					repl[SL_DEF_REPLICAS - 1].bs_id =
					    ix->inox_repls[0].bs_id;

					repl = ix->inox_repls;
					j = 0;
				}

				/*
				 * Compact the extra array unless the IOS is
				 * the last one, which will be zeroed.
				 */
				if (i < SL_MAX_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1],
					    (SL_INOX_NREPLICAS - j - 1) *
					    sizeof(*repl));
				}

				repl[nr - SL_DEF_REPLICAS - 1].bs_id = 0;
 syncit:
				ih->inoh_ino.ino_nrepls = nr - 1;
				rc = mds_inodes_odsync(vfsid, f,
				    mdslog_ino_repls);
				if (rc)
					goto out;
			}
			/* XXX EEXIST for IOSV_LOOKUPF_ADD? */
			rc = i;
			goto out;
		}
	}

	/* It doesn't exist; add to inode replica table if requested. */
	if (flag == IOSV_LOOKUPF_ADD) {

		/* paranoid */
		psc_assert(i == nr);
		if (nr >= SL_DEF_REPLICAS) {
			/* be careful with the case of nr = SL_DEF_REPLICAS */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;

			repl = ih->inoh_extras->inox_repls;
			j = i - SL_DEF_REPLICAS;

		} else {
			repl = ih->inoh_ino.ino_repls;
			j = i;
		}

		repl[j].bs_id = ios;

		DEBUG_INOH(PLL_DIAG, ih, buf, "add IOS(%u) at idx %d",
		    ios, i);

		ih->inoh_ino.ino_nrepls = nr + 1;
		rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
		if (!rc)
			rc = i;
	}

 out:
	INOH_URLOCK(ih, locked);
	return (rc);
}
int mds_inode_update_interrupted(int vfsid, struct slash_inode_handle *ih, int *rc) { char fn[NAME_MAX + 1]; struct srt_stat sstb; struct iovec iovs[2]; uint64_t crc, od_crc; void *h = NULL, *th; mdsio_fid_t inum; int exists = 0; size_t nb; th = inoh_2_mfh(ih); snprintf(fn, sizeof(fn), "%016"PRIx64".update", inoh_2_fid(ih)); *rc = mdsio_lookup(vfsid, mds_tmpdir_inum[vfsid], fn, &inum, &rootcreds, NULL); if (*rc) PFL_GOTOERR(out, *rc); *rc = mdsio_opencreatef(vfsid, inum, &rootcreds, O_RDONLY, MDSIO_OPENCRF_NOLINK, 0644, NULL, NULL, NULL, &h, NULL, NULL, 0); if (*rc) PFL_GOTOERR(out, *rc); iovs[0].iov_base = &ih->inoh_ino; iovs[0].iov_len = sizeof(ih->inoh_ino); iovs[1].iov_base = &od_crc; iovs[1].iov_len = sizeof(od_crc); *rc = mdsio_preadv(vfsid, &rootcreds, iovs, nitems(iovs), &nb, 0, h); if (*rc) PFL_GOTOERR(out, *rc); psc_crc64_calc(&crc, &ih->inoh_ino, sizeof(ih->inoh_ino)); if (crc != od_crc) { *rc = PFLERR_BADCRC; PFL_GOTOERR(out, *rc); } exists = 1; psc_assert(ih->inoh_extras == NULL); ih->inoh_extras = PSCALLOC(INOX_SZ); inoh_2_mfh(ih) = h; *rc = mds_inox_ensure_loaded(ih); if (*rc) PFL_GOTOERR(out, *rc); inoh_2_mfh(ih) = th; memset(&sstb, 0, sizeof(sstb)); *rc = mdsio_setattr(vfsid, 0, &sstb, SL_SETATTRF_METASIZE, &rootcreds, NULL, th, NULL); if (*rc) PFL_GOTOERR(out, *rc); *rc = mds_inode_dump(vfsid, NULL, ih, h); if (*rc) PFL_GOTOERR(out, *rc); mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds, NULL, NULL); out: if (h) mdsio_release(vfsid, &rootcreds, h); if (*rc) mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn, &rootcreds, NULL, NULL); inoh_2_mfh(ih) = th; return (exists); }