Exemple #1
0
int
mds_bmap_read_v1(struct bmapc_memb *b, void *readh)
{
	struct {
		uint8_t		crcstates[128];
		uint8_t		repls[24];
		uint64_t	crcs[128];
		uint32_t	gen;
		uint32_t	replpol;
	} bod;
	struct fidc_membh *f;
	struct iovec iovs[2];
	uint64_t crc, od_crc;
	int i, rc, vfsid;
	size_t nb, bsz;
	struct bmap_mds_info *bmi = bmap_2_bmi(b);

	bsz = sizeof(bod) + sizeof(crc);

	iovs[0].iov_base = &bod;
	iovs[0].iov_len = sizeof(bod);
	iovs[1].iov_base = &od_crc;
	iovs[1].iov_len = sizeof(od_crc);
	f = b->bcm_fcmh;
	slfid_to_vfsid(fcmh_2_fid(f), &vfsid);
	rc = mdsio_preadv(vfsid, &rootcreds, iovs, nitems(iovs), &nb,
	    bsz * b->bcm_bmapno + 0x1000, readh);

	if (rc)
		return (rc);
	if (nb == 0)
		return (SLERR_BMAP_INVALID);
	if (nb != bsz)
		return (SLERR_SHORTIO);

	psc_crc64_calc(&crc, &bod, sizeof(bod));
	if (crc != od_crc)
		return (PFLERR_BADCRC);
	for (i = 0; i < 128; i++)
		bmi->bmi_crcstates[i] = bod.crcstates[i];
	for (i = 0; i < 24; i++)
		bmi->bmi_repls[i] = bod.repls[i];
	for (i = 0; i < 128; i++)
		bmap_2_crcs(b, i) = bod.crcs[i];
	bmap_2_bgen(b) = bod.gen;
	bmap_2_replpol(b) = bod.replpol;
	return (0);
}
Exemple #2
0
/*
 * Handle a BMAPCHWRMODE request to upgrade a client bmap lease from
 * READ-only to READ+WRITE.
 * @rq: RPC request.
 */
int
slm_rmc_handle_bmap_chwrmode(struct pscrpc_request *rq)
{
	struct bmap_mds_lease *bml = NULL;
	struct srm_bmap_chwrmode_req *mq;
	struct srm_bmap_chwrmode_rep *mp;
	struct fidc_membh *f = NULL;
	struct bmapc_memb *b = NULL;
	struct bmap_mds_info *bmi;

	SL_RSX_ALLOCREP(rq, mq, mp);
	mp->rc = -slm_fcmh_get(&mq->sbd.sbd_fg, &f);
	if (mp->rc)
		PFL_GOTOERR(out, mp->rc);
	mp->rc = bmap_lookup(f, mq->sbd.sbd_bmapno, &b);
	if (mp->rc)
		PFL_GOTOERR(out, mp->rc);

	bmi = bmap_2_bmi(b);

	bml = mds_bmap_getbml(b, mq->sbd.sbd_seq,
	    mq->sbd.sbd_nid, mq->sbd.sbd_pid);

	if (bml == NULL)
		PFL_GOTOERR(out, mp->rc = -EINVAL);

	mp->rc = mds_bmap_bml_chwrmode(bml, mq->prefios[0]);
	if (mp->rc == -PFLERR_ALREADY)
		mp->rc = 0;
	else if (mp->rc)
		PFL_GOTOERR(out, mp->rc);

	mp->sbd = mq->sbd;
	mp->sbd.sbd_seq = bml->bml_seq;
	mp->sbd.sbd_key = bmi->bmi_assign->odtr_crc;

	psc_assert(bmi->bmi_wr_ion);
	mp->sbd.sbd_ios = rmmi2resm(bmi->bmi_wr_ion)->resm_res_id;

 out:
	if (bml)
		mds_bmap_bml_release(bml);
	if (b)
		bmap_op_done(b);
	if (f)
		fcmh_op_done(f);
	return (0);
}
Exemple #3
0
/*
 * Handle a request to do replication from a client.  May also
 * reinitialize some parameters of the replication, such as priority, if
 * the request already exists in the system.
 */
int
mds_repl_addrq(const struct sl_fidgen *fgp, sl_bmapno_t bmapno,
    sl_bmapno_t *nbmaps, sl_replica_t *iosv, int nios, int sys_prio,
    int usr_prio)
{
	int tract[NBREPLST], ret_hasvalid[NBREPLST];
	int iosidx[SL_MAX_REPLICAS], rc, flags;
	sl_bmapno_t nbmaps_processed = 0;
	struct fidc_membh *f = NULL;
	struct bmap *b;

	/* Perform sanity checks on request. */
	if (nios < 1 || nios > SL_MAX_REPLICAS || *nbmaps == 0)
		return (-EINVAL);

	rc = slm_fcmh_get(fgp, &f);
	if (rc)
		return (-rc);

	if (!fcmh_isdir(f) && !fcmh_isreg(f))
		PFL_GOTOERR(out, rc = -PFLERR_NOTSUP);

	/* Lookup replica(s)' indexes in our replica table. */
	rc = -mds_repl_iosv_lookup_add(current_vfsid, fcmh_2_inoh(f),
	    iosv, iosidx, nios);
	if (rc)
		PFL_GOTOERR(out, rc);

	/*
	 * If we are modifying a directory, we are done as just the
	 * replica table needs to be updated.
	 */
	if (fcmh_isdir(f))
		PFL_GOTOERR(out, 0);

	/*
	 * Setup structure to ensure at least one VALID replica exists.
	 */
	brepls_init(ret_hasvalid, 0);
	ret_hasvalid[BREPLST_VALID] = 1;

	/*
	 * Setup transitions to enqueue a replication.
	 */
	brepls_init(tract, -1);
	tract[BREPLST_INVALID] = BREPLST_REPL_QUEUED;
	tract[BREPLST_GARBAGE_SCHED] = BREPLST_REPL_QUEUED;
	tract[BREPLST_GARBAGE_QUEUED] = BREPLST_REPL_QUEUED;

	/* Wildcards shouldn't result in errors on zero-length files. */
	if (*nbmaps != (sl_bmapno_t)-1)
		rc = -SLERR_BMAP_INVALID;

	for (; *nbmaps && bmapno < fcmh_nvalidbmaps(f);
	    bmapno++, --*nbmaps, nbmaps_processed++) {

		if (nbmaps_processed >= SLM_REPLRQ_NBMAPS_MAX) {
			rc = -PFLERR_WOULDBLOCK;
			break;
		}

		rc = -bmap_get(f, bmapno, SL_WRITE, &b);
		if (rc)
			PFL_GOTOERR(out, rc);

		/*
		 * If no VALID replicas exist, the bmap must be
		 * uninitialized/all zeroes; skip it.
		 */
		if (mds_repl_bmap_walk_all(b, NULL, ret_hasvalid,
		    REPL_WALKF_SCIRCUIT) == 0) {
			bmap_op_done(b);
			continue;
		}

		/*
		 * We do not follow the standard "retifset" API here
		 * because we need to preserve DIRTY if it gets set
		 * instead of some other state getting returned.
		 */
		flags = 0;
		_mds_repl_bmap_walk(b, tract, NULL, 0, iosidx, nios,
		    slm_repl_addrq_cb, &flags);

		/* both default to -1 in parse_replrq() */
		bmap_2_bmi(b)->bmi_sys_prio = sys_prio;
		bmap_2_bmi(b)->bmi_usr_prio = usr_prio;
		if (flags & FLAG_DIRTY)
			mds_bmap_write_logrepls(b);
		else if (sys_prio != -1 || usr_prio != -1)
			slm_repl_upd_write(b, 0);

		bmap_op_done_type(b, BMAP_OPCNT_LOOKUP);
		if (flags & FLAG_REPLICA_STATE_INVALID) {
			/* See pfl_register_errno() */
			rc = -SLERR_REPLICA_STATE_INVALID;
			break;
		}
	}

 out:
	if (f)
		fcmh_op_done(f);
	*nbmaps = nbmaps_processed;
	return (rc);
}
Exemple #4
0
void
slm_repl_upd_write(struct bmap *b, int rel)
{
	struct {
		sl_replica_t	 iosv[SL_MAX_REPLICAS];
		char		*stat[SL_MAX_REPLICAS];
		unsigned	 nios;
	} add, del, chg;

	int off, vold, vnew, sprio, uprio, rc;
	struct sl_mds_iosinfo *si;
	struct bmap_mds_info *bmi;
	struct fidc_membh *f;
	struct sl_resource *r;
	sl_ios_id_t resid;
	unsigned n, nrepls;

	bmi = bmap_2_bmi(b);
	f = b->bcm_fcmh;
	sprio = bmi->bmi_sys_prio;
	uprio = bmi->bmi_usr_prio;

	add.nios = 0;
	del.nios = 0;
	chg.nios = 0;
	nrepls = fcmh_2_nrepls(f);
	for (n = 0, off = 0; n < nrepls; n++, off += SL_BITS_PER_REPLICA) {

		if (n == SL_DEF_REPLICAS)
			mds_inox_ensure_loaded(fcmh_2_inoh(f));

		resid = fcmh_2_repl(f, n);
		vold = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_orepls, off);
		vnew = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_repls, off);

		r = libsl_id2res(resid);
		si = r ? res2iosinfo(r) : &slm_null_iosinfo;

		if (vold == vnew)
			;

		/* Work was added. */
		else if ((vold != BREPLST_REPL_SCHED &&
		    vold != BREPLST_GARBAGE_QUEUED &&
		    vold != BREPLST_GARBAGE_SCHED &&
		    vnew == BREPLST_REPL_QUEUED) ||
		    (vold != BREPLST_GARBAGE_SCHED &&
		     vnew == BREPLST_GARBAGE_QUEUED &&
		     (si->si_flags & SIF_PRECLAIM_NOTSUP) == 0)) {
			OPSTAT_INCR("repl-work-add");
			PUSH_IOS(b, &add, resid, NULL);
		}

		/* Work has finished. */
		else if ((vold == BREPLST_REPL_QUEUED ||
		     vold == BREPLST_REPL_SCHED ||
		     vold == BREPLST_TRUNC_SCHED ||
		     vold == BREPLST_TRUNC_QUEUED ||
		     vold == BREPLST_GARBAGE_SCHED ||
		     vold == BREPLST_VALID) &&
		    (((si->si_flags & SIF_PRECLAIM_NOTSUP) &&
		      vnew == BREPLST_GARBAGE_QUEUED) ||
		     vnew == BREPLST_VALID ||
		     vnew == BREPLST_INVALID)) {
			OPSTAT_INCR("repl-work-del");
			PUSH_IOS(b, &del, resid, NULL);
		}

		/*
		 * Work that was previously scheduled failed so 
		 * requeue it.
		 */
		else if (vold == BREPLST_REPL_SCHED ||
		    vold == BREPLST_GARBAGE_SCHED ||
		    vold == BREPLST_TRUNC_SCHED)
			PUSH_IOS(b, &chg, resid, "Q");

		/* Work was scheduled. */
		else if (vnew == BREPLST_REPL_SCHED ||
		    vnew == BREPLST_GARBAGE_SCHED ||
		    vnew == BREPLST_TRUNC_SCHED)
			PUSH_IOS(b, &chg, resid, "S");

		/* Work was reprioritized. */
		else if (sprio != -1 || uprio != -1)
			PUSH_IOS(b, &chg, resid, NULL);
	}

	for (n = 0; n < add.nios; n++) {
		rc = slm_upsch_insert(b, add.iosv[n].bs_id, sprio,
		    uprio);
		if (!rc)
			continue;
		psclog_warnx("upsch insert failed: bno = %d, "
		    "fid=%"PRId64", ios= %d, rc = %d",
		    b->bcm_bmapno, bmap_2_fid(b), 
		    add.iosv[n].bs_id, rc);
	}

	for (n = 0; n < del.nios; n++) {
		spinlock(&slm_upsch_lock);
		dbdo(NULL, NULL,
		    " DELETE FROM upsch"
		    " WHERE	resid = ?"
		    "   AND	fid = ?"
		    "   AND	bno = ?",
		    SQLITE_INTEGER, del.iosv[n].bs_id,
		    SQLITE_INTEGER64, bmap_2_fid(b),
		    SQLITE_INTEGER, b->bcm_bmapno);
		freelock(&slm_upsch_lock);
	}

	for (n = 0; n < chg.nios; n++) {
		spinlock(&slm_upsch_lock);
		dbdo(NULL, NULL,
		    " UPDATE	upsch"
		    " SET	status = IFNULL(?, status),"
		    "		sys_prio = IFNULL(?, sys_prio),"
		    "		usr_prio = IFNULL(?, usr_prio)"
		    " WHERE	resid = ?"
		    "	AND	fid = ?"
		    "	AND	bno = ?",
		    chg.stat[n] ? SQLITE_TEXT : SQLITE_NULL,
		    chg.stat[n] ? chg.stat[n] : 0,
		    sprio == -1 ? SQLITE_NULL : SQLITE_INTEGER,
		    sprio == -1 ? 0 : sprio,
		    uprio == -1 ? SQLITE_NULL : SQLITE_INTEGER,
		    uprio == -1 ? 0 : uprio,
		    SQLITE_INTEGER, chg.iosv[n].bs_id,
		    SQLITE_INTEGER64, bmap_2_fid(b),
		    SQLITE_INTEGER, b->bcm_bmapno);
		freelock(&slm_upsch_lock);
	}

	bmap_2_bmi(b)->bmi_sys_prio = -1;
	bmap_2_bmi(b)->bmi_usr_prio = -1;

	if (rel) {
		BMAP_LOCK(b);
		b->bcm_flags &= ~BMAPF_REPLMODWR;
		bmap_wake_locked(b);
		bmap_op_done_type(b, BMAP_OPCNT_WORK);
	}
}
Exemple #5
0
/*
 * For the given bmap, change the status of all its replicas marked
 * "valid" to "invalid" except for the replica specified.
 *
 * This is a high-level convenience call provided to easily update
 * status after an ION has received some new I/O, which would make all
 * other existing copies of the bmap on any other replicas old.
 * @b: the bmap.
 * @iosidx: the index of the only ION resource in the inode replica
 *	table that should be marked "valid".
 *
 * Note: All callers must journal log these bmap replica changes
 *	themselves.  In addition, they must log any changes to the inode
 *	_before_ the bmap changes.  Otherwise, we could end up actually
 *	having bmap replicas that are not recognized by the information
 *	stored in the inode during log replay.
 */
int
mds_repl_inv_except(struct bmap *b, int iosidx)
{
	int rc, logit = 0, tract[NBREPLST], retifset[NBREPLST];
	uint32_t policy;

	/* Ensure replica on active IOS is marked valid. */
	brepls_init(tract, -1);
	tract[BREPLST_INVALID] = BREPLST_VALID;
	tract[BREPLST_GARBAGE_SCHED] = BREPLST_VALID;
	tract[BREPLST_GARBAGE_QUEUED] = BREPLST_VALID;

	/*
	 * The old state for this bmap on the given IOS is
	 * either valid or invalid.
	 */
	brepls_init_idx(retifset);
	retifset[BREPLST_INVALID] = 0;
	retifset[BREPLST_VALID] = 0;

	/*
	 * XXX on full truncate, the metafile will exist, which means
	 * the bmap states will exist, which means a new IOS will be
	 * selected which will probably be GARBAGE after truncate
	 * happens a few times.
	 */
	rc = mds_repl_bmap_walk(b, tract, retifset, 0, &iosidx, 1);
	if (rc) {
		psclog_errorx("bcs_repls has active IOS marked in a "
		    "weird state while invalidating other replicas; "
		    "fid="SLPRI_FID" bmap=%d iosidx=%d state=%d",
		    fcmh_2_fid(b->bcm_fcmh), b->bcm_bmapno, iosidx, rc);
	}

	policy = bmap_2_replpol(b);

	/*
	 * Invalidate all other replicas.
	 * Note: if the status is SCHED here, don't do anything; once
	 * the replication status update comes from the ION, we will
	 * know he copied an old bmap and mark it OLD then.
	 */
	brepls_init(tract, -1);
	tract[BREPLST_VALID] = policy == BRPOL_PERSIST ?
	    BREPLST_REPL_QUEUED : BREPLST_GARBAGE_QUEUED;
	tract[BREPLST_REPL_SCHED] = BREPLST_REPL_QUEUED;

	brepls_init(retifset, 0);
	retifset[BREPLST_VALID] = 1;
	retifset[BREPLST_REPL_SCHED] = 1;

	if (_mds_repl_bmap_walk(b, tract, retifset, REPL_WALKF_MODOTH,
	    &iosidx, 1, NULL, NULL)) {
		logit = 1;
		BHGEN_INCREMENT(b);
	}
	if (logit)
		rc = mds_bmap_write_logrepls(b);
	else
		rc = mds_bmap_write(b, NULL, NULL);

	/*
	 * If this bmap is marked for persistent replication, the repl
	 * request must exist and should be marked such that the
	 * replication monitors do not release it in the midst of
	 * processing it as this activity now means they have more to
	 * do.
	 */
	if (policy == BRPOL_PERSIST)
		upsch_enqueue(&bmap_2_bmi(b)->bmi_upd);
	return (rc);
}
Exemple #6
0
/*
 * Apply a translation matrix of residency states to a bmap.
 * @b: bmap.
 * @tract: translation actions, indexed by current bmap state with
 *	corresponding values to the new state that should be assigned.
 *	For example, index BREPLST_VALID in the array with the value
 *	BREPLST_INVALID would render a VALID state to an INVALID.
 * @retifset: return value, indexed in the same manner as @tract.
 * @flags: behavioral flags.
 * @off: offset int bmap residency table for IOS intended to be
 *	changed/queried.
 * @scircuit: value-result for batch operations.
 * @cbf: callback routine for more detailed processing.
 * @cbarg: argument to callback.
 *
 */
int
_mds_repl_bmap_apply(struct bmap *b, const int *tract,
    const int *retifset, int flags, int off, int *scircuit,
    brepl_walkcb_t cbf, void *cbarg)
{
	int val, rc = 0;
	struct timeval tv1, tv2, tvd;
	struct bmap_mds_info *bmi = bmap_2_bmi(b);

	BMAP_LOCK_ENSURE(b);
	if (tract) {
		/*
		 * The caller must set the flag if modifications are made.
		 */
		PFL_GETTIMEVAL(&tv1);
		bmap_wait_locked(b, b->bcm_flags & BMAPF_REPLMODWR);
		PFL_GETTIMEVAL(&tv2);
		timersub(&tv2, &tv1, &tvd);
		OPSTAT_ADD("bmap-wait-usecs", tvd.tv_sec * 1000000 + tvd.tv_usec);

		memcpy(bmi->bmi_orepls, bmi->bmi_repls,
		    sizeof(bmi->bmi_orepls));
		psc_assert((flags & REPL_WALKF_SCIRCUIT) == 0);
	}

	if (scircuit)
		*scircuit = 0;
	else
		psc_assert((flags & REPL_WALKF_SCIRCUIT) == 0);

	/* retrieve IOS status given a bit offset into the map */
	val = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_repls, off);

	if (val >= NBREPLST)
		psc_fatalx("corrupt bmap, val = %d, bno = %d, fid="SLPRI_FID,
			 val, b->bcm_bmapno, fcmh_2_fid(b->bcm_fcmh));

	/* callback can also be used to track if we did make any changes */
	if (cbf)
		cbf(b, off / SL_BITS_PER_REPLICA, val, cbarg);

	/* check for & apply return values */
	if (retifset && retifset[val]) {
		rc = retifset[val];
		if (flags & REPL_WALKF_SCIRCUIT) {
			*scircuit = 1;
			goto out;
		}
	}

	/* apply any translations - this must be done after retifset */
	if (tract && tract[val] != -1) {
		DEBUG_BMAPOD(PLL_DEBUG, b, "before modification");
		SL_REPL_SET_BMAP_IOS_STAT(bmi->bmi_repls, off,
		    tract[val]);
		DEBUG_BMAPOD(PLL_DEBUG, b, "after modification");
	}

 out:
	return (rc);
}