Пример #1
0
/**
 * _slm_fcmh_endow - "Endow" or apply inheritance to a new directory
 *	entry from its parent directory replica layout.
 * @vfsid: file system ID, passed through to mds_inodes_odsync().
 * @p: parent fcmh whose replication policy and replica table are
 *	copied.
 * @c: child fcmh that receives the copy.
 * @wr: if nonzero, write the child's updated inode out through
 *	mds_inodes_odsync().
 *
 * The parent's policy and table are snapshotted into locals while the
 * parent is locked; the parent is unlocked before the child is
 * touched.
 *
 * Returns zero on success, otherwise the error reported by
 * mds_inox_ensure_loaded() or mds_inodes_odsync().  If loading the
 * child's extra inode block fails, the child's in-memory policy,
 * replica count and basic table have already been updated but nothing
 * is written out.
 */
int
_slm_fcmh_endow(int vfsid, struct fidc_membh *p, struct fidc_membh *c,
    int wr)
{
	sl_replica_t repls[SL_MAX_REPLICAS];
	int nr, rc = 0;
	uint32_t pol;

	FCMH_LOCK(p);
	pol = fcmh_2_ino(p)->ino_replpol;
	nr = fcmh_2_nrepls(p);
	memcpy(repls, fcmh_2_ino(p)->ino_repls, sizeof(repls[0]) *
	    SL_DEF_REPLICAS);
	if (nr > SL_DEF_REPLICAS) {
		/*
		 * Only touch the extra block if it was actually
		 * loaded; otherwise we would copy from memory that may
		 * not be there.
		 */
		rc = mds_inox_ensure_loaded(fcmh_2_inoh(p));
		if (rc == 0)
			memcpy(&repls[SL_DEF_REPLICAS],
			    fcmh_2_inox(p)->inox_repls,
			    sizeof(repls[0]) * SL_INOX_NREPLICAS);
	}
	FCMH_ULOCK(p);
	if (rc)
		return (rc);

	FCMH_WAIT_BUSY(c);
	fcmh_2_replpol(c) = pol;
	fcmh_2_ino(c)->ino_nrepls = nr;
	memcpy(fcmh_2_ino(c)->ino_repls, repls, sizeof(repls[0]) *
	    SL_DEF_REPLICAS);
	if (nr > SL_DEF_REPLICAS) {
		rc = mds_inox_ensure_loaded(fcmh_2_inoh(c));
		if (rc)
			goto out;
		memcpy(fcmh_2_inox(c)->inox_repls,
		    &repls[SL_DEF_REPLICAS], sizeof(repls[0]) *
		    SL_INOX_NREPLICAS);
	}
	if (wr)
		rc = mds_inodes_odsync(vfsid, c, mdslog_ino_repls);

 out:
	FCMH_UNBUSY(c);
	return (rc);
}
Пример #2
0
/*
 * Fill a wire-format inode (struct srt_inode) from the in-memory inode
 * attached to an fcmh.
 *
 * @f: fcmh whose inode handle is the source.
 * @in: destination; newreplpol, nrepls and reptbl are written.
 *
 * The replica entries held in the inode itself are always copied.  The
 * entries held in the extra inode block are appended only when the
 * inode has more than SL_DEF_REPLICAS replicas and the extra block
 * loads successfully; a load failure is silently ignored and leaves
 * the tail of reptbl untouched.
 */
void
slm_pack_inode(struct fidc_membh *f, struct srt_inode *in)
{
	struct slash_inode_handle *inoh = fcmh_2_inoh(f);

	in->newreplpol = inoh->inoh_ino.ino_replpol;
	in->nrepls = inoh->inoh_ino.ino_nrepls;
	memcpy(in->reptbl, &inoh->inoh_ino.ino_repls,
	    sizeof(inoh->inoh_ino.ino_repls));

	/* Everything fits in the basic table: nothing more to pack. */
	if (in->nrepls <= SL_DEF_REPLICAS)
		return;

	/* Could not bring in the extra block: leave the tail as is. */
	if (mds_inox_ensure_loaded(inoh))
		return;

	memcpy(&in->reptbl[SL_DEF_REPLICAS],
	    &inoh->inoh_extras->inox_repls,
	    sizeof(inoh->inoh_extras->inox_repls));
}
Пример #3
0
/*
 * Reconcile the "upsch" database table with a bmap's replica state
 * change.
 *
 * For every replica of the bmap's file, the per-IOS state in
 * bmi_orepls (old) is compared with the one in bmi_repls (new) and the
 * IOS is placed on one of three lists: work to add, work to delete, or
 * work whose row must be changed.  The lists are then applied:
 * additions through slm_upsch_insert(), deletions and changes through
 * SQL statements run with slm_upsch_lock held.
 *
 * @b: bmap whose replica states are examined.
 * @rel: if nonzero, on the way out clear BMAPF_REPLMODWR, wake waiters
 *	and call bmap_op_done_type(b, BMAP_OPCNT_WORK).
 *
 * The bmap's system and user priorities are reset to -1 before
 * returning.
 */
void
slm_repl_upd_write(struct bmap *b, int rel)
{
	/*
	 * One work list per action: IOS IDs, an optional status string
	 * for each, and the number of entries in use.
	 */
	struct {
		sl_replica_t	 iosv[SL_MAX_REPLICAS];
		char		*stat[SL_MAX_REPLICAS];
		unsigned	 nios;
	} add, del, chg;

	int off, vold, vnew, sprio, uprio, rc;
	struct sl_mds_iosinfo *si;
	struct bmap_mds_info *bmi;
	struct fidc_membh *f;
	struct sl_resource *r;
	sl_ios_id_t resid;
	unsigned n, nrepls;

	bmi = bmap_2_bmi(b);
	f = b->bcm_fcmh;
	sprio = bmi->bmi_sys_prio;
	uprio = bmi->bmi_usr_prio;

	add.nios = 0;
	del.nios = 0;
	chg.nios = 0;
	nrepls = fcmh_2_nrepls(f);
	/* `off' tracks the bit offset of replica n's state. */
	for (n = 0, off = 0; n < nrepls; n++, off += SL_BITS_PER_REPLICA) {

		/*
		 * About to step past the replicas kept in the inode
		 * itself; the return value of the load is not checked.
		 */
		if (n == SL_DEF_REPLICAS)
			mds_inox_ensure_loaded(fcmh_2_inoh(f));

		resid = fcmh_2_repl(f, n);
		vold = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_orepls, off);
		vnew = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_repls, off);

		/* Unknown resource IDs fall back to the null IOS info. */
		r = libsl_id2res(resid);
		si = r ? res2iosinfo(r) : &slm_null_iosinfo;

		/*
		 * The branches below are tried in order; the first one
		 * that matches decides the action for this IOS.
		 */

		/* No state change for this IOS: nothing to do. */
		if (vold == vnew)
			;

		/* Work was added. */
		else if ((vold != BREPLST_REPL_SCHED &&
		    vold != BREPLST_GARBAGE_QUEUED &&
		    vold != BREPLST_GARBAGE_SCHED &&
		    vnew == BREPLST_REPL_QUEUED) ||
		    (vold != BREPLST_GARBAGE_SCHED &&
		     vnew == BREPLST_GARBAGE_QUEUED &&
		     (si->si_flags & SIF_PRECLAIM_NOTSUP) == 0)) {
			OPSTAT_INCR("repl-work-add");
			PUSH_IOS(b, &add, resid, NULL);
		}

		/* Work has finished. */
		else if ((vold == BREPLST_REPL_QUEUED ||
		     vold == BREPLST_REPL_SCHED ||
		     vold == BREPLST_TRUNC_SCHED ||
		     vold == BREPLST_TRUNC_QUEUED ||
		     vold == BREPLST_GARBAGE_SCHED ||
		     vold == BREPLST_VALID) &&
		    (((si->si_flags & SIF_PRECLAIM_NOTSUP) &&
		      vnew == BREPLST_GARBAGE_QUEUED) ||
		     vnew == BREPLST_VALID ||
		     vnew == BREPLST_INVALID)) {
			OPSTAT_INCR("repl-work-del");
			PUSH_IOS(b, &del, resid, NULL);
		}

		/*
		 * Work that was previously scheduled failed so
		 * requeue it.
		 */
		else if (vold == BREPLST_REPL_SCHED ||
		    vold == BREPLST_GARBAGE_SCHED ||
		    vold == BREPLST_TRUNC_SCHED)
			PUSH_IOS(b, &chg, resid, "Q");

		/* Work was scheduled. */
		else if (vnew == BREPLST_REPL_SCHED ||
		    vnew == BREPLST_GARBAGE_SCHED ||
		    vnew == BREPLST_TRUNC_SCHED)
			PUSH_IOS(b, &chg, resid, "S");

		/* Work was reprioritized. */
		else if (sprio != -1 || uprio != -1)
			PUSH_IOS(b, &chg, resid, NULL);
	}

	/* Apply additions; a failed insert is logged and skipped. */
	for (n = 0; n < add.nios; n++) {
		rc = slm_upsch_insert(b, add.iosv[n].bs_id, sprio,
		    uprio);
		if (!rc)
			continue;
		psclog_warnx("upsch insert failed: bno = %d, "
		    "fid=%"PRId64", ios= %d, rc = %d",
		    b->bcm_bmapno, bmap_2_fid(b), 
		    add.iosv[n].bs_id, rc);
	}

	/* Apply deletions: remove the (resid, fid, bno) row. */
	for (n = 0; n < del.nios; n++) {
		spinlock(&slm_upsch_lock);
		dbdo(NULL, NULL,
		    " DELETE FROM upsch"
		    " WHERE	resid = ?"
		    "   AND	fid = ?"
		    "   AND	bno = ?",
		    SQLITE_INTEGER, del.iosv[n].bs_id,
		    SQLITE_INTEGER64, bmap_2_fid(b),
		    SQLITE_INTEGER, b->bcm_bmapno);
		freelock(&slm_upsch_lock);
	}

	/*
	 * Apply changes.  A NULL bind leaves the column as it is via
	 * IFNULL(): status is bound only when a status string was
	 * recorded, and each priority only when it is not -1.
	 */
	for (n = 0; n < chg.nios; n++) {
		spinlock(&slm_upsch_lock);
		dbdo(NULL, NULL,
		    " UPDATE	upsch"
		    " SET	status = IFNULL(?, status),"
		    "		sys_prio = IFNULL(?, sys_prio),"
		    "		usr_prio = IFNULL(?, usr_prio)"
		    " WHERE	resid = ?"
		    "	AND	fid = ?"
		    "	AND	bno = ?",
		    chg.stat[n] ? SQLITE_TEXT : SQLITE_NULL,
		    chg.stat[n] ? chg.stat[n] : 0,
		    sprio == -1 ? SQLITE_NULL : SQLITE_INTEGER,
		    sprio == -1 ? 0 : sprio,
		    uprio == -1 ? SQLITE_NULL : SQLITE_INTEGER,
		    uprio == -1 ? 0 : uprio,
		    SQLITE_INTEGER, chg.iosv[n].bs_id,
		    SQLITE_INTEGER64, bmap_2_fid(b),
		    SQLITE_INTEGER, b->bcm_bmapno);
		freelock(&slm_upsch_lock);
	}

	/* The pending priorities have been consumed; mark them unset. */
	bmap_2_bmi(b)->bmi_sys_prio = -1;
	bmap_2_bmi(b)->bmi_usr_prio = -1;

	if (rel) {
		/*
		 * NOTE(review): BMAP_LOCK() has no matching unlock
		 * here; presumably bmap_op_done_type() releases it --
		 * confirm.
		 */
		BMAP_LOCK(b);
		b->bcm_flags &= ~BMAPF_REPLMODWR;
		bmap_wake_locked(b);
		bmap_op_done_type(b, BMAP_OPCNT_WORK);
	}
}
Пример #4
0
/*
 * Look up, add, or delete an I/O server (IOS) ID in an inode's replica
 * table.
 *
 * The table is split in two: the first SL_DEF_REPLICAS entries live in
 * the inode itself (ino_repls) and the remainder live in the extra
 * inode block (inox_repls), which is loaded on demand.
 *
 * @vfsid: file system ID, passed through to mds_inodes_odsync().
 * @ih: inode handle whose replica table is searched/modified.
 * @ios: IOS ID to look for.
 * @flag: one of
 *	IOSV_LOOKUPF_LOOKUP - only search;
 *	IOSV_LOOKUPF_ADD    - append the ID if it is not present;
 *	IOSV_LOOKUPF_DEL    - remove the ID and compact the table.
 *	Any other value is fatal.
 *
 * Return the index of the given IOS ID or a negative error code on
 * failure: -ENOSPC when adding to a full table, -SLERR_RES_BADTYPE
 * when the ID does not name a file system resource, -ENOENT when a
 * lookup or delete does not find the ID.  Errors from
 * mds_inox_ensure_loaded() and mds_inodes_odsync() are returned as
 * reported.
 */
int
_mds_repl_ios_lookup(int vfsid, struct slash_inode_handle *ih,
    sl_ios_id_t ios, int flag)
{
	int locked, rc;
	struct slm_inox_od *ix = NULL;
	struct sl_resource *res;
	struct fidc_membh *f;
	sl_replica_t *repl;
	uint32_t i, j, nr;
	char buf[LINE_MAX];

	switch (flag) {
	    case IOSV_LOOKUPF_ADD:
		OPSTAT_INCR("replicate-add");
		break;
	    case IOSV_LOOKUPF_DEL:
		OPSTAT_INCR("replicate-del");
		break;
	    case IOSV_LOOKUPF_LOOKUP:
		OPSTAT_INCR("replicate-lookup");
		break;
	    default:
		psc_fatalx("Invalid IOS lookup flag %d", flag);
	}

	/*
	 * Can I assume that IOS ID are non-zeros.  If so, I can use
	 * zero to mark a free slot.  See sl_global_id_build().
	 */
	f = inoh_2_fcmh(ih);
	locked = INOH_RLOCK(ih);

	/*
	 * Sample the replica count only once the inode handle is
	 * locked so that the value we search with is the value we later
	 * write back as nr +/- 1.
	 */
	nr = ih->inoh_ino.ino_nrepls;
	repl = ih->inoh_ino.ino_repls;

	psc_assert(nr <= SL_MAX_REPLICAS);
	if (nr == SL_MAX_REPLICAS && flag == IOSV_LOOKUPF_ADD) {
		DEBUG_INOH(PLL_WARN, ih, buf, "too many replicas");
		PFL_GOTOERR(out, rc = -ENOSPC);
	}

	res = libsl_id2res(ios);
	if (res == NULL || !RES_ISFS(res))
		PFL_GOTOERR(out, rc = -SLERR_RES_BADTYPE);

	/*
	 * 09/29/2016: Hit SLERR_SHORTIO in the function. Need more investigation.
	 */

	/*
	 * Return ENOENT by default for IOSV_LOOKUPF_DEL & IOSV_LOOKUPF_LOOKUP.
	 */
	rc = -ENOENT;

	/*
	 * Search the existing replicas to see if the given IOS is
	 * already there.
	 *
	 * The following code can step through zero IOS IDs just fine.
	 *
	 * `i' is the index into the logical (combined) table; `j' is
	 * the index into whichever physical array `repl' points at.
	 */
	for (i = 0, j = 0; i < nr; i++, j++) {
		if (i == SL_DEF_REPLICAS) {
			/*
			 * The first few replicas are in the inode
			 * itself, the rest are in the extra inode
			 * block.
			 */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			/*
			 * The successful load just overwrote the
			 * not-found default with zero; restore it so
			 * that a miss is not reported as "found at
			 * index 0".
			 */
			rc = -ENOENT;
			ix = ih->inoh_extras;
			repl = ix->inox_repls;
			j = 0;
		}

		DEBUG_INOH(PLL_DEBUG, ih, buf, "is rep[%u](=%u) == %u ?",
		    j, repl[j].bs_id, ios);

		if (repl[j].bs_id == ios) {
			/*
			 * Luckily, this code is only called by mds_repl_delrq()
			 * for directories.
			 *
			 * Make sure that the logic works for at least the following
			 * edge cases:
			 *
			 *    (1) There is only one item in the basic array.
			 *    (2) There is only one item in the extra array.
			 *    (3) The number of items is SL_DEF_REPLICAS.
			 *    (4) The number of items is SL_MAX_REPLICAS.
			 */
			if (flag == IOSV_LOOKUPF_DEL) {
				/*
				 * Compact the array if the IOS is not the last
				 * one. The last one will be either overwritten
				 * or zeroed.  Note that we might move extra
				 * garbage at the end if the total number is less
				 * than SL_DEF_REPLICAS.
				 */
				if (i < SL_DEF_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1],
					    (SL_DEF_REPLICAS - j - 1) *
					    sizeof(*repl));
				}
				/*
				 * All items in the basic array, zero the last
				 * one and we are done.
				 */
				if (nr <= SL_DEF_REPLICAS) {
					repl[nr-1].bs_id = 0;
					goto syncit;
				}
				/*
				 * Now we know we have more than SL_DEF_REPLICAS
				 * items.  However, if we are in the basic array,
				 * we have not read the extra array yet. In this
				 * case, we should also move the first item from
				 * the extra array to the last one in the basic
				 * array (overwrite).
				 */
				if (i < SL_DEF_REPLICAS) {
					rc = mds_inox_ensure_loaded(ih);
					if (rc)
						goto out;
					ix = ih->inoh_extras;

					repl[SL_DEF_REPLICAS - 1].bs_id =
					    ix->inox_repls[0].bs_id;

					repl = ix->inox_repls;
					j = 0;
				}
				/*
				 * Compact the extra array unless the IOS is
				 * the last one, which will be zeroed.
				 */
				if (i < SL_MAX_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1],
					    (SL_INOX_NREPLICAS - j - 1) *
					    sizeof(*repl));
				}

				repl[nr-SL_DEF_REPLICAS-1].bs_id = 0;
 syncit:
				ih->inoh_ino.ino_nrepls = nr - 1;
				rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
				if (rc)
					goto out;
			}
			/* XXX EEXIST for IOSV_LOOKUPF_ADD? */
			rc = i;
			goto out;
		}
	}

	/* It doesn't exist; add to inode replica table if requested. */
	if (flag == IOSV_LOOKUPF_ADD) {

		/* paranoid */
		psc_assert(i == nr);
		if (nr >= SL_DEF_REPLICAS) {
			/* be careful with the case of nr = SL_DEF_REPLICAS */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			repl = ih->inoh_extras->inox_repls;
			j = i - SL_DEF_REPLICAS;

		} else {
			repl = ih->inoh_ino.ino_repls;
			j = i;
		}

		repl[j].bs_id = ios;

		DEBUG_INOH(PLL_DIAG, ih, buf, "add IOS(%u) at idx %u", ios, i);

		ih->inoh_ino.ino_nrepls = nr + 1;
		rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
		if (!rc)
			rc = i;
	}

 out:
	INOH_URLOCK(ih, locked);
	return (rc);
}
Пример #5
0
/*
 * Check for, and try to complete, an inode update that was interrupted.
 *
 * A file named "<fid as 16 hex digits>.update" is looked up in the
 * file system's temporary directory (mds_tmpdir_inum[vfsid]).  If it
 * is there, the inode and its stored CRC are read from it into
 * ih->inoh_ino and verified; then the extra inode block is loaded
 * through the temporary file's handle, mdsio_setattr() is called on
 * the regular handle with a zeroed srt_stat and SL_SETATTRF_METASIZE,
 * mds_inode_dump() is called, and the temporary file is unlinked.
 *
 * @vfsid: file system ID.
 * @ih: inode handle to recover into; its inoh_extras must be NULL on
 *	entry.
 * @rc: value-result; receives zero on success or the error of the
 *	first step that failed (PFLERR_BADCRC for a CRC mismatch or a
 *	truncated file).
 *
 * Returns 1 if the update file was present and carried a CRC-valid
 * inode (even if a later step failed), 0 otherwise.  On any error an
 * unlink of the update file is attempted.  The inode handle's regular
 * metafile handle is always restored before returning.
 */
int
mds_inode_update_interrupted(int vfsid, struct slash_inode_handle *ih,
    int *rc)
{
	char fn[NAME_MAX + 1];
	struct srt_stat sstb;
	struct iovec iovs[2];
	uint64_t crc, od_crc = 0;
	void *h = NULL, *th;
	mdsio_fid_t inum;
	int exists = 0;
	size_t nb = 0;

	/* Remember the regular handle; it is swapped out temporarily. */
	th = inoh_2_mfh(ih);

	snprintf(fn, sizeof(fn), "%016"PRIx64".update",
	    inoh_2_fid(ih));

	*rc = mdsio_lookup(vfsid, mds_tmpdir_inum[vfsid], fn, &inum,
	    &rootcreds, NULL);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	*rc = mdsio_opencreatef(vfsid, inum, &rootcreds, O_RDONLY,
	    MDSIO_OPENCRF_NOLINK, 0644, NULL, NULL, NULL, &h, NULL,
	    NULL, 0);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	/* The file starts with the inode followed by its CRC. */
	iovs[0].iov_base = &ih->inoh_ino;
	iovs[0].iov_len = sizeof(ih->inoh_ino);
	iovs[1].iov_base = &od_crc;
	iovs[1].iov_len = sizeof(od_crc);
	*rc = mdsio_preadv(vfsid, &rootcreds, iovs, nitems(iovs), &nb, 0, h);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	/*
	 * A truncated file cannot carry a valid inode+CRC pair; reject
	 * it explicitly instead of comparing against a CRC that was
	 * never (fully) read.
	 */
	if (nb != sizeof(ih->inoh_ino) + sizeof(od_crc)) {
		*rc = PFLERR_BADCRC;
		PFL_GOTOERR(out, *rc);
	}

	psc_crc64_calc(&crc, &ih->inoh_ino, sizeof(ih->inoh_ino));
	if (crc != od_crc) {
		*rc = PFLERR_BADCRC;
		PFL_GOTOERR(out, *rc);
	}

	exists = 1;

	psc_assert(ih->inoh_extras == NULL);
	ih->inoh_extras = PSCALLOC(INOX_SZ);

	/*
	 * Point the inode handle at the update file while the extra
	 * block is loaded, then switch back to the regular handle.
	 */
	inoh_2_mfh(ih) = h;
	*rc = mds_inox_ensure_loaded(ih);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	inoh_2_mfh(ih) = th;

	memset(&sstb, 0, sizeof(sstb));
	*rc = mdsio_setattr(vfsid, 0, &sstb, SL_SETATTRF_METASIZE,
	    &rootcreds, NULL, th, NULL);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	*rc = mds_inode_dump(vfsid, NULL, ih, h);
	if (*rc)
		PFL_GOTOERR(out, *rc);

	mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn,
	    &rootcreds, NULL, NULL);

 out:
	if (h)
		mdsio_release(vfsid, &rootcreds, h);
	if (*rc)
		mdsio_unlink(vfsid, mds_tmpdir_inum[vfsid], NULL, fn,
		    &rootcreds, NULL, NULL);
	/* Covers the error paths taken while the handle was swapped. */
	inoh_2_mfh(ih) = th;
	return (exists);
}