Example #1
static int
sli_open_backing_file(struct fidc_membh *f)
{
	int lvl = PLL_DIAG, incr, rc = 0;
	char fidfn[PATH_MAX];

	/*
	 * XXX We hit "setrlimit: operation not permitted" here, yet saw
	 * no open failure.  This per-open-syscall adjustment should go!
	 * Hit again due to the fs.nr_open sysctl limit.
	 */
	incr = psc_rlim_adj(RLIMIT_NOFILE, 1);
	sli_fg_makepath(&f->fcmh_fg, fidfn);
	fcmh_2_fd(f) = open(fidfn, O_CREAT|O_RDWR, 0600);
	if (fcmh_2_fd(f) == -1) {
		rc = errno;
		if (incr)
			psc_rlim_adj(RLIMIT_NOFILE, -1);
		OPSTAT_INCR("open-fail");
		lvl = PLL_WARN;
	} else
		OPSTAT_INCR("open-succeed");
	psclog(lvl, "opened backing file path=%s fd=%d rc=%d",
	    strstr(fidfn, SL_RPATH_FIDNS_DIR), fcmh_2_fd(f), rc);
	return (rc);
}
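
psc_rlim_adj() is pfl's helper for nudging the soft RLIMIT_NOFILE up before the open() and back down on failure. Below is a rough standalone sketch of the same bump-then-open pattern using only POSIX getrlimit()/setrlimit(); the helper names are mine, not pfl's. Note that setrlimit() fails with EPERM when the soft limit would exceed the hard limit, which is what the XXX comment above ran into.

#include <sys/resource.h>
#include <fcntl.h>
#include <errno.h>

/*
 * Adjust the soft RLIMIT_NOFILE by 'delta'; return 1 on success.  This
 * can fail with EPERM once the soft limit reaches the hard limit.
 */
static int
rlim_nofile_adj(int delta)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
		return (0);
	rl.rlim_cur += delta;
	return (setrlimit(RLIMIT_NOFILE, &rl) == 0);
}

static int
open_with_rlim_bump(const char *path, int *fdp)
{
	int incr, rc = 0;

	incr = rlim_nofile_adj(1);
	*fdp = open(path, O_CREAT | O_RDWR, 0600);
	if (*fdp == -1) {
		rc = errno;
		if (incr)
			rlim_nofile_adj(-1);	/* roll back the bump */
	}
	return (rc);
}
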
Example #2
void
sli_fcmh_dtor(__unusedx struct fidc_membh *f)
{
	if (f->fcmh_flags & FCMH_IOD_BACKFILE) {
		if (close(fcmh_2_fd(f)) == -1) {
			OPSTAT_INCR("close-fail");
			DEBUG_FCMH(PLL_ERROR, f,
			    "dtor/close errno=%d", errno);
		} else
			OPSTAT_INCR("close-succeed");
		psc_rlim_adj(RLIMIT_NOFILE, -1);
		f->fcmh_flags &= ~FCMH_IOD_BACKFILE;
	}
}
Example #3
int
bcr_update_inodeinfo(struct bcrcupd *bcr)
{
	struct fidc_membh *f;
	struct stat stb;
	struct bmap *b;

	b = bcr_2_bmap(bcr);
	f = b->bcm_fcmh;

	if (bcr->bcr_crcup.fg.fg_fid == FID_ANY)
		return (EINVAL);

	psc_assert(bcr->bcr_crcup.fg.fg_fid == f->fcmh_fg.fg_fid);

	if (bcr->bcr_crcup.fg.fg_gen != f->fcmh_fg.fg_gen) {
		OPSTAT_INCR("brcupdate-stale");
		return (ESTALE);
	}

	if ((f->fcmh_flags & FCMH_IOD_BACKFILE) == 0)
		return (EBADF);

	if (fstat(fcmh_2_fd(f), &stb) == -1)
		return (errno);

	/* Used by mds_bmap_crc_update() */
	bcr->bcr_crcup.fsize = stb.st_size;
	bcr->bcr_crcup.nblks = stb.st_blocks;
	bcr->bcr_crcup.utimgen = f->fcmh_sstb.sst_utimgen;

	return (0);
}
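
A detail worth keeping in mind when reading the fstat() block above: st_size is the logical length in bytes, while st_blocks counts the physical allocation in 512-byte units regardless of the filesystem block size. A minimal illustration:

#include <sys/stat.h>
#include <stdio.h>

/*
 * Report logical vs. physical size for an open descriptor; st_blocks
 * is counted in 512-byte units per POSIX.
 */
static int
print_sizes(int fd)
{
	struct stat stb;

	if (fstat(fd, &stb) == -1)
		return (-1);
	printf("size=%lld bytes, allocated=%lld bytes\n",
	    (long long)stb.st_size, (long long)stb.st_blocks * 512);
	return (0);
}
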
Example #4
uint64_t
mds_bmap_timeotbl_getnextseq(void)
{
	int locked;
	uint64_t hwm;

	locked = reqlock(&mdsBmapTimeoTbl.btt_lock);

	/*
	 * Skip the zero sequence number because the client does not
	 * accept it.  More work is needed for an IOS to decide whether
	 * a smaller sequence number is actually ahead of a larger one
	 * after a wraparound happens.
	 */
	mdsBmapTimeoTbl.btt_maxseq++;
	if (mdsBmapTimeoTbl.btt_maxseq == BMAPSEQ_ANY) {
		OPSTAT_INCR("seqno-wrap");
		mdsBmapTimeoTbl.btt_maxseq = 1;
	}

	hwm = mdsBmapTimeoTbl.btt_maxseq;
	mds_bmap_timeotbl_journal_seqno();

	ureqlock(&mdsBmapTimeoTbl.btt_lock, locked);

	return (hwm);
}
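
Stripped of the locking and journaling, the wraparound rule above reduces to a counter that skips two reserved values: 0, which clients reject, and the BMAPSEQ_ANY sentinel. A small sketch, with SEQ_ANY standing in for BMAPSEQ_ANY (whose real value is not assumed here):

#include <stdint.h>

#define SEQ_ANY	((uint64_t)-1)	/* stand-in for the reserved sentinel */

/*
 * Advance a wrapping sequence counter while skipping the two reserved
 * values: the sentinel itself and 0.
 */
static uint64_t
seq_next(uint64_t *seq)
{
	if (++*seq == SEQ_ANY)
		*seq = 1;	/* wrap past both 0 and the sentinel */
	return (*seq);
}
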
Example #5
File: odtable.c Project: pscedu/pfl
size_t
pfl_odt_allocslot(struct pfl_odt *t)
{
	struct pfl_odt_hdr *h;
	size_t item;

	h = t->odt_hdr;
	spinlock(&t->odt_lock);
	if (psc_vbitmap_next(t->odt_bitmap, &item) <= 0) {
		ODT_STAT_INCR(t, full);
		freelock(&t->odt_lock);
		return (-1);
	}
	if (item >= h->odth_nitems) {
		ODT_STAT_INCR(t, extend);
		OPSTAT_INCR("pfl.odtable-resize");
		/*
		 * psc_vbitmap_next() has enlarged the bitmap. Update
		 * the number of items accordingly and write to the
		 * disk.
		 */
		h->odth_nitems = psc_vbitmap_getsize(t->odt_bitmap);

		t->odt_ops.odtop_resize(t);	/* slm_odt_resize() */
		PFLOG_ODT(PLL_WARN, t,
		    "odtable now has %u items (used to be %zd)",
		    h->odth_nitems, item);
	}
	freelock(&t->odt_lock);
	return (item);
}
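
psc_vbitmap_next() both finds a free bit and transparently enlarges the bitmap when none is left, which is why the caller above only learns about the resize by comparing the returned slot against odth_nitems. A toy, self-contained version of that find-or-grow allocator (struct and function names invented):

#include <stdlib.h>
#include <string.h>

struct slotmap {
	unsigned char	*sm_bits;
	size_t		 sm_nslots;	/* must start > 0 */
};

/*
 * Return a free slot index, doubling the bitmap when it fills up;
 * (size_t)-1 on allocation failure.
 */
static size_t
slotmap_alloc(struct slotmap *sm)
{
	size_t i, oldb, newb;
	unsigned char *p;

	for (i = 0; i < sm->sm_nslots; i++)
		if (!(sm->sm_bits[i / 8] & (1u << (i % 8))))
			goto found;

	/* Full: double the map; realloc(3) does not zero new bytes. */
	oldb = (sm->sm_nslots + 7) / 8;
	newb = (sm->sm_nslots * 2 + 7) / 8;
	p = realloc(sm->sm_bits, newb);
	if (p == NULL)
		return ((size_t)-1);
	memset(p + oldb, 0, newb - oldb);
	sm->sm_bits = p;
	i = sm->sm_nslots;	/* first slot of the new region */
	sm->sm_nslots *= 2;
 found:
	sm->sm_bits[i / 8] |= 1u << (i % 8);
	return (i);
}
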
Example #6
void
sli_fcmh_dtor(__unusedx struct fidc_membh *f)
{
	struct fcmh_iod_info *fii;

	if (f->fcmh_flags & FCMH_IOD_BACKFILE) {
		if (close(fcmh_2_fd(f)) == -1) {
			OPSTAT_INCR("close-fail");
			DEBUG_FCMH(PLL_ERROR, f,
			    "dtor/close errno=%d", errno);
		} else
			OPSTAT_INCR("close-succeed");
		psc_rlim_adj(RLIMIT_NOFILE, -1);
		f->fcmh_flags &= ~FCMH_IOD_BACKFILE;
	}
	if (f->fcmh_flags & FCMH_IOD_DIRTYFILE) {
		fii = fcmh_2_fii(f);
		lc_remove(&sli_fcmh_dirty, fii);
		f->fcmh_flags &= ~FCMH_IOD_DIRTYFILE;
	}
}
Example #7
static int
sli_open_backing_file(struct fidc_membh *f)
{
	int lvl = PLL_DIAG, incr, rc = 0;
	char fidfn[PATH_MAX];

	incr = psc_rlim_adj(RLIMIT_NOFILE, 1);
	sli_fg_makepath(&f->fcmh_fg, fidfn);
	fcmh_2_fd(f) = open(fidfn, O_CREAT|O_RDWR, 0600);
	if (fcmh_2_fd(f) == -1) {
		rc = errno;
		if (incr)
			psc_rlim_adj(RLIMIT_NOFILE, -1);
		OPSTAT_INCR("open-fail");
		lvl = PLL_WARN;
	} else
		OPSTAT_INCR("open-succeed");
	psclog(lvl, "opened backing file path=%s fd=%d rc=%d",
	    strstr(fidfn, SL_RPATH_FIDNS_DIR), fcmh_2_fd(f), rc);
	return (rc);
}
Example #8
int
slm_rmc_handle_getbmap(struct pscrpc_request *rq)
{
	const struct srm_leasebmap_req *mq;
	struct srm_leasebmap_rep *mp;
	struct fidc_membh *f;
	int rc = 0;

	SL_RSX_ALLOCREP(rq, mq, mp);

	if (mq->rw == SL_WRITE)
		OPSTAT_INCR("getbmap-lease-write");
	else if (mq->rw == SL_READ)
		OPSTAT_INCR("getbmap-lease-read");
	else {
		mp->rc = -EINVAL;
		return (0);
	}

	mp->rc = -slm_fcmh_get(&mq->fg, &f);
	if (mp->rc)
		return (0);
	mp->flags = mq->flags;

	mp->rc = mds_bmap_load_cli(f, mq->bmapno, mq->flags, mq->rw,
	    mq->prefios[0], &mp->sbd, rq->rq_export, mp->repls, 0);
	if (mp->rc)
		PFL_GOTOERR(out, mp->rc);

	if (mp->flags & SRM_LEASEBMAPF_GETINODE)
		slm_pack_inode(f, &mp->ino);

 out:
	fcmh_op_done(f);
	return (rc ? rc : mp->rc);
}
Example #9
/*
 * Get the specified bmap.
 * @f: fcmh.
 * @n: bmap number.
 * @rw: access mode.
 * @flags: retrieval parameters.
 * @bp: value-result bmap pointer.
 * Notes: returns the bmap referenced and locked.
 */
int
_bmap_get(const struct pfl_callerinfo *pci, struct fidc_membh *f,
    sl_bmapno_t n, enum rw rw, int flags, struct bmap **bp)
{
	int rc = 0, new_bmap, bmaprw = 0;
	struct bmap *b;

	if (bp)
		*bp = NULL;

	if (rw)
		bmaprw = rw == SL_WRITE ? BMAPF_WR : BMAPF_RD;

	new_bmap = flags & BMAPGETF_CREATE;
	b = bmap_lookup_cache(f, n, bmaprw, &new_bmap);
	if (b == NULL) {
		rc = ENOENT;
		goto out;
	}
	if (flags & BMAPGETF_NONBLOCK) {
		if (b->bcm_flags & BMAPF_LOADING)
			goto out;
	} else
		bmap_wait_locked(b, b->bcm_flags & BMAPF_LOADING);

	if (b->bcm_flags & BMAPF_LOADED)
		goto loaded;

	if (flags & BMAPGETF_NORETRIEVE) {
		if (b->bcm_flags & BMAPF_LOADED)
			OPSTAT_INCR("bmap-already-loaded");
		else
			OPSTAT_INCR("bmap-not-yet-loaded");
		goto out;
	}

	b->bcm_flags |= BMAPF_LOADING;
	DEBUG_BMAP(PLL_DIAG, b, "loading bmap; flags=%d", flags);
	BMAP_ULOCK(b);

	/* msl_bmap_retrieve(), iod_bmap_retrieve(), mds_bmap_read() */
	rc = sl_bmap_ops.bmo_retrievef(b, flags);

	BMAP_LOCK(b);

	if (flags & BMAPGETF_NONBLOCK) {
		if (rc)
			b->bcm_flags &= ~BMAPF_LOADING;
		goto out;
	}
	b->bcm_flags &= ~BMAPF_LOADING;
	if (!rc) {
		b->bcm_flags |= BMAPF_LOADED;
		bmap_wake_locked(b);
	}

 loaded:

	/*
	 * Early bail out should be safe.  There is only one place where
	 * the client does a bmap lookup, and in that code path we just
	 * add the DIO flag to the bmap.  See msrcm_handle_bmapdio().
	 */
	if (rc || !bmaprw)
		goto out;

	/*
	 * Others wishing to access this bmap in the same mode must wait
	 * until MODECHNG ops have completed.  If the desired mode is
	 * present then a thread may proceed without blocking here so
	 * long as it only accesses structures which pertain to its
	 * mode.
	 */
	if (flags & BMAPGETF_NONBLOCK) {
		if (b->bcm_flags & BMAPF_MODECHNG)
			goto out;
	} else
		bmap_wait_locked(b, b->bcm_flags & BMAPF_MODECHNG);

	/*
	 * Not all lookups are done with the intent of changing the bmap
	 * mode i.e. bmap_lookup() does not specify a rw value.
	 */
	if (!(bmaprw & b->bcm_flags) && sl_bmap_ops.bmo_mode_chngf) {

		psc_assert(!(b->bcm_flags & BMAPF_MODECHNG));
		b->bcm_flags |= BMAPF_MODECHNG;

		DEBUG_BMAP(PLL_DIAG, b, "mode change (rw=%d)", rw);

		BMAP_ULOCK(b);

		psc_assert(rw == SL_WRITE || rw == SL_READ);

		/* client only: call msl_bmap_modeset() */
		rc = sl_bmap_ops.bmo_mode_chngf(b, rw, flags);
		BMAP_LOCK(b);
	}

 out:
	if (b) {
		DEBUG_BMAP(rc && (rc != SLERR_BMAP_INVALID ||
		    (flags & BMAPGETF_NOAUTOINST) == 0) ?
		    PLL_ERROR : PLL_DIAG, b, "grabbed rc=%d", rc);
		if (rc)
			bmap_op_done(b);
		else
			*bp = b;
	}
	return (rc);
}
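
The BMAPF_LOADING/BMAPF_LOADED handling above is a single-loader gate: the first thread marks the bmap LOADING, drops the lock for the retrieval I/O, and wakes waiters once the flag settles. Condensed to plain pthreads, with invented names and a condvar standing in for bmap_wait_locked()/bmap_wake_locked(), the skeleton looks like this:

#include <pthread.h>

#define OBJF_LOADING	(1 << 0)
#define OBJF_LOADED	(1 << 1)

struct obj {
	pthread_mutex_t	 o_mtx;
	pthread_cond_t	 o_cv;
	int		 o_flags;
};

/*
 * Single-loader gate: the first caller retrieves with the lock
 * dropped; concurrent callers sleep until LOADING clears, then reuse
 * the result.  On failure LOADED stays unset, so the next caller
 * through becomes the new loader and retries.
 */
static int
obj_load(struct obj *o, int (*retrieve)(struct obj *))
{
	int rc = 0;

	pthread_mutex_lock(&o->o_mtx);
	while (o->o_flags & OBJF_LOADING)
		pthread_cond_wait(&o->o_cv, &o->o_mtx);
	if (o->o_flags & OBJF_LOADED)
		goto out;

	o->o_flags |= OBJF_LOADING;
	pthread_mutex_unlock(&o->o_mtx);

	rc = retrieve(o);	/* the I/O happens unlocked */

	pthread_mutex_lock(&o->o_mtx);
	o->o_flags &= ~OBJF_LOADING;
	if (rc == 0)
		o->o_flags |= OBJF_LOADED;
	pthread_cond_broadcast(&o->o_cv);
 out:
	pthread_mutex_unlock(&o->o_mtx);
	return (rc);
}
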
Example #10
/*
 * Lookup and optionally create a new bmap structure.
 * @f: file's bmap tree to search.
 * @n: bmap index number to search for.
 * @new_bmap: whether to allow creation and also value-result of whether
 * it was newly created or not.
 */
struct bmap *
bmap_lookup_cache(struct fidc_membh *f, sl_bmapno_t n, int bmaprw,
    int *new_bmap)
{
	struct bmap lb, *b, *bnew = NULL;
	int doalloc;

	doalloc = *new_bmap;
	lb.bcm_bmapno = n;

 restart:
	if (bnew)
		pfl_rwlock_wrlock(&f->fcmh_rwlock);
	else
		pfl_rwlock_rdlock(&f->fcmh_rwlock);
	b = RB_FIND(bmaptree, &f->fcmh_bmaptree, &lb);
	if (b) {
		if (!BMAP_TRYLOCK(b)) {
			pfl_rwlock_unlock(&f->fcmh_rwlock);
			usleep(10);
			goto restart;
		}

		if (b->bcm_flags & BMAPF_TOFREE) {
			/*
			 * This bmap is going away; wait for it so we
			 * can reload it back.
			 */
			DEBUG_BMAP(PLL_DIAG, b, "wait on to-free bmap");
			BMAP_ULOCK(b);
			/*
			 * We don't want to spin if we are waiting for a
			 * flush to clear.
			 */
			psc_waitq_waitrelf_us(&f->fcmh_waitq,
			    PFL_LOCKPRIM_RWLOCK, &f->fcmh_rwlock, 100);
			goto restart;
		}
		bmap_op_start_type(b, BMAP_OPCNT_LOOKUP);
	}
	if (doalloc == 0 || b) {
		pfl_rwlock_unlock(&f->fcmh_rwlock);
		if (bnew)
			psc_pool_return(bmap_pool, bnew);
		*new_bmap = 0;
		OPSTAT_INCR("bmapcache.hit");
		return (b);
	}
	if (bnew == NULL) {
		pfl_rwlock_unlock(&f->fcmh_rwlock);

		if (sl_bmap_ops.bmo_reapf)
			sl_bmap_ops.bmo_reapf();

		bnew = psc_pool_get(bmap_pool);
		goto restart;
	}
	b = bnew;

	OPSTAT_INCR("bmapcache.miss");

	*new_bmap = 1;
	memset(b, 0, bmap_pool->ppm_master->pms_entsize);
	INIT_PSC_LISTENTRY(&b->bcm_lentry);
	INIT_SPINLOCK(&b->bcm_lock);

	psc_atomic32_set(&b->bcm_opcnt, 0);
	b->bcm_fcmh = f;
	b->bcm_bmapno = n;

	/*
	 * Signify that the bmap is newly initialized and therefore may
	 * not contain certain structures.
	 */
	psc_assert(bmaprw == BMAPF_RD || bmaprw == BMAPF_WR);
	b->bcm_flags = bmaprw;

	bmap_op_start_type(b, BMAP_OPCNT_LOOKUP);

	/*
	 * Perform app-specific substructure initialization, which is
	 * msl_bmap_init(), iod_bmap_init(), or mds_bmap_init().
	 */
	sl_bmap_ops.bmo_init_privatef(b);

	/* Add to the fcmh's bmap cache */
	PSC_RB_XINSERT(bmaptree, &f->fcmh_bmaptree, b);

	pfl_rwlock_unlock(&f->fcmh_rwlock);

	fcmh_op_start_type(f, FCMH_OPCNT_BMAP);

	BMAP_LOCK(b);

	return (b);
}
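
Note how the function takes fcmh_rwlock read-side when it cannot possibly insert and only restarts write-side once it already holds a preallocated bmap, so cache hits never serialize behind writers. A condensed sketch of that optimistic lookup-or-insert pattern over a toy linked list (all names invented):

#include <pthread.h>
#include <stdlib.h>

struct node {
	struct node	*n_next;
	int		 n_key;
};

struct tree {
	pthread_rwlock_t	 t_rwlock;
	struct node		*t_head;
};

static struct node *
tree_find(struct tree *t, int key)
{
	struct node *n;

	for (n = t->t_head; n; n = n->n_next)
		if (n->n_key == key)
			return (n);
	return (NULL);
}

static struct node *
lookup_or_insert(struct tree *t, int key)
{
	struct node *n, *nnew = NULL;

 restart:
	if (nnew)
		pthread_rwlock_wrlock(&t->t_rwlock);	/* might insert */
	else
		pthread_rwlock_rdlock(&t->t_rwlock);	/* cheap lookup */
	n = tree_find(t, key);
	if (n) {
		pthread_rwlock_unlock(&t->t_rwlock);
		/* Lost the race; discard ours, as psc_pool_return()
		 * does above. */
		free(nnew);
		return (n);
	}
	if (nnew == NULL) {
		/* Allocate outside the lock, then retry write-side. */
		pthread_rwlock_unlock(&t->t_rwlock);
		nnew = calloc(1, sizeof(*nnew));
		if (nnew == NULL)
			return (NULL);
		nnew->n_key = key;
		goto restart;
	}
	nnew->n_next = t->t_head;
	t->t_head = nnew;
	pthread_rwlock_unlock(&t->t_rwlock);
	return (nnew);
}
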
Example #11
/*
 * Update the high-level app stat(2)-like attribute buffer for a FID
 * cache member.
 * @f: FID cache member to update.
 * @sstb: incoming stat attributes.
 * @flags: behavioral flags.
 * Notes:
 *     (1) if SAVELOCAL has been specified, save local field values:
 *		(o) file size
 *		(o) mtime
 *     (2) This function should only be used by a client.
 */
void
slc_fcmh_setattrf(struct fidc_membh *f, struct srt_stat *sstb,
    int flags)
{
	uidmap_int_stat(sstb);

	if (flags & FCMH_SETATTRF_HAVELOCK)
		FCMH_LOCK_ENSURE(f);
	else
		FCMH_LOCK(f);

	if (fcmh_2_gen(f) == FGEN_ANY)
		fcmh_2_gen(f) = sstb->sst_gen;

	if ((FID_GET_INUM(fcmh_2_fid(f))) != SLFID_ROOT &&
	    fcmh_2_gen(f) > sstb->sst_gen) {
		OPSTAT_INCR("msl.generation-backwards");
		DEBUG_FCMH(PLL_DIAG, f, "attempt to set attr with "
		    "gen %"PRIu64" from old gen %"PRIu64,
		    fcmh_2_gen(f), sstb->sst_gen);
		goto out;
	}

	/*
	 * If we don't have stat attributes, how can we save our local
	 * updates?
	 */
	if ((f->fcmh_flags & FCMH_HAVE_ATTRS) == 0)
		flags |= FCMH_SETATTRF_CLOBBER;

	/*
	 * Always update for roots because we might have faked them
	 * with readdir at the super root.
	 */
	if ((FID_GET_INUM(fcmh_2_fid(f))) == SLFID_ROOT)
		flags |= FCMH_SETATTRF_CLOBBER;

	psc_assert(sstb->sst_gen != FGEN_ANY);
	psc_assert(f->fcmh_fg.fg_fid == sstb->sst_fid);

	/*
	 * The default behavior is to save st_size and st_mtim since we
	 * might have done I/O that the MDS does not know about.
	 */
	if ((flags & FCMH_SETATTRF_CLOBBER) == 0 &&
	    fcmh_isreg(f)) {
		/*
		 * If generation numbers match, take the highest of the
		 * values.  Otherwise, disregard local values and
		 * blindly accept whatever the MDS tells us.
		 */
		if (fcmh_2_ptruncgen(f) == sstb->sst_ptruncgen &&
		    fcmh_2_gen(f) == sstb->sst_gen &&
		    fcmh_2_fsz(f) > sstb->sst_size)
			sstb->sst_size = fcmh_2_fsz(f);
		if (fcmh_2_utimgen(f) == sstb->sst_utimgen)
			sstb->sst_mtim = f->fcmh_sstb.sst_mtim;
	}

	COPY_SSTB(sstb, &f->fcmh_sstb);
	f->fcmh_flags |= FCMH_HAVE_ATTRS;
	f->fcmh_flags &= ~FCMH_GETTING_ATTRS;

	if (sl_fcmh_ops.sfop_postsetattr)
		sl_fcmh_ops.sfop_postsetattr(f);

	DEBUG_FCMH(PLL_DEBUG, f, "attr set");

 out:
	if (!(flags & FCMH_SETATTRF_HAVELOCK))
		FCMH_ULOCK(f);
}
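
The subtle part above is the save-local rule: the client's size and mtime may reflect writes the MDS has not yet heard about, so they win only while the respective generation counters agree. Isolated into a sketch (struct and field names invented):

#include <stdint.h>
#include <sys/types.h>
#include <time.h>

struct attrs {
	uint64_t	a_gen;		/* file generation */
	uint64_t	a_ptruncgen;	/* partial-truncate generation */
	uint64_t	a_utimgen;	/* utimes generation */
	off_t		a_size;
	struct timespec	a_mtim;
};

/*
 * Merge incoming MDS attributes, preserving local size/mtime while the
 * generations match; otherwise the MDS values stand.
 */
static void
attrs_merge(const struct attrs *local, struct attrs *incoming)
{
	if (local->a_ptruncgen == incoming->a_ptruncgen &&
	    local->a_gen == incoming->a_gen &&
	    local->a_size > incoming->a_size)
		incoming->a_size = local->a_size;
	if (local->a_utimgen == incoming->a_utimgen)
		incoming->a_mtim = local->a_mtim;
}
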
Example #12
/*
 * If the generation number changes, we assume a full truncation has
 * happened.  We need to open a new backing file and attach it to the
 * fcmh.
 */
int
sli_fcmh_reopen(struct fidc_membh *f, slfgen_t fgen)
{
	int rc = 0;

	FCMH_LOCK_ENSURE(f);

	OPSTAT_INCR("reopen");

	if (fgen == FGEN_ANY) {
		OPSTAT_INCR("generation-bogus");
		return (EBADF);
	}
	if (fgen < fcmh_2_gen(f)) {
		OPSTAT_INCR("generation-stale");
		return (ESTALE);
	}

	/*
	 * If our generation number is still unknown try to set it here.
	 */
	if (fcmh_2_gen(f) == FGEN_ANY && fgen != FGEN_ANY) {
		OPSTAT_INCR("generation-fix");
		fcmh_2_gen(f) = fgen;
	}

	if (fgen > fcmh_2_gen(f)) {
		struct sl_fidgen oldfg;
		char fidfn[PATH_MAX];

		DEBUG_FCMH(PLL_DIAG, f, "reopening new backing file");
		OPSTAT_INCR("slvr-remove-reopen");
		slvr_remove_all(f);

		/*
		 * It's possible the pruning of all slivers and bmaps
		 * ended up fcmh_op_done() our fcmh so ensure it is
		 * locked upon finishing.
		 */
		FCMH_RLOCK(f);

		/*
		 * Need to reopen the backing file and possibly remove
		 * the old one.
		 */
		if (f->fcmh_flags & FCMH_IOD_BACKFILE) {
			if (close(fcmh_2_fd(f)) == -1) {
				OPSTAT_INCR("close-fail");
				DEBUG_FCMH(PLL_ERROR, f,
				    "reopen/close errno=%d", errno);
			} else {
				OPSTAT_INCR("close-succeed");
			}
			fcmh_2_fd(f) = -1;
			psc_rlim_adj(RLIMIT_NOFILE, -1);
			f->fcmh_flags &= ~FCMH_IOD_BACKFILE;
		}

		oldfg.fg_fid = fcmh_2_fid(f);
		oldfg.fg_gen = fcmh_2_gen(f);

		fcmh_2_gen(f) = fgen;

		rc = sli_open_backing_file(f);
		/*
		 * Leaving FCMH_IOD_BACKFILE unset notifies upper layers
		 * that open() has failed.
		 */
		if (!rc)
			f->fcmh_flags |= FCMH_IOD_BACKFILE;

		/* Do some upfront garbage collection. */
		sli_fg_makepath(&oldfg, fidfn);

		errno = 0;
		unlink(fidfn);
		DEBUG_FCMH(PLL_INFO, f, "upfront unlink(), errno=%d",
		    errno);

	} else if (!(f->fcmh_flags & FCMH_IOD_BACKFILE)) {

		rc = sli_open_backing_file(f);
		if (!rc)
			f->fcmh_flags |= FCMH_IOD_BACKFILE;
		OPSTAT_INCR("generation-same");
	}
	return (rc);
}
Example #13
void
slm_repl_upd_write(struct bmap *b, int rel)
{
	struct {
		sl_replica_t	 iosv[SL_MAX_REPLICAS];
		char		*stat[SL_MAX_REPLICAS];
		unsigned	 nios;
	} add, del, chg;

	int off, vold, vnew, sprio, uprio, rc;
	struct sl_mds_iosinfo *si;
	struct bmap_mds_info *bmi;
	struct fidc_membh *f;
	struct sl_resource *r;
	sl_ios_id_t resid;
	unsigned n, nrepls;

	bmi = bmap_2_bmi(b);
	f = b->bcm_fcmh;
	sprio = bmi->bmi_sys_prio;
	uprio = bmi->bmi_usr_prio;

	add.nios = 0;
	del.nios = 0;
	chg.nios = 0;
	nrepls = fcmh_2_nrepls(f);
	for (n = 0, off = 0; n < nrepls; n++, off += SL_BITS_PER_REPLICA) {

		if (n == SL_DEF_REPLICAS)
			mds_inox_ensure_loaded(fcmh_2_inoh(f));

		resid = fcmh_2_repl(f, n);
		vold = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_orepls, off);
		vnew = SL_REPL_GET_BMAP_IOS_STAT(bmi->bmi_repls, off);

		r = libsl_id2res(resid);
		si = r ? res2iosinfo(r) : &slm_null_iosinfo;

		if (vold == vnew)
			;

		/* Work was added. */
		else if ((vold != BREPLST_REPL_SCHED &&
		    vold != BREPLST_GARBAGE_QUEUED &&
		    vold != BREPLST_GARBAGE_SCHED &&
		    vnew == BREPLST_REPL_QUEUED) ||
		    (vold != BREPLST_GARBAGE_SCHED &&
		     vnew == BREPLST_GARBAGE_QUEUED &&
		     (si->si_flags & SIF_PRECLAIM_NOTSUP) == 0)) {
			OPSTAT_INCR("repl-work-add");
			PUSH_IOS(b, &add, resid, NULL);
		}

		/* Work has finished. */
		else if ((vold == BREPLST_REPL_QUEUED ||
		     vold == BREPLST_REPL_SCHED ||
		     vold == BREPLST_TRUNC_SCHED ||
		     vold == BREPLST_TRUNC_QUEUED ||
		     vold == BREPLST_GARBAGE_SCHED ||
		     vold == BREPLST_VALID) &&
		    (((si->si_flags & SIF_PRECLAIM_NOTSUP) &&
		      vnew == BREPLST_GARBAGE_QUEUED) ||
		     vnew == BREPLST_VALID ||
		     vnew == BREPLST_INVALID)) {
			OPSTAT_INCR("repl-work-del");
			PUSH_IOS(b, &del, resid, NULL);
		}

		/*
		 * Work that was previously scheduled failed, so
		 * requeue it.
		 */
		else if (vold == BREPLST_REPL_SCHED ||
		    vold == BREPLST_GARBAGE_SCHED ||
		    vold == BREPLST_TRUNC_SCHED)
			PUSH_IOS(b, &chg, resid, "Q");

		/* Work was scheduled. */
		else if (vnew == BREPLST_REPL_SCHED ||
		    vnew == BREPLST_GARBAGE_SCHED ||
		    vnew == BREPLST_TRUNC_SCHED)
			PUSH_IOS(b, &chg, resid, "S");

		/* Work was reprioritized. */
		else if (sprio != -1 || uprio != -1)
			PUSH_IOS(b, &chg, resid, NULL);
	}

	for (n = 0; n < add.nios; n++) {
		rc = slm_upsch_insert(b, add.iosv[n].bs_id, sprio,
		    uprio);
		if (!rc)
			continue;
		psclog_warnx("upsch insert failed: bno=%d, "
		    "fid=%"PRId64", ios=%d, rc=%d",
		    b->bcm_bmapno, bmap_2_fid(b),
		    add.iosv[n].bs_id, rc);
	}

	for (n = 0; n < del.nios; n++) {
		spinlock(&slm_upsch_lock);
		dbdo(NULL, NULL,
		    " DELETE FROM upsch"
		    " WHERE	resid = ?"
		    "   AND	fid = ?"
		    "   AND	bno = ?",
		    SQLITE_INTEGER, del.iosv[n].bs_id,
		    SQLITE_INTEGER64, bmap_2_fid(b),
		    SQLITE_INTEGER, b->bcm_bmapno);
		freelock(&slm_upsch_lock);
	}

	for (n = 0; n < chg.nios; n++) {
		spinlock(&slm_upsch_lock);
		dbdo(NULL, NULL,
		    " UPDATE	upsch"
		    " SET	status = IFNULL(?, status),"
		    "		sys_prio = IFNULL(?, sys_prio),"
		    "		usr_prio = IFNULL(?, usr_prio)"
		    " WHERE	resid = ?"
		    "	AND	fid = ?"
		    "	AND	bno = ?",
		    chg.stat[n] ? SQLITE_TEXT : SQLITE_NULL,
		    chg.stat[n] ? chg.stat[n] : 0,
		    sprio == -1 ? SQLITE_NULL : SQLITE_INTEGER,
		    sprio == -1 ? 0 : sprio,
		    uprio == -1 ? SQLITE_NULL : SQLITE_INTEGER,
		    uprio == -1 ? 0 : uprio,
		    SQLITE_INTEGER, chg.iosv[n].bs_id,
		    SQLITE_INTEGER64, bmap_2_fid(b),
		    SQLITE_INTEGER, b->bcm_bmapno);
		freelock(&slm_upsch_lock);
	}

	bmap_2_bmi(b)->bmi_sys_prio = -1;
	bmap_2_bmi(b)->bmi_usr_prio = -1;

	if (rel) {
		BMAP_LOCK(b);
		b->bcm_flags &= ~BMAPF_REPLMODWR;
		bmap_wake_locked(b);
		bmap_op_done_type(b, BMAP_OPCNT_WORK);
	}
}
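
dbdo() appears to be a variadic wrapper that binds (type, value) pairs into the statement; the IFNULL(?, column) trick makes the UPDATE keep a column's existing value whenever NULL is bound for it. In raw SQLite C, the same conditional update would look roughly like this (error handling trimmed):

#include <sqlite3.h>

/*
 * Update priority columns only when new values were supplied: binding
 * NULL makes IFNULL() fall back to the existing column value.
 */
static int
upsch_update(sqlite3 *db, int resid, sqlite3_int64 fid, int bno,
    const char *status, int sprio, int uprio)
{
	sqlite3_stmt *st;
	int rc;

	rc = sqlite3_prepare_v2(db,
	    "UPDATE upsch SET status = IFNULL(?, status),"
	    " sys_prio = IFNULL(?, sys_prio),"
	    " usr_prio = IFNULL(?, usr_prio)"
	    " WHERE resid = ? AND fid = ? AND bno = ?", -1, &st, NULL);
	if (rc != SQLITE_OK)
		return (rc);
	if (status)
		sqlite3_bind_text(st, 1, status, -1, SQLITE_STATIC);
	else
		sqlite3_bind_null(st, 1);
	if (sprio == -1)
		sqlite3_bind_null(st, 2);
	else
		sqlite3_bind_int(st, 2, sprio);
	if (uprio == -1)
		sqlite3_bind_null(st, 3);
	else
		sqlite3_bind_int(st, 3, uprio);
	sqlite3_bind_int(st, 4, resid);
	sqlite3_bind_int64(st, 5, fid);
	sqlite3_bind_int(st, 6, bno);
	rc = sqlite3_step(st);
	sqlite3_finalize(st);
	return (rc == SQLITE_DONE ? SQLITE_OK : rc);
}
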
Example #14
/*
 * Return the index of the given IOS ID or a negative error code on failure.
 */
int
_mds_repl_ios_lookup(int vfsid, struct slash_inode_handle *ih,
    sl_ios_id_t ios, int flag)
{
	int locked, rc;
	struct slm_inox_od *ix = NULL;
	struct sl_resource *res;
	struct fidc_membh *f;
	sl_replica_t *repl;
	uint32_t i, j, nr;
	char buf[LINE_MAX];

	switch (flag) {
	case IOSV_LOOKUPF_ADD:
		OPSTAT_INCR("replicate-add");
		break;
	case IOSV_LOOKUPF_DEL:
		OPSTAT_INCR("replicate-del");
		break;
	case IOSV_LOOKUPF_LOOKUP:
		OPSTAT_INCR("replicate-lookup");
		break;
	default:
		psc_fatalx("Invalid IOS lookup flag %d", flag);
	}

	/*
	 * Can we assume that IOS IDs are nonzero?  If so, we can use
	 * zero to mark a free slot.  See sl_global_id_build().
	 */
	f = inoh_2_fcmh(ih);
	nr = ih->inoh_ino.ino_nrepls;
	repl = ih->inoh_ino.ino_repls;
	locked = INOH_RLOCK(ih);

	psc_assert(nr <= SL_MAX_REPLICAS);
	if (nr == SL_MAX_REPLICAS && flag == IOSV_LOOKUPF_ADD) {
		DEBUG_INOH(PLL_WARN, ih, buf, "too many replicas");
		PFL_GOTOERR(out, rc = -ENOSPC);
	}

	res = libsl_id2res(ios);
	if (res == NULL || !RES_ISFS(res))
		PFL_GOTOERR(out, rc = -SLERR_RES_BADTYPE);

	/*
	 * 09/29/2016: Hit SLERR_SHORTIO in this function.  Needs more
	 * investigation.
	 */

	/*
	 * Return ENOENT by default for IOSV_LOOKUPF_DEL and
	 * IOSV_LOOKUPF_LOOKUP.
	 */
	rc = -ENOENT;

	/*
	 * Search the existing replicas to see if the given IOS is
	 * already there.
	 *
	 * The following code can step through zero IOS IDs just fine.
	 */
	for (i = 0, j = 0; i < nr; i++, j++) {
		if (i == SL_DEF_REPLICAS) {
			/*
			 * The first few replicas are in the inode
			 * itself, the rest are in the extra inode
			 * block.
			 */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			ix = ih->inoh_extras;
			repl = ix->inox_repls;
			j = 0;
		}

		DEBUG_INOH(PLL_DEBUG, ih, buf, "is rep[%u](=%u) == %u ?",
		    j, repl[j].bs_id, ios);

		if (repl[j].bs_id == ios) {
			/*
			 * Luckily, this code is only called by
			 * mds_repl_delrq() for directories.
			 *
			 * Make sure that the logic works for at least
			 * the following edge cases:
			 *
			 *    (1) There is only one item in the basic array.
			 *    (2) There is only one item in the extra array.
			 *    (3) The number of items is SL_DEF_REPLICAS.
			 *    (4) The number of items is SL_MAX_REPLICAS.
			 */
			if (flag == IOSV_LOOKUPF_DEL) {
				/*
				 * Compact the array if the IOS is not
				 * the last one; the last one will be
				 * either overwritten or zeroed.  Note
				 * that we might move extra garbage at
				 * the end if the total number is less
				 * than SL_DEF_REPLICAS.
				 */
				if (i < SL_DEF_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1],
					    (SL_DEF_REPLICAS - j - 1) *
					    sizeof(*repl));
				}
				/*
				 * All items in the basic array, zero the last
				 * one and we are done.
				 */
				if (nr <= SL_DEF_REPLICAS) {
					repl[nr-1].bs_id = 0;
					goto syncit;
				}
				/*
				 * Now we know we have more than
				 * SL_DEF_REPLICAS items.  However, if
				 * we are in the basic array, we have
				 * not read the extra array yet.  In
				 * that case, also move the first item
				 * from the extra array into the last
				 * slot of the basic array (overwrite).
				 */
				if (i < SL_DEF_REPLICAS) {
					rc = mds_inox_ensure_loaded(ih);
					if (rc)
						goto out;
					ix = ih->inoh_extras;

					repl[SL_DEF_REPLICAS - 1].bs_id =
					    ix->inox_repls[0].bs_id;

					repl = ix->inox_repls;
					j = 0;
				}
				/*
				 * Compact the extra array unless the IOS is
				 * the last one, which will be zeroed.
				 */
				if (i < SL_MAX_REPLICAS - 1) {
					memmove(&repl[j], &repl[j + 1],
					    (SL_INOX_NREPLICAS - j - 1) *
					    sizeof(*repl));
				}

				repl[nr-SL_DEF_REPLICAS-1].bs_id = 0;
 syncit:
				ih->inoh_ino.ino_nrepls = nr - 1;
				rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
				if (rc)
					goto out;
			}
			/* XXX EEXIST for IOSV_LOOKUPF_ADD? */
			rc = i;
			goto out;
		}
	}

	/* It doesn't exist; add to inode replica table if requested. */
	if (flag == IOSV_LOOKUPF_ADD) {

		/* paranoid */
		psc_assert(i == nr);
		if (nr >= SL_DEF_REPLICAS) {
			/* Be careful with the case of nr == SL_DEF_REPLICAS. */
			rc = mds_inox_ensure_loaded(ih);
			if (rc)
				goto out;
			repl = ih->inoh_extras->inox_repls;
			j = i - SL_DEF_REPLICAS;

		} else {
			repl = ih->inoh_ino.ino_repls;
			j = i;
		}

		repl[j].bs_id = ios;

		DEBUG_INOH(PLL_DIAG, ih, buf, "add IOS(%u) at idx %d", ios, i);

		ih->inoh_ino.ino_nrepls = nr + 1;
		rc = mds_inodes_odsync(vfsid, f, mdslog_ino_repls);
		if (!rc)
			rc = i;
	}

 out:
	INOH_URLOCK(ih, locked);
	return (rc);
}
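
The trickiest piece of the DEL path above is compacting a replica table split between the inline inode slots (SL_DEF_REPLICAS) and the extra inode block. Reduced to a toy split array, and keeping the original's caveat about moving garbage past the tail, the shifting works like this (sizes and names invented; the lazy load of the extension is omitted):

#include <string.h>

#define DEF	4	/* stands in for SL_DEF_REPLICAS */
#define EXT	60	/* stands in for SL_INOX_NREPLICAS */

/*
 * Delete index i (0 <= i < nr) from an array split between an inline
 * part of DEF slots and an extension of EXT slots, compacting the
 * survivors left and zeroing the vacated slot.  Like the real code,
 * this may move garbage after the tail when nr < DEF.
 */
static void
split_array_del(int inl[DEF], int ext[EXT], unsigned nr, unsigned i)
{
	if (i < DEF) {
		/* Shift the inline tail left over the hole. */
		memmove(&inl[i], &inl[i + 1],
		    (DEF - i - 1) * sizeof(*inl));
		if (nr <= DEF) {
			inl[nr - 1] = 0;
			return;
		}
		/* Pull the first extension item into the inline part. */
		inl[DEF - 1] = ext[0];
		i = DEF;
	}
	memmove(&ext[i - DEF], &ext[i - DEF + 1],
	    (EXT - (i - DEF) - 1) * sizeof(*ext));
	ext[nr - DEF - 1] = 0;
}
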