Example #1
/** Add a CL_MARK record to the changelog
 * \param env - execution environment
 * \param mdd - mdd device
 * \param markerflags - CLM_* start/stop/purge flags for the marker
 * \retval 0 ok
 */
int mdd_changelog_write_header(const struct lu_env *env,
			       struct mdd_device *mdd, int markerflags)
{
	struct obd_device		*obd = mdd2obd_dev(mdd);
	struct llog_changelog_rec	*rec;
	struct lu_buf			*buf;
	struct llog_ctxt		*ctxt;
	int				 reclen;
	int				 len = strlen(obd->obd_name);
	int				 rc;

	ENTRY;

	/* if CL_MARK records are masked off there is nothing to write;
	 * just note the time */
	if (!(mdd->mdd_cl.mc_mask & (1 << CL_MARK))) {
		mdd->mdd_cl.mc_starttime = cfs_time_current_64();
		RETURN(0);
	}

	reclen = llog_data_len(sizeof(*rec) + len);
	buf = lu_buf_check_and_alloc(&mdd_env_info(env)->mti_big_buf, reclen);
	if (buf->lb_buf == NULL)
		RETURN(-ENOMEM);
	rec = buf->lb_buf;

	rec->cr.cr_flags = CLF_VERSION;
	rec->cr.cr_type = CL_MARK;
	rec->cr.cr_namelen = len;
	memcpy(changelog_rec_name(&rec->cr), obd->obd_name, rec->cr.cr_namelen);
	/* Status and action flags */
	rec->cr.cr_markerflags = mdd->mdd_cl.mc_flags | markerflags;
	rec->cr_hdr.lrh_len = llog_data_len(changelog_rec_size(&rec->cr) +
					    rec->cr.cr_namelen);
	rec->cr_hdr.lrh_type = CHANGELOG_REC;
	rec->cr.cr_time = cl_time();
	spin_lock(&mdd->mdd_cl.mc_lock);
	rec->cr.cr_index = ++mdd->mdd_cl.mc_index;
	spin_unlock(&mdd->mdd_cl.mc_lock);

	ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT);
	LASSERT(ctxt);

	rc = llog_cat_add(env, ctxt->loc_handle, &rec->cr_hdr, NULL);
	if (rc > 0)
		rc = 0;
	llog_ctxt_put(ctxt);

	/* assume on or off event; reset repeat-access time */
	mdd->mdd_cl.mc_starttime = cfs_time_current_64();
	RETURN(rc);
}
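
The record built above is a fixed llog header plus a variable-length name, with the total size rounded up by llog_data_len(). A minimal userspace sketch of the same variable-length-record pattern; demo_hdr, demo_rec and round8() are hypothetical stand-ins, not Lustre definitions:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* hypothetical stand-ins for the llog record types */
struct demo_hdr { unsigned int len; unsigned int type; };
struct demo_rec {
	struct demo_hdr hdr;
	char name[];		/* flexible array, like the cr_namelen bytes */
};

/* round up to an 8-byte boundary, analogous to llog_data_len() */
static size_t round8(size_t len) { return (len + 7) & ~(size_t)7; }

int main(void)
{
	const char *name = "lustre-MDT0000";
	size_t namelen = strlen(name);
	size_t reclen = round8(sizeof(struct demo_rec) + namelen);
	struct demo_rec *rec = calloc(1, reclen);

	if (rec == NULL)
		return 1;
	rec->hdr.len = (unsigned int)reclen;	/* total on-disk length */
	rec->hdr.type = 0x1;			/* stands in for CHANGELOG_REC */
	memcpy(rec->name, name, namelen);
	printf("record of %zu bytes for name '%s'\n", reclen, name);
	free(rec);
	return 0;
}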
Example #2
/** Remove entries with indices up to and including \a endrec from the
 *  changelog
 * \param mdd - mdd device
 * \param endrec - last record index to cancel
 * \retval 0 ok
 */
static int
mdd_changelog_llog_cancel(const struct lu_env *env, struct mdd_device *mdd,
			  long long endrec)
{
	struct obd_device *obd = mdd2obd_dev(mdd);
	struct llog_ctxt *ctxt;
	unsigned long long cur;
	int rc;

	ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT);
	if (ctxt == NULL)
		return -ENXIO;

	spin_lock(&mdd->mdd_cl.mc_lock);
	cur = (long long)mdd->mdd_cl.mc_index;
	spin_unlock(&mdd->mdd_cl.mc_lock);
	if (endrec > cur)
		endrec = cur;

	/* purge to "0" is shorthand for everything */
	if (endrec == 0)
		endrec = cur;

	/* If purging all records, write a header entry so we don't have an
	 * empty catalog and we're sure to have a valid starting index next
	 * time. In case of a crash we just restart with the old log, so we
	 * are all right. */
	if (endrec == cur) {
		/* XXX: transaction is started by llog itself */
		rc = mdd_changelog_write_header(env, mdd, CLM_PURGE);
		if (rc)
			goto out;
	}

	/* Some records were purged, so reset the repeat-access time (so we
	 * record new mtime update records and users can see that a file has
	 * been changed since the last purge) */
	mdd->mdd_cl.mc_starttime = cfs_time_current_64();

	rc = llog_cancel(env, ctxt, (struct llog_cookie *)&endrec, 0);
out:
	llog_ctxt_put(ctxt);
	return rc;
}
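
The endrec normalization above (clamp to the current index; treat 0 as "purge everything") is self-contained enough to test in isolation. A small standalone sketch with a hypothetical clamp_endrec() helper:

#include <assert.h>

/* hypothetical helper mirroring the endrec normalization above */
static unsigned long long clamp_endrec(unsigned long long endrec,
				       unsigned long long cur)
{
	if (endrec > cur)
		endrec = cur;
	/* purge to "0" is shorthand for everything */
	if (endrec == 0)
		endrec = cur;
	return endrec;
}

int main(void)
{
	assert(clamp_endrec(0, 100) == 100);	/* shorthand: purge all */
	assert(clamp_endrec(500, 100) == 100);	/* clamped to current index */
	assert(clamp_endrec(42, 100) == 42);	/* in-range value kept */
	return 0;
}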
Example #3
static int mdd_changelog_init(const struct lu_env *env, struct mdd_device *mdd)
{
	struct obd_device	*obd = mdd2obd_dev(mdd);
	int			 rc;

	mdd->mdd_cl.mc_index = 0;
	spin_lock_init(&mdd->mdd_cl.mc_lock);
	mdd->mdd_cl.mc_starttime = cfs_time_current_64();
	mdd->mdd_cl.mc_flags = 0; /* off by default */
	mdd->mdd_cl.mc_mask = CHANGELOG_DEFMASK;
	spin_lock_init(&mdd->mdd_cl.mc_user_lock);
	mdd->mdd_cl.mc_lastuser = 0;

	rc = mdd_changelog_llog_init(env, mdd);
	if (rc) {
		CERROR("%s: changelog setup during init failed: rc = %d\n",
		       obd->obd_name, rc);
		mdd->mdd_cl.mc_flags |= CLM_ERR;
	}

	return rc;
}
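
Note the error-handling choice here: a failed llog setup is logged and latched into mc_flags as CLM_ERR, but the defaults stay in place rather than tearing the device down. A toy standalone version of that pattern; demo_cl_state, DEMO_DEFMASK and DEMO_CLM_ERR are invented names:

#include <stdio.h>

#define DEMO_DEFMASK 0x7	/* stands in for CHANGELOG_DEFMASK */
#define DEMO_CLM_ERR 0x100	/* stands in for CLM_ERR */

struct demo_cl_state {
	unsigned long long index;
	unsigned int flags;	/* off by default: no "on" bit set */
	unsigned int mask;
};

/* pretend sub-initializer; returns nonzero to simulate llog setup failure */
static int demo_llog_init(void) { return -1; }

static int demo_changelog_init(struct demo_cl_state *cl)
{
	int rc;

	cl->index = 0;
	cl->flags = 0;			/* changelog off by default */
	cl->mask = DEMO_DEFMASK;	/* record the default event types */

	rc = demo_llog_init();
	if (rc) {
		fprintf(stderr, "changelog setup failed: rc = %d\n", rc);
		cl->flags |= DEMO_CLM_ERR;	/* remember error, keep going */
	}
	return rc;
}

int main(void)
{
	struct demo_cl_state cl;

	demo_changelog_init(&cl);
	printf("flags=0x%x mask=0x%x\n", cl.flags, cl.mask);
	return 0;
}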
Example #4
/**
 * Get file system statistics of OST server.
 *
 * Helper function for ofd_statfs(), also used by grant code.
 * Implements caching for statistics to avoid calling OSD device each time.
 *
 * \param[in]  env	  execution environment
 * \param[in]  ofd	  OFD device
 * \param[out] osfs	  statistic data to return
 * \param[in]  max_age	  maximum age for cached data
 * \param[out] from_cache set to 1 if data was served from cache, 0 otherwise
 *			  (may be NULL)
 *
 * \retval		0 if successful
 * \retval		negative value on error
 */
int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd,
                        struct obd_statfs *osfs, __u64 max_age, int *from_cache)
{
	int rc = 0;
	ENTRY;

	down_read(&ofd->ofd_lastid_rwsem);
	/* To be safe, we currently do not distinguish which LAST_ID is
	 * broken; we may do that in the future.
	 * Return -ENOSPC until the LAST_ID is rebuilt. */
	if (unlikely(ofd->ofd_lastid_rebuilding))
		GOTO(out, rc = -ENOSPC);

	spin_lock(&ofd->ofd_osfs_lock);
	if (cfs_time_before_64(ofd->ofd_osfs_age, max_age) || max_age == 0) {
		u64 unstable;

		/* statfs data is too old, get an up-to-date copy.
		 * We must be cautious here since multiple threads might
		 * update statfs data concurrently and we must guarantee
		 * that the cached statfs data stays consistent */

		if (ofd->ofd_statfs_inflight == 0)
			/* clear inflight counter if no users, although it would
			 * take a while to overflow this 64-bit counter ... */
			ofd->ofd_osfs_inflight = 0;
		/* notify ofd_grant_commit() that we want to track writes
		 * completed as of now */
		ofd->ofd_statfs_inflight++;
		/* record value of inflight counter before running statfs to
		 * compute the diff once statfs is completed */
		unstable = ofd->ofd_osfs_inflight;
		spin_unlock(&ofd->ofd_osfs_lock);

		/* statfs can sleep ... hopefully not for too long since we can
		 * call it fairly often as space fills up */
		rc = dt_statfs(env, ofd->ofd_osd, osfs);
		if (unlikely(rc))
			GOTO(out, rc);

		spin_lock(&ofd->ofd_grant_lock);
		spin_lock(&ofd->ofd_osfs_lock);
		/* calculate how much space was written while we released the
		 * ofd_osfs_lock */
		unstable = ofd->ofd_osfs_inflight - unstable;
		ofd->ofd_osfs_unstable = 0;
		if (unstable) {
			/* some writes completed while we were running statfs
			 * w/o the ofd_osfs_lock. Those ones got added to
			 * the cached statfs data that we are about to crunch.
			 * Take them into account in the new statfs data */
			osfs->os_bavail -= min_t(u64, osfs->os_bavail,
					       unstable >> ofd->ofd_blockbits);
			/* However, we don't really know if those writes got
			 * accounted in the statfs call, so tell
			 * ofd_grant_space_left() there is some uncertainty
			 * on the accounting of those writes.
			 * The purpose is to prevent spurious error messages in
			 * ofd_grant_space_left() since those writes might be
			 * accounted twice. */
			ofd->ofd_osfs_unstable += unstable;
		}
		/* similarly, there is some uncertainty on write requests
		 * between prepare & commit */
		ofd->ofd_osfs_unstable += ofd->ofd_tot_pending;
		spin_unlock(&ofd->ofd_grant_lock);

		/* finally update cached statfs data */
		ofd->ofd_osfs = *osfs;
		ofd->ofd_osfs_age = cfs_time_current_64();

		ofd->ofd_statfs_inflight--; /* stop tracking */
		if (ofd->ofd_statfs_inflight == 0)
			ofd->ofd_osfs_inflight = 0;
		spin_unlock(&ofd->ofd_osfs_lock);

		if (from_cache)
			*from_cache = 0;
	} else {
		/* the cached copy is recent enough; serve it without
		 * calling the OSD again */
		*osfs = ofd->ofd_osfs;
		spin_unlock(&ofd->ofd_osfs_lock);
		if (from_cache)
			*from_cache = 1;
	}

out:
	up_read(&ofd->ofd_lastid_rwsem);
	RETURN(rc);
}
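
The heart of this function is the inflight counter: snapshot it before dropping ofd_osfs_lock, let the slow dt_statfs() run, then treat whatever committed in the meantime as "unstable" bytes that may or may not already be reflected in the fresh statfs answer. A single-threaded sketch of just that arithmetic; demo_cache and demo_refresh() are hypothetical, and the real code does this under spinlocks with genuinely concurrent writers:

#include <stdio.h>

/* hypothetical cache state; the real one lives in struct ofd_device */
struct demo_cache {
	unsigned long long inflight;	/* bytes committed since tracking began */
	unsigned long long bavail;	/* cached available blocks */
	unsigned int blockbits;
};

static void demo_refresh(struct demo_cache *c)
{
	/* snapshot before "dropping the lock" */
	unsigned long long before = c->inflight;
	unsigned long long unstable;

	/* ... slow statfs runs here; concurrent commits bump inflight ... */
	c->inflight += 1 << 20;	/* simulate 1 MiB committed meanwhile */
	c->bavail = 1000;	/* simulate the fresh statfs answer */

	/* writes completed during statfs may or may not be in the answer;
	 * subtract them defensively and remember the uncertainty */
	unstable = c->inflight - before;
	if (c->bavail >= (unstable >> c->blockbits))
		c->bavail -= unstable >> c->blockbits;
	else
		c->bavail = 0;
	printf("bavail=%llu blocks, unstable=%llu bytes\n",
	       c->bavail, unstable);
}

int main(void)
{
	struct demo_cache c = { 0, 0, 12 };	/* 4 KiB blocks */

	demo_refresh(&c);
	return 0;
}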
Example #5
/**
 * Callback function called when an acquire/release request sent to the master
 * is completed
 */
static void qsd_req_completion(const struct lu_env *env,
			       struct qsd_qtype_info *qqi,
			       struct quota_body *reqbody,
			       struct quota_body *repbody,
			       struct lustre_handle *lockh,
			       struct lquota_lvb *lvb,
			       void *arg, int ret)
{
	struct lquota_entry	*lqe = (struct lquota_entry *)arg;
	struct qsd_thread_info	*qti;
	int			 rc;
	bool			 adjust = false, cancel = false;
	ENTRY;

	LASSERT(qqi != NULL && lqe != NULL);

	/* the environment passed by ptlrpcd is mostly used by CLIO and
	 * doesn't have the DT tags set. */
	rc = lu_env_refill_by_tags((struct lu_env *)env, LCT_DT_THREAD, 0);
	if (rc) {
		LQUOTA_ERROR(lqe, "failed to refill environment %d", rc);
		lqe_write_lock(lqe);
		/* can't afford to adjust quota space with no suitable lu_env */
		GOTO(out_noadjust, rc);
	}
	qti = qsd_info(env);

	lqe_write_lock(lqe);
	LQUOTA_DEBUG(lqe, "DQACQ returned %d, flags:0x%x", ret,
		     reqbody->qb_flags);

	/* despite -EDQUOT & -EINPROGRESS errors, the master might still
	 * grant us back quota space to adjust quota overrun */
	if (ret != 0 && ret != -EDQUOT && ret != -EINPROGRESS) {
		if (ret != -ETIMEDOUT && ret != -ENOTCONN &&
		   ret != -ESHUTDOWN && ret != -EAGAIN)
			/* print errors only if return code is unexpected */
			LQUOTA_ERROR(lqe, "DQACQ failed with %d, flags:0x%x",
				     ret, reqbody->qb_flags);
		GOTO(out, ret);
	}

	/* Set the lqe_lockh */
	if (lustre_handle_is_used(lockh) &&
	    !lustre_handle_equal(lockh, &lqe->lqe_lockh))
		lustre_handle_copy(&lqe->lqe_lockh, lockh);

	/* If the replied qb_count is zero, it means the master didn't process
	 * the DQACQ since the limit for this ID has been removed, so we
	 * should not update the quota entry or the slave index copy either. */
	if (repbody != NULL && repbody->qb_count != 0) {
		LQUOTA_DEBUG(lqe, "DQACQ qb_count:"LPU64, repbody->qb_count);

		if (req_is_rel(reqbody->qb_flags)) {
			if (lqe->lqe_granted < repbody->qb_count) {
				LQUOTA_ERROR(lqe, "can't release more space "
					     "than owned "LPU64"<"LPU64,
					     lqe->lqe_granted,
					     repbody->qb_count);
				lqe->lqe_granted = 0;
			} else {
				lqe->lqe_granted -= repbody->qb_count;
			}
			/* Proactively cancel the per-ID lock when there is
			 * no usage or grant left, which avoids the master
			 * sending unnecessary glimpses to this slave on
			 * quota revocation */
			if (!lqe->lqe_pending_write && !lqe->lqe_granted &&
			    !lqe->lqe_waiting_write && !lqe->lqe_usage)
				cancel = true;
		} else {
			lqe->lqe_granted += repbody->qb_count;
		}
		qti->qti_rec.lqr_slv_rec.qsr_granted = lqe->lqe_granted;
		lqe_write_unlock(lqe);

		/* Update the slave index file in the dedicated thread. So far
		 * we don't update the version of the slave index copy on
		 * DQACQ. No locking is necessary since nobody can change
		 * lqe->lqe_granted while lqe->lqe_pending_req > 0 */
		qsd_upd_schedule(qqi, lqe, &lqe->lqe_id, &qti->qti_rec, 0,
				 false);
		lqe_write_lock(lqe);
	}

	/* extract information from lvb */
	if (ret == 0 && lvb != NULL) {
		if (lvb->lvb_id_qunit != 0)
			qsd_set_qunit(lqe, lvb->lvb_id_qunit);
		qsd_set_edquot(lqe, !!(lvb->lvb_flags & LQUOTA_FL_EDQUOT));
	} else if (repbody != NULL && repbody->qb_qunit != 0) {
		qsd_set_qunit(lqe, repbody->qb_qunit);
	}

	/* turn off pre-acquire if it failed with -EDQUOT. This is done to
	 * avoid flooding the master with acquire requests. Pre-acquire will
	 * be turned on again as soon as qunit is modified */
	if (req_is_preacq(reqbody->qb_flags) && ret == -EDQUOT)
		lqe->lqe_nopreacq = true;
out:
	adjust = qsd_adjust_needed(lqe);
	if (reqbody && req_is_acq(reqbody->qb_flags) && ret != -EDQUOT) {
		lqe->lqe_acq_rc = ret;
		lqe->lqe_acq_time = cfs_time_current_64();
	}
out_noadjust:
	qsd_request_exit(lqe);
	lqe_write_unlock(lqe);

	/* release reference on per-ID lock */
	if (lustre_handle_is_used(lockh))
		ldlm_lock_decref(lockh, qsd_id_einfo.ei_mode);

	if (cancel) {
		qsd_adjust_schedule(lqe, false, true);
	} else if (adjust) {
		if (!ret || ret == -EDQUOT)
			qsd_adjust_schedule(lqe, false, false);
		else
			qsd_adjust_schedule(lqe, true, false);
	}
	lqe_putref(lqe);

	if (lvb)
		OBD_FREE_PTR(lvb);
	EXIT;
}
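
The tail of the callback boils down to a three-way scheduling decision: lock cancellation wins outright; otherwise adjust immediately on success or -EDQUOT, or defer the adjustment on a transient error. A hypothetical pure-function sketch of that branching with demo_decide():

#include <assert.h>
#include <errno.h>
#include <stdbool.h>

enum demo_action { DEMO_NONE, DEMO_CANCEL, DEMO_ADJUST_NOW, DEMO_ADJUST_LATER };

/* mirrors the cancel/adjust branching at the end of qsd_req_completion() */
static enum demo_action demo_decide(bool cancel, bool adjust, int ret)
{
	if (cancel)
		return DEMO_CANCEL;		/* lock release wins outright */
	if (!adjust)
		return DEMO_NONE;
	if (ret == 0 || ret == -EDQUOT)
		return DEMO_ADJUST_NOW;		/* usable reply: adjust now */
	return DEMO_ADJUST_LATER;		/* transient error: retry later */
}

int main(void)
{
	assert(demo_decide(true, true, 0) == DEMO_CANCEL);
	assert(demo_decide(false, true, 0) == DEMO_ADJUST_NOW);
	assert(demo_decide(false, true, -EDQUOT) == DEMO_ADJUST_NOW);
	assert(demo_decide(false, true, -ETIMEDOUT) == DEMO_ADJUST_LATER);
	assert(demo_decide(false, false, 0) == DEMO_NONE);
	return 0;
}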