Пример #1
0
/*
 * Consult current disk space consumed by a given identifier.
 *
 * \param env   - the environment passed by the caller
 * \param qqi   - is the pointer to the qsd_qtype_info structure associated
 *                with the identifier.
 * \param lqe   - is the quota entry associated with the identifier
 */
int qsd_refresh_usage(const struct lu_env *env, struct lquota_entry *lqe)
{
	struct qsd_thread_info	*qti = qsd_info(env);
	struct lquota_acct_rec	*rec = &qti->qti_acct_rec;
	struct qsd_qtype_info	*qqi = lqe2qqi(lqe);
	int			 rc = 0;
	ENTRY;

	LASSERT(qqi->qqi_acct_obj);

	/* read disk usage */
	rc = lquota_disk_read(env, qqi->qqi_acct_obj, &lqe->lqe_id,
			      (struct dt_rec *)rec);
	switch(rc) {
	case -ENOENT:
		lqe->lqe_usage = 0;
		rc = 0;
		break;
	case 0:
		if (qqi->qqi_qsd->qsd_is_md)
			lqe->lqe_usage = rec->ispace;
		else
			lqe->lqe_usage = toqb(rec->bspace);
		break;
	default:
		LQUOTA_ERROR(lqe, "failed to read disk usage, rc:%d", rc);
		RETURN(rc);
	}

	LQUOTA_DEBUG(lqe, "disk usage: "LPU64, lqe->lqe_usage);
	RETURN(0);
}
Пример #2
0
/**
 * Counterpart of qsd_request_enter(): marks the in-flight quota request of
 * \a lqe as completed.
 *
 * Exactly one request is expected to be pending on entry (LBUG otherwise).
 * lqe_pending_req is decremented, lqe_pending_rel is cleared and every thread
 * sleeping on lqe_waiters is woken up.
 */
static inline void qsd_request_exit(struct lquota_entry *lqe)
{
	/* there must be one, and only one, request in flight */
	if (lqe->lqe_pending_req != 1) {
		LQUOTA_ERROR(lqe, "lqe_pending_req != 1!!!");
		LBUG();
	}

	lqe->lqe_pending_rel = 0;
	lqe->lqe_pending_req -= 1;

	/* let waiters re-evaluate their condition */
	wake_up_all(&lqe->lqe_waiters);
}
Пример #3
0
/**
 * Try to claim the right to send a quota request for the lquota entry \a lqe.
 *
 * \retval 0      - no request was in flight, lqe_pending_req has been bumped
 * \retval -EBUSY - a quota request is already in flight for this entry
 */
static inline int qsd_request_enter(struct lquota_entry *lqe)
{
	/* only one quota request may be in flight per entry */
	if (lqe->lqe_pending_req) {
		LQUOTA_DEBUG(lqe, "already a request in flight");
		return -EBUSY;
	}

	/* a pending release without any request in flight is inconsistent */
	if (lqe->lqe_pending_rel) {
		LQUOTA_ERROR(lqe, "no request in flight with pending_rel="LPU64,
			     lqe->lqe_pending_rel);
		LBUG();
	}

	lqe->lqe_pending_req += 1;
	return 0;
}
Пример #4
0
/*
 * Update a slave quota entry from its on-disk copies. The enforcement status
 * is read from the copy of the global index, the amount of quota space the
 * slave currently owns for this ID from the slave index copy, and the disk
 * usage is refreshed through qsd_refresh_usage().
 *
 * \param env - the environment passed by the caller
 * \param lqe - is the quota entry to refresh
 * \param arg - is the pointer to the qsd_qtype_info structure
 *
 * \retval 0   - success (a record missing from either index is not an error)
 * \retval -ve - failure while reading one of the indexes or the disk usage
 */
static int qsd_lqe_read(const struct lu_env *env, struct lquota_entry *lqe,
			void *arg)
{
	struct qsd_thread_info *info = qsd_info(env);
	struct qsd_qtype_info  *qqi = arg;
	int			rc;

	LASSERT(!lqe_is_master(lqe));

	/* global index copy: tells whether quota is enforced for this ID */
	rc = lquota_disk_read(env, qqi->qqi_glb_obj, &lqe->lqe_id,
			      (struct dt_rec *)&info->qti_glb_rec);
	if (rc == 0) {
		/* enforced as soon as one of the two limits is set */
		lqe->lqe_enforced = info->qti_glb_rec.qbr_hardlimit != 0 ||
				    info->qti_glb_rec.qbr_softlimit != 0;
	} else if (rc == -ENOENT) {
		/* no record at all, assume quota isn't enforced for this ID */
		lqe->lqe_enforced = false;
	} else {
		LQUOTA_ERROR(lqe, "failed to read quota entry from global "
			     "index copy, rc:%d", rc);
		return rc;
	}

	/* slave index copy: how much space is currently owned by this slave */
	rc = lquota_disk_read(env, qqi->qqi_slv_obj, &lqe->lqe_id,
			      (struct dt_rec *)&info->qti_slv_rec);
	if (rc == 0) {
		lqe->lqe_granted = info->qti_slv_rec.qsr_granted;
	} else if (rc == -ENOENT) {
		/* nothing recorded for this ID, nothing granted */
		lqe->lqe_granted = 0;
	} else {
		LQUOTA_ERROR(lqe, "failed to read quota entry from slave "
			     "index copy, rc:%d", rc);
		return rc;
	}

	/* the qunit value isn't known at this point */
	qsd_set_qunit(lqe, 0);

	/* finally, refresh the current disk usage */
	rc = qsd_refresh_usage(env, lqe);
	if (rc == 0) {
		LQUOTA_DEBUG(lqe, "successfully read from disk");
	}
	return rc;
}
Пример #5
0
/*
 * Helper function to handle quota request from slave.
 *
 * The function looks up the index file of the requesting slave, starts a
 * transaction and then, under the lqe write lock, processes the request
 * (release, usage report, pre-acquire or acquire). When the request resulted
 * in quota space being granted or released (repbody->qb_count != 0), the slave
 * index and then the global index are written; if one of those writes fails
 * the in-memory settings are restored and qb_count is reset to 0.
 *
 * \param env     - is the environment passed by the caller
 * \param lqe     - is the lquota_entry subject to the quota request
 * \param qmt     - is the master device
 * \param uuid    - is the uuid associated with the slave, must not be NULL
 * \param qb_flags - are the quota request flags as packed in the quota_body
 * \param qb_count - is the amount of quota space the slave wants to
 *                   acquire/release
 * \param qb_usage - is the current space usage on the slave
 * \param repbody - is the quota_body of reply, zeroed then filled in here
 *
 * \retval 0            : success
 * \retval -EDQUOT      : out of quota
 * \retval -EINPROGRESS : inform client to retry write/create
 * \retval -ESRCH       : the ID isn't enforced on the master and the request
 *                        isn't a release
 * \retval -EINVAL      : the slave tries to release more than was granted
 * \retval -ve          : other appropriate errors
 */
int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
	       struct qmt_device *qmt, struct obd_uuid *uuid, __u32 qb_flags,
	       __u64 qb_count, __u64 qb_usage, struct quota_body *repbody)
{
	struct qmt_thread_info	*qti = qmt_info(env);
	__u64			 now, count;
	struct dt_object	*slv_obj = NULL;
	__u64			 slv_granted, slv_granted_bck;
	struct thandle		*th = NULL;
	int			 rc, ret;
	ENTRY;

	LASSERT(uuid != NULL);

	/* initialize reply */
	memset(repbody, 0, sizeof(*repbody));
	memcpy(&repbody->qb_id, &lqe->lqe_id, sizeof(repbody->qb_id));

	/* fail-check hook: bail out with -cfs_fail_val before any resource
	 * (slave object, transaction, lock) has been acquired */
	if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_RECOVERABLE_ERR))
		RETURN(-cfs_fail_val);

	/* look-up index file associated with acquiring slave */
	slv_obj = lquota_disk_slv_find(env, qmt->qmt_child, LQE_ROOT(lqe),
				       lu_object_fid(&LQE_GLB_OBJ(lqe)->do_lu),
				       uuid);
	if (IS_ERR(slv_obj))
		GOTO(out, rc = PTR_ERR(slv_obj));

	/* pack slave fid in reply just for sanity check */
	memcpy(&repbody->qb_slv_fid, lu_object_fid(&slv_obj->do_lu),
	       sizeof(struct lu_fid));

	/* allocate & start transaction with enough credits to update
	 * global & slave indexes */
	th = qmt_trans_start_with_slv(env, lqe, slv_obj, &qti->qti_restore);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	lqe_write_lock(lqe);
	LQUOTA_DEBUG(lqe, "dqacq starts uuid:%s flags:0x%x wanted:"LPU64
		     " usage:"LPU64, obd_uuid2str(uuid), qb_flags, qb_count,
		     qb_usage);

	/* Legal race, limits have been removed on master, but slave didn't
	 * receive the change yet. Return -ESRCH (not enforced) for anything
	 * but a release until the slave gets notified. */
	if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
		GOTO(out_locked, rc = -ESRCH);

	/* recompute qunit in case it was never initialized */
	qmt_revalidate(env, lqe);

	/* slave just wants to acquire per-ID lock */
	if (req_is_acq(qb_flags) && qb_count == 0)
		GOTO(out_locked, rc = 0);

	/* fetch how much quota space is already granted to this slave */
	rc = qmt_slv_read(env, lqe, slv_obj, &slv_granted);
	if (rc) {
		LQUOTA_ERROR(lqe, "Failed to get granted for slave %s, rc=%d",
			     obd_uuid2str(uuid), rc);
		GOTO(out_locked, rc);
	}
	/* recall how much space this slave currently owns in order to restore
	 * it in case of failure */
	slv_granted_bck = slv_granted;

	/* record current time for soft limit & grace time management */
	now = (__u64)cfs_time_current_sec();

	/* NOTE(review): QMT_REL()/QMT_GRANT() are macros defined elsewhere;
	 * they presumably adjust both lqe->lqe_granted and the local
	 * slv_granted, which is what gets written to the slave index below -
	 * confirm against their definition */
	if (req_is_rel(qb_flags)) {
		/* Slave would like to release quota space */
		if (slv_granted < qb_count ||
		    lqe->lqe_granted < qb_count) {
			/* can't release more than granted */
			LQUOTA_ERROR(lqe, "Release too much! uuid:%s release:"
				     LPU64" granted:"LPU64", total:"LPU64,
				     obd_uuid2str(uuid), qb_count,
				     slv_granted, lqe->lqe_granted);
			GOTO(out_locked, rc = -EINVAL);
		}

		repbody->qb_count = qb_count;
		/* put released space back to global pool */
		QMT_REL(lqe, slv_granted, qb_count);
		GOTO(out_write, rc = 0);
	}

	if (req_has_rep(qb_flags) && slv_granted < qb_usage) {
		/* Slave is reporting space usage in quota request and it turns
		 * out to be using more quota space than owned, so we adjust
		 * granted space regardless of the current state of affairs */
		repbody->qb_count = qb_usage - slv_granted;
		QMT_GRANT(lqe, slv_granted, repbody->qb_count);
	}

	/* neither an acquire nor a pre-acquire: nothing more to grant, just
	 * record the adjustment made above (if any) */
	if (!req_is_acq(qb_flags) && !req_is_preacq(qb_flags))
		GOTO(out_write, rc = 0);

	qmt_adjust_edquot(lqe, now);
	if (lqe->lqe_edquot)
		/* no hope to claim further space back */
		GOTO(out_write, rc = -EDQUOT);

	if (qmt_space_exhausted(lqe, now)) {
		/* might have some free space once rebalancing is completed:
		 * an acquire is asked to retry (-EINPROGRESS) whereas a
		 * pre-acquire simply fails with -EDQUOT */
		rc = req_is_acq(qb_flags) ? -EINPROGRESS : -EDQUOT;
		GOTO(out_write, rc);
	}

	if (req_is_preacq(qb_flags)) {
		/* slave would like to pre-acquire quota space. To do so, it
		 * reports in qb_count how much spare quota space it owns and we
		 * can grant back quota space which is consistent with qunit
		 * value. */

		if (qb_count >= lqe->lqe_qunit)
			/* slave already own the maximum it should */
			GOTO(out_write, rc = 0);

		count = qmt_alloc_expand(lqe, slv_granted, qb_count);
		if (count == 0)
			GOTO(out_write, rc = -EDQUOT);

		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	/* processing acquire request with clients waiting */
	if (lqe->lqe_hardlimit != 0 &&
	    lqe->lqe_granted + qb_count > lqe->lqe_hardlimit) {
		/* cannot grant as much as asked, but can still afford to grant
		 * some quota space back */
		count = lqe->lqe_hardlimit - lqe->lqe_granted;
		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	/* The slave request can be satisfied in full */
	repbody->qb_count += qb_count;
	QMT_GRANT(lqe, slv_granted, qb_count);

	/* Try to expand the acquired count for DQACQ */
	count = qmt_alloc_expand(lqe, slv_granted, 0);
	if (count != 0) {
		/* can even grant more than asked */
		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	GOTO(out_write, rc = 0);
out_write:
	/* nothing was granted or released: no index needs to be updated, the
	 * return code computed above is kept as is */
	if (repbody->qb_count == 0)
		GOTO(out_locked, rc);

	/* start/stop grace timer if required */
	if (lqe->lqe_softlimit != 0) {
		if (lqe->lqe_granted > lqe->lqe_softlimit &&
		    lqe->lqe_gracetime == 0)
			/* first time over soft limit, let's start grace
			 * timer */
			lqe->lqe_gracetime = now + qmt_lqe_grace(lqe);
		else if (lqe->lqe_granted <= lqe->lqe_softlimit &&
			 lqe->lqe_gracetime != 0)
			/* Clear grace timer */
			lqe->lqe_gracetime = 0;
	}

	/* Update slave index first since it is easier to roll back */
	ret = qmt_slv_write(env, th, lqe, slv_obj, LQUOTA_BUMP_VER,
			    &repbody->qb_slv_ver, slv_granted);
	if (ret) {
		/* restore initial quota settings */
		qmt_restore(lqe, &qti->qti_restore);
		/* reset qb_count */
		repbody->qb_count = 0;
		GOTO(out_locked, rc = ret);
	}

	/* Update global index, no version bump needed */
	ret = qmt_glb_write(env, th, lqe, 0, NULL);
	if (ret) {
		/* remember the global write error, ret is reused below */
		rc = ret;
		/* restore initial quota settings */
		qmt_restore(lqe, &qti->qti_restore);
		/* reset qb_count */
		repbody->qb_count = 0;

		/* restore previous granted value */
		ret = qmt_slv_write(env, th, lqe, slv_obj, 0, NULL,
				    slv_granted_bck);
		if (ret) {
			/* slave index can't be rolled back, this is fatal */
			LQUOTA_ERROR(lqe, "failed to restore initial slave "
				     "value rc:%d ret%d", rc, ret);
			LBUG();
		}
		qmt_adjust_edquot(lqe, now);
		GOTO(out_locked, rc);
	}

	/* Total granted has been changed, let's try to adjust the qunit
	 * size according to the total granted & limits. */
	qmt_adjust_qunit(env, lqe);

	/* clear/set edquot flag and notify slaves via glimpse if needed */
	qmt_adjust_edquot(lqe, now);
out_locked:
	LQUOTA_DEBUG(lqe, "dqacq ends count:"LPU64" ver:"LPU64" rc:%d",
		     repbody->qb_count, repbody->qb_slv_ver, rc);
	lqe_write_unlock(lqe);
out:
	/* th and slv_obj are either NULL, an ERR_PTR or a valid pointer
	 * depending on how far the function went */
	if (th != NULL && !IS_ERR(th))
		dt_trans_stop(env, qmt->qmt_child, th);

	if (slv_obj != NULL && !IS_ERR(slv_obj))
		lu_object_put(env, &slv_obj->do_lu);

	if ((req_is_acq(qb_flags) || req_is_preacq(qb_flags)) &&
	    OBD_FAIL_CHECK(OBD_FAIL_QUOTA_EDQUOT)) {
		/* introduce inconsistency between granted value in slave index
		 * and slave index copy of slave */
		repbody->qb_count = 0;
		rc = -EDQUOT;
	}

	RETURN(rc);
}
Пример #6
0
/**
 * Quota enforcement handler. If local quota can satisfy this operation,
 * return success, otherwise, acquire more quota from master.
 * (for write operation, if master isn't available at this moment, return
 * -EINPROGRESS to inform client to retry the write)
 *
 * \param env   - the environment passed by the caller
 * \param qsd   - is the qsd instance associated with the device in charge
 *                of the operation.
 * \param qid   - is the qid information attached in the transaction handle
 * \param space - is the space required by the operation
 * \param flags - if the operation is write, return caller no user/group
 *                and sync commit flags
 *
 * \retval 0            - success
 * \retval -EDQUOT      - out of quota
 * \retval -EINPROGRESS - inform client to retry write
 * \retval -ve          - other appropriate errors
 */
static int qsd_op_begin0(const struct lu_env *env, struct qsd_qtype_info *qqi,
			 struct lquota_id_info *qid, long long space,
			 int *flags)
{
	struct lquota_entry	*lqe;
	int			 rc, ret = -EINPROGRESS;
	struct l_wait_info	 lwi;
	ENTRY;

	if (qid->lqi_qentry != NULL) {
		/* we already had to deal with this id for this transaction */
		lqe = qid->lqi_qentry;
		if (!lqe->lqe_enforced)
			RETURN(0);
	} else {
		/* look up lquota entry associated with qid */
		lqe = lqe_locate(env, qqi->qqi_site, &qid->lqi_id);
		if (IS_ERR(lqe))
			RETURN(PTR_ERR(lqe));
		if (!lqe->lqe_enforced) {
			lqe_putref(lqe);
			RETURN(0);
		}
		qid->lqi_qentry = lqe;
		/* lqe will be released in qsd_op_end() */
	}

	if (space <= 0) {
		/* when space is negative or null, we don't need to consume
		 * quota space. That said, we still want to perform space
		 * adjustments in qsd_op_end, so we return here, but with
		 * a reference on the lqe */
		if (flags != NULL) {
			rc = qsd_refresh_usage(env, lqe);
			GOTO(out_flags, rc);
		}
		RETURN(0);
	}

	LQUOTA_DEBUG(lqe, "op_begin space:"LPD64, space);

	lqe_write_lock(lqe);
	lqe->lqe_waiting_write += space;
	lqe_write_unlock(lqe);

	/* acquire quota space for the operation, cap overall wait time to
	 * prevent a service thread from being stuck for too long */
	lwi = LWI_TIMEOUT(cfs_time_seconds(qsd_wait_timeout(qqi->qqi_qsd)),
			  NULL, NULL);
	rc = l_wait_event(lqe->lqe_waiters, qsd_acquire(env, lqe, space, &ret),
			  &lwi);

	if (rc == 0 && ret == 0) {
		qid->lqi_space += space;
	} else {
		if (rc == 0)
			rc = ret;

		LQUOTA_DEBUG(lqe, "acquire quota failed:%d", rc);

		lqe_write_lock(lqe);
		lqe->lqe_waiting_write -= space;

		if (flags && lqe->lqe_pending_write != 0)
			/* Inform OSD layer that there are pending writes.
			 * It might want to retry after a sync if appropriate */
			 *flags |= QUOTA_FL_SYNC;
		lqe_write_unlock(lqe);

		/* convert recoverable error into -EINPROGRESS, client will
		 * retry */
		if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ENOLCK ||
		    rc == -EAGAIN || rc == -EINTR) {
			rc = -EINPROGRESS;
		} else if (rc == -ESRCH) {
			rc = 0;
			LQUOTA_ERROR(lqe, "ID isn't enforced on master, it "
				     "probably due to a legeal race, if this "
				     "message is showing up constantly, there "
				     "could be some inconsistence between "
				     "master & slave, and quota reintegration "
				     "needs be re-triggered.");
		}
	}

	if (flags != NULL) {
out_flags:
		LASSERT(qid->lqi_is_blk);
		if (rc != 0) {
			*flags |= LQUOTA_OVER_FL(qqi->qqi_qtype);
		} else {
			__u64	usage;

			lqe_read_lock(lqe);
			usage  = lqe->lqe_usage;
			usage += lqe->lqe_pending_write;
			usage += lqe->lqe_waiting_write;
			usage += qqi->qqi_qsd->qsd_sync_threshold;

			/* if we should notify client to start sync write */
			if (usage >= lqe->lqe_granted - lqe->lqe_pending_rel)
				*flags |= LQUOTA_OVER_FL(qqi->qqi_qtype);
			else
				*flags &= ~LQUOTA_OVER_FL(qqi->qqi_qtype);
			lqe_read_unlock(lqe);
		}
	}
	RETURN(rc);
}
Пример #7
0
/**
 * Callback function called when an acquire/release request sent to the master
 * is completed.
 *
 * Under the lqe write lock, the reply is applied to the quota entry (granted
 * space, per-ID lock handle, qunit and edquot flag), then the in-flight
 * request is retired through qsd_request_exit(). Once the lock is dropped,
 * the per-ID lock reference is released, a quota adjustment is scheduled if
 * needed, the reference on \a lqe is dropped and \a lvb is freed.
 *
 * \param env     - the environment passed by the caller, refilled here with
 *                  the LCT_DT_THREAD tag
 * \param qqi     - is the qsd_qtype_info structure, must not be NULL
 * \param reqbody - is the quota_body of the request which was sent
 * \param repbody - is the quota_body of the reply, may be NULL
 * \param lockh   - is the per-ID lock handle
 * \param lvb     - is the lock value block, may be NULL; freed on return
 * \param arg     - is the lquota_entry the request was issued for
 * \param ret     - is the return code of the request
 */
static void qsd_req_completion(const struct lu_env *env,
			       struct qsd_qtype_info *qqi,
			       struct quota_body *reqbody,
			       struct quota_body *repbody,
			       struct lustre_handle *lockh,
			       struct lquota_lvb *lvb,
			       void *arg, int ret)
{
	struct lquota_entry	*lqe = (struct lquota_entry *)arg;
	/* only assigned once the environment has been refilled; the failure
	 * path jumps to out_noadjust, which does not use it */
	struct qsd_thread_info	*qti;
	int			 rc;
	bool			 adjust = false, cancel = false;
	ENTRY;

	LASSERT(qqi != NULL && lqe != NULL);

	/* environment passed by ptlrpcd is mostly used by CLIO and hasn't the
	 * DT tags set. */
	rc = lu_env_refill_by_tags((struct lu_env *)env, LCT_DT_THREAD, 0);
	if (rc) {
		LQUOTA_ERROR(lqe, "failed to refill environmnent %d", rc);
		/* out_noadjust expects the lqe write lock to be held */
		lqe_write_lock(lqe);
		/* can't afford to adjust quota space with no suitable lu_env */
		GOTO(out_noadjust, rc);
	}
	qti = qsd_info(env);

	lqe_write_lock(lqe);
	LQUOTA_DEBUG(lqe, "DQACQ returned %d, flags:0x%x", ret,
		     reqbody->qb_flags);

	/* despite -EDQUOT & -EINPROGRESS errors, the master might still
	 * grant us back quota space to adjust quota overrun */
	if (ret != 0 && ret != -EDQUOT && ret != -EINPROGRESS) {
		if (ret != -ETIMEDOUT && ret != -ENOTCONN &&
		   ret != -ESHUTDOWN && ret != -EAGAIN)
			/* print errors only if return code is unexpected */
			LQUOTA_ERROR(lqe, "DQACQ failed with %d, flags:0x%x",
				     ret, reqbody->qb_flags);
		GOTO(out, ret);
	}

	/* Set the lqe_lockh */
	if (lustre_handle_is_used(lockh) &&
	    !lustre_handle_equal(lockh, &lqe->lqe_lockh))
		lustre_handle_copy(&lqe->lqe_lockh, lockh);

	/* If the replied qb_count is zero, it means master didn't process
	 * the DQACQ since the limit for this ID has been removed, so we
	 * should update neither the quota entry nor the slave index copy. */
	if (repbody != NULL && repbody->qb_count != 0) {
		LQUOTA_DEBUG(lqe, "DQACQ qb_count:"LPU64, repbody->qb_count);

		if (req_is_rel(reqbody->qb_flags)) {
			if (lqe->lqe_granted < repbody->qb_count) {
				/* clamp to 0 rather than underflowing */
				LQUOTA_ERROR(lqe, "can't release more space "
					     "than owned "LPU64"<"LPU64,
					     lqe->lqe_granted,
					     repbody->qb_count);
				lqe->lqe_granted = 0;
			} else {
				lqe->lqe_granted -= repbody->qb_count;
			}
			/* Proactively cancel the per-ID lock when there
			 * isn't any usage & grant, which can avoid master
			 * sending glimpse unnecessarily to this slave on
			 * quota revoking */
			if (!lqe->lqe_pending_write && !lqe->lqe_granted &&
			    !lqe->lqe_waiting_write && !lqe->lqe_usage)
				cancel = true;
		} else {
			lqe->lqe_granted += repbody->qb_count;
		}
		/* record to be written to the slave index copy */
		qti->qti_rec.lqr_slv_rec.qsr_granted = lqe->lqe_granted;
		/* the write lock is dropped around qsd_upd_schedule() and
		 * taken again right after */
		lqe_write_unlock(lqe);

		/* Update the slave index file in the dedicated thread. So far,
		 * We don't update the version of slave index copy on DQACQ.
		 * No locking is necessary since nobody can change
		 * lqe->lqe_granted while lqe->lqe_pending_req > 0 */
		qsd_upd_schedule(qqi, lqe, &lqe->lqe_id, &qti->qti_rec, 0,
				 false);
		lqe_write_lock(lqe);
	}

	/* extract information from lvb */
	/* NOTE(review): lvb is a pointer, "lvb != 0" is a NULL check */
	if (ret == 0 && lvb != 0) {
		if (lvb->lvb_id_qunit != 0)
			qsd_set_qunit(lqe, lvb->lvb_id_qunit);
		qsd_set_edquot(lqe, !!(lvb->lvb_flags & LQUOTA_FL_EDQUOT));
	} else if (repbody != NULL && repbody->qb_qunit != 0) {
		/* no usable lvb, fall back on the qunit packed in the reply */
		qsd_set_qunit(lqe, repbody->qb_qunit);
	}

	/* turn off pre-acquire if it failed with -EDQUOT. This is done to avoid
	 * flooding the master with acquire request. Pre-acquire will be turned
	 * on again as soon as qunit is modified */
	if (req_is_preacq(reqbody->qb_flags) && ret == -EDQUOT)
		lqe->lqe_nopreacq = true;
out:
	adjust = qsd_adjust_needed(lqe);
	/* record the outcome and completion time of acquire requests, except
	 * when they failed with -EDQUOT.
	 * NOTE(review): reqbody has already been dereferenced on every path
	 * leading here, so the NULL check below is only defensive */
	if (reqbody && req_is_acq(reqbody->qb_flags) && ret != -EDQUOT) {
		lqe->lqe_acq_rc = ret;
		lqe->lqe_acq_time = cfs_time_current_64();
	}
out_noadjust:
	/* reached with the lqe write lock held; retire the in-flight request */
	qsd_request_exit(lqe);
	lqe_write_unlock(lqe);

	/* release reference on per-ID lock */
	if (lustre_handle_is_used(lockh))
		ldlm_lock_decref(lockh, qsd_id_einfo.ei_mode);

	/* cancel takes precedence over a plain adjustment; for the latter the
	 * second argument is false when ret is 0 or -EDQUOT, true otherwise */
	if (cancel) {
		qsd_adjust_schedule(lqe, false, true);
	} else if (adjust) {
		if (!ret || ret == -EDQUOT)
			qsd_adjust_schedule(lqe, false, false);
		else
			qsd_adjust_schedule(lqe, true, false);
	}
	lqe_putref(lqe);

	if (lvb)
		OBD_FREE_PTR(lvb);
	EXIT;
}