/* * Consult current disk space consumed by a given identifier. * * \param env - the environment passed by the caller * \param qqi - is the pointer to the qsd_qtype_info structure associated * with the identifier. * \param lqe - is the quota entry associated with the identifier */ int qsd_refresh_usage(const struct lu_env *env, struct lquota_entry *lqe) { struct qsd_thread_info *qti = qsd_info(env); struct lquota_acct_rec *rec = &qti->qti_acct_rec; struct qsd_qtype_info *qqi = lqe2qqi(lqe); int rc = 0; ENTRY; LASSERT(qqi->qqi_acct_obj); /* read disk usage */ rc = lquota_disk_read(env, qqi->qqi_acct_obj, &lqe->lqe_id, (struct dt_rec *)rec); switch(rc) { case -ENOENT: lqe->lqe_usage = 0; rc = 0; break; case 0: if (qqi->qqi_qsd->qsd_is_md) lqe->lqe_usage = rec->ispace; else lqe->lqe_usage = toqb(rec->bspace); break; default: LQUOTA_ERROR(lqe, "failed to read disk usage, rc:%d", rc); RETURN(rc); } LQUOTA_DEBUG(lqe, "disk usage: "LPU64, lqe->lqe_usage); RETURN(0); }
/**
 * Mark the quota request in flight for \a lqe as completed.
 *
 * Counterpart of qsd_request_enter(): exactly one request is expected to be
 * pending, any other value triggers LBUG(). The pending request counter is
 * decremented, lqe_pending_rel is reset to 0 and every thread sleeping on
 * lqe_waiters is woken up.
 */
static inline void qsd_request_exit(struct lquota_entry *lqe)
{
	if (lqe->lqe_pending_req != 1) {
		LQUOTA_ERROR(lqe, "lqe_pending_req != 1!!!");
		LBUG();
	}

	/* the request is over, so is any release it was carrying */
	lqe->lqe_pending_rel = 0;
	lqe->lqe_pending_req -= 1;

	wake_up_all(&lqe->lqe_waiters);
}
/**
 * Claim the single "request in flight" slot of the lquota entry \a lqe.
 *
 * \retval 0      - no request was pending, lqe_pending_req has been bumped
 * \retval -EBUSY - a quota request is already in flight for this entry
 *
 * Finding a non-zero lqe_pending_rel while no request is pending is treated
 * as a fatal inconsistency (LBUG).
 */
static inline int qsd_request_enter(struct lquota_entry *lqe)
{
	if (lqe->lqe_pending_req == 0) {
		/* nothing in flight, hence no release may be outstanding */
		if (lqe->lqe_pending_rel != 0) {
			LQUOTA_ERROR(lqe, "no request in flight with "
				     "pending_rel="LPU64,
				     lqe->lqe_pending_rel);
			LBUG();
		}

		lqe->lqe_pending_req++;
		return 0;
	}

	LQUOTA_DEBUG(lqe, "already a request in flight");
	return -EBUSY;
}
/* * Update a slave quota entry. This is done by reading enforcement status from * the copy of the global index and then how much is the slave currenly owns * for this user from the slave index copy. * * \param env - the environment passed by the caller * \param lqe - is the quota entry to refresh * \param arg - is the pointer to the qsd_qtype_info structure */ static int qsd_lqe_read(const struct lu_env *env, struct lquota_entry *lqe, void *arg) { struct qsd_thread_info *qti = qsd_info(env); struct qsd_qtype_info *qqi = (struct qsd_qtype_info *)arg; int rc; LASSERT(!lqe_is_master(lqe)); /* read record from global index copy to know whether quota is * enforced for this user */ rc = lquota_disk_read(env, qqi->qqi_glb_obj, &lqe->lqe_id, (struct dt_rec *)&qti->qti_glb_rec); switch(rc) { case -ENOENT: /* no such entry, assume quota isn't enforced for this user */ lqe->lqe_enforced = false; break; case 0: if (qti->qti_glb_rec.qbr_hardlimit == 0 && qti->qti_glb_rec.qbr_softlimit == 0) /* quota isn't enforced for this use */ lqe->lqe_enforced = false; else lqe->lqe_enforced = true; break; default: LQUOTA_ERROR(lqe, "failed to read quota entry from global " "index copy, rc:%d", rc); return rc; } /* read record from slave index copy to find out how much space is * currently owned by this slave */ rc = lquota_disk_read(env, qqi->qqi_slv_obj, &lqe->lqe_id, (struct dt_rec *)&qti->qti_slv_rec); switch(rc) { case -ENOENT: lqe->lqe_granted = 0; break; case 0: lqe->lqe_granted = qti->qti_slv_rec.qsr_granted; break; default: LQUOTA_ERROR(lqe, "failed to read quota entry from slave " "index copy, rc:%d", rc); return rc; } /* don't know what the qunit value is yet */ qsd_set_qunit(lqe, 0); /* read current disk-usage from disk */ rc = qsd_refresh_usage(env, lqe); if (rc) return rc; LQUOTA_DEBUG(lqe, "successfully read from disk"); return 0; }
/*
 * Helper function to handle a quota request (acquire, pre-acquire, release
 * or usage report) sent by a slave to the master.
 *
 * The reply body is zeroed and filled with the quota ID, the FID of the
 * slave index and, on return, the amount of space granted/released
 * (qb_count) and the slave index version (qb_slv_ver). Updates to the slave
 * and global indexes are done in a single transaction with the lqe write
 * lock held.
 *
 * \param env      - is the environment passed by the caller
 * \param lqe      - is the lquota_entry subject to the quota request
 * \param qmt      - is the master device
 * \param uuid     - is the uuid associated with the slave, must not be NULL
 * \param qb_flags - are the quota request flags as packed in the quota_body
 * \param qb_count - is the amount of quota space the slave wants to
 *                   acquire/release
 * \param qb_usage - is the current space usage on the slave
 * \param repbody  - is the quota_body of reply
 *
 * \retval 0            : success
 * \retval -EDQUOT      : out of quota
 * \retval -EINPROGRESS : inform client to retry write/create
 * \retval -ESRCH       : quota isn't enforced for this ID on the master and
 *                        the request isn't a release
 * \retval -EINVAL      : slave attempted to release more than granted
 * \retval -ve          : other appropriate errors
 */
int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
	       struct qmt_device *qmt, struct obd_uuid *uuid, __u32 qb_flags,
	       __u64 qb_count, __u64 qb_usage, struct quota_body *repbody)
{
	struct qmt_thread_info	*qti = qmt_info(env);
	__u64			 now, count;
	struct dt_object	*slv_obj = NULL;
	__u64			 slv_granted, slv_granted_bck;
	struct thandle		*th = NULL;
	int			 rc, ret;
	ENTRY;

	LASSERT(uuid != NULL);

	/* initialize reply */
	memset(repbody, 0, sizeof(*repbody));
	memcpy(&repbody->qb_id, &lqe->lqe_id, sizeof(repbody->qb_id));

	/* fault injection: fail the request with the configured error code
	 * before any resource is acquired */
	if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_RECOVERABLE_ERR))
		RETURN(-cfs_fail_val);

	/* look-up index file associated with acquiring slave */
	slv_obj = lquota_disk_slv_find(env, qmt->qmt_child, LQE_ROOT(lqe),
				       lu_object_fid(&LQE_GLB_OBJ(lqe)->do_lu),
				       uuid);
	if (IS_ERR(slv_obj))
		GOTO(out, rc = PTR_ERR(slv_obj));

	/* pack slave fid in reply just for sanity check */
	memcpy(&repbody->qb_slv_fid, lu_object_fid(&slv_obj->do_lu),
	       sizeof(struct lu_fid));

	/* allocate & start transaction with enough credits to update
	 * global & slave indexes; qti_restore is handed over so that
	 * qmt_restore() can be used below if an index update fails */
	th = qmt_trans_start_with_slv(env, lqe, slv_obj, &qti->qti_restore);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	/* the write lock is held until the out_locked label */
	lqe_write_lock(lqe);
	LQUOTA_DEBUG(lqe, "dqacq starts uuid:%s flags:0x%x wanted:"LPU64
		     " usage:"LPU64, obd_uuid2str(uuid), qb_flags, qb_count,
		     qb_usage);

	/* Legal race, limits have been removed on master, but slave didn't
	 * receive the change yet. Fail the request with -ESRCH (release
	 * requests are still processed) until the slave gets notified. */
	if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
		GOTO(out_locked, rc = -ESRCH);

	/* recompute qunit in case it was never initialized */
	qmt_revalidate(env, lqe);

	/* slave just wants to acquire per-ID lock */
	if (req_is_acq(qb_flags) && qb_count == 0)
		GOTO(out_locked, rc = 0);

	/* fetch how much quota space is already granted to this slave */
	rc = qmt_slv_read(env, lqe, slv_obj, &slv_granted);
	if (rc) {
		LQUOTA_ERROR(lqe, "Failed to get granted for slave %s, rc=%d",
			     obd_uuid2str(uuid), rc);
		GOTO(out_locked, rc);
	}
	/* recall how much space this slave currently owns in order to restore
	 * it in case of failure */
	slv_granted_bck = slv_granted;

	/* record current time for soft limit & grace time management */
	now = (__u64)cfs_time_current_sec();

	if (req_is_rel(qb_flags)) {
		/* Slave would like to release quota space */
		if (slv_granted < qb_count ||
		    lqe->lqe_granted < qb_count) {
			/* can't release more than granted */
			LQUOTA_ERROR(lqe, "Release too much! uuid:%s release:"
				     LPU64" granted:"LPU64", total:"LPU64,
				     obd_uuid2str(uuid), qb_count,
				     slv_granted, lqe->lqe_granted);
			GOTO(out_locked, rc = -EINVAL);
		}

		repbody->qb_count = qb_count;
		/* put released space back to global pool
		 * NOTE(review): QMT_REL/QMT_GRANT are defined elsewhere;
		 * presumably they adjust both lqe->lqe_granted and
		 * slv_granted - confirm against their definition */
		QMT_REL(lqe, slv_granted, qb_count);
		GOTO(out_write, rc = 0);
	}

	if (req_has_rep(qb_flags) && slv_granted < qb_usage) {
		/* Slave is reporting space usage in quota request and it turns
		 * out to be using more quota space than owned, so we adjust
		 * granted space regardless of the current state of affairs */
		repbody->qb_count = qb_usage - slv_granted;
		QMT_GRANT(lqe, slv_granted, repbody->qb_count);
	}

	/* pure usage report, nothing to acquire */
	if (!req_is_acq(qb_flags) && !req_is_preacq(qb_flags))
		GOTO(out_write, rc = 0);

	/* From here on, rc set before jumping to out_write is what gets
	 * returned when the index updates succeed, even if some space was
	 * already granted above through the usage report. */
	qmt_adjust_edquot(lqe, now);
	if (lqe->lqe_edquot)
		/* no hope to claim further space back */
		GOTO(out_write, rc = -EDQUOT);

	if (qmt_space_exhausted(lqe, now)) {
		/* might have some free space once rebalancing is completed:
		 * acquire requests are asked to retry, pre-acquire requests
		 * are simply refused */
		rc = req_is_acq(qb_flags) ? -EINPROGRESS : -EDQUOT;
		GOTO(out_write, rc);
	}

	if (req_is_preacq(qb_flags)) {
		/* slave would like to pre-acquire quota space. To do so, it
		 * reports in qb_count how much spare quota space it owns and we
		 * can grant back quota space which is consistent with qunit
		 * value. */
		if (qb_count >= lqe->lqe_qunit)
			/* slave already own the maximum it should */
			GOTO(out_write, rc = 0);

		count = qmt_alloc_expand(lqe, slv_granted, qb_count);
		if (count == 0)
			GOTO(out_write, rc = -EDQUOT);

		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	/* processing acquire request with clients waiting */
	if (lqe->lqe_hardlimit != 0 &&
	    lqe->lqe_granted + qb_count > lqe->lqe_hardlimit) {
		/* cannot grant as much as asked, but can still afford to grant
		 * some quota space back: hand out whatever is left below the
		 * hard limit */
		count = lqe->lqe_hardlimit - lqe->lqe_granted;
		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	/* Whouhou! we can satisfy the slave request! */
	repbody->qb_count += qb_count;
	QMT_GRANT(lqe, slv_granted, qb_count);

	/* Try to expand the acquired count for DQACQ */
	count = qmt_alloc_expand(lqe, slv_granted, 0);
	if (count != 0) {
		/* can even grant more than asked, it is like xmas ... */
		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	GOTO(out_write, rc = 0);
out_write:
	/* nothing was granted or released, no index update is needed */
	if (repbody->qb_count == 0)
		GOTO(out_locked, rc);

	/* start/stop grace timer if required */
	if (lqe->lqe_softlimit != 0) {
		if (lqe->lqe_granted > lqe->lqe_softlimit &&
		    lqe->lqe_gracetime == 0)
			/* first time over soft limit, let's start grace
			 * timer */
			lqe->lqe_gracetime = now + qmt_lqe_grace(lqe);
		else if (lqe->lqe_granted <= lqe->lqe_softlimit &&
			 lqe->lqe_gracetime != 0)
			/* Clear grace timer */
			lqe->lqe_gracetime = 0;
	}

	/* Update slave index first since it is easier to roll back */
	ret = qmt_slv_write(env, th, lqe, slv_obj, LQUOTA_BUMP_VER,
			    &repbody->qb_slv_ver, slv_granted);
	if (ret) {
		/* restore initial quota settings */
		qmt_restore(lqe, &qti->qti_restore);
		/* reset qb_count */
		repbody->qb_count = 0;
		GOTO(out_locked, rc = ret);
	}

	/* Update global index, no version bump needed */
	ret = qmt_glb_write(env, th, lqe, 0, NULL);
	if (ret) {
		/* remember the global index failure, ret is reused below */
		rc = ret;
		/* restore initial quota settings */
		qmt_restore(lqe, &qti->qti_restore);
		/* reset qb_count */
		repbody->qb_count = 0;

		/* restore previous granted value in the slave index, which
		 * was already updated above; failing to do so is fatal */
		ret = qmt_slv_write(env, th, lqe, slv_obj, 0, NULL,
				    slv_granted_bck);
		if (ret) {
			LQUOTA_ERROR(lqe, "failed to restore initial slave "
				     "value rc:%d ret%d", rc, ret);
			LBUG();
		}
		qmt_adjust_edquot(lqe, now);
		GOTO(out_locked, rc);
	}

	/* Total granted has been changed, let's try to adjust the qunit
	 * size according to the total granted & limits. */
	qmt_adjust_qunit(env, lqe);

	/* clear/set edquot flag and notify slaves via glimpse if needed */
	qmt_adjust_edquot(lqe, now);
out_locked:
	LQUOTA_DEBUG(lqe, "dqacq ends count:"LPU64" ver:"LPU64" rc:%d",
		     repbody->qb_count, repbody->qb_slv_ver, rc);
	lqe_write_unlock(lqe);
out:
	/* th and slv_obj may be NULL or an error pointer when coming from
	 * one of the early failures */
	if (th != NULL && !IS_ERR(th))
		dt_trans_stop(env, qmt->qmt_child, th);

	if (slv_obj != NULL && !IS_ERR(slv_obj))
		lu_object_put(env, &slv_obj->do_lu);

	if ((req_is_acq(qb_flags) || req_is_preacq(qb_flags)) &&
	    OBD_FAIL_CHECK(OBD_FAIL_QUOTA_EDQUOT)) {
		/* fault injection: introduce inconsistency between granted
		 * value in slave index and slave index copy of slave by
		 * hiding from the slave what was granted above */
		repbody->qb_count = 0;
		rc = -EDQUOT;
	}

	RETURN(rc);
}
/** * Quota enforcement handler. If local quota can satisfy this operation, * return success, otherwise, acquire more quota from master. * (for write operation, if master isn't available at this moment, return * -EINPROGRESS to inform client to retry the write) * * \param env - the environment passed by the caller * \param qsd - is the qsd instance associated with the device in charge * of the operation. * \param qid - is the qid information attached in the transaction handle * \param space - is the space required by the operation * \param flags - if the operation is write, return caller no user/group * and sync commit flags * * \retval 0 - success * \retval -EDQUOT - out of quota * \retval -EINPROGRESS - inform client to retry write * \retval -ve - other appropriate errors */ static int qsd_op_begin0(const struct lu_env *env, struct qsd_qtype_info *qqi, struct lquota_id_info *qid, long long space, int *flags) { struct lquota_entry *lqe; int rc, ret = -EINPROGRESS; struct l_wait_info lwi; ENTRY; if (qid->lqi_qentry != NULL) { /* we already had to deal with this id for this transaction */ lqe = qid->lqi_qentry; if (!lqe->lqe_enforced) RETURN(0); } else { /* look up lquota entry associated with qid */ lqe = lqe_locate(env, qqi->qqi_site, &qid->lqi_id); if (IS_ERR(lqe)) RETURN(PTR_ERR(lqe)); if (!lqe->lqe_enforced) { lqe_putref(lqe); RETURN(0); } qid->lqi_qentry = lqe; /* lqe will be released in qsd_op_end() */ } if (space <= 0) { /* when space is negative or null, we don't need to consume * quota space. 
That said, we still want to perform space * adjustments in qsd_op_end, so we return here, but with * a reference on the lqe */ if (flags != NULL) { rc = qsd_refresh_usage(env, lqe); GOTO(out_flags, rc); } RETURN(0); } LQUOTA_DEBUG(lqe, "op_begin space:"LPD64, space); lqe_write_lock(lqe); lqe->lqe_waiting_write += space; lqe_write_unlock(lqe); /* acquire quota space for the operation, cap overall wait time to * prevent a service thread from being stuck for too long */ lwi = LWI_TIMEOUT(cfs_time_seconds(qsd_wait_timeout(qqi->qqi_qsd)), NULL, NULL); rc = l_wait_event(lqe->lqe_waiters, qsd_acquire(env, lqe, space, &ret), &lwi); if (rc == 0 && ret == 0) { qid->lqi_space += space; } else { if (rc == 0) rc = ret; LQUOTA_DEBUG(lqe, "acquire quota failed:%d", rc); lqe_write_lock(lqe); lqe->lqe_waiting_write -= space; if (flags && lqe->lqe_pending_write != 0) /* Inform OSD layer that there are pending writes. * It might want to retry after a sync if appropriate */ *flags |= QUOTA_FL_SYNC; lqe_write_unlock(lqe); /* convert recoverable error into -EINPROGRESS, client will * retry */ if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ENOLCK || rc == -EAGAIN || rc == -EINTR) { rc = -EINPROGRESS; } else if (rc == -ESRCH) { rc = 0; LQUOTA_ERROR(lqe, "ID isn't enforced on master, it " "probably due to a legeal race, if this " "message is showing up constantly, there " "could be some inconsistence between " "master & slave, and quota reintegration " "needs be re-triggered."); } } if (flags != NULL) { out_flags: LASSERT(qid->lqi_is_blk); if (rc != 0) { *flags |= LQUOTA_OVER_FL(qqi->qqi_qtype); } else { __u64 usage; lqe_read_lock(lqe); usage = lqe->lqe_usage; usage += lqe->lqe_pending_write; usage += lqe->lqe_waiting_write; usage += qqi->qqi_qsd->qsd_sync_threshold; /* if we should notify client to start sync write */ if (usage >= lqe->lqe_granted - lqe->lqe_pending_rel) *flags |= LQUOTA_OVER_FL(qqi->qqi_qtype); else *flags &= ~LQUOTA_OVER_FL(qqi->qqi_qtype); lqe_read_unlock(lqe); } } 
RETURN(rc); }
/**
 * Callback function called when an acquire/release request sent to the master
 * is completed.
 *
 * Applies the reply to the lquota entry (granted space, per-ID lock handle,
 * qunit and edquot settings), schedules the update of the slave index copy,
 * ends the in-flight request with qsd_request_exit() and schedules a quota
 * adjustment or a lock cancel when needed. Before returning, the function
 * drops a reference on the per-ID lock when \a lockh is in use, calls
 * lqe_putref() on the lquota entry and frees \a lvb when it is not NULL.
 *
 * \param env     - the environment passed by the caller, refilled with
 *                  LCT_DT_THREAD tags on entry
 * \param qqi     - is the qsd_qtype_info structure, must not be NULL
 * \param reqbody - is the quota_body of the request which completed
 * \param repbody - is the quota_body of the reply, may be NULL
 * \param lockh   - is the handle of the per-ID lock
 * \param lvb     - is the lock value block attached to the reply, may be NULL
 * \param arg     - is the lquota entry the request was sent for, must not be
 *                  NULL
 * \param ret     - is the return code of the request
 */
static void qsd_req_completion(const struct lu_env *env,
			       struct qsd_qtype_info *qqi,
			       struct quota_body *reqbody,
			       struct quota_body *repbody,
			       struct lustre_handle *lockh,
			       struct lquota_lvb *lvb,
			       void *arg, int ret)
{
	struct lquota_entry	*lqe = (struct lquota_entry *)arg;
	struct qsd_thread_info	*qti;
	int			 rc;
	bool			 adjust = false, cancel = false;
	ENTRY;

	LASSERT(qqi != NULL && lqe != NULL);

	/* environment passed by ptlrpcd is mostly used by CLIO and doesn't
	 * have the DT tags set. */
	rc = lu_env_refill_by_tags((struct lu_env *)env, LCT_DT_THREAD, 0);
	if (rc) {
		LQUOTA_ERROR(lqe, "failed to refill environmnent %d", rc);
		/* the out_noadjust path expects the write lock to be held */
		lqe_write_lock(lqe);
		/* can't afford to adjust quota space with no suitable lu_env */
		GOTO(out_noadjust, rc);
	}
	qti = qsd_info(env);

	lqe_write_lock(lqe);
	LQUOTA_DEBUG(lqe, "DQACQ returned %d, flags:0x%x", ret,
		     reqbody->qb_flags);

	/* despite -EDQUOT & -EINPROGRESS errors, the master might still
	 * grant us back quota space to adjust quota overrun */
	if (ret != 0 && ret != -EDQUOT && ret != -EINPROGRESS) {
		if (ret != -ETIMEDOUT && ret != -ENOTCONN &&
		   ret != -ESHUTDOWN && ret != -EAGAIN)
			/* print errors only if return code is unexpected */
			LQUOTA_ERROR(lqe, "DQACQ failed with %d, flags:0x%x",
				     ret, reqbody->qb_flags);
		GOTO(out, ret);
	}

	/* Record the per-ID lock handle in the lqe if it changed */
	if (lustre_handle_is_used(lockh) &&
	    !lustre_handle_equal(lockh, &lqe->lqe_lockh))
		lustre_handle_copy(&lqe->lqe_lockh, lockh);

	/* If the replied qb_count is zero, it means master didn't process
	 * the DQACQ since the limit for this ID has been removed, so we
	 * should update neither the quota entry nor the slave index copy. */
	if (repbody != NULL && repbody->qb_count != 0) {
		LQUOTA_DEBUG(lqe, "DQACQ qb_count:"LPU64, repbody->qb_count);

		if (req_is_rel(reqbody->qb_flags)) {
			if (lqe->lqe_granted < repbody->qb_count) {
				LQUOTA_ERROR(lqe, "can't release more space "
					     "than owned "LPU64"<"LPU64,
					     lqe->lqe_granted,
					     repbody->qb_count);
				/* clamp instead of wrapping around */
				lqe->lqe_granted = 0;
			} else {
				lqe->lqe_granted -= repbody->qb_count;
			}
			/* Cancel the per-ID lock initiatively when there
			 * isn't any usage & grant, which can avoid master
			 * sending glimpse unnecessarily to this slave on
			 * quota revoking */
			if (!lqe->lqe_pending_write && !lqe->lqe_granted &&
			    !lqe->lqe_waiting_write && !lqe->lqe_usage)
				cancel = true;
		} else {
			lqe->lqe_granted += repbody->qb_count;
		}
		/* new granted value to be written to the slave index copy */
		qti->qti_rec.lqr_slv_rec.qsr_granted = lqe->lqe_granted;
		lqe_write_unlock(lqe);

		/* Update the slave index file in the dedicated thread. So far,
		 * We don't update the version of slave index copy on DQACQ.
		 * No locking is necessary since nobody can change
		 * lqe->lqe_granted while lqe->lqe_pending_req > 0 */
		qsd_upd_schedule(qqi, lqe, &lqe->lqe_id, &qti->qti_rec, 0,
				 false);
		lqe_write_lock(lqe);
	}

	/* extract information from lvb, falling back on the qunit packed in
	 * the reply body when the lvb can't be used */
	if (ret == 0 && lvb != 0) {
		if (lvb->lvb_id_qunit != 0)
			qsd_set_qunit(lqe, lvb->lvb_id_qunit);
		qsd_set_edquot(lqe, !!(lvb->lvb_flags & LQUOTA_FL_EDQUOT));
	} else if (repbody != NULL && repbody->qb_qunit != 0) {
		qsd_set_qunit(lqe, repbody->qb_qunit);
	}

	/* turn off pre-acquire if it failed with -EDQUOT. This is done to avoid
	 * flooding the master with acquire request. Pre-acquire will be turned
	 * on again as soon as qunit is modified */
	if (req_is_preacq(reqbody->qb_flags) && ret == -EDQUOT)
		lqe->lqe_nopreacq = true;
out:
	/* reached with the lqe write lock held */
	adjust = qsd_adjust_needed(lqe);
	/* remember the outcome & completion time of acquire requests
	 * NOTE(review): reqbody is dereferenced unconditionally above, so
	 * the NULL check below looks redundant - confirm with callers */
	if (reqbody && req_is_acq(reqbody->qb_flags) && ret != -EDQUOT) {
		lqe->lqe_acq_rc = ret;
		lqe->lqe_acq_time = cfs_time_current_64();
	}
out_noadjust:
	/* request is over, this also wakes up threads waiting on the lqe */
	qsd_request_exit(lqe);
	lqe_write_unlock(lqe);

	/* release reference on per-ID lock */
	if (lustre_handle_is_used(lockh))
		ldlm_lock_decref(lockh, qsd_id_einfo.ei_mode);

	/* a lock cancel takes precedence over a plain adjustment; the second
	 * argument of qsd_adjust_schedule() is only set when the request
	 * failed with something else than -EDQUOT */
	if (cancel) {
		qsd_adjust_schedule(lqe, false, true);
	} else if (adjust) {
		if (!ret || ret == -EDQUOT)
			qsd_adjust_schedule(lqe, false, false);
		else
			qsd_adjust_schedule(lqe, true, false);
	}
	lqe_putref(lqe);

	if (lvb)
		OBD_FREE_PTR(lvb);
	EXIT;
}