/*
 * Refresh the in-memory slave lquota entry from an index record \a rec.
 *
 * A global record (struct lquota_glb_rec) only drives the enforcement flag:
 * the ID is enforced as soon as either a hard or a soft limit is set.
 * A slave record (struct lquota_slv_rec) carries the amount of quota space
 * granted, which is copied into the entry.
 *
 * \param env    - the environment passed by the caller
 * \param lqe    - is the quota entry associated with the identifier, it must
 *                 not be a master entry
 * \param global - true when \a rec is a lquota_glb_rec, false when it is a
 *                 lquota_slv_rec
 * \param rec    - is the updated record received from the master
 *
 * \retval 0 always
 */
int qsd_update_lqe(const struct lu_env *env, struct lquota_entry *lqe,
		   bool global, void *rec)
{
	struct qsd_qtype_info	*qqi;
	ENTRY;

	LASSERT(lqe != NULL);
	LASSERT(!lqe_is_master(lqe));

	/* NOTE(review): qqi is assigned here but never read afterwards */
	qqi = lqe2qqi(lqe);

	/* updating lqe is always serialized, no locking needed. */
	if (!global) {
		struct lquota_slv_rec *slv = rec;

		lqe->lqe_granted = slv->qsr_granted;

		LQUOTA_DEBUG(lqe, "updating slave index, granted:"LPU64"",
			     slv->qsr_granted);
	} else {
		struct lquota_glb_rec *glb = rec;

		/* quota is enforced as soon as one of the limits is set */
		lqe->lqe_enforced = glb->qbr_hardlimit != 0 ||
				    glb->qbr_softlimit != 0;

		LQUOTA_DEBUG(lqe, "updating global index hardlimit: "LPU64", "
			     "softlimit: "LPU64, glb->qbr_hardlimit,
			     glb->qbr_softlimit);
	}

	RETURN(0);
}
/** * Check whether a qsd instance is all set to send quota request to master. * This includes checking whether: * - the connection to master is set up and usable, * - the qsd isn't stopping * - reintegration has been successfully completed and all indexes are * up-to-date * * \param lqe - is the lquota entry for which we would like to send an quota * request * \param lockh - is the remote handle of the global lock returned on success * * \retval 0 on success, appropriate error on failure */ static int qsd_ready(struct lquota_entry *lqe, struct lustre_handle *lockh) { struct qsd_qtype_info *qqi = lqe2qqi(lqe); struct qsd_instance *qsd = qqi->qqi_qsd; struct obd_import *imp = NULL; struct ldlm_lock *lock; ENTRY; read_lock(&qsd->qsd_lock); /* is the qsd about to shut down? */ if (qsd->qsd_stopping) { read_unlock(&qsd->qsd_lock); LQUOTA_DEBUG(lqe, "dropping quota req since qsd is stopping"); /* Target is about to shut down, client will retry */ RETURN(-EINPROGRESS); } /* is the connection to the quota master ready? */ if (qsd->qsd_exp_valid) imp = class_exp2cliimp(qsd->qsd_exp); if (imp == NULL || imp->imp_invalid) { read_unlock(&qsd->qsd_lock); LQUOTA_DEBUG(lqe, "connection to master not ready"); RETURN(-ENOTCONN); } /* In most case, reintegration must have been triggered (when enable * quota or on OST start), however, in rare race condition (enabling * quota when starting OSTs), we might miss triggering reintegration * for some qqi. * * If the previous reintegration failed for some reason, we'll * re-trigger it here as well. 
*/ if (!qqi->qqi_glb_uptodate || !qqi->qqi_slv_uptodate) { read_unlock(&qsd->qsd_lock); LQUOTA_DEBUG(lqe, "not up-to-date, dropping request and " "kicking off reintegration"); qsd_start_reint_thread(qqi); RETURN(-EINPROGRESS); } /* Fill the remote global lock handle, master will check this handle * to see if the slave is sending request with stale lock */ lustre_handle_copy(lockh, &qqi->qqi_lockh); read_unlock(&qsd->qsd_lock); if (!lustre_handle_is_used(lockh)) RETURN(-ENOLCK); lock = ldlm_handle2lock(lockh); if (lock == NULL) RETURN(-ENOLCK); /* return remote lock handle to be packed in quota request */ lustre_handle_copy(lockh, &lock->l_remote_handle); LDLM_LOCK_PUT(lock); RETURN(0); }
/* * Consult current disk space consumed by a given identifier. * * \param env - the environment passed by the caller * \param qqi - is the pointer to the qsd_qtype_info structure associated * with the identifier. * \param lqe - is the quota entry associated with the identifier */ int qsd_refresh_usage(const struct lu_env *env, struct lquota_entry *lqe) { struct qsd_thread_info *qti = qsd_info(env); struct lquota_acct_rec *rec = &qti->qti_acct_rec; struct qsd_qtype_info *qqi = lqe2qqi(lqe); int rc = 0; ENTRY; LASSERT(qqi->qqi_acct_obj); /* read disk usage */ rc = lquota_disk_read(env, qqi->qqi_acct_obj, &lqe->lqe_id, (struct dt_rec *)rec); switch(rc) { case -ENOENT: lqe->lqe_usage = 0; rc = 0; break; case 0: if (qqi->qqi_qsd->qsd_is_md) lqe->lqe_usage = rec->ispace; else lqe->lqe_usage = toqb(rec->bspace); break; default: LQUOTA_ERROR(lqe, "failed to read disk usage, rc:%d", rc); RETURN(rc); } LQUOTA_DEBUG(lqe, "disk usage: "LPU64, lqe->lqe_usage); RETURN(0); }
/*
 * Fetch the quota settings of a given identifier.
 *
 * Each output pointer is optional: a NULL pointer means the caller isn't
 * interested in that setting. All requested settings are copied under the
 * entry read lock.
 *
 * \param env     - is the environment passed by the caller
 * \param qmt     - is the quota master target
 * \param pool_id - is the 16-bit pool identifier
 * \param restype - is the pool type, either block (i.e. LQUOTA_RES_DT) or
 *                  inode (i.e. LQUOTA_RES_MD)
 * \param qtype   - is the quota type
 * \param id      - is the quota identifier for which we want to access quota
 *                  settings
 * \param hard    - is the output variable where to copy the hard limit
 * \param soft    - is the output variable where to copy the soft limit
 * \param time    - is the output variable where to copy the grace time
 *
 * \retval 0   - success
 * \retval -ve - failure to look up the quota entry
 */
static int qmt_get(const struct lu_env *env, struct qmt_device *qmt,
		   __u16 pool_id, __u8 restype, __u8 qtype,
		   union lquota_id *id, __u64 *hard, __u64 *soft, __u64 *time)
{
	struct lquota_entry	*entry;
	ENTRY;

	/* find the lqe holding the settings for this ID */
	entry = qmt_pool_lqe_lookup(env, qmt, pool_id, restype, qtype, id);
	if (IS_ERR(entry))
		RETURN(PTR_ERR(entry));

	lqe_read_lock(entry);
	LQUOTA_DEBUG(entry, "fetch settings");
	if (hard)
		*hard = entry->lqe_hardlimit;
	if (soft)
		*soft = entry->lqe_softlimit;
	if (time)
		*time = entry->lqe_gracetime;
	lqe_read_unlock(entry);

	/* drop the reference taken by the lookup */
	lqe_putref(entry);
	RETURN(0);
}
/**
 * Claim the single in-flight request slot of the lquota entry \a lqe by
 * bumping lqe_pending_req.
 *
 * Finding a non-zero lqe_pending_rel while no request is in flight is
 * reported as an error and triggers LBUG().
 *
 * \retval 0      - slot claimed, lqe_pending_req has been incremented
 * \retval -EBUSY - there is already a quota request in flight
 */
static inline int qsd_request_enter(struct lquota_entry *lqe)
{
	if (lqe->lqe_pending_req == 0) {
		/* a pending release with no request in flight is unexpected */
		if (lqe->lqe_pending_rel != 0) {
			LQUOTA_ERROR(lqe, "no request in flight with "
				     "pending_rel="LPU64,
				     lqe->lqe_pending_rel);
			LBUG();
		}

		lqe->lqe_pending_req++;
		return 0;
	}

	LQUOTA_DEBUG(lqe, "already a request in flight");
	return -EBUSY;
}
/** * Acquire \a space of quota space in order to complete an operation. * Try to consume local quota space first and send acquire request to quota * master if required. * * \param env - the environment passed by the caller * \param lqe - is the qid entry to be processed * \param space - is the amount of quota required for the operation * \param ret - is the return code (-EDQUOT, -EINPROGRESS, ...) * * \retval true - exit from l_wait_event and real return value in \a ret * \retval false - continue waiting */ static bool qsd_acquire(const struct lu_env *env, struct lquota_entry *lqe, long long space, int *ret) { int rc = 0, count; ENTRY; for (count = 0; rc == 0; count++) { LQUOTA_DEBUG(lqe, "acquiring:"LPD64 " count=%d", space, count); if (lqe2qqi(lqe)->qqi_qsd->qsd_stopping) { rc = -EINPROGRESS; break; } /* refresh disk usage */ rc = qsd_refresh_usage(env, lqe); if (rc) break; /* try to consume local quota space first */ rc = qsd_acquire_local(lqe, space); if (rc != -EAGAIN) /* rc == 0, Wouhou! enough local quota space * rc < 0, something bad happened */ break; /* need to acquire more quota space from master */ rc = qsd_acquire_remote(env, lqe); } if (rc == -EBUSY) /* already a request in flight, continue waiting */ RETURN(false); *ret = rc; RETURN(true); /* exit from l_wait_event */ }
/* * Update a slave quota entry. This is done by reading enforcement status from * the copy of the global index and then how much is the slave currenly owns * for this user from the slave index copy. * * \param env - the environment passed by the caller * \param lqe - is the quota entry to refresh * \param arg - is the pointer to the qsd_qtype_info structure */ static int qsd_lqe_read(const struct lu_env *env, struct lquota_entry *lqe, void *arg) { struct qsd_thread_info *qti = qsd_info(env); struct qsd_qtype_info *qqi = (struct qsd_qtype_info *)arg; int rc; LASSERT(!lqe_is_master(lqe)); /* read record from global index copy to know whether quota is * enforced for this user */ rc = lquota_disk_read(env, qqi->qqi_glb_obj, &lqe->lqe_id, (struct dt_rec *)&qti->qti_glb_rec); switch(rc) { case -ENOENT: /* no such entry, assume quota isn't enforced for this user */ lqe->lqe_enforced = false; break; case 0: if (qti->qti_glb_rec.qbr_hardlimit == 0 && qti->qti_glb_rec.qbr_softlimit == 0) /* quota isn't enforced for this use */ lqe->lqe_enforced = false; else lqe->lqe_enforced = true; break; default: LQUOTA_ERROR(lqe, "failed to read quota entry from global " "index copy, rc:%d", rc); return rc; } /* read record from slave index copy to find out how much space is * currently owned by this slave */ rc = lquota_disk_read(env, qqi->qqi_slv_obj, &lqe->lqe_id, (struct dt_rec *)&qti->qti_slv_rec); switch(rc) { case -ENOENT: lqe->lqe_granted = 0; break; case 0: lqe->lqe_granted = qti->qti_slv_rec.qsr_granted; break; default: LQUOTA_ERROR(lqe, "failed to read quota entry from slave " "index copy, rc:%d", rc); return rc; } /* don't know what the qunit value is yet */ qsd_set_qunit(lqe, 0); /* read current disk-usage from disk */ rc = qsd_refresh_usage(env, lqe); if (rc) return rc; LQUOTA_DEBUG(lqe, "successfully read from disk"); return 0; }
/* * Glimpse callback handler for per-ID quota locks. * * \param lock - is the lock targeted by the glimpse * \param data - is a pointer to the glimpse ptlrpc request */ static int qsd_id_glimpse_ast(struct ldlm_lock *lock, void *data) { struct ptlrpc_request *req = data; struct lquota_entry *lqe; struct qsd_instance *qsd; struct ldlm_gl_lquota_desc *desc; struct lquota_lvb *lvb; int rc; bool wakeup = false; ENTRY; rc = qsd_common_glimpse_ast(req, &desc, (void **)&lvb); if (rc) GOTO(out, rc); lqe = qsd_id_ast_data_get(lock, false); if (lqe == NULL) /* valid race */ GOTO(out, rc = -ELDLM_NO_LOCK_DATA); LQUOTA_DEBUG(lqe, "glimpse on quota locks, new qunit:"LPU64, desc->gl_qunit); qsd = lqe2qqi(lqe)->qqi_qsd; lqe_write_lock(lqe); lvb->lvb_id_rel = 0; if (desc->gl_qunit != 0 && desc->gl_qunit != lqe->lqe_qunit) { long long space; /* extract new qunit from glimpse request */ qsd_set_qunit(lqe, desc->gl_qunit); space = lqe->lqe_granted - lqe->lqe_pending_rel; space -= lqe->lqe_usage; space -= lqe->lqe_pending_write + lqe->lqe_waiting_write; space -= lqe->lqe_qunit; if (space > 0) { if (lqe->lqe_pending_req > 0) { LQUOTA_DEBUG(lqe, "request in flight, postpone " "release of "LPD64, space); lvb->lvb_id_may_rel = space; } else { lqe->lqe_pending_req++; /* release quota space in glimpse reply */ LQUOTA_DEBUG(lqe, "releasing "LPD64, space); lqe->lqe_granted -= space; lvb->lvb_id_rel = space; lqe_write_unlock(lqe); /* change the lqe_granted */ qsd_upd_schedule(lqe2qqi(lqe), lqe, &lqe->lqe_id, (union lquota_rec *)&lqe->lqe_granted, 0, false); lqe_write_lock(lqe); lqe->lqe_pending_req--; wakeup = true; } } } lqe->lqe_edquot = !!(desc->gl_flags & LQUOTA_FL_EDQUOT); lqe_write_unlock(lqe); if (wakeup) wake_up_all(&lqe->lqe_waiters); lqe_putref(lqe); out: req->rq_status = rc; RETURN(rc); }
/** * Blocking callback handler for per-ID lock * * \param lock - is the lock for which ast occurred. * \param desc - is the description of a conflicting lock in case of blocking * ast. * \param data - is the value of lock->l_ast_data * \param flag - LDLM_CB_BLOCKING or LDLM_CB_CANCELING. Used to distinguish * cancellation and blocking ast's. */ static int qsd_id_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag) { struct lustre_handle lockh; int rc = 0; ENTRY; switch(flag) { case LDLM_CB_BLOCKING: { LDLM_DEBUG(lock, "blocking AST on ID quota lock"); ldlm_lock2handle(lock, &lockh); rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); break; } case LDLM_CB_CANCELING: { struct lu_env *env; struct lquota_entry *lqe; bool rel = false; LDLM_DEBUG(lock, "canceling global quota lock"); lqe = qsd_id_ast_data_get(lock, true); if (lqe == NULL) break; LQUOTA_DEBUG(lqe, "losing ID lock"); /* just local cancel (for stack clean up or eviction), don't * release quota space in this case */ if (ldlm_is_local_only(lock)) { lqe_putref(lqe); break; } /* allocate environment */ OBD_ALLOC_PTR(env); if (env == NULL) { lqe_putref(lqe); rc = -ENOMEM; break; } /* initialize environment */ rc = lu_env_init(env, LCT_DT_THREAD); if (rc) { OBD_FREE_PTR(env); lqe_putref(lqe); break; } ldlm_lock2handle(lock, &lockh); lqe_write_lock(lqe); if (lustre_handle_equal(&lockh, &lqe->lqe_lockh)) { /* Clear lqe_lockh & reset qunit to 0 */ qsd_set_qunit(lqe, 0); memset(&lqe->lqe_lockh, 0, sizeof(lqe->lqe_lockh)); lqe->lqe_edquot = false; rel = true; } lqe_write_unlock(lqe); /* If there is qqacq inflight, the release will be skipped * at this time, and triggered on dqacq completion later, * which means there could be a short window that slave is * holding spare grant wihtout per-ID lock. 
*/ if (rel) rc = qsd_adjust(env, lqe); /* release lqe reference grabbed by qsd_id_ast_data_get() */ lqe_putref(lqe); lu_env_fini(env); OBD_FREE_PTR(env); break; } default: LASSERTF(0, "invalid flags for blocking ast %d", flag); } RETURN(rc); }
/* * Update quota settings for a given identifier. * * \param env - is the environment passed by the caller * \param qmt - is the quota master target * \param pool_id - is the 16-bit pool identifier * \param restype - is the pool type, either block (i.e. LQUOTA_RES_DT) or inode * (i.e. LQUOTA_RES_MD) * \param qtype - is the quota type * \param id - is the quota indentifier for which we want to modify quota * settings. * \param hard - is the new hard limit * \param soft - is the new soft limit * \param time - is the new grace time * \param valid - is the list of settings to change */ static int qmt_set(const struct lu_env *env, struct qmt_device *qmt, __u16 pool_id, __u8 restype, __u8 qtype, union lquota_id *id, __u64 hard, __u64 soft, __u64 time, __u32 valid) { struct qmt_thread_info *qti = qmt_info(env); struct lquota_entry *lqe; struct thandle *th = NULL; __u64 ver, now; bool dirtied = false; int rc = 0; ENTRY; /* look-up quota entry associated with this ID */ lqe = qmt_pool_lqe_lookup(env, qmt, pool_id, restype, qtype, id); if (IS_ERR(lqe)) RETURN(PTR_ERR(lqe)); /* allocate & start transaction with enough credits to update quota * settings in the global index file */ th = qmt_trans_start(env, lqe, &qti->qti_restore); if (IS_ERR(th)) GOTO(out_nolock, rc = PTR_ERR(th)); now = cfs_time_current_sec(); lqe_write_lock(lqe); LQUOTA_DEBUG(lqe, "changing quota settings valid:%x hard:"LPU64" soft:" LPU64" time:"LPU64, valid, hard, soft, time); if ((valid & QIF_TIMES) != 0 && lqe->lqe_gracetime != time) { /* change time settings */ lqe->lqe_gracetime = time; dirtied = true; } if ((valid & QIF_LIMITS) != 0 && (lqe->lqe_hardlimit != hard || lqe->lqe_softlimit != soft)) { rc = qmt_validate_limits(lqe, hard, soft); if (rc) GOTO(out, rc); /* recompute qunit in case it was never initialized */ qmt_revalidate(env, lqe); /* change quota limits */ lqe->lqe_hardlimit = hard; lqe->lqe_softlimit = soft; /* clear grace time */ if (lqe->lqe_softlimit == 0 || lqe->lqe_granted <= 
lqe->lqe_softlimit) /* no soft limit or below soft limit, let's clear grace * time */ lqe->lqe_gracetime = 0; else if ((valid & QIF_TIMES) == 0) /* set grace only if user hasn't provided his own */ lqe->lqe_gracetime = now + qmt_lqe_grace(lqe); /* change enforced status based on new parameters */ if (lqe->lqe_hardlimit == 0 && lqe->lqe_softlimit == 0) lqe->lqe_enforced = false; else lqe->lqe_enforced = true; dirtied = true; } if (dirtied) { /* write new quota settings to disk */ rc = qmt_glb_write(env, th, lqe, LQUOTA_BUMP_VER, &ver); if (rc) { /* restore initial quota settings */ qmt_restore(lqe, &qti->qti_restore); GOTO(out, rc); } /* compute new qunit value now that we have modified the quota * settings */ qmt_adjust_qunit(env, lqe); /* clear/set edquot flag as needed */ qmt_adjust_edquot(lqe, now); } EXIT; out: lqe_write_unlock(lqe); out_nolock: lqe_putref(lqe); if (th != NULL && !IS_ERR(th)) dt_trans_stop(env, qmt->qmt_child, th); if (rc == 0 && dirtied) qmt_glb_lock_notify(env, lqe, ver); return rc; }
/*
 * Helper function to handle quota request from slave.
 *
 * The request is processed under the entry write lock, within a transaction
 * covering both the slave and the global index. Depending on \a qb_flags, the
 * slave releases space, reports its usage, pre-acquires or acquires space.
 * The amount actually granted/released is returned in repbody->qb_count and,
 * when it is non-zero, the slave index then the global index are updated.
 *
 * \param env      - is the environment passed by the caller
 * \param lqe      - is the lquota_entry subject to the quota request
 * \param qmt      - is the master device
 * \param uuid     - is the uuid associated with the slave
 * \param qb_flags - are the quota request flags as packed in the quota_body
 * \param qb_count - is the amount of quota space the slave wants to
 *                   acquire/release
 * \param qb_usage - is the current space usage on the slave
 * \param repbody  - is the quota_body of reply
 *
 * \retval 0            : success
 * \retval -EDQUOT      : out of quota
 *         -EINPROGRESS : inform client to retry write/create
 *         -ESRCH       : ID isn't enforced on the master
 *         -EINVAL      : slave tried to release more than granted
 *         -ve          : other appropriate errors
 */
int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
	       struct qmt_device *qmt, struct obd_uuid *uuid, __u32 qb_flags,
	       __u64 qb_count, __u64 qb_usage, struct quota_body *repbody)
{
	struct qmt_thread_info	*qti = qmt_info(env);
	struct dt_object	*slv_obj = NULL;
	struct thandle		*th = NULL;
	__u64			 now, extra;
	__u64			 slv_granted, slv_granted_saved;
	int			 rc, wrc;
	ENTRY;

	LASSERT(uuid != NULL);

	/* initialize reply */
	memset(repbody, 0, sizeof(*repbody));
	memcpy(&repbody->qb_id, &lqe->lqe_id, sizeof(repbody->qb_id));

	if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_RECOVERABLE_ERR))
		RETURN(-cfs_fail_val);

	/* look-up index file associated with acquiring slave */
	slv_obj = lquota_disk_slv_find(env, qmt->qmt_child, LQE_ROOT(lqe),
				       lu_object_fid(&LQE_GLB_OBJ(lqe)->do_lu),
				       uuid);
	if (IS_ERR(slv_obj))
		GOTO(out, rc = PTR_ERR(slv_obj));

	/* pack slave fid in reply just for sanity check */
	memcpy(&repbody->qb_slv_fid, lu_object_fid(&slv_obj->do_lu),
	       sizeof(struct lu_fid));

	/* allocate & start transaction with enough credits to update
	 * global & slave indexes */
	th = qmt_trans_start_with_slv(env, lqe, slv_obj, &qti->qti_restore);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	lqe_write_lock(lqe);
	LQUOTA_DEBUG(lqe, "dqacq starts uuid:%s flags:0x%x wanted:"LPU64
		     " usage:"LPU64, obd_uuid2str(uuid), qb_flags, qb_count,
		     qb_usage);

	/* Legal race, limits have been removed on master, but slave didn't
	 * receive the change yet. Anything but a release is answered with
	 * -ESRCH in this case. */
	if (!(lqe->lqe_enforced || req_is_rel(qb_flags)))
		GOTO(out_locked, rc = -ESRCH);

	/* recompute qunit in case it was never initialized */
	qmt_revalidate(env, lqe);

	/* slave just wants to acquire per-ID lock */
	if (req_is_acq(qb_flags) && qb_count == 0)
		GOTO(out_locked, rc = 0);

	/* fetch how much quota space is already granted to this slave */
	rc = qmt_slv_read(env, lqe, slv_obj, &slv_granted);
	if (rc) {
		LQUOTA_ERROR(lqe, "Failed to get granted for slave %s, rc=%d",
			     obd_uuid2str(uuid), rc);
		GOTO(out_locked, rc);
	}
	/* remember what this slave owns right now so that the slave index can
	 * be rolled back in case of failure */
	slv_granted_saved = slv_granted;

	/* record current time for soft limit & grace time management */
	now = (__u64)cfs_time_current_sec();

	if (req_is_rel(qb_flags)) {
		/* Slave would like to release quota space */
		if (slv_granted < qb_count ||
		    lqe->lqe_granted < qb_count) {
			/* can't release more than granted */
			LQUOTA_ERROR(lqe, "Release too much! uuid:%s release:"
				     LPU64" granted:"LPU64", total:"LPU64,
				     obd_uuid2str(uuid), qb_count,
				     slv_granted, lqe->lqe_granted);
			GOTO(out_locked, rc = -EINVAL);
		}

		repbody->qb_count = qb_count;
		/* put released space back to global pool */
		QMT_REL(lqe, slv_granted, qb_count);
		GOTO(out_write, rc = 0);
	}

	if (req_has_rep(qb_flags) && slv_granted < qb_usage) {
		/* Slave is reporting space usage in quota request and it turns
		 * out to be using more quota space than owned, so we adjust
		 * granted space regardless of the current state of affairs */
		repbody->qb_count = qb_usage - slv_granted;
		QMT_GRANT(lqe, slv_granted, repbody->qb_count);
	}

	/* neither an acquire nor a pre-acquire, we are done */
	if (!(req_is_acq(qb_flags) || req_is_preacq(qb_flags)))
		GOTO(out_write, rc = 0);

	qmt_adjust_edquot(lqe, now);
	if (lqe->lqe_edquot)
		/* no hope to claim further space back */
		GOTO(out_write, rc = -EDQUOT);

	if (qmt_space_exhausted(lqe, now)) {
		/* might have some free space once rebalancing is completed */
		if (req_is_acq(qb_flags))
			rc = -EINPROGRESS;
		else
			rc = -EDQUOT;
		GOTO(out_write, rc);
	}

	if (req_is_preacq(qb_flags)) {
		/* slave would like to pre-acquire quota space. To do so, it
		 * reports in qb_count how much spare quota space it owns and we
		 * can grant back quota space which is consistent with qunit
		 * value. */
		if (qb_count >= lqe->lqe_qunit)
			/* slave already own the maximum it should */
			GOTO(out_write, rc = 0);

		extra = qmt_alloc_expand(lqe, slv_granted, qb_count);
		if (extra == 0)
			GOTO(out_write, rc = -EDQUOT);

		repbody->qb_count += extra;
		QMT_GRANT(lqe, slv_granted, extra);
		GOTO(out_write, rc = 0);
	}

	/* processing acquire request with clients waiting */
	if (lqe->lqe_hardlimit != 0 &&
	    lqe->lqe_granted + qb_count > lqe->lqe_hardlimit) {
		/* cannot grant as much as asked, but can still afford to grant
		 * some quota space back */
		extra = lqe->lqe_hardlimit - lqe->lqe_granted;
		repbody->qb_count += extra;
		QMT_GRANT(lqe, slv_granted, extra);
		GOTO(out_write, rc = 0);
	}

	/* the request can be satisfied in full */
	repbody->qb_count += qb_count;
	QMT_GRANT(lqe, slv_granted, qb_count);

	/* Try to expand the acquired count for DQACQ */
	extra = qmt_alloc_expand(lqe, slv_granted, 0);
	if (extra != 0) {
		/* can even grant more than asked */
		repbody->qb_count += extra;
		QMT_GRANT(lqe, slv_granted, extra);
	}

	GOTO(out_write, rc = 0);
out_write:
	/* nothing granted nor released, no index to update */
	if (repbody->qb_count == 0)
		GOTO(out_locked, rc);

	/* start/stop grace timer if required */
	if (lqe->lqe_softlimit != 0) {
		if (lqe->lqe_granted > lqe->lqe_softlimit &&
		    lqe->lqe_gracetime == 0)
			/* first time over soft limit, let's start grace
			 * timer */
			lqe->lqe_gracetime = now + qmt_lqe_grace(lqe);
		else if (lqe->lqe_granted <= lqe->lqe_softlimit &&
			 lqe->lqe_gracetime != 0)
			/* Clear grace timer */
			lqe->lqe_gracetime = 0;
	}

	/* Update slave index first since it is easier to roll back */
	wrc = qmt_slv_write(env, th, lqe, slv_obj, LQUOTA_BUMP_VER,
			    &repbody->qb_slv_ver, slv_granted);
	if (wrc) {
		/* restore initial quota settings */
		qmt_restore(lqe, &qti->qti_restore);
		/* reset qb_count */
		repbody->qb_count = 0;
		GOTO(out_locked, rc = wrc);
	}

	/* Update global index, no version bump needed */
	wrc = qmt_glb_write(env, th, lqe, 0, NULL);
	if (wrc) {
		rc = wrc;
		/* restore initial quota settings */
		qmt_restore(lqe, &qti->qti_restore);
		/* reset qb_count */
		repbody->qb_count = 0;

		/* restore previous granted value */
		wrc = qmt_slv_write(env, th, lqe, slv_obj, 0, NULL,
				    slv_granted_saved);
		if (wrc) {
			LQUOTA_ERROR(lqe, "failed to restore initial slave "
				     "value rc:%d ret%d", rc, wrc);
			LBUG();
		}
		qmt_adjust_edquot(lqe, now);
		GOTO(out_locked, rc);
	}

	/* Total granted has been changed, let's try to adjust the qunit
	 * size according to the total granted & limits. */
	qmt_adjust_qunit(env, lqe);

	/* clear/set edquot flag and notify slaves via glimpse if needed */
	qmt_adjust_edquot(lqe, now);
out_locked:
	LQUOTA_DEBUG(lqe, "dqacq ends count:"LPU64" ver:"LPU64" rc:%d",
		     repbody->qb_count, repbody->qb_slv_ver, rc);
	lqe_write_unlock(lqe);
out:
	if (th != NULL && !IS_ERR(th))
		dt_trans_stop(env, qmt->qmt_child, th);

	if (slv_obj != NULL && !IS_ERR(slv_obj))
		lu_object_put(env, &slv_obj->do_lu);

	if ((req_is_acq(qb_flags) || req_is_preacq(qb_flags)) &&
	    OBD_FAIL_CHECK(OBD_FAIL_QUOTA_EDQUOT)) {
		/* introduce inconsistency between granted value in slave index
		 * and slave index copy of slave */
		repbody->qb_count = 0;
		rc = -EDQUOT;
	}

	RETURN(rc);
}
/** * Adjust quota space (by acquiring or releasing) hold by the quota slave. * This function is called after each quota request completion and during * reintegration in order to report usage or re-acquire quota locks. * Space adjustment is aborted if there is already a quota request in flight * for this ID. * * \param env - the environment passed by the caller * \param lqe - is the qid entry to be processed * * \retval 0 on success, appropriate errors on failure */ int qsd_adjust(const struct lu_env *env, struct lquota_entry *lqe) { struct qsd_thread_info *qti = qsd_info(env); struct quota_body *qbody = &qti->qti_body; struct qsd_instance *qsd; struct qsd_qtype_info *qqi; int rc; bool intent = false; ENTRY; memset(qbody, 0, sizeof(*qbody)); rc = qsd_ready(lqe, &qbody->qb_glb_lockh); if (rc) { /* add to adjust list again to trigger adjustment later when * slave is ready */ LQUOTA_DEBUG(lqe, "delaying adjustment since qsd isn't ready"); qsd_adjust_schedule(lqe, true, false); RETURN(0); } qqi = lqe2qqi(lqe); qsd = qqi->qqi_qsd; lqe_write_lock(lqe); /* fill qb_count & qb_flags */ if (!qsd_calc_adjust(lqe, qbody)) { lqe_write_unlock(lqe); LQUOTA_DEBUG(lqe, "no adjustment required"); RETURN(0); } /* only 1 quota request in flight for a given ID is allowed */ rc = qsd_request_enter(lqe); if (rc) { /* already a request in flight, space adjustment will be run * again on request completion */ lqe_write_unlock(lqe); RETURN(0); } if (req_is_rel(qbody->qb_flags)) lqe->lqe_pending_rel = qbody->qb_count; lustre_handle_copy(&qti->qti_lockh, &lqe->lqe_lockh); lqe_write_unlock(lqe); /* hold a refcount until completion */ lqe_getref(lqe); /* fill other quota body fields */ qbody->qb_fid = qqi->qqi_fid; qbody->qb_id = lqe->lqe_id; if (req_is_acq(qbody->qb_flags) || req_is_preacq(qbody->qb_flags)) { /* check whether we own a valid lock for this ID */ rc = qsd_id_lock_match(&qti->qti_lockh, &qbody->qb_lockh); if (rc) { memset(&qti->qti_lockh, 0, sizeof(qti->qti_lockh)); if 
(req_is_preacq(qbody->qb_flags)) { if (req_has_rep(qbody->qb_flags)) /* still want to report usage */ qbody->qb_flags = QUOTA_DQACQ_FL_REPORT; else /* no pre-acquire if no per-ID lock */ GOTO(out, rc = -ENOLCK); } else { /* no lock found, should use intent */ intent = true; } } else if (req_is_acq(qbody->qb_flags) && qbody->qb_count == 0) { /* found cached lock, no need to acquire */ GOTO(out, rc = 0); } } else { /* release and report don't need a per-ID lock */ memset(&qti->qti_lockh, 0, sizeof(qti->qti_lockh)); } if (!intent) { rc = qsd_send_dqacq(env, qsd->qsd_exp, qbody, false, qsd_req_completion, qqi, &qti->qti_lockh, lqe); } else { struct lquota_lvb *lvb; OBD_ALLOC_PTR(lvb); if (lvb == NULL) GOTO(out, rc = -ENOMEM); rc = qsd_intent_lock(env, qsd->qsd_exp, qbody, false, IT_QUOTA_DQACQ, qsd_req_completion, qqi, lvb, (void *)lqe); } /* the completion function will be called by qsd_send_dqacq or * qsd_intent_lock */ RETURN(rc); out: qsd_req_completion(env, qqi, qbody, NULL, &qti->qti_lockh, NULL, lqe, rc); return rc; }
/** * Quota enforcement handler. If local quota can satisfy this operation, * return success, otherwise, acquire more quota from master. * (for write operation, if master isn't available at this moment, return * -EINPROGRESS to inform client to retry the write) * * \param env - the environment passed by the caller * \param qsd - is the qsd instance associated with the device in charge * of the operation. * \param qid - is the qid information attached in the transaction handle * \param space - is the space required by the operation * \param flags - if the operation is write, return caller no user/group * and sync commit flags * * \retval 0 - success * \retval -EDQUOT - out of quota * \retval -EINPROGRESS - inform client to retry write * \retval -ve - other appropriate errors */ static int qsd_op_begin0(const struct lu_env *env, struct qsd_qtype_info *qqi, struct lquota_id_info *qid, long long space, int *flags) { struct lquota_entry *lqe; int rc, ret = -EINPROGRESS; struct l_wait_info lwi; ENTRY; if (qid->lqi_qentry != NULL) { /* we already had to deal with this id for this transaction */ lqe = qid->lqi_qentry; if (!lqe->lqe_enforced) RETURN(0); } else { /* look up lquota entry associated with qid */ lqe = lqe_locate(env, qqi->qqi_site, &qid->lqi_id); if (IS_ERR(lqe)) RETURN(PTR_ERR(lqe)); if (!lqe->lqe_enforced) { lqe_putref(lqe); RETURN(0); } qid->lqi_qentry = lqe; /* lqe will be released in qsd_op_end() */ } if (space <= 0) { /* when space is negative or null, we don't need to consume * quota space. 
That said, we still want to perform space * adjustments in qsd_op_end, so we return here, but with * a reference on the lqe */ if (flags != NULL) { rc = qsd_refresh_usage(env, lqe); GOTO(out_flags, rc); } RETURN(0); } LQUOTA_DEBUG(lqe, "op_begin space:"LPD64, space); lqe_write_lock(lqe); lqe->lqe_waiting_write += space; lqe_write_unlock(lqe); /* acquire quota space for the operation, cap overall wait time to * prevent a service thread from being stuck for too long */ lwi = LWI_TIMEOUT(cfs_time_seconds(qsd_wait_timeout(qqi->qqi_qsd)), NULL, NULL); rc = l_wait_event(lqe->lqe_waiters, qsd_acquire(env, lqe, space, &ret), &lwi); if (rc == 0 && ret == 0) { qid->lqi_space += space; } else { if (rc == 0) rc = ret; LQUOTA_DEBUG(lqe, "acquire quota failed:%d", rc); lqe_write_lock(lqe); lqe->lqe_waiting_write -= space; if (flags && lqe->lqe_pending_write != 0) /* Inform OSD layer that there are pending writes. * It might want to retry after a sync if appropriate */ *flags |= QUOTA_FL_SYNC; lqe_write_unlock(lqe); /* convert recoverable error into -EINPROGRESS, client will * retry */ if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ENOLCK || rc == -EAGAIN || rc == -EINTR) { rc = -EINPROGRESS; } else if (rc == -ESRCH) { rc = 0; LQUOTA_ERROR(lqe, "ID isn't enforced on master, it " "probably due to a legeal race, if this " "message is showing up constantly, there " "could be some inconsistence between " "master & slave, and quota reintegration " "needs be re-triggered."); } } if (flags != NULL) { out_flags: LASSERT(qid->lqi_is_blk); if (rc != 0) { *flags |= LQUOTA_OVER_FL(qqi->qqi_qtype); } else { __u64 usage; lqe_read_lock(lqe); usage = lqe->lqe_usage; usage += lqe->lqe_pending_write; usage += lqe->lqe_waiting_write; usage += qqi->qqi_qsd->qsd_sync_threshold; /* if we should notify client to start sync write */ if (usage >= lqe->lqe_granted - lqe->lqe_pending_rel) *flags |= LQUOTA_OVER_FL(qqi->qqi_qtype); else *flags &= ~LQUOTA_OVER_FL(qqi->qqi_qtype); lqe_read_unlock(lqe); } } 
RETURN(rc); }
/** * Acquire quota space from master. * There are at most 1 in-flight dqacq/dqrel. * * \param env - the environment passed by the caller * \param lqe - is the qid entry to be processed * * \retval 0 - success * \retval -EDQUOT - out of quota * \retval -EINPROGRESS - inform client to retry write/create * \retval -EBUSY - already a quota request in flight * \retval -ve - other appropriate errors */ static int qsd_acquire_remote(const struct lu_env *env, struct lquota_entry *lqe) { struct qsd_thread_info *qti = qsd_info(env); struct quota_body *qbody = &qti->qti_body; struct qsd_instance *qsd; struct qsd_qtype_info *qqi; int rc; ENTRY; memset(qbody, 0, sizeof(*qbody)); rc = qsd_ready(lqe, &qbody->qb_glb_lockh); if (rc) RETURN(rc); qqi = lqe2qqi(lqe); qsd = qqi->qqi_qsd; lqe_write_lock(lqe); /* is quota really enforced for this id? */ if (!lqe->lqe_enforced) { lqe_write_unlock(lqe); LQUOTA_DEBUG(lqe, "quota not enforced any more"); RETURN(0); } /* fill qb_count & qb_flags */ if (!qsd_calc_acquire(lqe, qbody)) { lqe_write_unlock(lqe); LQUOTA_DEBUG(lqe, "No acquire required"); RETURN(0); } /* check whether an acquire request completed recently */ if (lqe->lqe_acq_rc != 0 && cfs_time_before_64(cfs_time_shift_64(-1), lqe->lqe_acq_time)) { lqe_write_unlock(lqe); LQUOTA_DEBUG(lqe, "using cached return code %d", lqe->lqe_acq_rc); RETURN(lqe->lqe_acq_rc); } /* only 1 quota request in flight for a given ID is allowed */ rc = qsd_request_enter(lqe); if (rc) { lqe_write_unlock(lqe); RETURN(rc); } lustre_handle_copy(&qti->qti_lockh, &lqe->lqe_lockh); lqe_write_unlock(lqe); /* hold a refcount until completion */ lqe_getref(lqe); /* fill other quota body fields */ qbody->qb_fid = qqi->qqi_fid; qbody->qb_id = lqe->lqe_id; /* check whether we already own a valid lock for this ID */ rc = qsd_id_lock_match(&qti->qti_lockh, &qbody->qb_lockh); if (rc) { struct lquota_lvb *lvb; OBD_ALLOC_PTR(lvb); if (lvb == NULL) { rc = -ENOMEM; qsd_req_completion(env, qqi, qbody, NULL, &qti->qti_lockh, 
NULL, lqe, rc); RETURN(rc); } /* no lock found, should use intent */ rc = qsd_intent_lock(env, qsd->qsd_exp, qbody, true, IT_QUOTA_DQACQ, qsd_req_completion, qqi, lvb, (void *)lqe); } else { /* lock found, should use regular dqacq */ rc = qsd_send_dqacq(env, qsd->qsd_exp, qbody, true, qsd_req_completion, qqi, &qti->qti_lockh, lqe); } /* the completion function will be called by qsd_send_dqacq or * qsd_intent_lock */ RETURN(rc); }
/** * Callback function called when an acquire/release request sent to the master * is completed */ static void qsd_req_completion(const struct lu_env *env, struct qsd_qtype_info *qqi, struct quota_body *reqbody, struct quota_body *repbody, struct lustre_handle *lockh, struct lquota_lvb *lvb, void *arg, int ret) { struct lquota_entry *lqe = (struct lquota_entry *)arg; struct qsd_thread_info *qti; int rc; bool adjust = false, cancel = false; ENTRY; LASSERT(qqi != NULL && lqe != NULL); /* environment passed by ptlrpcd is mostly used by CLIO and hasn't the * DT tags set. */ rc = lu_env_refill_by_tags((struct lu_env *)env, LCT_DT_THREAD, 0); if (rc) { LQUOTA_ERROR(lqe, "failed to refill environmnent %d", rc); lqe_write_lock(lqe); /* can't afford to adjust quota space with no suitable lu_env */ GOTO(out_noadjust, rc); } qti = qsd_info(env); lqe_write_lock(lqe); LQUOTA_DEBUG(lqe, "DQACQ returned %d, flags:0x%x", ret, reqbody->qb_flags); /* despite -EDQUOT & -EINPROGRESS errors, the master might still * grant us back quota space to adjust quota overrun */ if (ret != 0 && ret != -EDQUOT && ret != -EINPROGRESS) { if (ret != -ETIMEDOUT && ret != -ENOTCONN && ret != -ESHUTDOWN && ret != -EAGAIN) /* print errors only if return code is unexpected */ LQUOTA_ERROR(lqe, "DQACQ failed with %d, flags:0x%x", ret, reqbody->qb_flags); GOTO(out, ret); } /* Set the lqe_lockh */ if (lustre_handle_is_used(lockh) && !lustre_handle_equal(lockh, &lqe->lqe_lockh)) lustre_handle_copy(&lqe->lqe_lockh, lockh); /* If the replied qb_count is zero, it means master didn't process * the DQACQ since the limit for this ID has been removed, so we * should not update quota entry & slave index copy neither. 
*/ if (repbody != NULL && repbody->qb_count != 0) { LQUOTA_DEBUG(lqe, "DQACQ qb_count:"LPU64, repbody->qb_count); if (req_is_rel(reqbody->qb_flags)) { if (lqe->lqe_granted < repbody->qb_count) { LQUOTA_ERROR(lqe, "can't release more space " "than owned "LPU64"<"LPU64, lqe->lqe_granted, repbody->qb_count); lqe->lqe_granted = 0; } else { lqe->lqe_granted -= repbody->qb_count; } /* Cancel the per-ID lock initiatively when there * isn't any usage & grant, which can avoid master * sending glimpse unnecessarily to this slave on * quota revoking */ if (!lqe->lqe_pending_write && !lqe->lqe_granted && !lqe->lqe_waiting_write && !lqe->lqe_usage) cancel = true; } else { lqe->lqe_granted += repbody->qb_count; } qti->qti_rec.lqr_slv_rec.qsr_granted = lqe->lqe_granted; lqe_write_unlock(lqe); /* Update the slave index file in the dedicated thread. So far, * We don't update the version of slave index copy on DQACQ. * No locking is necessary since nobody can change * lqe->lqe_granted while lqe->lqe_pending_req > 0 */ qsd_upd_schedule(qqi, lqe, &lqe->lqe_id, &qti->qti_rec, 0, false); lqe_write_lock(lqe); } /* extract information from lvb */ if (ret == 0 && lvb != 0) { if (lvb->lvb_id_qunit != 0) qsd_set_qunit(lqe, lvb->lvb_id_qunit); qsd_set_edquot(lqe, !!(lvb->lvb_flags & LQUOTA_FL_EDQUOT)); } else if (repbody != NULL && repbody->qb_qunit != 0) { qsd_set_qunit(lqe, repbody->qb_qunit); } /* turn off pre-acquire if it failed with -EDQUOT. This is done to avoid * flooding the master with acquire request. 
Pre-acquire will be turned * on again as soon as qunit is modified */ if (req_is_preacq(reqbody->qb_flags) && ret == -EDQUOT) lqe->lqe_nopreacq = true; out: adjust = qsd_adjust_needed(lqe); if (reqbody && req_is_acq(reqbody->qb_flags) && ret != -EDQUOT) { lqe->lqe_acq_rc = ret; lqe->lqe_acq_time = cfs_time_current_64(); } out_noadjust: qsd_request_exit(lqe); lqe_write_unlock(lqe); /* release reference on per-ID lock */ if (lustre_handle_is_used(lockh)) ldlm_lock_decref(lockh, qsd_id_einfo.ei_mode); if (cancel) { qsd_adjust_schedule(lqe, false, true); } else if (adjust) { if (!ret || ret == -EDQUOT) qsd_adjust_schedule(lqe, false, false); else qsd_adjust_schedule(lqe, true, false); } lqe_putref(lqe); if (lvb) OBD_FREE_PTR(lvb); EXIT; }
/** * Check whether any quota space adjustment (pre-acquire/release/report) is * needed for a given quota ID. If a non-null \a qbody is passed, then the * \a qbody structure (qb_count/flags/usage) is filled with appropriate data * to be packed in the quota request. * * \param lqe - is the lquota entry for which we would like to adjust quota * space. * \param qbody - is the quota body to fill, if not NULL. * * \retval true - space adjustment is required and \a qbody is filled, if not * NULL * \retval false - no space adjustment required */ static bool qsd_calc_adjust(struct lquota_entry *lqe, struct quota_body *qbody) { __u64 usage, granted; ENTRY; usage = lqe->lqe_usage; usage += lqe->lqe_pending_write + lqe->lqe_waiting_write; granted = lqe->lqe_granted; if (qbody != NULL) qbody->qb_flags = 0; if (!lqe->lqe_enforced) { /* quota not enforced any more for this ID */ if (granted != 0) { /* release all quota space unconditionally */ LQUOTA_DEBUG(lqe, "not enforced, releasing all space"); if (qbody != NULL) { qbody->qb_count = granted; qbody->qb_flags = QUOTA_DQACQ_FL_REL; } RETURN(true); } RETURN(false); } if (!lustre_handle_is_used(&lqe->lqe_lockh)) { /* No valid per-ID lock * When reporting quota (during reintegration or on setquota * glimpse), we should release granted space if usage is 0. * Otherwise, if the usage is less than granted, we need to * acquire the per-ID lock to make sure the unused grant can be * reclaimed by per-ID lock glimpse. 
*/ if (usage == 0) { /* no on-disk usage and no outstanding activity, release * space */ if (granted != 0) { LQUOTA_DEBUG(lqe, "no usage, releasing all " "space"); if (qbody != NULL) { qbody->qb_count = granted; qbody->qb_flags = QUOTA_DQACQ_FL_REL; } RETURN(true); } LQUOTA_DEBUG(lqe, "no usage + no granted, nothing to " "do"); RETURN(false); } if (lqe->lqe_usage < lqe->lqe_granted) { /* holding quota space w/o any lock, enqueue per-ID lock * again */ LQUOTA_DEBUG(lqe, "(re)acquiring per-ID lock"); if (qbody != NULL) { qbody->qb_count = 0; qbody->qb_flags = QUOTA_DQACQ_FL_ACQ; } RETURN(true); } if (lqe->lqe_usage > lqe->lqe_granted) { /* quota overrun, report usage */ LQUOTA_DEBUG(lqe, "overrun, reporting usage"); if (qbody != NULL) { qbody->qb_usage = lqe->lqe_usage; qbody->qb_flags = QUOTA_DQACQ_FL_REPORT; } RETURN(true); } LQUOTA_DEBUG(lqe, "granted matches usage, nothing to do"); RETURN(false); } /* valid per-ID lock * Apply good old quota qunit adjustment logic which has been around * since lustre 1.4: * 1. release spare quota space? */ if (granted > usage + lqe->lqe_qunit) { /* pre-release quota space */ if (qbody == NULL) RETURN(true); qbody->qb_count = granted - usage; /* if usage == 0, release all granted space */ if (usage) { /* try to keep one qunit of quota space */ qbody->qb_count -= lqe->lqe_qunit; /* but don't release less than qtune to avoid releasing * space too often */ if (qbody->qb_count < lqe->lqe_qtune) qbody->qb_count = lqe->lqe_qtune; } qbody->qb_flags = QUOTA_DQACQ_FL_REL; RETURN(true); } /* 2. Any quota overrun? */ if (lqe->lqe_usage > lqe->lqe_granted) { /* we overconsumed quota space, we report usage in request so * that master can adjust it unconditionally */ if (qbody == NULL) RETURN(true); qbody->qb_usage = lqe->lqe_usage; granted = lqe->lqe_usage; qbody->qb_flags = QUOTA_DQACQ_FL_REPORT; } /* 3. Time to pre-acquire? 
*/ if (!lqe->lqe_edquot && !lqe->lqe_nopreacq && usage > 0 && lqe->lqe_qunit != 0 && granted < usage + lqe->lqe_qtune) { /* To pre-acquire quota space, we report how much spare quota * space the slave currently owns, then the master will grant us * back how much we can pretend given the current state of * affairs */ if (qbody == NULL) RETURN(true); if (granted <= usage) qbody->qb_count = 0; else qbody->qb_count = granted - usage; qbody->qb_flags |= QUOTA_DQACQ_FL_PREACQ; RETURN(true); } if (qbody != NULL) RETURN(qbody->qb_flags != 0); else RETURN(false); }