/* * Handle quota request from slave. * * \param env - is the environment passed by the caller * \param ld - is the lu device associated with the qmt * \param req - is the quota acquire request */ static int qmt_dqacq(const struct lu_env *env, struct lu_device *ld, struct ptlrpc_request *req) { struct qmt_device *qmt = lu2qmt_dev(ld); struct quota_body *qbody, *repbody; struct obd_uuid *uuid; struct ldlm_lock *lock; struct lquota_entry *lqe; int pool_id, pool_type, qtype; int rc; ENTRY; qbody = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_BODY); if (qbody == NULL) RETURN(err_serious(-EPROTO)); repbody = req_capsule_server_get(&req->rq_pill, &RMF_QUOTA_BODY); if (repbody == NULL) RETURN(err_serious(-EFAULT)); /* verify if global lock is stale */ if (!lustre_handle_is_used(&qbody->qb_glb_lockh)) RETURN(-ENOLCK); lock = ldlm_handle2lock(&qbody->qb_glb_lockh); if (lock == NULL) RETURN(-ENOLCK); LDLM_LOCK_PUT(lock); uuid = &req->rq_export->exp_client_uuid; if (req_is_rel(qbody->qb_flags) + req_is_acq(qbody->qb_flags) + req_is_preacq(qbody->qb_flags) > 1) { CERROR("%s: malformed quota request with conflicting flags set " "(%x) from slave %s\n", qmt->qmt_svname, qbody->qb_flags, obd_uuid2str(uuid)); RETURN(-EPROTO); } if (req_is_acq(qbody->qb_flags) || req_is_preacq(qbody->qb_flags)) { /* acquire and pre-acquire should use a valid ID lock */ if (!lustre_handle_is_used(&qbody->qb_lockh)) RETURN(-ENOLCK); lock = ldlm_handle2lock(&qbody->qb_lockh); if (lock == NULL) /* no lock associated with this handle */ RETURN(-ENOLCK); LDLM_DEBUG(lock, "%sacquire request", req_is_preacq(qbody->qb_flags) ? "pre" : ""); if (!obd_uuid_equals(&lock->l_export->exp_client_uuid, uuid)) { /* sorry, no way to cheat ... 
*/ LDLM_LOCK_PUT(lock); RETURN(-ENOLCK); } if ((lock->l_flags & LDLM_FL_AST_SENT) != 0) { struct ptlrpc_service_part *svc; unsigned int timeout; svc = req->rq_rqbd->rqbd_svcpt; timeout = at_est2timeout(at_get(&svc->scp_at_estimate)); timeout = max(timeout, ldlm_timeout); /* lock is being cancelled, prolong timeout */ ldlm_refresh_waiting_lock(lock, timeout); } LDLM_LOCK_PUT(lock); } /* extract pool & quota information from global index FID packed in the * request */ rc = lquota_extract_fid(&qbody->qb_fid, &pool_id, &pool_type, &qtype); if (rc) RETURN(-EINVAL); /* Find the quota entry associated with the quota id */ lqe = qmt_pool_lqe_lookup(env, qmt, pool_id, pool_type, qtype, &qbody->qb_id); if (IS_ERR(lqe)) RETURN(PTR_ERR(lqe)); /* process quota request */ rc = qmt_dqacq0(env, lqe, qmt, uuid, qbody->qb_flags, qbody->qb_count, qbody->qb_usage, repbody); if (lustre_handle_is_used(&qbody->qb_lockh)) /* return current qunit value only to slaves owning an per-ID * quota lock. For enqueue, the qunit value will be returned in * the LVB */ repbody->qb_qunit = lqe->lqe_qunit; lqe_putref(lqe); RETURN(rc); }
/** * Adjust quota space (by acquiring or releasing) hold by the quota slave. * This function is called after each quota request completion and during * reintegration in order to report usage or re-acquire quota locks. * Space adjustment is aborted if there is already a quota request in flight * for this ID. * * \param env - the environment passed by the caller * \param lqe - is the qid entry to be processed * * \retval 0 on success, appropriate errors on failure */ int qsd_adjust(const struct lu_env *env, struct lquota_entry *lqe) { struct qsd_thread_info *qti = qsd_info(env); struct quota_body *qbody = &qti->qti_body; struct qsd_instance *qsd; struct qsd_qtype_info *qqi; int rc; bool intent = false; ENTRY; memset(qbody, 0, sizeof(*qbody)); rc = qsd_ready(lqe, &qbody->qb_glb_lockh); if (rc) { /* add to adjust list again to trigger adjustment later when * slave is ready */ LQUOTA_DEBUG(lqe, "delaying adjustment since qsd isn't ready"); qsd_adjust_schedule(lqe, true, false); RETURN(0); } qqi = lqe2qqi(lqe); qsd = qqi->qqi_qsd; lqe_write_lock(lqe); /* fill qb_count & qb_flags */ if (!qsd_calc_adjust(lqe, qbody)) { lqe_write_unlock(lqe); LQUOTA_DEBUG(lqe, "no adjustment required"); RETURN(0); } /* only 1 quota request in flight for a given ID is allowed */ rc = qsd_request_enter(lqe); if (rc) { /* already a request in flight, space adjustment will be run * again on request completion */ lqe_write_unlock(lqe); RETURN(0); } if (req_is_rel(qbody->qb_flags)) lqe->lqe_pending_rel = qbody->qb_count; lustre_handle_copy(&qti->qti_lockh, &lqe->lqe_lockh); lqe_write_unlock(lqe); /* hold a refcount until completion */ lqe_getref(lqe); /* fill other quota body fields */ qbody->qb_fid = qqi->qqi_fid; qbody->qb_id = lqe->lqe_id; if (req_is_acq(qbody->qb_flags) || req_is_preacq(qbody->qb_flags)) { /* check whether we own a valid lock for this ID */ rc = qsd_id_lock_match(&qti->qti_lockh, &qbody->qb_lockh); if (rc) { memset(&qti->qti_lockh, 0, sizeof(qti->qti_lockh)); if 
(req_is_preacq(qbody->qb_flags)) { if (req_has_rep(qbody->qb_flags)) /* still want to report usage */ qbody->qb_flags = QUOTA_DQACQ_FL_REPORT; else /* no pre-acquire if no per-ID lock */ GOTO(out, rc = -ENOLCK); } else { /* no lock found, should use intent */ intent = true; } } else if (req_is_acq(qbody->qb_flags) && qbody->qb_count == 0) { /* found cached lock, no need to acquire */ GOTO(out, rc = 0); } } else { /* release and report don't need a per-ID lock */ memset(&qti->qti_lockh, 0, sizeof(qti->qti_lockh)); } if (!intent) { rc = qsd_send_dqacq(env, qsd->qsd_exp, qbody, false, qsd_req_completion, qqi, &qti->qti_lockh, lqe); } else { struct lquota_lvb *lvb; OBD_ALLOC_PTR(lvb); if (lvb == NULL) GOTO(out, rc = -ENOMEM); rc = qsd_intent_lock(env, qsd->qsd_exp, qbody, false, IT_QUOTA_DQACQ, qsd_req_completion, qqi, lvb, (void *)lqe); } /* the completion function will be called by qsd_send_dqacq or * qsd_intent_lock */ RETURN(rc); out: qsd_req_completion(env, qqi, qbody, NULL, &qti->qti_lockh, NULL, lqe, rc); return rc; }
/*
 * Helper function to handle quota request from slave.
 *
 * The reply body is zeroed and stamped with the quota ID, the slave index
 * object is looked up, a transaction is started and, under the lqe write
 * lock, the request is processed as a release, a usage report, a pre-acquire
 * or an acquire. Whenever the reply carries a non-zero qb_count, the slave
 * index and then the global index are updated; if either update fails the
 * in-memory settings are restored and qb_count is reset to 0.
 *
 * \param env      - is the environment passed by the caller
 * \param lqe      - is the lquota_entry subject to the quota request
 * \param qmt      - is the master device
 * \param uuid     - is the uuid associated with the slave, must not be NULL
 * \param qb_flags - are the quota request flags as packed in the quota_body
 * \param qb_count - is the amount of quota space the slave wants to
 *                   acquire/release
 * \param qb_usage - is the current space usage on the slave
 * \param repbody  - is the quota_body of reply
 *
 * \retval 0            : success
 * \retval -EDQUOT      : out of quota
 *         -EINPROGRESS : inform client to retry write/create
 *         -ESRCH       : quota is not enforced for this entry and the request
 *                        is not a release
 *         -EINVAL      : slave tries to release more than what is granted
 *         -ve          : other appropriate errors
 */
int qmt_dqacq0(const struct lu_env *env, struct lquota_entry *lqe,
	       struct qmt_device *qmt, struct obd_uuid *uuid, __u32 qb_flags,
	       __u64 qb_count, __u64 qb_usage, struct quota_body *repbody)
{
	struct qmt_thread_info	*qti = qmt_info(env);
	__u64			 now, count;
	struct dt_object	*slv_obj = NULL;
	__u64			 slv_granted, slv_granted_bck;
	struct thandle		*th = NULL;
	int			 rc, ret;
	ENTRY;

	LASSERT(uuid != NULL);

	/* initialize reply */
	memset(repbody, 0, sizeof(*repbody));
	memcpy(&repbody->qb_id, &lqe->lqe_id, sizeof(repbody->qb_id));

	/* fault injection point: fail the request with -cfs_fail_val before
	 * anything else is done */
	if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_RECOVERABLE_ERR))
		RETURN(-cfs_fail_val);

	/* look-up index file associated with acquiring slave */
	slv_obj = lquota_disk_slv_find(env, qmt->qmt_child, LQE_ROOT(lqe),
				       lu_object_fid(&LQE_GLB_OBJ(lqe)->do_lu),
				       uuid);
	if (IS_ERR(slv_obj))
		GOTO(out, rc = PTR_ERR(slv_obj));

	/* pack slave fid in reply just for sanity check */
	memcpy(&repbody->qb_slv_fid, lu_object_fid(&slv_obj->do_lu),
	       sizeof(struct lu_fid));

	/* allocate & start transaction with enough credits to update
	 * global & slave indexes */
	th = qmt_trans_start_with_slv(env, lqe, slv_obj, &qti->qti_restore);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	lqe_write_lock(lqe);
	LQUOTA_DEBUG(lqe, "dqacq starts uuid:%s flags:0x%x wanted:"LPU64
		     " usage:"LPU64, obd_uuid2str(uuid), qb_flags, qb_count,
		     qb_usage);

	/* Legal race, limits have been removed on master, but slave didn't
	 * receive the change yet. Anything but a release request is failed
	 * with -ESRCH until the slave gets notified. */
	if (!lqe->lqe_enforced && !req_is_rel(qb_flags))
		GOTO(out_locked, rc = -ESRCH);

	/* recompute qunit in case it was never initialized */
	qmt_revalidate(env, lqe);

	/* slave just wants to acquire per-ID lock */
	if (req_is_acq(qb_flags) && qb_count == 0)
		GOTO(out_locked, rc = 0);

	/* fetch how much quota space is already granted to this slave */
	rc = qmt_slv_read(env, lqe, slv_obj, &slv_granted);
	if (rc) {
		LQUOTA_ERROR(lqe, "Failed to get granted for slave %s, rc=%d",
			     obd_uuid2str(uuid), rc);
		GOTO(out_locked, rc);
	}
	/* recall how much space this slave currently owns in order to restore
	 * it in case of failure */
	slv_granted_bck = slv_granted;

	/* record current time for soft limit & grace time management */
	now = (__u64)cfs_time_current_sec();

	if (req_is_rel(qb_flags)) {
		/* Slave would like to release quota space */
		if (slv_granted < qb_count ||
		    lqe->lqe_granted < qb_count) {
			/* can't release more than granted */
			LQUOTA_ERROR(lqe, "Release too much! uuid:%s release:"
				     LPU64" granted:"LPU64", total:"LPU64,
				     obd_uuid2str(uuid), qb_count,
				     slv_granted, lqe->lqe_granted);
			GOTO(out_locked, rc = -EINVAL);
		}

		repbody->qb_count = qb_count;
		/* put released space back to global pool */
		QMT_REL(lqe, slv_granted, qb_count);
		GOTO(out_write, rc = 0);
	}

	if (req_has_rep(qb_flags) && slv_granted < qb_usage) {
		/* Slave is reporting space usage in quota request and it turns
		 * out to be using more quota space than owned, so we adjust
		 * granted space regardless of the current state of affairs */
		repbody->qb_count = qb_usage - slv_granted;
		QMT_GRANT(lqe, slv_granted, repbody->qb_count);
	}

	/* pure usage report, nothing more to grant */
	if (!req_is_acq(qb_flags) && !req_is_preacq(qb_flags))
		GOTO(out_write, rc = 0);

	qmt_adjust_edquot(lqe, now);
	if (lqe->lqe_edquot)
		/* no hope to claim further space back */
		GOTO(out_write, rc = -EDQUOT);

	if (qmt_space_exhausted(lqe, now)) {
		/* might have some free space once rebalancing is completed;
		 * acquire gets -EINPROGRESS, pre-acquire gets -EDQUOT */
		rc = req_is_acq(qb_flags) ? -EINPROGRESS : -EDQUOT;
		GOTO(out_write, rc);
	}

	if (req_is_preacq(qb_flags)) {
		/* slave would like to pre-acquire quota space. To do so, it
		 * reports in qb_count how much spare quota space it owns and we
		 * can grant back quota space which is consistent with qunit
		 * value. */

		if (qb_count >= lqe->lqe_qunit)
			/* slave already own the maximum it should */
			GOTO(out_write, rc = 0);

		count = qmt_alloc_expand(lqe, slv_granted, qb_count);
		if (count == 0)
			GOTO(out_write, rc = -EDQUOT);

		/* "+=" since a usage adjustment may already be accounted in
		 * qb_count above */
		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	/* processing acquire request with clients waiting */
	if (lqe->lqe_hardlimit != 0 &&
	    lqe->lqe_granted + qb_count > lqe->lqe_hardlimit) {
		/* cannot grant as much as asked, but can still afford to grant
		 * some quota space back */
		count = lqe->lqe_hardlimit - lqe->lqe_granted;
		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	/* Whouhou! we can satisfy the slave request! */
	repbody->qb_count += qb_count;
	QMT_GRANT(lqe, slv_granted, qb_count);

	/* Try to expand the acquired count for DQACQ */
	count = qmt_alloc_expand(lqe, slv_granted, 0);
	if (count != 0) {
		/* can even grant more than asked, it is like xmas ... */
		repbody->qb_count += count;
		QMT_GRANT(lqe, slv_granted, count);
		GOTO(out_write, rc = 0);
	}

	GOTO(out_write, rc = 0);
out_write:
	/* nothing was granted or released, so there is nothing to write;
	 * rc is returned as set by the caller of this label */
	if (repbody->qb_count == 0)
		GOTO(out_locked, rc);

	/* start/stop grace timer if required */
	if (lqe->lqe_softlimit != 0) {
		if (lqe->lqe_granted > lqe->lqe_softlimit &&
		    lqe->lqe_gracetime == 0)
			/* first time over soft limit, let's start grace
			 * timer */
			lqe->lqe_gracetime = now + qmt_lqe_grace(lqe);
		else if (lqe->lqe_granted <= lqe->lqe_softlimit &&
			 lqe->lqe_gracetime != 0)
			/* Clear grace timer */
			lqe->lqe_gracetime = 0;
	}

	/* Update slave index first since it is easier to roll back */
	ret = qmt_slv_write(env, th, lqe, slv_obj, LQUOTA_BUMP_VER,
			    &repbody->qb_slv_ver, slv_granted);
	if (ret) {
		/* restore initial quota settings */
		qmt_restore(lqe, &qti->qti_restore);
		/* reset qb_count */
		repbody->qb_count = 0;
		GOTO(out_locked, rc = ret);
	}

	/* Update global index, no version bump needed */
	ret = qmt_glb_write(env, th, lqe, 0, NULL);
	if (ret) {
		/* rc keeps the global write error; ret is reused below for
		 * the outcome of the slave index roll back */
		rc = ret;
		/* restore initial quota settings */
		qmt_restore(lqe, &qti->qti_restore);
		/* reset qb_count */
		repbody->qb_count = 0;

		/* restore previous granted value */
		ret = qmt_slv_write(env, th, lqe, slv_obj, 0, NULL,
				    slv_granted_bck);
		if (ret) {
			/* slave index could not be rolled back, give up */
			LQUOTA_ERROR(lqe, "failed to restore initial slave "
				     "value rc:%d ret%d", rc, ret);
			LBUG();
		}
		qmt_adjust_edquot(lqe, now);
		GOTO(out_locked, rc);
	}

	/* Total granted has been changed, let's try to adjust the qunit
	 * size according to the total granted & limits. */
	qmt_adjust_qunit(env, lqe);

	/* clear/set edquot flag and notify slaves via glimpse if needed */
	qmt_adjust_edquot(lqe, now);
out_locked:
	LQUOTA_DEBUG(lqe, "dqacq ends count:"LPU64" ver:"LPU64" rc:%d",
		     repbody->qb_count, repbody->qb_slv_ver, rc);
	lqe_write_unlock(lqe);
out:
	/* th and slv_obj may be NULL or an error pointer depending on how far
	 * the function went, only release what was really obtained */
	if (th != NULL && !IS_ERR(th))
		dt_trans_stop(env, qmt->qmt_child, th);

	if (slv_obj != NULL && !IS_ERR(slv_obj))
		lu_object_put(env, &slv_obj->do_lu);

	if ((req_is_acq(qb_flags) || req_is_preacq(qb_flags)) &&
	    OBD_FAIL_CHECK(OBD_FAIL_QUOTA_EDQUOT)) {
		/* introduce inconsistency between granted value in slave index
		 * and slave index copy of slave */
		repbody->qb_count = 0;
		rc = -EDQUOT;
	}

	RETURN(rc);
}
/** * Callback function called when an acquire/release request sent to the master * is completed */ static void qsd_req_completion(const struct lu_env *env, struct qsd_qtype_info *qqi, struct quota_body *reqbody, struct quota_body *repbody, struct lustre_handle *lockh, struct lquota_lvb *lvb, void *arg, int ret) { struct lquota_entry *lqe = (struct lquota_entry *)arg; struct qsd_thread_info *qti; int rc; bool adjust = false, cancel = false; ENTRY; LASSERT(qqi != NULL && lqe != NULL); /* environment passed by ptlrpcd is mostly used by CLIO and hasn't the * DT tags set. */ rc = lu_env_refill_by_tags((struct lu_env *)env, LCT_DT_THREAD, 0); if (rc) { LQUOTA_ERROR(lqe, "failed to refill environmnent %d", rc); lqe_write_lock(lqe); /* can't afford to adjust quota space with no suitable lu_env */ GOTO(out_noadjust, rc); } qti = qsd_info(env); lqe_write_lock(lqe); LQUOTA_DEBUG(lqe, "DQACQ returned %d, flags:0x%x", ret, reqbody->qb_flags); /* despite -EDQUOT & -EINPROGRESS errors, the master might still * grant us back quota space to adjust quota overrun */ if (ret != 0 && ret != -EDQUOT && ret != -EINPROGRESS) { if (ret != -ETIMEDOUT && ret != -ENOTCONN && ret != -ESHUTDOWN && ret != -EAGAIN) /* print errors only if return code is unexpected */ LQUOTA_ERROR(lqe, "DQACQ failed with %d, flags:0x%x", ret, reqbody->qb_flags); GOTO(out, ret); } /* Set the lqe_lockh */ if (lustre_handle_is_used(lockh) && !lustre_handle_equal(lockh, &lqe->lqe_lockh)) lustre_handle_copy(&lqe->lqe_lockh, lockh); /* If the replied qb_count is zero, it means master didn't process * the DQACQ since the limit for this ID has been removed, so we * should not update quota entry & slave index copy neither. 
*/ if (repbody != NULL && repbody->qb_count != 0) { LQUOTA_DEBUG(lqe, "DQACQ qb_count:"LPU64, repbody->qb_count); if (req_is_rel(reqbody->qb_flags)) { if (lqe->lqe_granted < repbody->qb_count) { LQUOTA_ERROR(lqe, "can't release more space " "than owned "LPU64"<"LPU64, lqe->lqe_granted, repbody->qb_count); lqe->lqe_granted = 0; } else { lqe->lqe_granted -= repbody->qb_count; } /* Cancel the per-ID lock initiatively when there * isn't any usage & grant, which can avoid master * sending glimpse unnecessarily to this slave on * quota revoking */ if (!lqe->lqe_pending_write && !lqe->lqe_granted && !lqe->lqe_waiting_write && !lqe->lqe_usage) cancel = true; } else { lqe->lqe_granted += repbody->qb_count; } qti->qti_rec.lqr_slv_rec.qsr_granted = lqe->lqe_granted; lqe_write_unlock(lqe); /* Update the slave index file in the dedicated thread. So far, * We don't update the version of slave index copy on DQACQ. * No locking is necessary since nobody can change * lqe->lqe_granted while lqe->lqe_pending_req > 0 */ qsd_upd_schedule(qqi, lqe, &lqe->lqe_id, &qti->qti_rec, 0, false); lqe_write_lock(lqe); } /* extract information from lvb */ if (ret == 0 && lvb != 0) { if (lvb->lvb_id_qunit != 0) qsd_set_qunit(lqe, lvb->lvb_id_qunit); qsd_set_edquot(lqe, !!(lvb->lvb_flags & LQUOTA_FL_EDQUOT)); } else if (repbody != NULL && repbody->qb_qunit != 0) { qsd_set_qunit(lqe, repbody->qb_qunit); } /* turn off pre-acquire if it failed with -EDQUOT. This is done to avoid * flooding the master with acquire request. 
Pre-acquire will be turned * on again as soon as qunit is modified */ if (req_is_preacq(reqbody->qb_flags) && ret == -EDQUOT) lqe->lqe_nopreacq = true; out: adjust = qsd_adjust_needed(lqe); if (reqbody && req_is_acq(reqbody->qb_flags) && ret != -EDQUOT) { lqe->lqe_acq_rc = ret; lqe->lqe_acq_time = cfs_time_current_64(); } out_noadjust: qsd_request_exit(lqe); lqe_write_unlock(lqe); /* release reference on per-ID lock */ if (lustre_handle_is_used(lockh)) ldlm_lock_decref(lockh, qsd_id_einfo.ei_mode); if (cancel) { qsd_adjust_schedule(lqe, false, true); } else if (adjust) { if (!ret || ret == -EDQUOT) qsd_adjust_schedule(lqe, false, false); else qsd_adjust_schedule(lqe, true, false); } lqe_putref(lqe); if (lvb) OBD_FREE_PTR(lvb); EXIT; }