/*
 * Return the qsd_qtype_info structure associated with a global lock.
 *
 * \param lock  - is the global lock from which we should extract the qqi
 * \param reset - whether lock->l_ast_data should be cleared
 */
static struct qsd_qtype_info *qsd_glb_ast_data_get(struct ldlm_lock *lock,
                                                   bool reset)
{
        struct qsd_qtype_info *qqi;
        ENTRY;

        lock_res_and_lock(lock);
        qqi = lock->l_ast_data;
        if (qqi != NULL) {
                qqi_getref(qqi);
                if (reset)
                        lock->l_ast_data = NULL;
        }
        unlock_res_and_lock(lock);

        if (qqi != NULL)
                /* it is not safe to call lu_ref_add() under spinlock */
                lu_ref_add(&qqi->qqi_reference, "ast_data_get", lock);

        if (reset && qqi != NULL) {
                /* release the qqi reference held for the lock */
                lu_ref_del(&qqi->qqi_reference, "glb_lock", lock);
                qqi_putref(qqi);
        }
        RETURN(qqi);
}
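/*
 * Illustrative sketch (not part of the original source): a caller of
 * qsd_glb_ast_data_get() is expected to balance the reference returned
 * above by pairing the "ast_data_get" lu_ref scope with lu_ref_del() and
 * dropping the qqi refcount with qqi_putref(). The handler name below is
 * hypothetical.
 */
static int example_glb_ast_handler(struct ldlm_lock *lock)
{
        struct qsd_qtype_info *qqi;

        qqi = qsd_glb_ast_data_get(lock, false);
        if (qqi == NULL)
                /* nothing attached to this lock any more */
                return 0;

        /* ... act on the global quota index described by qqi ... */

        /* drop the reference taken by qsd_glb_ast_data_get() */
        lu_ref_del(&qqi->qqi_reference, "ast_data_get", lock);
        qqi_putref(qqi);
        return 0;
}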
static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl,
                             struct lustre_handle *lockh, bool lvb_update)
{
        struct ldlm_lock *dlmlock;

        dlmlock = ldlm_handle2lock_long(lockh, 0);
        LASSERT(dlmlock);

        /* lock reference taken by ldlm_handle2lock_long() is
         * owned by osc_lock and released in osc_lock_detach() */
        lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl);
        oscl->ols_has_ref = 1;

        LASSERT(!oscl->ols_dlmlock);
        oscl->ols_dlmlock = dlmlock;

        /* This may be a matched lock for glimpse request, do not hold
         * lock reference in that case. */
        if (!oscl->ols_glimpse) {
                /* hold a refc for non glimpse lock which will
                 * be released in osc_lock_cancel() */
                lustre_handle_copy(&oscl->ols_handle, lockh);
                ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode);
                oscl->ols_hold = 1;
        }

        /* Lock must have been granted. */
        lock_res_and_lock(dlmlock);
        if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
                struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent;
                struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;

                /* extend the lock extent, otherwise it will have problem when
                 * we decide whether to grant a lockless lock. */
                descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
                descr->cld_start = cl_index(descr->cld_obj, ext->start);
                descr->cld_end = cl_index(descr->cld_obj, ext->end);
                descr->cld_gid = ext->gid;

                /* no lvb update for matched lock */
                if (lvb_update) {
                        LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
                        osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj),
                                            dlmlock, NULL);
                }
                LINVRNT(osc_lock_invariant(oscl));
        }
        unlock_res_and_lock(dlmlock);

        LASSERT(oscl->ols_state != OLS_GRANTED);
        oscl->ols_state = OLS_GRANTED;
}
int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
                        struct cl_page *page, struct page *vmpage)
{
        struct lov_object *loo = cl2lov(obj);
        struct lov_layout_raid0 *r0 = lov_r0(loo);
        struct lov_io *lio = lov_env_io(env);
        struct cl_page *subpage;
        struct cl_object *subobj;
        struct lov_io_sub *sub;
        struct lov_page *lpg = cl_object_page_slice(obj, page);
        loff_t offset;
        u64 suboff;
        int stripe;
        int rc;

        offset = cl_offset(obj, page->cp_index);
        stripe = lov_stripe_number(loo->lo_lsm, offset);
        LASSERT(stripe < r0->lo_nr);
        rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff);
        LASSERT(rc == 0);

        lpg->lps_invalid = 1;
        cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_page_ops);

        sub = lov_sub_get(env, lio, stripe);
        if (IS_ERR(sub)) {
                rc = PTR_ERR(sub);
                goto out;
        }

        subobj = lovsub2cl(r0->lo_sub[stripe]);
        subpage = cl_page_find_sub(sub->sub_env, subobj,
                                   cl_index(subobj, suboff), vmpage, page);
        lov_sub_put(sub);
        if (IS_ERR(subpage)) {
                rc = PTR_ERR(subpage);
                goto out;
        }

        if (likely(subpage->cp_parent == page)) {
                lu_ref_add(&subpage->cp_reference, "lov", page);
                lpg->lps_invalid = 0;
                rc = 0;
        } else {
                CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n");
                CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n");
                LASSERT(0);
        }

out:
        return rc;
}
static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
                           const char *name, struct lu_device *next)
{
        struct vvp_device *vdv;
        int rc;

        vdv = lu2vvp_dev(d);
        vdv->vdv_next = lu2cl_dev(next);

        LASSERT(d->ld_site && next->ld_type);
        next->ld_site = d->ld_site;
        rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
                                                      next->ld_type->ldt_name,
                                                      NULL);
        if (rc == 0) {
                lu_device_get(next);
                lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
        }
        return rc;
}
static int lovsub_device_init(const struct lu_env *env, struct lu_device *d,
                              const char *name, struct lu_device *next)
{
        struct lovsub_device *lsd = lu2lovsub_dev(d);
        struct lu_device_type *ldt;
        int rc;

        next->ld_site = d->ld_site;
        ldt = next->ld_type;
        LASSERT(ldt != NULL);
        rc = ldt->ldt_ops->ldto_device_init(env, next, ldt->ldt_name, NULL);
        if (rc) {
                next->ld_site = NULL;
                return rc;
        }

        lu_device_get(next);
        lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
        lsd->acid_next = lu2cl_dev(next);
        return rc;
}
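/*
 * Illustrative sketch (not part of the original source): both device_init
 * methods above pin the lower layer with lu_device_get() and tag the
 * reference with the "lu-stack" lu_ref scope. A matching teardown path is
 * expected to release both symmetrically; the helper below is hypothetical
 * and only demonstrates the pairing.
 */
static void example_stack_layer_put(struct lu_device *next)
{
        /* drop the "lu-stack" tag added at init time */
        lu_ref_del(&next->ld_reference, "lu-stack", &lu_site_init);
        /* drop the reference taken with lu_device_get() */
        lu_device_put(next);
}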
/**
 * Implements Linux VM address_space::invalidatepage() method. This method is
 * called when the page is truncated from a file, either as a result of
 * explicit truncate, or when inode is removed from memory (as a result of
 * final iput(), umount, or memory pressure induced icache shrinking).
 *
 * [0, offset] bytes of the page remain valid (this is for a case of not-page
 * aligned truncate). Lustre leaves the partially truncated page in the cache,
 * relying on struct inode::i_size to limit further accesses.
 */
static int cl_invalidatepage(struct page *vmpage, unsigned long offset)
{
        struct inode *inode;
        struct lu_env *env;
        struct cl_page *page;
        struct cl_object *obj;

        int result;
        int refcheck;

        LASSERT(PageLocked(vmpage));
        LASSERT(!PageWriteback(vmpage));

        /*
         * It is safe to not check anything in invalidatepage/releasepage
         * below because they are run with page locked and all our io is
         * happening with locked page too
         */
        result = 0;
        if (offset == 0) {
                env = cl_env_get(&refcheck);
                if (!IS_ERR(env)) {
                        inode = vmpage->mapping->host;
                        obj = ll_i2info(inode)->lli_clob;
                        if (obj != NULL) {
                                page = cl_vmpage_page(vmpage, obj);
                                if (page != NULL) {
                                        lu_ref_add(&page->cp_reference,
                                                   "delete", vmpage);
                                        cl_page_delete(env, page);
                                        result = 1;
                                        lu_ref_del(&page->cp_reference,
                                                   "delete", vmpage);
                                        cl_page_put(env, page);
                                }
                        } else
                                LASSERT(vmpage->private == 0);
                        cl_env_put(env, &refcheck);
                }
        }
        return result;
}
static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
{
        struct ldlm_lock *dlmlock;
        ENTRY;

        dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0);
        LASSERT(dlmlock != NULL);

        lock_res_and_lock(dlmlock);
        spin_lock(&osc_ast_guard);
        LASSERT(dlmlock->l_ast_data == olck);
        LASSERT(olck->ols_lock == NULL);
        olck->ols_lock = dlmlock;
        spin_unlock(&osc_ast_guard);

        /*
         * Lock might be not yet granted. In this case, completion ast
         * (osc_ldlm_completion_ast()) comes later and finishes lock
         * granting.
         */
        if (dlmlock->l_granted_mode == dlmlock->l_req_mode)
                osc_lock_granted(env, olck, dlmlock, 0);
        unlock_res_and_lock(dlmlock);

        /*
         * osc_enqueue_interpret() decrefs asynchronous locks, counter
         * this.
         */
        ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode);
        olck->ols_hold = 1;

        /* lock reference taken by ldlm_handle2lock_long() is owned by
         * osc_lock and released in osc_lock_detach() */
        lu_ref_add(&dlmlock->l_reference, "osc_lock", olck);
        olck->ols_has_ref = 1;
}
/*
 * Create a new qsd_instance to be associated with backend osd device
 * identified by \dev.
 *
 * \param env      - the environment passed by the caller
 * \param svname   - is the service name of the OSD device creating this
 *                   instance
 * \param dev      - is the dt_device where to store quota index files
 * \param osd_proc - is the procfs parent directory where to create procfs
 *                   file related to this new qsd instance
 *
 * \retval - pointer to new qsd_instance associated with dev \dev on success,
 *           appropriate error on failure
 */
struct qsd_instance *qsd_init(const struct lu_env *env, char *svname,
                              struct dt_device *dev,
                              cfs_proc_dir_entry_t *osd_proc)
{
        struct qsd_thread_info *qti = qsd_info(env);
        struct qsd_instance *qsd;
        int rc, type, idx;
        ENTRY;

        /* only configure qsd for MDT & OST */
        type = server_name2index(svname, &idx, NULL);
        if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST)
                RETURN(NULL);

        /* allocate qsd instance */
        OBD_ALLOC_PTR(qsd);
        if (qsd == NULL)
                RETURN(ERR_PTR(-ENOMEM));

        /* generic initializations */
        rwlock_init(&qsd->qsd_lock);
        CFS_INIT_LIST_HEAD(&qsd->qsd_link);
        thread_set_flags(&qsd->qsd_upd_thread, SVC_STOPPED);
        init_waitqueue_head(&qsd->qsd_upd_thread.t_ctl_waitq);
        CFS_INIT_LIST_HEAD(&qsd->qsd_upd_list);
        spin_lock_init(&qsd->qsd_adjust_lock);
        CFS_INIT_LIST_HEAD(&qsd->qsd_adjust_list);
        qsd->qsd_prepared = false;
        qsd->qsd_started = false;

        /* copy service name */
        if (strlcpy(qsd->qsd_svname, svname, sizeof(qsd->qsd_svname))
            >= sizeof(qsd->qsd_svname))
                GOTO(out, rc = -E2BIG);

        /* grab reference on osd device */
        lu_device_get(&dev->dd_lu_dev);
        lu_ref_add(&dev->dd_lu_dev.ld_reference, "qsd", qsd);
        qsd->qsd_dev = dev;

        /* we only support pool ID 0 (default data or metadata pool) for the
         * time being. A different pool ID could be assigned to this target via
         * the configuration log in the future */
        qsd->qsd_pool_id = 0;

        /* get fsname from svname */
        rc = server_name2fsname(svname, qti->qti_buf, NULL);
        if (rc) {
                CERROR("%s: fail to extract filesystem name\n", svname);
                GOTO(out, rc);
        }

        /* look up quota setting for the filesystem the target belongs to */
        qsd->qsd_fsinfo = qsd_get_fsinfo(qti->qti_buf, 1);
        if (qsd->qsd_fsinfo == NULL) {
                CERROR("%s: failed to locate filesystem information\n",
                       svname);
                GOTO(out, rc = -EINVAL);
        }

        /* add in the list of lquota_fsinfo */
        mutex_lock(&qsd->qsd_fsinfo->qfs_mutex);
        list_add_tail(&qsd->qsd_link, &qsd->qsd_fsinfo->qfs_qsd_list);
        mutex_unlock(&qsd->qsd_fsinfo->qfs_mutex);

        /* register procfs directory */
        qsd->qsd_proc = lprocfs_seq_register(QSD_DIR, osd_proc,
                                             lprocfs_quota_qsd_vars, qsd);
        if (IS_ERR(qsd->qsd_proc)) {
                rc = PTR_ERR(qsd->qsd_proc);
                qsd->qsd_proc = NULL;
                CERROR("%s: fail to create quota slave proc entry (%d)\n",
                       svname, rc);
                GOTO(out, rc);
        }
        EXIT;
out:
        if (rc) {
                qsd_fini(env, qsd);
                return ERR_PTR(rc);
        }
        RETURN(qsd);
}
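/*
 * Illustrative sketch (not part of the original source): how a caller might
 * consume qsd_init(). Per the code above it can return NULL (target is
 * neither MDT nor OST), an ERR_PTR() on failure, or a valid instance that is
 * eventually released with qsd_fini(). The function name below is
 * hypothetical.
 */
static int example_quota_slave_setup(const struct lu_env *env, char *svname,
                                     struct dt_device *dev,
                                     cfs_proc_dir_entry_t *proc)
{
        struct qsd_instance *qsd;

        qsd = qsd_init(env, svname, dev, proc);
        if (qsd == NULL)
                return 0; /* quota slave not needed for this target type */
        if (IS_ERR(qsd))
                return PTR_ERR(qsd);

        /* stash the instance somewhere; tear it down later via
         * qsd_fini(env, qsd) */
        return 0;
}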
static int ll_write_begin(struct file *file, struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned flags,
                          struct page **pagep, void **fsdata)
{
        struct ll_cl_context *lcc;
        const struct lu_env *env = NULL;
        struct cl_io *io;
        struct cl_page *page = NULL;
        struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
        pgoff_t index = pos >> PAGE_SHIFT;
        struct page *vmpage = NULL;
        unsigned from = pos & (PAGE_SIZE - 1);
        unsigned to = from + len;
        int result = 0;
        ENTRY;

        CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);

        lcc = ll_cl_find(file);
        if (lcc == NULL) {
                io = NULL;
                GOTO(out, result = -EIO);
        }

        env = lcc->lcc_env;
        io  = lcc->lcc_io;

        /* To avoid deadlock, try to lock page first. */
        vmpage = grab_cache_page_nowait(mapping, index);

        if (unlikely(vmpage == NULL ||
                     PageDirty(vmpage) || PageWriteback(vmpage))) {
                struct vvp_io *vio = vvp_env_io(env);
                struct cl_page_list *plist = &vio->u.write.vui_queue;

                /* if the page is already in dirty cache, we have to commit
                 * the pages right now; otherwise, it may cause deadlock
                 * because it holds page lock of a dirty page and request for
                 * more grants. It's okay for the dirty page to be the first
                 * one in commit page list, though. */
                if (vmpage != NULL && plist->pl_nr > 0) {
                        unlock_page(vmpage);
                        put_page(vmpage);
                        vmpage = NULL;
                }

                /* commit pages and then wait for page lock */
                result = vvp_io_write_commit(env, io);
                if (result < 0)
                        GOTO(out, result);

                if (vmpage == NULL) {
                        vmpage = grab_cache_page_write_begin(mapping, index,
                                                             flags);
                        if (vmpage == NULL)
                                GOTO(out, result = -ENOMEM);
                }
        }

        page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
        if (IS_ERR(page))
                GOTO(out, result = PTR_ERR(page));

        lcc->lcc_page = page;
        lu_ref_add(&page->cp_reference, "cl_io", io);

        cl_page_assume(env, io, page);
        if (!PageUptodate(vmpage)) {
                /*
                 * We're completely overwriting an existing page,
                 * so _don't_ set it up to date until commit_write
                 */
                if (from == 0 && to == PAGE_SIZE) {
                        CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
                        POISON_PAGE(vmpage, 0x11);
                } else {
                        /* TODO: can be optimized at OSC layer to check if it
                         * is a lockless IO. In that case, it's not necessary
                         * to read the data. */
                        result = ll_prepare_partial_page(env, io, page);
                        if (result == 0)
                                SetPageUptodate(vmpage);
                }
        }
        if (result < 0)
                cl_page_unassume(env, io, page);
        EXIT;
out:
        if (result < 0) {
                if (vmpage != NULL) {
                        unlock_page(vmpage);
                        put_page(vmpage);
                }
                if (!IS_ERR_OR_NULL(page)) {
                        lu_ref_del(&page->cp_reference, "cl_io", io);
                        cl_page_put(env, page);
                }
                if (io)
                        io->ci_result = result;
        } else {
                *pagep = vmpage;
                *fsdata = lcc;
        }
        RETURN(result);
}
static int vvp_io_fault_start(const struct lu_env *env,
                              const struct cl_io_slice *ios)
{
        struct vvp_io *vio = cl2vvp_io(env, ios);
        struct cl_io *io = ios->cis_io;
        struct cl_object *obj = io->ci_obj;
        struct inode *inode = ccc_object_inode(obj);
        struct cl_fault_io *fio = &io->u.ci_fault;
        struct vvp_fault_io *cfio = &vio->u.fault;
        loff_t offset;
        int result = 0;
        struct page *vmpage = NULL;
        struct cl_page *page;
        loff_t size;
        pgoff_t last; /* last page in a file data region */

        if (fio->ft_executable &&
            LTIME_S(inode->i_mtime) != vio->u.fault.ft_mtime)
                CWARN("binary "DFID
                      " changed while waiting for the page fault lock\n",
                      PFID(lu_object_fid(&obj->co_lu)));

        /* offset of the last byte on the page */
        offset = cl_offset(obj, fio->ft_index + 1) - 1;
        LASSERT(cl_index(obj, offset) == fio->ft_index);
        result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL);
        if (result != 0)
                return result;

        /* must return locked page */
        if (fio->ft_mkwrite) {
                LASSERT(cfio->ft_vmpage != NULL);
                lock_page(cfio->ft_vmpage);
        } else {
                result = vvp_io_kernel_fault(cfio);
                if (result != 0)
                        return result;
        }

        vmpage = cfio->ft_vmpage;
        LASSERT(PageLocked(vmpage));

        if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE))
                ll_invalidate_page(vmpage);

        size = i_size_read(inode);
        /* Though we have already held a cl_lock upon this page, but
         * it still can be truncated locally. */
        if (unlikely((vmpage->mapping != inode->i_mapping) ||
                     (page_offset(vmpage) > size))) {
                CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n");

                /* return +1 to stop cl_io_loop() and ll_fault() will catch
                 * and retry. */
                GOTO(out, result = +1);
        }

        if (fio->ft_mkwrite) {
                pgoff_t last_index;
                /*
                 * Capture the size while holding the lli_trunc_sem from above
                 * we want to make sure that we complete the mkwrite action
                 * while holding this lock. We need to make sure that we are
                 * not past the end of the file.
                 */
                last_index = cl_index(obj, size - 1);
                if (last_index < fio->ft_index) {
                        CDEBUG(D_PAGE,
                               "llite: mkwrite and truncate race happened: "
                               "%p: 0x%lx 0x%lx\n",
                               vmpage->mapping, fio->ft_index, last_index);
                        /*
                         * We need to return if we are
                         * past the end of the file. This will propagate
                         * up the call stack to ll_page_mkwrite where
                         * we will return VM_FAULT_NOPAGE. Any non-negative
                         * value returned here will be silently
                         * converted to 0. If the vmpage->mapping is null
                         * the error code would be converted back to ENODATA
                         * in ll_page_mkwrite0. Thus we return -ENODATA
                         * to handle both cases.
                         */
                        GOTO(out, result = -ENODATA);
                }
        }

        page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE);
        if (IS_ERR(page))
                GOTO(out, result = PTR_ERR(page));

        /* if page is going to be written, we should add this page into cache
         * earlier. */
        if (fio->ft_mkwrite) {
                wait_on_page_writeback(vmpage);
                if (set_page_dirty(vmpage)) {
                        struct ccc_page *cp;

                        /* vvp_page_assume() calls wait_on_page_writeback(). */
                        cl_page_assume(env, io, page);

                        cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
                        vvp_write_pending(cl2ccc(obj), cp);

                        /* Do not set Dirty bit here so that in case IO is
                         * started before the page is really made dirty, we
                         * still have chance to detect it. */
                        result = cl_page_cache_add(env, io, page, CRT_WRITE);
                        LASSERT(cl_page_is_owned(page, io));

                        vmpage = NULL;
                        if (result < 0) {
                                cl_page_unmap(env, io, page);
                                cl_page_discard(env, io, page);
                                cl_page_disown(env, io, page);

                                cl_page_put(env, page);

                                /* we're in big trouble, what can we do now? */
                                if (result == -EDQUOT)
                                        result = -ENOSPC;
                                GOTO(out, result);
                        } else
                                cl_page_disown(env, io, page);
                }
        }

        last = cl_index(obj, size - 1);
        /*
         * The ft_index is only used in the case of
         * a mkwrite action. We need to check
         * our assertions are correct, since
         * we should have caught this above
         */
        LASSERT(!fio->ft_mkwrite || fio->ft_index <= last);
        if (fio->ft_index == last)
                /*
                 * Last page is mapped partially.
                 */
                fio->ft_nob = size - cl_offset(obj, fio->ft_index);
        else
                fio->ft_nob = cl_page_size(obj);

        lu_ref_add(&page->cp_reference, "fault", io);
        fio->ft_page = page;
        EXIT;

out:
        /* return unlocked vmpage to avoid deadlocking */
        if (vmpage != NULL)
                unlock_page(vmpage);

        cfio->fault.ft_flags &= ~VM_FAULT_LOCKED;

        return result;
}
/*
 * Get intent per-ID lock or global-index lock from master.
 *
 * \param env        - the environment passed by the caller
 * \param exp        - is the export to use to send the intent RPC
 * \param qbody      - quota body to be packed in request
 * \param sync       - synchronous or asynchronous (pre-acquire)
 * \param it_op      - IT_QUOTA_DQACQ or IT_QUOTA_CONN
 * \param completion - completion callback
 * \param qqi        - is the qsd_qtype_info structure to pass to the
 *                     completion function
 * \param lvb        - is the lvb associated with the lock and returned by
 *                     the server
 * \param arg        - is an opaque argument passed to the completion callback
 *
 * \retval 0    - success
 * \retval -ve  - appropriate errors
 */
int qsd_intent_lock(const struct lu_env *env, struct obd_export *exp,
                    struct quota_body *qbody, bool sync, int it_op,
                    qsd_req_completion_t completion, struct qsd_qtype_info *qqi,
                    struct lquota_lvb *lvb, void *arg)
{
        struct qsd_thread_info *qti = qsd_info(env);
        struct ptlrpc_request *req;
        struct qsd_async_args *aa = NULL;
        struct ldlm_intent *lit;
        struct quota_body *req_qbody;
        __u64 flags = LDLM_FL_HAS_INTENT;
        int rc;
        ENTRY;

        LASSERT(exp != NULL);
        LASSERT(!lustre_handle_is_used(&qbody->qb_lockh));

        memset(&qti->qti_lockh, 0, sizeof(qti->qti_lockh));

        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_LDLM_INTENT_QUOTA);
        if (req == NULL)
                GOTO(out, rc = -ENOMEM);

        req->rq_no_retry_einprogress = 1;
        rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
        if (rc) {
                ptlrpc_request_free(req);
                GOTO(out, rc);
        }

        lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT);
        lit->opc = (__u64)it_op;

        req_qbody = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_BODY);
        *req_qbody = *qbody;

        req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
                             sizeof(*lvb));
        ptlrpc_request_set_replen(req);

        switch (it_op) {
        case IT_QUOTA_CONN:
                /* build resource name associated with global index */
                fid_build_reg_res_name(&qbody->qb_fid, &qti->qti_resid);

                /* copy einfo template and fill ei_cbdata with qqi pointer */
                memcpy(&qti->qti_einfo, &qsd_glb_einfo,
                       sizeof(qti->qti_einfo));
                qti->qti_einfo.ei_cbdata = qqi;

                /* don't cancel global lock on memory pressure */
                flags |= LDLM_FL_NO_LRU;
                break;
        case IT_QUOTA_DQACQ:
                /* build resource name associated with per-ID quota lock */
                fid_build_quota_res_name(&qbody->qb_fid, &qbody->qb_id,
                                         &qti->qti_resid);

                /* copy einfo template and fill ei_cbdata with lqe pointer */
                memcpy(&qti->qti_einfo, &qsd_id_einfo,
                       sizeof(qti->qti_einfo));
                qti->qti_einfo.ei_cbdata = arg;
                break;
        default:
                LASSERTF(0, "invalid it_op %d", it_op);
        }

        /* build lock enqueue request */
        rc = ldlm_cli_enqueue(exp, &req, &qti->qti_einfo, &qti->qti_resid,
                              NULL, &flags, (void *)lvb, sizeof(*lvb),
                              LVB_T_LQUOTA, &qti->qti_lockh, 1);
        if (rc < 0) {
                ptlrpc_req_finished(req);
                GOTO(out, rc);
        }

        /* grab reference on backend structure for the new lock */
        switch (it_op) {
        case IT_QUOTA_CONN:
                /* grab reference on qqi for new lock */
#ifdef USE_LU_REF
        {
                struct ldlm_lock *lock;

                lock = ldlm_handle2lock(&qti->qti_lockh);
                if (lock == NULL) {
                        ptlrpc_req_finished(req);
                        GOTO(out, rc = -ENOLCK);
                }
                lu_ref_add(&qqi->qqi_reference, "glb_lock", lock);
                LDLM_LOCK_PUT(lock);
        }
#endif /* USE_LU_REF */
                qqi_getref(qqi);
                break;
        case IT_QUOTA_DQACQ:
                /* grab reference on lqe for new lock */
                lqe_getref((struct lquota_entry *)arg);
                /* all acquire/release requests are sent with no_resend and
                 * no_delay flag */
                req->rq_no_resend = req->rq_no_delay = 1;
                break;
        default:
                break;
        }

        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
        aa = ptlrpc_req_async_args(req);
        aa->aa_exp = exp;
        aa->aa_qqi = qqi;
        aa->aa_arg = arg;
        aa->aa_lvb = lvb;
        aa->aa_completion = completion;
        lustre_handle_copy(&aa->aa_lockh, &qti->qti_lockh);

        if (sync) {
                /* send lock enqueue request and wait for completion */
                rc = ptlrpc_queue_wait(req);
                rc = qsd_intent_interpret(env, req, aa, rc);
                ptlrpc_req_finished(req);
        } else {
                /* queue lock request and return */
                req->rq_interpret_reply = qsd_intent_interpret;
                ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
        }

        RETURN(rc);
out:
        completion(env, qqi, qbody, NULL, &qti->qti_lockh, lvb, arg, rc);
        return rc;
}
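/*
 * Illustrative sketch (not part of the original source): the shape of a
 * qsd_req_completion_t callback, inferred from the completion() invocation
 * in the error path of qsd_intent_lock() above (env, qqi, request body,
 * reply body, lock handle, lvb, opaque argument, return code). The name and
 * body below are hypothetical.
 */
static void example_intent_completion(const struct lu_env *env,
                                      struct qsd_qtype_info *qqi,
                                      struct quota_body *reqbody,
                                      struct quota_body *repbody,
                                      struct lustre_handle *lockh,
                                      struct lquota_lvb *lvb, void *arg,
                                      int rc)
{
        if (rc != 0) {
                /* enqueue failed: release whatever was pinned for the
                 * request (e.g. the lqe or qqi reference) */
                return;
        }
        /* on success: record the lock handle and process the reply/lvb */
}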