static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev, struct lu_fid *fid, uint64_t *oid) { struct l_wait_info lwi = { 0 }; struct lustre_scrub *scrub = &dev->od_scrub; struct ptlrpc_thread *thread = &scrub->os_thread; struct osd_otable_it *it = dev->od_otable_it; struct lustre_mdt_attrs *lma = NULL; nvlist_t *nvbuf = NULL; int size = 0; int rc = 0; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) { lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL); if (likely(lwi.lwi_timeout > 0)) { l_wait_event(thread->t_ctl_waitq, !list_empty(&scrub->os_inconsistent_items) || !thread_is_running(thread), &lwi); if (unlikely(!thread_is_running(thread))) RETURN(SCRUB_NEXT_EXIT); } } if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) { spin_lock(&scrub->os_lock); thread_set_flags(thread, SVC_STOPPING); spin_unlock(&scrub->os_lock); RETURN(SCRUB_NEXT_CRASH); } if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_FATAL)) RETURN(SCRUB_NEXT_FATAL); again: if (nvbuf) { nvlist_free(nvbuf); nvbuf = NULL; lma = NULL; } if (!list_empty(&scrub->os_inconsistent_items)) { spin_lock(&scrub->os_lock); if (likely(!list_empty(&scrub->os_inconsistent_items))) { struct osd_inconsistent_item *oii; oii = list_entry(scrub->os_inconsistent_items.next, struct osd_inconsistent_item, oii_list); *fid = oii->oii_cache.oic_fid; *oid = oii->oii_cache.oic_dnode; scrub->os_in_prior = 1; spin_unlock(&scrub->os_lock); GOTO(out, rc = 0); } spin_unlock(&scrub->os_lock); }
int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request) { CFS_LIST_HEAD(cancels); struct obd_device *obd = class_exp2obd(exp); struct ptlrpc_request *req = *request; int count = 0, rc; ENTRY; LASSERT(req == NULL); if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && (fid_is_sane(&op_data->op_fid1)) && !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels, LCK_EX, MDS_INODELOCK_UPDATE); if ((op_data->op_flags & MF_MDC_CANCEL_FID3) && (fid_is_sane(&op_data->op_fid3)) && !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) count += mdc_resource_get_unused(exp, &op_data->op_fid3, &cancels, LCK_EX, MDS_INODELOCK_FULL); req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_REINT_UNLINK); if (req == NULL) { ldlm_lock_list_put(&cancels, l_bl_ast, count); RETURN(-ENOMEM); } mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1); req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, op_data->op_namelen + 1); rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count); if (rc) { ptlrpc_request_free(req); RETURN(rc); } mdc_unlink_pack(req, op_data); req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, obd->u.cli.cl_max_mds_easize); req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, obd->u.cli.cl_max_mds_cookiesize); ptlrpc_request_set_replen(req); *request = req; rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL); if (rc == -ERESTARTSYS) rc = 0; RETURN(rc); }
int osc_object_is_contended(struct osc_object *obj) { struct osc_device *dev = lu2osc_dev(obj->oo_cl.co_lu.lo_dev); int osc_contention_time = dev->od_contention_time; unsigned long cur_time = cfs_time_current(); unsigned long retry_time; if (OBD_FAIL_CHECK(OBD_FAIL_OSC_OBJECT_CONTENTION)) return 1; if (!obj->oo_contended) return 0; /* The code is copied from ll_file_is_contended. */ retry_time = cfs_time_add(obj->oo_contention_time, cfs_time_seconds(osc_contention_time)); if (cfs_time_after(cur_time, retry_time)) { osc_object_clear_contended(obj); return 0; } return 1; }
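/*
 * A minimal companion sketch (hypothetical helper, not necessarily the
 * tree's osc_object_set_contended()): marking an object contended records
 * the current time, so the check above can expire the flag once
 * od_contention_time seconds have passed.
 */
static void demo_osc_object_set_contended(struct osc_object *obj)
{
	obj->oo_contention_time = cfs_time_current();
	obj->oo_contended = 1;
}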
/** * Implementation of struct cl_req_operations::cro_attr_set() for VVP * layer. VVP is responsible for * * - o_[mac]time * * - o_mode * * - o_parent_seq * * - o_[ug]id * * - o_parent_oid * * - o_parent_ver * * - o_ioepoch * */ static void vvp_req_attr_set(const struct lu_env *env, const struct cl_req_slice *slice, const struct cl_object *obj, struct cl_req_attr *attr, u64 flags) { struct inode *inode; struct obdo *oa; u32 valid_flags; oa = attr->cra_oa; inode = vvp_object_inode(obj); valid_flags = OBD_MD_FLTYPE; if (slice->crs_req->crq_type == CRT_WRITE) { if (flags & OBD_MD_FLEPOCH) { oa->o_valid |= OBD_MD_FLEPOCH; oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch; valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID; } } obdo_from_inode(oa, inode, valid_flags & flags); obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID)) oa->o_parent_oid++; memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE); }
/** * Disconnect a bulk desc from the network. Idempotent. Not * thread-safe (i.e. only interlocks with completion callback). * Returns 1 on success or 0 if network unregistration failed for whatever * reason. */ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) { struct ptlrpc_bulk_desc *desc = req->rq_bulk; wait_queue_head_t *wq; int rc; LASSERT(!in_interrupt()); /* might sleep */ /* Let's setup deadline for reply unlink. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0) req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK; if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ return 1; /* never registered */ LASSERT(desc->bd_req == req); /* bd_req NULL until registered */ /* the unlink ensures the callback happens ASAP and is the last * one. If it fails, it must be because completion just happened, * but we must still wait_event() in this case to give liblustre * a chance to run client_bulk_callback() */ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ return 1; /* never registered */ /* Move to "Unregistering" phase as bulk was not unlinked yet. */ ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK); /* Do not wait for unlink to finish. */ if (async) return 0; if (req->rq_set) wq = &req->rq_set->set_waitq; else wq = &req->rq_reply_waitq; for (;;) { /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish LNDs */ int cnt = 0; while (cnt < LONG_UNLINK && (rc = wait_event_idle_timeout(*wq, !ptlrpc_client_bulk_active(req), HZ)) == 0) cnt += 1; if (rc > 0) { ptlrpc_rqphase_move(req, req->rq_next_phase); return 1; } DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p", desc); } return 0; }
int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, const char *name, int fl, struct thandle *handle) { struct osd_object *obj = osd_dt_obj(dt); struct osd_thandle *oh; int rc = 0; ENTRY; LASSERT(handle != NULL); LASSERT(osd_invariant(obj)); if (!osd_obj2dev(obj)->od_posix_acl && (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0 || strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)) RETURN(-EOPNOTSUPP); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_OVERFLOW) && strcmp(name, XATTR_NAME_LINK) == 0) RETURN(-ENOSPC); oh = container_of0(handle, struct osd_thandle, ot_super); down_write(&obj->oo_guard); CDEBUG(D_INODE, "Setting xattr %s with size %d\n", name, (int)buf->lb_len); rc = osd_xattr_set_internal(env, obj, buf, name, fl, oh); up_write(&obj->oo_guard); RETURN(rc); }
/** * Disconnect a bulk desc from the network. Idempotent. Not * thread-safe (i.e. only interlocks with completion callback). * Returns 1 on success or 0 if network unregistration failed for whatever * reason. */ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) { struct ptlrpc_bulk_desc *desc = req->rq_bulk; struct l_wait_info lwi; int rc; ENTRY; LASSERT(!in_interrupt()); /* might sleep */ /* Let's setup deadline for reply unlink. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && async && req->rq_bulk_deadline == 0) req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK; if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ RETURN(1); /* never registered */ LASSERT(desc->bd_req == req); /* bd_req NULL until registered */ /* the unlink ensures the callback happens ASAP and is the last * one. If it fails, it must be because completion just happened, * but we must still l_wait_event() in this case to give liblustre * a chance to run client_bulk_callback() */ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ RETURN(1); /* never registered */ /* Move to "Unregistering" phase as bulk was not unlinked yet. */ ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING); /* Do not wait for unlink to finish. */ if (async) RETURN(0); for (;;) { #ifdef __KERNEL__ /* The wq argument is ignored by user-space wait_event macros */ wait_queue_head_t *wq = (req->rq_set != NULL) ? &req->rq_set->set_waitq : &req->rq_reply_waitq; #endif /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish NALs */ lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK), cfs_time_seconds(1), NULL, NULL); rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi); if (rc == 0) { ptlrpc_rqphase_move(req, req->rq_next_phase); RETURN(1); } LASSERT(rc == -ETIMEDOUT); DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p", desc); } RETURN(0); }
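/*
 * Hedged caller sketch based on the contract documented above: rc == 1
 * means the bulk is fully disengaged from the network, while rc == 0 is
 * only possible for async calls and means the unlink is still in flight
 * and will be signalled through the bulk callback. demo_abort_bulk() is
 * a hypothetical caller, not a function from the tree.
 */
static int demo_abort_bulk(struct ptlrpc_request *req)
{
	/* try the non-blocking unregister first */
	if (ptlrpc_unregister_bulk(req, 1) == 1)
		return 0;		/* already disengaged */
	/* unlink still in flight; block until the callback has run */
	return ptlrpc_unregister_bulk(req, 0) == 1 ? 0 : -EBUSY;
}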
/* Add log records for each OSC that this object is striped over, and return * cookies for each one. We _would_ have nice abstraction here, except that * we need to keep cookies in stripe order, even if some are NULL, so that * the right cookies are passed back to the right OSTs at the client side. * Unset cookies should be all-zero (which will never occur naturally). */ static int lov_llog_origin_add(const struct lu_env *env, struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, struct llog_cookie *logcookies, int numcookies) { struct obd_device *obd = ctxt->loc_obd; struct lov_obd *lov = &obd->u.lov; int i, rc = 0, cookies = 0; ENTRY; LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count, "logcookies %p, numcookies %d lsm->lsm_stripe_count %d \n", logcookies, numcookies, lsm->lsm_stripe_count); for (i = 0; i < lsm->lsm_stripe_count; i++) { struct lov_oinfo *loi = lsm->lsm_oinfo[i]; struct obd_device *child = lov->lov_tgts[loi->loi_ost_idx]->ltd_exp->exp_obd; struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx); /* fill mds unlink/setattr log record */ switch (rec->lrh_type) { case MDS_UNLINK_REC: { struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec; lur->lur_oid = ostid_id(&loi->loi_oi); lur->lur_oseq = (__u32)ostid_seq(&loi->loi_oi); break; } case MDS_SETATTR64_REC: { struct llog_setattr64_rec *lsr = (struct llog_setattr64_rec *)rec; lsr->lsr_oi = loi->loi_oi; break; } default: break; } /* inject error in llog_obd_add() below */ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FAIL_LOV_LOG_ADD)) { llog_ctxt_put(cctxt); cctxt = NULL; } rc = llog_obd_add(env, cctxt, rec, NULL, logcookies + cookies, numcookies - cookies); llog_ctxt_put(cctxt); if (rc < 0) { CERROR("Can't add llog (rc = %d) for stripe %d\n", rc, cookies); memset(logcookies + cookies, 0, sizeof(struct llog_cookie)); rc = 1; /* skip this cookie */ } /* Note that rc is always 1 if llog_obd_add was successful */ cookies += rc; } RETURN(cookies); }
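/*
 * Per the block comment above, unset cookies are all-zero. A hedged
 * sketch of how a consumer could detect a skipped stripe; the helper
 * name is hypothetical.
 */
static inline int demo_llog_cookie_is_unset(const struct llog_cookie *lc)
{
	static const struct llog_cookie zero;

	return memcmp(lc, &zero, sizeof(zero)) == 0;
}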
/* return EADATA length to the caller. negative value means error */ static int mdt_getxattr_pack_reply(struct mdt_thread_info *info) { struct req_capsule *pill = info->mti_pill; struct ptlrpc_request *req = mdt_info_req(info); char *xattr_name; __u64 valid = info->mti_body->valid; static const char user_string[] = "user."; int size, rc; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK)) RETURN(-ENOMEM); /* Determine how many bytes we need */ if (valid & OBD_MD_FLXATTR) { xattr_name = req_capsule_client_get(pill, &RMF_NAME); if (!xattr_name) RETURN(-EFAULT); if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) && !strncmp(xattr_name, user_string, sizeof(user_string) - 1)) RETURN(-EOPNOTSUPP); size = mo_xattr_get(info->mti_env, mdt_object_child(info->mti_object), &LU_BUF_NULL, xattr_name); } else if (valid & OBD_MD_FLXATTRLS) { size = mo_xattr_list(info->mti_env, mdt_object_child(info->mti_object), &LU_BUF_NULL); } else { CDEBUG(D_INFO, "Valid bits: "LPX64"\n", info->mti_body->valid); RETURN(-EINVAL); } if (size == -ENODATA) { size = 0; } else if (size < 0) { CERROR("Error getting EA size: %d\n", size); RETURN(size); } if (info->mti_body->eadatasize != 0 && info->mti_body->eadatasize < size) RETURN(-ERANGE); req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER, info->mti_body->eadatasize == 0 ? 0 : size); rc = req_capsule_server_pack(pill); if (rc) { LASSERT(rc < 0); RETURN(rc); } RETURN(size); }
/** * Allocate new fid on passed client @seq and save it to @fid. * * \param[in] env pointer to the thread context * \param[in,out] seq pointer to the client sequence manager * \param[out] fid buffer to hold the newly allocated fid * * \retval 1 to notify the caller that a sequence switch * was performed, to allow it to set up the FLD for it. * \retval 0 when a new FID was allocated in the current sequence. * \retval Negative error number on failure. */ int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq, struct lu_fid *fid) { wait_queue_t link; int rc; ENTRY; LASSERT(seq != NULL); LASSERT(fid != NULL); init_waitqueue_entry(&link, current); mutex_lock(&seq->lcs_mutex); if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST)) seq->lcs_fid.f_oid = seq->lcs_width; while (1) { u64 seqnr; if (unlikely(!fid_is_zero(&seq->lcs_fid) && fid_oid(&seq->lcs_fid) < seq->lcs_width)) { /* Just bump last allocated fid and return to caller. */ seq->lcs_fid.f_oid++; rc = 0; break; } /* Release seq::lcs_mutex via seq_fid_alloc_prep() to avoid * deadlock during seq_client_alloc_seq(). */ rc = seq_fid_alloc_prep(seq, &link); if (rc) continue; rc = seq_client_alloc_seq(env, seq, &seqnr); /* Re-take seq::lcs_mutex via seq_fid_alloc_fini(). */ seq_fid_alloc_fini(seq, rc ? 0 : seqnr, false); if (rc) { CERROR("%s: Can't allocate new sequence: rc = %d\n", seq->lcs_name, rc); mutex_unlock(&seq->lcs_mutex); RETURN(rc); } rc = 1; break; } *fid = seq->lcs_fid; mutex_unlock(&seq->lcs_mutex); CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid)); RETURN(rc); }
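/*
 * Minimal caller sketch for the contract above (the caller name is
 * hypothetical): rc == 1 signals that a sequence switch happened, so the
 * FLD mapping for the new sequence should be set up before the FID is
 * used.
 */
static int demo_alloc_fid(const struct lu_env *env,
			  struct lu_client_seq *seq, struct lu_fid *fid)
{
	int rc = seq_client_alloc_fid(env, seq, fid);

	if (rc < 0)
		return rc;	/* allocation failed */
	if (rc == 1) {
		/* new sequence started: set up FLD for fid_seq(fid) here */
	}
	return 0;
}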
/** * Pack a link_ea_entry. * All elements are stored as chars to avoid alignment issues. * Numbers are always big-endian * \retval record length */ int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname, const struct lu_fid *pfid) { struct lu_fid tmpfid; int reclen; tmpfid = *pfid; if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_MUL_REF)) tmpfid.f_oid--; if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH)) tmpfid.f_ver = ~0; fid_cpu_to_be(&tmpfid, &tmpfid); memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid)); memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen); reclen = sizeof(struct link_ea_entry) + lname->ln_namelen; lee->lee_reclen[0] = (reclen >> 8) & 0xff; lee->lee_reclen[1] = reclen & 0xff; return reclen; }
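/*
 * The inverse of the length encoding above, as a sketch: lee_reclen is
 * stored big-endian, high byte first. The tree's real unpack helper does
 * more (it also swabs the FID back to CPU order); this only illustrates
 * the byte layout, and the helper name is hypothetical.
 */
static inline int demo_lee_reclen(const struct link_ea_entry *lee)
{
	return (lee->lee_reclen[0] << 8) | lee->lee_reclen[1];
}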
/** * Register request buffer descriptor for request receiving. */ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd) { struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service; static lnet_process_id_t match_id = {LNET_NID_ANY, LNET_PID_ANY}; int rc; lnet_md_t md; lnet_handle_me_t me_h; CDEBUG(D_NET, "LNetMEAttach: portal %d\n", service->srv_req_portal); if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_RQBD)) return -ENOMEM; /* NB: CPT affinity service should use new LNet flag LNET_INS_LOCAL, * which means buffer can only be attached on local CPT, and LND * threads can find it by grabbing a local lock */ rc = LNetMEAttach(service->srv_req_portal, match_id, 0, ~0, LNET_UNLINK, rqbd->rqbd_svcpt->scp_cpt >= 0 ? LNET_INS_LOCAL : LNET_INS_AFTER, &me_h); if (rc != 0) { CERROR("LNetMEAttach failed: %d\n", rc); return -ENOMEM; } LASSERT(rqbd->rqbd_refcount == 0); rqbd->rqbd_refcount = 1; md.start = rqbd->rqbd_buffer; md.length = service->srv_buf_size; md.max_size = service->srv_max_req_size; md.threshold = LNET_MD_THRESH_INF; md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE; md.user_ptr = &rqbd->rqbd_cbid; md.eq_handle = ptlrpc_eq_h; rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h); if (rc == 0) return 0; CERROR("LNetMDAttach failed: %d\n", rc); LASSERT(rc == -ENOMEM); rc = LNetMEUnlink(me_h); LASSERT(rc == 0); rqbd->rqbd_refcount = 0; return -ENOMEM; }
/** * delete an orphan \a obj from orphan index. * \param obj file or directory. * \param th transaction for index deletion and object destruction. * * \pre obj->mod_count == 0 && ORPHAN_OBJ is set for obj. * * \retval 0 success * \retval -ve index operation error. */ int mdd_orphan_delete(const struct lu_env *env, struct mdd_object *obj, struct thandle *th) { struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); struct dt_object *dor = mdd->mdd_orphans; struct dt_key *key; int rc = 0; ENTRY; LASSERT(mdd_write_locked(env, obj) != 0); LASSERT(obj->mod_flags & ORPHAN_OBJ); LASSERT(obj->mod_count == 0); LASSERT(dor); key = mdd_orphan_key_fill(env, mdo2fid(obj)); dt_write_lock(env, mdd->mdd_orphans, MOR_TGT_ORPHAN); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ORPHAN_DELETE)) goto ref_del; rc = dt_delete(env, mdd->mdd_orphans, key, th); if (rc == -ENOENT) { key = mdd_orphan_key_fill_20(env, mdo2fid(obj)); rc = dt_delete(env, mdd->mdd_orphans, key, th); } ref_del: if (!rc) { /* lov objects will be destroyed by caller */ mdo_ref_del(env, obj, th); if (S_ISDIR(mdd_object_type(obj))) { mdo_ref_del(env, obj, th); dt_ref_del(env, mdd->mdd_orphans, th); } obj->mod_flags &= ~ORPHAN_OBJ; } else { CERROR("%s: could not delete orphan object "DFID": rc = %d\n", mdd2obd_dev(mdd)->obd_name, PFID(mdo2fid(obj)), rc); } dt_write_unlock(env, mdd->mdd_orphans); RETURN(rc); }
/* * This function implements the new seq allocation algorithm using async * updates to the seq file on disk. See bug 18857 for details. * The following variables keep track of this process: * * lss_space - available sequence space * lss_lowater_set - lu_seq_range for all seqs before barrier, i.e. safe to use * lss_hiwater_set - lu_seq_range after barrier, i.e. allocated but may be * not yet committed * * When lss_lowater_set reaches the end it is replaced with the hiwater one and * a write operation is initiated to allocate the new hiwater range. * If the last seq write operation is still not committed, the current * operation is flagged as a sync write op. */ static int range_alloc_set(const struct lu_env *env, struct lu_seq_range *out, struct lu_server_seq *seq) { struct lu_seq_range *space = &seq->lss_space; struct lu_seq_range *loset = &seq->lss_lowater_set; struct lu_seq_range *hiset = &seq->lss_hiwater_set; int rc = 0; if (lu_seq_range_is_zero(loset)) __seq_set_init(env, seq); if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */ loset->lsr_start = loset->lsr_end; if (lu_seq_range_is_exhausted(loset)) { /* reached high water mark. */ struct lu_device *dev = seq->lss_site->ss_lu->ls_top_dev; int obd_num_clients = dev->ld_obd->obd_num_exports; __u64 set_sz; /* calculate new seq width based on number of clients */ set_sz = max(seq->lss_set_width, obd_num_clients * seq->lss_width); set_sz = min(lu_seq_range_space(space), set_sz); /* Switch to hiwater range now */ *loset = *hiset; /* allocate new hiwater range */ range_alloc(hiset, space, set_sz); /* update ondisk seq with new *space */ rc = seq_store_update(env, seq, NULL, seq->lss_need_sync); } LASSERTF(!lu_seq_range_is_exhausted(loset) || lu_seq_range_is_sane(loset), DRANGE"\n", PRANGE(loset)); if (rc == 0) range_alloc(out, loset, seq->lss_width); RETURN(rc); }
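/*
 * Hedged sketch of the range_alloc() semantics assumed above: carve
 * `width` sequence numbers off the head of `space` into `out`. The real
 * helper lives elsewhere in the FID code; this demo version is only for
 * illustration of the low/high-water bookkeeping.
 */
static void demo_range_alloc(struct lu_seq_range *out,
			     struct lu_seq_range *space, __u64 width)
{
	out->lsr_start = space->lsr_start;
	out->lsr_end = space->lsr_start + width;
	space->lsr_start += width;
}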
/** * FLR: verify the layout version of an object. * * \param[in] env execution environment * \param[in] fo OFD object * \param[in] oa OBDO structure with layout version * * \retval 0 on successful verification * \retval -EINPROGRESS layout version is in transfer * \retval -ESTALE the layout version on client is stale */ int ofd_verify_layout_version(const struct lu_env *env, struct ofd_object *fo, const struct obdo *oa) { __u32 layout_version; int rc; ENTRY; if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_OST_SKIP_LV_CHECK))) GOTO(out, rc = 0); rc = ofd_object_ff_load(env, fo); if (rc < 0) { if (rc == -ENODATA) rc = -EINPROGRESS; GOTO(out, rc); } layout_version = fo->ofo_ff.ff_layout_version; if (oa->o_layout_version >= layout_version && oa->o_layout_version <= layout_version + fo->ofo_ff.ff_range) GOTO(out, rc = 0); /* normal traffic, decide whether to return ESTALE or EINPROGRESS */ layout_version &= ~LU_LAYOUT_RESYNC; /* this update is not legitimate */ if ((oa->o_layout_version & ~LU_LAYOUT_RESYNC) <= layout_version) GOTO(out, rc = -ESTALE); /* layout version may not be transmitted yet */ if ((oa->o_layout_version & ~LU_LAYOUT_RESYNC) > layout_version) GOTO(out, rc = -EINPROGRESS); EXIT; out: CDEBUG(D_INODE, DFID " verify layout version: %u vs. %u/%u, rc: %d\n", PFID(lu_object_fid(&fo->ofo_obj.do_lu)), oa->o_layout_version, fo->ofo_ff.ff_layout_version, fo->ofo_ff.ff_range, rc); return rc; }
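/*
 * Worked illustration of the window accepted above (the numbers are
 * examples only): with ff_layout_version = 8 and ff_range = 2, client
 * versions 8..10 pass immediately; older versions (after masking
 * LU_LAYOUT_RESYNC on both sides) return -ESTALE, and newer ones return
 * -EINPROGRESS until the new layout version has been transmitted.
 */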
static int mdt_reint_create(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { struct ptlrpc_request *req = mdt_info_req(info); int rc; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) RETURN(err_serious(-ESTALE)); if (info->mti_dlm_req) ldlm_request_cancel(mdt_info_req(info), info->mti_dlm_req, 0); switch (info->mti_attr.ma_attr.la_mode & S_IFMT) { case S_IFDIR:{ /* Cross-ref case. */ /* TODO: we can add LPROC_MDT_CROSS for cross-ref stats */ if (info->mti_cross_ref) { rc = mdt_md_mkobj(info); } else { LASSERT(info->mti_rr.rr_namelen > 0); mdt_counter_incr(req->rq_export, LPROC_MDT_MKDIR); rc = mdt_md_create(info); } break; } case S_IFREG: case S_IFLNK: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:{ /* Special file should stay on the same node as parent. */ LASSERT(info->mti_rr.rr_namelen > 0); mdt_counter_incr(req->rq_export, LPROC_MDT_MKNOD); rc = mdt_md_create(info); break; } default: rc = err_serious(-EOPNOTSUPP); } RETURN(rc); }
static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj, struct cl_req_attr *attr) { struct inode *inode; struct obdo *oa; u64 valid_flags = OBD_MD_FLTYPE; oa = attr->cra_oa; inode = vvp_object_inode(obj); if (attr->cra_type == CRT_WRITE) valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID; obdo_from_inode(oa, inode, valid_flags & attr->cra_flags); obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID)) oa->o_parent_oid++; memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE); }
static int mdt_reint_create(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { struct ptlrpc_request *req = mdt_info_req(info); int rc; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) RETURN(err_serious(-ESTALE)); if (info->mti_dlm_req) ldlm_request_cancel(mdt_info_req(info), info->mti_dlm_req, 0); LASSERT(info->mti_rr.rr_namelen > 0); switch (info->mti_attr.ma_attr.la_mode & S_IFMT) { case S_IFDIR: mdt_counter_incr(req, LPROC_MDT_MKDIR); break; case S_IFREG: case S_IFLNK: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: /* Special file should stay on the same node as parent. */ mdt_counter_incr(req, LPROC_MDT_MKNOD); break; default: CERROR("%s: Unsupported mode %o\n", mdt2obd_dev(info->mti_mdt)->obd_name, info->mti_attr.ma_attr.la_mode); RETURN(err_serious(-EOPNOTSUPP)); } rc = mdt_md_create(info); RETURN(rc); }
/** * Match client and OST server connection feature flags. * * Compute the compatibility flags for a connection request based on * features mutually supported by client and server. * * The obd_export::exp_connect_data.ocd_connect_flags field in \a exp * must not be updated here, otherwise a partially initialized value may * be exposed. After the connection request is successfully processed, * the top-level tgt_connect() request handler atomically updates the export * connect flags from the obd_connect_data::ocd_connect_flags field of the * reply. \see tgt_connect(). * * \param[in] env execution environment * \param[in] exp the obd_export associated with this * client/target pair * \param[in] data stores data for this connect request * \param[in] new_connection is this connection new or not * * \retval 0 if success * \retval -EPROTO client and server feature requirements are * incompatible * \retval -EBADF OST index in connect request doesn't match * real OST index */ static int ofd_parse_connect_data(const struct lu_env *env, struct obd_export *exp, struct obd_connect_data *data, bool new_connection) { struct ofd_device *ofd = ofd_exp(exp); struct filter_export_data *fed = &exp->exp_filter_data; if (!data) RETURN(0); CDEBUG(D_RPCTRACE, "%s: cli %s/%p ocd_connect_flags: "LPX64 " ocd_version: %x ocd_grant: %d ocd_index: %u" " ocd_group %u\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, data->ocd_connect_flags, data->ocd_version, data->ocd_grant, data->ocd_index, data->ocd_group); if (fed->fed_group != 0 && fed->fed_group != data->ocd_group) { CWARN("!!! This export (nid %s) used object group %d " "earlier; now it's trying to use group %d! This could " "be a bug in the MDS. Please report to " "https://jira.hpdd.intel.com/\n", obd_export_nid2str(exp), fed->fed_group, data->ocd_group); RETURN(-EPROTO); } fed->fed_group = data->ocd_group; data->ocd_connect_flags &= OST_CONNECT_SUPPORTED; data->ocd_version = LUSTRE_VERSION_CODE; /* Kindly make sure the SKIP_ORPHAN flag is from MDS. 
*/ if (data->ocd_connect_flags & OBD_CONNECT_MDS) CDEBUG(D_HA, "%s: Received MDS connection for group %u\n", exp->exp_obd->obd_name, data->ocd_group); else if (data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN) RETURN(-EPROTO); if (ofd_grant_param_supp(exp)) { exp->exp_filter_data.fed_pagesize = data->ocd_blocksize; /* ocd_{blocksize,inodespace} are log2 values */ data->ocd_blocksize = ofd->ofd_blockbits; data->ocd_inodespace = ofd->ofd_dt_conf.ddp_inodespace; /* ocd_grant_extent is in 1K blocks */ data->ocd_grant_extent = ofd->ofd_dt_conf.ddp_grant_frag >> 10; } if (data->ocd_connect_flags & OBD_CONNECT_GRANT) data->ocd_grant = ofd_grant_connect(env, exp, data->ocd_grant, new_connection); if (data->ocd_connect_flags & OBD_CONNECT_INDEX) { struct lr_server_data *lsd = &ofd->ofd_lut.lut_lsd; int index = lsd->lsd_osd_index; if (index != data->ocd_index) { LCONSOLE_ERROR_MSG(0x136, "Connection from %s to index" " %u doesn't match actual OST index" " %u in last_rcvd file, bad " "configuration?\n", obd_export_nid2str(exp), index, data->ocd_index); RETURN(-EBADF); } if (!(lsd->lsd_feature_compat & OBD_COMPAT_OST)) { /* this will only happen on the first connect */ lsd->lsd_feature_compat |= OBD_COMPAT_OST; /* sync is not needed here as lut_client_add will * set exp_need_sync flag */ tgt_server_data_update(env, &ofd->ofd_lut, 0); } } if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_SIZE)) { data->ocd_brw_size = 65536; } else if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) { data->ocd_brw_size = min(data->ocd_brw_size, (__u32)DT_MAX_BRW_SIZE); if (data->ocd_brw_size == 0) { CERROR("%s: cli %s/%p ocd_connect_flags: "LPX64 " ocd_version: %x ocd_grant: %d ocd_index: %u " "ocd_brw_size is unexpectedly zero, " "network data corruption? " "Refusing connection of this client\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, data->ocd_connect_flags, data->ocd_version, data->ocd_grant, data->ocd_index); RETURN(-EPROTO); } } if (data->ocd_connect_flags & OBD_CONNECT_CKSUM) { __u32 cksum_types = data->ocd_cksum_types; /* The client set in ocd_cksum_types the checksum types it * supports. We have to mask off the algorithms that we don't * support */ data->ocd_cksum_types &= cksum_types_supported_server(); if (unlikely(data->ocd_cksum_types == 0)) { CERROR("%s: Connect with checksum support but no " "ocd_cksum_types is set\n", exp->exp_obd->obd_name); RETURN(-EPROTO); } CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return " "%x\n", exp->exp_obd->obd_name, obd_export_nid2str(exp), cksum_types, data->ocd_cksum_types); } else { /* This client does not support OBD_CONNECT_CKSUM; * fall back to CRC32 */ CDEBUG(D_RPCTRACE, "%s: cli %s does not support " "OBD_CONNECT_CKSUM, CRC32 will be used\n", exp->exp_obd->obd_name, obd_export_nid2str(exp)); } if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES) data->ocd_maxbytes = ofd->ofd_dt_conf.ddp_maxbytes; if (OCD_HAS_FLAG(data, PINGLESS)) { if (ptlrpc_pinger_suppress_pings()) { spin_lock(&exp->exp_obd->obd_dev_lock); list_del_init(&exp->exp_obd_chain_timed); spin_unlock(&exp->exp_obd->obd_dev_lock); } else { data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; } } RETURN(0); }
int mdt_reint_setxattr(struct mdt_thread_info *info, struct mdt_lock_handle *unused) { struct ptlrpc_request *req = mdt_info_req(info); struct md_ucred *uc = mdt_ucred(info); struct mdt_lock_handle *lh; const struct lu_env *env = info->mti_env; struct lu_buf *buf = &info->mti_buf; struct mdt_reint_record *rr = &info->mti_rr; struct md_attr *ma = &info->mti_attr; struct lu_attr *attr = &info->mti_attr.ma_attr; struct mdt_object *obj; struct md_object *child; __u64 valid = attr->la_valid; const char *xattr_name = rr->rr_name; int xattr_len = rr->rr_eadatalen; __u64 lockpart; int rc; posix_acl_xattr_header *new_xattr = NULL; __u32 remote = exp_connect_rmtclient(info->mti_exp); __u32 perm; ENTRY; CDEBUG(D_INODE, "setxattr for "DFID"\n", PFID(rr->rr_fid1)); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR)) RETURN(err_serious(-ENOMEM)); CDEBUG(D_INODE, "%s xattr %s\n", valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name); rc = mdt_init_ucred_reint(info); if (rc != 0) RETURN(rc); if (valid & OBD_MD_FLRMTRSETFACL) { if (unlikely(!remote)) GOTO(out, rc = err_serious(-EINVAL)); perm = mdt_identity_get_perm(uc->mu_identity, remote, req->rq_peer.nid); if (!(perm & CFS_RMTACL_PERM)) GOTO(out, rc = err_serious(-EPERM)); } if (strncmp(xattr_name, XATTR_USER_PREFIX, sizeof(XATTR_USER_PREFIX) - 1) == 0) { if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR)) GOTO(out, rc = -EOPNOTSUPP); if (strcmp(xattr_name, XATTR_NAME_LOV) == 0) GOTO(out, rc = -EACCES); if (strcmp(xattr_name, XATTR_NAME_LMA) == 0) GOTO(out, rc = 0); if (strcmp(xattr_name, XATTR_NAME_LINK) == 0) GOTO(out, rc = 0); } else if ((valid & OBD_MD_FLXATTR) && (strncmp(xattr_name, XATTR_NAME_ACL_ACCESS, sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0 || strncmp(xattr_name, XATTR_NAME_ACL_DEFAULT, sizeof(XATTR_NAME_ACL_DEFAULT) - 1) == 0)) { /* currently Lustre limits the ACL access size */ if (xattr_len > LUSTRE_POSIX_ACL_MAX_SIZE) GOTO(out, rc = -ERANGE); } lockpart = MDS_INODELOCK_UPDATE; /* Revoke all clients' lookup lock, since the access * permissions for this inode are changed when ACL_ACCESS is * set. This isn't needed for ACL_DEFAULT, since that does * not change the access permissions of this inode, nor any * other existing inodes. It is setting the ACLs inherited * by new directories/files at create time. */ if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS)) lockpart |= MDS_INODELOCK_LOOKUP; lh = &info->mti_lh[MDT_LH_PARENT]; /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX * to cancel them. */ mdt_lock_reg_init(lh, LCK_EX); obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart); if (IS_ERR(obj)) GOTO(out, rc = PTR_ERR(obj)); info->mti_mos = obj; rc = mdt_version_get_check_save(info, obj, 0); if (rc) GOTO(out_unlock, rc); if (unlikely(!(valid & OBD_MD_FLCTIME))) { /* This isn't strictly an error, but all current clients * should set OBD_MD_FLCTIME when setting attributes. 
*/ CWARN("%s: client miss to set OBD_MD_FLCTIME when " "setxattr %s: [object "DFID"] [valid "LPU64"]\n", info->mti_exp->exp_obd->obd_name, xattr_name, PFID(rr->rr_fid1), valid); attr->la_ctime = cfs_time_current_sec(); } attr->la_valid = LA_CTIME; child = mdt_object_child(obj); if (valid & OBD_MD_FLXATTR) { char *xattr = (void *)rr->rr_eadata; if (xattr_len > 0) { int flags = 0; if (valid & OBD_MD_FLRMTLSETFACL) { if (unlikely(!remote)) GOTO(out_unlock, rc = -EINVAL); xattr_len = mdt_rmtlsetfacl(info, child, xattr_name, (ext_acl_xattr_header *)xattr, &new_xattr); if (xattr_len < 0) GOTO(out_unlock, rc = xattr_len); xattr = (char *)new_xattr; } if (attr->la_flags & XATTR_REPLACE) flags |= LU_XATTR_REPLACE; if (attr->la_flags & XATTR_CREATE) flags |= LU_XATTR_CREATE; mdt_fail_write(env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_SETXATTR_WRITE); buf->lb_buf = xattr; buf->lb_len = xattr_len; rc = mo_xattr_set(env, child, buf, xattr_name, flags); /* update ctime after xattr changed */ if (rc == 0) { ma->ma_attr_flags |= MDS_PERM_BYPASS; mo_attr_set(env, child, ma); } } } else if (valid & OBD_MD_FLXATTRRM) { rc = mo_xattr_del(env, child, xattr_name); /* update ctime after xattr changed */ if (rc == 0) { ma->ma_attr_flags |= MDS_PERM_BYPASS; mo_attr_set(env, child, ma); } } else { CDEBUG(D_INFO, "valid bits: "LPX64"\n", valid); rc = -EINVAL; } if (rc == 0) mdt_counter_incr(req->rq_export, LPROC_MDT_SETXATTR); EXIT; out_unlock: mdt_object_unlock_put(info, obj, lh, rc); if (unlikely(new_xattr != NULL)) lustre_posix_acl_xattr_free(new_xattr, xattr_len); out: mdt_exit_ucred(info); return rc; }
/* return EADATA length to the caller. negative value means error */ static int mdt_getxattr_pack_reply(struct mdt_thread_info *info) { struct req_capsule *pill = info->mti_pill; struct ptlrpc_request *req = mdt_info_req(info); char *xattr_name; __u64 valid; static const char user_string[] = "user."; int size, rc; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK)) RETURN(-ENOMEM); valid = info->mti_body->valid & (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS); /* Determine how many bytes we need */ if (valid == OBD_MD_FLXATTR) { xattr_name = req_capsule_client_get(pill, &RMF_NAME); if (!xattr_name) RETURN(-EFAULT); if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_XATTR) && !strncmp(xattr_name, user_string, sizeof(user_string) - 1)) RETURN(-EOPNOTSUPP); size = mo_xattr_get(info->mti_env, mdt_object_child(info->mti_object), &LU_BUF_NULL, xattr_name); } else if (valid == OBD_MD_FLXATTRLS) { size = mo_xattr_list(info->mti_env, mdt_object_child(info->mti_object), &LU_BUF_NULL); } else if (valid == OBD_MD_FLXATTRALL) { /* N.B. eadatasize = 0 is not valid for FLXATTRALL */ /* We could calculate accurate sizes, but this would * introduce a lot of overhead, let's do it later... */ size = info->mti_body->eadatasize; req_capsule_set_size(pill, &RMF_EAVALS, RCL_SERVER, size); req_capsule_set_size(pill, &RMF_EAVALS_LENS, RCL_SERVER, size); } else { CDEBUG(D_INFO, "Valid bits: "LPX64"\n", info->mti_body->valid); RETURN(-EINVAL); } if (size == -ENODATA) { size = 0; } else if (size < 0) { CERROR("Error getting EA size: %d\n", size); RETURN(size); } req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER, info->mti_body->eadatasize == 0 ? 0 : size); rc = req_capsule_server_pack(pill); if (rc) { LASSERT(rc < 0); RETURN(rc); } RETURN(size); }
/** * Implementation of ldlm_valblock_ops::lvbo_init for OFD. * * This function allocates and initializes new LVB data for the given * LDLM resource if it is not allocated yet. New LVB is filled with attributes * of the object associated with that resource. Function does nothing if LVB * for the given LDLM resource is allocated already. * * Called with res->lr_lvb_sem held. * * \param[in] res LDLM resource * * \retval 0 on successful setup * \retval negative value on error */ static int ofd_lvbo_init(struct ldlm_resource *res) { struct ost_lvb *lvb; struct ofd_device *ofd; struct ofd_object *fo; struct ofd_thread_info *info; struct lu_env env; int rc = 0; ENTRY; LASSERT(res); LASSERT(mutex_is_locked(&res->lr_lvb_mutex)); if (res->lr_lvb_data != NULL) RETURN(0); ofd = ldlm_res_to_ns(res)->ns_lvbp; LASSERT(ofd != NULL); if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_OST_LVB)) RETURN(-ENOMEM); rc = lu_env_init(&env, LCT_DT_THREAD); if (rc) RETURN(rc); OBD_ALLOC_PTR(lvb); if (lvb == NULL) GOTO(out_env, rc = -ENOMEM); res->lr_lvb_data = lvb; res->lr_lvb_len = sizeof(*lvb); info = ofd_info_init(&env, NULL); ost_fid_from_resid(&info->fti_fid, &res->lr_name, ofd->ofd_lut.lut_lsd.lsd_osd_index); fo = ofd_object_find(&env, ofd, &info->fti_fid); if (IS_ERR(fo)) GOTO(out_lvb, rc = PTR_ERR(fo)); rc = ofd_attr_get(&env, fo, &info->fti_attr); if (rc) GOTO(out_obj, rc); lvb->lvb_size = info->fti_attr.la_size; lvb->lvb_blocks = info->fti_attr.la_blocks; lvb->lvb_mtime = info->fti_attr.la_mtime; lvb->lvb_atime = info->fti_attr.la_atime; lvb->lvb_ctime = info->fti_attr.la_ctime; CDEBUG(D_DLMTRACE, "res: "DFID" initial lvb size: "LPU64", " "mtime: "LPX64", blocks: "LPX64"\n", PFID(&info->fti_fid), lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_blocks); EXIT; out_obj: ofd_object_put(&env, fo); out_lvb: if (rc != 0) OST_LVB_SET_ERR(lvb->lvb_blocks, rc); out_env: lu_env_fini(&env); /* Don't free lvb data on lookup error */ return rc; }
/* Allocate new fid on passed client @seq and save it to @fid. */ int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq, struct lu_fid *fid) { wait_queue_t link; int rc; ENTRY; LASSERT(seq != NULL); LASSERT(fid != NULL); init_waitqueue_entry_current(&link); mutex_lock(&seq->lcs_mutex); if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST)) seq->lcs_fid.f_oid = seq->lcs_width; while (1) { u64 seqnr; if (!fid_is_zero(&seq->lcs_fid) && fid_oid(&seq->lcs_fid) < seq->lcs_width) { /* Just bump last allocated fid and return to caller. */ seq->lcs_fid.f_oid += 1; rc = 0; break; } rc = seq_fid_alloc_prep(seq, &link); if (rc) continue; rc = seq_client_alloc_seq(env, seq, &seqnr); if (rc) { CERROR("%s: Can't allocate new sequence, " "rc %d\n", seq->lcs_name, rc); seq_fid_alloc_fini(seq); mutex_unlock(&seq->lcs_mutex); RETURN(rc); } CDEBUG(D_INFO, "%s: Switch to sequence " "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr); seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID; seq->lcs_fid.f_seq = seqnr; seq->lcs_fid.f_ver = 0; /* * Inform caller that sequence switch is performed to allow it * to setup FLD for it. */ rc = 1; seq_fid_alloc_fini(seq); break; } *fid = seq->lcs_fid; mutex_unlock(&seq->lcs_mutex); CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid)); RETURN(rc); }
/** * Callback handler for receiving incoming completion ASTs. * * This can only happen on the client side. */ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, struct ldlm_namespace *ns, struct ldlm_request *dlm_req, struct ldlm_lock *lock) { int lvb_len; LIST_HEAD(ast_list); int rc = 0; LDLM_DEBUG(lock, "client completion callback handler START"); if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) { int to = cfs_time_seconds(1); while (to > 0) { schedule_timeout_and_set_state(TASK_INTERRUPTIBLE, to); if (lock->l_granted_mode == lock->l_req_mode || lock->l_flags & LDLM_FL_DESTROYED) break; } } lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT); if (lvb_len < 0) { LDLM_ERROR(lock, "Failed to get lvb_len, rc = %d", lvb_len); GOTO(out, rc = lvb_len); } else if (lvb_len > 0) { if (lock->l_lvb_len > 0) { /* for extent lock, lvb contains ost_lvb{}. */ LASSERT(lock->l_lvb_data != NULL); if (unlikely(lock->l_lvb_len < lvb_len)) { LDLM_ERROR(lock, "Replied LVB is larger than " "expected: expected = %d, " "replied = %d", lock->l_lvb_len, lvb_len); GOTO(out, rc = -EINVAL); } } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has * variable length */ void *lvb_data; OBD_ALLOC(lvb_data, lvb_len); if (lvb_data == NULL) { LDLM_ERROR(lock, "No memory: %d\n", lvb_len); GOTO(out, rc = -ENOMEM); } lock_res_and_lock(lock); LASSERT(lock->l_lvb_data == NULL); lock->l_lvb_data = lvb_data; lock->l_lvb_len = lvb_len; unlock_res_and_lock(lock); } } lock_res_and_lock(lock); if ((lock->l_flags & LDLM_FL_DESTROYED) || lock->l_granted_mode == lock->l_req_mode) { /* bug 11300: the lock has already been granted */ unlock_res_and_lock(lock); LDLM_DEBUG(lock, "Double grant race happened"); GOTO(out, rc = 0); } /* If we receive the completion AST before the actual enqueue returned, * then we might need to switch lock modes, resources, or extents. */ if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) { lock->l_req_mode = dlm_req->lock_desc.l_granted_mode; LDLM_DEBUG(lock, "completion AST, new lock mode"); } if (lock->l_resource->lr_type != LDLM_PLAIN) { ldlm_convert_policy_to_local(req->rq_export, dlm_req->lock_desc.l_resource.lr_type, &dlm_req->lock_desc.l_policy_data, &lock->l_policy_data); LDLM_DEBUG(lock, "completion AST, new policy data"); } ldlm_resource_unlink_lock(lock); if (memcmp(&dlm_req->lock_desc.l_resource.lr_name, &lock->l_resource->lr_name, sizeof(lock->l_resource->lr_name)) != 0) { unlock_res_and_lock(lock); rc = ldlm_lock_change_resource(ns, lock, &dlm_req->lock_desc.l_resource.lr_name); if (rc < 0) { LDLM_ERROR(lock, "Failed to allocate resource"); GOTO(out, rc); } LDLM_DEBUG(lock, "completion AST, new resource"); CERROR("change resource!\n"); lock_res_and_lock(lock); } if (dlm_req->lock_flags & LDLM_FL_AST_SENT) { /* BL_AST locks are not needed in LRU. * Let ldlm_cancel_lru() be fast. 
*/ ldlm_lock_remove_from_lru(lock); lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; LDLM_DEBUG(lock, "completion AST includes blocking AST"); } if (lock->l_lvb_len > 0) { rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT, lock->l_lvb_data, lvb_len); if (rc < 0) { unlock_res_and_lock(lock); GOTO(out, rc); } } ldlm_grant_lock(lock, &ast_list); unlock_res_and_lock(lock); LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work"); /* Let enqueue call osc_lock_upcall() and initialize * l_ast_data */ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 2); ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST); LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)", lock); GOTO(out, rc); out: if (rc < 0) { lock_res_and_lock(lock); lock->l_flags |= LDLM_FL_FAILED; unlock_res_and_lock(lock); wake_up(&lock->l_waitq); } LDLM_LOCK_RELEASE(lock); }
/** * Register bulk at the sender for later transfer. * Returns 0 on success or error code. */ static int ptlrpc_register_bulk(struct ptlrpc_request *req) { struct ptlrpc_bulk_desc *desc = req->rq_bulk; lnet_process_id_t peer; int rc = 0; int rc2; int posted_md; int total_md; __u64 xid; lnet_handle_me_t me_h; lnet_md_t md; if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET)) return 0; /* NB no locking required until desc is on the network */ LASSERT(desc->bd_nob > 0); LASSERT(desc->bd_md_count == 0); LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT); LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); LASSERT(desc->bd_req != NULL); LASSERT(desc->bd_type == BULK_PUT_SINK || desc->bd_type == BULK_GET_SOURCE); /* clean up the state of the bulk as it will be reused */ if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY) desc->bd_nob_transferred = 0; else LASSERT(desc->bd_nob_transferred == 0); desc->bd_failure = 0; peer = desc->bd_import->imp_connection->c_peer; LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback); LASSERT(desc->bd_cbid.cbid_arg == desc); /* An XID is only used for a single request from the client. * For retried bulk transfers, a new XID will be allocated * in ptlrpc_check_set() if it needs to be resent, so it is not * using the same RDMA match bits after an error. * * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */ xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1); LASSERTF(!(desc->bd_registered && req->rq_send_state != LUSTRE_IMP_REPLAY) || xid != desc->bd_last_xid, "registered: %d rq_xid: %llu bd_last_xid: %llu\n", desc->bd_registered, xid, desc->bd_last_xid); total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV; desc->bd_registered = 1; desc->bd_last_xid = xid; desc->bd_md_count = total_md; md.user_ptr = &desc->bd_cbid; md.eq_handle = ptlrpc_eq_h; md.threshold = 1; /* PUT or GET */ for (posted_md = 0; posted_md < total_md; posted_md++, xid++) { md.options = PTLRPC_MD_OPTIONS | ((desc->bd_type == BULK_GET_SOURCE) ? LNET_MD_OP_GET : LNET_MD_OP_PUT); ptlrpc_fill_bulk_md(&md, desc, posted_md); rc = LNetMEAttach(desc->bd_portal, peer, xid, 0, LNET_UNLINK, LNET_INS_AFTER, &me_h); if (rc != 0) { CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n", desc->bd_import->imp_obd->obd_name, xid, posted_md, rc); break; } /* About to let the network at it... 
*/ rc = LNetMDAttach(me_h, md, LNET_UNLINK, &desc->bd_mds[posted_md]); if (rc != 0) { CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n", desc->bd_import->imp_obd->obd_name, xid, posted_md, rc); rc2 = LNetMEUnlink(me_h); LASSERT(rc2 == 0); break; } } if (rc != 0) { LASSERT(rc == -ENOMEM); spin_lock(&desc->bd_lock); desc->bd_md_count -= total_md - posted_md; spin_unlock(&desc->bd_lock); LASSERT(desc->bd_md_count >= 0); mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); req->rq_status = -ENOMEM; return -ENOMEM; } /* Set rq_xid to matchbits of the final bulk so that server can * infer the number of bulks that were prepared */ req->rq_xid = --xid; LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK), "bd_last_xid = x%llu, rq_xid = x%llu\n", desc->bd_last_xid, req->rq_xid); spin_lock(&desc->bd_lock); /* Holler if peer manages to touch buffers before he knows the xid */ if (desc->bd_md_count != total_md) CWARN("%s: Peer %s touched %d buffers while I registered\n", desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer), total_md - desc->bd_md_count); spin_unlock(&desc->bd_lock); CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n", desc->bd_md_count, desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink", desc->bd_iov_count, desc->bd_nob, desc->bd_last_xid, req->rq_xid, desc->bd_portal); return 0; }
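/*
 * Sketch of the XID math used above: rq_xid carries the last matchbits
 * value for the request's bulks, and the first bulk XID is rq_xid
 * rounded down to the bd_md_max_brw (power-of-two) boundary, exactly as
 * in the masking expression in ptlrpc_register_bulk(). The helper name
 * is hypothetical.
 */
static inline __u64 demo_first_bulk_xid(__u64 rq_xid, __u64 md_max_brw)
{
	/* md_max_brw must be a power of two for this mask to be valid */
	return rq_xid & ~(md_max_brw - 1);
}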
int llog_setup(const struct lu_env *env, struct obd_device *obd, struct obd_llog_group *olg, int index, struct obd_device *disk_obd, struct llog_operations *op) { struct llog_ctxt *ctxt; int rc = 0; if (index < 0 || index >= LLOG_MAX_CTXTS) return -EINVAL; LASSERT(olg != NULL); ctxt = llog_new_ctxt(obd); if (!ctxt) return -ENOMEM; ctxt->loc_obd = obd; ctxt->loc_olg = olg; ctxt->loc_idx = index; ctxt->loc_logops = op; mutex_init(&ctxt->loc_mutex); ctxt->loc_exp = class_export_get(disk_obd->obd_self_export); ctxt->loc_flags = LLOG_CTXT_FLAG_UNINITIALIZED; rc = llog_group_set_ctxt(olg, ctxt, index); if (rc) { llog_ctxt_destroy(ctxt); if (rc == -EEXIST) { ctxt = llog_group_get_ctxt(olg, index); if (ctxt) { /* * mds_lov_update_desc() might get here multiple * times, so if the llog is already set up then * don't do it again. */ CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n", obd->obd_name, index); LASSERT(ctxt->loc_olg == olg); LASSERT(ctxt->loc_obd == obd); LASSERT(ctxt->loc_exp == disk_obd->obd_self_export); LASSERT(ctxt->loc_logops == op); llog_ctxt_put(ctxt); } rc = 0; } return rc; } if (op->lop_setup) { if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LLOG_SETUP)) rc = -EOPNOTSUPP; else rc = op->lop_setup(env, obd, olg, index, disk_obd); } if (rc) { CERROR("%s: ctxt %d lop_setup=%p failed: rc = %d\n", obd->obd_name, index, op->lop_setup, rc); llog_group_clear_ctxt(olg, index); llog_ctxt_destroy(ctxt); } else { CDEBUG(D_CONFIG, "obd %s ctxt %d is initialized\n", obd->obd_name, index); ctxt->loc_flags &= ~LLOG_CTXT_FLAG_UNINITIALIZED; } return rc; }
/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */ static int ldlm_callback_handler(struct ptlrpc_request *req) { struct ldlm_namespace *ns; struct ldlm_request *dlm_req; struct ldlm_lock *lock; int rc; /* Requests arrive in sender's byte order. The ptlrpc service * handler has already checked and, if necessary, byte-swapped the * incoming request message body, but I am responsible for the * message buffers. */ /* do nothing for sec context finalize */ if (lustre_msg_get_opc(req->rq_reqmsg) == SEC_CTX_FINI) return 0; req_capsule_init(&req->rq_pill, req, RCL_SERVER); if (req->rq_export == NULL) { rc = ldlm_callback_reply(req, -ENOTCONN); ldlm_callback_errmsg(req, "Operate on unconnected server", rc, NULL); return 0; } LASSERT(req->rq_export != NULL); LASSERT(req->rq_export->exp_obd != NULL); switch (lustre_msg_get_opc(req->rq_reqmsg)) { case LDLM_BL_CALLBACK: if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) return 0; break; case LDLM_CP_CALLBACK: if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET)) return 0; break; case LDLM_GL_CALLBACK: if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GL_CALLBACK_NET)) return 0; break; case LDLM_SET_INFO: rc = ldlm_handle_setinfo(req); ldlm_callback_reply(req, rc); return 0; case OBD_LOG_CANCEL: /* remove this eventually - for 1.4.0 compat */ CERROR("shouldn't be handling OBD_LOG_CANCEL on DLM thread\n"); req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL); if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET)) return 0; rc = llog_origin_handle_cancel(req); if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP)) return 0; ldlm_callback_reply(req, rc); return 0; case LLOG_ORIGIN_HANDLE_CREATE: req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE); if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET)) return 0; rc = llog_origin_handle_open(req); ldlm_callback_reply(req, rc); return 0; case LLOG_ORIGIN_HANDLE_NEXT_BLOCK: req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK); if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET)) return 0; rc = llog_origin_handle_next_block(req); ldlm_callback_reply(req, rc); return 0; case LLOG_ORIGIN_HANDLE_READ_HEADER: req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER); if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET)) return 0; rc = llog_origin_handle_read_header(req); ldlm_callback_reply(req, rc); return 0; case LLOG_ORIGIN_HANDLE_CLOSE: if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET)) return 0; rc = llog_origin_handle_close(req); ldlm_callback_reply(req, rc); return 0; case OBD_QC_CALLBACK: req_capsule_set(&req->rq_pill, &RQF_QC_CALLBACK); if (OBD_FAIL_CHECK(OBD_FAIL_OBD_QC_CALLBACK_NET)) return 0; rc = ldlm_handle_qc_callback(req); ldlm_callback_reply(req, rc); return 0; default: CERROR("unknown opcode %u\n", lustre_msg_get_opc(req->rq_reqmsg)); ldlm_callback_reply(req, -EPROTO); return 0; } ns = req->rq_export->exp_obd->obd_namespace; LASSERT(ns != NULL); req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK); dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); if (dlm_req == NULL) { rc = ldlm_callback_reply(req, -EPROTO); ldlm_callback_errmsg(req, "Operate without parameter", rc, NULL); return 0; } /* Force a known safe race, send a cancel to the server for a lock * which the server has already started a blocking callback on. 
*/ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) && lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0); if (rc < 0) CERROR("ldlm_cli_cancel: %d\n", rc); } lock = ldlm_handle2lock_long(&dlm_req->lock_handle[0], 0); if (!lock) { CDEBUG(D_DLMTRACE, "callback on lock "LPX64" - lock " "disappeared\n", dlm_req->lock_handle[0].cookie); rc = ldlm_callback_reply(req, -EINVAL); ldlm_callback_errmsg(req, "Operate with invalid parameter", rc, &dlm_req->lock_handle[0]); return 0; } if ((lock->l_flags & LDLM_FL_FAIL_LOC) && lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */ lock_res_and_lock(lock); lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags & LDLM_AST_FLAGS); if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { /* If somebody cancels lock and cache is already dropped, * or lock is failed before cp_ast received on client, * we can tell the server we have no lock. Otherwise, we * should send cancel after dropping the cache. */ if (((lock->l_flags & LDLM_FL_CANCELING) && (lock->l_flags & LDLM_FL_BL_DONE)) || (lock->l_flags & LDLM_FL_FAILED)) { LDLM_DEBUG(lock, "callback on lock " LPX64" - lock disappeared\n", dlm_req->lock_handle[0].cookie); unlock_res_and_lock(lock); LDLM_LOCK_RELEASE(lock); rc = ldlm_callback_reply(req, -EINVAL); ldlm_callback_errmsg(req, "Operate on stale lock", rc, &dlm_req->lock_handle[0]); return 0; } /* BL_AST locks are not needed in LRU. * Let ldlm_cancel_lru() be fast. */ ldlm_lock_remove_from_lru(lock); lock->l_flags |= LDLM_FL_BL_AST; } unlock_res_and_lock(lock); /* We want the ost thread to get this reply so that it can respond * to ost requests (write cache writeback) that might be triggered * in the callback. * * But we'd also like to be able to indicate in the reply that we're * cancelling right now, because it's unused, or have an intent result * in the reply, so we might have to push the responsibility for sending * the reply down into the AST handlers, alas. */ switch (lustre_msg_get_opc(req->rq_reqmsg)) { case LDLM_BL_CALLBACK: CDEBUG(D_INODE, "blocking ast\n"); req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK); if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) { rc = ldlm_callback_reply(req, 0); if (req->rq_no_reply || rc) ldlm_callback_errmsg(req, "Normal process", rc, &dlm_req->lock_handle[0]); } if (ldlm_bl_to_thread_lock(ns, &dlm_req->lock_desc, lock)) ldlm_handle_bl_callback(ns, &dlm_req->lock_desc, lock); break; case LDLM_CP_CALLBACK: CDEBUG(D_INODE, "completion ast\n"); req_capsule_extend(&req->rq_pill, &RQF_LDLM_CP_CALLBACK); ldlm_callback_reply(req, 0); ldlm_handle_cp_callback(req, ns, dlm_req, lock); break; case LDLM_GL_CALLBACK: CDEBUG(D_INODE, "glimpse ast\n"); req_capsule_extend(&req->rq_pill, &RQF_LDLM_GL_CALLBACK); ldlm_handle_gl_callback(req, ns, dlm_req, lock); break; default: LBUG(); /* checked above */ } return 0; }
/** * Prepare bulk IO requests for processing. * * This function does initial checks of IO and calls corresponding * functions for read/write processing. * * \param[in] env execution environment * \param[in] cmd IO type (read/write) * \param[in] exp OBD export of client * \param[in] oa OBDO structure from request * \param[in] objcount always 1 * \param[in] obj object data * \param[in] rnb remote buffers * \param[in] nr_local number of local buffers * \param[in] lnb local buffers * * \retval 0 on successful prepare * \retval negative value on error */ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int *nr_local, struct niobuf_local *lnb) { struct tgt_session_info *tsi = tgt_ses_info(env); struct ofd_device *ofd = ofd_exp(exp); struct ofd_thread_info *info; char *jobid; const struct lu_fid *fid = &oa->o_oi.oi_fid; int rc = 0; if (*nr_local > PTLRPC_MAX_BRW_PAGES) { CERROR("%s: bulk has too many pages %d, which exceeds the " "maximum pages per RPC of %d\n", exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES); RETURN(-EPROTO); } if (tgt_ses_req(tsi) == NULL) { /* echo client case */ info = ofd_info_init(env, exp); jobid = NULL; } else { info = tsi2ofd_info(tsi); jobid = tsi->tsi_jobid; } LASSERT(oa != NULL); if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) { struct ofd_seq *oseq; oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi)); if (IS_ERR(oseq)) { CERROR("%s: Cannot find seq for "DOSTID ": rc = %ld\n", ofd_name(ofd), POSTID(&oa->o_oi), PTR_ERR(oseq)); RETURN(-EINVAL); } if (oseq->os_destroys_in_progress == 0) { /* don't fail lookups for orphan recovery, it causes * later LBUGs when objects still exist during * precreate */ ofd_seq_put(env, oseq); RETURN(-ENOENT); } ofd_seq_put(env, oseq); } LASSERT(objcount == 1); LASSERT(obj->ioo_bufcnt > 0); if (cmd == OBD_BRW_WRITE) { la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR); rc = ofd_preprw_write(env, exp, ofd, fid, &info->fti_attr, oa, objcount, obj, rnb, nr_local, lnb, jobid); } else if (cmd == OBD_BRW_READ) { ofd_grant_prepare_read(env, exp, oa); rc = ofd_preprw_read(env, exp, ofd, fid, &info->fti_attr, oa, obj->ioo_bufcnt, rnb, nr_local, lnb, jobid); obdo_from_la(oa, &info->fti_attr, LA_ATIME); } else { CERROR("%s: wrong cmd %d received!\n", exp->exp_obd->obd_name, cmd); rc = -EPROTO; } RETURN(rc); }
/**
 * Send request \a request.
 * If \a noreply is set, don't expect any reply back and don't set up
 * reply buffers.
 * Returns 0 on success or error code.
 */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
	int rc;
	int rc2;
	int mpflag = 0;
	struct ptlrpc_connection *connection;
	lnet_handle_me_t reply_me_h;
	lnet_md_t reply_md;
	struct obd_device *obd = request->rq_import->imp_obd;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
		return 0;

	LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
	LASSERT(request->rq_wait_ctx == 0);

	/* If this is a re-transmit, we're required to have disengaged
	 * cleanly from the previous attempt */
	LASSERT(!request->rq_receiving_reply);
	LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
		  (request->rq_import->imp_state == LUSTRE_IMP_FULL)));

	if (unlikely(obd != NULL && obd->obd_fail)) {
		CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
		       obd->obd_name);
		/* this prevents us from waiting in ptlrpc_queue_wait */
		spin_lock(&request->rq_lock);
		request->rq_err = 1;
		spin_unlock(&request->rq_lock);
		request->rq_status = -ENODEV;
		return -ENODEV;
	}

	connection = request->rq_import->imp_connection;

	lustre_msg_set_handle(request->rq_reqmsg,
			      &request->rq_import->imp_remote_handle);
	lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
	lustre_msg_set_conn_cnt(request->rq_reqmsg,
				request->rq_import->imp_conn_cnt);
	lustre_msghdr_set_flags(request->rq_reqmsg,
				request->rq_import->imp_msghdr_flags);

	if (request->rq_resend)
		lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);

	if (request->rq_memalloc)
		mpflag = cfs_memory_pressure_get_and_set();

	rc = sptlrpc_cli_wrap_request(request);
	if (rc)
		goto out;

	/* bulk register should be done after wrap_request() */
	if (request->rq_bulk != NULL) {
		rc = ptlrpc_register_bulk(request);
		if (rc != 0)
			goto out;
	}

	if (!noreply) {
		LASSERT(request->rq_replen != 0);
		if (request->rq_repbuf == NULL) {
			LASSERT(request->rq_repdata == NULL);
			LASSERT(request->rq_repmsg == NULL);
			rc = sptlrpc_cli_alloc_repbuf(request,
						      request->rq_replen);
			if (rc) {
				/* this prevents us from looping in
				 * ptlrpc_queue_wait */
				spin_lock(&request->rq_lock);
				request->rq_err = 1;
				spin_unlock(&request->rq_lock);
				request->rq_status = rc;
				goto cleanup_bulk;
			}
		} else {
			request->rq_repdata = NULL;
			request->rq_repmsg = NULL;
		}

		rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
				  connection->c_peer, request->rq_xid, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
		if (rc != 0) {
			CERROR("LNetMEAttach failed: %d\n", rc);
			LASSERT(rc == -ENOMEM);
			rc = -ENOMEM;
			goto cleanup_bulk;
		}
	}

	spin_lock(&request->rq_lock);
	/* If the MD attach succeeds, there _will_ be a reply_in callback */
	request->rq_receiving_reply = !noreply;
	request->rq_req_unlink = 1;
	/* We are responsible for unlinking the reply buffer */
	request->rq_reply_unlink = !noreply;
	/* Clear any flags that may be present from previous sends. */
	request->rq_replied = 0;
	request->rq_err = 0;
	request->rq_timedout = 0;
	request->rq_net_err = 0;
	request->rq_resend = 0;
	request->rq_restart = 0;
	request->rq_reply_truncate = 0;
	spin_unlock(&request->rq_lock);

	if (!noreply) {
		reply_md.start = request->rq_repbuf;
		reply_md.length = request->rq_repbuf_len;
		/* Allow multiple early replies */
		reply_md.threshold = LNET_MD_THRESH_INF;
		/* Manage remote for early replies */
		reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
				   LNET_MD_MANAGE_REMOTE |
				   LNET_MD_TRUNCATE; /* allow truncation to
						      * produce EOVERFLOW */
		reply_md.user_ptr = &request->rq_reply_cbid;
		reply_md.eq_handle = ptlrpc_eq_h;

		/* We must see the unlink callback to unset rq_reply_unlink,
		 * so we can't auto-unlink */
		rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
				  &request->rq_reply_md_h);
		if (rc != 0) {
			CERROR("LNetMDAttach failed: %d\n", rc);
			LASSERT(rc == -ENOMEM);
			spin_lock(&request->rq_lock);
			/* ...but the MD attach didn't succeed... */
			request->rq_receiving_reply = 0;
			spin_unlock(&request->rq_lock);
			rc = -ENOMEM;
			goto cleanup_me;
		}

		CDEBUG(D_NET,
		       "Setup reply buffer: %u bytes, xid %llu, portal %u\n",
		       request->rq_repbuf_len, request->rq_xid,
		       request->rq_reply_portal);
	}

	/* take a reference on the request for request_out_callback */
	ptlrpc_request_addref(request);
	if (obd != NULL && obd->obd_svc_stats != NULL)
		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
				    atomic_read(&request->rq_import->imp_inflight));

	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);

	ktime_get_real_ts64(&request->rq_arrival_time);
	request->rq_sent = ktime_get_real_seconds();
	/* We give the server rq_timeout secs to process the req, and
	 * add the network latency for our local timeout. */
	request->rq_deadline = request->rq_sent + request->rq_timeout +
			       ptlrpc_at_get_net_latency(request);

	ptlrpc_pinger_sending_on_import(request->rq_import);

	DEBUG_REQ(D_INFO, request, "send flg=%x",
		  lustre_msg_get_flags(request->rq_reqmsg));
	rc = ptl_send_buf(&request->rq_req_md_h,
			  request->rq_reqbuf, request->rq_reqdata_len,
			  LNET_NOACK_REQ, &request->rq_req_cbid,
			  connection,
			  request->rq_request_portal,
			  request->rq_xid, 0);
	if (rc == 0)
		goto out;

	ptlrpc_req_finished(request);
	if (noreply)
		goto out;

 cleanup_me:
	/* MEUnlink is safe; the PUT didn't even get off the ground, and
	 * nobody apart from the PUT's target has the right nid+XID to
	 * access the reply buffer. */
	rc2 = LNetMEUnlink(reply_me_h);
	LASSERT(rc2 == 0);
	/* UNLINKED callback called synchronously */
	LASSERT(!request->rq_receiving_reply);

 cleanup_bulk:
	/* We do sync unlink here as there was no real transfer here so
	 * the chance to have long unlink to sluggish net is smaller here. */
	ptlrpc_unregister_bulk(request, 0);
 out:
	if (request->rq_memalloc)
		cfs_memory_pressure_restore(mpflag);
	return rc;
}
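/*
 * Illustrative sketch: a minimal synchronous client RPC that reaches
 * ptl_send_rpc() through ptlrpc_queue_wait().  This mirrors the shape of
 * the standard OBD ping path; the helper name is an assumption for
 * illustration, while RQF_OBD_PING, LUSTRE_OBD_VERSION and OBD_PING are
 * real identifiers from the request-format tables.
 */
static int ptl_send_rpc_usage_sketch(struct obd_import *imp)
{
	struct ptlrpc_request *req;
	int rc;

	/* allocate the request and pack the OBD_PING message */
	req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING,
					LUSTRE_OBD_VERSION, OBD_PING);
	if (req == NULL)
		return -ENOMEM;

	ptlrpc_request_set_replen(req);

	/* ptlrpc_queue_wait() -> ptlrpc_send_new_req() -> ptl_send_rpc(),
	 * then blocks until the reply buffer set up above is filled */
	rc = ptlrpc_queue_wait(req);

	ptlrpc_req_finished(req);
	return rc;
}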
/**
 * Set attributes of object during write bulk IO processing.
 *
 * Change object attributes and write parent FID into extended
 * attributes when needed.
 *
 * \param[in] env	execution environment
 * \param[in] ofd	OFD device
 * \param[in] ofd_obj	OFD object
 * \param[in] la	object attributes
 * \param[in] ff	parent FID
 *
 * \retval		0 on successful attributes update
 * \retval		negative value on error
 */
static int ofd_write_attr_set(const struct lu_env *env,
			      struct ofd_device *ofd,
			      struct ofd_object *ofd_obj,
			      struct lu_attr *la, struct filter_fid *ff)
{
	struct ofd_thread_info	*info = ofd_info(env);
	__u64			 valid = la->la_valid;
	int			 rc;
	struct thandle		*th;
	struct dt_object	*dt_obj;
	int			 ff_needed = 0;

	ENTRY;

	LASSERT(la);

	dt_obj = ofd_object_child(ofd_obj);
	LASSERT(dt_obj != NULL);

	la->la_valid &= LA_UID | LA_GID;

	rc = ofd_attr_handle_ugid(env, ofd_obj, la, 0 /* !is_setattr */);
	if (rc != 0)
		GOTO(out, rc);

	if (ff != NULL) {
		rc = ofd_object_ff_load(env, ofd_obj);
		if (rc == -ENODATA)
			ff_needed = 1;
		else if (rc < 0)
			GOTO(out, rc);
	}

	if (!la->la_valid && !ff_needed)
		/* no attributes to set */
		GOTO(out, rc = 0);

	th = ofd_trans_create(env, ofd);
	if (IS_ERR(th))
		GOTO(out, rc = PTR_ERR(th));

	if (la->la_valid) {
		rc = dt_declare_attr_set(env, dt_obj, la, th);
		if (rc)
			GOTO(out_tx, rc);
	}

	if (ff_needed) {
		if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_UNMATCHED_PAIR1))
			ff->ff_parent.f_oid = cpu_to_le32(1UL << 31);

		if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_UNMATCHED_PAIR2))
			ff->ff_parent.f_oid =
				cpu_to_le32(le32_to_cpu(ff->ff_parent.f_oid) -
					    1);

		info->fti_buf.lb_buf = ff;
		info->fti_buf.lb_len = sizeof(*ff);
		rc = dt_declare_xattr_set(env, dt_obj, &info->fti_buf,
					  XATTR_NAME_FID, 0, th);
		if (rc)
			GOTO(out_tx, rc);
	}

	/* We don't need a transno for this operation which will be re-executed
	 * anyway when the OST_WRITE (with a transno assigned) is replayed */
	rc = dt_trans_start_local(env, ofd->ofd_osd, th);
	if (rc)
		GOTO(out_tx, rc);

	/* set uid/gid */
	if (la->la_valid) {
		rc = dt_attr_set(env, dt_obj, la, th);
		if (rc)
			GOTO(out_tx, rc);
	}

	/* set filter fid EA */
	if (ff_needed) {
		if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NOPFID))
			GOTO(out_tx, rc);

		rc = dt_xattr_set(env, dt_obj, &info->fti_buf, XATTR_NAME_FID,
				  0, th);
		if (rc == 0) {
			ofd_obj->ofo_pfid.f_seq =
				le64_to_cpu(ff->ff_parent.f_seq);
			ofd_obj->ofo_pfid.f_oid =
				le32_to_cpu(ff->ff_parent.f_oid);
			/* Currently, the filter_fid::ff_parent::f_ver is not
			 * the real parent MDT-object's FID::f_ver, instead it
			 * is the OST-object index in its parent MDT-object's
			 * layout EA. */
			ofd_obj->ofo_pfid.f_stripe_idx =
				le32_to_cpu(ff->ff_parent.f_stripe_idx);
		}
	}

	GOTO(out_tx, rc);

out_tx:
	dt_trans_stop(env, ofd->ofd_osd, th);
out:
	la->la_valid = valid;
	return rc;
}
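/*
 * Illustrative sketch: the declare/start/execute/stop transaction pattern
 * that ofd_write_attr_set() follows, reduced to a single attribute update.
 * The helper name is an assumption for illustration; all of the dt_*() and
 * ofd_trans_create() calls are the same ones used above.  Note that, as in
 * the original, dt_trans_stop() is also reached when a declaration fails
 * before the transaction is started.
 */
static int ofd_attr_update_sketch(const struct lu_env *env,
				  struct ofd_device *ofd,
				  struct dt_object *dt_obj,
				  struct lu_attr *la)
{
	struct thandle *th;
	int rc;

	th = ofd_trans_create(env, ofd);
	if (IS_ERR(th))
		return PTR_ERR(th);

	/* phase 1: declare every intended change so the OSD can reserve
	 * transaction credits before anything is modified */
	rc = dt_declare_attr_set(env, dt_obj, la, th);
	if (rc != 0)
		goto stop;

	/* phase 2: start the (local, no-transno) transaction, then apply
	 * exactly what was declared */
	rc = dt_trans_start_local(env, ofd->ofd_osd, th);
	if (rc == 0)
		rc = dt_attr_set(env, dt_obj, la, th);
stop:
	/* phase 3: always stop the handle, whether or not it was started */
	dt_trans_stop(env, ofd->ofd_osd, th);
	return rc;
}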