int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
               struct ptlrpc_request **request)
{
        CFS_LIST_HEAD(cancels);
        struct obd_device *obd = class_exp2obd(exp);
        struct ptlrpc_request *req = *request;
        int count = 0, rc;
        ENTRY;

        LASSERT(req == NULL);

        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)) &&
            !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);
        if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
            (fid_is_sane(&op_data->op_fid3)) &&
            !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
                count += mdc_resource_get_unused(exp, &op_data->op_fid3,
                                                 &cancels, LCK_EX,
                                                 MDS_INODELOCK_FULL);

        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_UNLINK);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);

        rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        mdc_unlink_pack(req, op_data);

        req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
                             obd->u.cli.cl_max_mds_easize);
        req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
                             obd->u.cli.cl_max_mds_cookiesize);
        ptlrpc_request_set_replen(req);

        *request = req;

        rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
        if (rc == -ERESTARTSYS)
                rc = 0;
        RETURN(rc);
}
int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
             struct ptlrpc_request **request)
{
        CFS_LIST_HEAD(cancels);
        struct obd_device *obd = exp->exp_obd;
        struct ptlrpc_request *req;
        int count = 0, rc;
        ENTRY;

        if ((op_data->op_flags & MF_MDC_CANCEL_FID2) &&
            (fid_is_sane(&op_data->op_fid2)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid2,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);
        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)))
                count += mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                 &cancels, LCK_EX,
                                                 MDS_INODELOCK_UPDATE);

        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_LINK);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        mdc_set_capa_size(req, &RMF_CAPA2, op_data->op_capa2);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);

        rc = mdc_prep_elc_req(exp, req, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        mdc_link_pack(req, op_data);
        ptlrpc_request_set_replen(req);

        rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
        *request = req;
        if (rc == -ERESTARTSYS)
                rc = 0;

        RETURN(rc);
}
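/*
 * Reader's outline, not a helper that exists in the tree: mdc_unlink(),
 * mdc_link(), mdc_create() and mdc_setattr() all share one request-building
 * skeleton:
 *
 *      count = mdc_resource_get_unused(...);   // collect this client's
 *                                              // unused DLM locks on the
 *                                              // FIDs the operation touches
 *      req = ptlrpc_request_alloc(imp, &RQF_MDS_REINT_*);
 *      req_capsule_set_size(...);              // size the variable fields
 *      rc = mdc_prep_elc_req(...);             // pack early lock cancels
 *      mdc_*_pack(req, op_data);               // pack the reint body
 *      ptlrpc_request_set_replen(req);
 *      rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);
 *
 * Piggybacking the cancels on the request (early lock cancel) spares the
 * server a blocking-callback round trip when it would otherwise have to
 * revoke those locks itself.
 */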
/**
 * Create a memory descriptor and attach it to a ME
 *
 * \param meh A handle for a ME to associate the new MD with.
 * \param umd Provides initial values for the user-visible parts of a MD.
 * Other than its use for initialization, there is no linkage between this
 * structure and the MD maintained by the LNet.
 * \param unlink A flag to indicate whether the MD is automatically unlinked
 * when it becomes inactive, either because the operation threshold drops to
 * zero or because the available memory becomes less than \a umd.max_size.
 * (Note that the check for unlinking a MD only occurs after the completion
 * of a successful operation on the MD.) The value LNET_UNLINK enables auto
 * unlinking; the value LNET_RETAIN disables it.
 * \param handle On successful returns, a handle to the newly created MD is
 * saved here. This handle can be used later in LNetMDUnlink().
 *
 * \retval 0       On success.
 * \retval -EINVAL If \a umd is not valid.
 * \retval -ENOMEM If new MD cannot be allocated.
 * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
 * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
 * calling LNetInvalidateHandle() on it.
 * \retval -EBUSY  If the ME pointed to by \a meh is already associated with
 * a MD.
 */
int
LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
             lnet_unlink_t unlink, lnet_handle_md_t *handle)
{
        CFS_LIST_HEAD(matches);
        CFS_LIST_HEAD(drops);
        struct lnet_me    *me;
        struct lnet_libmd *md;
        int                cpt;
        int                rc;

        LASSERT(the_lnet.ln_init);
        LASSERT(the_lnet.ln_refcount > 0);

        if (lnet_md_validate(&umd) != 0)
                return -EINVAL;

        if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) {
                CERROR("Invalid option: no MD_OP set\n");
                return -EINVAL;
        }

        md = lnet_md_alloc(&umd);
        if (md == NULL)
                return -ENOMEM;

        rc = lnet_md_build(md, &umd, unlink);
        cpt = lnet_cpt_of_cookie(meh.cookie);

        lnet_res_lock(cpt);
        if (rc != 0)
                goto failed;

        me = lnet_handle2me(&meh);
        if (me == NULL)
                rc = -ENOENT;
        else if (me->me_md != NULL)
                rc = -EBUSY;
        else
                rc = lnet_md_link(md, umd.eq_handle, cpt);
        if (rc != 0)
                goto failed;

        /* attach this MD to portal of ME and check if it matches any
         * blocked msgs on this portal */
        lnet_ptl_attach_md(me, md, &matches, &drops);

        lnet_md2handle(handle, md);

        lnet_res_unlock(cpt);

        lnet_drop_delayed_msg_list(&drops, "Bad match");
        lnet_recv_delayed_msg_list(&matches);

        return 0;

 failed:
        lnet_md_free_locked(md);
        lnet_res_unlock(cpt);
        return rc;
}
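/*
 * A minimal caller's sketch for LNetMDAttach(), assuming the classic
 * Portals-style LNet API (LNetMEAttach()/LNetMEUnlink() as documented for
 * this generation of the code). The portal index, match bits, and the
 * example_post_recv() wrapper itself are illustrative only, not symbols
 * from this file.
 */
static int example_post_recv(void *buf, unsigned int len,
                             lnet_handle_eq_t eqh, lnet_handle_md_t *mdh)
{
        lnet_process_id_t anyone = { .nid = LNET_NID_ANY,
                                     .pid = LNET_PID_ANY };
        lnet_handle_me_t  meh;
        lnet_md_t         umd;
        int               rc;

        /* Match PUTs from any peer carrying match bits 0x11 on portal 4
         * (both values chosen arbitrarily for the example). */
        rc = LNetMEAttach(4, anyone, 0x11, 0, LNET_UNLINK,
                          LNET_INS_AFTER, &meh);
        if (rc != 0)
                return rc;

        memset(&umd, 0, sizeof(umd));
        umd.start     = buf;
        umd.length    = len;
        umd.threshold = 1;              /* one operation, then inactive */
        umd.options   = LNET_MD_OP_PUT; /* required: at least one MD_OP */
        umd.eq_handle = eqh;            /* events delivered to this EQ */

        /* LNET_UNLINK: auto-unlink once the threshold drops to zero. */
        rc = LNetMDAttach(meh, umd, LNET_UNLINK, mdh);
        if (rc != 0)
                LNetMEUnlink(meh);      /* don't leak the bare ME */
        return rc;
}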
static int lcw_dispatch_main(void *data)
{
        int                 rc = 0;
        unsigned long       flags;
        struct lc_watchdog *lcw;
        CFS_LIST_HEAD      (zombies);

        ENTRY;

        cfs_daemonize("lc_watchdogd");

        SIGNAL_MASK_LOCK(current, flags);
        sigfillset(&current->blocked);
        RECALC_SIGPENDING;
        SIGNAL_MASK_UNLOCK(current, flags);

        cfs_complete(&lcw_start_completion);

        while (1) {
                int dumplog = 1;

                cfs_wait_event_interruptible(lcw_event_waitq,
                                             is_watchdog_fired(), rc);
                CDEBUG(D_INFO, "Watchdog got woken up...\n");
                if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags)) {
                        CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");

                        cfs_spin_lock_bh(&lcw_pending_timers_lock);
                        rc = !cfs_list_empty(&lcw_pending_timers);
                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
                        if (rc) {
                                CERROR("pending timers list was not empty at "
                                       "time of watchdog dispatch shutdown\n");
                        }
                        break;
                }

                cfs_spin_lock_bh(&lcw_pending_timers_lock);
                while (!cfs_list_empty(&lcw_pending_timers)) {
                        int is_dumplog;

                        lcw = cfs_list_entry(lcw_pending_timers.next,
                                             struct lc_watchdog, lcw_list);
                        /* +1 ref for callback to make sure lcw wouldn't be
                         * deleted after releasing lcw_pending_timers_lock */
                        lcw->lcw_refcount++;
                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);

                        /* lock ordering */
                        cfs_spin_lock_bh(&lcw->lcw_lock);
                        cfs_spin_lock_bh(&lcw_pending_timers_lock);

                        if (cfs_list_empty(&lcw->lcw_list)) {
                                /* already removed from pending list */
                                lcw->lcw_refcount--; /* -1 ref for callback */
                                if (lcw->lcw_refcount == 0)
                                        cfs_list_add(&lcw->lcw_list, &zombies);
                                cfs_spin_unlock_bh(&lcw->lcw_lock);
                                /* still hold lcw_pending_timers_lock */
                                continue;
                        }

                        cfs_list_del_init(&lcw->lcw_list);
                        lcw->lcw_refcount--; /* -1 ref for pending list */

                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
                        cfs_spin_unlock_bh(&lcw->lcw_lock);

                        CDEBUG(D_INFO, "found lcw for pid " LPPID "\n",
                               lcw->lcw_pid);
                        lcw_dump_stack(lcw);

                        is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
                        if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
                            (dumplog || !is_dumplog)) {
                                lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
                                if (dumplog && is_dumplog)
                                        dumplog = 0;
                        }

                        cfs_spin_lock_bh(&lcw_pending_timers_lock);
                        lcw->lcw_refcount--; /* -1 ref for callback */
                        if (lcw->lcw_refcount == 0)
                                cfs_list_add(&lcw->lcw_list, &zombies);
                }
                cfs_spin_unlock_bh(&lcw_pending_timers_lock);

                /* reap watchdogs whose refcount dropped to zero above */
                while (!cfs_list_empty(&zombies)) {
                        lcw = cfs_list_entry(zombies.next,
                                             struct lc_watchdog, lcw_list);
                        cfs_list_del(&lcw->lcw_list);
                        LIBCFS_FREE(lcw, sizeof(*lcw));
                }
        }

        cfs_complete(&lcw_stop_completion);

        RETURN(rc);
}
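/*
 * For context, a hedged sketch of how a service thread typically drives the
 * watchdogs this dispatcher reaps. The signatures below follow the libcfs
 * lc_watchdog API as I recall it for this code generation
 * (lc_watchdog_add()/lc_watchdog_touch()/lc_watchdog_delete()); treat them
 * as an assumption and check the local libcfs headers. The service-loop
 * helpers are illustrative placeholders.
 */
static void example_service_loop(void)
{
        /* Fire lc_watchdog_dumplog() if this thread stalls for over 30s. */
        struct lc_watchdog *watchdog =
                lc_watchdog_add(30, lc_watchdog_dumplog, NULL);

        while (service_has_work()) {       /* illustrative predicate */
                handle_one_request();      /* illustrative work item */
                /* Re-arm: proves the thread is still making progress. */
                lc_watchdog_touch(watchdog, 30);
        }

        lc_watchdog_delete(watchdog);
}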
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
               const void *data, int datalen, int mode, __u32 uid, __u32 gid,
               cfs_cap_t cap_effective, __u64 rdev,
               struct ptlrpc_request **request)
{
        struct ptlrpc_request *req;
        int level, rc;
        int count = 0;
        CFS_LIST_HEAD(cancels);
        ENTRY;

        /* In case the upper layer did not allocate the fid, do it now. */
        if (!fid_is_sane(&op_data->op_fid2)) {
                /*
                 * mdc_fid_alloc() may return errno 1 on a switch to a new
                 * sequence; handle this.
                 */
                rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
                if (rc < 0) {
                        CERROR("Can't alloc new fid, rc %d\n", rc);
                        RETURN(rc);
                }
        }

        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);

        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_CREATE_RMT_ACL);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);
        req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
                             data && datalen ? datalen : 0);

        rc = mdc_prep_elc_req(exp, req, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        /*
         * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2]
         * with tgt, for symlinks or lov MD data.
         */
        mdc_create_pack(req, op_data, data, datalen, mode, uid, gid,
                        cap_effective, rdev);
        ptlrpc_request_set_replen(req);

        level = LUSTRE_IMP_FULL;
 resend:
        rc = mdc_reint(req, exp->exp_obd->u.cli.cl_rpc_lock, level);

        /* Resend if we were told to. */
        if (rc == -ERESTARTSYS) {
                level = LUSTRE_IMP_RECOVER;
                goto resend;
        } else if (rc == 0) {
                struct mdt_body *body;
                struct lustre_capa *capa;

                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                LASSERT(body);
                if (body->valid & OBD_MD_FLMDSCAPA) {
                        capa = req_capsule_server_get(&req->rq_pill,
                                                      &RMF_CAPA1);
                        if (capa == NULL)
                                rc = -EPROTO;
                }
        }

        *request = req;
        RETURN(rc);
}
/* If mdc_setattr is called with an 'iattr', then it is a normal RPC that
 * should take the normal semaphore and go to the normal portal.
 *
 * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
 * magic open-path setattr that should take the setattr semaphore and
 * go to the setattr portal. */
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
                void *ea, int ealen, void *ea2, int ea2len,
                struct ptlrpc_request **request, struct md_open_data **mod)
{
        CFS_LIST_HEAD(cancels);
        struct ptlrpc_request *req;
        struct mdc_rpc_lock *rpc_lock;
        struct obd_device *obd = exp->exp_obd;
        int count = 0, rc;
        __u64 bits;
        ENTRY;

        LASSERT(op_data != NULL);

        bits = MDS_INODELOCK_UPDATE;
        if (op_data->op_attr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID))
                bits |= MDS_INODELOCK_LOOKUP;
        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX, bits);

        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_SETATTR);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        if ((op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) == 0)
                req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH,
                                     RCL_CLIENT, 0);
        req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, ealen);
        req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT,
                             ea2len);

        rc = mdc_prep_elc_req(exp, req, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        if (op_data->op_attr.ia_valid & ATTR_FROM_OPEN) {
                req->rq_request_portal = MDS_SETATTR_PORTAL;
                ptlrpc_at_set_req_timeout(req);
                rpc_lock = obd->u.cli.cl_setattr_lock;
        } else {
                rpc_lock = obd->u.cli.cl_rpc_lock;
        }

        if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
                CDEBUG(D_INODE, "setting mtime "CFS_TIME_T
                       ", ctime "CFS_TIME_T"\n",
                       LTIME_S(op_data->op_attr.ia_mtime),
                       LTIME_S(op_data->op_attr.ia_ctime));
        mdc_setattr_pack(req, op_data, ea, ealen, ea2, ea2len);

        ptlrpc_request_set_replen(req);
        if (mod && (op_data->op_flags & MF_EPOCH_OPEN) &&
            req->rq_import->imp_replayable) {
                LASSERT(*mod == NULL);

                *mod = obd_mod_alloc();
                if (*mod == NULL) {
                        DEBUG_REQ(D_ERROR, req, "Can't allocate "
                                  "md_open_data");
                } else {
                        req->rq_replay = 1;
                        req->rq_cb_data = *mod;
                        (*mod)->mod_open_req = req;
                        req->rq_commit_cb = mdc_commit_open;
                        /**
                         * Take an extra reference on \var mod, it protects
                         * \var mod from being freed on eviction (commit
                         * callback is called despite rq_replay flag).
                         * Will be put on mdc_done_writing().
                         */
                        obd_mod_get(*mod);
                }
        }

        rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);

        /* Save the obtained info in the original RPC for the replay case. */
        if (rc == 0 && (op_data->op_flags & MF_EPOCH_OPEN)) {
                struct mdt_ioepoch *epoch;
                struct mdt_body *body;

                epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                LASSERT(epoch != NULL);
                LASSERT(body != NULL);
                epoch->handle = body->handle;
                epoch->ioepoch = body->ioepoch;
                req->rq_replay_cb = mdc_replay_open;
        /** bug 3633, open may be committed and estale answer is not error */
        } else if (rc == -ESTALE && (op_data->op_flags & MF_SOM_CHANGE)) {
                rc = 0;
        } else if (rc == -ERESTARTSYS) {
                rc = 0;
        }
        *request = req;
        if (rc && req->rq_commit_cb) {
                /* Put an extra reference on \var mod on error case. */
                obd_mod_put(*mod);
                req->rq_commit_cb(req);
        }
        RETURN(rc);
}
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
               const void *data, int datalen, int mode, __u32 uid, __u32 gid,
               cfs_cap_t cap_effective, __u64 rdev,
               struct ptlrpc_request **request)
{
        struct ptlrpc_request *req;
        int level, rc;
        int count, resends = 0;
        struct obd_import *import = exp->exp_obd->u.cli.cl_import;
        int generation = import->imp_generation;
        CFS_LIST_HEAD(cancels);
        ENTRY;

        /* In case the upper layer did not allocate the fid, do it now. */
        if (!fid_is_sane(&op_data->op_fid2)) {
                /*
                 * mdc_fid_alloc() may return errno 1 on a switch to a new
                 * sequence; handle this.
                 */
                rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
                if (rc < 0) {
                        CERROR("Can't alloc new fid, rc %d\n", rc);
                        RETURN(rc);
                }
        }

rebuild:
        count = 0;
        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);

        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_CREATE_RMT_ACL);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);
        req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
                             data && datalen ? datalen : 0);

        rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        /*
         * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2]
         * with tgt, for symlinks or lov MD data.
         */
        mdc_create_pack(req, op_data, data, datalen, mode, uid, gid,
                        cap_effective, rdev);
        ptlrpc_request_set_replen(req);

        /* ask ptlrpc not to resend on EINPROGRESS since we have our own
         * retry logic here */
        req->rq_no_retry_einprogress = 1;

        if (resends) {
                req->rq_generation_set = 1;
                req->rq_import_generation = generation;
                req->rq_sent = cfs_time_current_sec() + resends;
        }
        level = LUSTRE_IMP_FULL;
 resend:
        rc = mdc_reint(req, exp->exp_obd->u.cli.cl_rpc_lock, level);

        /* Resend if we were told to. */
        if (rc == -ERESTARTSYS) {
                level = LUSTRE_IMP_RECOVER;
                goto resend;
        } else if (rc == -EINPROGRESS) {
                /* Retry the create indefinitely until it succeeds or fails
                 * with some other error code. */
                ptlrpc_req_finished(req);
                resends++;
                CDEBUG(D_HA, "%s: resend:%d create on "DFID"/"DFID"\n",
                       exp->exp_obd->obd_name, resends,
                       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2));
                if (generation == import->imp_generation) {
                        goto rebuild;
                } else {
                        CDEBUG(D_HA, "resend cross eviction\n");
                        RETURN(-EIO);
                }
        } else if (rc == 0) {
                struct mdt_body *body;
                struct lustre_capa *capa;

                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                LASSERT(body);
                if (body->valid & OBD_MD_FLMDSCAPA) {
                        capa = req_capsule_server_get(&req->rq_pill,
                                                      &RMF_CAPA1);
                        if (capa == NULL)
                                rc = -EPROTO;
                }
        }

        *request = req;
        RETURN(rc);
}
/**
 * An implementation of cl_io_operations::cio_io_submit() method for osc
 * layer. Iterates over pages in the in-queue, prepares each for io by
 * calling cl_page_prep() and then submits them through osc_page_submit(),
 * batching the pages into sync-io RPCs of at most cl_max_pages_per_rpc
 * pages each.
 */
static int osc_io_submit(const struct lu_env *env,
                         const struct cl_io_slice *ios,
                         enum cl_req_type crt, struct cl_2queue *queue)
{
        struct cl_page    *page;
        struct cl_page    *tmp;
        struct client_obd *cli = NULL;
        struct osc_object *osc = NULL; /* to keep gcc happy */
        struct osc_page   *opg;
        struct cl_io      *io;
        CFS_LIST_HEAD     (list);

        struct cl_page_list *qin  = &queue->c2_qin;
        struct cl_page_list *qout = &queue->c2_qout;
        int queued = 0;
        int result = 0;
        int cmd;
        int brw_flags;
        int max_pages;

        LASSERT(qin->pl_nr > 0);

        CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt);

        osc = cl2osc(ios->cis_obj);
        cli = osc_cli(osc);
        max_pages = cli->cl_max_pages_per_rpc;

        cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
        brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0;

        /*
         * NOTE: here @page is a top-level page. This is done to avoid
         *       creation of sub-page-list.
         */
        cl_page_list_for_each_safe(page, tmp, qin) {
                struct osc_async_page *oap;

                /* Top level IO. */
                io = page->cp_owner;
                LASSERT(io != NULL);

                opg = osc_cl_page_osc(page, osc);
                oap = &opg->ops_oap;
                LASSERT(osc == oap->oap_obj);

                if (!cfs_list_empty(&oap->oap_pending_item) ||
                    !cfs_list_empty(&oap->oap_rpc_item)) {
                        CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
                               oap, opg);
                        result = -EBUSY;
                        break;
                }

                result = cl_page_prep(env, io, page, crt);
                if (result != 0) {
                        LASSERT(result < 0);
                        if (result != -EALREADY)
                                break;
                        /*
                         * Handle -EALREADY error: for read case, the page is
                         * already in UPTODATE state; for write, the page
                         * is not dirty.
                         */
                        result = 0;
                        continue;
                }

                cl_page_list_move(qout, qin, page);
                spin_lock(&oap->oap_lock);
                oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY;
                oap->oap_async_flags |= ASYNC_COUNT_STABLE;
                spin_unlock(&oap->oap_lock);

                osc_page_submit(env, opg, crt, brw_flags);
                cfs_list_add_tail(&oap->oap_pending_item, &list);
                if (++queued == max_pages) {
                        queued = 0;
                        result = osc_queue_sync_pages(env, osc, &list, cmd,
                                                      brw_flags);
                        if (result < 0)
                                break;
                }
        }

        if (queued > 0)
                result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags);

        CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result);
        return qout->pl_nr > 0 ? 0 : result;
}
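/*
 * The batching arithmetic above, in isolation. A hedged, self-contained
 * sketch (illustrative names only; example_submit_all() and the
 * submit_batch callback are not Lustre symbols) of the
 * flush-every-max_pages pattern that osc_io_submit() uses:
 */
static int example_submit_all(int npages, int max_per_rpc,
                              int (*submit_batch)(int nr))
{
        int i;
        int queued = 0;
        int result = 0;

        for (i = 0; i < npages; i++) {
                /* ...stage page i on a pending list... */
                if (++queued == max_per_rpc) {
                        /* a full RPC's worth is staged: flush it */
                        queued = 0;
                        result = submit_batch(max_per_rpc);
                        if (result < 0)
                                break;
                }
        }
        /*
         * Flush the final partial batch; as in osc_io_submit(), pages
         * staged before an error break still get submitted here.
         */
        if (queued > 0)
                result = submit_batch(queued);
        return result;
}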