Example #1
int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
               struct ptlrpc_request **request)
{
        CFS_LIST_HEAD(cancels);
        struct obd_device *obd = class_exp2obd(exp);
        struct ptlrpc_request *req = *request;
        int count = 0, rc;
        ENTRY;

        LASSERT(req == NULL);

        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)) &&
            !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);
        if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
            (fid_is_sane(&op_data->op_fid3)) &&
            !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
                count += mdc_resource_get_unused(exp, &op_data->op_fid3,
                                                 &cancels, LCK_EX,
                                                 MDS_INODELOCK_FULL);
        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_UNLINK);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);

        rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        mdc_unlink_pack(req, op_data);

        req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
                             obd->u.cli.cl_max_mds_easize);
        req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
                             obd->u.cli.cl_max_mds_cookiesize);
        ptlrpc_request_set_replen(req);

        *request = req;

        rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
        if (rc == -ERESTARTSYS)
                rc = 0;
        RETURN(rc);
}
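A minimal caller-side sketch (hypothetical, not from the source; unlink_example is an assumed name): the LASSERT above requires *request to be NULL on entry, and the request is handed back even on failure, so the caller frees it in both cases.

static int unlink_example(struct obd_export *exp, struct md_op_data *op_data)
{
        struct ptlrpc_request *request = NULL; /* must be NULL on entry */
        int rc;

        rc = mdc_unlink(exp, op_data, &request);
        /* ... inspect the reply on success ... */
        if (request != NULL)
                ptlrpc_req_finished(request); /* drop the request reference */
        return rc;
}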
Example #2
int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
             struct ptlrpc_request **request)
{
        CFS_LIST_HEAD(cancels);
        struct obd_device *obd = exp->exp_obd;
        struct ptlrpc_request *req;
        int count = 0, rc;
        ENTRY;

        if ((op_data->op_flags & MF_MDC_CANCEL_FID2) &&
            (fid_is_sane(&op_data->op_fid2)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid2,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);
        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)))
                count += mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                 &cancels, LCK_EX,
                                                 MDS_INODELOCK_UPDATE);

        req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_REINT_LINK);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        mdc_set_capa_size(req, &RMF_CAPA2, op_data->op_capa2);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);

        rc = mdc_prep_elc_req(exp, req, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        mdc_link_pack(req, op_data);
        ptlrpc_request_set_replen(req);

        rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
        *request = req;
        if (rc == -ERESTARTSYS)
                rc = 0;

        RETURN(rc);
}
Example #3
/**
 * Create a memory descriptor and attach it to a ME
 *
 * \param meh A handle for a ME to associate the new MD with.
 * \param umd Provides initial values for the user-visible parts of a MD.
 * Other than its use for initialization, there is no linkage between this
 * structure and the MD maintained by the LNet.
 * \param unlink A flag to indicate whether the MD is automatically unlinked
 * when it becomes inactive, either because the operation threshold drops to
 * zero or because the available memory becomes less than \a umd.max_size.
 * (Note that the check for unlinking a MD only occurs after the completion
 * of a successful operation on the MD.) The value LNET_UNLINK enables auto
 * unlinking; the value LNET_RETAIN disables it.
 * \param handle On successful returns, a handle to the newly created MD is
 * saved here. This handle can be used later in LNetMDUnlink().
 *
 * \retval 0       On success.
 * \retval -EINVAL If \a umd is not valid.
 * \retval -ENOMEM If new MD cannot be allocated.
 * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a
 * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by
 * calling LNetInvalidateHandle() on it.
 * \retval -EBUSY  If the ME pointed to by \a meh is already associated with
 * a MD.
 */
int
LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
	     lnet_unlink_t unlink, lnet_handle_md_t *handle)
{
	CFS_LIST_HEAD(matches);
	CFS_LIST_HEAD(drops);
	struct lnet_me		*me;
	struct lnet_libmd	*md;
	int			cpt;
	int			rc;

	LASSERT(the_lnet.ln_init);
	LASSERT(the_lnet.ln_refcount > 0);

	if (lnet_md_validate(&umd) != 0)
		return -EINVAL;

	if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) {
		CERROR("Invalid option: no MD_OP set\n");
		return -EINVAL;
	}

	md = lnet_md_alloc(&umd);
	if (md == NULL)
		return -ENOMEM;

	rc = lnet_md_build(md, &umd, unlink);
	cpt = lnet_cpt_of_cookie(meh.cookie);

	lnet_res_lock(cpt);
	if (rc != 0)
		goto failed;

	me = lnet_handle2me(&meh);
	if (me == NULL)
		rc = -ENOENT;
	else if (me->me_md != NULL)
		rc = -EBUSY;
	else
		rc = lnet_md_link(md, umd.eq_handle, cpt);

	if (rc != 0)
		goto failed;

	/* attach this MD to portal of ME and check if it matches any
	 * blocked msgs on this portal */
	lnet_ptl_attach_md(me, md, &matches, &drops);

	lnet_md2handle(handle, md);

	lnet_res_unlock(cpt);

	lnet_drop_delayed_msg_list(&drops, "Bad match");
	lnet_recv_delayed_msg_list(&matches);

	return 0;

 failed:
	lnet_md_free_locked(md);

	lnet_res_unlock(cpt);
	return rc;
}
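A minimal usage sketch (hypothetical, not from the source; post_recv_buffer and its parameters are assumptions): attach an ME on a portal, then hang a single-use PUT buffer off it. Per the -ENOENT note above, an eq_handle invalidated with LNetInvalidateHandle() is legal.

static int post_recv_buffer(unsigned int portal, __u64 match_bits,
			    void *buffer, unsigned int buflen,
			    lnet_handle_md_t *mdh)
{
	lnet_process_id_t anyid = { .nid = LNET_NID_ANY, .pid = LNET_PID_ANY };
	lnet_handle_me_t meh;
	lnet_md_t umd;
	int rc;

	rc = LNetMEAttach(portal, anyid, match_bits, 0, LNET_UNLINK,
			  LNET_INS_AFTER, &meh);
	if (rc != 0)
		return rc;

	memset(&umd, 0, sizeof(umd));
	umd.start     = buffer;
	umd.length    = buflen;
	umd.threshold = 1;			/* one PUT, then auto-unlink */
	umd.options   = LNET_MD_OP_PUT;
	LNetInvalidateHandle(&umd.eq_handle);	/* no event queue attached */

	rc = LNetMDAttach(meh, umd, LNET_UNLINK, mdh);
	if (rc != 0)
		LNetMEUnlink(meh);		/* tear down the orphaned ME */
	return rc;
}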
Example #4
static int lcw_dispatch_main(void *data)
{
        int                 rc = 0;
        unsigned long       flags;
        struct lc_watchdog *lcw;
        CFS_LIST_HEAD      (zombies);

        ENTRY;

        cfs_daemonize("lc_watchdogd");

        SIGNAL_MASK_LOCK(current, flags);
        sigfillset(&current->blocked);
        RECALC_SIGPENDING;
        SIGNAL_MASK_UNLOCK(current, flags);

        cfs_complete(&lcw_start_completion);

        while (1) {
                int dumplog = 1;

                cfs_wait_event_interruptible(lcw_event_waitq,
                                             is_watchdog_fired(), rc);
                CDEBUG(D_INFO, "Watchdog got woken up...\n");
                if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags)) {
                        CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");

                        cfs_spin_lock_bh(&lcw_pending_timers_lock);
                        rc = !cfs_list_empty(&lcw_pending_timers);
                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
                        if (rc) {
                                CERROR("pending timers list was not empty at "
                                       "time of watchdog dispatch shutdown\n");
                        }
                        break;
                }

                cfs_spin_lock_bh(&lcw_pending_timers_lock);
                while (!cfs_list_empty(&lcw_pending_timers)) {
                        int is_dumplog;

                        lcw = cfs_list_entry(lcw_pending_timers.next,
                                             struct lc_watchdog, lcw_list);
                        /* +1 ref for callback to make sure lcw isn't
                         * deleted after releasing lcw_pending_timers_lock */
                        lcw->lcw_refcount++;
                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);

                        /* lock ordering: take lcw_lock before
                         * lcw_pending_timers_lock */
                        cfs_spin_lock_bh(&lcw->lcw_lock);
                        cfs_spin_lock_bh(&lcw_pending_timers_lock);

                        if (cfs_list_empty(&lcw->lcw_list)) {
                                /* already removed from pending list */
                                lcw->lcw_refcount--; /* -1 ref for callback */
                                if (lcw->lcw_refcount == 0)
                                        cfs_list_add(&lcw->lcw_list, &zombies);
                                cfs_spin_unlock_bh(&lcw->lcw_lock);
                                /* still hold lcw_pending_timers_lock */
                                continue;
                        }

                        cfs_list_del_init(&lcw->lcw_list);
                        lcw->lcw_refcount--; /* -1 ref for pending list */

                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
                        cfs_spin_unlock_bh(&lcw->lcw_lock);

                        CDEBUG(D_INFO, "found lcw for pid " LPPID "\n",
                               lcw->lcw_pid);
                        lcw_dump_stack(lcw);

                        is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
                        if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
                            (dumplog || !is_dumplog)) {
                                lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
                                if (dumplog && is_dumplog)
                                        dumplog = 0;
                        }

                        cfs_spin_lock_bh(&lcw_pending_timers_lock);
                        lcw->lcw_refcount--; /* -1 ref for callback */
                        if (lcw->lcw_refcount == 0)
                                cfs_list_add(&lcw->lcw_list, &zombies);
                }
                cfs_spin_unlock_bh(&lcw_pending_timers_lock);

                while (!cfs_list_empty(&zombies)) {
                        lcw = cfs_list_entry(zombies.next,
                                             struct lc_watchdog, lcw_list);
                        cfs_list_del(&lcw->lcw_list);
                        LIBCFS_FREE(lcw, sizeof(*lcw));
                }
        }

        cfs_complete(&lcw_stop_completion);

        RETURN(rc);
}
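A hypothetical service-thread sketch (the service_* helpers are placeholders): the dispatch loop above only fires callbacks for expired watchdogs; a watched thread registers one, re-arms it before each unit of work, and removes it on exit.

static void watched_service_loop(void)
{
        /* lc_watchdog_dumplog is the default callback tested above */
        struct lc_watchdog *watchdog =
                lc_watchdog_add(30, lc_watchdog_dumplog, NULL);

        while (!service_should_stop()) {        /* assumed helper */
                lc_watchdog_touch(watchdog, 30);/* re-arm before each request */
                service_handle_request();       /* assumed helper */
        }
        lc_watchdog_delete(watchdog);
}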
Example #5
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
               const void *data, int datalen, int mode, __u32 uid, __u32 gid,
               cfs_cap_t cap_effective, __u64 rdev,
               struct ptlrpc_request **request)
{
        struct ptlrpc_request *req;
        int level, rc;
        int count = 0;
        CFS_LIST_HEAD(cancels);
        ENTRY;

        /* If the upper layer did not allocate the fid, do it now. */
        if (!fid_is_sane(&op_data->op_fid2)) {
                /*
                 * mdc_fid_alloc() may return 1 when switching to a new
                 * sequence; handle that case here.
                 */
                rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
                if (rc < 0) {
                        CERROR("Can't alloc new fid, rc %d\n", rc);
                        RETURN(rc);
                }
        }

        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);

        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_CREATE_RMT_ACL);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);
        req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
                             data && datalen ? datalen : 0);

        rc = mdc_prep_elc_req(exp, req, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        /*
         * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2] with
         * tgt, for symlinks or lov MD data.
         */
        mdc_create_pack(req, op_data, data, datalen, mode, uid,
                        gid, cap_effective, rdev);

        ptlrpc_request_set_replen(req);

        level = LUSTRE_IMP_FULL;
 resend:
        rc = mdc_reint(req, exp->exp_obd->u.cli.cl_rpc_lock, level);

        /* Resend if we were told to. */
        if (rc == -ERESTARTSYS) {
                level = LUSTRE_IMP_RECOVER;
                goto resend;
        } else if (rc == 0) {
                struct mdt_body *body;
                struct lustre_capa *capa;

                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                LASSERT(body);
                if (body->valid & OBD_MD_FLMDSCAPA) {
                        capa = req_capsule_server_get(&req->rq_pill,
                                                      &RMF_CAPA1);
                        if (capa == NULL)
                                rc = -EPROTO;
                }
        }

        *request = req;
        RETURN(rc);
}
Example #6
/* If mdc_setattr is called with an 'iattr', then it is a normal RPC that
 * should take the normal semaphore and go to the normal portal.
 *
 * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
 * magic open-path setattr that should take the setattr semaphore and
 * go to the setattr portal. */
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
                void *ea, int ealen, void *ea2, int ea2len,
                struct ptlrpc_request **request, struct md_open_data **mod)
{
        CFS_LIST_HEAD(cancels);
        struct ptlrpc_request *req;
        struct mdc_rpc_lock *rpc_lock;
        struct obd_device *obd = exp->exp_obd;
        int count = 0, rc;
        __u64 bits;
        ENTRY;

        LASSERT(op_data != NULL);

        bits = MDS_INODELOCK_UPDATE;
        if (op_data->op_attr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID))
                bits |= MDS_INODELOCK_LOOKUP;
        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX, bits);
        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_SETATTR);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        if ((op_data->op_flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) == 0)
                req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH, RCL_CLIENT,
                                     0);
        req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, ealen);
        req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT,
                             ea2len);

        rc = mdc_prep_elc_req(exp, req, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        if (op_data->op_attr.ia_valid & ATTR_FROM_OPEN) {
                req->rq_request_portal = MDS_SETATTR_PORTAL;
                ptlrpc_at_set_req_timeout(req);
                rpc_lock = obd->u.cli.cl_setattr_lock;
        } else {
                rpc_lock = obd->u.cli.cl_rpc_lock;
        }

        if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
                CDEBUG(D_INODE, "setting mtime "CFS_TIME_T
                       ", ctime "CFS_TIME_T"\n",
                       LTIME_S(op_data->op_attr.ia_mtime),
                       LTIME_S(op_data->op_attr.ia_ctime));
        mdc_setattr_pack(req, op_data, ea, ealen, ea2, ea2len);

        ptlrpc_request_set_replen(req);
        if (mod && (op_data->op_flags & MF_EPOCH_OPEN) &&
            req->rq_import->imp_replayable) {
                LASSERT(*mod == NULL);

                *mod = obd_mod_alloc();
                if (*mod == NULL) {
                        DEBUG_REQ(D_ERROR, req, "Can't allocate "
                                  "md_open_data");
                } else {
                        req->rq_replay = 1;
                        req->rq_cb_data = *mod;
                        (*mod)->mod_open_req = req;
                        req->rq_commit_cb = mdc_commit_open;
                        /**
                         * Take an extra reference on \var mod; it protects
                         * \var mod from being freed on eviction (the commit
                         * callback is called despite the rq_replay flag).
                         * The reference is dropped in mdc_done_writing().
                         */
                        obd_mod_get(*mod);
                }
        }

        rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);

        /* Save the obtained info in the original RPC for the replay case. */
        if (rc == 0 && (op_data->op_flags & MF_EPOCH_OPEN)) {
                struct mdt_ioepoch *epoch;
                struct mdt_body  *body;

                epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                LASSERT(epoch != NULL);
                LASSERT(body != NULL);
                epoch->handle = body->handle;
                epoch->ioepoch = body->ioepoch;
                req->rq_replay_cb = mdc_replay_open;
        /** bug 3633: the open may already be committed, in which case an
         *  ESTALE reply is not an error */
        } else if (rc == -ESTALE && (op_data->op_flags & MF_SOM_CHANGE)) {
                rc = 0;
        } else if (rc == -ERESTARTSYS) {
                rc = 0;
        }
        *request = req;
        if (rc && req->rq_commit_cb) {
                /* Put the extra reference on \var mod in the error case. */
                obd_mod_put(*mod);
                req->rq_commit_cb(req);
        }
        RETURN(rc);
}
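A hypothetical caller-side sketch (not from the source; setattr_example is an assumed name): a plain setattr takes the normal RPC lock and portal, while on the MF_EPOCH_OPEN path the extra reference taken above on the md_open_data is expected to be released later in mdc_done_writing().

static int setattr_example(struct obd_export *exp, struct md_op_data *op_data)
{
        struct ptlrpc_request *request = NULL;
        struct md_open_data *mod = NULL;
        int rc;

        rc = mdc_setattr(exp, op_data, NULL, 0, NULL, 0, &request, &mod);
        if (request != NULL)
                ptlrpc_req_finished(request); /* drop the request reference */
        /* if mod was allocated (epoch-open case), its extra reference is
         * dropped later via mdc_done_writing() */
        return rc;
}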
Example #7
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
               const void *data, int datalen, int mode, __u32 uid, __u32 gid,
               cfs_cap_t cap_effective, __u64 rdev,
               struct ptlrpc_request **request)
{
        struct ptlrpc_request *req;
        int level, rc;
        int count, resends = 0;
        struct obd_import *import = exp->exp_obd->u.cli.cl_import;
        int generation = import->imp_generation;
        CFS_LIST_HEAD(cancels);
        ENTRY;

        /* If the upper layer did not allocate the fid, do it now. */
        if (!fid_is_sane(&op_data->op_fid2)) {
                /*
                 * mdc_fid_alloc() may return 1 when switching to a new
                 * sequence; handle that case here.
                 */
                rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
                if (rc < 0) {
                        CERROR("Can't alloc new fid, rc %d\n", rc);
                        RETURN(rc);
                }
        }

rebuild:
        count = 0;
        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
            (fid_is_sane(&op_data->op_fid1)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);

        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_CREATE_RMT_ACL);
        if (req == NULL) {
                ldlm_lock_list_put(&cancels, l_bl_ast, count);
                RETURN(-ENOMEM);
        }
        mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
                             op_data->op_namelen + 1);
        req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
                             data && datalen ? datalen : 0);

        rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        /*
         * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2] with
         * tgt, for symlinks or lov MD data.
         */
        mdc_create_pack(req, op_data, data, datalen, mode, uid,
                        gid, cap_effective, rdev);

        ptlrpc_request_set_replen(req);

        /* ask ptlrpc not to resend on EINPROGRESS since we have our own
         * retry logic here */
        req->rq_no_retry_einprogress = 1;

        if (resends) {
                req->rq_generation_set = 1;
                req->rq_import_generation = generation;
                req->rq_sent = cfs_time_current_sec() + resends;
        }
        level = LUSTRE_IMP_FULL;
 resend:
        rc = mdc_reint(req, exp->exp_obd->u.cli.cl_rpc_lock, level);

        /* Resend if we were told to. */
        if (rc == -ERESTARTSYS) {
                level = LUSTRE_IMP_RECOVER;
                goto resend;
        } else if (rc == -EINPROGRESS) {
                /* Retry create infinitely until succeed or get other
                 * error code. */
                ptlrpc_req_finished(req);
                resends++;

                CDEBUG(D_HA, "%s: resend:%d create on "DFID"/"DFID"\n",
                       exp->exp_obd->obd_name, resends,
                       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2));

                if (generation == import->imp_generation) {
                        goto rebuild;
                } else {
                        CDEBUG(D_HA, "resend cross eviction\n");
                        RETURN(-EIO);
                }
        } else if (rc == 0) {
                struct mdt_body *body;
                struct lustre_capa *capa;

                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                LASSERT(body);
                if (body->valid & OBD_MD_FLMDSCAPA) {
                        capa = req_capsule_server_get(&req->rq_pill,
                                                      &RMF_CAPA1);
                        if (capa == NULL)
                                rc = -EPROTO;
                }
        }

        *request = req;
        RETURN(rc);
}
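A generic sketch of the retry policy above (send_once() is an assumed placeholder): retry on -EINPROGRESS only while the import generation is unchanged; a generation bump means an eviction happened in between, and the resend must not cross it.

static int retry_within_generation(struct obd_export *exp)
{
        struct obd_import *import = exp->exp_obd->u.cli.cl_import;
        int generation = import->imp_generation;
        int resends = 0;
        int rc;

        do {
                rc = send_once(exp, resends++); /* assumed helper */
        } while (rc == -EINPROGRESS && generation == import->imp_generation);

        /* a resend across an eviction becomes a hard error */
        return rc == -EINPROGRESS ? -EIO : rc;
}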
Example #8
/**
 * An implementation of the cl_io_operations::cio_io_submit() method for the
 * osc layer. Iterates over pages in the in-queue, prepares each for I/O by
 * calling cl_page_prep(), submits it through osc_page_submit(), and flushes
 * the accumulated pages to the server via osc_queue_sync_pages() in batches
 * of at most cl_max_pages_per_rpc.
 */
static int osc_io_submit(const struct lu_env *env,
			 const struct cl_io_slice *ios,
			 enum cl_req_type crt, struct cl_2queue *queue)
{
	struct cl_page    *page;
	struct cl_page    *tmp;
	struct client_obd *cli  = NULL;
	struct osc_object *osc  = NULL; /* to keep gcc happy */
	struct osc_page   *opg;
	struct cl_io      *io;
	CFS_LIST_HEAD(list);

	struct cl_page_list *qin      = &queue->c2_qin;
	struct cl_page_list *qout     = &queue->c2_qout;
	int queued = 0;
	int result = 0;
	int cmd;
	int brw_flags;
	int max_pages;

	LASSERT(qin->pl_nr > 0);

	CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt);

	osc = cl2osc(ios->cis_obj);
	cli = osc_cli(osc);
	max_pages = cli->cl_max_pages_per_rpc;

	cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
	brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0;

	/*
	 * NOTE: here @page is a top-level page. This is done to avoid
	 *       creation of sub-page-list.
	 */
	cl_page_list_for_each_safe(page, tmp, qin) {
		struct osc_async_page *oap;

		/* Top level IO. */
		io = page->cp_owner;
		LASSERT(io != NULL);

		opg = osc_cl_page_osc(page, osc);
		oap = &opg->ops_oap;
		LASSERT(osc == oap->oap_obj);

		if (!cfs_list_empty(&oap->oap_pending_item) ||
		    !cfs_list_empty(&oap->oap_rpc_item)) {
			CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
			       oap, opg);
			result = -EBUSY;
			break;
		}

		result = cl_page_prep(env, io, page, crt);
		if (result != 0) {
			LASSERT(result < 0);
			if (result != -EALREADY)
				break;
			/*
			 * Handle -EALREADY error: for read case, the page is
			 * already in UPTODATE state; for write, the page
			 * is not dirty.
			 */
			result = 0;
			continue;
		}

		cl_page_list_move(qout, qin, page);
		spin_lock(&oap->oap_lock);
		oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY;
		oap->oap_async_flags |= ASYNC_COUNT_STABLE;
		spin_unlock(&oap->oap_lock);

		osc_page_submit(env, opg, crt, brw_flags);
		cfs_list_add_tail(&oap->oap_pending_item, &list);
		if (++queued == max_pages) {
			queued = 0;
			result = osc_queue_sync_pages(env, osc, &list, cmd,
						      brw_flags);
			if (result < 0)
				break;
		}
	}

	if (queued > 0)
		result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags);

	CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result);
	return qout->pl_nr > 0 ? 0 : result;
}
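A generic sketch of the batching pattern above (struct work_item and the helpers are assumed placeholders): accumulate entries on a local list, flush every time max_pages are queued, and flush the remainder after the scan completes.

static int submit_in_batches(void *scan_ctx, int max_pages)
{
	CFS_LIST_HEAD(list);
	struct work_item *item;		/* assumed type with a wi_link member */
	int queued = 0;
	int result = 0;

	while ((item = next_item(scan_ctx)) != NULL) {	/* assumed helper */
		cfs_list_add_tail(&item->wi_link, &list);
		if (++queued == max_pages) {
			queued = 0;
			result = flush_batch(&list);	/* assumed helper */
			if (result < 0)
				break;
		}
	}
	/* flush whatever is left after a clean scan */
	if (queued > 0)
		result = flush_batch(&list);
	return result;
}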