Example No. 1
/*
 * could be called frequently for query (@nr_to_scan == 0).
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static int enc_pools_shrink(int nr_to_scan, unsigned int gfp_mask)
{
        if (unlikely(nr_to_scan != 0)) {
                cfs_spin_lock(&page_pools.epp_lock);
                nr_to_scan = min(nr_to_scan, (int) page_pools.epp_free_pages -
                                             PTLRPC_MAX_BRW_PAGES);
                if (nr_to_scan > 0) {
                        enc_pools_release_free_pages(nr_to_scan);
                        CDEBUG(D_SEC, "released %d pages, %ld left\n",
                               nr_to_scan, page_pools.epp_free_pages);

                        page_pools.epp_st_shrinks++;
                        page_pools.epp_last_shrink = cfs_time_current_sec();
                }
                cfs_spin_unlock(&page_pools.epp_lock);
        }

        /*
         * if no pool access for a long time, we consider it's fully idle.
         * a little race here is fine.
         */
        if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
                     CACHE_QUIESCENT_PERIOD)) {
                cfs_spin_lock(&page_pools.epp_lock);
                page_pools.epp_idle_idx = IDLE_IDX_MAX;
                cfs_spin_unlock(&page_pools.epp_lock);
        }

        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
        return max((int) page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
               (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}
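
Every example on this page uses cfs_time_current_sec(). For orientation, in the Linux-kernel builds of libcfs this helper has typically been a thin wrapper around the kernel's wall-clock seconds counter, roughly as sketched below; the exact definition lives in libcfs and varies between releases (newer Lustre code calls ktime_get_real_seconds() directly), so treat this as an assumption, not the authoritative source.

#include <linux/time.h>		/* get_seconds() on older kernels */

/* Assumed sketch of the helper, not copied from a specific libcfs release. */
static inline time_t cfs_time_current_sec(void)
{
	return get_seconds();	/* wall-clock seconds since the Unix epoch */
}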
Example No. 2
/*
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static unsigned long enc_pools_shrink_scan(struct shrinker *s,
        struct shrink_control *sc)
{
    spin_lock(&page_pools.epp_lock);
    sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
                           page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
    if (sc->nr_to_scan > 0) {
        enc_pools_release_free_pages(sc->nr_to_scan);
        CDEBUG(D_SEC, "released %ld pages, %ld left\n",
               (long)sc->nr_to_scan, page_pools.epp_free_pages);

        page_pools.epp_st_shrinks++;
        page_pools.epp_last_shrink = cfs_time_current_sec();
    }
    spin_unlock(&page_pools.epp_lock);

    /*
     * if no pool access for a long time, we consider it's fully idle.
     * a little race here is fine.
     */
    if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
                 CACHE_QUIESCENT_PERIOD)) {
        spin_lock(&page_pools.epp_lock);
        page_pools.epp_idle_idx = IDLE_IDX_MAX;
        spin_unlock(&page_pools.epp_lock);
    }

    LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
    return sc->nr_to_scan;
}
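
Examples 2 and 16 are the scan and count halves of a memory shrinker. Lustre registers them through its DEF_SHRINKER_VAR()/set_shrinker() compatibility wrappers (see Example 9); on a kernel with the split count/scan interface the direct equivalent would look roughly like the sketch below (registration code assumed here, not taken from the Lustre tree).

#include <linux/shrinker.h>

/* Assumed sketch: wire the callbacks above into the kernel shrinker API
 * (pre-6.0 register_shrinker() signature, without a name argument). */
static struct shrinker enc_pools_shrinker = {
	.count_objects	= enc_pools_shrink_count,
	.scan_objects	= enc_pools_shrink_scan,
	.seeks		= DEFAULT_SEEKS,
};

static int enc_pools_shrinker_register(void)
{
	return register_shrinker(&enc_pools_shrinker);	/* 0 on success */
}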
Example No. 3
/*
 * /proc/fs/lustre/sptlrpc/encrypt_page_pools
 */
int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count,
                               int *eof, void *data)
{
        int     rc;

        cfs_spin_lock(&page_pools.epp_lock);

        rc = snprintf(page, count,
                      "physical pages:          %lu\n"
                      "pages per pool:          %lu\n"
                      "max pages:               %lu\n"
                      "max pools:               %u\n"
                      "total pages:             %lu\n"
                      "total free:              %lu\n"
                      "idle index:              %lu/100\n"
                      "last shrink:             %lds\n"
                      "last access:             %lds\n"
                      "max pages reached:       %lu\n"
                      "grows:                   %u\n"
                      "grows failure:           %u\n"
                      "shrinks:                 %u\n"
                      "cache access:            %lu\n"
                      "cache missing:           %lu\n"
                      "low free mark:           %lu\n"
                      "max waitqueue depth:     %u\n"
                      "max wait time:           "CFS_TIME_T"/%u\n"
                      ,
                      cfs_num_physpages,
                      PAGES_PER_POOL,
                      page_pools.epp_max_pages,
                      page_pools.epp_max_pools,
                      page_pools.epp_total_pages,
                      page_pools.epp_free_pages,
                      page_pools.epp_idle_idx,
                      cfs_time_current_sec() - page_pools.epp_last_shrink,
                      cfs_time_current_sec() - page_pools.epp_last_access,
                      page_pools.epp_st_max_pages,
                      page_pools.epp_st_grows,
                      page_pools.epp_st_grow_fails,
                      page_pools.epp_st_shrinks,
                      page_pools.epp_st_access,
                      page_pools.epp_st_missings,
                      page_pools.epp_st_lowfree,
                      page_pools.epp_st_max_wqlen,
                      page_pools.epp_st_max_wait, CFS_HZ
                     );

        cfs_spin_unlock(&page_pools.epp_lock);
        return rc;
}
Example No. 4
/*
 * /proc/fs/lustre/sptlrpc/encrypt_page_pools
 */
int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
{
        int     rc;

	spin_lock(&page_pools.epp_lock);

	rc = seq_printf(m,
                      "physical pages:          %lu\n"
                      "pages per pool:          %lu\n"
                      "max pages:               %lu\n"
                      "max pools:               %u\n"
                      "total pages:             %lu\n"
                      "total free:              %lu\n"
                      "idle index:              %lu/100\n"
                      "last shrink:             %lds\n"
                      "last access:             %lds\n"
                      "max pages reached:       %lu\n"
                      "grows:                   %u\n"
                      "grows failure:           %u\n"
                      "shrinks:                 %u\n"
                      "cache access:            %lu\n"
                      "cache missing:           %lu\n"
                      "low free mark:           %lu\n"
                      "max waitqueue depth:     %u\n"
		      "max wait time:           "CFS_TIME_T"/%lu\n"
                      ,
		      totalram_pages,
                      PAGES_PER_POOL,
                      page_pools.epp_max_pages,
                      page_pools.epp_max_pools,
                      page_pools.epp_total_pages,
                      page_pools.epp_free_pages,
                      page_pools.epp_idle_idx,
                      cfs_time_current_sec() - page_pools.epp_last_shrink,
                      cfs_time_current_sec() - page_pools.epp_last_access,
                      page_pools.epp_st_max_pages,
                      page_pools.epp_st_grows,
                      page_pools.epp_st_grow_fails,
                      page_pools.epp_st_shrinks,
                      page_pools.epp_st_access,
		      page_pools.epp_st_missings,
		      page_pools.epp_st_lowfree,
		      page_pools.epp_st_max_wqlen,
		      page_pools.epp_st_max_wait,
		      msecs_to_jiffies(MSEC_PER_SEC)
		     );

	spin_unlock(&page_pools.epp_lock);
	return rc;
}
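
Example 3 is the legacy procfs read handler; Example 4 is the same report ported to seq_file. On pre-4.18 kernels a single-shot seq_file show routine like this is normally exposed through single_open(); the hookup below is an assumed sketch, not code from the Lustre tree.

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Assumed sketch: procfs plumbing for the show routine above. */
static int enc_pool_proc_open(struct inode *inode, struct file *file)
{
	/* no iterator state is needed for a single-shot report */
	return single_open(file, sptlrpc_proc_enc_pool_seq_show, NULL);
}

static const struct file_operations enc_pool_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = enc_pool_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};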
Example No. 5
static int lfsck_namespace_checkpoint(const struct lu_env *env,
				      struct lfsck_component *com, bool init)
{
	struct lfsck_instance	*lfsck = com->lc_lfsck;
	struct lfsck_namespace	*ns    = com->lc_file_ram;
	int			 rc;

	if (com->lc_new_checked == 0 && !init)
		return 0;

	down_write(&com->lc_sem);

	if (init) {
		ns->ln_pos_latest_start = lfsck->li_pos_current;
	} else {
		ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
		ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
		ns->ln_time_last_checkpoint = cfs_time_current_sec();
		ns->ln_items_checked += com->lc_new_checked;
		com->lc_new_checked = 0;
	}

	rc = lfsck_namespace_store(env, com, false);

	up_write(&com->lc_sem);
	return rc;
}
Example No. 6
/*
 *  Client's outgoing request callback
 */
void request_out_callback(lnet_event_t *ev)
{
	struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
	struct ptlrpc_request *req = cbid->cbid_arg;

	LASSERT(ev->type == LNET_EVENT_SEND ||
		ev->type == LNET_EVENT_UNLINK);
	LASSERT(ev->unlinked);

	DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);

	sptlrpc_request_out_callback(req);
	req->rq_real_sent = cfs_time_current_sec();

	if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {

		/* Failed send: make it seem like the reply timed out, just
		 * like failing sends in client.c does currently...  */

		spin_lock(&req->rq_lock);
		req->rq_net_err = 1;
		spin_unlock(&req->rq_lock);

		ptlrpc_client_wake_req(req);
	}

	ptlrpc_req_finished(req);
}
Example No. 7
File: llog.c Project: 7799/linux
static int llog_read_header(const struct lu_env *env,
			    struct llog_handle *handle,
			    struct obd_uuid *uuid)
{
	struct llog_operations *lop;
	int rc;

	rc = llog_handle2ops(handle, &lop);
	if (rc)
		return rc;

	if (lop->lop_read_header == NULL)
		return -EOPNOTSUPP;

	rc = lop->lop_read_header(env, handle);
	if (rc == LLOG_EEMPTY) {
		struct llog_log_hdr *llh = handle->lgh_hdr;

		handle->lgh_last_idx = 0; /* header is record with index 0 */
		llh->llh_count = 1;	 /* for the header record */
		llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
		llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
		llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
		llh->llh_timestamp = cfs_time_current_sec();
		if (uuid)
			memcpy(&llh->llh_tgtuuid, uuid,
			       sizeof(llh->llh_tgtuuid));
		llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap);
		ext2_set_bit(0, llh->llh_bitmap);
		rc = 0;
	}
	return rc;
}
Example No. 8
static
struct ptlrpc_cli_ctx *ctx_create_kr(struct ptlrpc_sec *sec,
                                     struct vfs_cred *vcred)
{
        struct ptlrpc_cli_ctx      *ctx;
        struct gss_cli_ctx_keyring *gctx_kr;

        OBD_ALLOC_PTR(gctx_kr);
        if (gctx_kr == NULL)
                return NULL;

        OBD_ALLOC_PTR(gctx_kr->gck_timer);
        if (gctx_kr->gck_timer == NULL) {
                OBD_FREE_PTR(gctx_kr);
                return NULL;
        }
        init_timer(gctx_kr->gck_timer);

        ctx = &gctx_kr->gck_base.gc_base;

        if (gss_cli_ctx_init_common(sec, ctx, &gss_keyring_ctxops, vcred)) {
                OBD_FREE_PTR(gctx_kr->gck_timer);
                OBD_FREE_PTR(gctx_kr);
                return NULL;
        }

	ctx->cc_expire = cfs_time_current_sec() + KEYRING_UPCALL_TIMEOUT;
	clear_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags);
	atomic_inc(&ctx->cc_refcount); /* for the caller */

	return ctx;
}
Example No. 9
int sptlrpc_enc_pool_init(void)
{
	DEF_SHRINKER_VAR(shvar, enc_pools_shrink,
			 enc_pools_shrink_count, enc_pools_shrink_scan);
	/*
	 * maximum capacity is 1/8 of total physical memory.
	 * is the 1/8 a good number?
	 */
	page_pools.epp_max_pages = totalram_pages / 8;
	page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);

	init_waitqueue_head(&page_pools.epp_waitq);
	page_pools.epp_waitqlen = 0;
	page_pools.epp_pages_short = 0;

        page_pools.epp_growing = 0;

        page_pools.epp_idle_idx = 0;
        page_pools.epp_last_shrink = cfs_time_current_sec();
        page_pools.epp_last_access = cfs_time_current_sec();

	spin_lock_init(&page_pools.epp_lock);
        page_pools.epp_total_pages = 0;
        page_pools.epp_free_pages = 0;

        page_pools.epp_st_max_pages = 0;
        page_pools.epp_st_grows = 0;
        page_pools.epp_st_grow_fails = 0;
        page_pools.epp_st_shrinks = 0;
        page_pools.epp_st_access = 0;
        page_pools.epp_st_missings = 0;
        page_pools.epp_st_lowfree = 0;
        page_pools.epp_st_max_wqlen = 0;
        page_pools.epp_st_max_wait = 0;

        enc_pools_alloc();
        if (page_pools.epp_pools == NULL)
                return -ENOMEM;

	pools_shrinker = set_shrinker(pools_shrinker_seeks, &shvar);
        if (pools_shrinker == NULL) {
                enc_pools_free();
                return -ENOMEM;
        }

        return 0;
}
Example No. 10
int sptlrpc_enc_pool_init(void)
{
        /*
         * maximum capacity is 1/8 of total physical memory.
         * is the 1/8 a good number?
         */
        page_pools.epp_max_pages = cfs_num_physpages / 8;
        page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);

        cfs_waitq_init(&page_pools.epp_waitq);
        page_pools.epp_waitqlen = 0;
        page_pools.epp_pages_short = 0;

        page_pools.epp_growing = 0;

        page_pools.epp_idle_idx = 0;
        page_pools.epp_last_shrink = cfs_time_current_sec();
        page_pools.epp_last_access = cfs_time_current_sec();

        cfs_spin_lock_init(&page_pools.epp_lock);
        page_pools.epp_total_pages = 0;
        page_pools.epp_free_pages = 0;

        page_pools.epp_st_max_pages = 0;
        page_pools.epp_st_grows = 0;
        page_pools.epp_st_grow_fails = 0;
        page_pools.epp_st_shrinks = 0;
        page_pools.epp_st_access = 0;
        page_pools.epp_st_missings = 0;
        page_pools.epp_st_lowfree = 0;
        page_pools.epp_st_max_wqlen = 0;
        page_pools.epp_st_max_wait = 0;

        enc_pools_alloc();
        if (page_pools.epp_pools == NULL)
                return -ENOMEM;

        pools_shrinker = cfs_set_shrinker(pools_shrinker_seeks,
                                          enc_pools_shrink);
        if (pools_shrinker == NULL) {
                enc_pools_free();
                return -ENOMEM;
        }

        return 0;
}
Example No. 11
/**
 * Disconnect a bulk desc from the network. Idempotent. Not
 * thread-safe (i.e. only interlocks with completion callback).
 * Returns 1 on success or 0 if network unregistration failed for whatever
 * reason.
 */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	struct l_wait_info       lwi;
	int                      rc;
	ENTRY;

	LASSERT(!in_interrupt());     /* might sleep */

	/* Let's setup deadline for reply unlink. */
	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
	    async && req->rq_bulk_deadline == 0)
		req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		RETURN(1);				/* never registered */

	LASSERT(desc->bd_req == req);  /* bd_req NULL until registered */

	/* the unlink ensures the callback happens ASAP and is the last
	 * one.  If it fails, it must be because completion just happened,
	 * but we must still l_wait_event() in this case to give liblustre
	 * a chance to run client_bulk_callback() */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		RETURN(1);				/* never registered */

        /* Move to "Unregistering" phase as bulk was not unlinked yet. */
        ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);

        /* Do not wait for unlink to finish. */
        if (async)
                RETURN(0);

        for (;;) {
#ifdef __KERNEL__
		/* The wq argument is ignored by user-space wait_event macros */
		wait_queue_head_t *wq = (req->rq_set != NULL) ?
					&req->rq_set->set_waitq :
					&req->rq_reply_waitq;
#endif
                /* Network access will complete in finite time but the HUGE
                 * timeout lets us CWARN for visibility of sluggish NALs */
                lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
                                           cfs_time_seconds(1), NULL, NULL);
                rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi);
                if (rc == 0) {
                        ptlrpc_rqphase_move(req, req->rq_next_phase);
                        RETURN(1);
                }

                LASSERT(rc == -ETIMEDOUT);
                DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
                          desc);
        }
        RETURN(0);
}
Example No. 12
static struct obd_capa *osd_capa_get(const struct lu_env *env,
				     struct dt_object *dt,
				     struct lustre_capa *old,
				     __u64 opc)
{
	struct osd_thread_info	*info = osd_oti_get(env);
	const struct lu_fid	*fid = lu_object_fid(&dt->do_lu);
	struct osd_object	*obj = osd_dt_obj(dt);
	struct osd_device	*dev = osd_obj2dev(obj);
	struct lustre_capa_key	*key = &info->oti_capa_key;
	struct lustre_capa	*capa = &info->oti_capa;
	struct obd_capa		*oc;
	int			 rc;
	ENTRY;

	if (!dev->od_fl_capa)
		RETURN(ERR_PTR(-ENOENT));

	LASSERT(dt_object_exists(dt));
	LASSERT(osd_invariant(obj));

	/* renewal sanity check */
	if (old && osd_object_auth(env, dt, old, opc))
		RETURN(ERR_PTR(-EACCES));

	capa->lc_fid = *fid;
	capa->lc_opc = opc;
	capa->lc_uid = 0;
	capa->lc_flags = dev->od_capa_alg << 24;
	capa->lc_timeout = dev->od_capa_timeout;
	capa->lc_expiry = 0;

	oc = capa_lookup(dev->od_capa_hash, capa, 1);
	if (oc) {
		LASSERT(!capa_is_expired(oc));
		RETURN(oc);
	}

	spin_lock(&capa_lock);
	*key = dev->od_capa_keys[1];
	spin_unlock(&capa_lock);

	capa->lc_keyid = key->lk_keyid;
	capa->lc_expiry = cfs_time_current_sec() + dev->od_capa_timeout;

	rc = capa_hmac(capa->lc_hmac, capa, key->lk_key);
	if (rc) {
		DEBUG_CAPA(D_ERROR, capa, "HMAC failed: %d for", rc);
		LBUG();
		RETURN(ERR_PTR(rc));
	}

	oc = capa_add(dev->od_capa_hash, capa);
	RETURN(oc);
}
Example No. 13
static int lfsck_namespace_post(const struct lu_env *env,
				struct lfsck_component *com,
				int result, bool init)
{
	struct lfsck_instance	*lfsck = com->lc_lfsck;
	struct lfsck_namespace	*ns    = com->lc_file_ram;
	int			 rc;

	down_write(&com->lc_sem);

	spin_lock(&lfsck->li_lock);
	if (!init)
		ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
	if (result > 0) {
		ns->ln_status = LS_SCANNING_PHASE2;
		ns->ln_flags |= LF_SCANNED_ONCE;
		ns->ln_flags &= ~LF_UPGRADE;
		cfs_list_del_init(&com->lc_link);
		cfs_list_del_init(&com->lc_link_dir);
		cfs_list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
	} else if (result == 0) {
		ns->ln_status = lfsck->li_status;
		if (ns->ln_status == 0)
			ns->ln_status = LS_STOPPED;
		if (ns->ln_status != LS_PAUSED) {
			cfs_list_del_init(&com->lc_link);
			cfs_list_del_init(&com->lc_link_dir);
			cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
		}
	} else {
		ns->ln_status = LS_FAILED;
		cfs_list_del_init(&com->lc_link);
		cfs_list_del_init(&com->lc_link_dir);
		cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
	}
	spin_unlock(&lfsck->li_lock);

	if (!init) {
		ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
		ns->ln_time_last_checkpoint = cfs_time_current_sec();
		ns->ln_items_checked += com->lc_new_checked;
		com->lc_new_checked = 0;
	}

	rc = lfsck_namespace_store(env, com, false);

	up_write(&com->lc_sem);
	return rc;
}
Example No. 14
/**
 * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
 */
void libcfs_debug_dumplog_internal(void *arg)
{
        CFS_DECL_JOURNAL_DATA;

        CFS_PUSH_JOURNAL;

        if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) {
                snprintf(debug_file_name, sizeof(debug_file_name) - 1,
                         "%s.%ld." LPLD, libcfs_debug_file_path_arr,
                         cfs_time_current_sec(), (long_ptr_t)arg);
                printk(CFS_KERN_ALERT "LustreError: dumping log to %s\n",
                       debug_file_name);
                cfs_tracefile_dump_all_pages(debug_file_name);
                libcfs_run_debug_log_upcall(debug_file_name);
        }
        CFS_POP_JOURNAL;
}
Example No. 15
static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
{
	struct ptlrpc_service_part	*svcpt = req->rq_rqbd->rqbd_svcpt;
	struct ptlrpc_service		*svc = svcpt->scp_service;
        int service_time = max_t(int, cfs_time_current_sec() -
                                 req->rq_arrival_time.tv_sec, 1);

        if (!(flags & PTLRPC_REPLY_EARLY) &&
            (req->rq_type != PTL_RPC_MSG_ERR) &&
            (req->rq_reqmsg != NULL) &&
            !(lustre_msg_get_flags(req->rq_reqmsg) &
              (MSG_RESENT | MSG_REPLAY |
               MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
                /* early replies, errors and recovery requests don't count
                 * toward our service time estimate */
		int oldse = at_measured(&svcpt->scp_at_estimate, service_time);

		if (oldse != 0) {
			DEBUG_REQ(D_ADAPTTO, req,
				  "svc %s changed estimate from %d to %d",
				  svc->srv_name, oldse,
				  at_get(&svcpt->scp_at_estimate));
		}
        }
        /* Report actual service time for client latency calc */
        lustre_msg_set_service_time(req->rq_repmsg, service_time);
        /* Report service time estimate for future client reqs, but report 0
         * (to be ignored by client) if it's a error reply during recovery.
         * (bz15815) */
        if (req->rq_type == PTL_RPC_MSG_ERR &&
            (req->rq_export == NULL || req->rq_export->exp_obd->obd_recovering))
                lustre_msg_set_timeout(req->rq_repmsg, 0);
        else
                lustre_msg_set_timeout(req->rq_repmsg,
				       at_get(&svcpt->scp_at_estimate));

        if (req->rq_reqmsg &&
            !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
                CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x "
                       "req_flags=%#x magic=%d:%x/%x len=%d\n",
                       flags, lustre_msg_get_flags(req->rq_reqmsg),
                       lustre_msg_is_v1(req->rq_reqmsg),
                       lustre_msg_get_magic(req->rq_reqmsg),
                       lustre_msg_get_magic(req->rq_repmsg), req->rq_replen);
        }
}
Example No. 16
/*
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static unsigned long enc_pools_shrink_count(struct shrinker *s,
        struct shrink_control *sc)
{
    /*
     * if no pool access for a long time, we consider it's fully idle.
     * a little race here is fine.
     */
    if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
                 CACHE_QUIESCENT_PERIOD)) {
        spin_lock(&page_pools.epp_lock);
        page_pools.epp_idle_idx = IDLE_IDX_MAX;
        spin_unlock(&page_pools.epp_lock);
    }

    LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
    return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
           (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}
Example No. 17
int llog_read_header(const struct lu_env *env, struct llog_handle *handle,
		     const struct obd_uuid *uuid)
{
	struct llog_operations *lop;
	int rc;
	ENTRY;

	rc = llog_handle2ops(handle, &lop);
	if (rc)
		RETURN(rc);

	if (lop->lop_read_header == NULL)
		RETURN(-EOPNOTSUPP);

	rc = lop->lop_read_header(env, handle);
	if (rc == LLOG_EEMPTY) {
		struct llog_log_hdr *llh = handle->lgh_hdr;

		/* lrh_len should be initialized in llog_init_handle */
		handle->lgh_last_idx = 0; /* header is record with index 0 */
		handle->lgh_write_offset = 0;
		llh->llh_count = 1;         /* for the header record */
		llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
		LASSERT(handle->lgh_ctxt->loc_chunk_size >=
						LLOG_MIN_CHUNK_SIZE);
		llh->llh_hdr.lrh_len = handle->lgh_ctxt->loc_chunk_size;
		llh->llh_hdr.lrh_index = 0;
		llh->llh_timestamp = cfs_time_current_sec();
		if (uuid)
			memcpy(&llh->llh_tgtuuid, uuid,
			       sizeof(llh->llh_tgtuuid));
		llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap);
		/* Since update llog header might also call this function,
		 * let's reset the bitmap to 0 here */
		memset(LLOG_HDR_BITMAP(llh), 0, llh->llh_hdr.lrh_len -
						llh->llh_bitmap_offset -
						sizeof(llh->llh_tail));
		ext2_set_bit(0, LLOG_HDR_BITMAP(llh));
		LLOG_HDR_TAIL(llh)->lrt_len = llh->llh_hdr.lrh_len;
		LLOG_HDR_TAIL(llh)->lrt_index = llh->llh_hdr.lrh_index;
		rc = 0;
	}
	RETURN(rc);
}
Example No. 18
/**
 * Add a record to the end of link ea buf
 **/
int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
		   const struct lu_fid *pfid)
{
	struct link_ea_header *leh = ldata->ld_leh;
	int reclen;

	LASSERT(leh != NULL);

	if (lname == NULL || pfid == NULL)
		return -EINVAL;

	reclen = lname->ln_namelen + sizeof(struct link_ea_entry);
	if (unlikely(leh->leh_len + reclen > MAX_LINKEA_SIZE)) {
		/* Use 32-bits to save the overflow time, although it will
		 * shrink the cfs_time_current_sec() returned 64-bits value
		 * to 32-bits value, it is still quite large and can be used
		 * for about 140 years. That is enough. */
		leh->leh_overflow_time = cfs_time_current_sec();
		if (unlikely(leh->leh_overflow_time == 0))
			leh->leh_overflow_time++;

		CDEBUG(D_INODE, "No enough space to hold linkea entry '"
		       DFID": %.*s' at %u\n", PFID(pfid), lname->ln_namelen,
		       lname->ln_name, leh->leh_overflow_time);
		return 0;
	}

	if (leh->leh_len + reclen > ldata->ld_buf->lb_len) {
		if (lu_buf_check_and_grow(ldata->ld_buf,
					  leh->leh_len + reclen) < 0)
			return -ENOMEM;

		leh = ldata->ld_leh = ldata->ld_buf->lb_buf;
	}

	ldata->ld_lee = ldata->ld_buf->lb_buf + leh->leh_len;
	ldata->ld_reclen = linkea_entry_pack(ldata->ld_lee, lname, pfid);
	leh->leh_len += ldata->ld_reclen;
	leh->leh_reccount++;
	CDEBUG(D_INODE, "New link_ea name '"DFID":%.*s' is added\n",
	       PFID(pfid), lname->ln_namelen, lname->ln_name);
	return 0;
}
Example No. 19
/**
 * Mark the linkEA as overflow with current timestamp,
 * and remove the last linkEA entry.
 *
 * Return the new linkEA size.
 */
int linkea_overflow_shrink(struct linkea_data *ldata)
{
	struct link_ea_header *leh;
	struct lu_name tname;
	struct lu_fid tfid;
	int count;

	leh = ldata->ld_leh = ldata->ld_buf->lb_buf;
	if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) {
		leh->leh_magic = LINK_EA_MAGIC;
		leh->leh_reccount = __swab32(leh->leh_reccount);
		leh->leh_overflow_time = __swab32(leh->leh_overflow_time);
		leh->leh_padding = __swab32(leh->leh_padding);
	}

	LASSERT(leh->leh_reccount > 0);

	leh->leh_len = sizeof(struct link_ea_header);
	leh->leh_reccount--;
	if (unlikely(leh->leh_reccount == 0))
		return 0;

	leh->leh_overflow_time = cfs_time_current_sec();
	if (unlikely(leh->leh_overflow_time == 0))
		leh->leh_overflow_time++;
	ldata->ld_reclen = 0;
	ldata->ld_lee = (struct link_ea_entry *)(leh + 1);
	for (count = 0; count < leh->leh_reccount; count++) {
		linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen,
				    &tname, &tfid);
		leh->leh_len += ldata->ld_reclen;
		ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
							 ldata->ld_reclen);
	}

	linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, &tname, &tfid);
	CDEBUG(D_INODE, "No enough space to hold the last linkea entry '"
	       DFID": %.*s', shrink it, left %d linkea entries, size %llu\n",
	       PFID(&tfid), tname.ln_namelen, tname.ln_name,
	       leh->leh_reccount, leh->leh_len);

	return leh->leh_len;
}
Example No. 20
/*
 * could be called frequently for query (@nr_to_scan == 0).
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
{
	if (unlikely(shrink_param(sc, nr_to_scan) != 0)) {
		spin_lock(&page_pools.epp_lock);
                shrink_param(sc, nr_to_scan) = min_t(unsigned long,
                                                   shrink_param(sc, nr_to_scan),
                                                   page_pools.epp_free_pages -
                                                   PTLRPC_MAX_BRW_PAGES);
                if (shrink_param(sc, nr_to_scan) > 0) {
                        enc_pools_release_free_pages(shrink_param(sc,
                                                                  nr_to_scan));
                        CDEBUG(D_SEC, "released %ld pages, %ld left\n",
                               (long)shrink_param(sc, nr_to_scan),
                               page_pools.epp_free_pages);

                        page_pools.epp_st_shrinks++;
                        page_pools.epp_last_shrink = cfs_time_current_sec();
                }
		spin_unlock(&page_pools.epp_lock);
	}

	/*
	 * if no pool access for a long time, we consider it's fully idle.
	 * a little race here is fine.
	 */
	if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
		     CACHE_QUIESCENT_PERIOD)) {
		spin_lock(&page_pools.epp_lock);
		page_pools.epp_idle_idx = IDLE_IDX_MAX;
		spin_unlock(&page_pools.epp_lock);
	}

	LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
	return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
	       (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}
Example No. 21
static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v)
{
        struct obd_device *dev = seq->private;
        struct client_obd *cli = &dev->u.cli;
        struct ptlrpc_sec *sec = NULL;
        char               str[32];

	LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
		strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
		strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0 ||
		strcmp(dev->obd_type->typ_name, LUSTRE_LWP_NAME) == 0 ||
		strcmp(dev->obd_type->typ_name, LUSTRE_OSP_NAME) == 0);

        if (cli->cl_import)
                sec = sptlrpc_import_sec_ref(cli->cl_import);
        if (sec == NULL)
                goto out;

        sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str));

	seq_printf(seq, "rpc flavor:	%s\n",
		   sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc));
	seq_printf(seq, "bulk flavor:	%s\n",
		   sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str)));
	seq_printf(seq, "flags:		%s\n",
		   sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)));
	seq_printf(seq, "id:		%d\n", sec->ps_id);
	seq_printf(seq, "refcount:	%d\n",
		   atomic_read(&sec->ps_refcount));
	seq_printf(seq, "nctx:	%d\n", atomic_read(&sec->ps_nctx));
	seq_printf(seq, "gc internal	%ld\n", sec->ps_gc_interval);
	seq_printf(seq, "gc next	%ld\n",
		   sec->ps_gc_interval ?
		   sec->ps_gc_next - cfs_time_current_sec() : 0);

	sptlrpc_sec_put(sec);
out:
        return 0;
}
Example No. 22
/**
 * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
 */
void libcfs_debug_dumplog_internal(void *arg)
{
	static time_t last_dump_time;
	time_t current_time;
	DECL_JOURNAL_DATA;

	PUSH_JOURNAL;

	current_time = cfs_time_current_sec();

	if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0 &&
	    current_time > last_dump_time) {
		last_dump_time = current_time;
		snprintf(debug_file_name, sizeof(debug_file_name) - 1,
			 "%s.%ld." LPLD, libcfs_debug_file_path_arr,
			 current_time, (long_ptr_t)arg);
		printk(KERN_ALERT "LustreError: dumping log to %s\n",
		       debug_file_name);
		cfs_tracefile_dump_all_pages(debug_file_name);
		libcfs_run_debug_log_upcall(debug_file_name);
	}
	POP_JOURNAL;
}
Example No. 23
int mdt_reint_setxattr(struct mdt_thread_info *info,
                       struct mdt_lock_handle *unused)
{
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_ucred         *uc  = mdt_ucred(info);
        struct mdt_lock_handle  *lh;
        const struct lu_env     *env  = info->mti_env;
        struct lu_buf           *buf  = &info->mti_buf;
        struct mdt_reint_record *rr   = &info->mti_rr;
        struct md_attr          *ma = &info->mti_attr;
        struct lu_attr          *attr = &info->mti_attr.ma_attr;
        struct mdt_object       *obj;
        struct md_object        *child;
        __u64                    valid = attr->la_valid;
        const char              *xattr_name = rr->rr_name;
        int                      xattr_len = rr->rr_eadatalen;
        __u64                    lockpart;
        int                      rc;
        posix_acl_xattr_header  *new_xattr = NULL;
        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
        __u32                    perm;
        ENTRY;

        CDEBUG(D_INODE, "setxattr for "DFID"\n", PFID(rr->rr_fid1));

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
                RETURN(err_serious(-ENOMEM));

        CDEBUG(D_INODE, "%s xattr %s\n",
               valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name);

        rc = mdt_init_ucred_reint(info);
        if (rc != 0)
                RETURN(rc);

        if (valid & OBD_MD_FLRMTRSETFACL) {
                if (unlikely(!remote))
                        GOTO(out, rc = err_serious(-EINVAL));

                perm = mdt_identity_get_perm(uc->mu_identity, remote,
                                             req->rq_peer.nid);
                if (!(perm & CFS_RMTACL_PERM))
                        GOTO(out, rc = err_serious(-EPERM));
        }

        if (strncmp(xattr_name, XATTR_USER_PREFIX,
                    sizeof(XATTR_USER_PREFIX) - 1) == 0) {
                if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR))
                        GOTO(out, rc = -EOPNOTSUPP);
                if (strcmp(xattr_name, XATTR_NAME_LOV) == 0)
                        GOTO(out, rc = -EACCES);
                if (strcmp(xattr_name, XATTR_NAME_LMA) == 0)
                        GOTO(out, rc = 0);
                if (strcmp(xattr_name, XATTR_NAME_LINK) == 0)
                        GOTO(out, rc = 0);
        } else if ((valid & OBD_MD_FLXATTR) &&
                   (strncmp(xattr_name, XATTR_NAME_ACL_ACCESS,
                            sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0 ||
                    strncmp(xattr_name, XATTR_NAME_ACL_DEFAULT,
                            sizeof(XATTR_NAME_ACL_DEFAULT) - 1) == 0)) {
                /* currently lustre limit acl access size */
                if (xattr_len > LUSTRE_POSIX_ACL_MAX_SIZE)
                        GOTO(out, -ERANGE);
        }

        lockpart = MDS_INODELOCK_UPDATE;
        /* Revoke all clients' lookup lock, since the access
         * permissions for this inode is changed when ACL_ACCESS is
         * set. This isn't needed for ACL_DEFAULT, since that does
         * not change the access permissions of this inode, nor any
         * other existing inodes. It is setting the ACLs inherited
         * by new directories/files at create time. */
        if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
                lockpart |= MDS_INODELOCK_LOOKUP;

        lh = &info->mti_lh[MDT_LH_PARENT];
        /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
         * to cancel them. */
        mdt_lock_reg_init(lh, LCK_EX);
        obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
        if (IS_ERR(obj))
                GOTO(out, rc =  PTR_ERR(obj));

        info->mti_mos = obj;
        rc = mdt_version_get_check_save(info, obj, 0);
        if (rc)
                GOTO(out_unlock, rc);

        if (unlikely(!(valid & OBD_MD_FLCTIME))) {
                /* This isn't strictly an error, but all current clients
                 * should set OBD_MD_FLCTIME when setting attributes. */
                CWARN("%s: client miss to set OBD_MD_FLCTIME when "
                      "setxattr %s: [object "DFID"] [valid "LPU64"]\n",
                      info->mti_exp->exp_obd->obd_name, xattr_name,
                      PFID(rr->rr_fid1), valid);
                attr->la_ctime = cfs_time_current_sec();
        }
        attr->la_valid = LA_CTIME;
        child = mdt_object_child(obj);
        if (valid & OBD_MD_FLXATTR) {
                char *xattr = (void *)rr->rr_eadata;

                if (xattr_len > 0) {
                        int flags = 0;

                        if (valid & OBD_MD_FLRMTLSETFACL) {
                                if (unlikely(!remote))
                                        GOTO(out_unlock, rc = -EINVAL);

                                xattr_len = mdt_rmtlsetfacl(info, child,
                                                xattr_name,
                                                (ext_acl_xattr_header *)xattr,
                                                &new_xattr);
                                if (xattr_len < 0)
                                        GOTO(out_unlock, rc = xattr_len);

                                xattr = (char *)new_xattr;
                        }

                        if (attr->la_flags & XATTR_REPLACE)
                                flags |= LU_XATTR_REPLACE;

                        if (attr->la_flags & XATTR_CREATE)
                                flags |= LU_XATTR_CREATE;

                        mdt_fail_write(env, info->mti_mdt->mdt_bottom,
                                       OBD_FAIL_MDS_SETXATTR_WRITE);

                        buf->lb_buf = xattr;
                        buf->lb_len = xattr_len;
                        rc = mo_xattr_set(env, child, buf, xattr_name, flags);
                        /* update ctime after xattr changed */
                        if (rc == 0) {
                                ma->ma_attr_flags |= MDS_PERM_BYPASS;
                                mo_attr_set(env, child, ma);
                        }
                }
        } else if (valid & OBD_MD_FLXATTRRM) {
                rc = mo_xattr_del(env, child, xattr_name);
                /* update ctime after xattr changed */
                if (rc == 0) {
                        ma->ma_attr_flags |= MDS_PERM_BYPASS;
                        mo_attr_set(env, child, ma);
                }
        } else {
                CDEBUG(D_INFO, "valid bits: "LPX64"\n", valid);
                rc = -EINVAL;
        }
        if (rc == 0)
                mdt_counter_incr(req->rq_export, LPROC_MDT_SETXATTR);

        EXIT;
out_unlock:
        mdt_object_unlock_put(info, obj, lh, rc);
        if (unlikely(new_xattr != NULL))
                lustre_posix_acl_xattr_free(new_xattr, xattr_len);
out:
        mdt_exit_ucred(info);
        return rc;
}
Example No. 24
/*
 * Client's incoming reply callback
 */
void reply_in_callback(lnet_event_t *ev)
{
	struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
	struct ptlrpc_request *req = cbid->cbid_arg;

	DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);

	LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK);
	LASSERT(ev->md.start == req->rq_repbuf);
	LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len);
	/* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests
	   for adaptive timeouts' early reply. */
	LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0);

	spin_lock(&req->rq_lock);

	req->rq_receiving_reply = 0;
	req->rq_early = 0;
	if (ev->unlinked)
		req->rq_must_unlink = 0;

	if (ev->status)
		goto out_wake;

	if (ev->type == LNET_EVENT_UNLINK) {
		LASSERT(ev->unlinked);
		DEBUG_REQ(D_NET, req, "unlink");
		goto out_wake;
	}

	if (ev->mlength < ev->rlength) {
		CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
		       req->rq_replen, ev->rlength, ev->offset);
		req->rq_reply_truncate = 1;
		req->rq_replied = 1;
		req->rq_status = -EOVERFLOW;
		req->rq_nob_received = ev->rlength + ev->offset;
		goto out_wake;
	}

	if ((ev->offset == 0) &&
	    ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) {
		/* Early reply */
		DEBUG_REQ(D_ADAPTTO, req,
			  "Early reply received: mlen=%u offset=%d replen=%d "
			  "replied=%d unlinked=%d", ev->mlength, ev->offset,
			  req->rq_replen, req->rq_replied, ev->unlinked);

		req->rq_early_count++; /* number received, client side */

		if (req->rq_replied)   /* already got the real reply */
			goto out_wake;

		req->rq_early = 1;
		req->rq_reply_off = ev->offset;
		req->rq_nob_received = ev->mlength;
		/* And we're still receiving */
		req->rq_receiving_reply = 1;
	} else {
		/* Real reply */
		req->rq_rep_swab_mask = 0;
		req->rq_replied = 1;
		req->rq_reply_off = ev->offset;
		req->rq_nob_received = ev->mlength;
		/* LNetMDUnlink can't be called under the LNET_LOCK,
		   so we must unlink in ptlrpc_unregister_reply */
		DEBUG_REQ(D_INFO, req,
			  "reply in flags=%x mlen=%u offset=%d replen=%d",
			  lustre_msg_get_flags(req->rq_reqmsg),
			  ev->mlength, ev->offset, req->rq_replen);
	}

	req->rq_import->imp_last_reply_time = cfs_time_current_sec();

out_wake:
	/* NB don't unlock till after wakeup; req can disappear under us
	 * since we don't have our own ref */
	ptlrpc_client_wake_req(req);
	spin_unlock(&req->rq_lock);
}
Example No. 25
int
lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr)
{
	char   *buf;
	struct libcfs_ioctl_data *data;
	int     opc;
	int     rc;

	if (cmd != IOC_LIBCFS_LNETST)
		return -EINVAL;

	data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);

	opc = data->ioc_u32[0];

	if (data->ioc_plen1 > PAGE_SIZE)
		return -EINVAL;

	LIBCFS_ALLOC(buf, data->ioc_plen1);
	if (buf == NULL)
		return -ENOMEM;

	/* copy in parameter */
	if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
		LIBCFS_FREE(buf, data->ioc_plen1);
		return -EFAULT;
	}

	mutex_lock(&console_session.ses_mutex);

	console_session.ses_laststamp = cfs_time_current_sec();

	if (console_session.ses_shutdown) {
		rc = -ESHUTDOWN;
		goto out;
	}

	if (console_session.ses_expired)
		lstcon_session_end();

	if (opc != LSTIO_SESSION_NEW &&
	    console_session.ses_state == LST_SESSION_NONE) {
		CDEBUG(D_NET, "LST no active session\n");
		rc = -ESRCH;
		goto out;
	}

	memset(&console_session.ses_trans_stat, 0, sizeof(struct lstcon_trans_stat));

	switch (opc) {
	case LSTIO_SESSION_NEW:
		rc = lst_session_new_ioctl((struct lstio_session_new_args *)buf);
		break;
	case LSTIO_SESSION_END:
		rc = lst_session_end_ioctl((struct lstio_session_end_args *)buf);
		break;
	case LSTIO_SESSION_INFO:
		rc = lst_session_info_ioctl((struct lstio_session_info_args *)buf);
		break;
	case LSTIO_DEBUG:
		rc = lst_debug_ioctl((struct lstio_debug_args *)buf);
		break;
	case LSTIO_GROUP_ADD:
		rc = lst_group_add_ioctl((struct lstio_group_add_args *)buf);
		break;
	case LSTIO_GROUP_DEL:
		rc = lst_group_del_ioctl((struct lstio_group_del_args *)buf);
		break;
	case LSTIO_GROUP_UPDATE:
		rc = lst_group_update_ioctl((struct lstio_group_update_args *)buf);
		break;
	case LSTIO_NODES_ADD:
		rc = lst_nodes_add_ioctl((struct lstio_group_nodes_args *)buf);
		break;
	case LSTIO_GROUP_LIST:
		rc = lst_group_list_ioctl((struct lstio_group_list_args *)buf);
		break;
	case LSTIO_GROUP_INFO:
		rc = lst_group_info_ioctl((struct lstio_group_info_args *)buf);
		break;
	case LSTIO_BATCH_ADD:
		rc = lst_batch_add_ioctl((struct lstio_batch_add_args *)buf);
		break;
	case LSTIO_BATCH_START:
		rc = lst_batch_run_ioctl((struct lstio_batch_run_args *)buf);
		break;
	case LSTIO_BATCH_STOP:
		rc = lst_batch_stop_ioctl((struct lstio_batch_stop_args *)buf);
		break;
	case LSTIO_BATCH_QUERY:
		rc = lst_batch_query_ioctl((struct lstio_batch_query_args *)buf);
		break;
	case LSTIO_BATCH_LIST:
		rc = lst_batch_list_ioctl((struct lstio_batch_list_args *)buf);
		break;
	case LSTIO_BATCH_INFO:
		rc = lst_batch_info_ioctl((struct lstio_batch_info_args *)buf);
		break;
	case LSTIO_TEST_ADD:
		rc = lst_test_add_ioctl((struct lstio_test_args *)buf);
		break;
	case LSTIO_STAT_QUERY:
		rc = lst_stat_query_ioctl((struct lstio_stat_args *)buf);
		break;
	default:
		rc = -EINVAL;
	}

	if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
			 sizeof(struct lstcon_trans_stat)))
		rc = -EFAULT;
out:
	mutex_unlock(&console_session.ses_mutex);

	LIBCFS_FREE(buf, data->ioc_plen1);

	return rc;
}
Example No. 26
int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
{
	struct obd_device *obd = m->private;

	LASSERT(obd != NULL);

	seq_printf(m, "status: ");
	if (obd->obd_max_recoverable_clients == 0) {
		seq_printf(m, "INACTIVE\n");
		goto out;
	}

	/* sampled unlocked, but really... */
	if (obd->obd_recovering == 0) {
		seq_printf(m, "COMPLETE\n");
		seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
		seq_printf(m, "recovery_duration: %lu\n",
			   obd->obd_recovery_end - obd->obd_recovery_start);
		/* Number of clients that have completed recovery */
		seq_printf(m, "completed_clients: %d/%d\n",
			   obd->obd_max_recoverable_clients -
			   obd->obd_stale_clients,
			   obd->obd_max_recoverable_clients);
		seq_printf(m, "replayed_requests: %d\n",
			   obd->obd_replayed_requests);
		seq_printf(m, "last_transno: "LPD64"\n",
			   obd->obd_next_recovery_transno - 1);
		seq_printf(m, "VBR: %s\n", obd->obd_version_recov ?
			   "ENABLED" : "DISABLED");
		seq_printf(m, "IR: %s\n", obd->obd_no_ir ?
			   "DISABLED" : "ENABLED");
		goto out;
	}

	seq_printf(m, "RECOVERING\n");
	seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
	seq_printf(m, "time_remaining: %lu\n",
		   cfs_time_current_sec() >=
		   obd->obd_recovery_start +
		   obd->obd_recovery_timeout ? 0 :
		   obd->obd_recovery_start +
		   obd->obd_recovery_timeout -
		   cfs_time_current_sec());
	seq_printf(m, "connected_clients: %d/%d\n",
		   atomic_read(&obd->obd_connected_clients),
		   obd->obd_max_recoverable_clients);
	/* Number of clients that have completed recovery */
	seq_printf(m, "req_replay_clients: %d\n",
		   atomic_read(&obd->obd_req_replay_clients));
	seq_printf(m, "lock_repay_clients: %d\n",
		   atomic_read(&obd->obd_lock_replay_clients));
	seq_printf(m, "completed_clients: %d\n",
		   atomic_read(&obd->obd_connected_clients) -
		   atomic_read(&obd->obd_lock_replay_clients));
	seq_printf(m, "evicted_clients: %d\n", obd->obd_stale_clients);
	seq_printf(m, "replayed_requests: %d\n", obd->obd_replayed_requests);
	seq_printf(m, "queued_requests: %d\n",
		   obd->obd_requests_queued_for_recovery);
	seq_printf(m, "next_transno: "LPD64"\n",
		   obd->obd_next_recovery_transno);
out:
	return 0;
}
Example No. 27
/**
 * Send request \a request.
 * if \a noreply is set, don't expect any reply back and don't set up
 * reply buffers.
 * Returns 0 on success or error code.
 */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
        int rc;
        int rc2;
        int mpflag = 0;
        struct ptlrpc_connection *connection;
        lnet_handle_me_t  reply_me_h;
        lnet_md_t         reply_md;
        struct obd_device *obd = request->rq_import->imp_obd;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
                RETURN(0);

        LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
        LASSERT(request->rq_wait_ctx == 0);

        /* If this is a re-transmit, we're required to have disengaged
         * cleanly from the previous attempt */
        LASSERT(!request->rq_receiving_reply);

        if (request->rq_import->imp_obd &&
            request->rq_import->imp_obd->obd_fail) {
                CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
                       request->rq_import->imp_obd->obd_name);
                /* this prevents us from waiting in ptlrpc_queue_wait */
		spin_lock(&request->rq_lock);
		request->rq_err = 1;
		spin_unlock(&request->rq_lock);
                request->rq_status = -ENODEV;
                RETURN(-ENODEV);
        }

        connection = request->rq_import->imp_connection;

        lustre_msg_set_handle(request->rq_reqmsg,
                              &request->rq_import->imp_remote_handle);
        lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
        lustre_msg_set_conn_cnt(request->rq_reqmsg,
                                request->rq_import->imp_conn_cnt);
        lustre_msghdr_set_flags(request->rq_reqmsg,
                                request->rq_import->imp_msghdr_flags);

        if (request->rq_resend)
                lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);

        if (request->rq_memalloc)
                mpflag = cfs_memory_pressure_get_and_set();

        rc = sptlrpc_cli_wrap_request(request);
        if (rc)
                GOTO(out, rc);

        /* bulk register should be done after wrap_request() */
        if (request->rq_bulk != NULL) {
                rc = ptlrpc_register_bulk (request);
                if (rc != 0)
                        GOTO(out, rc);
        }

        if (!noreply) {
                LASSERT (request->rq_replen != 0);
                if (request->rq_repbuf == NULL) {
                        LASSERT(request->rq_repdata == NULL);
                        LASSERT(request->rq_repmsg == NULL);
                        rc = sptlrpc_cli_alloc_repbuf(request,
                                                      request->rq_replen);
                        if (rc) {
                                /* this prevents us from looping in
                                 * ptlrpc_queue_wait */
				spin_lock(&request->rq_lock);
				request->rq_err = 1;
				spin_unlock(&request->rq_lock);
                                request->rq_status = rc;
                                GOTO(cleanup_bulk, rc);
                        }
                } else {
                        request->rq_repdata = NULL;
                        request->rq_repmsg = NULL;
                }

                rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
                                  connection->c_peer, request->rq_xid, 0,
                                  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
                if (rc != 0) {
                        CERROR("LNetMEAttach failed: %d\n", rc);
                        LASSERT (rc == -ENOMEM);
                        GOTO(cleanup_bulk, rc = -ENOMEM);
                }
        }

	spin_lock(&request->rq_lock);
        /* If the MD attach succeeds, there _will_ be a reply_in callback */
        request->rq_receiving_reply = !noreply;
        /* We are responsible for unlinking the reply buffer */
        request->rq_must_unlink = !noreply;
        /* Clear any flags that may be present from previous sends. */
        request->rq_replied = 0;
        request->rq_err = 0;
        request->rq_timedout = 0;
        request->rq_net_err = 0;
        request->rq_resend = 0;
        request->rq_restart = 0;
        request->rq_reply_truncate = 0;
	spin_unlock(&request->rq_lock);

        if (!noreply) {
                reply_md.start     = request->rq_repbuf;
                reply_md.length    = request->rq_repbuf_len;
                /* Allow multiple early replies */
                reply_md.threshold = LNET_MD_THRESH_INF;
                /* Manage remote for early replies */
                reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
                        LNET_MD_MANAGE_REMOTE |
                        LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */;
                reply_md.user_ptr  = &request->rq_reply_cbid;
                reply_md.eq_handle = ptlrpc_eq_h;

                /* We must see the unlink callback to unset rq_must_unlink,
                   so we can't auto-unlink */
                rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                  &request->rq_reply_md_h);
                if (rc != 0) {
                        CERROR("LNetMDAttach failed: %d\n", rc);
                        LASSERT (rc == -ENOMEM);
			spin_lock(&request->rq_lock);
			/* ...but the MD attach didn't succeed... */
			request->rq_receiving_reply = 0;
			spin_unlock(&request->rq_lock);
                        GOTO(cleanup_me, rc = -ENOMEM);
                }

                CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
                       ", portal %u\n",
                       request->rq_repbuf_len, request->rq_xid,
                       request->rq_reply_portal);
        }

        /* add references on request for request_out_callback */
        ptlrpc_request_addref(request);
        if (obd->obd_svc_stats != NULL)
                lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
			atomic_read(&request->rq_import->imp_inflight));

	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);

	do_gettimeofday(&request->rq_arrival_time);
	request->rq_sent = cfs_time_current_sec();
	/* We give the server rq_timeout secs to process the req, and
	   add the network latency for our local timeout. */
        request->rq_deadline = request->rq_sent + request->rq_timeout +
                ptlrpc_at_get_net_latency(request);

        ptlrpc_pinger_sending_on_import(request->rq_import);

        DEBUG_REQ(D_INFO, request, "send flg=%x",
                  lustre_msg_get_flags(request->rq_reqmsg));
        rc = ptl_send_buf(&request->rq_req_md_h,
                          request->rq_reqbuf, request->rq_reqdata_len,
                          LNET_NOACK_REQ, &request->rq_req_cbid,
                          connection,
                          request->rq_request_portal,
                          request->rq_xid, 0);
        if (rc == 0)
                GOTO(out, rc);

        ptlrpc_req_finished(request);
        if (noreply)
                GOTO(out, rc);

 cleanup_me:
        /* MEUnlink is safe; the PUT didn't even get off the ground, and
         * nobody apart from the PUT's target has the right nid+XID to
         * access the reply buffer. */
        rc2 = LNetMEUnlink(reply_me_h);
        LASSERT (rc2 == 0);
        /* UNLINKED callback called synchronously */
        LASSERT(!request->rq_receiving_reply);

 cleanup_bulk:
        /* We do sync unlink here as there was no real transfer here so
         * the chance to have long unlink to sluggish net is smaller here. */
        ptlrpc_unregister_bulk(request, 0);
 out:
        if (request->rq_memalloc)
                cfs_memory_pressure_restore(mpflag);
        return rc;
}
Example No. 28
/*
 * we allocate the requested pages atomically.
 */
int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
{
        cfs_waitlink_t  waitlink;
        unsigned long   this_idle = -1;
        cfs_time_t      tick = 0;
        long            now;
        int             p_idx, g_idx;
        int             i;

        LASSERT(desc->bd_iov_count > 0);
        LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);

        /* resent bulk, enc iov might have been allocated previously */
        if (desc->bd_enc_iov != NULL)
                return 0;

        OBD_ALLOC(desc->bd_enc_iov,
                  desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
        if (desc->bd_enc_iov == NULL)
                return -ENOMEM;

        cfs_spin_lock(&page_pools.epp_lock);

        page_pools.epp_st_access++;
again:
        if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
                if (tick == 0)
                        tick = cfs_time_current();

                now = cfs_time_current_sec();

                page_pools.epp_st_missings++;
                page_pools.epp_pages_short += desc->bd_iov_count;

                if (enc_pools_should_grow(desc->bd_iov_count, now)) {
                        page_pools.epp_growing = 1;

                        cfs_spin_unlock(&page_pools.epp_lock);
                        enc_pools_add_pages(page_pools.epp_pages_short / 2);
                        cfs_spin_lock(&page_pools.epp_lock);

                        page_pools.epp_growing = 0;

                        enc_pools_wakeup();
                } else {
                        if (++page_pools.epp_waitqlen >
                            page_pools.epp_st_max_wqlen)
                                page_pools.epp_st_max_wqlen =
                                                page_pools.epp_waitqlen;

                        cfs_set_current_state(CFS_TASK_UNINT);
                        cfs_waitlink_init(&waitlink);
                        cfs_waitq_add(&page_pools.epp_waitq, &waitlink);

                        cfs_spin_unlock(&page_pools.epp_lock);
                        cfs_waitq_wait(&waitlink, CFS_TASK_UNINT);
                        cfs_waitq_del(&page_pools.epp_waitq, &waitlink);
                        LASSERT(page_pools.epp_waitqlen > 0);
                        cfs_spin_lock(&page_pools.epp_lock);
                        page_pools.epp_waitqlen--;
                }

                LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
                page_pools.epp_pages_short -= desc->bd_iov_count;

                this_idle = 0;
                goto again;
        }

        /* record max wait time */
        if (unlikely(tick != 0)) {
                tick = cfs_time_current() - tick;
                if (tick > page_pools.epp_st_max_wait)
                        page_pools.epp_st_max_wait = tick;
        }

        /* proceed with rest of allocation */
        page_pools.epp_free_pages -= desc->bd_iov_count;

        p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;

        for (i = 0; i < desc->bd_iov_count; i++) {
                LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
                desc->bd_enc_iov[i].kiov_page =
                                        page_pools.epp_pools[p_idx][g_idx];
                page_pools.epp_pools[p_idx][g_idx] = NULL;

                if (++g_idx == PAGES_PER_POOL) {
                        p_idx++;
                        g_idx = 0;
                }
        }

        if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
                page_pools.epp_st_lowfree = page_pools.epp_free_pages;

        /*
         * new idle index = (old * weight + new) / (weight + 1)
         */
        if (this_idle == -1) {
                this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
                            page_pools.epp_total_pages;
        }
        page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
                                   this_idle) /
                                  (IDLE_IDX_WEIGHT + 1);

        page_pools.epp_last_access = cfs_time_current_sec();

        cfs_spin_unlock(&page_pools.epp_lock);
        return 0;
}
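
The allocation path above treats the free-page count as a cursor into the two-level epp_pools array and smooths the pool's idle index with a weighted average ("new idle index = (old * weight + new) / (weight + 1)"). The standalone sketch below reproduces just that arithmetic outside of Lustre; PAGES_PER_POOL, IDLE_IDX_MAX and IDLE_IDX_WEIGHT are illustrative values here, not the ones defined in the Lustre headers.

#include <stdio.h>

/* Illustrative constants -- the real values live in the Lustre headers. */
#define PAGES_PER_POOL   512
#define IDLE_IDX_MAX     100
#define IDLE_IDX_WEIGHT  3

static unsigned long idle_idx = IDLE_IDX_MAX;

/* Take 'count' pages: map the new free count to (pool, slot) the same way
 * the allocation loop above does, then fold the instantaneous idle level
 * into the running idle index. */
static void take_pages_sketch(unsigned long *free_pages,
                              unsigned long total_pages, int count)
{
        unsigned long this_idle;
        int p_idx, g_idx;

        *free_pages -= count;

        p_idx = *free_pages / PAGES_PER_POOL;   /* which pool */
        g_idx = *free_pages % PAGES_PER_POOL;   /* slot within that pool */
        printf("next page comes from pool %d, slot %d\n", p_idx, g_idx);

        /* new idle index = (old * weight + new) / (weight + 1) */
        this_idle = *free_pages * IDLE_IDX_MAX / total_pages;
        idle_idx = (idle_idx * IDLE_IDX_WEIGHT + this_idle) /
                   (IDLE_IDX_WEIGHT + 1);
        printf("idle index is now %lu/%d\n", idle_idx, IDLE_IDX_MAX);
}

int main(void)
{
        unsigned long free_pages = 2048, total_pages = 4096;

        take_pages_sketch(&free_pages, total_pages, 256);
        return 0;
}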
Exemplo n.º 29
0
/**
 * Send the reply for request \a req from its reply buffer.
 * \a flags defines the reply type.
 * Returns 0 on success or an error code.
 */
int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
{
        struct ptlrpc_reply_state *rs = req->rq_reply_state;
        struct ptlrpc_connection  *conn;
        int                        rc;

        /* We must already have a reply buffer (only ptlrpc_error() may be
         * called without one). A reply generated by the sptlrpc layer (e.g.
         * an error notification) might have a NULL req->rq_reqmsg; otherwise
         * we must have a request buffer which is either the actual (swabbed)
         * incoming request, or a saved copy if this is a req saved in
         * target_queue_final_reply().
         */
        LASSERT (req->rq_no_reply == 0);
        LASSERT (req->rq_reqbuf != NULL);
        LASSERT (rs != NULL);
        LASSERT ((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult);
        LASSERT (req->rq_repmsg != NULL);
        LASSERT (req->rq_repmsg == rs->rs_msg);
        LASSERT (rs->rs_cb_id.cbid_fn == reply_out_callback);
        LASSERT (rs->rs_cb_id.cbid_arg == rs);

        /* There may be no rq_export during failover */

        if (unlikely(req->rq_export && req->rq_export->exp_obd &&
                     req->rq_export->exp_obd->obd_fail)) {
                /* Failed obd's only send ENODEV */
                req->rq_type = PTL_RPC_MSG_ERR;
                req->rq_status = -ENODEV;
                CDEBUG(D_HA, "sending ENODEV from failed obd %d\n",
                       req->rq_export->exp_obd->obd_minor);
        }

	/* In order to keep interoperability with clients (< 2.3) which
	 * don't have pb_jobid in ptlrpc_body, we have to shrink the
	 * ptlrpc_body in the reply buffer to ptlrpc_body_v2; otherwise the
	 * reply buffer on the client would overflow.
	 *
	 * XXX Remove this whenever we drop interoperability with such clients.
	 */
	req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0,
					   sizeof(struct ptlrpc_body_v2), 1);

        if (req->rq_type != PTL_RPC_MSG_ERR)
                req->rq_type = PTL_RPC_MSG_REPLY;

        lustre_msg_set_type(req->rq_repmsg, req->rq_type);
	lustre_msg_set_status(req->rq_repmsg,
			      ptlrpc_status_hton(req->rq_status));
        lustre_msg_set_opc(req->rq_repmsg,
                req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);

        target_pack_pool_reply(req);

        ptlrpc_at_set_reply(req, flags);

        if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
                conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL);
        else
                conn = ptlrpc_connection_addref(req->rq_export->exp_connection);

        if (unlikely(conn == NULL)) {
                CERROR("not replying on NULL connection\n"); /* bug 9635 */
                return -ENOTCONN;
        }
        ptlrpc_rs_addref(rs);                   /* +1 ref for the network */

        rc = sptlrpc_svc_wrap_reply(req);
        if (unlikely(rc))
                goto out;

        req->rq_sent = cfs_time_current_sec();

        rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
                           (rs->rs_difficult && !rs->rs_no_ack) ?
                           LNET_ACK_REQ : LNET_NOACK_REQ,
			   &rs->rs_cb_id, conn,
			   ptlrpc_req2svc(req)->srv_rep_portal,
                           req->rq_xid, req->rq_reply_off);
out:
        if (unlikely(rc != 0))
                ptlrpc_req_drop_rs(req);
        ptlrpc_connection_put(conn);
        return rc;
}
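
ptlrpc_send_reply() above follows a common ownership pattern: the reply state takes an extra reference before the buffer is handed to LNet, and that reference is only dropped locally if the send never got off the ground; on success it is the completion callback (reply_out_callback) that releases it. A minimal standalone sketch of that pattern, using hypothetical types and helpers rather than the real Lustre ones:

#include <stdio.h>

/* Hypothetical stand-ins for ptlrpc_reply_state and its refcounting. */
struct reply_state { int refcount; };

static void rs_addref(struct reply_state *rs)  { rs->refcount++; }
static void rs_dropref(struct reply_state *rs) { rs->refcount--; }

/* post() stands in for ptl_send_buf(): 0 on success, negative on error. */
static int send_reply_sketch(struct reply_state *rs,
                             int (*post)(const char *buf), const char *buf)
{
        int rc;

        rs_addref(rs);          /* +1 ref for the network */
        rc = post(buf);
        if (rc != 0)
                rs_dropref(rs); /* send never started: undo our ref */
        /* on success the completion callback drops the reference later */
        return rc;
}

static int fake_post_ok(const char *buf)   { (void)buf; return 0; }
static int fake_post_fail(const char *buf) { (void)buf; return -5; }

int main(void)
{
        struct reply_state rs = { .refcount = 1 };

        send_reply_sketch(&rs, fake_post_ok, "reply");   /* refcount == 2 */
        send_reply_sketch(&rs, fake_post_fail, "reply"); /* still 2 */
        printf("refcount = %d\n", rs.refcount);
        return 0;
}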
Exemplo n.º 30
0
/**
 * Send request \a request.
 * If \a noreply is set, don't expect any reply back and don't set up
 * reply buffers.
 * Returns 0 on success or an error code.
 */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
        int rc;
        int rc2;
        int mpflag = 0;
        struct ptlrpc_connection *connection;
        lnet_handle_me_t  reply_me_h;
        lnet_md_t         reply_md;
	struct obd_import *imp = request->rq_import;
	struct obd_device *obd = imp->imp_obd;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
                RETURN(0);

        LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
        LASSERT(request->rq_wait_ctx == 0);

        /* If this is a re-transmit, we're required to have disengaged
         * cleanly from the previous attempt */
        LASSERT(!request->rq_receiving_reply);
	LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
		(imp->imp_state == LUSTRE_IMP_FULL)));

	if (unlikely(obd != NULL && obd->obd_fail)) {
		CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
			obd->obd_name);
		/* this prevents us from waiting in ptlrpc_queue_wait */
		spin_lock(&request->rq_lock);
		request->rq_err = 1;
		spin_unlock(&request->rq_lock);
                request->rq_status = -ENODEV;
                RETURN(-ENODEV);
        }

	connection = imp->imp_connection;

	lustre_msg_set_handle(request->rq_reqmsg,
			      &imp->imp_remote_handle);
	lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
	lustre_msg_set_conn_cnt(request->rq_reqmsg,
				imp->imp_conn_cnt);
	lustre_msghdr_set_flags(request->rq_reqmsg,
				imp->imp_msghdr_flags);

	/* If this is the first time the request is resent for EINPROGRESS,
	 * we need to allocate a new XID (see after_reply()); this is
	 * different from a resend due to reply timeout. */
	if (request->rq_nr_resend != 0 &&
	    list_empty(&request->rq_unreplied_list)) {
		__u64 min_xid = 0;
		/* resend for EINPROGRESS, allocate new xid to avoid reply
		 * reconstruction */
		spin_lock(&imp->imp_lock);
		ptlrpc_assign_next_xid_nolock(request);
		request->rq_mbits = request->rq_xid;
		min_xid = ptlrpc_known_replied_xid(imp);
		spin_unlock(&imp->imp_lock);

		lustre_msg_set_last_xid(request->rq_reqmsg, min_xid);
		DEBUG_REQ(D_RPCTRACE, request, "Allocating new xid for "
			  "resend on EINPROGRESS");
	} else if (request->rq_bulk != NULL) {
		ptlrpc_set_bulk_mbits(request);
		lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits);
	}

	if (list_empty(&request->rq_unreplied_list) ||
	    request->rq_xid <= imp->imp_known_replied_xid) {
		DEBUG_REQ(D_ERROR, request, "xid: "LPU64", replied: "LPU64", "
			  "list_empty:%d\n", request->rq_xid,
			  imp->imp_known_replied_xid,
			  list_empty(&request->rq_unreplied_list));
		LBUG();
	}

	/** When AT is enabled, every request in the FULL import state
	 * should carry MSGHDR_AT_SUPPORT when OBD_CONNECT_AT is set */
	LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
		(imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
		!(imp->imp_connect_data.ocd_connect_flags &
		OBD_CONNECT_AT));

	if (request->rq_resend) {
		lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
		if (request->rq_resend_cb != NULL)
			request->rq_resend_cb(request, &request->rq_async_args);
	}
        if (request->rq_memalloc)
                mpflag = cfs_memory_pressure_get_and_set();

	rc = sptlrpc_cli_wrap_request(request);
	if (rc == -ENOMEM)
		/* set rq_sent so that this request is treated
		 * as a delayed send in the upper layers */
		request->rq_sent = cfs_time_current_sec();
	if (rc)
		GOTO(out, rc);

        /* bulk register should be done after wrap_request() */
        if (request->rq_bulk != NULL) {
                rc = ptlrpc_register_bulk (request);
                if (rc != 0)
                        GOTO(out, rc);
        }

        if (!noreply) {
                LASSERT (request->rq_replen != 0);
                if (request->rq_repbuf == NULL) {
                        LASSERT(request->rq_repdata == NULL);
                        LASSERT(request->rq_repmsg == NULL);
                        rc = sptlrpc_cli_alloc_repbuf(request,
                                                      request->rq_replen);
                        if (rc) {
                                /* this prevents us from looping in
                                 * ptlrpc_queue_wait */
				spin_lock(&request->rq_lock);
				request->rq_err = 1;
				spin_unlock(&request->rq_lock);
                                request->rq_status = rc;
                                GOTO(cleanup_bulk, rc);
                        }
                } else {
                        request->rq_repdata = NULL;
                        request->rq_repmsg = NULL;
                }

                rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
                                  connection->c_peer, request->rq_xid, 0,
                                  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
                if (rc != 0) {
                        CERROR("LNetMEAttach failed: %d\n", rc);
                        LASSERT (rc == -ENOMEM);
                        GOTO(cleanup_bulk, rc = -ENOMEM);
                }
        }

	spin_lock(&request->rq_lock);
	/* We are responsible for unlinking the reply buffer */
	request->rq_reply_unlinked = noreply;
	request->rq_receiving_reply = !noreply;
	/* Clear any flags that may be present from previous sends. */
	request->rq_req_unlinked = 0;
        request->rq_replied = 0;
        request->rq_err = 0;
        request->rq_timedout = 0;
        request->rq_net_err = 0;
        request->rq_resend = 0;
        request->rq_restart = 0;
	request->rq_reply_truncated = 0;
	spin_unlock(&request->rq_lock);

        if (!noreply) {
                reply_md.start     = request->rq_repbuf;
                reply_md.length    = request->rq_repbuf_len;
                /* Allow multiple early replies */
                reply_md.threshold = LNET_MD_THRESH_INF;
                /* Manage remote for early replies */
                reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
                        LNET_MD_MANAGE_REMOTE |
                        LNET_MD_TRUNCATE; /* allow truncation so EOVERFLOW can be reported */
                reply_md.user_ptr  = &request->rq_reply_cbid;
                reply_md.eq_handle = ptlrpc_eq_h;

		/* We must see the unlink callback to set rq_reply_unlinked,
		 * so we can't auto-unlink */
                rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                  &request->rq_reply_md_h);
                if (rc != 0) {
                        CERROR("LNetMDAttach failed: %d\n", rc);
                        LASSERT (rc == -ENOMEM);
			spin_lock(&request->rq_lock);
			/* ...but the MD attach didn't succeed... */
			request->rq_receiving_reply = 0;
			spin_unlock(&request->rq_lock);
                        GOTO(cleanup_me, rc = -ENOMEM);
                }

                CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
                       ", portal %u\n",
                       request->rq_repbuf_len, request->rq_xid,
                       request->rq_reply_portal);
        }

        /* add references on request for request_out_callback */
        ptlrpc_request_addref(request);
	if (obd != NULL && obd->obd_svc_stats != NULL)
		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
			atomic_read(&imp->imp_inflight));

	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);

	do_gettimeofday(&request->rq_sent_tv);
	request->rq_sent = cfs_time_current_sec();
	/* We give the server rq_timeout secs to process the req, and
	   add the network latency for our local timeout. */
        request->rq_deadline = request->rq_sent + request->rq_timeout +
                ptlrpc_at_get_net_latency(request);

	ptlrpc_pinger_sending_on_import(imp);

        DEBUG_REQ(D_INFO, request, "send flg=%x",
                  lustre_msg_get_flags(request->rq_reqmsg));
        rc = ptl_send_buf(&request->rq_req_md_h,
                          request->rq_reqbuf, request->rq_reqdata_len,
                          LNET_NOACK_REQ, &request->rq_req_cbid,
                          connection,
                          request->rq_request_portal,
                          request->rq_xid, 0);
	if (likely(rc == 0))
		GOTO(out, rc);

	request->rq_req_unlinked = 1;
        ptlrpc_req_finished(request);
        if (noreply)
                GOTO(out, rc);

 cleanup_me:
        /* MEUnlink is safe; the PUT didn't even get off the ground, and
         * nobody apart from the PUT's target has the right nid+XID to
         * access the reply buffer. */
        rc2 = LNetMEUnlink(reply_me_h);
        LASSERT (rc2 == 0);
        /* UNLINKED callback called synchronously */
        LASSERT(!request->rq_receiving_reply);

 cleanup_bulk:
        /* We do a synchronous unlink here: no real transfer took place, so
         * the chance of a long unlink due to a sluggish network is smaller. */
        ptlrpc_unregister_bulk(request, 0);
 out:
        if (request->rq_memalloc)
                cfs_memory_pressure_restore(mpflag);
        return rc;
}
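
The deadline computed near the end of ptl_send_rpc() gives the server rq_timeout seconds of processing budget and adds the estimated network latency on top, so the local timer does not fire while a timely reply is still on the wire. A small sketch of that bookkeeping; the helper name and values below are illustrative, not taken from the Lustre tree.

#include <stdio.h>
#include <time.h>

/* Deadline = time the request was sent + server processing budget
 *          + estimated network latency. */
static time_t request_deadline(time_t sent, int rq_timeout, int net_latency)
{
        return sent + rq_timeout + net_latency;
}

int main(void)
{
        time_t sent = time(NULL);
        int rq_timeout = 30;    /* seconds the server gets to process */
        int net_latency = 5;    /* seconds of estimated network latency */

        printf("request expires %ld seconds after send\n",
               (long)(request_deadline(sent, rq_timeout, net_latency) - sent));
        return 0;
}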