/*
 * could be called frequently for query (@nr_to_scan == 0).
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static int enc_pools_shrink(int nr_to_scan, unsigned int gfp_mask)
{
        if (unlikely(nr_to_scan != 0)) {
                cfs_spin_lock(&page_pools.epp_lock);
                nr_to_scan = min(nr_to_scan,
                                 (int)page_pools.epp_free_pages -
                                 PTLRPC_MAX_BRW_PAGES);
                if (nr_to_scan > 0) {
                        enc_pools_release_free_pages(nr_to_scan);
                        CDEBUG(D_SEC, "released %d pages, %ld left\n",
                               nr_to_scan, page_pools.epp_free_pages);

                        page_pools.epp_st_shrinks++;
                        page_pools.epp_last_shrink = cfs_time_current_sec();
                }
                cfs_spin_unlock(&page_pools.epp_lock);
        }

        /*
         * if no pool access for a long time, we consider it's fully idle.
         * a little race here is fine.
         */
        if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
                     CACHE_QUIESCENT_PERIOD)) {
                cfs_spin_lock(&page_pools.epp_lock);
                page_pools.epp_idle_idx = IDLE_IDX_MAX;
                cfs_spin_unlock(&page_pools.epp_lock);
        }

        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
        return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
               (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}
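The return value is the function's answer to the VM's "how much could you
free?" query: the surplus above the reserved PTLRPC_MAX_BRW_PAGES, scaled
down by the idle index. A standalone userspace sketch of just that
arithmetic; IDLE_IDX_MAX is 100 as implied by the proc output below, while
the PTLRPC_MAX_BRW_PAGES value here is only an illustrative assumption.

#include <stdio.h>

/* Illustrative stand-ins for the Lustre constants. */
#define IDLE_IDX_MAX         100
#define PTLRPC_MAX_BRW_PAGES 256

/* Surplus above the reserve, scaled by (IDLE_IDX_MAX - idle_idx) / 100,
 * exactly as in the return statement of enc_pools_shrink(). */
static long shrink_report(long free_pages, long idle_idx)
{
        long surplus = free_pages - PTLRPC_MAX_BRW_PAGES;

        if (surplus < 0)
                surplus = 0;
        return surplus * (IDLE_IDX_MAX - idle_idx) / IDLE_IDX_MAX;
}

int main(void)
{
        printf("%ld\n", shrink_report(1024, 0));    /* 768: whole surplus */
        printf("%ld\n", shrink_report(1024, 50));   /* 384: halfway idle  */
        printf("%ld\n", shrink_report(100, 0));     /* 0: below reserve   */
        return 0;
}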
/*
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static unsigned long enc_pools_shrink_scan(struct shrinker *s,
                                           struct shrink_control *sc)
{
        spin_lock(&page_pools.epp_lock);
        sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
                               page_pools.epp_free_pages -
                               PTLRPC_MAX_BRW_PAGES);
        if (sc->nr_to_scan > 0) {
                enc_pools_release_free_pages(sc->nr_to_scan);
                CDEBUG(D_SEC, "released %ld pages, %ld left\n",
                       (long)sc->nr_to_scan, page_pools.epp_free_pages);

                page_pools.epp_st_shrinks++;
                page_pools.epp_last_shrink = cfs_time_current_sec();
        }
        spin_unlock(&page_pools.epp_lock);

        /*
         * if no pool access for a long time, we consider it's fully idle.
         * a little race here is fine.
         */
        if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
                     CACHE_QUIESCENT_PERIOD)) {
                spin_lock(&page_pools.epp_lock);
                page_pools.epp_idle_idx = IDLE_IDX_MAX;
                spin_unlock(&page_pools.epp_lock);
        }

        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
        return sc->nr_to_scan;
}
/*
 * /proc/fs/lustre/sptlrpc/encrypt_page_pools
 */
int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count,
                               int *eof, void *data)
{
        int rc;

        cfs_spin_lock(&page_pools.epp_lock);

        rc = snprintf(page, count,
                      "physical pages: %lu\n"
                      "pages per pool: %lu\n"
                      "max pages: %lu\n"
                      "max pools: %u\n"
                      "total pages: %lu\n"
                      "total free: %lu\n"
                      "idle index: %lu/100\n"
                      "last shrink: %lds\n"
                      "last access: %lds\n"
                      "max pages reached: %lu\n"
                      "grows: %u\n"
                      "grows failure: %u\n"
                      "shrinks: %u\n"
                      "cache access: %lu\n"
                      "cache missing: %lu\n"
                      "low free mark: %lu\n"
                      "max waitqueue depth: %u\n"
                      "max wait time: "CFS_TIME_T"/%u\n",
                      cfs_num_physpages,
                      PAGES_PER_POOL,
                      page_pools.epp_max_pages,
                      page_pools.epp_max_pools,
                      page_pools.epp_total_pages,
                      page_pools.epp_free_pages,
                      page_pools.epp_idle_idx,
                      cfs_time_current_sec() - page_pools.epp_last_shrink,
                      cfs_time_current_sec() - page_pools.epp_last_access,
                      page_pools.epp_st_max_pages,
                      page_pools.epp_st_grows,
                      page_pools.epp_st_grow_fails,
                      page_pools.epp_st_shrinks,
                      page_pools.epp_st_access,
                      page_pools.epp_st_missings,
                      page_pools.epp_st_lowfree,
                      page_pools.epp_st_max_wqlen,
                      page_pools.epp_st_max_wait,
                      CFS_HZ);

        cfs_spin_unlock(&page_pools.epp_lock);

        return rc;
}
/*
 * /proc/fs/lustre/sptlrpc/encrypt_page_pools
 */
int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
{
        int rc;

        spin_lock(&page_pools.epp_lock);

        rc = seq_printf(m,
                        "physical pages: %lu\n"
                        "pages per pool: %lu\n"
                        "max pages: %lu\n"
                        "max pools: %u\n"
                        "total pages: %lu\n"
                        "total free: %lu\n"
                        "idle index: %lu/100\n"
                        "last shrink: %lds\n"
                        "last access: %lds\n"
                        "max pages reached: %lu\n"
                        "grows: %u\n"
                        "grows failure: %u\n"
                        "shrinks: %u\n"
                        "cache access: %lu\n"
                        "cache missing: %lu\n"
                        "low free mark: %lu\n"
                        "max waitqueue depth: %u\n"
                        "max wait time: "CFS_TIME_T"/%lu\n",
                        totalram_pages,
                        PAGES_PER_POOL,
                        page_pools.epp_max_pages,
                        page_pools.epp_max_pools,
                        page_pools.epp_total_pages,
                        page_pools.epp_free_pages,
                        page_pools.epp_idle_idx,
                        cfs_time_current_sec() - page_pools.epp_last_shrink,
                        cfs_time_current_sec() - page_pools.epp_last_access,
                        page_pools.epp_st_max_pages,
                        page_pools.epp_st_grows,
                        page_pools.epp_st_grow_fails,
                        page_pools.epp_st_shrinks,
                        page_pools.epp_st_access,
                        page_pools.epp_st_missings,
                        page_pools.epp_st_lowfree,
                        page_pools.epp_st_max_wqlen,
                        page_pools.epp_st_max_wait,
                        msecs_to_jiffies(MSEC_PER_SEC));

        spin_unlock(&page_pools.epp_lock);

        return rc;
}
static int lfsck_namespace_checkpoint(const struct lu_env *env,
                                      struct lfsck_component *com, bool init)
{
        struct lfsck_instance  *lfsck = com->lc_lfsck;
        struct lfsck_namespace *ns    = com->lc_file_ram;
        int                     rc;

        if (com->lc_new_checked == 0 && !init)
                return 0;

        down_write(&com->lc_sem);
        if (init) {
                ns->ln_pos_latest_start = lfsck->li_pos_current;
        } else {
                ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
                ns->ln_run_time_phase1 +=
                        cfs_duration_sec(cfs_time_current() +
                                         HALF_SEC -
                                         lfsck->li_time_last_checkpoint);
                ns->ln_time_last_checkpoint = cfs_time_current_sec();
                ns->ln_items_checked += com->lc_new_checked;
                com->lc_new_checked = 0;
        }

        rc = lfsck_namespace_store(env, com, false);
        up_write(&com->lc_sem);

        return rc;
}
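The HALF_SEC term above rounds the accumulated phase-1 run time to the
nearest second instead of truncating, since cfs_duration_sec() divides a
jiffies interval by HZ. A userspace sketch of the effect, with an
illustrative HZ value:

#include <stdio.h>

#define HZ_ILLUS 100                 /* assumed tick rate */
#define HALF_SEC (HZ_ILLUS / 2)

/* cfs_duration_sec()-style truncating conversion */
static long duration_sec(long jiffies)
{
        return jiffies / HZ_ILLUS;
}

int main(void)
{
        long elapsed = 170;          /* 1.7 s worth of jiffies */

        printf("truncated: %lds\n", duration_sec(elapsed));            /* 1 */
        printf("rounded:   %lds\n", duration_sec(elapsed + HALF_SEC)); /* 2 */
        return 0;
}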
/*
 * Client's outgoing request callback
 */
void request_out_callback(lnet_event_t *ev)
{
        struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
        struct ptlrpc_request *req  = cbid->cbid_arg;

        LASSERT(ev->type == LNET_EVENT_SEND || ev->type == LNET_EVENT_UNLINK);
        LASSERT(ev->unlinked);

        DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);

        sptlrpc_request_out_callback(req);
        req->rq_real_sent = cfs_time_current_sec();

        if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
                /* Failed send: make it seem like the reply timed out, just
                 * like failing sends in client.c does currently... */
                spin_lock(&req->rq_lock);
                req->rq_net_err = 1;
                spin_unlock(&req->rq_lock);

                ptlrpc_client_wake_req(req);
        }

        ptlrpc_req_finished(req);
}
static int llog_read_header(const struct lu_env *env,
                            struct llog_handle *handle,
                            struct obd_uuid *uuid)
{
        struct llog_operations *lop;
        int                     rc;

        rc = llog_handle2ops(handle, &lop);
        if (rc)
                return rc;

        if (lop->lop_read_header == NULL)
                return -EOPNOTSUPP;

        rc = lop->lop_read_header(env, handle);
        if (rc == LLOG_EEMPTY) {
                struct llog_log_hdr *llh = handle->lgh_hdr;

                handle->lgh_last_idx = 0; /* header is record with index 0 */
                llh->llh_count = 1;       /* for the header record */
                llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
                llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
                llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
                llh->llh_timestamp = cfs_time_current_sec();
                if (uuid)
                        memcpy(&llh->llh_tgtuuid, uuid,
                               sizeof(llh->llh_tgtuuid));
                llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap);
                ext2_set_bit(0, llh->llh_bitmap);
                rc = 0;
        }
        return rc;
}
static struct ptlrpc_cli_ctx *ctx_create_kr(struct ptlrpc_sec *sec,
                                            struct vfs_cred *vcred)
{
        struct ptlrpc_cli_ctx      *ctx;
        struct gss_cli_ctx_keyring *gctx_kr;

        OBD_ALLOC_PTR(gctx_kr);
        if (gctx_kr == NULL)
                return NULL;

        OBD_ALLOC_PTR(gctx_kr->gck_timer);
        if (gctx_kr->gck_timer == NULL) {
                OBD_FREE_PTR(gctx_kr);
                return NULL;
        }
        init_timer(gctx_kr->gck_timer);

        ctx = &gctx_kr->gck_base.gc_base;

        if (gss_cli_ctx_init_common(sec, ctx, &gss_keyring_ctxops, vcred)) {
                OBD_FREE_PTR(gctx_kr->gck_timer);
                OBD_FREE_PTR(gctx_kr);
                return NULL;
        }

        ctx->cc_expire = cfs_time_current_sec() + KEYRING_UPCALL_TIMEOUT;
        clear_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags);
        atomic_inc(&ctx->cc_refcount); /* for the caller */

        return ctx;
}
int sptlrpc_enc_pool_init(void)
{
        DEF_SHRINKER_VAR(shvar, enc_pools_shrink,
                         enc_pools_shrink_count, enc_pools_shrink_scan);
        /*
         * maximum capacity is 1/8 of total physical memory.
         * is the 1/8 a good number?
         */
        page_pools.epp_max_pages = totalram_pages / 8;
        page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);

        init_waitqueue_head(&page_pools.epp_waitq);
        page_pools.epp_waitqlen = 0;
        page_pools.epp_pages_short = 0;

        page_pools.epp_growing = 0;

        page_pools.epp_idle_idx = 0;
        page_pools.epp_last_shrink = cfs_time_current_sec();
        page_pools.epp_last_access = cfs_time_current_sec();

        spin_lock_init(&page_pools.epp_lock);
        page_pools.epp_total_pages = 0;
        page_pools.epp_free_pages = 0;

        page_pools.epp_st_max_pages = 0;
        page_pools.epp_st_grows = 0;
        page_pools.epp_st_grow_fails = 0;
        page_pools.epp_st_shrinks = 0;
        page_pools.epp_st_access = 0;
        page_pools.epp_st_missings = 0;
        page_pools.epp_st_lowfree = 0;
        page_pools.epp_st_max_wqlen = 0;
        page_pools.epp_st_max_wait = 0;

        enc_pools_alloc();
        if (page_pools.epp_pools == NULL)
                return -ENOMEM;

        pools_shrinker = set_shrinker(pools_shrinker_seeks, &shvar);
        if (pools_shrinker == NULL) {
                enc_pools_free();
                return -ENOMEM;
        }

        return 0;
}
int sptlrpc_enc_pool_init(void)
{
        /*
         * maximum capacity is 1/8 of total physical memory.
         * is the 1/8 a good number?
         */
        page_pools.epp_max_pages = cfs_num_physpages / 8;
        page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);

        cfs_waitq_init(&page_pools.epp_waitq);
        page_pools.epp_waitqlen = 0;
        page_pools.epp_pages_short = 0;

        page_pools.epp_growing = 0;

        page_pools.epp_idle_idx = 0;
        page_pools.epp_last_shrink = cfs_time_current_sec();
        page_pools.epp_last_access = cfs_time_current_sec();

        cfs_spin_lock_init(&page_pools.epp_lock);
        page_pools.epp_total_pages = 0;
        page_pools.epp_free_pages = 0;

        page_pools.epp_st_max_pages = 0;
        page_pools.epp_st_grows = 0;
        page_pools.epp_st_grow_fails = 0;
        page_pools.epp_st_shrinks = 0;
        page_pools.epp_st_access = 0;
        page_pools.epp_st_missings = 0;
        page_pools.epp_st_lowfree = 0;
        page_pools.epp_st_max_wqlen = 0;
        page_pools.epp_st_max_wait = 0;

        enc_pools_alloc();
        if (page_pools.epp_pools == NULL)
                return -ENOMEM;

        pools_shrinker = cfs_set_shrinker(pools_shrinker_seeks,
                                          enc_pools_shrink);
        if (pools_shrinker == NULL) {
                enc_pools_free();
                return -ENOMEM;
        }

        return 0;
}
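Both init variants size the cache identically: at most 1/8 of physical
memory, carved into pools. npages_to_npools() is not part of this excerpt;
the sketch below assumes the conventional sec_bulk.c layout in which one
pool is a single page filled with page pointers (512 slots with 4 KiB pages
on a 64-bit machine), rounding up so a partial pool still costs a pointer
page.

#include <stdio.h>

#define PAGE_SIZE_B    4096UL                       /* assumed page size */
#define PAGES_PER_POOL (PAGE_SIZE_B / sizeof(void *))

static unsigned long npages_to_npools(unsigned long npages)
{
        return (npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL;
}

int main(void)
{
        unsigned long totalram_pages = 4UL << 20;   /* 16 GiB of 4 KiB pages */
        unsigned long max_pages = totalram_pages / 8;

        printf("max enc pages: %lu (%lu MiB)\n",
               max_pages, max_pages * PAGE_SIZE_B >> 20);
        printf("max pools:     %lu\n", npages_to_npools(max_pages));
        return 0;
}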
/**
 * Disconnect a bulk desc from the network. Idempotent. Not
 * thread-safe (i.e. only interlocks with completion callback).
 * Returns 1 on success or 0 if network unregistration failed for whatever
 * reason.
 */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
        struct ptlrpc_bulk_desc *desc = req->rq_bulk;
        struct l_wait_info       lwi;
        int                      rc;
        ENTRY;

        LASSERT(!in_interrupt());               /* might sleep */

        /* Let's setup deadline for reply unlink. */
        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
            async && req->rq_bulk_deadline == 0)
                req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;

        if (ptlrpc_client_bulk_active(req) == 0)        /* completed or */
                RETURN(1);                              /* never registered */

        LASSERT(desc->bd_req == req);   /* bd_req NULL until registered */

        /* the unlink ensures the callback happens ASAP and is the last
         * one.  If it fails, it must be because completion just happened,
         * but we must still l_wait_event() in this case to give liblustre
         * a chance to run client_bulk_callback() */
        mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

        if (ptlrpc_client_bulk_active(req) == 0)        /* completed or */
                RETURN(1);                              /* never registered */

        /* Move to "Unregistering" phase as bulk was not unlinked yet. */
        ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);

        /* Do not wait for unlink to finish. */
        if (async)
                RETURN(0);

        for (;;) {
#ifdef __KERNEL__
                /* The wq argument is ignored by user-space wait_event macros */
                wait_queue_head_t *wq = (req->rq_set != NULL) ?
                                        &req->rq_set->set_waitq :
                                        &req->rq_reply_waitq;
#endif
                /* Network access will complete in finite time but the HUGE
                 * timeout lets us CWARN for visibility of sluggish NALs */
                lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
                                           cfs_time_seconds(1), NULL, NULL);
                rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi);
                if (rc == 0) {
                        ptlrpc_rqphase_move(req, req->rq_next_phase);
                        RETURN(1);
                }

                LASSERT(rc == -ETIMEDOUT);
                DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
                          desc);
        }
        RETURN(0);
}
static struct obd_capa *osd_capa_get(const struct lu_env *env,
                                     struct dt_object *dt,
                                     struct lustre_capa *old, __u64 opc)
{
        struct osd_thread_info *info = osd_oti_get(env);
        const struct lu_fid    *fid  = lu_object_fid(&dt->do_lu);
        struct osd_object      *obj  = osd_dt_obj(dt);
        struct osd_device      *dev  = osd_obj2dev(obj);
        struct lustre_capa_key *key  = &info->oti_capa_key;
        struct lustre_capa     *capa = &info->oti_capa;
        struct obd_capa        *oc;
        int                     rc;
        ENTRY;

        if (!dev->od_fl_capa)
                RETURN(ERR_PTR(-ENOENT));

        LASSERT(dt_object_exists(dt));
        LASSERT(osd_invariant(obj));

        /* renewal sanity check */
        if (old && osd_object_auth(env, dt, old, opc))
                RETURN(ERR_PTR(-EACCES));

        capa->lc_fid = *fid;
        capa->lc_opc = opc;
        capa->lc_uid = 0;
        capa->lc_flags = dev->od_capa_alg << 24;
        capa->lc_timeout = dev->od_capa_timeout;
        capa->lc_expiry = 0;

        oc = capa_lookup(dev->od_capa_hash, capa, 1);
        if (oc) {
                LASSERT(!capa_is_expired(oc));
                RETURN(oc);
        }

        spin_lock(&capa_lock);
        *key = dev->od_capa_keys[1];
        spin_unlock(&capa_lock);

        capa->lc_keyid = key->lk_keyid;
        capa->lc_expiry = cfs_time_current_sec() + dev->od_capa_timeout;

        rc = capa_hmac(capa->lc_hmac, capa, key->lk_key);
        if (rc) {
                DEBUG_CAPA(D_ERROR, capa, "HMAC failed: %d for", rc);
                LBUG();
                RETURN(ERR_PTR(rc));
        }

        oc = capa_add(dev->od_capa_hash, capa);
        RETURN(oc);
}
static int lfsck_namespace_post(const struct lu_env *env,
                                struct lfsck_component *com,
                                int result, bool init)
{
        struct lfsck_instance  *lfsck = com->lc_lfsck;
        struct lfsck_namespace *ns    = com->lc_file_ram;
        int                     rc;

        down_write(&com->lc_sem);

        spin_lock(&lfsck->li_lock);
        if (!init)
                ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
        if (result > 0) {
                ns->ln_status = LS_SCANNING_PHASE2;
                ns->ln_flags |= LF_SCANNED_ONCE;
                ns->ln_flags &= ~LF_UPGRADE;
                cfs_list_del_init(&com->lc_link);
                cfs_list_del_init(&com->lc_link_dir);
                cfs_list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
        } else if (result == 0) {
                ns->ln_status = lfsck->li_status;
                if (ns->ln_status == 0)
                        ns->ln_status = LS_STOPPED;
                if (ns->ln_status != LS_PAUSED) {
                        cfs_list_del_init(&com->lc_link);
                        cfs_list_del_init(&com->lc_link_dir);
                        cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
                }
        } else {
                ns->ln_status = LS_FAILED;
                cfs_list_del_init(&com->lc_link);
                cfs_list_del_init(&com->lc_link_dir);
                cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
        }
        spin_unlock(&lfsck->li_lock);

        if (!init) {
                ns->ln_run_time_phase1 +=
                        cfs_duration_sec(cfs_time_current() +
                                         HALF_SEC -
                                         lfsck->li_time_last_checkpoint);
                ns->ln_time_last_checkpoint = cfs_time_current_sec();
                ns->ln_items_checked += com->lc_new_checked;
                com->lc_new_checked = 0;
        }

        rc = lfsck_namespace_store(env, com, false);

        up_write(&com->lc_sem);

        return rc;
}
/**
 * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
 */
void libcfs_debug_dumplog_internal(void *arg)
{
        CFS_DECL_JOURNAL_DATA;

        CFS_PUSH_JOURNAL;

        if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) {
                snprintf(debug_file_name, sizeof(debug_file_name) - 1,
                         "%s.%ld." LPLD, libcfs_debug_file_path_arr,
                         cfs_time_current_sec(), (long_ptr_t)arg);
                printk(CFS_KERN_ALERT "LustreError: dumping log to %s\n",
                       debug_file_name);
                cfs_tracefile_dump_all_pages(debug_file_name);
                libcfs_run_debug_log_upcall(debug_file_name);
        }
        CFS_POP_JOURNAL;
}
static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
{
        struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
        struct ptlrpc_service      *svc   = svcpt->scp_service;
        int service_time = max_t(int, cfs_time_current_sec() -
                                 req->rq_arrival_time.tv_sec, 1);

        if (!(flags & PTLRPC_REPLY_EARLY) &&
            (req->rq_type != PTL_RPC_MSG_ERR) &&
            (req->rq_reqmsg != NULL) &&
            !(lustre_msg_get_flags(req->rq_reqmsg) &
              (MSG_RESENT | MSG_REPLAY |
               MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
                /* early replies, errors and recovery requests don't count
                 * toward our service time estimate */
                int oldse = at_measured(&svcpt->scp_at_estimate, service_time);

                if (oldse != 0) {
                        DEBUG_REQ(D_ADAPTTO, req,
                                  "svc %s changed estimate from %d to %d",
                                  svc->srv_name, oldse,
                                  at_get(&svcpt->scp_at_estimate));
                }
        }
        /* Report actual service time for client latency calc */
        lustre_msg_set_service_time(req->rq_repmsg, service_time);
        /* Report service time estimate for future client reqs, but report 0
         * (to be ignored by client) if it's an error reply during recovery.
         * (bz15815) */
        if (req->rq_type == PTL_RPC_MSG_ERR &&
            (req->rq_export == NULL ||
             req->rq_export->exp_obd->obd_recovering))
                lustre_msg_set_timeout(req->rq_repmsg, 0);
        else
                lustre_msg_set_timeout(req->rq_repmsg,
                                       at_get(&svcpt->scp_at_estimate));

        if (req->rq_reqmsg &&
            !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
                CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x "
                       "req_flags=%#x magic=%d:%x/%x len=%d\n",
                       flags, lustre_msg_get_flags(req->rq_reqmsg),
                       lustre_msg_is_v1(req->rq_reqmsg),
                       lustre_msg_get_magic(req->rq_reqmsg),
                       lustre_msg_get_magic(req->rq_repmsg),
                       req->rq_replen);
        }
}
/*
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static unsigned long enc_pools_shrink_count(struct shrinker *s,
                                            struct shrink_control *sc)
{
        /*
         * if no pool access for a long time, we consider it's fully idle.
         * a little race here is fine.
         */
        if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
                     CACHE_QUIESCENT_PERIOD)) {
                spin_lock(&page_pools.epp_lock);
                page_pools.epp_idle_idx = IDLE_IDX_MAX;
                spin_unlock(&page_pools.epp_lock);
        }

        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
        return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
               (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}
int llog_read_header(const struct lu_env *env, struct llog_handle *handle,
                     const struct obd_uuid *uuid)
{
        struct llog_operations *lop;
        int                     rc;
        ENTRY;

        rc = llog_handle2ops(handle, &lop);
        if (rc)
                RETURN(rc);

        if (lop->lop_read_header == NULL)
                RETURN(-EOPNOTSUPP);

        rc = lop->lop_read_header(env, handle);
        if (rc == LLOG_EEMPTY) {
                struct llog_log_hdr *llh = handle->lgh_hdr;

                /* lrh_len should be initialized in llog_init_handle */
                handle->lgh_last_idx = 0; /* header is record with index 0 */
                handle->lgh_write_offset = 0;
                llh->llh_count = 1;       /* for the header record */
                llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
                LASSERT(handle->lgh_ctxt->loc_chunk_size >=
                        LLOG_MIN_CHUNK_SIZE);
                llh->llh_hdr.lrh_len = handle->lgh_ctxt->loc_chunk_size;
                llh->llh_hdr.lrh_index = 0;
                llh->llh_timestamp = cfs_time_current_sec();
                if (uuid)
                        memcpy(&llh->llh_tgtuuid, uuid,
                               sizeof(llh->llh_tgtuuid));
                llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap);
                /* Since update llog header might also call this function,
                 * let's reset the bitmap to 0 here */
                memset(LLOG_HDR_BITMAP(llh), 0,
                       llh->llh_hdr.lrh_len - llh->llh_bitmap_offset -
                       sizeof(llh->llh_tail));
                ext2_set_bit(0, LLOG_HDR_BITMAP(llh));
                LLOG_HDR_TAIL(llh)->lrt_len = llh->llh_hdr.lrh_len;
                LLOG_HDR_TAIL(llh)->lrt_index = llh->llh_hdr.lrh_index;
                rc = 0;
        }
        RETURN(rc);
}
/**
 * Add a record to the end of link ea buf
 **/
int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
                   const struct lu_fid *pfid)
{
        struct link_ea_header *leh = ldata->ld_leh;
        int                    reclen;

        LASSERT(leh != NULL);

        if (lname == NULL || pfid == NULL)
                return -EINVAL;

        reclen = lname->ln_namelen + sizeof(struct link_ea_entry);
        if (unlikely(leh->leh_len + reclen > MAX_LINKEA_SIZE)) {
                /* Use 32 bits to record the overflow time. Although this
                 * truncates the 64-bit value returned by
                 * cfs_time_current_sec() to 32 bits, the result is still
                 * good for about 140 years, which is enough. */
                leh->leh_overflow_time = cfs_time_current_sec();
                if (unlikely(leh->leh_overflow_time == 0))
                        leh->leh_overflow_time++;

                CDEBUG(D_INODE, "Not enough space to hold linkea entry '"
                       DFID": %.*s' at %u\n", PFID(pfid), lname->ln_namelen,
                       lname->ln_name, leh->leh_overflow_time);
                return 0;
        }

        if (leh->leh_len + reclen > ldata->ld_buf->lb_len) {
                if (lu_buf_check_and_grow(ldata->ld_buf,
                                          leh->leh_len + reclen) < 0)
                        return -ENOMEM;

                leh = ldata->ld_leh = ldata->ld_buf->lb_buf;
        }

        ldata->ld_lee = ldata->ld_buf->lb_buf + leh->leh_len;
        ldata->ld_reclen = linkea_entry_pack(ldata->ld_lee, lname, pfid);
        leh->leh_len += ldata->ld_reclen;
        leh->leh_reccount++;
        CDEBUG(D_INODE, "New link_ea name '"DFID":%.*s' is added\n",
               PFID(pfid), lname->ln_namelen, lname->ln_name);
        return 0;
}
/**
 * Mark the linkEA as overflow with current timestamp,
 * and remove the last linkEA entry.
 *
 * Return the new linkEA size.
 */
int linkea_overflow_shrink(struct linkea_data *ldata)
{
        struct link_ea_header *leh;
        struct lu_name         tname;
        struct lu_fid          tfid;
        int                    count;

        leh = ldata->ld_leh = ldata->ld_buf->lb_buf;
        if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) {
                leh->leh_magic = LINK_EA_MAGIC;
                leh->leh_reccount = __swab32(leh->leh_reccount);
                leh->leh_overflow_time = __swab32(leh->leh_overflow_time);
                leh->leh_padding = __swab32(leh->leh_padding);
        }

        LASSERT(leh->leh_reccount > 0);

        leh->leh_len = sizeof(struct link_ea_header);
        leh->leh_reccount--;
        if (unlikely(leh->leh_reccount == 0))
                return 0;

        leh->leh_overflow_time = cfs_time_current_sec();
        if (unlikely(leh->leh_overflow_time == 0))
                leh->leh_overflow_time++;
        ldata->ld_reclen = 0;
        ldata->ld_lee = (struct link_ea_entry *)(leh + 1);
        for (count = 0; count < leh->leh_reccount; count++) {
                linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen,
                                    &tname, &tfid);
                leh->leh_len += ldata->ld_reclen;
                ldata->ld_lee = (struct link_ea_entry *)
                                ((char *)ldata->ld_lee + ldata->ld_reclen);
        }

        linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, &tname, &tfid);
        CDEBUG(D_INODE, "Not enough space to hold the last linkea entry '"
               DFID": %.*s', shrink it, left %d linkea entries, size %llu\n",
               PFID(&tfid), tname.ln_namelen, tname.ln_name,
               leh->leh_reccount, leh->leh_len);

        return leh->leh_len;
}
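Both linkea paths persist the overflow moment in a 32-bit field where zero
means "never overflowed", so a timestamp that happens to truncate to zero
is nudged to one. A minimal sketch of that convention:

#include <stdio.h>
#include <stdint.h>
#include <time.h>

/* 64-bit seconds truncated to 32 bits, with 0 reserved as "unset". */
static uint32_t overflow_stamp(void)
{
        uint32_t stamp = (uint32_t)time(NULL);

        if (stamp == 0)
                stamp = 1;
        return stamp;
}

int main(void)
{
        printf("overflow time: %u\n", overflow_stamp());
        return 0;
}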
/*
 * could be called frequently for query (@nr_to_scan == 0).
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
{
        if (unlikely(shrink_param(sc, nr_to_scan) != 0)) {
                spin_lock(&page_pools.epp_lock);
                shrink_param(sc, nr_to_scan) = min_t(unsigned long,
                                        shrink_param(sc, nr_to_scan),
                                        page_pools.epp_free_pages -
                                        PTLRPC_MAX_BRW_PAGES);
                if (shrink_param(sc, nr_to_scan) > 0) {
                        enc_pools_release_free_pages(shrink_param(sc,
                                                                  nr_to_scan));
                        CDEBUG(D_SEC, "released %ld pages, %ld left\n",
                               (long)shrink_param(sc, nr_to_scan),
                               page_pools.epp_free_pages);

                        page_pools.epp_st_shrinks++;
                        page_pools.epp_last_shrink = cfs_time_current_sec();
                }
                spin_unlock(&page_pools.epp_lock);
        }

        /*
         * if no pool access for a long time, we consider it's fully idle.
         * a little race here is fine.
         */
        if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
                     CACHE_QUIESCENT_PERIOD)) {
                spin_lock(&page_pools.epp_lock);
                page_pools.epp_idle_idx = IDLE_IDX_MAX;
                spin_unlock(&page_pools.epp_lock);
        }

        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
        return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
               (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}
static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v)
{
        struct obd_device *dev = seq->private;
        struct client_obd *cli = &dev->u.cli;
        struct ptlrpc_sec *sec = NULL;
        char               str[32];

        LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
                strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
                strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0 ||
                strcmp(dev->obd_type->typ_name, LUSTRE_LWP_NAME) == 0 ||
                strcmp(dev->obd_type->typ_name, LUSTRE_OSP_NAME) == 0);

        if (cli->cl_import)
                sec = sptlrpc_import_sec_ref(cli->cl_import);
        if (sec == NULL)
                goto out;

        sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str));

        seq_printf(seq, "rpc flavor: %s\n",
                   sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc));
        seq_printf(seq, "bulk flavor: %s\n",
                   sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str)));
        seq_printf(seq, "flags: %s\n",
                   sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)));
        seq_printf(seq, "id: %d\n", sec->ps_id);
        seq_printf(seq, "refcount: %d\n", atomic_read(&sec->ps_refcount));
        seq_printf(seq, "nctx: %d\n", atomic_read(&sec->ps_nctx));
        seq_printf(seq, "gc interval %ld\n", sec->ps_gc_interval);
        seq_printf(seq, "gc next %ld\n",
                   sec->ps_gc_interval ?
                   sec->ps_gc_next - cfs_time_current_sec() : 0);

        sptlrpc_sec_put(sec);
out:
        return 0;
}
/**
 * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
 */
void libcfs_debug_dumplog_internal(void *arg)
{
        static time_t last_dump_time;
        time_t        current_time;
        DECL_JOURNAL_DATA;

        PUSH_JOURNAL;

        current_time = cfs_time_current_sec();

        if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0 &&
            current_time > last_dump_time) {
                last_dump_time = current_time;
                snprintf(debug_file_name, sizeof(debug_file_name) - 1,
                         "%s.%ld." LPLD, libcfs_debug_file_path_arr,
                         current_time, (long_ptr_t)arg);
                printk(KERN_ALERT "LustreError: dumping log to %s\n",
                       debug_file_name);
                cfs_tracefile_dump_all_pages(debug_file_name);
                libcfs_run_debug_log_upcall(debug_file_name);
        }
        POP_JOURNAL;
}
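The newer variant gates dumping to at most once per wall-clock second with
a static timestamp and no locking; the comparison tolerates the benign race
between concurrent dumpers. The same gate in a standalone userspace sketch:

#include <stdio.h>
#include <time.h>

static int should_dump(time_t *last_dump_time)
{
        time_t now = time(NULL);

        if (now <= *last_dump_time)       /* already dumped this second */
                return 0;
        *last_dump_time = now;
        return 1;
}

int main(void)
{
        static time_t last;

        printf("first call:  %d\n", should_dump(&last));   /* 1 */
        printf("second call: %d\n", should_dump(&last));   /* 0, same second */
        return 0;
}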
int mdt_reint_setxattr(struct mdt_thread_info *info,
                       struct mdt_lock_handle *unused)
{
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_ucred         *uc  = mdt_ucred(info);
        struct mdt_lock_handle  *lh;
        const struct lu_env     *env = info->mti_env;
        struct lu_buf           *buf = &info->mti_buf;
        struct mdt_reint_record *rr  = &info->mti_rr;
        struct md_attr          *ma  = &info->mti_attr;
        struct lu_attr          *attr = &info->mti_attr.ma_attr;
        struct mdt_object       *obj;
        struct md_object        *child;
        __u64                    valid = attr->la_valid;
        const char              *xattr_name = rr->rr_name;
        int                      xattr_len = rr->rr_eadatalen;
        __u64                    lockpart;
        int                      rc;
        posix_acl_xattr_header  *new_xattr = NULL;
        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
        __u32                    perm;
        ENTRY;

        CDEBUG(D_INODE, "setxattr for "DFID"\n", PFID(rr->rr_fid1));

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SETXATTR))
                RETURN(err_serious(-ENOMEM));

        CDEBUG(D_INODE, "%s xattr %s\n",
               valid & OBD_MD_FLXATTR ? "set" : "remove", xattr_name);

        rc = mdt_init_ucred_reint(info);
        if (rc != 0)
                RETURN(rc);

        if (valid & OBD_MD_FLRMTRSETFACL) {
                if (unlikely(!remote))
                        GOTO(out, rc = err_serious(-EINVAL));

                perm = mdt_identity_get_perm(uc->mu_identity, remote,
                                             req->rq_peer.nid);
                if (!(perm & CFS_RMTACL_PERM))
                        GOTO(out, rc = err_serious(-EPERM));
        }

        if (strncmp(xattr_name, XATTR_USER_PREFIX,
                    sizeof(XATTR_USER_PREFIX) - 1) == 0) {
                if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR))
                        GOTO(out, rc = -EOPNOTSUPP);
                if (strcmp(xattr_name, XATTR_NAME_LOV) == 0)
                        GOTO(out, rc = -EACCES);
                if (strcmp(xattr_name, XATTR_NAME_LMA) == 0)
                        GOTO(out, rc = 0);
                if (strcmp(xattr_name, XATTR_NAME_LINK) == 0)
                        GOTO(out, rc = 0);
        } else if ((valid & OBD_MD_FLXATTR) &&
                   (strncmp(xattr_name, XATTR_NAME_ACL_ACCESS,
                            sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0 ||
                    strncmp(xattr_name, XATTR_NAME_ACL_DEFAULT,
                            sizeof(XATTR_NAME_ACL_DEFAULT) - 1) == 0)) {
                /* currently Lustre limits the ACL access size */
                if (xattr_len > LUSTRE_POSIX_ACL_MAX_SIZE)
                        GOTO(out, rc = -ERANGE);
        }

        lockpart = MDS_INODELOCK_UPDATE;
        /* Revoke all clients' lookup lock, since the access
         * permissions for this inode is changed when ACL_ACCESS is
         * set. This isn't needed for ACL_DEFAULT, since that does
         * not change the access permissions of this inode, nor any
         * other existing inodes. It is setting the ACLs inherited
         * by new directories/files at create time. */
        if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
                lockpart |= MDS_INODELOCK_LOOKUP;

        lh = &info->mti_lh[MDT_LH_PARENT];
        /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
         * to cancel them. */
        mdt_lock_reg_init(lh, LCK_EX);
        obj = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
        if (IS_ERR(obj))
                GOTO(out, rc = PTR_ERR(obj));

        info->mti_mos = obj;
        rc = mdt_version_get_check_save(info, obj, 0);
        if (rc)
                GOTO(out_unlock, rc);

        if (unlikely(!(valid & OBD_MD_FLCTIME))) {
                /* This isn't strictly an error, but all current clients
                 * should set OBD_MD_FLCTIME when setting attributes. */
                CWARN("%s: client did not set OBD_MD_FLCTIME when "
                      "setxattr %s: [object "DFID"] [valid "LPU64"]\n",
                      info->mti_exp->exp_obd->obd_name, xattr_name,
                      PFID(rr->rr_fid1), valid);
                attr->la_ctime = cfs_time_current_sec();
        }
        attr->la_valid = LA_CTIME;
        child = mdt_object_child(obj);
        if (valid & OBD_MD_FLXATTR) {
                char *xattr = (void *)rr->rr_eadata;

                if (xattr_len > 0) {
                        int flags = 0;

                        if (valid & OBD_MD_FLRMTLSETFACL) {
                                if (unlikely(!remote))
                                        GOTO(out_unlock, rc = -EINVAL);

                                xattr_len = mdt_rmtlsetfacl(info, child,
                                                xattr_name,
                                                (ext_acl_xattr_header *)xattr,
                                                &new_xattr);
                                if (xattr_len < 0)
                                        GOTO(out_unlock, rc = xattr_len);

                                xattr = (char *)new_xattr;
                        }

                        if (attr->la_flags & XATTR_REPLACE)
                                flags |= LU_XATTR_REPLACE;
                        if (attr->la_flags & XATTR_CREATE)
                                flags |= LU_XATTR_CREATE;

                        mdt_fail_write(env, info->mti_mdt->mdt_bottom,
                                       OBD_FAIL_MDS_SETXATTR_WRITE);

                        buf->lb_buf = xattr;
                        buf->lb_len = xattr_len;
                        rc = mo_xattr_set(env, child, buf, xattr_name, flags);
                        /* update ctime after xattr changed */
                        if (rc == 0) {
                                ma->ma_attr_flags |= MDS_PERM_BYPASS;
                                mo_attr_set(env, child, ma);
                        }
                }
        } else if (valid & OBD_MD_FLXATTRRM) {
                rc = mo_xattr_del(env, child, xattr_name);
                /* update ctime after xattr changed */
                if (rc == 0) {
                        ma->ma_attr_flags |= MDS_PERM_BYPASS;
                        mo_attr_set(env, child, ma);
                }
        } else {
                CDEBUG(D_INFO, "valid bits: "LPX64"\n", valid);
                rc = -EINVAL;
        }
        if (rc == 0)
                mdt_counter_incr(req->rq_export, LPROC_MDT_SETXATTR);

        EXIT;
out_unlock:
        mdt_object_unlock_put(info, obj, lh, rc);
        if (unlikely(new_xattr != NULL))
                lustre_posix_acl_xattr_free(new_xattr, xattr_len);
out:
        mdt_exit_ucred(info);
        return rc;
}
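The set-xattr branch above translates the VFS-style XATTR_CREATE and
XATTR_REPLACE bits carried in la_flags into the LU_XATTR_* flags that
mo_xattr_set() expects. A sketch of that translation; the numeric flag
values here are illustrative assumptions, not the real header definitions.

#include <stdio.h>

#define XATTR_CREATE_FL     0x1   /* assumed stand-in for XATTR_CREATE  */
#define XATTR_REPLACE_FL    0x2   /* assumed stand-in for XATTR_REPLACE */
#define LU_XATTR_CREATE_FL  0x1   /* assumed stand-in for LU_XATTR_CREATE  */
#define LU_XATTR_REPLACE_FL 0x2   /* assumed stand-in for LU_XATTR_REPLACE */

static int to_lu_flags(int la_flags)
{
        int flags = 0;

        if (la_flags & XATTR_REPLACE_FL)
                flags |= LU_XATTR_REPLACE_FL;
        if (la_flags & XATTR_CREATE_FL)
                flags |= LU_XATTR_CREATE_FL;
        return flags;
}

int main(void)
{
        printf("create-only maps to %#x\n", to_lu_flags(XATTR_CREATE_FL));
        return 0;
}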
/*
 * Client's incoming reply callback
 */
void reply_in_callback(lnet_event_t *ev)
{
        struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
        struct ptlrpc_request *req  = cbid->cbid_arg;

        DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);

        LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK);
        LASSERT(ev->md.start == req->rq_repbuf);
        LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len);
        /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests
         * for adaptive timeouts' early reply. */
        LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0);

        spin_lock(&req->rq_lock);

        req->rq_receiving_reply = 0;
        req->rq_early = 0;
        if (ev->unlinked)
                req->rq_must_unlink = 0;

        if (ev->status)
                goto out_wake;

        if (ev->type == LNET_EVENT_UNLINK) {
                LASSERT(ev->unlinked);
                DEBUG_REQ(D_NET, req, "unlink");
                goto out_wake;
        }

        if (ev->mlength < ev->rlength) {
                CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
                       req->rq_replen, ev->rlength, ev->offset);
                req->rq_reply_truncate = 1;
                req->rq_replied = 1;
                req->rq_status = -EOVERFLOW;
                req->rq_nob_received = ev->rlength + ev->offset;
                goto out_wake;
        }

        if ((ev->offset == 0) &&
            ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) {
                /* Early reply */
                DEBUG_REQ(D_ADAPTTO, req,
                          "Early reply received: mlen=%u offset=%d replen=%d "
                          "replied=%d unlinked=%d", ev->mlength, ev->offset,
                          req->rq_replen, req->rq_replied, ev->unlinked);

                req->rq_early_count++; /* number received, client side */

                if (req->rq_replied)   /* already got the real reply */
                        goto out_wake;

                req->rq_early = 1;
                req->rq_reply_off = ev->offset;
                req->rq_nob_received = ev->mlength;
                /* And we're still receiving */
                req->rq_receiving_reply = 1;
        } else {
                /* Real reply */
                req->rq_rep_swab_mask = 0;
                req->rq_replied = 1;
                req->rq_reply_off = ev->offset;
                req->rq_nob_received = ev->mlength;
                /* LNetMDUnlink can't be called under the LNET_LOCK,
                 * so we must unlink in ptlrpc_unregister_reply */
                DEBUG_REQ(D_INFO, req,
                          "reply in flags=%x mlen=%u offset=%d replen=%d",
                          lustre_msg_get_flags(req->rq_reqmsg),
                          ev->mlength, ev->offset, req->rq_replen);
        }

        req->rq_import->imp_last_reply_time = cfs_time_current_sec();

out_wake:
        /* NB don't unlock till after wakeup; req can disappear under us
         * since we don't have our own ref */
        ptlrpc_client_wake_req(req);
        spin_unlock(&req->rq_lock);
}
int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr)
{
        char                     *buf;
        struct libcfs_ioctl_data *data;
        int                       opc;
        int                       rc;

        if (cmd != IOC_LIBCFS_LNETST)
                return -EINVAL;

        data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);

        opc = data->ioc_u32[0];

        if (data->ioc_plen1 > PAGE_SIZE)
                return -EINVAL;

        LIBCFS_ALLOC(buf, data->ioc_plen1);
        if (buf == NULL)
                return -ENOMEM;

        /* copy in parameter */
        if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
                LIBCFS_FREE(buf, data->ioc_plen1);
                return -EFAULT;
        }

        mutex_lock(&console_session.ses_mutex);

        console_session.ses_laststamp = cfs_time_current_sec();

        if (console_session.ses_shutdown) {
                rc = -ESHUTDOWN;
                goto out;
        }

        if (console_session.ses_expired)
                lstcon_session_end();

        if (opc != LSTIO_SESSION_NEW &&
            console_session.ses_state == LST_SESSION_NONE) {
                CDEBUG(D_NET, "LST no active session\n");
                rc = -ESRCH;
                goto out;
        }

        memset(&console_session.ses_trans_stat, 0,
               sizeof(struct lstcon_trans_stat));

        switch (opc) {
        case LSTIO_SESSION_NEW:
                rc = lst_session_new_ioctl(
                        (struct lstio_session_new_args *)buf);
                break;
        case LSTIO_SESSION_END:
                rc = lst_session_end_ioctl(
                        (struct lstio_session_end_args *)buf);
                break;
        case LSTIO_SESSION_INFO:
                rc = lst_session_info_ioctl(
                        (struct lstio_session_info_args *)buf);
                break;
        case LSTIO_DEBUG:
                rc = lst_debug_ioctl((struct lstio_debug_args *)buf);
                break;
        case LSTIO_GROUP_ADD:
                rc = lst_group_add_ioctl((struct lstio_group_add_args *)buf);
                break;
        case LSTIO_GROUP_DEL:
                rc = lst_group_del_ioctl((struct lstio_group_del_args *)buf);
                break;
        case LSTIO_GROUP_UPDATE:
                rc = lst_group_update_ioctl(
                        (struct lstio_group_update_args *)buf);
                break;
        case LSTIO_NODES_ADD:
                rc = lst_nodes_add_ioctl((struct lstio_group_nodes_args *)buf);
                break;
        case LSTIO_GROUP_LIST:
                rc = lst_group_list_ioctl((struct lstio_group_list_args *)buf);
                break;
        case LSTIO_GROUP_INFO:
                rc = lst_group_info_ioctl((struct lstio_group_info_args *)buf);
                break;
        case LSTIO_BATCH_ADD:
                rc = lst_batch_add_ioctl((struct lstio_batch_add_args *)buf);
                break;
        case LSTIO_BATCH_START:
                rc = lst_batch_run_ioctl((struct lstio_batch_run_args *)buf);
                break;
        case LSTIO_BATCH_STOP:
                rc = lst_batch_stop_ioctl((struct lstio_batch_stop_args *)buf);
                break;
        case LSTIO_BATCH_QUERY:
                rc = lst_batch_query_ioctl(
                        (struct lstio_batch_query_args *)buf);
                break;
        case LSTIO_BATCH_LIST:
                rc = lst_batch_list_ioctl((struct lstio_batch_list_args *)buf);
                break;
        case LSTIO_BATCH_INFO:
                rc = lst_batch_info_ioctl((struct lstio_batch_info_args *)buf);
                break;
        case LSTIO_TEST_ADD:
                rc = lst_test_add_ioctl((struct lstio_test_args *)buf);
                break;
        case LSTIO_STAT_QUERY:
                rc = lst_stat_query_ioctl((struct lstio_stat_args *)buf);
                break;
        default:
                rc = -EINVAL;
        }

        if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat,
                         sizeof(struct lstcon_trans_stat)))
                rc = -EFAULT;
out:
        mutex_unlock(&console_session.ses_mutex);

        LIBCFS_FREE(buf, data->ioc_plen1);

        return rc;
}
int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
{
        struct obd_device *obd = m->private;

        LASSERT(obd != NULL);

        seq_printf(m, "status: ");
        if (obd->obd_max_recoverable_clients == 0) {
                seq_printf(m, "INACTIVE\n");
                goto out;
        }

        /* sampled unlocked, but really... */
        if (obd->obd_recovering == 0) {
                seq_printf(m, "COMPLETE\n");
                seq_printf(m, "recovery_start: %lu\n",
                           obd->obd_recovery_start);
                seq_printf(m, "recovery_duration: %lu\n",
                           obd->obd_recovery_end - obd->obd_recovery_start);
                /* Number of clients that have completed recovery */
                seq_printf(m, "completed_clients: %d/%d\n",
                           obd->obd_max_recoverable_clients -
                           obd->obd_stale_clients,
                           obd->obd_max_recoverable_clients);
                seq_printf(m, "replayed_requests: %d\n",
                           obd->obd_replayed_requests);
                seq_printf(m, "last_transno: "LPD64"\n",
                           obd->obd_next_recovery_transno - 1);
                seq_printf(m, "VBR: %s\n",
                           obd->obd_version_recov ? "ENABLED" : "DISABLED");
                seq_printf(m, "IR: %s\n",
                           obd->obd_no_ir ? "DISABLED" : "ENABLED");
                goto out;
        }

        seq_printf(m, "RECOVERING\n");
        seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
        seq_printf(m, "time_remaining: %lu\n",
                   cfs_time_current_sec() >=
                   obd->obd_recovery_start + obd->obd_recovery_timeout ?
                   0 : obd->obd_recovery_start + obd->obd_recovery_timeout -
                       cfs_time_current_sec());
        seq_printf(m, "connected_clients: %d/%d\n",
                   atomic_read(&obd->obd_connected_clients),
                   obd->obd_max_recoverable_clients);
        /* Number of clients that have completed recovery */
        seq_printf(m, "req_replay_clients: %d\n",
                   atomic_read(&obd->obd_req_replay_clients));
        seq_printf(m, "lock_replay_clients: %d\n",
                   atomic_read(&obd->obd_lock_replay_clients));
        seq_printf(m, "completed_clients: %d\n",
                   atomic_read(&obd->obd_connected_clients) -
                   atomic_read(&obd->obd_lock_replay_clients));
        seq_printf(m, "evicted_clients: %d\n", obd->obd_stale_clients);
        seq_printf(m, "replayed_requests: %d\n", obd->obd_replayed_requests);
        seq_printf(m, "queued_requests: %d\n",
                   obd->obd_requests_queued_for_recovery);
        seq_printf(m, "next_transno: "LPD64"\n",
                   obd->obd_next_recovery_transno);
out:
        return 0;
}
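time_remaining is clamped at zero once the recovery window has elapsed, so
the proc file never reports a negative remainder. The clamp in isolation:

#include <stdio.h>
#include <time.h>

static unsigned long time_remaining(time_t now, time_t start,
                                    unsigned long timeout)
{
        if (now >= start + (time_t)timeout)
                return 0;
        return (unsigned long)(start + (time_t)timeout - now);
}

int main(void)
{
        printf("%lu\n", time_remaining(1100, 1000, 300));   /* 200 */
        printf("%lu\n", time_remaining(1400, 1000, 300));   /* 0   */
        return 0;
}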
/**
 * Send request \a request.
 * if \a noreply is set, don't expect any reply back and don't set up
 * reply buffers.
 * Returns 0 on success or error code.
 */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
        int rc;
        int rc2;
        int mpflag = 0;
        struct ptlrpc_connection *connection;
        lnet_handle_me_t  reply_me_h;
        lnet_md_t         reply_md;
        struct obd_device *obd = request->rq_import->imp_obd;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
                RETURN(0);

        LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
        LASSERT(request->rq_wait_ctx == 0);

        /* If this is a re-transmit, we're required to have disengaged
         * cleanly from the previous attempt */
        LASSERT(!request->rq_receiving_reply);

        if (request->rq_import->imp_obd &&
            request->rq_import->imp_obd->obd_fail) {
                CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
                       request->rq_import->imp_obd->obd_name);
                /* this prevents us from waiting in ptlrpc_queue_wait */
                spin_lock(&request->rq_lock);
                request->rq_err = 1;
                spin_unlock(&request->rq_lock);
                request->rq_status = -ENODEV;
                RETURN(-ENODEV);
        }

        connection = request->rq_import->imp_connection;

        lustre_msg_set_handle(request->rq_reqmsg,
                              &request->rq_import->imp_remote_handle);
        lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
        lustre_msg_set_conn_cnt(request->rq_reqmsg,
                                request->rq_import->imp_conn_cnt);
        lustre_msghdr_set_flags(request->rq_reqmsg,
                                request->rq_import->imp_msghdr_flags);

        if (request->rq_resend)
                lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);

        if (request->rq_memalloc)
                mpflag = cfs_memory_pressure_get_and_set();

        rc = sptlrpc_cli_wrap_request(request);
        if (rc)
                GOTO(out, rc);

        /* bulk register should be done after wrap_request() */
        if (request->rq_bulk != NULL) {
                rc = ptlrpc_register_bulk(request);
                if (rc != 0)
                        GOTO(out, rc);
        }

        if (!noreply) {
                LASSERT(request->rq_replen != 0);
                if (request->rq_repbuf == NULL) {
                        LASSERT(request->rq_repdata == NULL);
                        LASSERT(request->rq_repmsg == NULL);
                        rc = sptlrpc_cli_alloc_repbuf(request,
                                                      request->rq_replen);
                        if (rc) {
                                /* this prevents us from looping in
                                 * ptlrpc_queue_wait */
                                spin_lock(&request->rq_lock);
                                request->rq_err = 1;
                                spin_unlock(&request->rq_lock);
                                request->rq_status = rc;
                                GOTO(cleanup_bulk, rc);
                        }
                } else {
                        request->rq_repdata = NULL;
                        request->rq_repmsg = NULL;
                }

                rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
                                  connection->c_peer, request->rq_xid, 0,
                                  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
                if (rc != 0) {
                        CERROR("LNetMEAttach failed: %d\n", rc);
                        LASSERT(rc == -ENOMEM);
                        GOTO(cleanup_bulk, rc = -ENOMEM);
                }
        }

        spin_lock(&request->rq_lock);
        /* If the MD attach succeeds, there _will_ be a reply_in callback */
        request->rq_receiving_reply = !noreply;
        /* We are responsible for unlinking the reply buffer */
        request->rq_must_unlink = !noreply;
        /* Clear any flags that may be present from previous sends. */
        request->rq_replied = 0;
        request->rq_err = 0;
        request->rq_timedout = 0;
        request->rq_net_err = 0;
        request->rq_resend = 0;
        request->rq_restart = 0;
        request->rq_reply_truncate = 0;
        spin_unlock(&request->rq_lock);

        if (!noreply) {
                reply_md.start     = request->rq_repbuf;
                reply_md.length    = request->rq_repbuf_len;
                /* Allow multiple early replies */
                reply_md.threshold = LNET_MD_THRESH_INF;
                /* Manage remote for early replies */
                reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
                                     LNET_MD_MANAGE_REMOTE |
                                     LNET_MD_TRUNCATE; /* allow to make
                                                        * EOVERFLOW error */
                reply_md.user_ptr  = &request->rq_reply_cbid;
                reply_md.eq_handle = ptlrpc_eq_h;

                /* We must see the unlink callback to unset rq_must_unlink,
                 * so we can't auto-unlink */
                rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                  &request->rq_reply_md_h);
                if (rc != 0) {
                        CERROR("LNetMDAttach failed: %d\n", rc);
                        LASSERT(rc == -ENOMEM);
                        spin_lock(&request->rq_lock);
                        /* ...but the MD attach didn't succeed... */
                        request->rq_receiving_reply = 0;
                        spin_unlock(&request->rq_lock);
                        GOTO(cleanup_me, rc = -ENOMEM);
                }

                CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
                       ", portal %u\n",
                       request->rq_repbuf_len, request->rq_xid,
                       request->rq_reply_portal);
        }

        /* add references on request for request_out_callback */
        ptlrpc_request_addref(request);
        if (obd->obd_svc_stats != NULL)
                lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
                        atomic_read(&request->rq_import->imp_inflight));

        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);

        do_gettimeofday(&request->rq_arrival_time);
        request->rq_sent = cfs_time_current_sec();
        /* We give the server rq_timeout secs to process the req, and
         * add the network latency for our local timeout. */
        request->rq_deadline = request->rq_sent + request->rq_timeout +
                               ptlrpc_at_get_net_latency(request);

        ptlrpc_pinger_sending_on_import(request->rq_import);

        DEBUG_REQ(D_INFO, request, "send flg=%x",
                  lustre_msg_get_flags(request->rq_reqmsg));
        rc = ptl_send_buf(&request->rq_req_md_h,
                          request->rq_reqbuf, request->rq_reqdata_len,
                          LNET_NOACK_REQ, &request->rq_req_cbid,
                          connection,
                          request->rq_request_portal,
                          request->rq_xid, 0);
        if (rc == 0)
                GOTO(out, rc);

        ptlrpc_req_finished(request);
        if (noreply)
                GOTO(out, rc);

cleanup_me:
        /* MEUnlink is safe; the PUT didn't even get off the ground, and
         * nobody apart from the PUT's target has the right nid+XID to
         * access the reply buffer. */
        rc2 = LNetMEUnlink(reply_me_h);
        LASSERT(rc2 == 0);
        /* UNLINKED callback called synchronously */
        LASSERT(!request->rq_receiving_reply);

cleanup_bulk:
        /* We do sync unlink here as there was no real transfer here so
         * the chance to have long unlink to sluggish net is smaller here. */
        ptlrpc_unregister_bulk(request, 0);
out:
        if (request->rq_memalloc)
                cfs_memory_pressure_restore(mpflag);
        return rc;
}
/*
 * we allocate the requested pages atomically.
 */
int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
{
        cfs_waitlink_t waitlink;
        unsigned long  this_idle = -1;
        cfs_time_t     tick = 0;
        long           now;
        int            p_idx, g_idx;
        int            i;

        LASSERT(desc->bd_iov_count > 0);
        LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);

        /* resent bulk, enc iov might have been allocated previously */
        if (desc->bd_enc_iov != NULL)
                return 0;

        OBD_ALLOC(desc->bd_enc_iov,
                  desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
        if (desc->bd_enc_iov == NULL)
                return -ENOMEM;

        cfs_spin_lock(&page_pools.epp_lock);

        page_pools.epp_st_access++;
again:
        if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
                if (tick == 0)
                        tick = cfs_time_current();

                now = cfs_time_current_sec();

                page_pools.epp_st_missings++;
                page_pools.epp_pages_short += desc->bd_iov_count;

                if (enc_pools_should_grow(desc->bd_iov_count, now)) {
                        page_pools.epp_growing = 1;

                        cfs_spin_unlock(&page_pools.epp_lock);
                        enc_pools_add_pages(page_pools.epp_pages_short / 2);
                        cfs_spin_lock(&page_pools.epp_lock);

                        page_pools.epp_growing = 0;

                        enc_pools_wakeup();
                } else {
                        if (++page_pools.epp_waitqlen >
                            page_pools.epp_st_max_wqlen)
                                page_pools.epp_st_max_wqlen =
                                                page_pools.epp_waitqlen;

                        cfs_set_current_state(CFS_TASK_UNINT);
                        cfs_waitlink_init(&waitlink);
                        cfs_waitq_add(&page_pools.epp_waitq, &waitlink);

                        cfs_spin_unlock(&page_pools.epp_lock);
                        cfs_waitq_wait(&waitlink, CFS_TASK_UNINT);
                        cfs_waitq_del(&page_pools.epp_waitq, &waitlink);
                        LASSERT(page_pools.epp_waitqlen > 0);
                        cfs_spin_lock(&page_pools.epp_lock);
                        page_pools.epp_waitqlen--;
                }

                LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
                page_pools.epp_pages_short -= desc->bd_iov_count;

                this_idle = 0;
                goto again;
        }

        /* record max wait time */
        if (unlikely(tick != 0)) {
                tick = cfs_time_current() - tick;
                if (tick > page_pools.epp_st_max_wait)
                        page_pools.epp_st_max_wait = tick;
        }

        /* proceed with rest of allocation */
        page_pools.epp_free_pages -= desc->bd_iov_count;

        p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;

        for (i = 0; i < desc->bd_iov_count; i++) {
                LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
                desc->bd_enc_iov[i].kiov_page =
                                        page_pools.epp_pools[p_idx][g_idx];
                page_pools.epp_pools[p_idx][g_idx] = NULL;

                if (++g_idx == PAGES_PER_POOL) {
                        p_idx++;
                        g_idx = 0;
                }
        }

        if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
                page_pools.epp_st_lowfree = page_pools.epp_free_pages;

        /*
         * new idle index = (old * weight + new) / (weight + 1)
         */
        if (this_idle == -1) {
                this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
                            page_pools.epp_total_pages;
        }

        page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
                                   this_idle) /
                                  (IDLE_IDX_WEIGHT + 1);

        page_pools.epp_last_access = cfs_time_current_sec();

        cfs_spin_unlock(&page_pools.epp_lock);
        return 0;
}
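Two pieces of arithmetic above are worth isolating: the free pages form one
logical array addressed as pool = index / PAGES_PER_POOL and slot =
index % PAGES_PER_POOL, and the idle index is an exponentially weighted
average, new = (old * weight + sample) / (weight + 1). A sketch with
illustrative values for PAGES_PER_POOL and IDLE_IDX_WEIGHT:

#include <stdio.h>

#define IDLE_IDX_MAX    100
#define IDLE_IDX_WEIGHT 3      /* assumed weight */
#define PAGES_PER_POOL  512    /* assumed pool width */

static unsigned long idle_update(unsigned long old, unsigned long sample)
{
        return (old * IDLE_IDX_WEIGHT + sample) / (IDLE_IDX_WEIGHT + 1);
}

static void locate(unsigned long i, int *p_idx, int *g_idx)
{
        *p_idx = (int)(i / PAGES_PER_POOL);
        *g_idx = (int)(i % PAGES_PER_POOL);
}

int main(void)
{
        int p, g;
        unsigned long idx = IDLE_IDX_MAX;

        idx = idle_update(idx, 0);    /* a busy sample pulls the index down */
        printf("idle index after busy sample: %lu\n", idx);   /* 75 */

        locate(1000, &p, &g);
        printf("page 1000 -> pool %d slot %d\n", p, g);       /* 1, 488 */
        return 0;
}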
/**
 * Send request reply from request \a req reply buffer.
 * \a flags defines reply types
 * Returns 0 on success or error code
 */
int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
{
        struct ptlrpc_reply_state *rs = req->rq_reply_state;
        struct ptlrpc_connection  *conn;
        int                        rc;

        /* We must already have a reply buffer (only ptlrpc_error() may be
         * called without one). The reply generated by sptlrpc layer (e.g.
         * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must
         * have a request buffer which is either the actual (swabbed) incoming
         * request, or a saved copy if this is a req saved in
         * target_queue_final_reply(). */
        LASSERT(req->rq_no_reply == 0);
        LASSERT(req->rq_reqbuf != NULL);
        LASSERT(rs != NULL);
        LASSERT((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult);
        LASSERT(req->rq_repmsg != NULL);
        LASSERT(req->rq_repmsg == rs->rs_msg);
        LASSERT(rs->rs_cb_id.cbid_fn == reply_out_callback);
        LASSERT(rs->rs_cb_id.cbid_arg == rs);

        /* There may be no rq_export during failover */

        if (unlikely(req->rq_export && req->rq_export->exp_obd &&
                     req->rq_export->exp_obd->obd_fail)) {
                /* Failed obd's only send ENODEV */
                req->rq_type = PTL_RPC_MSG_ERR;
                req->rq_status = -ENODEV;
                CDEBUG(D_HA, "sending ENODEV from failed obd %d\n",
                       req->rq_export->exp_obd->obd_minor);
        }

        /* In order to keep interoperability with the client (< 2.3) which
         * doesn't have pb_jobid in ptlrpc_body, we have to shrink the
         * ptlrpc_body in the reply buffer to ptlrpc_body_v2; otherwise, the
         * reply buffer on the client will overflow.
         *
         * XXX Remove this whenever we drop interoperability with such
         *     clients. */
        req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0,
                                           sizeof(struct ptlrpc_body_v2), 1);

        if (req->rq_type != PTL_RPC_MSG_ERR)
                req->rq_type = PTL_RPC_MSG_REPLY;

        lustre_msg_set_type(req->rq_repmsg, req->rq_type);
        lustre_msg_set_status(req->rq_repmsg,
                              ptlrpc_status_hton(req->rq_status));
        lustre_msg_set_opc(req->rq_repmsg,
                req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);

        target_pack_pool_reply(req);

        ptlrpc_at_set_reply(req, flags);

        if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
                conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL);
        else
                conn = ptlrpc_connection_addref(req->rq_export->exp_connection);

        if (unlikely(conn == NULL)) {
                CERROR("not replying on NULL connection\n"); /* bug 9635 */
                return -ENOTCONN;
        }
        ptlrpc_rs_addref(rs);                   /* +1 ref for the network */

        rc = sptlrpc_svc_wrap_reply(req);
        if (unlikely(rc))
                goto out;

        req->rq_sent = cfs_time_current_sec();

        rc = ptl_send_buf(&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
                          (rs->rs_difficult && !rs->rs_no_ack) ?
                          LNET_ACK_REQ : LNET_NOACK_REQ,
                          &rs->rs_cb_id, conn,
                          ptlrpc_req2svc(req)->srv_rep_portal,
                          req->rq_xid, req->rq_reply_off);
out:
        if (unlikely(rc != 0))
                ptlrpc_req_drop_rs(req);
        ptlrpc_connection_put(conn);
        return rc;
}
/**
 * Send request \a request.
 * if \a noreply is set, don't expect any reply back and don't set up
 * reply buffers.
 * Returns 0 on success or error code.
 */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
        int rc;
        int rc2;
        int mpflag = 0;
        struct ptlrpc_connection *connection;
        lnet_handle_me_t  reply_me_h;
        lnet_md_t         reply_md;
        struct obd_import *imp = request->rq_import;
        struct obd_device *obd = imp->imp_obd;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
                RETURN(0);

        LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
        LASSERT(request->rq_wait_ctx == 0);

        /* If this is a re-transmit, we're required to have disengaged
         * cleanly from the previous attempt */
        LASSERT(!request->rq_receiving_reply);
        LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
                  (imp->imp_state == LUSTRE_IMP_FULL)));

        if (unlikely(obd != NULL && obd->obd_fail)) {
                CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
                       obd->obd_name);
                /* this prevents us from waiting in ptlrpc_queue_wait */
                spin_lock(&request->rq_lock);
                request->rq_err = 1;
                spin_unlock(&request->rq_lock);
                request->rq_status = -ENODEV;
                RETURN(-ENODEV);
        }

        connection = imp->imp_connection;

        lustre_msg_set_handle(request->rq_reqmsg, &imp->imp_remote_handle);
        lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
        lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt);
        lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags);

        /* If it's the first time to resend the request for EINPROGRESS,
         * we need to allocate a new XID (see after_reply()), it's different
         * from the resend for reply timeout. */
        if (request->rq_nr_resend != 0 &&
            list_empty(&request->rq_unreplied_list)) {
                __u64 min_xid = 0;
                /* resend for EINPROGRESS, allocate new xid to avoid reply
                 * reconstruction */
                spin_lock(&imp->imp_lock);
                ptlrpc_assign_next_xid_nolock(request);
                request->rq_mbits = request->rq_xid;
                min_xid = ptlrpc_known_replied_xid(imp);
                spin_unlock(&imp->imp_lock);

                lustre_msg_set_last_xid(request->rq_reqmsg, min_xid);
                DEBUG_REQ(D_RPCTRACE, request, "Allocating new xid for "
                          "resend on EINPROGRESS");
        } else if (request->rq_bulk != NULL) {
                ptlrpc_set_bulk_mbits(request);
                lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits);
        }

        if (list_empty(&request->rq_unreplied_list) ||
            request->rq_xid <= imp->imp_known_replied_xid) {
                DEBUG_REQ(D_ERROR, request, "xid: "LPU64", replied: "LPU64", "
                          "list_empty:%d\n", request->rq_xid,
                          imp->imp_known_replied_xid,
                          list_empty(&request->rq_unreplied_list));
                LBUG();
        }

        /** For enabled AT all requests should have AT_SUPPORT in the
         * FULL import state when OBD_CONNECT_AT is set */
        LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
                (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
                !(imp->imp_connect_data.ocd_connect_flags &
                  OBD_CONNECT_AT));

        if (request->rq_resend) {
                lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
                if (request->rq_resend_cb != NULL)
                        request->rq_resend_cb(request,
                                              &request->rq_async_args);
        }
        if (request->rq_memalloc)
                mpflag = cfs_memory_pressure_get_and_set();

        rc = sptlrpc_cli_wrap_request(request);
        if (rc == -ENOMEM)
                /* set rq_sent so that this request is treated
                 * as a delayed send in the upper layers */
                request->rq_sent = cfs_time_current_sec();
        if (rc)
                GOTO(out, rc);

        /* bulk register should be done after wrap_request() */
        if (request->rq_bulk != NULL) {
                rc = ptlrpc_register_bulk(request);
                if (rc != 0)
                        GOTO(out, rc);
        }

        if (!noreply) {
                LASSERT(request->rq_replen != 0);
                if (request->rq_repbuf == NULL) {
                        LASSERT(request->rq_repdata == NULL);
                        LASSERT(request->rq_repmsg == NULL);
                        rc = sptlrpc_cli_alloc_repbuf(request,
                                                      request->rq_replen);
                        if (rc) {
                                /* this prevents us from looping in
                                 * ptlrpc_queue_wait */
                                spin_lock(&request->rq_lock);
                                request->rq_err = 1;
                                spin_unlock(&request->rq_lock);
                                request->rq_status = rc;
                                GOTO(cleanup_bulk, rc);
                        }
                } else {
                        request->rq_repdata = NULL;
                        request->rq_repmsg = NULL;
                }

                rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
                                  connection->c_peer, request->rq_xid, 0,
                                  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
                if (rc != 0) {
                        CERROR("LNetMEAttach failed: %d\n", rc);
                        LASSERT(rc == -ENOMEM);
                        GOTO(cleanup_bulk, rc = -ENOMEM);
                }
        }

        spin_lock(&request->rq_lock);
        /* We are responsible for unlinking the reply buffer */
        request->rq_reply_unlinked = noreply;
        request->rq_receiving_reply = !noreply;
        /* Clear any flags that may be present from previous sends. */
        request->rq_req_unlinked = 0;
        request->rq_replied = 0;
        request->rq_err = 0;
        request->rq_timedout = 0;
        request->rq_net_err = 0;
        request->rq_resend = 0;
        request->rq_restart = 0;
        request->rq_reply_truncated = 0;
        spin_unlock(&request->rq_lock);

        if (!noreply) {
                reply_md.start     = request->rq_repbuf;
                reply_md.length    = request->rq_repbuf_len;
                /* Allow multiple early replies */
                reply_md.threshold = LNET_MD_THRESH_INF;
                /* Manage remote for early replies */
                reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
                                     LNET_MD_MANAGE_REMOTE |
                                     LNET_MD_TRUNCATE; /* allow to make
                                                        * EOVERFLOW error */
                reply_md.user_ptr  = &request->rq_reply_cbid;
                reply_md.eq_handle = ptlrpc_eq_h;

                /* We must see the unlink callback to set rq_reply_unlinked,
                 * so we can't auto-unlink */
                rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                  &request->rq_reply_md_h);
                if (rc != 0) {
                        CERROR("LNetMDAttach failed: %d\n", rc);
                        LASSERT(rc == -ENOMEM);
                        spin_lock(&request->rq_lock);
                        /* ...but the MD attach didn't succeed... */
                        request->rq_receiving_reply = 0;
                        spin_unlock(&request->rq_lock);
                        GOTO(cleanup_me, rc = -ENOMEM);
                }

                CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
                       ", portal %u\n",
                       request->rq_repbuf_len, request->rq_xid,
                       request->rq_reply_portal);
        }

        /* add references on request for request_out_callback */
        ptlrpc_request_addref(request);
        if (obd != NULL && obd->obd_svc_stats != NULL)
                lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
                                    atomic_read(&imp->imp_inflight));

        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);

        do_gettimeofday(&request->rq_sent_tv);
        request->rq_sent = cfs_time_current_sec();
        /* We give the server rq_timeout secs to process the req, and
         * add the network latency for our local timeout. */
        request->rq_deadline = request->rq_sent + request->rq_timeout +
                               ptlrpc_at_get_net_latency(request);

        ptlrpc_pinger_sending_on_import(imp);

        DEBUG_REQ(D_INFO, request, "send flg=%x",
                  lustre_msg_get_flags(request->rq_reqmsg));
        rc = ptl_send_buf(&request->rq_req_md_h,
                          request->rq_reqbuf, request->rq_reqdata_len,
                          LNET_NOACK_REQ, &request->rq_req_cbid,
                          connection,
                          request->rq_request_portal,
                          request->rq_xid, 0);
        if (likely(rc == 0))
                GOTO(out, rc);

        request->rq_req_unlinked = 1;
        ptlrpc_req_finished(request);
        if (noreply)
                GOTO(out, rc);

cleanup_me:
        /* MEUnlink is safe; the PUT didn't even get off the ground, and
         * nobody apart from the PUT's target has the right nid+XID to
         * access the reply buffer. */
        rc2 = LNetMEUnlink(reply_me_h);
        LASSERT(rc2 == 0);
        /* UNLINKED callback called synchronously */
        LASSERT(!request->rq_receiving_reply);

cleanup_bulk:
        /* We do sync unlink here as there was no real transfer here so
         * the chance to have long unlink to sluggish net is smaller here. */
        ptlrpc_unregister_bulk(request, 0);
out:
        if (request->rq_memalloc)
                cfs_memory_pressure_restore(mpflag);
        return rc;
}
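At send time the client gives the server rq_timeout seconds of processing
budget and adds the adaptive network-latency estimate on top for its local
timer. The deadline computation in isolation, with illustrative numbers:

#include <stdio.h>
#include <time.h>

int main(void)
{
        time_t sent        = time(NULL);   /* rq_sent */
        int    rq_timeout  = 30;           /* server budget, seconds */
        int    net_latency = 5;            /* adaptive estimate, seconds */
        time_t deadline    = sent + rq_timeout + net_latency;

        printf("deadline is %ld s after send\n", (long)(deadline - sent));
        return 0;
}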