/*
 * We try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static unsigned long enc_pools_shrink_scan(struct shrinker *s,
					   struct shrink_control *sc)
{
	spin_lock(&page_pools.epp_lock);
	sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
			       page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
	if (sc->nr_to_scan > 0) {
		enc_pools_release_free_pages(sc->nr_to_scan);
		CDEBUG(D_SEC, "released %ld pages, %ld left\n",
		       (long)sc->nr_to_scan, page_pools.epp_free_pages);

		page_pools.epp_st_shrinks++;
		page_pools.epp_last_shrink = ktime_get_seconds();
	}
	spin_unlock(&page_pools.epp_lock);

	/*
	 * If there has been no pool access for a long time, consider the
	 * pool fully idle. A little race here is fine.
	 */
	if (unlikely(ktime_get_seconds() - page_pools.epp_last_access >
		     CACHE_QUIESCENT_PERIOD)) {
		spin_lock(&page_pools.epp_lock);
		page_pools.epp_idle_idx = IDLE_IDX_MAX;
		spin_unlock(&page_pools.epp_lock);
	}

	LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
	return sc->nr_to_scan;
}
/*
 * /sys/kernel/debug/lustre/sptlrpc/encrypt_page_pools
 */
int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
{
	spin_lock(&page_pools.epp_lock);

	seq_printf(m, "physical pages: %lu\n"
		      "pages per pool: %lu\n"
		      "max pages: %lu\n"
		      "max pools: %u\n"
		      "total pages: %lu\n"
		      "total free: %lu\n"
		      "idle index: %lu/100\n"
		      "last shrink: %lds\n"
		      "last access: %lds\n"
		      "max pages reached: %lu\n"
		      "grows: %u\n"
		      "grows failure: %u\n"
		      "shrinks: %u\n"
		      "cache access: %lu\n"
		      "cache missing: %lu\n"
		      "low free mark: %lu\n"
		      "max waitqueue depth: %u\n"
		      "max wait time: %ld/%lu\n"
		      "out of mem: %lu\n",
		   totalram_pages, PAGES_PER_POOL,
		   page_pools.epp_max_pages,
		   page_pools.epp_max_pools,
		   page_pools.epp_total_pages,
		   page_pools.epp_free_pages,
		   page_pools.epp_idle_idx,
		   (long)(ktime_get_seconds() - page_pools.epp_last_shrink),
		   (long)(ktime_get_seconds() - page_pools.epp_last_access),
		   page_pools.epp_st_max_pages,
		   page_pools.epp_st_grows,
		   page_pools.epp_st_grow_fails,
		   page_pools.epp_st_shrinks,
		   page_pools.epp_st_access,
		   page_pools.epp_st_missings,
		   page_pools.epp_st_lowfree,
		   page_pools.epp_st_max_wqlen,
		   page_pools.epp_st_max_wait,
		   msecs_to_jiffies(MSEC_PER_SEC),
		   page_pools.epp_st_outofmem);

	spin_unlock(&page_pools.epp_lock);
	return 0;
}
int sptlrpc_enc_pool_init(void)
{
	/*
	 * Maximum capacity is 1/8 of total physical memory.
	 * Is 1/8 a good number?
	 */
	page_pools.epp_max_pages = totalram_pages / 8;
	page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);

	init_waitqueue_head(&page_pools.epp_waitq);
	page_pools.epp_waitqlen = 0;
	page_pools.epp_pages_short = 0;

	page_pools.epp_growing = 0;

	page_pools.epp_idle_idx = 0;
	page_pools.epp_last_shrink = ktime_get_seconds();
	page_pools.epp_last_access = ktime_get_seconds();

	spin_lock_init(&page_pools.epp_lock);
	page_pools.epp_total_pages = 0;
	page_pools.epp_free_pages = 0;

	page_pools.epp_st_max_pages = 0;
	page_pools.epp_st_grows = 0;
	page_pools.epp_st_grow_fails = 0;
	page_pools.epp_st_shrinks = 0;
	page_pools.epp_st_access = 0;
	page_pools.epp_st_missings = 0;
	page_pools.epp_st_lowfree = 0;
	page_pools.epp_st_max_wqlen = 0;
	page_pools.epp_st_max_wait = 0;
	page_pools.epp_st_outofmem = 0;

	enc_pools_alloc();
	if (!page_pools.epp_pools)
		return -ENOMEM;

	register_shrinker(&pools_shrinker);

	return 0;
}
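/*
 * Illustrative sketch only (assumption, not part of the excerpt above): the
 * pool-count conversion used for epp_max_pools is assumed to round the page
 * budget up to whole pools, roughly as below. PAGES_PER_POOL and
 * npages_to_npools() are defined elsewhere in sec_bulk.c and may differ.
 */
#if 0	/* hypothetical definitions, for illustration only */
#define PAGES_PER_POOL		(PAGE_SIZE / sizeof(void *))
#define npages_to_npools(n)	DIV_ROUND_UP(n, PAGES_PER_POOL)
#endif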
static int osd_scrub_post(const struct lu_env *env, struct osd_device *dev,
			  int result)
{
	struct lustre_scrub *scrub = &dev->od_scrub;
	struct scrub_file *sf = &scrub->os_file;
	int rc;
	ENTRY;

	CDEBUG(D_LFSCK, "%s: OI scrub post with result = %d\n",
	       scrub->os_name, result);

	down_write(&scrub->os_rwsem);
	spin_lock(&scrub->os_lock);
	thread_set_flags(&scrub->os_thread, SVC_STOPPING);
	spin_unlock(&scrub->os_lock);
	if (scrub->os_new_checked > 0) {
		sf->sf_items_checked += scrub->os_new_checked;
		scrub->os_new_checked = 0;
		sf->sf_pos_last_checkpoint = scrub->os_pos_current;
	}
	sf->sf_time_last_checkpoint = ktime_get_real_seconds();
	if (result > 0) {
		sf->sf_status = SS_COMPLETED;
		if (!(sf->sf_param & SP_DRYRUN)) {
			memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE);
			sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
					  SF_UPGRADE | SF_AUTO);
		}
		sf->sf_time_last_complete = sf->sf_time_last_checkpoint;
		sf->sf_success_count++;
	} else if (result == 0) {
		if (scrub->os_paused)
			sf->sf_status = SS_PAUSED;
		else
			sf->sf_status = SS_STOPPED;
	} else {
		sf->sf_status = SS_FAILED;
	}
	sf->sf_run_time += ktime_get_seconds() - scrub->os_time_last_checkpoint;

	rc = scrub_file_store(env, scrub);
	up_write(&scrub->os_rwsem);

	RETURN(rc < 0 ? rc : result);
}
/*
 * We try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static unsigned long enc_pools_shrink_count(struct shrinker *s,
					    struct shrink_control *sc)
{
	/*
	 * If there has been no pool access for a long time, consider the
	 * pool fully idle. A little race here is fine.
	 */
	if (unlikely(ktime_get_seconds() - page_pools.epp_last_access >
		     CACHE_QUIESCENT_PERIOD)) {
		spin_lock(&page_pools.epp_lock);
		page_pools.epp_idle_idx = IDLE_IDX_MAX;
		spin_unlock(&page_pools.epp_lock);
	}

	LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
	return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
		(IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}
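/*
 * Illustrative sketch only (assumption, not part of the excerpt above): the
 * pools_shrinker object passed to register_shrinker() in
 * sptlrpc_enc_pool_init() is expected to wire the two callbacks above into
 * the kernel's count_objects/scan_objects shrinker interface along these
 * lines; the actual definition lives elsewhere in sec_bulk.c.
 */
#if 0	/* hypothetical wiring, for illustration only */
static struct shrinker pools_shrinker = {
	.count_objects	= enc_pools_shrink_count,
	.scan_objects	= enc_pools_shrink_scan,
	.seeks		= DEFAULT_SEEKS,
};
#endif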
/*
 * Retransmit terminal ACK or ABORT of the previous call.
 */
static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
				       struct sk_buff *skb,
				       unsigned int channel)
{
	struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
	struct rxrpc_channel *chan;
	struct msghdr msg;
	struct kvec iov[3];
	struct {
		struct rxrpc_wire_header whdr;
		union {
			__be32 abort_code;
			struct rxrpc_ackpacket ack;
		};
	} __attribute__((packed)) pkt;
	struct rxrpc_ackinfo ack_info;
	size_t len;
	int ret, ioc;
	u32 serial, mtu, call_id, padding;

	_enter("%d", conn->debug_id);

	chan = &conn->channels[channel];

	/* If the last call got moved on whilst we were waiting to run, just
	 * ignore this packet.
	 */
	call_id = READ_ONCE(chan->last_call);
	/* Sync with __rxrpc_disconnect_call() */
	smp_rmb();
	if (skb && call_id != sp->hdr.callNumber)
		return;

	msg.msg_name = &conn->params.peer->srx.transport;
	msg.msg_namelen = conn->params.peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = sizeof(pkt.whdr);
	iov[1].iov_base = &padding;
	iov[1].iov_len = 3;
	iov[2].iov_base = &ack_info;
	iov[2].iov_len = sizeof(ack_info);

	pkt.whdr.epoch = htonl(conn->proto.epoch);
	pkt.whdr.cid = htonl(conn->proto.cid | channel);
	pkt.whdr.callNumber = htonl(call_id);
	pkt.whdr.seq = 0;
	pkt.whdr.type = chan->last_type;
	pkt.whdr.flags = conn->out_clientflag;
	pkt.whdr.userStatus = 0;
	pkt.whdr.securityIndex = conn->security_ix;
	pkt.whdr._rsvd = 0;
	pkt.whdr.serviceId = htons(conn->service_id);

	len = sizeof(pkt.whdr);
	switch (chan->last_type) {
	case RXRPC_PACKET_TYPE_ABORT:
		pkt.abort_code = htonl(chan->last_abort);
		iov[0].iov_len += sizeof(pkt.abort_code);
		len += sizeof(pkt.abort_code);
		ioc = 1;
		break;

	case RXRPC_PACKET_TYPE_ACK:
		mtu = conn->params.peer->if_mtu;
		mtu -= conn->params.peer->hdrsize;
		pkt.ack.bufferSpace = 0;
		pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
		pkt.ack.firstPacket = htonl(chan->last_seq + 1);
		pkt.ack.previousPacket = htonl(chan->last_seq);
		pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
		pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
		pkt.ack.nAcks = 0;
		ack_info.rxMTU = htonl(rxrpc_rx_mtu);
		ack_info.maxMTU = htonl(mtu);
		ack_info.rwind = htonl(rxrpc_rx_window_size);
		ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
		pkt.whdr.flags |= RXRPC_SLOW_START_OK;
		padding = 0;
		iov[0].iov_len += sizeof(pkt.ack);
		len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
		ioc = 3;
		break;

	default:
		return;
	}

	/* Resync with __rxrpc_disconnect_call() and check that the last call
	 * didn't get advanced whilst we were filling out the packets.
	 */
	smp_rmb();
	if (READ_ONCE(chan->last_call) != call_id)
		return;

	serial = atomic_inc_return(&conn->serial);
	pkt.whdr.serial = htonl(serial);

	switch (chan->last_type) {
	case RXRPC_PACKET_TYPE_ABORT:
		_proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort);
		break;
	case RXRPC_PACKET_TYPE_ACK:
		trace_rxrpc_tx_ack(chan->call_debug_id, serial,
				   ntohl(pkt.ack.firstPacket),
				   ntohl(pkt.ack.serial),
				   pkt.ack.reason, 0);
		_proto("Tx ACK %%%u [re]", serial);
		break;
	}

	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
	conn->params.peer->last_tx_at = ktime_get_seconds();
	if (ret < 0)
		trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret,
				    rxrpc_tx_point_call_final_resend);
	else
		trace_rxrpc_tx_packet(chan->call_debug_id, &pkt.whdr,
				      rxrpc_tx_point_call_final_resend);

	_leave("");
}
/*
 * generate a connection-level abort
 */
static int rxrpc_abort_connection(struct rxrpc_connection *conn,
				  int error, u32 abort_code)
{
	struct rxrpc_wire_header whdr;
	struct msghdr msg;
	struct kvec iov[2];
	__be32 word;
	size_t len;
	u32 serial;
	int ret;

	_enter("%d,,%u,%u", conn->debug_id, error, abort_code);

	/* generate a connection-level abort */
	spin_lock_bh(&conn->state_lock);
	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
		spin_unlock_bh(&conn->state_lock);
		_leave(" = 0 [already dead]");
		return 0;
	}

	conn->state = RXRPC_CONN_LOCALLY_ABORTED;
	spin_unlock_bh(&conn->state_lock);

	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error);

	msg.msg_name = &conn->params.peer->srx.transport;
	msg.msg_namelen = conn->params.peer->srx.transport_len;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	whdr.epoch = htonl(conn->proto.epoch);
	whdr.cid = htonl(conn->proto.cid);
	whdr.callNumber = 0;
	whdr.seq = 0;
	whdr.type = RXRPC_PACKET_TYPE_ABORT;
	whdr.flags = conn->out_clientflag;
	whdr.userStatus = 0;
	whdr.securityIndex = conn->security_ix;
	whdr._rsvd = 0;
	whdr.serviceId = htons(conn->service_id);

	word = htonl(conn->local_abort);

	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &word;
	iov[1].iov_len = sizeof(word);

	len = iov[0].iov_len + iov[1].iov_len;

	serial = atomic_inc_return(&conn->serial);
	whdr.serial = htonl(serial);
	_proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort);

	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
	if (ret < 0) {
		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
				    rxrpc_tx_point_conn_abort);
		_debug("sendmsg failed: %d", ret);
		return -EAGAIN;
	}

	trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);

	conn->params.peer->last_tx_at = ktime_get_seconds();

	_leave(" = 0");
	return 0;
}
/**
 * Send request \a request.
 * If \a noreply is set, don't expect any reply back and don't set up
 * reply buffers.
 * Returns 0 on success or error code.
 */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
	int rc;
	int rc2;
	int mpflag = 0;
	struct ptlrpc_connection *connection;
	lnet_handle_me_t reply_me_h;
	lnet_md_t reply_md;
	struct obd_import *imp = request->rq_import;
	struct obd_device *obd = imp->imp_obd;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
		return 0;

	LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
	LASSERT(request->rq_wait_ctx == 0);

	/* If this is a re-transmit, we're required to have disengaged
	 * cleanly from the previous attempt.
	 */
	LASSERT(!request->rq_receiving_reply);
	LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
		  (imp->imp_state == LUSTRE_IMP_FULL)));

	if (unlikely(obd && obd->obd_fail)) {
		CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
		       obd->obd_name);
		/* this prevents us from waiting in ptlrpc_queue_wait */
		spin_lock(&request->rq_lock);
		request->rq_err = 1;
		spin_unlock(&request->rq_lock);
		request->rq_status = -ENODEV;
		return -ENODEV;
	}

	connection = imp->imp_connection;

	lustre_msg_set_handle(request->rq_reqmsg, &imp->imp_remote_handle);
	lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
	lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt);
	lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags);

	/*
	 * If this is the first resend of the request for EINPROGRESS, we
	 * need to allocate a new XID (see after_reply()); this differs from
	 * a resend for reply timeout.
	 */
	if (request->rq_nr_resend && list_empty(&request->rq_unreplied_list)) {
		__u64 min_xid = 0;
		/*
		 * resend for EINPROGRESS, allocate new xid to avoid reply
		 * reconstruction
		 */
		spin_lock(&imp->imp_lock);
		ptlrpc_assign_next_xid_nolock(request);
		min_xid = ptlrpc_known_replied_xid(imp);
		spin_unlock(&imp->imp_lock);

		lustre_msg_set_last_xid(request->rq_reqmsg, min_xid);
		DEBUG_REQ(D_RPCTRACE, request,
			  "Allocating new xid for resend on EINPROGRESS");
	}

	if (request->rq_bulk) {
		ptlrpc_set_bulk_mbits(request);
		lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits);
	}

	if (list_empty(&request->rq_unreplied_list) ||
	    request->rq_xid <= imp->imp_known_replied_xid) {
		DEBUG_REQ(D_ERROR, request,
			  "xid: %llu, replied: %llu, list_empty:%d\n",
			  request->rq_xid, imp->imp_known_replied_xid,
			  list_empty(&request->rq_unreplied_list));
		LBUG();
	}

	/**
	 * When AT is enabled, all requests should have AT_SUPPORT in the
	 * FULL import state when OBD_CONNECT_AT is set.
	 */
	LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
		(imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
		!(imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_AT));

	if (request->rq_resend)
		lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);

	if (request->rq_memalloc)
		mpflag = cfs_memory_pressure_get_and_set();

	rc = sptlrpc_cli_wrap_request(request);
	if (rc) {
		/*
		 * set rq_sent so that this request is treated
		 * as a delayed send in the upper layers
		 */
		if (rc == -ENOMEM)
			request->rq_sent = ktime_get_seconds();
		goto out;
	}

	/* bulk register should be done after wrap_request() */
	if (request->rq_bulk) {
		rc = ptlrpc_register_bulk(request);
		if (rc != 0)
			goto out;
	}

	if (!noreply) {
		LASSERT(request->rq_replen != 0);
		if (!request->rq_repbuf) {
			LASSERT(!request->rq_repdata);
			LASSERT(!request->rq_repmsg);
			rc = sptlrpc_cli_alloc_repbuf(request,
						      request->rq_replen);
			if (rc) {
				/* this prevents us from looping in
				 * ptlrpc_queue_wait
				 */
				spin_lock(&request->rq_lock);
				request->rq_err = 1;
				spin_unlock(&request->rq_lock);
				request->rq_status = rc;
				goto cleanup_bulk;
			}
		} else {
			request->rq_repdata = NULL;
			request->rq_repmsg = NULL;
		}

		rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
				  connection->c_peer, request->rq_xid, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
		if (rc != 0) {
			CERROR("LNetMEAttach failed: %d\n", rc);
			LASSERT(rc == -ENOMEM);
			rc = -ENOMEM;
			goto cleanup_bulk;
		}
	}

	spin_lock(&request->rq_lock);
	/* We are responsible for unlinking the reply buffer */
	request->rq_reply_unlinked = noreply;
	request->rq_receiving_reply = !noreply;
	/* Clear any flags that may be present from previous sends. */
	request->rq_req_unlinked = 0;
	request->rq_replied = 0;
	request->rq_err = 0;
	request->rq_timedout = 0;
	request->rq_net_err = 0;
	request->rq_resend = 0;
	request->rq_restart = 0;
	request->rq_reply_truncated = 0;
	spin_unlock(&request->rq_lock);

	if (!noreply) {
		reply_md.start = request->rq_repbuf;
		reply_md.length = request->rq_repbuf_len;
		/* Allow multiple early replies */
		reply_md.threshold = LNET_MD_THRESH_INF;
		/* Manage remote for early replies */
		reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
				   LNET_MD_MANAGE_REMOTE |
				   LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */
		reply_md.user_ptr = &request->rq_reply_cbid;
		reply_md.eq_handle = ptlrpc_eq_h;

		/* We must see the unlink callback to set rq_reply_unlinked,
		 * so we can't auto-unlink.
		 */
		rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
				  &request->rq_reply_md_h);
		if (rc != 0) {
			CERROR("LNetMDAttach failed: %d\n", rc);
			LASSERT(rc == -ENOMEM);
			spin_lock(&request->rq_lock);
			/* ...but the MD attach didn't succeed... */
			request->rq_receiving_reply = 0;
			spin_unlock(&request->rq_lock);
			rc = -ENOMEM;
			goto cleanup_me;
		}
		CDEBUG(D_NET,
		       "Setup reply buffer: %u bytes, xid %llu, portal %u\n",
		       request->rq_repbuf_len, request->rq_xid,
		       request->rq_reply_portal);
	}

	/* add references on request for request_out_callback */
	ptlrpc_request_addref(request);
	if (obd && obd->obd_svc_stats)
		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
				    atomic_read(&imp->imp_inflight));

	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);

	ktime_get_real_ts64(&request->rq_sent_tv);
	request->rq_sent = ktime_get_real_seconds();
	/* We give the server rq_timeout secs to process the req, and
	 * add the network latency for our local timeout.
	 */
	request->rq_deadline = request->rq_sent + request->rq_timeout +
			       ptlrpc_at_get_net_latency(request);

	ptlrpc_pinger_sending_on_import(imp);

	DEBUG_REQ(D_INFO, request, "send flg=%x",
		  lustre_msg_get_flags(request->rq_reqmsg));
	rc = ptl_send_buf(&request->rq_req_md_h,
			  request->rq_reqbuf, request->rq_reqdata_len,
			  LNET_NOACK_REQ, &request->rq_req_cbid,
			  connection,
			  request->rq_request_portal,
			  request->rq_xid, 0);
	if (likely(rc == 0))
		goto out;

	request->rq_req_unlinked = 1;
	ptlrpc_req_finished(request);
	if (noreply)
		goto out;

cleanup_me:
	/* MEUnlink is safe; the PUT didn't even get off the ground, and
	 * nobody apart from the PUT's target has the right nid+XID to
	 * access the reply buffer.
	 */
	rc2 = LNetMEUnlink(reply_me_h);
	LASSERT(rc2 == 0);
	/* UNLINKED callback called synchronously */
	LASSERT(!request->rq_receiving_reply);

cleanup_bulk:
	/* We do a sync unlink here as there was no real transfer, so the
	 * chance of a long unlink over a sluggish net is smaller.
	 */
	ptlrpc_unregister_bulk(request, 0);
out:
	if (request->rq_memalloc)
		cfs_memory_pressure_restore(mpflag);
	return rc;
}