/* blocks sndbuf producer until at least one byte of free space available */
static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	bool noblock;
	long timeo;
	int rc = 0;

	/* similar to sk_stream_wait_memory */
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
	/* remember whether the caller asked for non-blocking mode; timeo is
	 * decremented by sk_wait_event(), so it cannot be used for this test
	 * on later loop iterations
	 */
	noblock = timeo ? false : true;
	add_wait_queue(sk_sleep(sk), &wait);
	while (1) {
		/* request async "no space" notification before re-checking
		 * the exit conditions, to avoid missing a wakeup
		 */
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		if (sk->sk_err ||
		    (sk->sk_shutdown & SEND_SHUTDOWN) ||
		    conn->local_tx_ctrl.conn_state_flags.peer_done_writing) {
			rc = -EPIPE;
			break;
		}
		if (smc_cdc_rxed_any_close(conn)) {
			rc = -ECONNRESET;
			break;
		}
		if (!timeo) {
			/* timeo exhausted; only flag SOCK_NOSPACE for a
			 * genuinely non-blocking caller (see noblock above)
			 */
			if (noblock)
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			rc = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		if (atomic_read(&conn->sndbuf_space))
			break; /* at least 1 byte of free space available */
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		/* sleep until woken; sk_wait_event() re-evaluates the
		 * condition and decrements timeo by the time slept
		 */
		sk_wait_event(sk, &timeo,
			      sk->sk_err ||
			      (sk->sk_shutdown & SEND_SHUTDOWN) ||
			      smc_cdc_rxed_any_close(conn) ||
			      atomic_read(&conn->sndbuf_space),
			      &wait);
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
/** * sk_stream_wait_memory - Wait for more memory for a socket * @sk: socket to wait for memory * @timeo_p: for how long */ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) { int err = 0; long vm_wait = 0; long current_timeo = *timeo_p; bool noblock = (*timeo_p ? false : true); DEFINE_WAIT(wait); if (sk_stream_memory_free(sk)) current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; while (1) { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; if (!*timeo_p) { if (noblock) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); goto do_nonblock; } if (signal_pending(current)) goto do_interrupted; sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk_stream_memory_free(sk) && !vm_wait) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; sk_wait_event(sk, ¤t_timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || (sk_stream_memory_free(sk) && !vm_wait)); sk->sk_write_pending--; if (vm_wait) { vm_wait -= current_timeo; current_timeo = *timeo_p; if (current_timeo != MAX_SCHEDULE_TIMEOUT && (current_timeo -= vm_wait) < 0) current_timeo = 0; vm_wait = 0; } *timeo_p = current_timeo; } out: finish_wait(sk_sleep(sk), &wait); return err; do_error: err = -EPIPE; goto out; do_nonblock: err = -EAGAIN; goto out; do_interrupted: err = sock_intr_errno(*timeo_p); goto out; }
/* sendpage() entry point for the TLS software path: append page data to the
 * open plaintext record, pushing completed records to the transport.
 * Returns the number of bytes consumed, or a negative error.
 */
int tls_sw_sendpage(struct sock *sk, struct page *page,
		    int offset, size_t size, int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
	int ret = 0;
	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
	bool eor;
	size_t orig_size = size;
	unsigned char record_type = TLS_RECORD_TYPE_DATA;
	struct scatterlist *sg;
	bool full_record;
	int record_room;

	/* reject any flag this implementation does not understand */
	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
		      MSG_SENDPAGE_NOTLAST))
		return -ENOTSUPP;

	/* No MSG_EOR from splice, only look at MSG_MORE */
	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));

	lock_sock(sk);

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	/* finish any record left over from a previous interrupted call */
	if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo))
		goto sendpage_end;

	/* Call the sk_stream functions to manage the sndbuf mem. */
	while (size > 0) {
		size_t copy, required_size;

		if (sk->sk_err) {
			ret = sk->sk_err;
			goto sendpage_end;
		}

		full_record = false;
		/* space left in the currently open plaintext record */
		record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
		copy = size;
		if (copy >= record_room) {
			copy = record_room;
			full_record = true;
		}
		/* plaintext so far + this chunk + crypto overhead */
		required_size = ctx->sg_plaintext_size + copy +
			      tls_ctx->overhead_size;

		if (!sk_stream_memory_free(sk))
			goto wait_for_sndbuf;
alloc_payload:
		ret = alloc_encrypted_sg(sk, required_size);
		if (ret) {
			if (ret != -ENOSPC)
				goto wait_for_memory;

			/* Adjust copy according to the amount that was
			 * actually allocated. The difference is due
			 * to max sg elements limit
			 */
			copy -= required_size - ctx->sg_plaintext_size;
			full_record = true;
		}

		/* take a page reference for the scatterlist entry; released
		 * when the record is freed
		 */
		get_page(page);
		sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
		sg_set_page(sg, page, copy, offset);
		ctx->sg_plaintext_num_elem++;

		sk_mem_charge(sk, copy);
		offset += copy;
		size -= copy;
		ctx->sg_plaintext_size += copy;
		tls_ctx->pending_open_record_frags = ctx->sg_plaintext_num_elem;

		/* push when the record is full, the caller signalled end of
		 * data, or the scatterlist has no room for more fragments
		 */
		if (full_record || eor ||
		    ctx->sg_plaintext_num_elem ==
		    ARRAY_SIZE(ctx->sg_plaintext_data)) {
push_record:
			ret = tls_push_record(sk, flags, record_type);
			if (ret) {
				if (ret == -ENOMEM)
					goto wait_for_memory;

				goto sendpage_end;
			}
		}
		continue;

wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		ret = sk_stream_wait_memory(sk, &timeo);
		if (ret) {
			/* give back the partially built plaintext on error */
			trim_both_sgl(sk, ctx->sg_plaintext_size);
			goto sendpage_end;
		}

		/* a record may have been closed while waiting; push it before
		 * allocating for new data
		 */
		if (tls_is_pending_closed_record(tls_ctx))
			goto push_record;

		goto alloc_payload;
	}

sendpage_end:
	/* report partial progress in preference to an error */
	if (orig_size > size)
		ret = orig_size - size;
	else
		ret = sk_stream_error(sk, flags, ret);

	release_sock(sk);
	return ret;
}
/* sndbuf producer: main API called by socket layer.
 * called under sock lock.
 * Copies msg data into the send ring buffer (handling wrap-around in at
 * most two chunks), advances the producer cursor, and kicks the sndbuf
 * consumer. Returns bytes accepted, or a negative error.
 */
int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
{
	size_t copylen, send_done = 0, send_remaining = len;
	size_t chunk_len, chunk_off, chunk_len_sum;
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor prep;
	struct sock *sk = &smc->sk;
	char *sndbuf_base;
	int tx_cnt_prep;
	int writespace;
	int rc, chunk;

	/* This should be in poll */
	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
		rc = -EPIPE;
		goto out_err;
	}

	while (msg_data_left(msg)) {
		if (sk->sk_state == SMC_INIT)
			return -ENOTCONN;
		if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
		    (smc->sk.sk_err == ECONNABORTED) ||
		    conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
			return -EPIPE;
		if (smc_cdc_rxed_any_close(conn))
			/* partial progress wins over the error */
			return send_done ?: -ECONNRESET;

		if (!atomic_read(&conn->sndbuf_space)) {
			rc = smc_tx_wait_memory(smc, msg->msg_flags);
			if (rc) {
				if (send_done)
					return send_done;
				goto out_err;
			}
			continue;
		}

		/* initialize variables for 1st iteration of subsequent loop */
		/* could be just 1 byte, even after smc_tx_wait_memory above */
		writespace = atomic_read(&conn->sndbuf_space);
		/* not more than what user space asked for */
		copylen = min_t(size_t, send_remaining, writespace);
		/* determine start of sndbuf */
		sndbuf_base = conn->sndbuf_desc->cpu_addr;
		smc_curs_write(&prep,
			       smc_curs_read(&conn->tx_curs_prep, conn),
			       conn);
		tx_cnt_prep = prep.count;
		/* determine chunks where to write into sndbuf */
		/* either unwrapped case, or 1st chunk of wrapped case */
		chunk_len = min_t(size_t,
				  copylen, conn->sndbuf_size - tx_cnt_prep);
		chunk_len_sum = chunk_len;
		chunk_off = tx_cnt_prep;
		smc_sndbuf_sync_sg_for_cpu(conn);
		/* at most two memcpy passes: tail of ring, then wrapped head */
		for (chunk = 0; chunk < 2; chunk++) {
			rc = memcpy_from_msg(sndbuf_base + chunk_off,
					     msg, chunk_len);
			if (rc) {
				smc_sndbuf_sync_sg_for_device(conn);
				if (send_done)
					return send_done;
				goto out_err;
			}
			send_done += chunk_len;
			send_remaining -= chunk_len;

			if (chunk_len_sum == copylen)
				break; /* either on 1st or 2nd iteration */
			/* prepare next (== 2nd) iteration */
			chunk_len = copylen - chunk_len; /* remainder */
			chunk_len_sum += chunk_len;
			chunk_off = 0; /* modulo offset in send ring buffer */
		}
		smc_sndbuf_sync_sg_for_device(conn);
		/* update cursors */
		smc_curs_add(conn->sndbuf_size, &prep, copylen);
		smc_curs_write(&conn->tx_curs_prep,
			       smc_curs_read(&prep, conn),
			       conn);
		/* increased in send tasklet smc_cdc_tx_handler() */
		smp_mb__before_atomic();
		atomic_sub(copylen, &conn->sndbuf_space);
		/* guarantee 0 <= sndbuf_space <= sndbuf_size */
		smp_mb__after_atomic();
		/* since we just produced more new data into sndbuf,
		 * trigger sndbuf consumer: RDMA write into peer RMBE and CDC
		 */
		smc_tx_sndbuf_nonempty(conn);
	} /* while (msg_data_left(msg)) */

	return send_done;

out_err:
	rc = sk_stream_error(sk, msg->msg_flags, rc);
	/* make sure we wake any epoll edge trigger waiter */
	if (unlikely(rc == -EAGAIN))
		sk->sk_write_space(sk);
	return rc;
}