int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
	int ret = 0;
	int required_size;
	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
	bool eor = !(msg->msg_flags & MSG_MORE);
	size_t try_to_copy, copied = 0;
	unsigned char record_type = TLS_RECORD_TYPE_DATA;
	int record_room;
	bool full_record;
	int orig_size;

	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
		return -ENOTSUPP;

	lock_sock(sk);

	if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo))
		goto send_end;

	if (unlikely(msg->msg_controllen)) {
		ret = tls_proccess_cmsg(sk, msg, &record_type);
		if (ret)
			goto send_end;
	}

	while (msg_data_left(msg)) {
		if (sk->sk_err) {
			ret = sk->sk_err;
			goto send_end;
		}

		orig_size = ctx->sg_plaintext_size;
		full_record = false;
		try_to_copy = msg_data_left(msg);
		record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
		if (try_to_copy >= record_room) {
			try_to_copy = record_room;
			full_record = true;
		}

		required_size = ctx->sg_plaintext_size + try_to_copy +
				tls_ctx->overhead_size;

		if (!sk_stream_memory_free(sk))
			goto wait_for_sndbuf;
alloc_encrypted:
		ret = alloc_encrypted_sg(sk, required_size);
		if (ret) {
			if (ret != -ENOSPC)
				goto wait_for_memory;

			/* Adjust try_to_copy according to the amount that was
			 * actually allocated. The difference is due
			 * to max sg elements limit
			 */
			try_to_copy -= required_size - ctx->sg_encrypted_size;
			full_record = true;
		}

		if (full_record || eor) {
			ret = zerocopy_from_iter(sk, &msg->msg_iter,
						 try_to_copy);
			if (ret)
				goto fallback_to_reg_send;

			copied += try_to_copy;
			ret = tls_push_record(sk, msg->msg_flags, record_type);
			if (!ret)
				continue;
			if (ret == -EAGAIN)
				goto send_end;

			copied -= try_to_copy;
fallback_to_reg_send:
			iov_iter_revert(&msg->msg_iter,
					ctx->sg_plaintext_size - orig_size);
			trim_sg(sk, ctx->sg_plaintext_data,
				&ctx->sg_plaintext_num_elem,
				&ctx->sg_plaintext_size,
				orig_size);
		}

		required_size = ctx->sg_plaintext_size + try_to_copy;
alloc_plaintext:
		ret = alloc_plaintext_sg(sk, required_size);
		if (ret) {
			if (ret != -ENOSPC)
				goto wait_for_memory;

			/* Adjust try_to_copy according to the amount that was
			 * actually allocated. The difference is due
			 * to max sg elements limit
			 */
			try_to_copy -= required_size - ctx->sg_plaintext_size;
			full_record = true;

			trim_sg(sk, ctx->sg_encrypted_data,
				&ctx->sg_encrypted_num_elem,
				&ctx->sg_encrypted_size,
				ctx->sg_plaintext_size +
				tls_ctx->overhead_size);
		}

		ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
		if (ret)
			goto trim_sgl;

		copied += try_to_copy;
		if (full_record || eor) {
push_record:
			ret = tls_push_record(sk, msg->msg_flags, record_type);
			if (ret) {
				if (ret == -ENOMEM)
					goto wait_for_memory;

				goto send_end;
			}
		}

		continue;

wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		ret = sk_stream_wait_memory(sk, &timeo);
		if (ret) {
trim_sgl:
			trim_both_sgl(sk, orig_size);
			goto send_end;
		}

		if (tls_is_pending_closed_record(tls_ctx))
			goto push_record;

		if (ctx->sg_encrypted_size < required_size)
			goto alloc_encrypted;

		goto alloc_plaintext;
	}

send_end:
	ret = sk_stream_error(sk, msg->msg_flags, ret);

	release_sock(sk);
	return copied ? copied : ret;
}
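/*
 * A minimal userspace sketch (not part of this file) of how the
 * msg_controllen branch above gets exercised. After the "tls" ULP and
 * TX keys have been installed with setsockopt(), a control message with
 * level SOL_TLS and type TLS_SET_RECORD_TYPE selects a non-data record
 * type; tls_proccess_cmsg() parses it into record_type before the send
 * loop runs. Error handling is elided and the record type value (21,
 * TLS alert) is an illustrative choice; names follow the uapi in
 * <linux/tls.h>.
 */
#include <string.h>
#include <sys/socket.h>
#include <linux/tls.h>

static ssize_t send_tls_alert(int fd, const void *buf, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(unsigned char))];
	unsigned char record_type = 21;	/* TLS alert content type */
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_TLS;
	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
	cmsg->cmsg_len = CMSG_LEN(sizeof(record_type));
	*CMSG_DATA(cmsg) = record_type;

	/* Lands in tls_sw_sendmsg() with msg_controllen set. */
	return sendmsg(fd, &msg, 0);
}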
/*
 * Post and wait for the I/O upcall to finish
 */
static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type,
		struct inode *inode, loff_t *offset,
		struct iov_iter *iter, size_t total_size,
		loff_t readahead_size)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
	struct orangefs_kernel_op_s *new_op = NULL;
	int buffer_index = -1;
	ssize_t ret;

	new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
	if (!new_op)
		return -ENOMEM;

	/* synchronous I/O */
	new_op->upcall.req.io.readahead_size = readahead_size;
	new_op->upcall.req.io.io_type = type;
	new_op->upcall.req.io.refn = orangefs_inode->refn;

populate_shared_memory:
	/* get a shared buffer index */
	buffer_index = orangefs_bufmap_get();
	if (buffer_index < 0) {
		ret = buffer_index;
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s: orangefs_bufmap_get failure (%zd)\n",
			     __func__, ret);
		goto out;
	}
	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): GET op %p -> buffer_index %d\n",
		     __func__, handle, new_op, buffer_index);

	new_op->uses_shared_memory = 1;
	new_op->upcall.req.io.buf_index = buffer_index;
	new_op->upcall.req.io.count = total_size;
	new_op->upcall.req.io.offset = *offset;

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): offset: %llu total_size: %zd\n",
		     __func__, handle, llu(*offset), total_size);

	/*
	 * Stage 1: copy the buffers into client-core's address space
	 * precopy_buffers only pertains to writes.
	 */
	if (type == ORANGEFS_IO_WRITE) {
		ret = precopy_buffers(buffer_index, iter, total_size);
		if (ret < 0)
			goto out;
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Calling post_io_request with tag (%llu)\n",
		     __func__, handle, llu(new_op->tag));

	/* Stage 2: Service the I/O operation */
	ret = service_operation(new_op,
				type == ORANGEFS_IO_WRITE ?
					"file_write" :
					"file_read",
				get_interruptible_flag(inode));

	/*
	 * If service_operation() returns -EAGAIN #and# the operation was
	 * purged from orangefs_request_list or htable_ops_in_progress, then
	 * we know that the client was restarted, causing the shared memory
	 * area to be wiped clean. To restart a write operation in this
	 * case, we must re-copy the data from the user's iovec to a NEW
	 * shared memory location. To restart a read operation, we must get
	 * a new shared memory location.
	 */
	if (ret == -EAGAIN && op_state_purged(new_op)) {
		orangefs_bufmap_put(buffer_index);
		buffer_index = -1;
		if (type == ORANGEFS_IO_WRITE)
			iov_iter_revert(iter, total_size);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s:going to repopulate_shared_memory.\n",
			     __func__);
		goto populate_shared_memory;
	}

	if (ret < 0) {
		if (ret == -EINTR) {
			/*
			 * We can't return EINTR if any data was written,
			 * it's not POSIX. It is minimally acceptable
			 * to give a partial write, the way NFS does.
			 *
			 * It would be optimal to return all or nothing,
			 * but if a userspace write is bigger than
			 * an IO buffer, and the interrupt occurs
			 * between buffer writes, that would not be
			 * possible.
			 */
			switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) {
			/*
			 * If the op was waiting when the interrupt
			 * occurred, then the client-core did not
			 * trigger the write.
			 */
			case OP_VFS_STATE_WAITING:
				if (*offset == 0)
					ret = -EINTR;
				else
					ret = 0;
				break;
			/*
			 * If the op was in progress when the interrupt
			 * occurred, then the client-core was able to
			 * trigger the write.
			 */
			case OP_VFS_STATE_INPROGR:
				ret = total_size;
				break;
			default:
				gossip_err("%s: unexpected op state :%d:.\n",
					   __func__, new_op->op_state);
				ret = 0;
				break;
			}
			gossip_debug(GOSSIP_FILE_DEBUG,
				     "%s: got EINTR, state:%d: %p\n",
				     __func__, new_op->op_state, new_op);
		} else {
			gossip_err("%s: error in %s handle %pU, returning %zd\n",
				   __func__,
				   type == ORANGEFS_IO_READ ?
					"read from" : "write to",
				   handle, ret);
		}
		if (orangefs_cancel_op_in_progress(new_op))
			return ret;

		goto out;
	}

	/*
	 * Stage 3: Post copy buffers from client-core's address space
	 * postcopy_buffers only pertains to reads.
	 */
	if (type == ORANGEFS_IO_READ) {
		ret = postcopy_buffers(buffer_index, iter,
				       new_op->downcall.resp.io.amt_complete);
		if (ret < 0)
			goto out;
	}
	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Amount %s, returned by the sys-io call:%d\n",
		     __func__, handle,
		     type == ORANGEFS_IO_READ ? "read" : "written",
		     (int)new_op->downcall.resp.io.amt_complete);

	ret = new_op->downcall.resp.io.amt_complete;

out:
	if (buffer_index >= 0) {
		orangefs_bufmap_put(buffer_index);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): PUT buffer_index %d\n",
			     __func__, handle, buffer_index);
		buffer_index = -1;
	}
	op_release(new_op);
	return ret;
}
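/*
 * A hedged sketch (not from this file) of the caller pattern around
 * wait_for_direct_io(): the orangefs read/write path slices the iov_iter
 * into chunks no larger than one shared-memory buffer and advances
 * *offset by the amount each upcall actually completed, stopping early
 * on a short transfer. The helper name do_direct_readv_writev is
 * hypothetical; orangefs_bufmap_size_query() is assumed to return the
 * per-buffer size of the shared bufmap.
 */
static ssize_t do_direct_readv_writev(enum ORANGEFS_io_type type,
		struct inode *inode, loff_t *offset, struct iov_iter *iter)
{
	ssize_t total_count = 0;
	ssize_t ret;

	while (iov_iter_count(iter)) {
		size_t each_count = min_t(size_t, iov_iter_count(iter),
					  orangefs_bufmap_size_query());

		ret = wait_for_direct_io(type, inode, offset, iter,
					 each_count, 0);
		if (ret < 0)
			return total_count ? total_count : ret;

		*offset += ret;
		total_count += ret;

		/* a short transfer means there is nothing more to move */
		if (ret < each_count)
			break;
	}
	return total_count;
}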