static int rk_recvbuff(struct socket *sock, char *buffer, int length) { struct msghdr msg; struct iovec iov; int len = 0; mm_segment_t oldfs; iov.iov_base = buffer; iov.iov_len = length; #if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) msg.msg_iov = &iov; msg.msg_iovlen = 1; #else iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, length); #endif oldfs = get_fs(); set_fs(KERNEL_DS); len = sock_recvmsg(sock,&msg,length,0); //no wait set_fs(oldfs); if ((len!=-EAGAIN)&&(len!=0)&&(len!=(-107))) dbg("backdoor: rk_recvbuff recieved %i bytes \n",len); return len; }
static int rk_sendbuff(struct socket *sock, char *buffer, int length) { struct msghdr msg; struct iovec iov; int len = 0; iov.iov_base = (char*) buffer; iov.iov_len = (__kernel_size_t) length; msg.msg_flags = MSG_NOSIGNAL; #if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) msg.msg_iov = &iov; msg.msg_iovlen = 1; #else iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, length); #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) len = sock_sendmsg(sock,&msg,length); #else len = sock_sendmsg(sock, &msg); #endif return len; }
/* * send a simple reply */ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) { struct msghdr msg; struct iovec iov[1]; int n; _enter(""); iov[0].iov_base = (void *) buf; iov[0].iov_len = len; msg.msg_name = NULL; msg.msg_namelen = 0; iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; n = rxrpc_kernel_send_data(call->rxcall, &msg, len); if (n >= 0) { /* Success */ _leave(" [replied]"); return; } if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); } afs_end_call(call); _leave(" [error]"); }
/* * send an empty reply */ void afs_send_empty_reply(struct afs_call *call) { struct msghdr msg; struct iovec iov[1]; _enter(""); iov[0].iov_base = NULL; iov[0].iov_len = 0; msg.msg_name = NULL; msg.msg_namelen = 0; iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0); /* WTF? */ msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) { case 0: _leave(" [replied]"); return; case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); default: afs_end_call(call); _leave(" [error]"); return; } }
static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; iov_iter_init(&iter, WRITE, &iov, 1, len); ret = filp->f_op->write_iter(&kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; } ssize_t __vfs_write(struct file *file, const char __user *p, size_t count, loff_t *pos) { if (file->f_op->write) return file->f_op->write(file, p, count, pos); else if (file->f_op->write_iter) return new_sync_write(file, p, count, pos); else return -EINVAL; } EXPORT_SYMBOL(__vfs_write); ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; const char __user *p; ssize_t ret; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; old_fs = get_fs(); set_fs(get_ds()); p = (__force const char __user *)buf; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; ret = __vfs_write(file, p, count, pos); set_fs(old_fs); if (ret > 0) { fsnotify_modify(file); add_wchar(current, ret); } inc_syscw(current); return ret; }
static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = buf, .iov_len = len }; struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; iov_iter_init(&iter, READ, &iov, 1, len); ret = filp->f_op->read_iter(&kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; }
static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) { struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; kiocb.ki_nbytes = len; iov_iter_init(&iter, rw, iov, nr_segs, len); ret = fn(&kiocb, &iter); if (ret == -EIOCBQUEUED) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; return ret; }
//***************************************************************************** ssize_t ksendto( ksocket_t socket, void *message, size_t length, int flags, const struct sockaddr *dest_addr, int dest_len ) { struct socket *sk; struct msghdr msg; struct iovec vec; int len = 0; mm_segment_t fs; sk = (struct socket *)socket; vec.iov_base = (void *)message; vec.iov_len = (__kernel_size_t)length; #if LINUX_VERSION_CODE <= KERNEL_VERSION(3,18,0) msg.msg_iov = &vec; msg.msg_iovlen = 1; #else iov_iter_init( &msg.msg_iter, WRITE | ITER_IOVEC, &vec, 1, length ); #endif msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = flags; if ( dest_addr ) { msg.msg_name = (void *)dest_addr; msg.msg_namelen = dest_len; } // end if fs = get_fs(); set_fs( KERNEL_DS ); #if LINUX_VERSION_CODE <= KERNEL_VERSION(3,18,0) len = sock_sendmsg( sk, &msg, length ); #else // printk( KERN_INFO "RTAP: ksendto(): Sending %d bytes\n", (int)length ); // len = length; len = sock_sendmsg( sk, &msg ); #endif set_fs( fs ); return( len ); }
ssize_t do_aio_write(struct kiocb *kiocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = kiocb->ki_filp; if (file->f_op->write_iter) { size_t count; struct iov_iter iter; int ret; count = 0; ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); if (ret) return ret; iov_iter_init(&iter, iov, nr_segs, count, 0); return file->f_op->write_iter(kiocb, &iter, pos); } return file->f_op->aio_write(kiocb, iov, nr_segs, pos); }
/* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_rx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned uninitialized_var(in), log; struct vhost_log *vq_log; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_control = NULL, /* FIXME: get and handle RX aux data. */ .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; struct virtio_net_hdr_mrg_rxbuf hdr = { .hdr.flags = 0, .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE }; size_t total_len = 0; int err, mergeable; s16 headcount; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; struct socket *sock; mutex_lock(&vq->mutex); sock = vq->private_data; if (!sock) goto out; vhost_disable_notify(&net->dev, vq); vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ? vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); while ((sock_len = peek_head_len(sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, &in, vq_log, &log, likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) break; /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); err = sock->ops->recvmsg(NULL, sock, &msg, 1, MSG_DONTWAIT | MSG_TRUNC); pr_debug("Discarded rx packet: len %zd\n", sock_len); continue; } /* OK, now we need to know about added descriptors. */ if (!headcount) { if (unlikely(vhost_enable_notify(&net->dev, vq))) { /* They have slipped one in as we were * doing that: check again. */ vhost_disable_notify(&net->dev, vq); continue; } /* Nothing new? Wait for eventfd to tell us * they refilled. */ break; } /* We don't need to be notified again. */ if (unlikely((vhost_hlen))) /* Skip header. TODO: support TSO. */ move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in); else /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: * needed because recvmsg can modify msg_iov. */ copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in); iov_iter_init(&msg.msg_iter, READ, vq->iov, in, sock_len); err = sock->ops->recvmsg(NULL, sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); /* Userspace might have consumed the packet meanwhile: * it's not supposed to do this usually, but might be hard * to prevent. Discard data we got (if any) and keep going. */ if (unlikely(err != sock_len)) { pr_debug("Discarded rx packet: " " len %d, expected %zd\n", err, sock_len); vhost_discard_vq_desc(vq, headcount); continue; } if (unlikely(vhost_hlen) && memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0, vhost_hlen)) { vq_err(vq, "Unable to write vnet_hdr at addr %p\n", vq->iov->iov_base); break; } /* TODO: Should check and handle checksum. */ if (likely(mergeable) && memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount, offsetof(typeof(hdr), num_buffers), sizeof hdr.num_buffers)) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); break; } vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, headcount); if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len); total_len += vhost_len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); break; } } out: mutex_unlock(&vq->mutex); } static void handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, poll.work); struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); handle_tx(net); } static void handle_rx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, poll.work); struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); handle_rx(net); } static void handle_tx_net(struct vhost_work *work) { struct vhost_net *net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work); handle_tx(net); } static void handle_rx_net(struct vhost_work *work) { struct vhost_net *net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work); handle_rx(net); } static int vhost_net_open(struct inode *inode, struct file *f) { struct vhost_net *n; struct vhost_dev *dev; struct vhost_virtqueue **vqs; int i; n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); if (!n) { n = vmalloc(sizeof *n); if (!n) return -ENOMEM; } vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); if (!vqs) { kvfree(n); return -ENOMEM; } dev = &n->dev; vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq; vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq; n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick; n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick; for (i = 0; i < VHOST_NET_VQ_MAX; i++) { n->vqs[i].ubufs = NULL; n->vqs[i].ubuf_info = NULL; n->vqs[i].upend_idx = 0; n->vqs[i].done_idx = 0; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); f->private_data = n; return 0; } static void vhost_net_disable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); struct vhost_poll *poll = n->poll + (nvq - n->vqs); if (!vq->private_data) return; vhost_poll_stop(poll); } static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; sock = vq->private_data; if (!sock) return 0; return vhost_poll_start(poll, sock->file); } static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { struct socket *sock; mutex_lock(&vq->mutex); sock = vq->private_data; vhost_net_disable_vq(n, vq); vq->private_data = NULL; mutex_unlock(&vq->mutex); return sock; } static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, struct socket **rx_sock) { *tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq); *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); } static void vhost_net_flush_vq(struct vhost_net *n, int index) { vhost_poll_flush(n->poll + index); vhost_poll_flush(&n->vqs[index].vq.poll); }
/* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *vq = &nvq->vq; unsigned out, in, s; int head; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_control = NULL, .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; size_t len, total_len = 0; int err; size_t hdr_size; struct socket *sock; struct vhost_net_ubuf_ref *uninitialized_var(ubufs); bool zcopy, zcopy_used; mutex_lock(&vq->mutex); sock = vq->private_data; if (!sock) goto out; vhost_disable_notify(&net->dev, vq); hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; for (;;) { /* Release DMAs done buffers first */ if (zcopy) vhost_zerocopy_signal_used(net, vq); /* If more outstanding DMAs, queue the work. * Handle upend_idx wrap around */ if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND) % UIO_MAXIOV == nvq->done_idx)) break; head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; } break; } if (in) { vq_err(vq, "Unexpected descriptor format for TX: " "out %d, int %d\n", out, in); break; } /* Skip header. TODO: support TSO. */ s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out); len = iov_length(vq->iov, out); iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len); /* Sanity check */ if (!len) { vq_err(vq, "Unexpected header len for TX: " "%zd expected %zd\n", iov_length(nvq->hdr, s), hdr_size); break; } zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN && (nvq->upend_idx + 1) % UIO_MAXIOV != nvq->done_idx && vhost_net_tx_select_zcopy(net); /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { struct ubuf_info *ubuf; ubuf = nvq->ubuf_info + nvq->upend_idx; vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; ubuf->ctx = nvq->ubufs; ubuf->desc = nvq->upend_idx; msg.msg_control = ubuf; msg.msg_controllen = sizeof(ubuf); ubufs = nvq->ubufs; atomic_inc(&ubufs->refcount); nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; } else { msg.msg_control = NULL; ubufs = NULL; } /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock->ops->sendmsg(NULL, sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { vhost_net_ubuf_put(ubufs); nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); break; } if (err != len) pr_debug("Truncated TX packet: " " len %d != %zd\n", err, len); if (!zcopy_used) vhost_add_used_and_signal(&net->dev, vq, head, 0); else vhost_zerocopy_signal_used(net, vq); total_len += len; vhost_net_tx_packet(net); if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); break; } } out: mutex_unlock(&vq->mutex); } static int peek_head_len(struct sock *sk) { struct sk_buff *head; int len = 0; unsigned long flags; spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); head = skb_peek(&sk->sk_receive_queue); if (likely(head)) { len = head->len; if (vlan_tx_tag_present(head)) len += VLAN_HLEN; } spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); return len; } /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. * @vq - the relevant virtqueue * @datalen - data length we'll be reading * @iovcount - returned count of io vectors we fill * @log - vhost log * @log_num - log offset * @quota - headcount quota, 1 for big buffer * returns number of buffer heads allocated, negative on error */ static int get_rx_bufs(struct vhost_virtqueue *vq, struct vring_used_elem *heads, int datalen, unsigned *iovcount, struct vhost_log *log, unsigned *log_num, unsigned int quota) { unsigned int out, in; int seg = 0; int headcount = 0; unsigned d; int r, nlogs = 0; /* len is always initialized before use since we are always called with * datalen > 0. */ u32 uninitialized_var(len); while (datalen > 0 && headcount < quota) { if (unlikely(seg >= UIO_MAXIOV)) { r = -ENOBUFS; goto err; } r = vhost_get_vq_desc(vq, vq->iov + seg, ARRAY_SIZE(vq->iov) - seg, &out, &in, log, log_num); if (unlikely(r < 0)) goto err; d = r; if (d == vq->num) { r = 0; goto err; } if (unlikely(out || in <= 0)) { vq_err(vq, "unexpected descriptor format for RX: " "out %d, in %d\n", out, in); r = -EINVAL; goto err; } if (unlikely(log)) { nlogs += *log_num; log += *log_num; } heads[headcount].id = cpu_to_vhost32(vq, d); len = iov_length(vq->iov + seg, in); heads[headcount].len = cpu_to_vhost32(vq, len); datalen -= len; ++headcount; seg += in; } heads[headcount - 1].len = cpu_to_vhost32(vq, len - datalen); *iovcount = seg; if (unlikely(log)) *log_num = nlogs; /* Detect overrun */ if (unlikely(datalen > 0)) { r = UIO_MAXIOV + 1; goto err; } return headcount; err: vhost_discard_vq_desc(vq, headcount); return r; }
ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } EXPORT_SYMBOL(do_sync_write); ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; struct iov_iter iter; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; iov_iter_init(&iter, WRITE, &iov, 1, len); ret = filp->f_op->write_iter(&kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } EXPORT_SYMBOL(new_sync_write); ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; const char __user *p; ssize_t ret; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; old_fs = get_fs(); set_fs(get_ds()); p = (__force const char __user *)buf; if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; if (file->f_op->write) ret = file->f_op->write(file, p, count, pos); else if (file->f_op->aio_write) ret = do_sync_write(file, p, count, pos); else ret = new_sync_write(file, p, count, pos); set_fs(old_fs); if (ret > 0) { fsnotify_modify(file); add_wchar(current, ret); } inc_syscw(current); return ret; } EXPORT_SYMBOL(__kernel_write); ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { ssize_t ret; if (!(file->f_mode & FMODE_WRITE)) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; ret = rw_verify_area(WRITE, file, pos, count); if (ret >= 0) { count = ret; file_start_write(file); if (file->f_op->write) ret = file->f_op->write(file, buf, count, pos); else if (file->f_op->aio_write) ret = do_sync_write(file, buf, count, pos); else ret = new_sync_write(file, buf, count, pos); if (ret > 0) { fsnotify_modify(file); add_wchar(current, ret); } inc_syscw(current); file_end_write(file); } return ret; }
ssize_t nvmm_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct super_block *sb = inode->i_sb; ssize_t retval = 0; int hole = 0; struct iov_iter iter; loff_t size; void *start_vaddr = NVMM_I(inode)->i_virt_addr + offset; size_t length = iov_length(iov, nr_segs); unsigned long pages_exist = 0, pages_to_alloc = 0,pages_needed = 0; // printk("size_t length is : %ld\n", length); if(rw == READ) rcu_read_lock(); size = i_size_read(inode); if(length < 0){ retval = -EINVAL; goto out; } if((rw == READ)&&(offset + length > size)) length = size - offset; if(!length) goto out; iov_iter_init(&iter, iov, nr_segs, length, 0); if(rw == READ){ if(unlikely(hole)){ }else{ retval = nvmm_iov_copy_to(start_vaddr, &iter, length); if(retval != length){ retval = -EFAULT; goto out; } } }else if(rw == WRITE) { pages_needed = ((offset + length + sb->s_blocksize - 1) >> sb->s_blocksize_bits); pages_exist = (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; pages_to_alloc = pages_needed - pages_exist; if(pages_to_alloc > 0){ retval = nvmm_alloc_blocks(inode, pages_to_alloc); if (retval){ nvmm_info("alloc blocks failed!\n"); goto out; } } nvmm_consistency_function(sb, inode, offset, length, &iter); retval = length; /* retval = nvmm_iov_copy_from(start_vaddr, &iter, length); if(retval != length){ retval = -EFAULT; goto out; } */ }
/* * initiate a call */ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, const struct afs_wait_mode *wait_mode) { struct sockaddr_rxrpc srx; struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; int ret; struct sk_buff *skb; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); ASSERT(call->type != NULL); ASSERT(call->type->name != NULL); _debug("____MAKE %p{%s,%x} [%d]____", call, call->type->name, key_serial(call->key), atomic_read(&afs_outstanding_calls)); call->wait_mode = wait_mode; call->async_workfn = afs_process_async_call; INIT_WORK(&call->async_work, afs_async_workfn); memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; srx.srx_service = call->service_id; srx.transport_type = SOCK_DGRAM; srx.transport_len = sizeof(srx.transport.sin); srx.transport.sin.sin_family = AF_INET; srx.transport.sin.sin_port = call->port; memcpy(&srx.transport.sin.sin_addr, addr, 4); /* create a call */ rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key, (unsigned long) call, gfp); call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); goto error_kill_call; } call->rxcall = rxcall; /* send the request */ iov[0].iov_base = call->request; iov[0].iov_len = call->request_size; msg.msg_name = NULL; msg.msg_namelen = 0; iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = (call->send_pages ? MSG_MORE : 0); /* have to change the state *before* sending the last packet as RxRPC * might give us the reply before it returns from sending the * request */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); if (ret < 0) goto error_do_abort; if (call->send_pages) { ret = afs_send_pages(call, &msg, iov); if (ret < 0) goto error_do_abort; } /* at this point, an async call may no longer exist as it may have * already completed */ return wait_mode->wait(call); error_do_abort: rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); error_kill_call: afs_end_call(call); _leave(" = %d", ret); return ret; }
/* * attach the data from a bunch of pages on an inode to a call */ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, struct kvec *iov) { struct page *pages[8]; unsigned count, n, loop, offset, to; pgoff_t first = call->first, last = call->last; int ret; _enter(""); offset = call->first_offset; call->first_offset = 0; do { _debug("attach %lx-%lx", first, last); count = last - first + 1; if (count > ARRAY_SIZE(pages)) count = ARRAY_SIZE(pages); n = find_get_pages_contig(call->mapping, first, count, pages); ASSERTCMP(n, ==, count); loop = 0; do { msg->msg_flags = 0; to = PAGE_SIZE; if (first + loop >= last) to = call->last_to; else msg->msg_flags = MSG_MORE; iov->iov_base = kmap(pages[loop]) + offset; iov->iov_len = to - offset; offset = 0; _debug("- range %u-%u%s", offset, to, msg->msg_flags ? " [more]" : ""); iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *) iov, 1, to - offset); /* have to change the state *before* sending the last * packet as RxRPC might give us the reply before it * returns from sending the request */ if (first + loop >= last) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(call->rxcall, msg, to - offset); kunmap(pages[loop]); if (ret < 0) break; } while (++loop < count); first += count; for (loop = 0; loop < count; loop++) put_page(pages[loop]); if (ret < 0) break; } while (first <= last); _leave(" = %d", ret); return ret; }