/*
 * Stop all async IO (cachemiss) threads of this iothread pool and
 * wait until the last one has exited.  Must only be called when no
 * async requests are pending.
 */
static void __stop_cachemiss_threads (iothread_t *iot)
{
	DECLARE_WAITQUEUE(wait, current);

	/*
	 * Mark ourselves sleeping *before* queueing on wait_shutdown and
	 * raising the shutdown flag, so a wakeup from the last exiting
	 * thread cannot be lost before we reach schedule().
	 */
	__set_current_state(TASK_UNINTERRUPTIBLE);

	Dprintk("stopping async IO threads %p.\n", iot);
	add_wait_queue(&iot->wait_shutdown, &wait);

	spin_lock(&iot->async_lock);
	/* Double shutdown, or shutdown of an empty pool, is a bug: */
	if (iot->shutdown)
		TUX_BUG();
	if (!iot->threads)
		TUX_BUG();
	iot->shutdown = 1;
	/* Kick every sleeping IO thread so it notices the shutdown flag: */
	wake_up_all(&iot->async_sleep);
	spin_unlock(&iot->async_lock);

	Dprintk("waiting for async IO threads %p to exit.\n", iot);
	/* Sleep until the last thread wakes us via wait_shutdown: */
	schedule();
	remove_wait_queue(&iot->wait_shutdown, &wait);

	/* By now all threads must be gone and no work may be pending: */
	if (iot->threads)
		TUX_BUG();
	if (iot->nr_async_pending)
		TUX_BUG();
	Dprintk("stopped async IO threads %p.\n", iot);
}
/*
 * Generate a directory listing for 'req': read one DIRENT_SIZE batch of
 * directory entries into a kmalloc()ed buffer and queue the do_dir_line
 * atom to format/send them one entry at a time.  Re-queues itself as a
 * tux atom after each batch until vfs_readdir() returns no entries.
 *
 * Fix: the original called set_fs(KERNEL_DS) twice back to back; the
 * duplicate call was redundant and has been removed.
 */
void list_directory (tux_req_t *req, int cachemiss)
{
	struct getdents_callback64 buf;
	struct linux_dirent64 *dirp0;
	mm_segment_t oldmm;
	int total;

	Dprintk("list_directory(%p, %d), dentry: %p.\n", req, cachemiss, req->dentry);
	if (!req->cwd_dentry)
		TUX_BUG();

	/* Directory reads may block - do the work from cachemiss context: */
	if (!cachemiss) {
		add_tux_atom(req, list_directory);
		queue_cachemiss(req);
		return;
	}

	dirp0 = tux_kmalloc(DIRENT_SIZE);

	buf.current_dir = dirp0;
	buf.previous = NULL;
	buf.count = DIRENT_SIZE;
	buf.error = 0;

	/* vfs_readdir() fills a kernel buffer - switch address limit: */
	oldmm = get_fs();
	set_fs(KERNEL_DS);
	total = vfs_readdir(req->in_file, filldir64, &buf);
	set_fs(oldmm);

	/* If any entry was emitted, the real byte count is what filldir used: */
	if (buf.previous)
		total = DIRENT_SIZE - buf.count;

	Dprintk("total: %d (buf.error: %d, buf.previous %p)\n", total, buf.error, buf.previous);

	if (total < 0) {
		/* readdir error - fail the request: */
		kfree(dirp0);
		req_err(req);
		add_req_to_workqueue(req);
		return;
	}
	if (!total) {
		/* End of directory - rewind and finish: */
		kfree(dirp0);
		req->in_file->f_pos = 0;
		add_req_to_workqueue(req);
		return;
	}

	if (!req->cwd_dentry)
		TUX_BUG();
	/* Come back here after all lines of this batch are sent: */
	add_tux_atom(req, list_directory);
	req->dirp0 = dirp0;
	req->curroff = 0;
	req->total = total;
	add_tux_atom(req, do_dir_line);
	add_req_to_workqueue(req);
}
/*
 * Tear down the async IO thread pool attached to 'ti': stop all
 * cachemiss threads, detach the pool from the threadinfo and free it.
 */
void stop_cachemiss_threads (threadinfo_t *ti)
{
	iothread_t *iothread = ti->iot;

	/* The pool must exist: */
	if (!iothread)
		TUX_BUG();
	/* ... and must be idle - no async work may still be queued: */
	if (iothread->nr_async_pending)
		TUX_BUG();

	__stop_cachemiss_threads(iothread);

	ti->iot = NULL;
	kfree(iothread);
}
static void register_tux_proc (unsigned int nr) { struct proc_dir_entry *entry; char name [MAX_NAMELEN]; int i; if (!root_tux_dir) TUX_BUG(); sprintf(name, "%d", nr); /* create /proc/net/tux/1234/ */ tux_dir[nr] = proc_mkdir(name, root_tux_dir); /* create /proc/net/tux/1234/listen/ */ listen_dir[nr] = proc_mkdir("listen", tux_dir[nr]); /* create /proc/net/tux/1234/listen/ */ for (i = 0; i < CONFIG_TUX_NUMSOCKETS; i++) { sprintf(name, "%d", i); entry = create_proc_entry(name, 0700, listen_dir[nr]); entry->nlink = 1; entry->data = (void *)(tux_listen[nr] + i); entry->read_proc = listen_read_proc; entry->write_proc = listen_write_proc; tux_listen[nr][i].entry = entry; } }
/*
 * Synchronously send 'length' bytes from 'buf' over 'sock', retrying
 * on signal interruption (and on -EAGAIN unless MSG_DONTWAIT was
 * requested) until everything is written or a hard error occurs.
 * Returns the number of bytes written, or the last negative error
 * code if nothing at all was sent.
 */
int send_sync_buf (tux_req_t *req, struct socket *sock, const char *buf, const size_t length, unsigned long flags)
{
	struct msghdr msg;
	struct iovec iov;
	int ret, done = 0, todo = length;

	/* Disable Nagle on this socket: */
	tcp_sk(sock->sk)->nonagle = 2;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = flags | MSG_NOSIGNAL;

	for (;;) {
		iov.iov_base = (char *) buf + done;
		iov.iov_len = todo;

		ret = sock_sendmsg(sock, &msg, todo);
		Dprintk("sendmsg ret: %d, written: %d, left: %d.\n", ret, done, todo);

		/* Interrupted (or blocking mode hit -EAGAIN): retry. */
		if ((ret == -ERESTARTSYS) ||
		    (!(flags & MSG_DONTWAIT) && (ret == -EAGAIN))) {
			flush_all_signals();
			continue;
		}
		if (ret > 0) {
			done += ret;
			todo -= ret;
			/* Partial write - keep going: */
			if (todo)
				continue;
		}
		break;
	}
	/* A non-error exit must mean the whole buffer went out: */
	if (ret >= 0) {
		if (done != length)
			TUX_BUG();
		if (todo)
			TUX_BUG();
	}
	if (req && (done > 0))
		req->bytes_sent += done;

	Dprintk("sendmsg FINAL ret: %d, written: %d, left: %d.\n", ret, done, todo);

	return done ? done : ret;
}
/*
 * Hand 'req' over to its threadpool's async IO (cachemiss) threads.
 * Requests on connections that are ahead of their send window go to
 * the tail of the queue, others get priority at the head.
 */
void queue_cachemiss (tux_req_t *req)
{
	iothread_t *iothread = req->ti->iot;

	Dprintk("queueing_cachemiss(req:%p) (req->cwd_dentry: %p) at %p:%p.\n",
		req, req->cwd_dentry, __builtin_return_address(0),
		__builtin_return_address(1));

	/* A request blocked on input or output space must not end up here: */
	if (req->idle_input || req->wait_output_space)
		TUX_BUG();
	req->had_cachemiss = 1;
	/* ... nor may it already be linked on some work list: */
	if (!list_empty(&req->work))
		TUX_BUG();

	spin_lock(&iothread->async_lock);
	if (connection_too_fast(req))
		list_add_tail(&req->work, &iothread->async_queue);
	else
		list_add(&req->work, &iothread->async_queue);
	iothread->nr_async_pending++;
	INC_STAT(nr_cachemiss_pending);
	spin_unlock(&iothread->async_lock);

	/* Wake one sleeping IO thread to pick the request up: */
	wake_up(&iothread->async_sleep);
}
struct file * tux_open_file (char *filename, int mode) { struct file *filp; if (!filename) TUX_BUG(); /* Rule no. 3 -- Does the file exist ? */ filp = filp_open(filename, mode, 0600); if (IS_ERR(filp) || !filp || !filp->f_dentry) goto err; out: return filp; err: Dprintk("filp_open() error: %d.\n", (int)filp); filp = NULL; goto out; }
/*
 * Pop the next pending cachemiss request off this iothread's async
 * queue.  Returns NULL if no work is queued.
 */
static tux_req_t * get_cachemiss (iothread_t *iot)
{
	tux_req_t *req = NULL;

	spin_lock(&iot->async_lock);
	if (!list_empty(&iot->async_queue)) {
		struct list_head *head = iot->async_queue.next;

		req = list_entry(head, tux_req_t, work);
		Dprintk("get_cachemiss(%p): got req %p.\n", iot, req);

		list_del(head);
		DEBUG_DEL_LIST(head);
		iot->nr_async_pending--;
		DEC_STAT(nr_cachemiss_pending);

		/* Sanity check: the request must belong to this pool. */
		if (req->ti->iot != iot)
			TUX_BUG();
	}
	spin_unlock(&iot->async_lock);

	return req;
}
/*
 * Return 1 if the output space condition went away
 * before adding the handler.
 */
int add_output_space_event (tux_req_t *req, struct socket *sock)
{
	struct sock *sk = sock->sk;
	int went_away = 0;

	/*
	 * blocked due to socket IO?
	 */
	spin_lock_irq(&req->ti->work_lock);
	add_keepalive_timer(req);
	if (test_and_set_bit(0, &req->wait_output_space))
		TUX_BUG();
	INC_STAT(nr_output_space_pending);

	/*
	 * Re-check under the lock: if write space opened up in the
	 * meantime, take the handler back out and report it.
	 */
	if ((sk->sk_state == TCP_ESTABLISHED) && enough_wspace(sk) &&
	    test_and_clear_bit(0, &req->wait_output_space)) {
		DEC_STAT(nr_output_space_pending);
		del_keepalive_timer(req);
		went_away = 1;
	}
	spin_unlock_irq(&req->ti->work_lock);

	return went_away;
}
/*
 * do_generic_file_read() actor: push one chunk of page-cache data out
 * to the client socket.  Three paths: gzip-compress-then-send (when
 * req->content_gzipped >= 2), zerocopy ->sendpage(), or kmap() + plain
 * sock_sendmsg().  Returns the number of source bytes consumed (0 on
 * error, with desc->error set).
 */
static int sock_send_actor (read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long orig_size)
{
	sock_send_desc_t *sock_desc = (sock_send_desc_t *)desc->arg.buf;
	struct socket *sock = sock_desc->sock;
	tux_req_t *req = sock_desc->req;
	unsigned int flags;
	ssize_t written;
	char *buf = NULL;
	unsigned int size;

	flags = MSG_DONTWAIT | MSG_NOSIGNAL;
	/* Clip the chunk to what the caller still wants: */
	if (desc->count < orig_size)
		orig_size = desc->count;
	/* More chunks will follow - let TCP coalesce: */
	if (desc->count > orig_size)
		flags |= MSG_MORE;
	Dprintk("sock_send_actor(), page: %p, offset: %ld, orig_size: %ld, sock: %p, desc->count: %d, desc->written: %d, MSG_MORE: %d.\n", page, offset, orig_size, sock, desc->count, desc->written, flags & MSG_MORE);
	if (req->content_gzipped >= 2) {
		unsigned int gzip_left;
		struct msghdr msg;
		struct iovec iov;
		mm_segment_t oldmm;
		char *kaddr = kmap(page);
		__u32 in_len, out_len;

		/*
		 * Worst-case compressed size (input + ~1% + 12 bytes),
		 * plus the hand-built header below.
		 */
		out_len = orig_size*101/100 + 12;
		buf = tux_kmalloc(out_len);
		in_len = orig_size;
		size = out_len;
		gzip_left = 0;
		/*
		 * Pre-built 11-byte gzip stream header, written as
		 * byte-swapped pairs (presumably a fixed header with
		 * canned mtime/flags - TODO confirm against RFC 1952):
		 */
		// 8b1f 0808 fdc4 3bd8 0300 79
		buf[1] = 0x8b; buf[0] = 0x1f;
		buf[3] = 0x08; buf[2] = 0x08;
		buf[5] = 0xfd; buf[4] = 0xc4;
		buf[7] = 0x3b; buf[6] = 0xd8;
		buf[9] = 0x03; buf[8] = 0x00;
		buf[10] = 0x79;
		size += 11;
		Dprintk("pre-compress: in_len: %d, out_len: %d, gzip_left: %d, uncompressed size: %d.\n", in_len, out_len, gzip_left, size);
		/*
		 * Compress into buf past the header; tux_gzip_compress()
		 * updates in_len/out_len (semantics of gzip_left and the
		 * two trailer bytes below are TUX-specific - NOTE(review):
		 * looks like out_len becomes the *remaining* space, since
		 * 'size' shrinks by it - verify against tux_gzip_compress).
		 */
		gzip_left = tux_gzip_compress(req, kaddr, buf+11, &in_len, &out_len);
		size -= out_len;
		buf[11] = 0x79; buf[12] = 0x00;
		Dprintk("post-compress: in_len: %d, out_len: %d, gzip_left: %d, compressed size: %d.\n", in_len, out_len, gzip_left, size);
		kunmap(page);
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		/* Compressed sends are done blocking: */
		flags &= ~MSG_DONTWAIT;
		msg.msg_flags = flags;
		iov.iov_base = buf;
		iov.iov_len = size;

		oldmm = get_fs(); set_fs(KERNEL_DS);
		written = sock_sendmsg(sock, &msg, size);
		set_fs(oldmm);
		Dprintk("buf: %p, offset: %ld, size: %d, written: %d.\n", buf, offset, size, written);

		/*
		 * Report consumed *input* bytes to the caller when the
		 * whole compressed buffer went out:
		 */
		if (written == size)
			written = orig_size;
		else
			written = size;
	} else {
		size = orig_size;
		/* Zerocopy path needs sendpage + scatter-gather capability: */
		if (tux_zerocopy_sendfile && sock->ops->sendpage &&
				(sock->sk->sk_route_caps & NETIF_F_SG)) {
			written = sock->ops->sendpage(sock, page, offset, size, flags);
		} else {
			struct msghdr msg;
			struct iovec iov;
			char *kaddr;
			mm_segment_t oldmm;

			/* The chunk must fit within this single page: */
			if (offset+size > PAGE_SIZE)
				return -EFAULT;

			kaddr = kmap(page);

			msg.msg_name = NULL;
			msg.msg_namelen = 0;
			msg.msg_iov = &iov;
			msg.msg_iovlen = 1;
			msg.msg_control = NULL;
			msg.msg_controllen = 0;
			msg.msg_flags = flags;
			iov.iov_base = kaddr + offset;
			iov.iov_len = size;

			oldmm = get_fs(); set_fs(KERNEL_DS);
			written = sock_sendmsg(sock, &msg, size);
			set_fs(oldmm);
			Dprintk("kaddr: %p, offset: %ld, size: %d, written: %d.\n", kaddr, offset, size, written);

			kunmap(page);
		}
	}
	if (written < 0) {
		desc->error = written;
		written = 0;
	}
	Dprintk("desc->count: %d, desc->written: %d, written: %d.\n", desc->count, desc->written, written);
	desc->count -= written;
	if ((int)desc->count < 0)
		TUX_BUG();
	desc->written += written;

	if (buf)
		kfree(buf);

	return written;
}
/*
 * Drive sendfile-style output for 'req': feed up to SEND_BLOCKSIZE
 * bytes per iteration through do_generic_file_read() with
 * sock_send_actor as the per-page actor, looping while there is
 * output left and write space available.
 *
 * Returns: bytes written, or a negative TUX-internal sentinel:
 *   -1 request error / connection zapped,
 *   -3 would block on IO (cachemiss needed),
 *   -4 no socket write space,
 *   -5 connection too fast (throttled).
 */
int generic_send_file (tux_req_t *req, struct socket *sock, int cachemiss)
{
	sock_send_desc_t sock_desc;
	int len, want, nonblock = !cachemiss;
	struct tcp_opt *tp = tcp_sk(sock->sk);

	/* Disable Nagle on this socket: */
	tp->nonagle = 2;

	sock_desc.sock = sock;
	sock_desc.req = req;

repeat:
	Dprintk("generic_send_file(%p,%d,%p) called, f_pos: %Ld, output_len: %Ld.\n", req, nonblock, sock, req->in_file->f_pos, req->output_len);

	if (req->proto->check_req_err(req, cachemiss))
		return -1;
	if (connection_too_fast(req) == 2) {
		len = -5;
		goto out;
	}
	/* The file position may never run past the known file length: */
	if (req->total_file_len < req->in_file->f_pos)
		TUX_BUG();

	req->desc.written = 0;
	/*
	 * Careful, output_len can be 64-bit, while 'want' can be 32-bit.
	 */
	if (req->output_len > SEND_BLOCKSIZE)
		want = SEND_BLOCKSIZE;
	else
		want = req->output_len;
	req->desc.count = want;
	req->desc.arg.buf = (char *) &sock_desc;
	req->desc.error = 0;
	Dprintk("sendfile(), desc.count: %d.\n", req->desc.count);
	do_generic_file_read(req->in_file, &req->in_file->f_pos, &req->desc, sock_send_actor, nonblock);
	if (req->desc.written > 0) {
		req->bytes_sent += req->desc.written;
		req->output_len -= req->desc.written;
	}
	/* -EWOULDBLOCKIO must only happen in nonblocking (atomic) mode: */
	if (!nonblock && (req->desc.error == -EWOULDBLOCKIO))
		TUX_BUG();
	Dprintk("sendfile() wrote: %d bytes.\n", req->desc.written);
	/*
	 * Output remains but nothing was written and no error reported:
	 * treat as a dead connection and close it.
	 */
	if (req->output_len && !req->desc.written && !req->desc.error) {
#if CONFIG_TUX_DEBUG
		req->bytes_expected = 0;
#endif
		req->in_file->f_pos = 0;
		req->error = TUX_ERROR_CONN_CLOSE;
		zap_request(req, cachemiss);
		return -1;
	}

	switch (req->desc.error) {

	case -EWOULDBLOCKIO:
		/* Page not in cache - caller should retry from cachemiss: */
		len = -3;
		break;
	case -EAGAIN:
no_write_space:
		Dprintk("sk->wmem_queued: %d, sk->sndbuf: %d.\n", sock->sk->sk_wmem_queued, sock->sk->sk_sndbuf);
		len = -4;
		break;
	default:
		len = req->desc.written;
#if CONFIG_TUX_DEBUG
		if (req->desc.error)
			TDprintk("TUX: sendfile() returned error %d (signals pending: %08lx)!\n", req->desc.error, current->pending.signal.sig[0]);
#endif
		if (!req->desc.error) {
			if (req->output_len < 0)
				BUG();
			/*
			 * More output pending: keep looping unless the
			 * socket reports no write space.
			 */
			if (req->output_len) {
				if (test_bit(SOCK_NOSPACE, &sock->flags))
					goto no_write_space;
				goto repeat;
			}
		}
#if CONFIG_TUX_DEBUG
		if (req->desc.written != want)
			TDprintk("TUX: sendfile() wrote %d bytes, wanted %d! (pos %Ld) (signals pending: %08lx).\n", req->desc.written, want, req->in_file->f_pos, current->pending.signal.sig[0]);
		else
			Dprintk("TUX: sendfile() FINISHED for req %p, wrote %d bytes.\n", req, req->desc.written);
		req->bytes_expected = 0;
#endif
		break;
	}

out:
	Dprintk("sendfile() wrote %d bytes.\n", len);
	return len;
}
/*
 * Format and send one directory entry of the batch read by
 * list_directory().  Looks the entry up (atomically when called
 * outside cachemiss context), applies visibility/permission filters,
 * renders the line via the protocol's print_dir_line() and either
 * sends it or advances straight to the next entry.  Re-queues itself
 * as a tux atom while entries remain in the batch buffer.
 */
static void do_dir_line (tux_req_t *req, int cachemiss)
{
	struct linux_dirent64 *dirp, *dirp0;
	char string0[MAX_OBJECTNAME_LEN+200], *tmp;
	int len, curroff, total, str_len = 0;
	/* Outside cachemiss context the lookup must not block: */
	int err, flag = cachemiss ? 0 : LOOKUP_ATOMIC;
	struct nameidata base = { };
	struct dentry *dentry = NULL;
	struct inode *inode = NULL;
	struct vfsmount *mnt = NULL;

	if (req->proto->check_req_err(req, cachemiss))
		return;

	tmp = NULL;
	/* Resume state saved by list_directory()/the previous iteration: */
	dirp0 = req->dirp0;
	curroff = req->curroff;
	total = req->total;

	dirp = (struct linux_dirent64 *)((char *)dirp0 + curroff);
	if (!dirp->d_name || !dirp->d_name[0])
		goto next_dir;
	/*
	 * Hide .xxxxx files:
	 */
	if (dirp->d_name[0] == '.')
		goto next_dir;
	Dprintk("<%s T:%d (off:%Ld) (len:%d)>\n", dirp->d_name, dirp->d_type, dirp->d_off, dirp->d_reclen);
	/* Only list regular files, dirs, symlinks and unknowns: */
	if (tux_hide_unreadable) {
		switch (dirp->d_type) {
			default:
				goto next_dir;
			case DT_UNKNOWN:
			case DT_REG:
			case DT_DIR:
			case DT_LNK:
			/* valid entries - fall through. */
				;
		}
	}
	len = strlen(dirp->d_name);
	/* Truncate over-long names in place: */
	if (len >= MAX_OBJECTNAME_LEN) {
		dirp->d_name[MAX_OBJECTNAME_LEN] = 0;
		len = MAX_OBJECTNAME_LEN-1;
	}

	if (!req->dentry)
		TUX_BUG();

	/* Walk the entry name relative to the request's directory: */
	base.flags = flag;
	base.last_type = LAST_ROOT;
	base.dentry = dget(req->dentry);
	base.mnt = mntget(req->cwd_mnt);

	switch_docroot(req);
	err = path_walk(dirp->d_name, &base);

	Dprintk("path_walk() returned %d.\n", err);

	if (err) {
		/*
		 * Atomic lookup would have blocked: retry this same
		 * entry from cachemiss context.
		 */
		if (err == -EWOULDBLOCKIO) {
			add_tux_atom(req, do_dir_line);
			queue_cachemiss(req);
			return;
		}
		goto next_dir;
	}

	dentry = base.dentry;
	mnt = base.mnt;
	if (!dentry)
		TUX_BUG();
	if (IS_ERR(dentry))
		TUX_BUG();
	inode = dentry->d_inode;
	if (!inode)
		TUX_BUG();
	/* Fill in the type if readdir did not provide it: */
	if (!dirp->d_type)
		dirp->d_type = get_d_type(dentry);
	/* Skip entries the configured policy deems unreadable: */
	if (tux_hide_unreadable) {
		umode_t mode;

		mode = inode->i_mode;
		if (mode & tux_mode_forbidden)
			goto out_dput;
		if (!(mode & tux_mode_allowed))
			goto out_dput;

		err = permission(inode, MAY_READ, NULL);
		if (err)
			goto out_dput;
		/* Directories must also be searchable: */
		if (dirp->d_type == DT_DIR) {
			err = permission(inode, MAY_EXEC, NULL);
			if (err)
				goto out_dput;
		}
	}

	/* Render the listing line into string0; tmp points past the end: */
	tmp = req->proto->print_dir_line(req, string0, dirp->d_name, len, dirp->d_type, dentry, inode);
	if (tmp)
		str_len = tmp-string0;

out_dput:
	/* Drop the refs taken by path_walk()/dget()/mntget(): */
	dput(dentry);
	mntput(mnt);

next_dir:
	curroff += dirp->d_reclen;

	if (tmp && (tmp != string0))
		Dprintk("writing line (len: %d): <%s>\n", strlen(string0), string0);

	if (curroff < total) {
		/* More entries in this batch - save state and re-queue: */
		req->dirp0 = dirp0;
		req->curroff = curroff;
		add_tux_atom(req, do_dir_line);
	} else {
		/* Batch exhausted - free it: */
		kfree(dirp0);
		req->dirp0 = NULL;
		req->curroff = 0;
		// falls back to the list_directory atom
	}
	/* Send the rendered line, or just advance to the next atom: */
	if (tmp && (tmp != string0))
		__send_async_message(req, string0, 200, str_len, 0);
	else
		add_req_to_workqueue(req);
}