// Finds the smallest index from 0 to NFD-1 that doesn't have // its fd page mapped. // Sets *fd_store to the corresponding fd page virtual address. // // Does NOT actually allocate an fd page. // It is up to the caller to allocate the page somehow. // This means that if someone calls fd_find_unused twice in a row // without allocating the first page we return, we'll return the same // page the second time. // // Returns 0 on success, < 0 on error. Errors are: // -E_MAX_FD: no more file descriptors // On error, *fd_store is set to 0. int fd_find_unused(struct Fd **fd_store) { int i; struct Fd *fd; for (i = 0; i < NFD; i++) { (void) fd_lookup(i, &fd, false); if (!fd_isopen(fd)) { *fd_store = fd; return 0; } } *fd_store = 0; return -E_MAX_OPEN; }
// Write up to 'n' bytes from 'buf' to descriptor 'fdnum' by dispatching
// to the dev_write hook of the device that owns the descriptor.
//
// Returns whatever the hook returns, or < 0 on error:
//   the fd_lookup/dev_lookup error if either lookup fails,
//   -E_INVAL if the descriptor was opened O_RDONLY,
//   -E_NOT_SUPP if the device provides no dev_write hook.
ssize_t
write(int fdnum, const void *buf, size_t n)
{
    struct Fd *fd;
    struct Dev *dev;
    int r;

    r = fd_lookup(fdnum, &fd, true);
    if (r < 0)
        return r;

    r = dev_lookup(fd->fd_dev_id, &dev);
    if (r < 0)
        return r;

    // Descriptors opened read-only may not be written.
    if ((fd->fd_omode & O_ACCMODE) == O_RDONLY)
        return -E_INVAL;

    if (dev->dev_write)
        return (*dev->dev_write)(fd, buf, n);

    return -E_NOT_SUPP;
}
// Frees file descriptor 'fd' by closing the corresponding file // and unmapping the file descriptor page. // If 'must_exist' is 0, then fd can be a closed or nonexistent file // descriptor; the function will return 0 and have no other effect. // If 'must_exist' is 1, then fd_close returns -E_INVAL when passed a // closed or nonexistent file descriptor. // Returns 0 on success, < 0 on error. int fd_close(struct Fd *fd, bool must_exist) { struct Fd *fd2; struct Dev *dev; int r; if ((r = fd_lookup(fd2num(fd), &fd2)) < 0 || fd != fd2) return (must_exist ? r : 0); if ((r = dev_lookup(fd->fd_dev_id, &dev)) >= 0) r = (*dev->dev_close)(fd); // Make sure fd is unmapped. Might be a no-op if // (*dev->dev_close)(fd) already unmapped it. (void) sys_page_unmap(0, fd); return r; }
// Fill '*stat' with information about descriptor 'fdnum'.
//
// The generic fields are reset to defaults (empty name, size 0, not a
// directory, st_dev pointing at the owning device) before the device's
// dev_stat hook is asked to fill in the rest.
//
// Returns the hook's result, or the fd_lookup/dev_lookup error (< 0) if
// either lookup fails.
int
fstat(int fdnum, struct Stat *stat)
{
    struct Fd *fd;
    struct Dev *dev;
    int r;

    r = fd_lookup(fdnum, &fd);
    if (r < 0)
        return r;

    r = dev_lookup(fd->fd_dev_id, &dev);
    if (r < 0)
        return r;

    stat->st_dev = dev;
    stat->st_isdir = 0;
    stat->st_size = 0;
    stat->st_name[0] = 0;

    return (*dev->dev_stat)(fd, stat);
}
// Ask the device behind descriptor 'fdnum' to resize the file to
// 'newsize' through its dev_trunc hook.
//
// Returns the hook's result, or < 0 on error:
//   the fd_lookup/dev_lookup error if either lookup fails,
//   -E_INVAL (after logging a diagnostic) if the descriptor was opened
//   O_RDONLY.
int
ftruncate(int fdnum, off_t newsize)
{
    struct Fd *fd;
    struct Dev *dev;
    int r;

    r = fd_lookup(fdnum, &fd);
    if (r < 0)
        return r;

    r = dev_lookup(fd->fd_dev_id, &dev);
    if (r < 0)
        return r;

    if ((fd->fd_omode & O_ACCMODE) != O_RDONLY)
        return (*dev->dev_trunc)(fd, newsize);

    // Read-only descriptors cannot be truncated.
    cprintf("[%08x] ftruncate %d -- bad mode\n", env->env_id, fdnum);
    return -E_INVAL;
}
// Close descriptor 'fdnum': run the owning device's dev_close hook, then
// release the descriptor itself with fd_close().
//
// Returns the dev_close hook's result (the value returned by fd_close is
// discarded), or the fd_lookup/dev_lookup error (< 0) if either lookup
// fails.
int
close(int fdnum)
{
    struct Fd *fd;
    struct Dev *dev;
    int status;

    status = fd_lookup(fdnum, &fd);
    if (status < 0) {
        return status;
    }

    status = dev_lookup(fd->fd_dev_id, &dev);
    if (status < 0) {
        return status;
    }

    status = (*dev->dev_close)(fd);
    fd_close(fd);
    return status;
}
/*
 * Shutdown design notes.
 *
 * Shutdown of the read side:
 *   a) struct socket carries a read_lock flag;
 *   b) v_socket initializes read_lock to 0;
 *   c) a read-only shutdown sets read_lock = 1.  The reading path is then
 *      expected to overwrite the bytes it consumes with zeros, so the
 *      buffer pointers stay valid while the advertised window shrinks.
 *      (That part lives in the read path, not in this function.)
 *
 * Shutdown of the write side sends a FIN:
 *   1. state ESTABLISHED              -> FIN_WAIT_1
 *   2. state CLOSE_WAIT (peer closed) -> LAST_ACK
 */

/* Read half of a shutdown: mark the socket so that readers see the lock. */
static void v_shutdown_read_half(socket_t *so)
{
    printf("\t Request for shutdown read\n");
    so->read_lock = 1;
}

/* Write half of a shutdown: advance the state machine and send the FIN. */
static void v_shutdown_write_half(socket_t *so)
{
    printf("\t Request for shutdown write\n");
    if (so->state == ESTABLISHED) {
        set_socketstate(so, FIN_WAIT_1);
        tcp_send_handshake(FIN_WAIT_1, so);
    } else if (so->state == CLOSE_WAIT) {
        printf("a\n");
        set_socketstate(so, LAST_ACK);
        printf("b\n");
        tcp_send_handshake(LAST_ACK, so);
    }
    /* Any other state: nothing is sent and the state is left unchanged. */
}

/*
 * Shut down one or both directions of a socket.
 *
 * socket    - handle passed straight to fd_lookup() to find the socket.
 * shut_type - SHUTDOWN_READ, SHUTDOWN_WRITE or SHUTDOWN_BOTH; any other
 *             value is ignored.
 *
 * Always returns 0, including when the socket cannot be found.
 *
 * SHUTDOWN_BOTH applies the read half and then the write half directly to
 * the socket that was already looked up.  The previous implementation
 * recursed with so->id, handing an integer id to a parameter declared as
 * 'socket_t *' and repeating the lookup for each half.
 *
 * NOTE(review): sibling entry points take 'int socket' and call
 * fd_lookup() with it, while this one is declared with 'socket_t *socket'.
 * Confirm against the header which type callers really pass; the
 * signature is left untouched here so existing callers keep compiling.
 * NOTE(review): returning 0 for an unknown socket hides the failure from
 * callers - confirm whether that is intentional.
 */
int v_shutdown(socket_t *socket, int shut_type)
{
    printf("IN SHUTDOWN\n");

    /* 1. get the socket by id */
    socket_t *so = fd_lookup(socket);
    if (so == NULL)
        return 0; /* no such socket */

    if (shut_type == SHUTDOWN_READ) {
        v_shutdown_read_half(so);
    } else if (shut_type == SHUTDOWN_WRITE) {
        v_shutdown_write_half(so);
    } else if (shut_type == SHUTDOWN_BOTH) {
        printf("Request for shutdown read and write\n");
        v_shutdown_read_half(so);
        v_shutdown_write_half(so);
    }

    return 0;
}
int v_listen(int socket){ socket_t *so = fd_lookup(socket); if(so == NULL) { printf(" v_listen() error : malloc failed\n"); return -1; } sockets_on_port *sop = get_sockets_on_port(so->myport); if(sop->listening_socket) return -1; //"already listening on this port" sop->listening_socket = so; set_socketstate(so, LISTENING); return 0; }
int v_bind(int socket, struct in_addr *nothing, uint16_t port) { //find the socket, set port, add it to its port list socket_t *so = fd_lookup(socket); if(so == NULL) { printf(" v_bind() error : malloc failed\n"); return -1; } so->myport = port; sockets_on_port *sop = get_sockets_on_port(port); list_append(sop->list, so); return 0; }
int dup(int oldfdnum, int newfdnum) { int i, r; u_int ova, nva, pte; struct Fd *oldfd, *newfd; if ((r = fd_lookup(oldfdnum, &oldfd)) < 0) { return r; } close(newfdnum); newfd = (struct Fd *)INDEX2FD(newfdnum); ova = fd2data(oldfd); nva = fd2data(newfd); if ((r = syscall_mem_map(0, (u_int)oldfd, 0, (u_int)newfd, ((*vpt)[VPN(oldfd)]) & (PTE_V | PTE_R | PTE_LIBRARY))) < 0) { goto err; } if ((* vpd)[PDX(ova)]) { for (i = 0; i < PDMAP; i += BY2PG) { pte = (* vpt)[VPN(ova + i)]; if (pte & PTE_V) { // should be no error here -- pd is already allocated if ((r = syscall_mem_map(0, ova + i, 0, nva + i, pte & (PTE_V | PTE_R | PTE_LIBRARY))) < 0) { goto err; } } } } return newfdnum; err: syscall_mem_unmap(0, (u_int)newfd); for (i = 0; i < PDMAP; i += BY2PG) { syscall_mem_unmap(0, nva + i); } return r; }
int add_edge_detect(unsigned int gpio, unsigned int edge) // return values: // 0 - Success // 1 - Edge detection already added // 2 - Other error { int fd = fd_lookup(gpio); pthread_t threads; struct epoll_event ev; long t = 0; // check to see if this gpio has been added already if (gpio_event_add(gpio) != 0) return 1; // export /sys/class/gpio interface gpio_export(gpio); gpio_set_direction(gpio, 1); // 1=input gpio_set_edge(gpio, edge); if (!fd) { if ((fd = open_value_file(gpio)) == -1) return 2; } // create epfd if not already open if ((epfd == -1) && ((epfd = epoll_create(1)) == -1)) return 2; // add to epoll fd ev.events = EPOLLIN | EPOLLET | EPOLLPRI; ev.data.fd = fd; if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) return 2; // start poll thread if it is not already running if (!thread_running) { if (pthread_create(&threads, NULL, poll_thread, (void *)t) != 0) return 2; } return 0; }
// Fill '*stat' with information about descriptor 'fdnum'.
//
// The generic fields are reset to defaults (empty name, size 0, file
// type 0, st_dev pointing at the owning device) and the device's
// dev_stat hook then fills in the rest.
//
// Returns the hook's result, or < 0 on error:
//   the fd_lookup/dev_lookup error if either lookup fails,
//   -E_NOT_SUPP if the device provides no dev_stat hook (in which case
//   '*stat' is left untouched).
int
fstat(int fdnum, struct Stat *stat)
{
    struct Fd *fd;
    struct Dev *dev;
    int r;

    r = fd_lookup(fdnum, &fd, true);
    if (r < 0)
        return r;

    r = dev_lookup(fd->fd_dev_id, &dev);
    if (r < 0)
        return r;

    if (dev->dev_stat) {
        stat->st_dev = dev;
        stat->st_ftype = 0;
        stat->st_size = 0;
        stat->st_name[0] = 0;
        return (*dev->dev_stat)(fd, stat);
    }

    return -E_NOT_SUPP;
}
// Read up to 'n' bytes from descriptor 'fdnum' into 'buf' by dispatching
// to the dev_read hook of the device that owns the descriptor.
//
// Returns whatever the hook returns, or < 0 on error:
//   the fd_lookup/dev_lookup error if either lookup fails,
//   -E_INVAL (after logging a diagnostic) if the descriptor was opened
//   O_WRONLY,
//   -E_NOT_SUPP if the device provides no dev_read hook.
ssize_t
read(int fdnum, void *buf, size_t n)
{
    struct Fd *fd;
    struct Dev *dev;
    int r;

    r = fd_lookup(fdnum, &fd);
    if (r < 0)
        return r;

    r = dev_lookup(fd->fd_dev_id, &dev);
    if (r < 0)
        return r;

    // Write-only descriptors may not be read.
    if ((fd->fd_omode & O_ACCMODE) == O_WRONLY) {
        cprintf("[%08x] read %d -- bad mode\n", env->env_id, fdnum);
        return -E_INVAL;
    }

    if (dev->dev_read)
        return (*dev->dev_read)(fd, buf, n);

    return -E_NOT_SUPP;
}
// Find the page that maps the file block starting at 'offset', // and store its address in '*blk'. int read_map(int fdnum, off_t offset, void **blk) { int r; char *va; struct Fd *fd; if ((r = fd_lookup(fdnum, &fd)) < 0) return r; if (fd->fd_dev_id != devfile.dev_id) return -E_INVAL; va = fd2data(fd) + offset; if (offset >= MAXFILESIZE) return -E_NO_DISK; if (!(vpd[PDX(va)] & PTE_P) || !(vpt[VPN(va)] & PTE_P)) return -E_NO_DISK; *blk = (void*) va; return 0; }
void remove_edge_detect(unsigned int gpio) { struct epoll_event ev; int fd = fd_lookup(gpio); // delete callbacks for gpio remove_callbacks(gpio); // delete epoll of fd epoll_ctl(epfd, EPOLL_CTL_DEL, fd, &ev); // set edge to none gpio_set_edge(gpio, NO_EDGE); // unexport gpio gpio_event_remove(gpio); // clear detected flag event_occurred[gpio] = 0; }
ssize_t write(int fdnum, const void *buf, size_t n) { int r; struct Dev *dev; struct Fd *fd; if ((r = fd_lookup(fdnum, &fd)) < 0 || (r = dev_lookup(fd->fd_dev_id, &dev)) < 0) return r; if ((fd->fd_omode & O_ACCMODE) == O_RDONLY) { cprintf("[%08x] write %d -- bad mode\n", thisenv->env_id, fdnum); return -E_INVAL; } if (debug) cprintf("write %d %p %d via dev %s\n", fdnum, buf, n, dev->dev_name); if (!dev->dev_write) return -E_NOT_SUPP; return (*dev->dev_write)(fd, buf, n); }
// Read the direction of 'gpio' and store OUTPUT or INPUT in '*value'.
//
// The descriptor comes from fd_lookup(); if none is cached (0), the file
// is opened with open_value_file().  The file is rewound and its first
// bytes are compared with "out": a match yields OUTPUT, anything else
// yields INPUT.
//
// Returns 0 on success, -1 if the file cannot be opened or read.
// '*value' is written only on success.
//
// NOTE(review): the descriptor is obtained from open_value_file(); confirm
// that this file really carries the direction text for this platform.
int gpio_get_direction(unsigned int gpio, unsigned int *value)
{
    char direction[4];
    ssize_t nread;
    int fd = fd_lookup(gpio);

    if (!fd) {
        if ((fd = open_value_file(gpio)) == -1)
            return -1;
    }

    lseek(fd, 0, SEEK_SET);

    // Read at most three bytes so that there is always room for the
    // terminator.  Three bytes are exactly enough to tell "out" apart
    // from anything else, and they exclude the trailing newline that
    // would otherwise make the comparison fail.
    nread = read(fd, direction, sizeof(direction) - 1);
    if (nread < 0)
        return -1;
    direction[nread] = '\0';

    if (strcmp(direction, "out") == 0) {
        *value = OUTPUT;
    } else {
        *value = INPUT;
    }

    return 0;
}
// Make file descriptor 'newfdnum' a duplicate of file descriptor 'oldfdnum'. // For instance, writing onto either file descriptor will affect the // file and the file offset of the other. // Closes any previously open file descriptor at 'newfdnum'. // This is implemented using virtual memory tricks (of course!). int dup(int oldfdnum, int newfdnum) { int i, r; char *ova, *nva; pte_t pte; struct Fd *oldfd, *newfd; if ((r = fd_lookup(oldfdnum, &oldfd)) < 0) return r; close(newfdnum); newfd = INDEX2FD(newfdnum); ova = fd2data(oldfd); nva = fd2data(newfd); // if ((r = sys_page_map(0, oldfd, 0, newfd, vpt[VPN(oldfd)] & PTE_USER)) < 0) // goto err; if (vpd[PDX(ova)]) { for (i = 0; i < PTSIZE; i += PGSIZE) { pte = vpt[VPN(ova + i)]; if (pte&PTE_P) { // should be no error here -- pd is already allocated if ((r = sys_page_map(0, ova + i, 0, nva + i, pte & PTE_USER)) < 0) goto err; } } } if ((r = sys_page_map(0, oldfd, 0, newfd, vpt[VPN(oldfd)] & PTE_USER)) < 0) goto err; return newfdnum; err: sys_page_unmap(0, newfd); for (i = 0; i < PTSIZE; i += PGSIZE) sys_page_unmap(0, nva + i); return r; }
// Read the current level of 'gpio' and store 0 or 1 in '*value'.
//
// The descriptor comes from fd_lookup(); if none is cached (0), the
// value file is opened with open_value_file().  The file is rewound and
// its first character decides the result: '0' yields 0, any other
// character yields 1.
//
// Returns 0 on success, -1 if the value file cannot be opened or no
// character could be read.  '*value' is written only on success.
int gpio_get_value(unsigned int gpio, unsigned int *value)
{
    char ch;
    int fd = fd_lookup(gpio);

    if (!fd) {
        if ((fd = open_value_file(gpio)) == -1)
            return -1;
    }

    lseek(fd, 0, SEEK_SET);

    // Without this check a failed or empty read would leave 'ch'
    // uninitialised and the reported level would be arbitrary.
    if (read(fd, &ch, 1) != 1)
        return -1;

    if (ch != '0') {
        *value = 1;
    } else {
        *value = 0;
    }

    return 0;
}
/*
 * accept4() replacement for descriptors tracked by this library.
 *
 * sockfd, addr, addrlen, flags - as for accept4(2).
 *
 * Behaviour, in order:
 *  - negative sockfd: fails with EINVAL;
 *  - descriptor unknown, or neither BOUND nor DUMMY: the call is passed
 *    straight to libc.accept4();
 *  - BOUND descriptor on which listen was never recorded
 *    (stub_listened == 0): fails with EINVAL;
 *  - DUMMY descriptor holding a pending client: that client descriptor
 *    is handed out once and the slot is reset to -1;
 *  - otherwise: unless SOCK_NONBLOCK is set in 'flags', wait in poll()
 *    for POLLIN, then re-check is_exiting, allocate a TRACKED record that
 *    references the listening record, and perform the real accept.
 *
 * Returns the accepted descriptor, or -1 with errno set.
 *
 * NOTE(review): L() and U() bracket the accesses to the descriptor table
 * and look like lock/unlock of a global lock - confirm.  The lock is
 * dropped around poll(), and 'info' obtained before the wait is used
 * again afterwards without a fresh fd_lookup(); verify that the record
 * cannot go away in between.
 */
static int do_accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)
{
    int rval = -1;
    fdinfo_t *info = NULL;

    if( sockfd < 0 )
    {
        errno = EINVAL;
        return -1;
    }

    DEBUG("do_accept4(%d, ...) ...", sockfd);
    L();
    info = fd_lookup(sockfd);
    if( info == NULL || (info->type != BOUND && info->type != DUMMY) )
    {
        U();
        /* Not one of ours: let libc produce the result (normally an error). */
        rval = libc.accept4(sockfd, addr, addrlen, flags);
        DEBUG("do_accept4(%d, ...) => %d (no info)", sockfd, rval);
        return rval;
    }

    /* Check that they've called listen. */
    if( info->type == BOUND && !info->bound.stub_listened )
    {
        U();
        DEBUG("do_accept4(%d, ...) => -1 (not listened)", sockfd);
        errno = EINVAL;
        return -1;
    }

    /* Check if this is a dummy.
     * There's no way that they should be calling accept().
     * The dummy FD will never trigger a poll, select, epoll,
     * etc. So we just act as a socket with no clients does --
     * either return immediately or block forever. NOTE: We
     * still return in case of EINTR or other suitable errors. */
    if( info->type == DUMMY && info->dummy.client >= 0 )
    {
        /* Hand out the stored client exactly once. */
        rval = info->dummy.client;
        info->dummy.client = -1;
        U();
        DEBUG("do_accept4(%d, ...) => %d (dummy client)", sockfd, rval);
        return rval;
    }

    /* Release before a potentially unbounded wait. */
    U();

    if( !(flags & SOCK_NONBLOCK) )
    {
        /* Wait for activity on the socket. */
        struct pollfd poll_info;
        poll_info.fd = sockfd;
        poll_info.events = POLLIN;
        poll_info.revents = 0;
        if( poll(&poll_info, 1, -1) < 0 )
        {
            /* errno is whatever poll() left behind. */
            return -1;
        }
    }

    L();

    /* Check our status. */
    if( is_exiting == TRUE )
    {
        /* We've transitioned from not exiting
         * to exiting in this period. The caller is
         * expected to come around again and find a
         * dummy descriptor. */
        U();
        DEBUG("do_accept4(%d, ...) => -1 (interrupted)", sockfd);
        errno = flags & SOCK_NONBLOCK ? EAGAIN : EINTR;
        return -1;
    }

    /* Do the accept for real. */
    fdinfo_t *new_info = alloc_info(TRACKED);
    if( new_info == NULL )
    {
        U();
        DEBUG("do_accept4(%d, ...) => -1 (alloc error?)", sockfd);
        return -1;
    }
    /* The tracked record keeps its own reference to the listener. */
    inc_ref(info);
    new_info->tracked.bound = info;
    rval = libc.accept4(sockfd, addr, addrlen, flags);

    if( rval >= 0 )
    {
        /* Save the reference to the socket. */
        fd_save(rval, new_info);
    }
    else
    {
        /* An error occurred, nothing to track. */
        dec_ref(new_info);
    }

    U();
    DEBUG("do_accept4(%d, ...) => %d (tracked %d) %s", sockfd, rval, total_tracked, rval == -1 ? strerror(errno) : "");
    return rval;
}
static int do_listen(int sockfd, int backlog) { int rval = -1; fdinfo_t *info = NULL; if( sockfd < 0 ) { errno = EINVAL; return -1; } DEBUG("do_listen(%d, ...) ...", sockfd); L(); info = fd_lookup(sockfd); if( info == NULL || info->type != BOUND ) { U(); DEBUG("do_listen(%d, %d) => -1 (not BOUND)", sockfd, backlog); errno = EINVAL; return -1; } /* Check if we can short-circuit this. */ if( info->bound.real_listened ) { info->bound.stub_listened = 1; U(); DEBUG("do_listen(%d, %d) => 0 (stub)", sockfd, backlog); return 0; } /* Can we really call listen() ? */ if( is_exiting == TRUE ) { info->bound.stub_listened = 1; U(); DEBUG("do_listen(%d, %d) => 0 (is_exiting)", sockfd, backlog); return 0; } /* We largely ignore the backlog parameter. People * don't really use sensible values here for the most * part. Hopefully (as is default on some systems), * tcp syn cookies are enabled, and there's no real * limit for this queue and this parameter is silently * ignored. If not, then we use the largest value we * can sensibly use. */ (void)backlog; rval = libc.listen(sockfd, SOMAXCONN); if( rval < 0 ) { U(); DEBUG("do_listen(%d, %d) => %d", sockfd, backlog, rval); return rval; } /* We're done. */ info->bound.real_listened = 1; info->bound.stub_listened = 1; U(); DEBUG("do_listen(%d, %d) => %d", sockfd, backlog, rval); return rval; }
void umain(void) { int p[2], r, pid, i, max; void *va; struct Fd *fd; volatile struct Env *kid; cprintf("testing for dup race...\n"); if ((r = pipe(p)) < 0) panic("pipe: %e", r); max = 200; if ((r = fork()) < 0) panic("fork: %e", r); if (r == 0) { close(p[1]); // // Now the ref count for p[0] will toggle between 2 and 3 // as the parent dups and closes it (there's a close implicit in dup). // // The ref count for p[1] is 1. // Thus the ref count for the underlying pipe structure // will toggle between 3 and 4. // // If a clock interrupt catches close between unmapping // the pipe structure and unmapping the fd, we'll have // a ref count for p[0] of 3, a ref count for p[1] of 1, // and a ref count for the pipe structure of 3, which is // a no-no. // // If a clock interrupt catches dup between mapping the // fd and mapping the pipe structure, we'll have the same // ref counts, still a no-no. // for (i=0; i<max; i++) { if(pipeisclosed(p[0])){ cprintf("RACE: pipe appears closed\n"); exit(); } sys_yield(); } // do something to be not runnable besides exiting ipc_recv(0,0,0); } pid = r; cprintf("pid is %d\n", pid); va = 0; kid = &envs[ENVX(pid)]; cprintf("kid is %d\n", kid-envs); dup(p[0], 10); while (kid->env_status == ENV_RUNNABLE) dup(p[0], 10); cprintf("child done with loop\n"); if (pipeisclosed(p[0])) panic("somehow the other end of p[0] got closed!"); if ((r = fd_lookup(p[0], &fd)) < 0) panic("cannot look up p[0]: %e", r); va = fd2data(fd); if (pageref(va) != 3+1) cprintf("\nchild detected race\n"); else cprintf("\nrace didn't happen\n", max); }
void umain(void) { int p[2], r, i; struct Fd *fd; volatile struct Env *kid; cprintf("testing for pipeisclosed race...\n"); if ((r = pipe(p)) < 0) panic("pipe: %e", r); if ((r = fork()) < 0) panic("fork: %e", r); if (r == 0) { // child just dups and closes repeatedly, // yielding so the parent can see // the fd state between the two. close(p[1]); for (i = 0; i < 200; i++) { if (i % 10 == 0) cprintf("%d.", i); // dup, then close. yield so that other guy will // see us while we're between them. dup(p[0], 10); sys_yield(); close(10); sys_yield(); } exit(); } // We hold both p[0] and p[1] open, so pipeisclosed should // never return false. // // Now the ref count for p[0] will toggle between 2 and 3 // as the child dups and closes it. // The ref count for p[1] is 1. // Thus the ref count for the underlying pipe structure // will toggle between 3 and 4. // // If pipeisclosed checks pageref(p[0]) and gets 3, and // then the child closes, and then pipeisclosed checks // pageref(pipe structure) and gets 3, then it will return true // when it shouldn't. // // If pipeisclosed checks pageref(pipe structure) and gets 3, // and then the child dups, and then pipeisclosed checks // pageref(p[0]) and gets 3, then it will return true when // it shouldn't. // // So either way, pipeisclosed is going give a wrong answer. // kid = &envs[ENVX(r)]; while (kid->env_status == ENV_RUNNABLE) if (pipeisclosed(p[0]) != 0) { cprintf("\nRACE: pipe appears closed\n"); sys_env_destroy(r); exit(); } cprintf("child done with loop\n"); if (pipeisclosed(p[0])) panic("somehow the other end of p[0] got closed!"); if ((r = fd_lookup(p[0], &fd)) < 0) panic("cannot look up p[0]: %e", r); (void) fd2data(fd); cprintf("race didn't happen\n"); }
/*
 * One-time initialisation of the library state.
 *
 * Reads the HUPTIME_* environment variables (MODE, MULTI, REVIVE, DEBUG,
 * PIPE, WAIT, UNLINK), records the current pid as the master pid, reaps
 * finished children, and then either
 *   - restores the descriptor table sent through the pipe named by
 *     HUPTIME_PIPE (the process is a respawn), or
 *   - snapshots the descriptors that are open right now as SAVED entries
 *     (first start).
 * Finally it captures environment, arguments, cwd and executable path
 * from /proc/self, installs the signal handlers, starts the helper
 * thread and unblocks SIGHUP.
 *
 * Terminates the process through libc.exit(1) when HUPTIME_MODE holds an
 * unknown value.
 *
 * NOTE(review): the results of read_nul_sep()/read_link() are not
 * checked; args_copy is dereferenced in the loop that prints the
 * arguments - confirm these helpers cannot return NULL.
 * NOTE(review): the two loops of the respawn branch use different bounds
 * (fd_max() vs. fd_limit()) - confirm this is intentional.
 */
void impl_init(void)
{
    const char* mode_env = getenv("HUPTIME_MODE");
    const char* multi_env = getenv("HUPTIME_MULTI");
    const char* revive_env = getenv("HUPTIME_REVIVE");
    const char* debug_env = getenv("HUPTIME_DEBUG");
    const char* pipe_env = getenv("HUPTIME_PIPE");
    const char* wait_env = getenv("HUPTIME_WAIT");

    /* Debug is decided first so the DEBUG() calls below honour it. */
    if( debug_env != NULL && strlen(debug_env) > 0 )
    {
        debug_enabled = !strcasecmp(debug_env, "true") ? TRUE: FALSE;
    }

    DEBUG("Initializing...");

    /* Initialize our lock. */
    impl_init_lock();

    /* Save this pid as our master pid.
     * This is done to handle processes that use
     * process pools. We remember the master pid and
     * will do the full fork()/exec() only when we are
     * the master. Otherwise, we will simply shutdown
     * gracefully, and allow the master to restart. */
    master_pid = getpid();

    /* Grab our exit strategy. */
    if( mode_env != NULL && strlen(mode_env) > 0 )
    {
        if( !strcasecmp(mode_env, "fork") )
        {
            exit_strategy = FORK;
            DEBUG("Exit strategy is fork.");
        }
        else if( !strcasecmp(mode_env, "exec") )
        {
            exit_strategy = EXEC;
            DEBUG("Exit strategy is exec.");
        }
        else
        {
            /* Unknown value: report and terminate. */
            fprintf(stderr, "Unknown exit strategy.");
            libc.exit(1);
        }
    }

    /* Check if we have something to unlink. */
    to_unlink = getenv("HUPTIME_UNLINK");
    if( to_unlink != NULL && strlen(to_unlink) > 0 )
    {
        DEBUG("Unlink is '%s'.", to_unlink);
    }

    /* Clear up any outstanding child processes.
     * Because we may have exited before the process
     * could do appropriate waitpid()'s, we try to
     * clean up children here. Note that we may have
     * some zombies that hang around during the life
     * of the program, but at every restart they will
     * be cleaned up (so at least they won't grow
     * without bound). */
    int status = 0;
    while( waitpid((pid_t)-1, &status, WNOHANG) > 0 );

    /* Check if we're in multi mode. */
    if( multi_env != NULL && strlen(multi_env) > 0 )
    {
        multi_mode = !strcasecmp(multi_env, "true") ? TRUE: FALSE;
    }
#ifndef SO_REUSEPORT
    /* Built without SO_REUSEPORT: multi mode can only be warned about. */
    if( multi_mode == TRUE )
    {
        fprintf(stderr, "WARNING: Multi mode not supported.\n");
        fprintf(stderr, "(Requires at least Linux 3.9 and recent headers).\n");
    }
#endif

    /* Check if we're in revive mode. */
    if( revive_env != NULL && strlen(revive_env) > 0 )
    {
        revive_mode = !strcasecmp(revive_env, "true") ? TRUE : FALSE;
    }

    /* Check if we are in wait mode. */
    if( wait_env != NULL && strlen(wait_env) > 0 )
    {
        wait_mode = !strcasecmp(wait_env, "true") ? TRUE : FALSE;
    }

    /* Check if we're a respawn. */
    if( pipe_env != NULL && strlen(pipe_env) > 0 )
    {
        int fd = -1;
        fdinfo_t *info = NULL;
        /* NOTE(review): the conversion result is not validated. */
        int pipefd = strtol(pipe_env, NULL, 10);

        DEBUG("Loading all file descriptors.");

        /* Decode all passed information.
         * The loop runs for as long as info_decode() returns 0. */
        while( !info_decode(pipefd, &fd, &info) )
        {
            fd_save(fd, info);
            DEBUG("Decoded fd %d (type %d).", fd, info->type);
            info = NULL;
        }
        /* A record left over from the final, failed decode is released. */
        if( info != NULL )
        {
            dec_ref(info);
        }

        /* Finished with the pipe. */
        libc.close(pipefd);
        unsetenv("HUPTIME_PIPE");
        DEBUG("Finished decoding.");

        /* Close all non-encoded descriptors. */
        for( fd = 0; fd < fd_max(); fd += 1 )
        {
            info = fd_lookup(fd);
            if( info == NULL )
            {
                DEBUG("Closing fd %d.", fd);
                libc.close(fd);
            }
        }

        /* Restore all given file descriptors. */
        for( fd = 0; fd < fd_limit(); fd += 1 )
        {
            info = fd_lookup(fd);
            if( info != NULL && info->type == SAVED )
            {
                fdinfo_t *orig_info = fd_lookup(info->saved.fd);
                if( orig_info != NULL )
                {
                    /* Uh-oh, conflict. Move the original (best effort). */
                    do_dup(info->saved.fd);
                    do_close(info->saved.fd);
                }

                /* Return the offset (ignore failure). */
                if( info->saved.offset != (off_t)-1 )
                {
                    lseek(fd, info->saved.offset, SEEK_SET);
                }

                /* Move the SAVED fd back. */
                libc.dup2(fd, info->saved.fd);
                DEBUG("Restored fd %d.", info->saved.fd);
            }
        }
    }
    else
    {
        DEBUG("Saving all initial file descriptors.");

        /* Save all of our initial files. These are used
         * for re-execing the process. These are persisted
         * effectively forever, and on restarts we close
         * everything that is not a BOUND socket or a SAVED
         * file descriptor. */
        for( int fd = 0; fd < fd_max(); fd += 1 )
        {
            fdinfo_t *info = fd_lookup(fd);
            if( info != NULL )
            {
                /* Encoded earlier. */
                continue;
            }

            /* Make a new SAVED FD.
             * NOTE(review): if alloc_info() fails, newfd stays open
             * without being recorded anywhere. */
            int newfd = libc.dup(fd);
            if( newfd >= 0 )
            {
                fdinfo_t *saved_info = alloc_info(SAVED);
                if( saved_info != NULL )
                {
                    saved_info->saved.fd = fd;
                    saved_info->saved.offset = lseek(fd, 0, SEEK_CUR);
                    fd_save(newfd, saved_info);
                    DEBUG("Saved fd %d (offset %lld).", fd, (long long int)saved_info->saved.offset);
                }
            }
        }
    }

    /* Save the environment.
     *
     * NOTE: We reserve extra space in the environment
     * for our special start-up parameters, which will be added
     * in impl_exec() below. (The encoded BOUND/SAVED sockets).
     *
     * We also filter out the special variables above that were
     * used to pass in information about sockets that were bound. */
    free(environ_copy);
    environ_copy = (char**)read_nul_sep("/proc/self/environ");
    DEBUG("Saved environment.");

    /* Save the arguments. */
    free(args_copy);
    args_copy = (char**)read_nul_sep("/proc/self/cmdline");
    DEBUG("Saved args.");
    for( int i = 0; args_copy[i] != NULL; i += 1 )
    {
        DEBUG(" arg%d=%s", i, args_copy[i]);
    }

    /* Save the cwd & exe. */
    free(cwd_copy);
    cwd_copy = (char*)read_link("/proc/self/cwd");
    DEBUG("Saved cwd.");
    free(exe_copy);
    exe_copy = (char*)read_link("/proc/self/exe");
    DEBUG("Saved exe.");

    /* Install our signal handlers. */
    impl_install_sighandlers();

    /* Initialize our thread. */
    impl_init_thread();

    /* Unblock our signals.
     * Note that we have specifically masked the
     * signals prior to the exec() below, to cover
     * the race between program start and having
     * installed the appropriate handlers. */
    sigset_t set;
    sigemptyset(&set);
    sigaddset(&set, SIGHUP);
    sigprocmask(SIG_UNBLOCK, &set, NULL);

    /* Done. */
    DEBUG("Initialization complete.");
}
// Handle one block cache request 'breq' sent by environment 'envid'.
//
// The request word encodes a block number and a request type
// (BCREQ_BLOCKNUM / BCREQ_TYPE).
//   - BCREQ_FLUSH and BCREQ_MAP are satisfied right away.
//   - BCREQ_INITIALIZE marks the block initialized and replies with the
//     previous value of the flag.
//   - BCREQ_MAP_RLOCK, BCREQ_MAP_WLOCK, BCREQ_UNLOCK and
//     BCREQ_UNLOCK_FLUSH manipulate the per-block queue of waiting
//     environments.
//   - BCREQ_PIPE_ATTACH replies with the data page of the pipe's writer.
//
// Queued requests are granted in arrival order (for fairness).  At most
// BI_QSIZE requests per block are queued; further concurrent requests
// are answered with -E_AGAIN, asking the sender to try again later.
//
// Replies are delivered with ipc_send() or send_block(); the function
// itself returns nothing.
//
// NOTE(review): the BCREQ_FLUSH_PIPE branch sends a reply but does not
// return, so execution continues into the generic handling below -
// confirm whether a 'return' is missing there.
// NOTE(review): 'p' used by BCREQ_PIPE_ATTACH is not declared in this
// function; presumably a file-level pipe descriptor pair - verify.
//
static void
handle_breq(envid_t envid, int32_t breq)
{
    struct BlockInfo *bip;
    int r;

    // Extract block number and request type from request.
    blocknum_t blocknum = BCREQ_BLOCKNUM(breq);
    int reqtype = BCREQ_TYPE(breq);

    // Check request type: anything outside the known range is refused.
    if (reqtype < BCREQ_MAP || reqtype > BCREQ_FLUSH_PIPE) {
        ipc_send(envid, -E_NOT_SUPP, 0, 0);
        return;
    }

    if (reqtype == BCREQ_FLUSH_PIPE) {
        ipc_send(envid, 0, 0, 0);
    }

    if (reqtype == BCREQ_PIPE_ATTACH) {
        struct Fd *writer;
        // Reply with the lookup error, or with the writer's data page.
        if ((r = fd_lookup(p[1], &writer, true)) < 0)
            ipc_send(envid, r, 0, 0);
        else
            ipc_send(envid, 0, fd2data(writer), PTE_P | PTE_W | PTE_U | PTE_SHARE);
        return;
    }

    // Handle simple requests.
    if (reqtype == BCREQ_FLUSH) {
        // No reply is sent for a plain flush.
        return;
    } else if (reqtype == BCREQ_MAP) {
        // Lookup without the create flag; if it fails the block is
        // reported as not initialized.
        r = get_block_info(blocknum, &bip, 0);
        send_block(envid, blocknum, r >= 0 ? bip->bi_initialized : 0);
        return;
    }

    // More complex requests need the block_info pointer.
    if ((r = get_block_info(blocknum, &bip, 1)) < 0) {
        ipc_send(envid, r, 0, 0);
        return;
    }

    if (reqtype == BCREQ_INITIALIZE) {
        // Reply with the previous flag value.
        int old_initialized = bip->bi_initialized;
        bip->bi_initialized = 1;
        ipc_send(envid, old_initialized, 0, 0);
        return;
    }

    // Warn about one particularly simple deadlock: the environment that
    // already holds the write lock asks for it again.
    if (reqtype == BCREQ_MAP_WLOCK && bip->bi_nlocked > 0
        && BI_REQTYPE(bip, 0) == BCREQ_MAP_WLOCK
        && BI_ENVID(bip, 0) == envid)
        cprintf("bufcache: DEADLOCK: env [%08x] re-requests write lock on block %d!\n", envid, blocknum);

    if (reqtype == BCREQ_UNLOCK || reqtype == BCREQ_UNLOCK_FLUSH) {
        // Ensure that envid is one of the environments
        // currently locking the block
        int n = 0;
        while (n < bip->bi_nlocked && BI_ENVID(bip, n) != envid)
            ++n;
        if (n == bip->bi_nlocked) {
            ipc_send(envid, -E_NOT_LOCKED, 0, 0);
            return;
        }

        // Remove entry n: overwrite it with the entry at the head and
        // then advance the head, shrinking both counters.
        BI_ENVID(bip, n) = BI_ENVID(bip, 0);
        BI_REQTYPE(bip, n) = BI_REQTYPE(bip, 0);
        ++bip->bi_head;
        --bip->bi_nlocked;
        --bip->bi_count;

        r = (reqtype == BCREQ_UNLOCK ? 0 : flush_block(blocknum));
        ipc_send(envid, r, 0, 0);
        // Continue on to clear the request queue: perhaps this
        // environment's unlock reqtype lets the next environment lock
    } else if (bip->bi_count == BI_QSIZE) {
        // The queue is full; ask the environment to try again later
        ipc_send(envid, -E_AGAIN, 0, 0);
        return;
    } else {
        // Append the new lock request to the queue.
        BI_ENVID(bip, bip->bi_count) = envid;
        BI_REQTYPE(bip, bip->bi_count) = reqtype;
        ++bip->bi_count;
    }

    // Process the request queue: grant as many waiting requests as the
    // locking rules allow, in order.
    while (bip->bi_nlocked < bip->bi_count) {
        // If trying to write lock, must be first attempt
        if (BI_REQTYPE(bip, bip->bi_nlocked) == BCREQ_MAP_WLOCK
            && bip->bi_nlocked > 0)
            break;
        // If trying to read lock, any existing lock must be read
        if (BI_REQTYPE(bip, bip->bi_nlocked) == BCREQ_MAP_RLOCK
            && bip->bi_nlocked > 0
            && BI_REQTYPE(bip, 0) != BCREQ_MAP_RLOCK)
            break;
        // If we get here, we can grant the page to this queue element
        send_block(BI_ENVID(bip, bip->bi_nlocked), blocknum, bip->bi_initialized);
        ++bip->bi_nlocked;
    }
}
int v_connect(int socket, struct in_addr *addr, uint16_t port){ #ifdef DEBUG printf(" v_connect() Trying to connect\n"); #endif //bind the socket to a random port int ret; struct in_addr any_addr; any_addr.s_addr = 0; ret = v_bind(socket, &any_addr, rand()%MAXPORT); //something went wrong at v_bind (no socket, not valid pnum) if(ret < 0) { #ifdef DEBUG printf(" v_connect() error : v_bind() failed\n"); #endif return -1; } #ifdef DEBUG printf(" v_connect() : v_bind() success\n"); #endif socket_t *so = fd_lookup(socket); if (so == NULL) { #ifdef DEBUG printf(" v_connect() error : fd_lookup() failed\n"); #endif set_socketstate(so, CLOSE); return -1; } #ifdef DEBUG printf(" v_connect() : fd_lookup() success\n"); #endif //store it in the lookup table (urport, myport, uraddr) + init windows init_windows(so); //populate socket with info so->urport = port; so->uraddr = addr->s_addr; //my addr is the interface IP address we will be sending request out to interface_t *i = get_nexthop(so->uraddr); if (i == NULL) { set_socketstate(so, CLOSE); return -EHOSTUNREACH; } so->myaddr = i->sourcevip; so->myseq = rand() % MAXSEQ; #ifdef SIMPLESEQ so->myseq = 0; #endif #ifdef DEBUG printf(" v_connect() : so->* success\n"); #endif #ifdef DEBUG printf(" v_connect: Added socket %d to socket_table"_NORMAL_"\n", so->id); #endif HASH_ADD(hh2, socket_table, urport, keylen, so); set_socketstate(so, SYN_SENT); #ifdef DEBUG printf(" v_connect() : socket %d moved into state SYN_SENT\n", so->id); #endif tcp_send_handshake(1, so); #ifdef DEBUG printf(_RED_" v_connect: send syn"_NORMAL_"\n"); printf(_BLUE_" v_connect: timed out"_NORMAL_"\n"); #endif time_t now = time(NULL); time_t next = now; time_t diff = 0; int count = 0; //send a connection request (first grip) while ((count < MAX_SYN_REQ)) { if (so->state == ESTABLISHED) { break; } next = time(NULL); diff = next-now; if (diff == 1) { tcp_send_handshake(1, so); count++; #ifdef DEBUG printf(_RED_" v_connect: send syn"_NORMAL_"\n"); printf(_BLUE_" 
v_connect: timed out"_NORMAL_"\n"); #endif now = next; } } // Could not connect if (so->state == SYN_SENT || so->state == CLOSE) { set_socketstate(so, CLOSED); return -ENOTCONN; } //commence buffer management int s = (int)socket; pthread_attr_t thr_attr; pthread_attr_init(&thr_attr); pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED); pthread_create(&so->th, &thr_attr, buf_mgmt, (void *) s); return 0; }
static int do_bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { fdinfo_t *info = NULL; int rval = -1; if( sockfd < 0 ) { errno = EINVAL; return -1; } /* At this point, we can reasonably assume * the program has started up and has installed * whatever signal handlers it wants. We check * that our own signal handler is installed. * If the user doesn't want us to override the * built-in signal handlers, they shouldn't use * huptime. */ impl_install_sighandlers(); DEBUG("do_bind(%d, ...) ...", sockfd); L(); /* See if this socket already exists. */ for( int fd = 0; fd < fd_limit(); fd += 1 ) { fdinfo_t *info = fd_lookup(fd); if( info != NULL && info->type == BOUND && info->bound.addrlen == addrlen && !memcmp(addr, (void*)info->bound.addr, addrlen) ) { DEBUG("Found ghost %d, cloning...", fd); /* Give back a duplicate of this one. */ int rval = do_dup2(fd, sockfd); if( rval < 0 ) { /* Dup2 failed? */ DEBUG("Failed."); continue; } if( info->bound.is_ghost ) { /* Close the original (not needed). */ info->bound.is_ghost = 0; do_close(fd); } /* Success. */ U(); DEBUG("do_bind(%d, ...) => 0 (ghosted)", sockfd); return 0; } } #ifdef SO_REUSEPORT /* Multi mode? Set socket options. */ if( multi_mode == TRUE ) { int optval = 1; if( setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval)) < 0 ) { U(); DEBUG("do_bind(%d, ...) => -1 (no multi?)", sockfd); return -1; } DEBUG("Multi mode enabled."); } #endif /* Try a real bind. */ info = alloc_info(BOUND); if( info == NULL ) { U(); DEBUG("do_bind(%d, ...) => -1 (alloc error?)", sockfd); return -1; } rval = libc.bind(sockfd, addr, addrlen); if( rval < 0 ) { dec_ref(info); U(); DEBUG("do_bind(%d, ...) => %d (error)", sockfd, rval); return rval; } /* Ensure that this socket is non-blocking, * this is because we override the behavior * for accept() and we require non-blocking * behavior. We deal with the consequences. 
*/ rval = fcntl(sockfd, F_SETFL, O_NONBLOCK); if( rval < 0 ) { dec_ref(info); U(); DEBUG("do_bind(%d, ...) => %d (fcntl error)", sockfd, rval); return -1; } /* Save a refresh bound socket info. */ info->bound.stub_listened = 0; info->bound.real_listened = 0; info->bound.addr = (struct sockaddr*)malloc(addrlen); info->bound.addrlen = addrlen; memcpy((void*)info->bound.addr, (void*)addr, addrlen); fd_save(sockfd, info); /* Success. */ U(); DEBUG("do_bind(%d, ...) => %d", sockfd, rval); return rval; }
void impl_exit_start(void) { if( is_exiting == TRUE ) { return; } /* We are now exiting. * After this point, all calls to various sockets, * (i.e. accept(), listen(), etc. will result in stalls. * We are just waiting until existing connections have * finished and then we will be either exec()'ing a new * version or exiting this process. */ is_exiting = TRUE; /* Get ready to restart. * We only proceed with actual restart actions * if we are the master process, otherwise we will * simply prepare to shutdown cleanly once all the * current active connections have finished. */ if( master_pid == getpid() ) { pid_t child; DEBUG("Exit started -- this is the master."); /* Unlink files (e.g. pidfile). */ if( to_unlink != NULL && strlen(to_unlink) > 0 ) { DEBUG("Unlinking '%s'...", to_unlink); unlink(to_unlink); } /* Neuter this process. */ for( int fd = 0; fd < fd_limit(); fd += 1 ) { fdinfo_t* info = fd_lookup(fd); if( exit_strategy == FORK && info != NULL && info->type == SAVED ) { /* Close initial files. Since these * are now passed on to the child, we * ensure that the parent won't mess * with them anymore. Note that we still * have a copy as all SAVED descriptors. */ if( info->saved.fd == 2 ) { /* We treat stderr special. * Assuming logging will go here, we * allow the parent process to continue * writing to this file (and hope that * it's open in APPEND mode, etc.). */ continue; } int nullfd = open("/dev/null", O_RDWR); do_dup2(nullfd, info->saved.fd); libc.close(nullfd); } if( info != NULL && info->type == BOUND && !info->bound.is_ghost ) { /* Change BOUND sockets to dummy sockets. * This will allow select() and poll() to * operate as you expect, and never give * back new clients. */ int newfd = do_dup(fd); if( newfd >= 0 ) { int dummy_server = impl_dummy_server(); if( dummy_server >= 0 ) { /* Remove the descriptor in any epoll FDs. 
*/ for( int efd = 0; efd < fd_limit(); efd += 1 ) { fdinfo_t* einfo = fd_lookup(efd); if( einfo != NULL && einfo->type == EPOLL ) { struct epoll_event no_event; epoll_ctl(efd, EPOLL_CTL_DEL, fd, &no_event); } } info->bound.is_ghost = 1; do_dup2(dummy_server, fd); DEBUG("Replaced FD %d with dummy.", fd); } else { do_close(newfd); } } } } switch( exit_strategy ) { case FORK: /* Start the child process. * We will exit gracefully when the tracked * connection count reaches zero. */ DEBUG("Exit strategy is fork."); child = libc.fork(); if( child == 0 ) { DEBUG("I'm the child."); impl_exec(); } else { DEBUG("I'm the parent."); } break; case EXEC: /* Nothing necessary beyond the above. */ DEBUG("Exit strategy is exec."); break; } } else { /* Force our strategy to fork, though we haven't forked. * This will basically just have this process exit cleanly * once all the current active connections have finished. */ DEBUG("Exit started -- this is the child."); exit_strategy = FORK; } }
/*
 * Block until an edge event is reported for `gpio`.
 * Standalone from the other event functions.
 *
 * A private epoll instance is created for the duration of the call and is
 * closed on every exit path.
 *
 * Returns 0 on success, otherwise the step that failed:
 *   1  epoll_create failed
 *   2  gpio_event_add reported non-zero
 *   3  open_value_file failed
 *   4  epoll_ctl(EPOLL_CTL_ADD) failed
 *   5  epoll_wait failed
 *   6  reading one byte from the value file failed
 *   7  the event was reported for a different descriptor
 */
int blocking_wait_for_edge(unsigned int gpio, unsigned int edge)
{
    int fd = fd_lookup(gpio);
    int epfd;
    int n = 0;
    int i;
    int result = 0;
    struct epoll_event events, ev;
    char buf;

    if ((epfd = epoll_create(1)) == -1)
        return 1;

    // check to see if this gpio has been added already, if not, mark as added
    if (gpio_event_add(gpio) != 0) {
        result = 2;
        goto out_close;
    }

    // export /sys/class/gpio interface
    gpio_export(gpio);
    gpio_set_direction(gpio, 1); // 1=input
    gpio_set_edge(gpio, edge);

    if (!fd) {
        if ((fd = open_value_file(gpio)) == -1) {
            // NOTE(review): unlike the later error paths this one has never
            // called gpio_event_remove(); kept as-is - confirm it is intended.
            result = 3;
            goto out_close;
        }
    }

    // add to epoll fd
    ev.events = EPOLLIN | EPOLLET | EPOLLPRI;
    ev.data.fd = fd;
    if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) {
        result = 4;
        goto out_remove;
    }

    // epoll for event
    for (i = 0; i < 2; i++) { // first time triggers with current state, so ignore
        if ((n = epoll_wait(epfd, &events, 1, -1)) == -1) {
            result = 5;
            goto out_remove;
        }
    }

    if (n > 0) {
        lseek(events.data.fd, 0, SEEK_SET);
        if (read(events.data.fd, &buf, 1) != 1) {
            result = 6;
            goto out_remove;
        }
        if (events.data.fd != fd) {
            result = 7;
            goto out_remove;
        }
    }

out_remove:
    gpio_event_remove(gpio);
out_close:
    // always release the epoll instance, on success and on failure
    close(epfd);
    return result;
}
void impl_exec(void) { DEBUG("Preparing for exec..."); /* Reset our signal masks. * We intentionally mask SIGHUP here so that * it can't be called prior to us installing * our signal handlers. */ sigset_t set; sigemptyset(&set); sigaddset(&set, SIGHUP); sigprocmask(SIG_BLOCK, &set, NULL); /* Encode extra information. * * This includes information about sockets which * are in the BOUND or SAVED state. Note that we * can't really do anything with these *now* as * there are real threads running rampant -- so * we encode things for the exec() and take care * of it post-exec(), where we know we're solo. * * This information is encoded into a pipe which * is passed as an extra environment variable into * the next child. Although there is a limit on the * amount of data that can be stuffed into a pipe, * past Linux 2.6.11 (IIRC) this is 65K. */ int pipes[2]; if( pipe(pipes) < 0 ) { DEBUG("Unable to create pipes?"); libc.exit(1); } /* Stuff information into the pipe. */ for( int fd = 0; fd < fd_limit(); fd += 1 ) { fdinfo_t *info = fd_lookup(fd); int to_be_saved = (info != NULL && (info->type == BOUND || info->type == SAVED)); if( fd == 2 || to_be_saved ) { /* I can't believe this is necessary. * When node.js starts up, it seems to run over * an arbitrary number of file descriptors and * mark them all CLO_EXEC. That is so messed up. * That's some seriously broken behaviour. */ fcntl(fd, F_SETFD, 0); } if( to_be_saved ) { if( info_encode(pipes[1], fd, info) < 0 ) { DEBUG("Error encoding fd %d: %s", fd, strerror(errno)); } else { DEBUG("Encoded fd %d (type %d).", fd, info->type); } } } libc.close(pipes[1]); DEBUG("Finished encoding."); /* Prepare our environment variable. */ char pipe_env[32]; snprintf(pipe_env, 32, "HUPTIME_PIPE=%d", pipes[0]); /* Mask the existing environment variable. 
*/ char **environ = environ_copy; int environ_len = 0; for( environ_len = 0; environ[environ_len] != NULL; environ_len += 1 ) { if( !strncmp("HUPTIME_PIPE=", environ[environ_len], strlen("HUPTIME_PIPE=")) ) { environ[environ_len] = pipe_env; break; } } /* Do we need to extend the environment? */ if( environ[environ_len] == NULL ) { char** new_environ = malloc(sizeof(char*) * (environ_len + 2)); memcpy(new_environ, environ, sizeof(char*) * (environ_len)); new_environ[environ_len] = pipe_env; new_environ[environ_len + 1] = NULL; environ = new_environ; } /* Execute in the same environment, etc. */ chdir(cwd_copy); DEBUG("Doing exec()... bye!"); execve(exe_copy, args_copy, environ); /* Bail. Should never reach here. */ DEBUG("Things went horribly wrong!"); libc.exit(1); }