/// Executes the process \p p as part of job \p j, using the read pipe \p pipe_current_read
/// (the read end inherited from the previous process in the pipeline, if any).
/// If the process pipes to a following command, the read end of the newly created pipe is
/// returned in \p out_pipe_next_read.
/// \param all_ios the full IO chain for the job, used to avoid fd conflicts when creating pipes.
/// \param stdout_read_limit cap on how much builtin stdout may be buffered.
/// \returns true on success, false on exec error (the process is marked failed on pipe errors).
static bool exec_process_in_job(parser_t &parser, process_t *p, job_t *j,
                                autoclose_fd_t pipe_current_read,
                                autoclose_fd_t *out_pipe_next_read, const io_chain_t &all_ios,
                                size_t stdout_read_limit) {
    // The IO chain for this process. It starts with the block IO, then pipes, and then gets any
    // from the process.
    io_chain_t process_net_io_chain = j->block_io_chain();

    // See if we need a pipe: every process except the last writes into a pipe.
    const bool pipes_to_next_command = !p->is_last_in_job;

    // The write end of any pipe we create (owned here; closed on scope exit via RAII).
    autoclose_fd_t pipe_current_write{};

    // The pipes the current process write to and read from. Unfortunately these can't be just
    // allocated on the stack, since j->io wants shared_ptr.
    //
    // The write pipe (destined for stdout) needs to occur before redirections. For example,
    // with a redirection like this:
    //
    //   `foo 2>&1 | bar`
    //
    // what we want to happen is this:
    //
    //   dup2(pipe, stdout)
    //   dup2(stdout, stderr)
    //
    // so that stdout and stderr both wind up referencing the pipe.
    //
    // The read pipe (destined for stdin) is more ambiguous. Imagine a pipeline like this:
    //
    //   echo alpha | cat < beta.txt
    //
    // Should cat output alpha or beta? bash and ksh output 'beta', tcsh gets it right and
    // complains about ambiguity, and zsh outputs both (!). No shells appear to output 'alpha',
    // so we match bash here. That would mean putting the pipe first, so that it gets trumped by
    // the file redirection.
    //
    // However, eval does this:
    //
    //   echo "begin; $argv "\n" ;end <&3 3<&-" | source 3<&0
    //
    // which depends on the redirection being evaluated before the pipe. So the write end of the
    // pipe comes first, the read pipe of the pipe comes last. See issue #966.
    shared_ptr<io_pipe_t> pipe_write;
    shared_ptr<io_pipe_t> pipe_read;

    // Write pipe goes first (see the ordering rationale above).
    if (pipes_to_next_command) {
        pipe_write.reset(new io_pipe_t(p->pipe_write_fd, false));
        process_net_io_chain.push_back(pipe_write);
    }

    // The explicit IO redirections associated with the process.
    process_net_io_chain.append(p->io_chain());

    // Read pipe goes last, so explicit redirections take precedence over it (see above).
    if (!p->is_first_in_job) {
        pipe_read.reset(new io_pipe_t(p->pipe_read_fd, true));
        // Record the current read in pipe_read.
        pipe_read->pipe_fd[0] = pipe_current_read.fd();
        process_net_io_chain.push_back(pipe_read);
    }

    // This call is used so the global environment variable array is regenerated, if needed,
    // before the fork. That way, we avoid a lot of duplicate work where EVERY child would need
    // to generate it, since that result would not get written back to the parent. This call
    // could be safely removed, but it would result in slightly lower performance - at least on
    // uniprocessor systems.
    if (p->type == EXTERNAL) {
        // Apply universal barrier so we have the most recent uvar changes.
        if (!get_proc_had_barrier()) {
            set_proc_had_barrier(true);
            env_universal_barrier();
        }
        env_export_arr();
    }

    // Set up fds that will be used in the pipe.
    if (pipes_to_next_command) {
        // debug( 1, L"%ls|%ls" , p->argv[0], p->next->argv[0]);
        int local_pipe[2] = {-1, -1};
        if (exec_pipe(local_pipe) == -1) {
            debug(1, PIPE_ERROR);
            wperror(L"pipe");
            job_mark_process_as_failed(j, p);
            return false;
        }

        // Ensure our pipe fds do not conflict with any fd redirections. E.g. if the process is
        // like 'cat <&5' then fd 5 must not be used by the pipe.
        if (!pipe_avoid_conflicts_with_io_chain(local_pipe, all_ios)) {
            // We failed. The pipes were closed for us.
            wperror(L"dup");
            job_mark_process_as_failed(j, p);
            return false;
        }

        // This tells the redirection about the fds, but the redirection does not close them.
        assert(local_pipe[0] >= 0);
        assert(local_pipe[1] >= 0);
        memcpy(pipe_write->pipe_fd, local_pipe, sizeof(int) * 2);

        // Record our pipes: we own the write end; the read end is handed to the caller for the
        // next process in the pipeline.
        pipe_current_write.reset(local_pipe[1]);
        out_pipe_next_read->reset(local_pipe[0]);
    }

    // Execute the process, dispatching on its type.
    switch (p->type) {
        case INTERNAL_FUNCTION:
        case INTERNAL_BLOCK_NODE: {
            if (!exec_block_or_func_process(parser, j, p, all_ios, process_net_io_chain)) {
                return false;
            }
            break;
        }

        case INTERNAL_BUILTIN: {
            // Builtins buffer their output; stdout_read_limit caps that buffer.
            io_streams_t builtin_io_streams{stdout_read_limit};
            if (!exec_internal_builtin_proc(parser, j, p, pipe_read.get(), process_net_io_chain,
                                            builtin_io_streams)) {
                return false;
            }
            if (!handle_builtin_output(j, p, &process_net_io_chain, builtin_io_streams)) {
                return false;
            }
            break;
        }

        case EXTERNAL: {
            if (!exec_external_command(j, p, process_net_io_chain)) {
                return false;
            }
            break;
        }

        case INTERNAL_EXEC: {
            // We should have handled exec up above.
            DIE("INTERNAL_EXEC process found in pipeline, where it should never be. Aborting.");
            break;
        }
    }
    return true;
}
/// Body of the background fill thread: drains \p readfd into this buffer until the producer
/// side closes (EOF) or the shutdown flag is observed after a poll timeout.
void io_buffer_t::run_background_fillthread(autoclose_fd_t readfd) {
    // Here we are running the background fillthread, executing in a background thread.
    // Our plan is:
    // 1. poll via select() until the fd is readable.
    // 2. Acquire the append lock.
    // 3. read until EAGAIN (would block), appending
    // 4. release the lock
    // The purpose of holding the lock around the read calls is to ensure that data from background
    // processes isn't weirdly interspersed with data directly transferred (from a builtin to a
    // buffer).
    const int fd = readfd.fd();

    // 100 msec poll rate. Note that in most cases, the write end of the pipe will be closed so
    // select() will return; the polling is important only for weird cases like a background process
    // launched in a command substitution.
    const long poll_timeout_usec = 100000;
    struct timeval tv = {};
    tv.tv_usec = poll_timeout_usec;

    bool shutdown = false;
    while (!shutdown) {
        bool readable = false;

        // Poll if our fd is readable.
        // Do this even if the shutdown flag is set. It's important we wait for the fd at least
        // once. For short-lived processes, it's possible for the process to execute, produce output
        // (fits in the pipe buffer) and be reaped before we are even scheduled. So always wait at
        // least once on the fd. Note that doesn't mean we will wait for the full poll duration;
        // typically what will happen is our pipe will be widowed and so this will return quickly.
        // It's only for weird cases (e.g. a background process launched inside a command
        // substitution) that we'll wait out the entire poll time.
        fd_set fds;
        FD_ZERO(&fds);
        FD_SET(fd, &fds);
        int ret = select(fd + 1, &fds, NULL, NULL, &tv);
        // select(2) is allowed to (and does) update `tv` to indicate how much time was left, so we
        // need to restore the desired value each time.
        tv.tv_usec = poll_timeout_usec;
        readable = ret > 0;
        if (ret < 0 && errno != EINTR) {
            // Surprising error (EINTR is expected and simply retried).
            wperror(L"select");
            return;
        }

        // Only check the shutdown flag if we timed out.
        // It's important that if select() indicated we were readable, that we call select() again
        // allowing it to time out. Note the typical case is that the fd will be closed, in which
        // case select will return immediately.
        if (!readable) {
            shutdown = this->shutdown_fillthread_.load(std::memory_order_relaxed);
        }

        if (readable || shutdown) {
            // Now either our fd is readable, or we have set the shutdown flag.
            // Either way acquire the lock and read until we reach EOF, or EAGAIN / EINTR.
            scoped_lock locker(append_lock_);
            ssize_t ret;
            do {
                // Clear errno so we can distinguish "read returned -1 with no error set" below.
                errno = 0;
                char buff[4096];
                ret = read(fd, buff, sizeof buff);
                if (ret > 0) {
                    // Append the bytes we just read; we hold append_lock_ so direct transfers
                    // from builtins cannot interleave with us.
                    buffer_.append(&buff[0], &buff[ret]);
                } else if (ret == 0) {
                    // EOF: the write side is fully closed; exit the outer loop.
                    shutdown = true;
                } else if (ret == -1 && errno == 0) {
                    // No specific error. We assume we just return,
                    // since that's what we do in read_blocked.
                    return;
                } else if (errno != EINTR && errno != EAGAIN) {
                    // Real read failure (not a retryable interruption or would-block).
                    wperror(L"read");
                    return;
                }
            } while (ret > 0);
        }
    }
    assert(shutdown && "Should only exit loop if shutdown flag is set");
}