cudaError_t cudaMalloc(void **devPtr, size_t size) { static cudaError_t (*nv_cudaMalloc)(void **, size_t) = NULL; cudaError_t ret; struct timeval t; if(!nv_cudaMalloc) { nv_cudaMalloc = dlsym(RTLD_NEXT, "cudaMalloc"); //nv_cudaMalloc = dlsym(RTLD_NEXT, "cudaMalloc_v2"); if(!nv_cudaMalloc) { fprintf(stderr, "failed to find symbol cudaMalloc: %s\n", dlerror()); show_stackframe(); return cudaErrorSharedObjectSymbolNotFound; } } gettimeofday(&t, NULL); printf("[gvm] %lf intercepting cudaMalloc at %lx\n", t.tv_sec + t.tv_usec / 1000000.0, (unsigned long)devPtr); do { ret = nv_cudaMalloc(devPtr, size); } while (ret != cudaSuccess); gettimeofday(&t, NULL); printf("[gvm] %lf intercepted cudaMalloc( %lx %ld ) = %d\n", t.tv_sec + t.tv_usec / 1000000.0, (unsigned long)(*devPtr), size, (int)ret); return ret; }
/**
   Write the output collected from a builtin to this process' stdout and
   stderr.

   \param out text for stdout, or NULL to write nothing there
   \param err text for stderr, or NULL to write nothing there

   A failed write or flush of stdout is reported through debug(), wperror()
   and show_stackframe(). Failures on stderr are silently ignored.
*/
static void do_builtin_io( wchar_t *out, wchar_t *err )
{
	if( out != NULL )
	{
		/* The flush is attempted only when the write itself succeeded. */
		int stdout_ok = ( fwprintf( stdout, L"%ls", out ) != -1 );
		if( stdout_ok )
		{
			stdout_ok = ( fflush( stdout ) != EOF );
		}

		if( !stdout_ok )
		{
			debug( 0, L"Error while writing to stdout" );
			wperror( L"fwprintf" );
			show_stackframe();
		}
	}

	if( err != NULL )
	{
		/*
		  Nothing is reported if either call fails: with stderr dead there
		  is no place left to show an error message.
		*/
		if( fwprintf( stderr, L"%ls", err ) != -1 )
		{
			(void)fflush( stderr );
		}
	}
}
/*
 * Signal handler.
 *
 * Stores the current wall-clock time in the file-level variable `recvtime`,
 * then calls show_stackframe() followed by bit_flip(). The signal number is
 * not used.
 *
 * NOTE(review): show_stackframe() and bit_flip() are defined outside this
 * block; confirm that they are safe to call from signal context.
 */
void sig_handler(int sig)
{
    (void)sig;

    /* Take the timestamp before anything else runs. */
    gettimeofday(&recvtime, NULL);

    show_stackframe();
    bit_flip();
}
/// Handle output from a builtin, by printing the contents of builtin_io_streams to the redirections /// given in io_chain. static bool handle_builtin_output(job_t *j, process_t *p, io_chain_t *io_chain, const io_streams_t &builtin_io_streams) { assert(p->type == INTERNAL_BUILTIN && "Process is not a builtin"); // Handle output from builtin commands. In the general case, this means forking of a // worker process, that will write out the contents of the stdout and stderr buffers // to the correct file descriptor. Since forking is expensive, fish tries to avoid // it when possible. bool fork_was_skipped = false; const shared_ptr<io_data_t> stdout_io = io_chain->get_io_for_fd(STDOUT_FILENO); const shared_ptr<io_data_t> stderr_io = io_chain->get_io_for_fd(STDERR_FILENO); const output_stream_t &stdout_stream = builtin_io_streams.out; const output_stream_t &stderr_stream = builtin_io_streams.err; // If we are outputting to a file, we have to actually do it, even if we have no // output, so that we can truncate the file. Does not apply to /dev/null. bool must_fork = redirection_is_to_real_file(stdout_io.get()) || redirection_is_to_real_file(stderr_io.get()); if (!must_fork && p->is_last_in_job) { // We are handling reads directly in the main loop. Note that we may still end // up forking. const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER; const bool no_stdout_output = stdout_stream.empty(); const bool no_stderr_output = stderr_stream.empty(); const bool stdout_discarded = stdout_stream.buffer().discarded(); if (!stdout_discarded && no_stdout_output && no_stderr_output) { // The builtin produced no output and is not inside of a pipeline. No // need to fork or even output anything. debug(4, L"Skipping fork: no output for internal builtin '%ls'", p->argv0()); fork_was_skipped = true; } else if (no_stderr_output && stdout_is_to_buffer) { // The builtin produced no stderr, and its stdout is going to an // internal buffer. There is no need to fork. 
This helps out the // performance quite a bit in complex completion code. // TODO: we're sloppy about handling explicitly separated output. // Theoretically we could have explicitly separated output on stdout and // also stderr output; in that case we ought to thread the exp-sep output // through to the io buffer. We're getting away with this because the only // thing that can output exp-sep output is `string split0` which doesn't // also produce stderr. debug(4, L"Skipping fork: buffered output for internal builtin '%ls'", p->argv0()); io_buffer_t *io_buffer = static_cast<io_buffer_t *>(stdout_io.get()); io_buffer->append_from_stream(stdout_stream); fork_was_skipped = true; } else if (stdout_io.get() == NULL && stderr_io.get() == NULL) { // We are writing to normal stdout and stderr. Just do it - no need to fork. debug(4, L"Skipping fork: ordinary output for internal builtin '%ls'", p->argv0()); const std::string outbuff = wcs2string(stdout_stream.contents()); const std::string errbuff = wcs2string(stderr_stream.contents()); bool builtin_io_done = do_builtin_io(outbuff.data(), outbuff.size(), errbuff.data(), errbuff.size()); if (!builtin_io_done && errno != EPIPE) { redirect_tty_output(); // workaround glibc bug debug(0, "!builtin_io_done and errno != EPIPE"); show_stackframe(L'E'); } if (stdout_discarded) p->status = STATUS_READ_TOO_MUCH; fork_was_skipped = true; } } if (fork_was_skipped) { p->completed = 1; if (p->is_last_in_job) { debug(4, L"Set status of job %d (%ls) to %d using short circuit", j->job_id, j->preview().c_str(), p->status); int status = p->status; proc_set_last_status(j->get_flag(job_flag_t::NEGATE) ? (!status) : status); } } else { // Ok, unfortunately, we have to do a real fork. Bummer. We work hard to make // sure we don't have to wait for all our threads to exit, by arranging things // so that we don't have to allocate memory or do anything except system calls // in the child. 
// // These strings may contain embedded nulls, so don't treat them as C strings. const std::string outbuff_str = wcs2string(stdout_stream.contents()); const char *outbuff = outbuff_str.data(); size_t outbuff_len = outbuff_str.size(); const std::string errbuff_str = wcs2string(stderr_stream.contents()); const char *errbuff = errbuff_str.data(); size_t errbuff_len = errbuff_str.size(); fflush(stdout); fflush(stderr); if (!fork_child_for_process(j, p, *io_chain, false, "internal builtin", [&] { do_builtin_io(outbuff, outbuff_len, errbuff, errbuff_len); exit_without_destructors(p->status); })) { return false; } } return true; }