static void test_raw_clone(void) { pid_t parent, pid, pid2; log_info("/* %s */", __func__); parent = getpid(); log_info("before clone: getpid()→"PID_FMT, parent); assert_se(raw_getpid() == parent); pid = raw_clone(0); assert_se(pid >= 0); pid2 = raw_getpid(); log_info("raw_clone: "PID_FMT" getpid()→"PID_FMT" raw_getpid()→"PID_FMT, pid, getpid(), pid2); if (pid == 0) { assert_se(pid2 != parent); _exit(EXIT_SUCCESS); } else { int status; assert_se(pid2 == parent); waitpid(pid, &status, __WCLONE); assert_se(WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SUCCESS); } errno = 0; assert_se(raw_clone(CLONE_FS|CLONE_NEWNS) == -1); assert_se(errno == EINVAL); }
/* Verifies that raw_getpid(), getpid_cached() and getpid() agree with each
 * other in three situations: before a fork(), inside the forked child, and in
 * the parent after the fork() (where the values must also still equal the
 * pre-fork PID). Finally the child is reaped and must have exited cleanly. */
static void test_getpid_cached(void) {
        pid_t before_raw, before_cached, before_libc;
        pid_t after_raw, after_cached, after_libc;
        pid_t child;
        siginfo_t info;

        /* Baseline: all three sources agree before forking. */
        before_raw = raw_getpid();
        before_cached = getpid_cached();
        before_libc = getpid();
        assert_se(before_raw == before_cached);
        assert_se(before_raw == before_libc);

        child = fork();
        assert_se(child >= 0);

        if (child == 0) {
                /* In child: the three sources must agree here as well. */
                pid_t child_raw, child_cached, child_libc;

                child_raw = raw_getpid();
                child_cached = getpid_cached();
                child_libc = getpid();
                assert_se(child_raw == child_cached);
                assert_se(child_raw == child_libc);

                _exit(EXIT_SUCCESS);
        }

        /* In parent: nothing may have changed across the fork(). */
        after_raw = raw_getpid();
        after_cached = getpid_cached();
        after_libc = getpid();
        assert_se(before_raw == after_raw);
        assert_se(before_raw == after_cached);
        assert_se(before_raw == after_libc);

        /* Reap the child and require a normal exit with status 0. */
        assert_se(wait_for_terminate(child, &info) >= 0);
        assert_se(info.si_status == 0);
        assert_se(info.si_code == CLD_EXITED);
}
/* Initialisation entry point, followed by the debug-print helper used from
 * the SIGILL handler.
 *
 * The entry point is compiled either as the constructor startup() or, in the
 * branch after the #else, as main() with RETURN_VALUE defined as 0. The
 * opening #if of that conditional lies outside this excerpt; presumably it
 * tests EXECUTABLE, like the #ifdef near the end of the function -- TODO
 * confirm.
 *
 * In order, the entry point:
 *
 *  1. Reads configuration from the environment: TRAP_SYSCALLS_DEBUG,
 *     TRAP_SYSCALLS_FOOTPRINT_FD, TRAP_SYSCALLS_TRACE_FD,
 *     TRAP_SYSCALLS_SLEEP_FOR_SECONDS, TRAP_SYSCALLS_STOP_SELF (only its
 *     presence matters) and TRAP_SYSCALLS_FOOTPRINT_SPEC_FILENAME. Numeric
 *     values are converted with atoi().
 *  2. If TRAP_SYSCALLS_STOP_SELF is set, sends SIGSTOP to its own PID via
 *     raw_kill().
 *  3. Sleeps for sleep_for_seconds seconds, one raw_nanosleep() per second.
 *  4. Footprints: if footprint_fd is greater than 2 and raw_fstat() succeeds
 *     on it, sets __write_footprints, opens footprints_out on the fd in
 *     append mode and, when a spec filename was given, parses it with
 *     parse_footprints_from_file().
 *  5. Traces: if the trace fd variable is unset or names fd 2, dup()s stderr;
 *     then, if the resulting fd is non-negative and raw_fstat() succeeds,
 *     sets __write_traces and opens traces_out on it in append mode.
 *  6. Reads /proc/self/maps through an 8192-byte buffer and calls
 *     saw_mapping(start, newline) for every complete newline-terminated
 *     entry. A trailing partial entry is moved to the front of the buffer
 *     before the next read.
 *  7. Installs handle_sigill as the SIGILL handler through raw_rt_sigaction(),
 *     with flags 0x04000000 (SA_RESTORER) | SA_NODEFER and restore_rt as the
 *     restorer.
 *  8. With EXECUTABLE defined, records the address of the ud2 instruction in
 *     ignore_ud2_addr, executes ud2 and then calls raw_exit(0). Otherwise
 *     returns RETURN_VALUE.
 *
 * _handle_sigill_debug_printf(level, fmt, ...) formats its arguments to
 * *p_err_stream with vfprintf() and flushes the stream, but only when level
 * is less than or equal to debug_level.
 *
 * NOTE(review): on the first complete entry of a fresh buffer buf_pos still
 *   equals buf, so debug_buf2 is declared with length zero and the following
 *   store to debug_buf2[sizeof debug_buf2 - 1] lands outside the array. The
 *   intended bound looks like buf_limit rather than buf_pos -- confirm.
 * NOTE(review): ret is unsigned int; if raw_read() reports errors as a
 *   negative value it would wrap to a large positive count and defeat both
 *   the buf_limit assertion and the "ret > 0" loop test -- confirm raw_read's
 *   error convention.
 * NOTE(review): the "no footprints spec filename provided" message passes
 *   footprints_spec_filename although its format string has no conversion.
 * NOTE(review): the "newline was the last in the buffer" test compares
 *   against buf_pos + sizeof buf, which only matches when a read started at
 *   buf and filled it completely; other cases appear to fall through to the
 *   zero-length partial-entry path on the next iteration -- confirm intent.
 * NOTE(review): when fdopen() fails only a message is printed;
 *   __write_footprints / __write_traces stay set with a NULL stream.
 */
static void __attribute__((constructor)) startup(void) { #else #define RETURN_VALUE 0 static void *ignore_ud2_addr; // scratch test code int main(void) { #endif char *debug_level_str = getenv("TRAP_SYSCALLS_DEBUG"); char *footprint_fd_str = getenv("TRAP_SYSCALLS_FOOTPRINT_FD"); char *trace_fd_str = getenv("TRAP_SYSCALLS_TRACE_FD"); char *sleep_for_seconds_str = getenv("TRAP_SYSCALLS_SLEEP_FOR_SECONDS"); char *stop_self_str = getenv("TRAP_SYSCALLS_STOP_SELF"); stop_self = (stop_self_str != NULL); footprints_spec_filename = getenv("TRAP_SYSCALLS_FOOTPRINT_SPEC_FILENAME"); struct timespec one_second = { /* seconds */ 1, /* nanoseconds */ 0 }; if (debug_level_str) debug_level = atoi(debug_level_str); if (trace_fd_str) trace_fd = atoi(trace_fd_str); if (footprint_fd_str) footprint_fd = atoi(footprint_fd_str); if (sleep_for_seconds_str) sleep_for_seconds = atoi(sleep_for_seconds_str); debug_printf(0, "Debug level is %s=%d.\n", debug_level_str, debug_level); if (stop_self) { self_pid = raw_getpid(); debug_printf(0, "TRAP_SYSCALLS_STOP_SELF is set, sending SIGSTOP to self (pid %d)\n", self_pid); raw_kill(self_pid, SIGSTOP); } debug_printf(0, "TRAP_SYSCALLS_SLEEP_FOR_SECONDS is %s, pausing for %d seconds", sleep_for_seconds_str, sleep_for_seconds); for (int i = 0; i < sleep_for_seconds; i++) { raw_nanosleep(&one_second, NULL); debug_printf(0, "."); } debug_printf(0, "\n"); /* Is fd open? If so, it's the input fd for our sanity check info * from systemtap. */ debug_printf(0, "TRAP_SYSCALLS_FOOTPRINT_FD is %s, ", footprint_fd_str); if (footprint_fd > 2) { struct stat buf; int stat_ret = raw_fstat(footprint_fd, &buf); if (stat_ret == 0) { debug_printf(0, "fd %d is open; outputting systemtap cross-check info.\n", footprint_fd); /* PROBLEM: ideally we'd read in the stap script's output ourselves, and process * it at every system call. But by reading in stuff from stap, we're doing more * copying to/from userspace, so creating a feedback loop which would blow up. 
* * Instead we write out what we think we touched, and do a diff outside the process. * This also adds noise to stap's output, but without the feedback cycle: we ourselves * won't read the extra output, hence won't write() more stuff in response. */ __write_footprints = 1; footprints_out = fdopen(footprint_fd, "a"); if (!footprints_out) { debug_printf(0, "Could not open footprints output stream for writing!\n"); } if (footprints_spec_filename) { footprints = parse_footprints_from_file(footprints_spec_filename, &footprints_env); } else { debug_printf(0, "no footprints spec filename provided\n", footprints_spec_filename); } } else { debug_printf(0, "fd %d is closed; skipping systemtap cross-check info.\n", footprint_fd); } } else { debug_printf(0, "skipping systemtap cross-check info\n"); } debug_printf(0, "TRAP_SYSCALLS_TRACE_FD is %s, ", trace_fd_str); if (!trace_fd_str || trace_fd == 2) { debug_printf(0, "dup'ing stderr, "); trace_fd = dup(2); } if (trace_fd >= 0) { struct stat buf; int stat_ret = raw_fstat(trace_fd, &buf); if (stat_ret == 0) { debug_printf(0, "fd %d is open; outputting traces there.\n", trace_fd); __write_traces = 1; traces_out = fdopen(trace_fd, "a"); if (!traces_out) { debug_printf(0, "Could not open traces output stream for writing!\n"); } } else { debug_printf(0, "fd %d is closed; not outputting traces.\n", trace_fd); } } else { debug_printf(0, "not outputting traces.\n"); } int fd = raw_open("/proc/self/maps", O_RDONLY); if (fd != -1) { // we use a simple buffer and a read loop char buf[8192]; unsigned int ret; char *buf_pos = &buf[0]; // the next position to fill in the buffer char *entry_start_pos = &buf[0]; // the position size_t size_requested; do { // read some stuff, perhaps filling up the buffer size_requested = sizeof buf - (buf_pos - buf); ret = raw_read(fd, buf_pos, size_requested); char *buf_limit = buf_pos + ret; assert(buf_limit <= &buf[sizeof buf]); // we have zero or more complete entries in the buffer; iterate over them char 
*seek_pos; while (1) { seek_pos = entry_start_pos; // search forward for a newline while (seek_pos != buf_limit && *seek_pos != '\n') { ++seek_pos; } // did we find one? if (seek_pos == buf_limit) { // no! // but we have a partial entry in the buffer // between entry_start_pos and seek_pos; // copy it to the start, re-set and continue __builtin_memmove(&buf[0], entry_start_pos, seek_pos - entry_start_pos); buf_pos = &buf[seek_pos - entry_start_pos]; entry_start_pos = &buf[0]; break; } else { assert(*seek_pos == '\n'); // we have a complete entry; read it and advance entry_start_pos char debug_buf1[seek_pos - entry_start_pos + 1]; strncpy(debug_buf1, entry_start_pos, seek_pos - entry_start_pos); debug_buf1[sizeof debug_buf1 - 1] = '\0'; debug_printf(1, "DEBUG: entry is: %s\n", debug_buf1); char debug_buf2[buf_pos - buf]; strncpy(debug_buf2, buf, buf_pos - buf); debug_buf2[sizeof debug_buf2 - 1] = '\0'; debug_printf(1, "DEBUG: buffer is: %s", debug_buf2); saw_mapping(entry_start_pos, seek_pos); entry_start_pos = seek_pos + 1; // if the newline was the last in the buffer, break and read more if (entry_start_pos == buf_pos + sizeof buf) { buf_pos = entry_start_pos = &buf[0]; break; } // else we might have another entry; go round again continue; } } } while (ret > 0); raw_close(fd); } /* Install our SIGILL (was SIGTRAP, but that interferes with gdb) handler. * Linux seems to require us to provide a restorer; the code is in restore_rt. */ struct sigaction action = { //.sa_sigaction = &handle_sigtrap, .sa_handler = &handle_sigill, .sa_mask = 0, .sa_flags = /*SA_SIGINFO |*/ 0x04000000u /* SA_RESTORER */ | /*SA_RESTART |*/ SA_NODEFER, .sa_restorer = restore_rt }; struct sigaction oldaction; raw_rt_sigaction(SIGILL, &action, &oldaction); /* Un-executablize our own code, except for the signal handler and the remainder of * this function and those afterwards. * * For this, we need our load address. How can we get this? We've already seen it! 
*/ // long int len = &&exit_and_return - our_text_begin_address; // long int ret; // long int longprot = PROT_NONE; // long int op = SYS_mprotect; // __asm__ (".align 4096"); exit_and_return: //__asm__ volatile ("movq %0, %%rdi # \n\ // movq %1, %%rsi # \n\ // movq %2, %%rdx # \n\ // "FIX_STACK_ALIGNMENT " \n\ // movq %3, %%rax # \n\ // syscall # do the syscall \n\ // "UNFIX_STACK_ALIGNMENT " \n" // : /* no output*/ : "rm"(our_text_begin_address), "rm"(len), "rm"(longprot), "rm"(op) : "%rax", "r12", SYSCALL_CLOBBER_LIST); #ifdef EXECUTABLE // HACK for testing: do a ud2 right now! ignore_ud2_addr = &&ud2_addr; ud2_addr: __asm__ ("ud2\n"); // we must also exit without running any libdl exit handlers, // because we're an executable so our csu/startfiles include some cleanup // that will now cause traps (this isn't necessary in the shared library case) raw_exit(0); #endif return RETURN_VALUE; } // For debug printing inside handle_sigill we have to know // that it's our own debug printing in order to filter it // out of the footprints, hence this noinline function // rather than using the normal macro __attribute__ ((noinline)) static void _handle_sigill_debug_printf(int level, const char *fmt, ...) { va_list vl; va_start(vl, fmt); if ((level) <= debug_level) { vfprintf(*p_err_stream, fmt, vl); fflush(*p_err_stream); } va_end(vl); }
{ #ifndef NDEBUG if (old_num != (bigalloc_num_t) -1 && *(begin + (n-1)) != old_num) abort(); #endif *(begin + (n-1)) = num; } } static void (__attribute__((constructor(101))) init)(void) { if (!pageindex) { write_string("liballocs: process name "); raw_write(2, get_exe_basename(), strlen(get_exe_basename())); write_string(", pid "); int pid = raw_getpid(); char a; _Bool seen_nonzero = 0; #define CHAR_TO_PRINT(ord) ( ((pid/(ord)) % 10) ? \ (seen_nonzero |= 1, '0' + ((pid/(ord)) % 10)) : \ (seen_nonzero ? '0' : ' ')) a = CHAR_TO_PRINT(10000); raw_write(2, &a, 1); a = CHAR_TO_PRINT(1000); raw_write(2, &a, 1); a = CHAR_TO_PRINT(100); raw_write(2, &a, 1); a = CHAR_TO_PRINT(10); raw_write(2, &a, 1); a = CHAR_TO_PRINT(1); raw_write(2, &a, 1); raw_write(2, "\n", 1); #undef CHAR_TO_PRINT /* Mmap our region. We map one 16-bit number for every page in the user address region. */ pageindex = MEMTABLE_NEW_WITH_TYPE(bigalloc_num_t, PAGE_SIZE, (void*) 0, (void*) (MAXIMUM_USER_ADDRESS + 1)); if (pageindex == MAP_FAILED) abort();