pid_t handle_events_until_dump_trap(pid_t wait_for) { while (true) { event_t e = wait_event(wait_for); if (e.signo == SIGTRAP) { if (is_dump_sigtrap(e.tid)) { clear_trap(e.tid); return e.tid; } else if (is_hook_sigtrap(e.tid)) { assert(tracer_state == TRACER_LOCKED || tracer_state == TRACER_FIRSTTOUCH); clear_trap(e.tid); tracer_lock_range(e.tid); continue; } else { // If we arrive here, it is a syscall handle_syscall(e.tid); ptrace_syscall(e.tid); continue; } } else if (e.signo == SIGSEGV) { void *addr = round_to_page(e.sigaddr); switch (tracer_state) { case TRACER_UNLOCKED: /* We should never get a sigsegv in unlocked state ! */ errx(EXIT_FAILURE, "SIGSEGV at %p before locking memory during capture\n", e.sigaddr); case TRACER_FIRSTTOUCH: firsttouch_handler(e.tid, addr); break; case TRACER_LOCKED: mru_handler(e.tid, addr); break; case TRACER_DUMPING: dump_handler(e.tid, addr); break; default: assert(false); /* we should never be here */ } ptrace_syscall(e.tid); } else if (e.signo == SIGSTOP) { /* A new thread is starting, ignore this event, next wait_event call will unblock the thread once its parents registers it in tids array */ } else if (e.signo == SIGWINCH) { /* Ignore signal SIGWINCH, tty resize */ ptrace_syscall(e.tid); continue; } else { errx(EXIT_FAILURE, "Unexpected signal in wait_sigtrap: %d\n", e.signo); } } debug_print("%s", "\n"); }
static void tracer_lock_range(pid_t child) { debug_print("%s %d\n", "START LOCK RANGE", child); assert(tracer_state == TRACER_LOCKED); ptrace_syscall(child); void *from = (void *)receive_from_tracee(child); ptrace_syscall(child); void *to = (void *)receive_from_tracee(child); /* We need that the process be stopped to protect */ long unsigned nb_pages_to_allocate = nb_pages_in_range(from, to); for (long unsigned i = 0; i < nb_pages_to_allocate; i++) if (!is_mru(from + PAGESIZE * i)) protect_i(child, round_to_page(from + PAGESIZE * i), PAGESIZE); ptrace_syscall(child); debug_print("%s %d (%p -> %p)\n", "END LOCK RANGE", child, from, to); }
int main(int argc, char *argv[]) { pid_t child = 0; siginfo_t sig; if (argc != 3) { errx(EXIT_FAILURE, "usage: %s pid tracer_buff_address\n", argv[0]); } dump_prefix = getenv("CERE_WORKING_PATH"); if(!dump_prefix) { debug_print("%s\n", "CERE_WORKING_PATH not defined, using defaut cere dir.\n"); dump_prefix = ".cere"; } char * ft = getenv("CERE_FIRSTTOUCH"); if (ft && strcmp("TRUE", ft) == 0) { firsttouch_active = true; debug_print("%s\n", "First touch capture is active"); } child = atoi(argv[1]); sscanf(argv[2], "%p", &tracer_buff); tracer_init(child); /* Wait for lock_mem trap */ pid_t tid = handle_events_until_dump_trap(-1); register_t ret = get_arg_from_regs(tid); assert(ret == TRAP_LOCK_MEM); stop_all_except(tid); tracer_lock_mem(tid); debug_print("%s\n", "******* TRACER_LOCKED"); tracer_state = TRACER_LOCKED; continue_all(); /* Dump arguments */ tracer_dump(tid); debug_print("%s\n", "******* TRACER_DUMPING"); tracer_state = TRACER_DUMPING; ptrace_syscall(tid); while (1) { handle_events_until_dump_trap(-1); } }
static void tracer_dump(pid_t pid) { /* Read arguments from tracee */ handle_events_until_dump_trap(-1); register_t ret = get_arg_from_regs(pid); assert(ret == TRAP_START_ARGS); debug_print("receive string from tracee %d\n", pid); ptrace_getdata(pid, (long) tracer_buff->str_tmp, loop_name, SIZE_LOOP); ptrace_syscall(pid); invocation = (int)receive_from_tracee(pid); ptrace_syscall(pid); int arg_count = (int)receive_from_tracee(pid); ptrace_syscall(pid); printf("DUMP( %s %d count = %d) \n", loop_name, invocation, arg_count); /* Ensure that the dump directory exists */ snprintf(dump_path, sizeof(dump_path), "%s/%s/%s", dump_prefix, dump_root, loop_name); mkdir(dump_path, 0777); snprintf(dump_path, sizeof(dump_path), "%s/%s/%s/%d", dump_prefix, dump_root, loop_name, invocation); if (mkdir(dump_path, 0777) != 0) errx(EXIT_FAILURE, "dump %s already exists, stop\n", dump_path); int i; void *addresses[arg_count]; for (i = 0; i < arg_count; i++) { addresses[i] = (void *)receive_from_tracee(pid); ptrace_syscall(pid); } /* Wait for end of arguments sigtrap */ handle_events_until_dump_trap(pid); ret = get_arg_from_regs(pid); assert(ret == TRAP_END_ARGS); /* Dump hotpages to disk */ flush_hot_pages_trace_to_disk(pid); char lel_bin_path[1024]; /* Link to the original binary */ snprintf(lel_bin_path, sizeof(lel_bin_path), "%s/lel_bin", dump_path); int res = linkat(AT_FDCWD, "lel_bin", AT_FDCWD, lel_bin_path, AT_SYMLINK_FOLLOW); if (res == -1) errx(EXIT_FAILURE, "Error copying the dump binary\n"); for (i = 0; i < arg_count; i++) { void *start_of_page = round_to_page(addresses[i]); if (start_of_page != NULL) { unprotect_i(pid, start_of_page, PAGESIZE); dump_page(pid, start_of_page); } } if (firsttouch_active) { dump_firsttouch(); } dump_core(arg_count, addresses); dump_unprotected_pages(pid); }
static void inject_memory(void) { struct mem_region * stack_r = NULL; SYS_TRACE("nr_regions=%d\n", cf->nr_regions); /* for each mem region in ckpt file */ for (int i = 0; i < cf->nr_regions; i++) { struct mem_region * r = cf->regions[i]; SYS_TRACE("range %d: 0x%x--0x%x (0x%x:0x%x): %s\n", i, r->start, r->end, r->prot, r->offset, r->fn); /* find the region already mapped */ struct proc_entry e; bool_t res; uint32_t start, end; start = r->start; end = r->end; do { res = proc_find_in_range(&e, child_pid, start, end); if (res) { start = e.end; SYS_TRACE("\talready mapped in target: 0x%x--0x%x (0x%x:0x%x): %s\n", e.start, e.end, e.prot, e.offset, e.fn); /* check if 2 maps are same */ if ((e.start != r->start) || (e.end != r->end) || (e.prot != r->prot) || (strcmp(e.fn, r->fn) != 0)) { /* different */ /* special treat [heap] and [stack] */ if (strcmp(e.fn, "[heap]") == 0) { /* heap size is different */ /* that shouldn't happen, we restore heap address before * calling this function*/ THROW(EXCEPTION_FATAL, "heap address inconsistent"); } else if (strcmp(e.fn, "[stack]") == 0) { if (strcmp(r->fn, "[stack]") != 0) { /* that shouldn't happen... */ THROW(EXCEPTION_FATAL, "stack inconsistent"); } /* stack can auto expand */ } else if (strcmp(e.fn, "[vdso]") == 0) { THROW(EXCEPTION_FATAL, "vdso inconsistent"); } else { /* unmap the already mapped file */ /* first, check eip */ struct user_regs_struct regs = ptrace_peekuser(); if ((regs.eip >= e.start) && (regs.eip < e.end)) { /* target ckpt is special: it unmap itself... */ /* currently we don't support it */ THROW(EXCEPTION_FATAL, "eip (0x%x) inconsistent", (uint32_t)regs.eip); } /* now we can safely unmap that file or memregion */ int err; SYS_TRACE("\tunmap 0x%x--0x%x\n", e.start, e.end); err = ptrace_syscall(munmap, 2, e.start, e.end - e.start); if (err < 0) THROW(EXCEPTION_FATAL, "unmap memrigon 0x%x--0x%x failed: %d", e.start, e.end, err); } } } else { start = r->end; } } while (start < end); /* now the region has been cleaned up, there may be 2 situations: * 1. the desired region is empty; * 2. the desired region is mapped, but the same as ckpt. * here we use procutils to check it again. */ res = proc_find_in_range(&e, child_pid, r->start, r->end); if (res) { SYS_TRACE("\tdesired region is mapped\n"); if ((e.start != r->start) || (e.end != r->end) || (e.prot != r->prot) || (strcmp(e.fn, r->fn) != 0)) { /* if the lower end of stack inconsistent, don't care. * stack is auto expandable. */ /* NOT(e.fn same as r->fn and e.fn is "[stack]") */ if (!(!strcmp(e.fn, r->fn) && !strncmp(e.fn, "[stack]", 7))) { INJ_FATAL("this region is not cleaned up:\n"); INJ_FATAL("\tstart : 0x%x 0x%x\n", e.start, r->start); INJ_FATAL("\tend : 0x%x 0x%x\n", e.end, r->end); INJ_FATAL("\tprot : 0x%x 0x%x\n", e.prot, r->prot); INJ_FATAL("\tfn: : \"%s\" \"%s\"\n", e.fn, r->fn); THROW(EXCEPTION_FATAL, "Shouldn't happed...\n"); } } } else { SYS_TRACE("\tdesired region is unmapped\n"); /* from the ckpt, find the file and do the map */ uint32_t map_addr = r->start; uint32_t size = r->end - r->start; uint32_t prot = r->prot; uint32_t flags = MAP_FIXED | MAP_EXECUTABLE | MAP_PRIVATE; if ((r->fn_len <= 1) || (r->fn[0] == '\0')) { /* this is not a file map */ uint32_t ret_addr; flags |= MAP_ANONYMOUS; SYS_TRACE("\tdo the anonymouse map\n"); ret_addr = ptrace_syscall(mmap2, 6, map_addr, size, prot, flags, 0, 0); CTHROW(map_addr == ret_addr, "mmap2 failed, return 0x%x, not 0x%x", ret_addr, map_addr); } else { /* this is a file map */ uint32_t fn_pos; /* push the filename */ fn_pos = ptrace_push(r->fn, strlen(r->fn) + 1, TRUE); int fd = ptrace_syscall(open, 3, fn_pos, O_RDONLY, 0); CTHROW(fd >= 0, "open file %s failed: %d", r->fn, fd); SYS_TRACE("\tdo the map\n"); uint32_t off = r->offset; uint32_t ret_addr = ptrace_syscall(mmap2, 6, map_addr, size, prot, flags, fd, off >> PAGE_SHIFT); CTHROW(ret_addr == map_addr, "mmap2 file %s failed: return 0x%x", r->fn, ret_addr); ptrace_syscall(close, 1, fd); } } /* now the memory region has been built up, we then poke * memory into it */ /* don't update stack here. although in most case the stack * is the last region we meet, there are some special situations. * for example in compat memlayout. the stack may be polluted by * pervious ptrace_push operation. */ if (strcmp(r->fn, "[stack]") == 0) stack_r = r; else if (strcmp(r->fn, "[vdso]") != 0) /* chkp don't contain vdso */ ptrace_updmem(r->f_pos + cf->ckpt_img, r->start, r->end - r->start); } CTHROW(stack_r != NULL, "no \"[stack]\" found"); /* we poke stack at last */ ptrace_updmem(stack_r->f_pos + cf->ckpt_img, stack_r->start, stack_r->end - stack_r->start); }
static void gdbloader_main(const char * target_fn) { /* check: target_fn should be same as argv[0] */ if (strcmp(target_fn, cf->cmdline[0]) != 0) { SYS_FATAL("target should be %s, not %s\n", cf->cmdline[0], target_fn); THROW(EXCEPTION_FATAL, "cmdline error"); } /* execve child */ child_pid = ptrace_execve(target_fn, cf->cmdline, cf->environ); /* inject memory */ /* before we inject memory, we need to restore heap */ uint32_t heap_end; heap_end = ptrace_syscall(brk, 1, cf->state->brk); CTHROW(heap_end == cf->state->brk, "restore heap failed: %d", heap_end); SYS_TRACE("restore heap to 0x%x\n", heap_end); inject_memory(); /* then, we retrive the inject so file, enter from * __debug_entry. we need to push: * nothing. * process can retrive all from state vector. * and we cannot use stack now * */ /* NOTICE: the state_vector should be saved in the ckpt memory, * we needn't restore them in ptrace process. let the inject so * to do it. */ /* from the opts get the so-file bias */ uint32_t inj_bias = opts->inj_bias; /* use procutils to get the file */ struct proc_entry e; e.start = inj_bias; e.bits = PE_START; proc_fill_entry(&e, child_pid); SYS_TRACE("inject so is %s\n", e.fn); /* use elfutils to retrive the symbol */ void * img = load_file(e.fn); struct elf_handler * inj_so = elf_init(img, inj_bias); uintptr_t debug_entry = elf_get_symbol_address(inj_so, opts->entry); SYS_TRACE("symbol %s at 0x%x\n", opts->entry, debug_entry); /* inject the injector opts */ inject_injopts(inj_so); elf_cleanup(inj_so); free(img); /* we have to restore register here... */ SYS_FORCE("pid=%d\n", child_pid); SYS_FORCE("eip=0x%x\n", cf->state->regs.eip); ptrace_pokeuser(cf->state->regs); SYS_TRACE("eax=0x%x\n", cf->state->regs.eax); SYS_TRACE("ebx=0x%x\n", cf->state->regs.ebx); SYS_TRACE("ecx=0x%x\n", cf->state->regs.ecx); SYS_TRACE("edx=0x%x\n", cf->state->regs.edx); SYS_TRACE("esi=0x%x\n", cf->state->regs.esi); SYS_TRACE("edi=0x%x\n", cf->state->regs.edi); SYS_TRACE("ebp=0x%x\n", cf->state->regs.ebp); SYS_TRACE("esp=0x%x\n", cf->state->regs.esp); // SYS_TRACE("gs=0x%x\n", cf->state->regs.gs); // SYS_TRACE("es=0x%x\n", cf->state->regs.es); /* we push eip at the top of the new stack */ ptrace_push(&cf->state->regs.eip, sizeof(uint32_t), FALSE); /* fix libpthread problem: * * when gdb attaches to target, if it find libpthread, gdb * will try to use libthread_db to retrive thread-local info. * some data, like `errno', is TLS and need those info. * * When gdb does the work, it use ptrace to peek memory from target image. * so gdb will see the original thread info, the tid is different from * current pid, therefore gdb will think there are at least 2 threads and * then it will try to attach to the 'old' one and definitely fail. When this * failure occures, gdb print a warning message. * * We have 2 ways to solve this problem: * * 1. add a syscall into kernel's code, change its pid. it is simple. * 2. change the image when gdb attach. * * We choose the 2nd one because we prefer user space solution. * * */ uint32_t sym_stack_used = 0, sym_stack_user = 0; if (opts->fix_pthread_tid) { fix_libpthread(&sym_stack_used, &sym_stack_user); SYS_WARNING("sym_stack_used=0x%x, sym_stack_user=0x%x\n", sym_stack_used, sym_stack_user); } /* we push those 2 addresses onto the stack */ ptrace_push(&sym_stack_used, sizeof(uint32_t), FALSE); ptrace_push(&sym_stack_user, sizeof(uint32_t), FALSE); /* move eip and detach, let the target process to run */ ptrace_goto(debug_entry); /* detach in main */ return; }
static void map_wrap_so(const char * so_file, uintptr_t load_bias, uint32_t * pvdso_entrance, uint32_t * pvdso_ehdr) { uint32_t name_pos; int fd, err; struct stat s; err = stat(so_file, &s); assert_errno_throw("stat file %s failed", so_file); assert_throw(S_ISREG(s.st_mode), "file %s not a regular file", so_file); /* don't use off_t, it may not be a 32 bit word! */ int32_t fsize = s.st_size; SYS_TRACE("desired so file length is %d\n", fsize); /* elf operations */ void * so_image = load_file(so_file); struct elf_handler * h = elf_init(so_image, load_bias); /* load program headers */ int nr_phdr = 0; struct elf32_phdr * phdr = elf_get_phdr_table(h, &nr_phdr); assert_throw(((phdr != NULL) && (nr_phdr != 0)), "load phdr of file %s failed\n", so_file); /* find the entry symbol */ uintptr_t entry_addr = elf_get_symbol_address(h, "syscall_wrapper_entrace"); SYS_TRACE("wrapper func address will be 0x%x\n", entry_addr); name_pos = ptrace_push(so_file, strlen(so_file), TRUE); fd = ptrace_syscall(open, 3, name_pos, O_RDONLY, 0); assert_throw(fd >= 0, "open sofile for child failed, return %d", fd); SYS_TRACE("open so file for child, fd=%d\n", fd); /* for each program header */ for (int i = 0; i < nr_phdr; i++, phdr ++) { SYS_FORCE("phdr %d, type=0x%x, flag=0x%x\n", i, phdr->p_type, phdr->p_flags); if (phdr->p_type != PT_LOAD) continue; int elf_prot = 0, elf_flags = 0; if (phdr->p_flags & PF_R) elf_prot |= PROT_READ; if (phdr->p_flags & PF_W) elf_prot |= PROT_WRITE; if (phdr->p_flags & PF_X) elf_prot |= PROT_EXEC; elf_flags = MAP_PRIVATE | MAP_EXECUTABLE; unsigned long size = phdr->p_filesz + ELF_PAGEOFFSET(phdr->p_vaddr); unsigned long off = phdr->p_offset - ELF_PAGEOFFSET(phdr->p_vaddr); int32_t map_addr = load_bias + phdr->p_vaddr - ELF_PAGEOFFSET(phdr->p_vaddr); map_addr = ptrace_syscall(mmap2, 6, map_addr, size, elf_prot, elf_flags | MAP_FIXED, fd, off); assert_throw(map_addr != 0xffffffff, "map wrap so failed, return 0x%x", map_addr); } elf_cleanup(h); free(so_image); if (pvdso_ehdr) *pvdso_ehdr = load_bias; if (pvdso_entrance) *pvdso_entrance = entry_addr; }