/*
 * Map the memory at address p with size sz into component spdid with
 * permission flags.  If p is NULL, allocate a fresh piece of memory.
 * Return spdid's address through daddr and the manager's virtual
 * address through page.
 */
static int
cbuf_alloc_map(spdid_t spdid, vaddr_t *daddr, void **page, void *p,
               unsigned long sz, int flags)
{
        vaddr_t dest;
        int ret = 0;
        void *new_p;

        tracking_start(NULL, CBUF_MAP);
        assert(sz == round_to_page(sz));
        if (!p) {
                new_p = page_alloc(sz/PAGE_SIZE);
                assert(new_p);
                memset(new_p, 0, sz);
        } else {
                new_p = p;
        }

        dest = (vaddr_t)valloc_alloc(cos_spd_id(), spdid, sz/PAGE_SIZE);
        if (unlikely(!dest)) goto free;
        if (!cbuf_map(spdid, dest, new_p, sz, flags)) goto done;

free:
        if (dest) valloc_free(cos_spd_id(), spdid, (void *)dest, 1);
        if (!p)   page_free(new_p, sz/PAGE_SIZE);
        ret = -1;
done:
        if (page) *page = new_p;
        *daddr = dest;
        tracking_end(NULL, CBUF_MAP);

        return ret;
}
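/*
 * Illustrative note (assumption, not part of the original source):
 * round_to_page() is used throughout to round a value down to a page
 * boundary, so sz == round_to_page(sz) asserts that sz is page-aligned.
 * With PAGE_SIZE a power of two, a plausible definition is:
 *
 *     #define round_to_page(x)  ((unsigned long)(x) & ~(PAGE_SIZE - 1))
 *
 * e.g. round_to_page(0x1234) == 0x1000 with 4 KiB pages.
 */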
static void
boot_symb_reify(char *mem, vaddr_t d_addr, vaddr_t symb_addr, u32_t value)
{
        if (round_to_page(symb_addr) == d_addr) {
                u32_t *p;

                p = (u32_t *)(mem + ((PAGE_SIZE - 1) & symb_addr));
                *p = value;
        }
}
pid_t handle_events_until_dump_trap(pid_t wait_for) {
  while (true) {
    event_t e = wait_event(wait_for);
    if (e.signo == SIGTRAP) {
      if (is_dump_sigtrap(e.tid)) {
        clear_trap(e.tid);
        return e.tid;
      } else if (is_hook_sigtrap(e.tid)) {
        assert(tracer_state == TRACER_LOCKED ||
               tracer_state == TRACER_FIRSTTOUCH);
        clear_trap(e.tid);
        tracer_lock_range(e.tid);
        continue;
      } else {
        /* Any other SIGTRAP is a syscall stop. */
        handle_syscall(e.tid);
        ptrace_syscall(e.tid);
        continue;
      }
    } else if (e.signo == SIGSEGV) {
      void *addr = round_to_page(e.sigaddr);
      switch (tracer_state) {
      case TRACER_UNLOCKED:
        /* We should never get a SIGSEGV in the unlocked state! */
        errx(EXIT_FAILURE,
             "SIGSEGV at %p before locking memory during capture\n",
             e.sigaddr);
      case TRACER_FIRSTTOUCH:
        firsttouch_handler(e.tid, addr);
        break;
      case TRACER_LOCKED:
        mru_handler(e.tid, addr);
        break;
      case TRACER_DUMPING:
        dump_handler(e.tid, addr);
        break;
      default:
        assert(false); /* We should never be here. */
      }
      ptrace_syscall(e.tid);
    } else if (e.signo == SIGSTOP) {
      /* A new thread is starting; ignore this event.  The next
         wait_event() call will unblock the thread once its parent
         registers it in the tids array. */
    } else if (e.signo == SIGWINCH) {
      /* Ignore SIGWINCH (tty resize). */
      ptrace_syscall(e.tid);
      continue;
    } else {
      errx(EXIT_FAILURE, "Unexpected signal in wait_sigtrap: %d\n", e.signo);
    }
  }
  debug_print("%s", "\n");
}
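/*
 * Summary of the dispatch above (descriptive only):
 *
 *   SIGTRAP + dump trap -> clear the trap, return the tid to the caller
 *   SIGTRAP + hook trap -> tracer_lock_range() and keep waiting
 *   SIGTRAP (other)     -> treat as a syscall stop and resume
 *   SIGSEGV             -> fault on a protected page, dispatched on
 *                          tracer_state (first-touch, MRU, or dump)
 *   SIGSTOP             -> new thread starting, ignored here
 *   SIGWINCH            -> tty resize, ignored
 */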
static bool is_mru(void *addr) {
  char *start_of_page = round_to_page(addr);
  bool present = false;
  for (int i = 0; i < log_size; i++) {
    if (pages_cache[i] == start_of_page) {
      present = true;
      break;
    }
  }
  return present;
}
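/*
 * Assumption for context: pages_cache appears to hold the log_size most
 * recently touched page addresses (filled elsewhere, presumably in
 * round-robin fashion by mru_handler), so this linear scan stays cheap
 * as long as log_size is small.
 */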
static void
boot_symb_process(struct cobj_header *h, spdid_t spdid, vaddr_t heap_val,
                  char *mem, vaddr_t d_addr, vaddr_t symb_addr)
{
        if (round_to_page(symb_addr) == d_addr) {
                struct cos_component_information *ci;

                ci = (struct cos_component_information *)(mem + ((PAGE_SIZE - 1) & symb_addr));
                /* ci->cos_heap_alloc_extent = ci->cos_heap_ptr; */
                /* ci->cos_heap_allocated = heap_val; */
                if (!ci->cos_heap_ptr) ci->cos_heap_ptr = heap_val;
                ci->cos_this_spd_id = spdid;

                /* Save the address of this page for later retrieval
                 * (e.g. to manipulate the stack pointer). */
                if (!cos_vect_lookup(&spd_info_addresses, spdid)) {
                        boot_spd_set_symbs(h, spdid, ci);
                        cos_vect_add_id(&spd_info_addresses, (void *)round_to_page(ci), spdid);
                }
        }
}
static unsigned long *
map_stack(spdid_t spdid, vaddr_t extern_stk)
{
        static unsigned long *stack = 0;
        vaddr_t extern_addr;

        if (!stack) stack = cos_get_vas_page();
        extern_addr = round_to_page(extern_stk);
        if (stkmgr_stack_introspect(cos_spd_id(), (vaddr_t)stack, spdid, extern_addr)) BUG();

        return stack;
}
static inline struct cos_stk_item *
stkmgr_get_spds_stk_item(spdid_t spdid, vaddr_t a)
{
        struct spd_stk_info *ssi;
        struct cos_stk_item *csi;
        vaddr_t ra = round_to_page(a);

        ssi = get_spd_stk_info(spdid);
        for (csi = FIRST_LIST(&ssi->stk_list, next, prev) ;
             csi != &ssi->stk_list ;
             csi = FIRST_LIST(csi, next, prev)) {
                if (csi->d_addr == ra) return csi;
        }
        return NULL;
}
static int
cbuf_map(spdid_t spdid, vaddr_t daddr, void *page, unsigned long size, int flags)
{
        unsigned long off;

        assert(size == round_to_page(size));
        assert(daddr);
        assert(page);
        for (off = 0 ; off < size ; off += PAGE_SIZE) {
                vaddr_t d = daddr + off;
                if (unlikely(d != (mman_alias_page(cos_spd_id(), ((vaddr_t)page) + off,
                                                   spdid, d, flags)))) {
                        for (d = daddr + off - PAGE_SIZE ; d >= daddr ; d -= PAGE_SIZE) {
                                mman_revoke_page(spdid, d, 0);
                        }
                        return -ENOMEM;
                }
        }
        return 0;
}
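/*
 * Note on the error path above (descriptive): if aliasing the page at
 * offset off fails, the inner loop walks back from daddr + off -
 * PAGE_SIZE down to daddr and revokes every page already aliased, so a
 * failed cbuf_map() leaves no partial mapping behind in spdid.
 */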
static void tracer_lock_range(pid_t child) {
  debug_print("%s %d\n", "START LOCK RANGE", child);

  assert(tracer_state == TRACER_LOCKED);

  ptrace_syscall(child);
  void *from = (void *)receive_from_tracee(child);
  ptrace_syscall(child);
  void *to = (void *)receive_from_tracee(child);

  /* The process must be stopped before we can protect its pages. */
  long unsigned nb_pages_to_allocate = nb_pages_in_range(from, to);

  for (long unsigned i = 0; i < nb_pages_to_allocate; i++)
    if (!is_mru(from + PAGESIZE * i))
      protect_i(child, round_to_page(from + PAGESIZE * i), PAGESIZE);

  ptrace_syscall(child);

  debug_print("%s %d (%p -> %p)\n", "END LOCK RANGE", child, from, to);
}
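/*
 * Hypothetical sketch of the helper used above; the real
 * nb_pages_in_range() lives elsewhere and may differ.  The page count
 * covering [from, to] is the distance between the enclosing page
 * boundaries, in pages, plus one:
 *
 *   static unsigned long nb_pages_in_range(void *from, void *to) {
 *     return ((uintptr_t)round_to_page(to)
 *             - (uintptr_t)round_to_page(from)) / PAGESIZE + 1;
 *   }
 */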
int
cbuf_unmap_at(spdid_t s_spd, unsigned int cbid, spdid_t d_spd, vaddr_t d_addr)
{
        struct cbuf_info *cbi;
        int ret = 0, err = 0;
        u32_t off;

        assert(d_addr);
        CBUF_TAKE();
        cbi = cmap_lookup(&cbufs, cbid);
        if (unlikely(!cbi)) ERR_THROW(-EINVAL, done);
        if (unlikely(cbi->owner.spdid != s_spd)) ERR_THROW(-EPERM, done);
        assert(cbi->size == round_to_page(cbi->size));
        /* Unmap the pages in the d_spd client only. */
        for (off = 0 ; off < cbi->size ; off += PAGE_SIZE)
                err |= mman_release_page(d_spd, d_addr + off, 0);
        err |= valloc_free(s_spd, d_spd, (void *)d_addr, cbi->size/PAGE_SIZE);
        if (unlikely(err)) ERR_THROW(-EFAULT, done);
        assert(!err);
done:
        CBUF_RELEASE();
        return ret;
}
static void tracer_dump(pid_t pid) {
  /* Read arguments from the tracee. */
  handle_events_until_dump_trap(-1);
  register_t ret = get_arg_from_regs(pid);
  assert(ret == TRAP_START_ARGS);

  debug_print("receive string from tracee %d\n", pid);
  ptrace_getdata(pid, (long)tracer_buff->str_tmp, loop_name, SIZE_LOOP);
  ptrace_syscall(pid);

  invocation = (int)receive_from_tracee(pid);
  ptrace_syscall(pid);

  int arg_count = (int)receive_from_tracee(pid);
  ptrace_syscall(pid);

  printf("DUMP( %s %d count = %d) \n", loop_name, invocation, arg_count);

  /* Ensure that the dump directory exists. */
  snprintf(dump_path, sizeof(dump_path), "%s/%s/%s",
           dump_prefix, dump_root, loop_name);
  mkdir(dump_path, 0777);

  snprintf(dump_path, sizeof(dump_path), "%s/%s/%s/%d",
           dump_prefix, dump_root, loop_name, invocation);
  if (mkdir(dump_path, 0777) != 0)
    errx(EXIT_FAILURE, "dump %s already exists, stop\n", dump_path);

  int i;
  void *addresses[arg_count];
  for (i = 0; i < arg_count; i++) {
    addresses[i] = (void *)receive_from_tracee(pid);
    ptrace_syscall(pid);
  }

  /* Wait for the end-of-arguments sigtrap. */
  handle_events_until_dump_trap(pid);
  ret = get_arg_from_regs(pid);
  assert(ret == TRAP_END_ARGS);

  /* Dump the hot pages to disk. */
  flush_hot_pages_trace_to_disk(pid);

  char lel_bin_path[1024];
  /* Hard-link to the original binary. */
  snprintf(lel_bin_path, sizeof(lel_bin_path), "%s/lel_bin", dump_path);
  int res = linkat(AT_FDCWD, "lel_bin", AT_FDCWD, lel_bin_path,
                   AT_SYMLINK_FOLLOW);
  if (res == -1)
    errx(EXIT_FAILURE, "Error copying the dump binary\n");

  for (i = 0; i < arg_count; i++) {
    void *start_of_page = round_to_page(addresses[i]);
    if (start_of_page != NULL) {
      unprotect_i(pid, start_of_page, PAGESIZE);
      dump_page(pid, start_of_page);
    }
  }

  if (firsttouch_active) {
    dump_firsttouch();
  }
  dump_core(arg_count, addresses);
  dump_unprotected_pages(pid);
}
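/*
 * Tracee-side protocol assumed by tracer_dump(), reconstructed from the
 * reads above (not taken from the tracee source): the tracee traps with
 * TRAP_START_ARGS, then hands over one value per ptrace stop -- the
 * loop name string, the invocation number, the argument count, and one
 * address per argument -- and finally traps with TRAP_END_ARGS before
 * the captured pages are flushed to disk.
 */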
static void tracer_lock_mem(pid_t pid) {
  char maps_path[MAX_PATH];
  sprintf(maps_path, "/proc/%d/maps", pid);
  FILE *maps = fopen(maps_path, "r");
  if (!maps)
    errx(EXIT_FAILURE, "Error reading the memory using /proc/ interface");

  debug_print("%s\n", "START LOCK MEM");

  void *addresses[65536];
  char buf[BUFSIZ + 1];
  int counter = 0;

  while (fgets(buf, BUFSIZ, maps)) {
    void *start, *end;
    debug_print("%s", buf);
    sscanf(buf, "%p-%p", &start, &end);

    /* Ignore libdump mem zones. */
    if (strstr(buf, "libcere_dump.so") != NULL) continue;
    /* Ignore libc pages. */
    if (strstr(buf, "linux-gnu") != NULL) continue;
    /* Ignore executable (r-xp) mem zones.  If we do not skip them, the
     * re-link of the dump fails with an ld error like (translated from
     * French):
     *   /usr/bin/ld: section .interp loaded at
     *   [00000000004003c0 -> 00000000004003db]
     *   overlaps section s000000400000 loaded at
     *   [0000000000400000 -> 0000000000400fff] */
    if (strstr(buf, "r-xp") != NULL) continue;
    /* Ignore the vsyscall special mem zone. */
    if (strstr(buf, "vsyscall") != NULL) continue;
    /* Ignore the vdso zone. */
    if (strstr(buf, "vdso") != NULL) continue;
    /* Ignore the vvar zone (cf. https://lkml.org/lkml/2015/3/12/602). */
    if (strstr(buf, "vvar") != NULL) continue;
    /* Ignore already protected pages. */
    if (strstr(buf, "---p") != NULL) continue;

    assert(counter < 65536);
    addresses[counter++] = round_to_page(start);
    addresses[counter++] = end;
  }

  /* Protect all pages recorded in addresses. */
  while (counter > 0) {
    void *end = addresses[--counter];
    void *start = addresses[--counter];
    protect_i(pid, start, (end - start));
  }

  int r = fclose(maps);
  if (r != 0)
    errx(EXIT_FAILURE, "Error closing %s: %s\n", maps_path, strerror(errno));

  debug_print("%s\n", "END LOCK MEM");
}
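/*
 * Assumption about the helpers used above: protect_i()/unprotect_i()
 * must change page protections inside the *tracee* (a process cannot
 * mprotect() another process directly), presumably by injecting the
 * call through tracer_buff.  The intended effect, as if executed by the
 * tracee itself:
 *
 *   mprotect(start, end - start, PROT_NONE);            // protect_i
 *   mprotect(page, PAGESIZE, PROT_READ | PROT_WRITE);   // unprotect_i
 */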
/*
 * For a given principal, collect any cbufs that are neither referenced
 * nor on a free list so that they can be reused.  This is the
 * garbage-collection mechanism.
 *
 * Collect cbufs and add them to the shared component's ring buffer.
 *
 * This function is semantically complicated.  It can return no cbufs
 * even when some are available, in order to force the pool of cbufs to
 * be expanded (the client will call cbuf_create in that case).  Or, in
 * the common case, it returns a number of available cbufs.
 */
int
cbuf_collect(spdid_t spdid, unsigned long size)
{
        struct cbuf_info *cbi;
        struct cbuf_comp_info *cci;
        struct cbuf_shared_page *csp;
        struct cbuf_bin *bin;
        int ret = 0;

        printl("cbuf_collect\n");
        CBUF_TAKE();
        cci = cbuf_comp_info_get(spdid);
        /* Check cci before dereferencing it in tracking_start. */
        if (unlikely(!cci)) ERR_THROW(-ENOMEM, done);
        tracking_start(&cci->track, CBUF_COLLECT);
        if (size + cci->allocated_size <= cci->target_size) goto done;

        csp = cci->csp;
        if (unlikely(!csp)) ERR_THROW(-EINVAL, done);
        assert(csp->ring.size == CSP_BUFFER_SIZE);
        ret = CK_RING_SIZE(cbuf_ring, &csp->ring);
        if (ret != 0) goto done;
        /*
         * Go through all the cbufs we own and report every one that has
         * no current references.  Unfortunately, this is O(N*M),
         * N = min(num cbufs, PAGE_SIZE/sizeof(int)), M = num components.
         */
        size = round_up_to_page(size);
        bin  = cbuf_comp_info_bin_get(cci, size);
        if (!bin) ERR_THROW(0, done);
        cbi = bin->c;
        do {
                if (!cbi) break;
                /*
                 * Skip cbufs which are on the freelist.  This
                 * coordinates with cbuf_free to detect such cbufs
                 * correctly.  We must check refcnt first and the "next"
                 * pointer second.
                 *
                 * If we did not check refcnt at all: the manager could
                 * check "next" before cbuf_free runs (while it is still
                 * NULL), then be preempted by the client calling
                 * cbuf_free, which sets "next", decreases refcnt, and
                 * adds the cbuf to the freelist.  Back in the manager,
                 * it would collect this in-freelist cbuf.
                 *
                 * Furthermore, refcnt must be checked before "next": if
                 * not, similarly to the case above, the manager may be
                 * preempted between checking "next" and refcnt, find
                 * "next" NULL and refcnt 0, and collect the cbuf.
                 * Short-circuit evaluation prevents reordering of the
                 * two checks.
                 */
                assert(cbi->owner.m);
                if (!CBUF_REFCNT(cbi->owner.m) && !CBUF_IS_IN_FREELIST(cbi->owner.m)
                    && !cbuf_referenced(cbi)) {
                        struct cbuf_ring_element el = { .cbid = cbi->cbid };
                        cbuf_references_clear(cbi);
                        if (!CK_RING_ENQUEUE_SPSC(cbuf_ring, &csp->ring, &el)) break;
                        /*
                         * Prevent other collections from collecting
                         * these cbufs.  The manager checks whether the
                         * shared ring buffer is empty on entry and
                         * returns if it is not.  That alone is not
                         * enough to prevent double collection.  The
                         * corner case: after the last element in the
                         * ring buffer is dequeued and before it is
                         * added to the freelist, the manager runs again
                         * and may collect that last one a second time.
                         */
                        cbi->owner.m->next = (struct cbuf_meta *)1;
                        if (++ret == CSP_BUFFER_SIZE) break;
                }
                cbi = FIRST_LIST(cbi, next, prev);
        } while (cbi != bin->c);
        if (ret) cbuf_thd_wake_up(cci, ret * size);

done:
        if (likely(cci)) tracking_end(&cci->track, CBUF_COLLECT);
        CBUF_RELEASE();
        return ret;
}

/*
 * Called by cbuf_deref.
 */
int
cbuf_delete(spdid_t spdid, unsigned int cbid)
{
        struct cbuf_comp_info *cci;
        struct cbuf_info *cbi;
        struct cbuf_meta *meta;
        int ret = -EINVAL;

        printl("cbuf_delete\n");
        CBUF_TAKE();
        tracking_start(NULL, CBUF_DEL);

        cci = cbuf_comp_info_get(spdid);
        if (unlikely(!cci)) goto done;
        cbi = cmap_lookup(&cbufs, cbid);
        if (unlikely(!cbi)) goto done;
        meta = cbuf_meta_lookup(cci, cbid);

        /*
         * Other threads can access the meta data simultaneously.  For
         * example, others can call cbuf2buf, which increases the refcnt.
         */
        CBUF_REFCNT_ATOMIC_DEC(meta);
        /* Find the owner of this cbuf. */
        if (cbi->owner.spdid != spdid) {
                cci = cbuf_comp_info_get(cbi->owner.spdid);
                if (unlikely(!cci)) goto done;
        }
        if (cbuf_free_unmap(cci, cbi)) goto done;
        if (cci->allocated_size < cci->target_size) {
                cbuf_thd_wake_up(cci, cci->target_size - cci->allocated_size);
        }
        ret = 0;
done:
        tracking_end(NULL, CBUF_DEL);
        CBUF_RELEASE();
        return ret;
}

/*
 * Called by cbuf2buf to retrieve a given cbid.
 */
int
cbuf_retrieve(spdid_t spdid, unsigned int cbid, unsigned long size)
{
        struct cbuf_comp_info *cci, *own;
        struct cbuf_info *cbi;
        struct cbuf_meta *meta, *own_meta;
        struct cbuf_maps *map;
        int ret = -EINVAL;

        printl("cbuf_retrieve\n");
        CBUF_TAKE();
        tracking_start(NULL, CBUF_RETRV);

        cci = cbuf_comp_info_get(spdid);
        if (!cci) { printd("no cci\n"); goto done; }
        cbi = cmap_lookup(&cbufs, cbid);
        if (!cbi) { printd("no cbi\n"); goto done; }
        /* You shouldn't cbuf2buf your own buffer! */
        if (cbi->owner.spdid == spdid) { printd("owner\n"); goto done; }
        meta = cbuf_meta_lookup(cci, cbid);
        if (!meta) { printd("no meta\n"); goto done; }
        assert(!(meta->nfo & ~CBUF_INCONSISENT));
        /* Check the size before allocating map, so a failure cannot leak it. */
        if (size > cbi->size) { printd("too big\n"); goto done; }
        map = malloc(sizeof(struct cbuf_maps));
        if (!map) { printd("no map\n"); ERR_THROW(-ENOMEM, done); }
        assert(round_to_page(cbi->size) == cbi->size);
        size = cbi->size;
        /* TODO: change to MAPPING_READ */
        if (cbuf_alloc_map(spdid, &map->addr, NULL, cbi->mem, size, MAPPING_RW)) {
                printc("cbuf mgr map fail spd %d mem %p sz %lu cbid %u\n",
                       spdid, cbi->mem, size, cbid);
                goto free;
        }

        INIT_LIST(map, next, prev);
        ADD_LIST(&cbi->owner, map, next, prev);
        CBUF_PTR_SET(meta, map->addr);
        map->spdid = spdid;
        map->m     = meta;
        meta->sz   = cbi->size >> PAGE_ORDER;
        meta->cbid_tag.cbid = cbid;

        own = cbuf_comp_info_get(cbi->owner.spdid);
        if (unlikely(!own)) goto done;
        /*
         * We need to inherit the relinquish bit from the sender.
         * Otherwise, this cbuf can never be returned to the manager.
         */
        own_meta = cbuf_meta_lookup(own, cbid);
        if (CBUF_RELINQ(own_meta)) CBUF_FLAG_ADD(meta, CBUF_RELINQ);
        ret = 0;
done:
        tracking_end(NULL, CBUF_RETRV);
        CBUF_RELEASE();
        return ret;
free:
        free(map);
        goto done;
}

vaddr_t
cbuf_register(spdid_t spdid, unsigned int cbid)
{
        struct cbuf_comp_info *cci;
        struct cbuf_meta_range *cmr;
        void *p;
        vaddr_t dest, ret = 0;

        printl("cbuf_register\n");
        CBUF_TAKE();
        tracking_start(NULL, CBUF_REG);

        cci = cbuf_comp_info_get(spdid);
        if (unlikely(!cci)) goto done;
        cmr = cbuf_meta_lookup_cmr(cci, cbid);
        if (cmr) ERR_THROW(cmr->dest, done);

        /* Create the mapping into the client. */
        if (cbuf_alloc_map(spdid, &dest, &p, NULL, PAGE_SIZE, MAPPING_RW)) goto done;
        assert((unsigned int)p == round_to_page(p));
        cmr = cbuf_meta_add(cci, cbid, p, dest);
        assert(cmr);
        ret = cmr->dest;
done:
        tracking_end(NULL, CBUF_REG);
        CBUF_RELEASE();
        return ret;
}

static void
cbuf_shrink(struct cbuf_comp_info *cci, int diff)
{
        int i, sz;
        struct cbuf_bin *bin;
        struct cbuf_info *cbi, *next;

        for (i = cci->nbin - 1 ; i >= 0 ; i--) {
                bin = &cci->cbufs[i];
                sz  = (int)bin->size;
                if (!bin->c) continue;
                cbi = FIRST_LIST(bin->c, next, prev);
                while (cbi != bin->c) {
                        next = FIRST_LIST(cbi, next, prev);
                        if (!cbuf_free_unmap(cci, cbi)) {
                                diff -= sz;
                                if (diff <= 0) return;
                        }
                        cbi = next;
                }
                if (!cbuf_free_unmap(cci, cbi)) {
                        diff -= sz;
                        if (diff <= 0) return;
                }
        }
        if (diff > 0) cbuf_mark_relinquish_all(cci);
}
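/*
 * Illustrative client-side sequence (not code from this manager): a
 * component consuming a buffer owned by another component typically
 * triggers, in order, cbuf_register() to map the meta-data page for the
 * cbid, cbuf_retrieve() (via cbuf2buf) to alias the owner's pages into
 * its own address space, and cbuf_delete() (via cbuf_deref) once its
 * reference is dropped; cbuf_collect() then recycles buffers whose
 * refcnt has reached zero, and cbuf_shrink() trims a component back
 * toward its target allocation.
 */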
void dump_init(void) {
  /* state.mtrace_active = false; */
  mtrace_active = false;

  /* Copy the original binary. */
  char buf[BUFSIZ];
  snprintf(buf, sizeof buf, "cp /proc/%d/exe lel_bin", getpid());
  int ret = system(buf);
  if (ret != 0) {
    errx(EXIT_FAILURE, "lel_bin copy failed: %s.\n", strerror(errno));
  }

  /* Configure atexit. */
  atexit(dump_close);

  pid_t child = 0;
  child = fork();
  if (child == (pid_t)-1) {
    errx(EXIT_FAILURE, "fork() failed: %s.\n", strerror(errno));
  }

  /* If we are the parent, replace ourselves with the tracer. */
  if (child != 0) {
    char args[2][32];
    snprintf(args[0], sizeof(args[0]), "%d", child);
    snprintf(args[1], sizeof(args[1]), "%p", &tracer_buff);
    char *const arg[] = {"cere-tracer", args[0], args[1], NULL};
    execvp("cere-tracer", arg);
    errx(EXIT_FAILURE, "Failed to exec cere-tracer: %s\n", strerror(errno));
  } else {
    /* Give the DUMPABLE capability, required by ptrace. */
    if (prctl(PR_SET_DUMPABLE, (long)1) != 0) {
      errx(EXIT_FAILURE, "Prctl : %s\n", strerror(errno));
    }
    /* Make tracer_buff executable. */
    if (mprotect(round_to_page(&tracer_buff), PAGESIZE,
                 (PROT_READ | PROT_WRITE | PROT_EXEC)) != 0) {
      errx(EXIT_FAILURE, "Failed to make tracer buff executable : %s\n",
           strerror(errno));
    }
    /* Request to be traced. */
    if (ptrace(PTRACE_TRACEME, 0, 0, 0) == -1) {
      errx(EXIT_FAILURE, "ptrace(PTRACE_ME) : %s\n", strerror(errno));
    }
    debug_print("requesting ptrace from %d\n", getpid());
    raise(SIGSTOP);
    dump_initialized = true;
  }

  debug_print("%s", "DUMP_INIT DONE\n");
}
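/*
 * Start-up handshake implied by dump_init() (descriptive): after the
 * fork, the parent replaces itself with the external "cere-tracer"
 * binary, passing the child's pid and the address of tracer_buff on the
 * command line.  The child continues as the traced program: it marks
 * itself dumpable, makes tracer_buff executable so the tracer can
 * inject code there, requests PTRACE_TRACEME, and stops on SIGSTOP
 * until the tracer attaches and resumes it.
 */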