mach_error_t mach_inject( const mach_inject_entry threadEntry, const void *paramBlock, size_t paramSize, pid_t targetProcess, vm_size_t stackSize ) { assert( threadEntry ); assert( targetProcess > 0 ); assert( stackSize == 0 || stackSize > 1024 ); // Find the image. const void *image; unsigned long imageSize; unsigned int jumpTableOffset; unsigned int jumpTableSize; mach_error_t err = machImageForPointer( threadEntry, &image, &imageSize, &jumpTableOffset, &jumpTableSize ); // Initialize stackSize to default if requested. if( stackSize == 0 ) /** @bug We only want an 8K default, fix the plop-in-the-middle code below. */ stackSize = 16 * 1024; // Convert PID to Mach Task ref. mach_port_t remoteTask = 0; if( !err ) { err = task_for_pid( mach_task_self(), targetProcess, &remoteTask ); #if defined(__i386__) if (err == 5) fprintf(stderr, "Could not access task for pid %d. You probably need to add user to procmod group\n", targetProcess); #endif } /** @todo Would be nice to just allocate one block for both the remote stack *and* the remoteCode (including the parameter data block once that's written. */ // Allocate the remoteStack. vm_address_t remoteStack = (vm_address_t)NULL; if( !err ) err = vm_allocate( remoteTask, &remoteStack, stackSize, 1 ); // Allocate the code. vm_address_t remoteCode = (vm_address_t)NULL; if( !err ) err = vm_allocate( remoteTask, &remoteCode, imageSize, 1 ); if( !err ) { ASSERT_CAST( pointer_t, image ); #if defined (__ppc__) || defined (__ppc64__) err = vm_write( remoteTask, remoteCode, (pointer_t) image, imageSize ); #elif defined (__i386__) // on intel, jump table use relative jump instructions (jmp), which means // the offset needs to be corrected. We thus copy the image and fix the offset by hand. ptrdiff_t fixUpOffset = (ptrdiff_t) (image - remoteCode); void * fixedUpImage = fixedUpImageFromImage(image, imageSize, jumpTableOffset, jumpTableSize, fixUpOffset); err = vm_write( remoteTask, remoteCode, (pointer_t) fixedUpImage, imageSize ); free(fixedUpImage); #endif } // Allocate the paramBlock if specified. vm_address_t remoteParamBlock = (vm_address_t)NULL; if( !err && paramBlock != NULL && paramSize ) { err = vm_allocate( remoteTask, &remoteParamBlock, paramSize, 1 ); if( !err ) { ASSERT_CAST( pointer_t, paramBlock ); err = vm_write( remoteTask, remoteParamBlock, (pointer_t) paramBlock, paramSize ); } } // Calculate offsets. ptrdiff_t threadEntryOffset, imageOffset; if( !err ) { //assert( (void*)threadEntry >= image && (void*)threadEntry <= (image+imageSize) ); ASSERT_CAST( void*, threadEntry ); threadEntryOffset = ((void*) threadEntry) - image; ASSERT_CAST( void*, remoteCode ); imageOffset = ((void*) remoteCode) - image; } // Allocate the thread. thread_act_t remoteThread; #if defined (__ppc__) || defined (__ppc64__) if( !err ) { ppc_thread_state_t remoteThreadState; /** @bug Stack math should be more sophisticated than this (ala redzone). */ remoteStack += stackSize / 2; bzero( &remoteThreadState, sizeof(remoteThreadState) ); ASSERT_CAST( unsigned int, remoteCode ); remoteThreadState.srr0 = (unsigned int) remoteCode; remoteThreadState.srr0 += threadEntryOffset; assert( remoteThreadState.srr0 < (remoteCode + imageSize) ); ASSERT_CAST( unsigned int, remoteStack ); remoteThreadState.r1 = (unsigned int) remoteStack; ASSERT_CAST( unsigned int, imageOffset ); remoteThreadState.r3 = (unsigned int) imageOffset; ASSERT_CAST( unsigned int, remoteParamBlock ); remoteThreadState.r4 = (unsigned int) remoteParamBlock; ASSERT_CAST( unsigned int, paramSize ); remoteThreadState.r5 = (unsigned int) paramSize; ASSERT_CAST( unsigned int, 0xDEADBEEF ); remoteThreadState.lr = (unsigned int) 0xDEADBEEF; #if 0 printf( "remoteCode start: %p\n", (void*) remoteCode ); printf( "remoteCode size: %ld\n", imageSize ); printf( "remoteCode pc: %p\n", (void*) remoteThreadState.srr0 ); printf( "remoteCode end: %p\n", (void*) (((char*)remoteCode)+imageSize) ); fflush(0); #endif err = thread_create_running( remoteTask, PPC_THREAD_STATE, (thread_state_t) &remoteThreadState, PPC_THREAD_STATE_COUNT, &remoteThread ); }
mach_error_t mach_inject( const mach_inject_entry threadEntry, const void *paramBlock, size_t paramSize, pid_t targetProcess, vm_size_t stackSize ) { ;//assertCodePtr( threadEntry ); ;//assertPtrIfNotNull( paramBlock ); ;//assertPositive( targetProcess ); ;//assertIsTrue( stackSize == 0 || stackSize > 1024 ); // Find the image. const void *image; unsigned long imageSize; mach_error_t err = machImageForPointer( threadEntry, &image, &imageSize ); // Initialize stackSize to default if requested. if( stackSize == 0 ) /** @bug We only want an 8K default, fix the plop-in-the-middle code below. */ stackSize = 16 * 1024; // Convert PID to Mach Task ref. mach_port_t remoteTask = 0; if( !err ) err = task_for_pid( mach_task_self(), targetProcess, &remoteTask ); /** @todo Would be nice to just allocate one block for both the remote stack *and* the remoteCode (including the parameter data block once that's written. */ // Allocate the remoteStack. vm_address_t remoteStack = 0; if( !err ) err = vm_allocate( remoteTask, &remoteStack, stackSize, 1 ); // Allocate the code. vm_address_t remoteCode = 0; if( !err ) err = vm_allocate( remoteTask, &remoteCode, imageSize, 1 ); if( !err ) { ASSERT_CAST( pointer_t, image ); err = vm_write( remoteTask, remoteCode, (pointer_t) image, imageSize ); } // Allocate the paramBlock if specified. vm_address_t remoteParamBlock = 0; if( !err && paramBlock != NULL && paramSize ) { err = vm_allocate( remoteTask, &remoteParamBlock, paramSize, 1 ); if( !err ) { ASSERT_CAST( pointer_t, paramBlock ); err = vm_write( remoteTask, remoteParamBlock, (pointer_t) paramBlock, paramSize ); } } // Calculate offsets. ptrdiff_t threadEntryOffset, imageOffset; if( !err ) { ;//assertIsWithinRange( threadEntry, image, image+imageSize ); ASSERT_CAST( void*, threadEntry ); threadEntryOffset = ((void*) threadEntry) - image; ASSERT_CAST( void*, remoteCode ); imageOffset = ((void*) remoteCode) - image; } // Allocate the thread. thread_act_t remoteThread; if( !err ) { ppc_thread_state_t remoteThreadState; /** @bug Stack math should be more sophisticated than this (ala redzone). */ remoteStack += stackSize / 2; bzero( &remoteThreadState, sizeof(remoteThreadState) ); ASSERT_CAST( unsigned int, remoteCode ); remoteThreadState.srr0 = (unsigned int) remoteCode; remoteThreadState.srr0 += threadEntryOffset; assert( remoteThreadState.srr0 < (remoteCode + imageSize) ); ASSERT_CAST( unsigned int, remoteStack ); remoteThreadState.r1 = (unsigned int) remoteStack; ASSERT_CAST( unsigned int, imageOffset ); remoteThreadState.r3 = (unsigned int) imageOffset; ASSERT_CAST( unsigned int, remoteParamBlock ); remoteThreadState.r4 = (unsigned int) remoteParamBlock; ASSERT_CAST( unsigned int, paramSize ); remoteThreadState.r5 = (unsigned int) paramSize; ASSERT_CAST( unsigned int, 0xDEADBEEF ); remoteThreadState.lr = (unsigned int) 0xDEADBEEF; //printf( "remoteCode start: %p\n", (void*) remoteCode ); //printf( "remoteCode size: %ld\n", imageSize ); //printf( "remoteCode pc: %p\n", (void*) remoteThreadState.srr0 ); //printf( "remoteCode end: %p\n", (void*) (((char*)remoteCode)+imageSize) ); fflush(0); err = thread_create_running( remoteTask, PPC_THREAD_STATE, (thread_state_t) &remoteThreadState, PPC_THREAD_STATE_COUNT, &remoteThread ); }