コード例 #1
0
ファイル: mach_inject.c プロジェクト: naves/escritorios
/**
 * Copy the code image containing threadEntry into another process and start
 * a thread there that begins executing at threadEntry.
 *
 * Steps visible in this excerpt, each skipped once `err` is non-zero:
 *   1. ask machImageForPointer() for the image that contains threadEntry;
 *   2. turn targetProcess into a Mach task port with task_for_pid();
 *   3. vm_allocate() a stack in the remote task;
 *   4. vm_allocate() room for the image and vm_write() it (PPC: verbatim,
 *      i386: a copy with its jump table fixed up);
 *   5. if paramBlock is non-NULL and paramSize is non-zero, allocate and
 *      copy the parameter block;
 *   6. on PPC, build a ppc_thread_state_t and call thread_create_running().
 *
 * @param threadEntry    entry point inside the image to inject (asserted non-NULL).
 * @param paramBlock     optional data copied into the remote task.
 * @param paramSize      size in bytes of paramBlock.
 * @param targetProcess  pid of the process to inject into (asserted > 0).
 * @param stackSize      remote stack size in bytes; 0 selects the 16 KiB
 *                       default, any other value is asserted to be > 1024.
 *
 * NOTE(review): this excerpt ends inside the PPC `#if` branch; the other
 * architectures' thread setup, any cleanup, and the return statement are
 * not visible here.
 */
mach_error_t
mach_inject(
    const mach_inject_entry	threadEntry,
    const void				*paramBlock,
    size_t					paramSize,
    pid_t					targetProcess,
    vm_size_t				stackSize )
{
    assert( threadEntry );
    assert( targetProcess > 0 );
    assert( stackSize == 0 || stackSize > 1024 );

    //	Find the image.
    //	machImageForPointer() (defined elsewhere) fills in the image base and
    //	size plus the jump table's offset and size; its result seeds `err`.
    const void		*image;
    unsigned long	imageSize;
    unsigned int	jumpTableOffset;
    unsigned int	jumpTableSize;
    mach_error_t	err = machImageForPointer( threadEntry, &image, &imageSize, &jumpTableOffset, &jumpTableSize );

    //	Initialize stackSize to default if requested.
    if( stackSize == 0 )
        /** @bug
        	We only want an 8K default, fix the plop-in-the-middle code below.
        */
        stackSize = 16 * 1024;

    //	Convert PID to Mach Task ref.
    mach_port_t	remoteTask = 0;
    if( !err ) {
        err = task_for_pid( mach_task_self(), targetProcess, &remoteTask );
#if defined(__i386__)
        //	NOTE(review): 5 is presumably KERN_FAILURE -- confirm against
        //	<mach/kern_return.h> and prefer the symbolic name.
        if (err == 5) fprintf(stderr, "Could not access task for pid %d. You probably need to add user to procmod group\n", targetProcess);
#endif
    }

    /** @todo
    	Would be nice to just allocate one block for both the remote stack
    	*and* the remoteCode (including the parameter data block once that's
    	written).
    */

    //	Allocate the remoteStack.
    //	The final argument (1) is vm_allocate()'s "anywhere" flag, so the
    //	kernel picks the address and stores it in remoteStack.
    vm_address_t remoteStack = (vm_address_t)NULL;
    if( !err )
        err = vm_allocate( remoteTask, &remoteStack, stackSize, 1 );

    //	Allocate the code.
    vm_address_t remoteCode = (vm_address_t)NULL;
    if( !err )
        err = vm_allocate( remoteTask, &remoteCode, imageSize, 1 );
    if( !err ) {
        ASSERT_CAST( pointer_t, image );
        //	NOTE(review): when neither branch below is compiled in (any
        //	architecture other than PPC or i386) nothing is written to
        //	remoteCode and `err` stays 0.
#if defined (__ppc__) || defined (__ppc64__)
        err = vm_write( remoteTask, remoteCode, (pointer_t) image, imageSize );
#elif defined (__i386__)
        // on intel, jump table use relative jump instructions (jmp), which means
        // the offset needs to be corrected. We thus copy the image and fix the offset by hand.
        // fixUpOffset is the local image address minus the remote load address.
        ptrdiff_t fixUpOffset = (ptrdiff_t) (image - remoteCode);
        // NOTE(review): the result of fixedUpImageFromImage() is not checked
        // before it is handed to vm_write(); confirm it cannot return NULL.
        void * fixedUpImage = fixedUpImageFromImage(image, imageSize, jumpTableOffset, jumpTableSize, fixUpOffset);
        err = vm_write( remoteTask, remoteCode, (pointer_t) fixedUpImage, imageSize );
        // The temporary fixed-up copy is released whether or not the write succeeded.
        free(fixedUpImage);
#endif
    }

    //	Allocate the paramBlock if specified.
    //	remoteParamBlock stays 0 when no parameter block is supplied.
    vm_address_t remoteParamBlock = (vm_address_t)NULL;
    if( !err && paramBlock != NULL && paramSize ) {
        err = vm_allocate( remoteTask, &remoteParamBlock, paramSize, 1 );
        if( !err ) {
            ASSERT_CAST( pointer_t, paramBlock );
            err = vm_write( remoteTask, remoteParamBlock,
                            (pointer_t) paramBlock, paramSize );
        }
    }

    //	Calculate offsets.
    //	threadEntryOffset: distance of the entry point from the start of the
    //	local image.  imageOffset: remote load address minus local image
    //	address.  Both are only assigned (and only read below) when !err.
    ptrdiff_t	threadEntryOffset, imageOffset;
    if( !err ) {
        //assert( (void*)threadEntry >= image && (void*)threadEntry <= (image+imageSize) );
        ASSERT_CAST( void*, threadEntry );
        threadEntryOffset = ((void*) threadEntry) - image;

        ASSERT_CAST( void*, remoteCode );
        imageOffset = ((void*) remoteCode) - image;
    }

    //	Allocate the thread.
    thread_act_t remoteThread;
#if defined (__ppc__) || defined (__ppc64__)
    if( !err ) {
        ppc_thread_state_t remoteThreadState;

        /** @bug
        	Stack math should be more sophisticated than this (ala redzone).
        */
        //	The initial stack pointer is placed in the middle of the allocation.
        remoteStack += stackSize / 2;

        bzero( &remoteThreadState, sizeof(remoteThreadState) );

        //	Program counter: the entry point's position inside the remote copy.
        ASSERT_CAST( unsigned int, remoteCode );
        remoteThreadState.srr0 = (unsigned int) remoteCode;
        remoteThreadState.srr0 += threadEntryOffset;
        assert( remoteThreadState.srr0 < (remoteCode + imageSize) );

        //	r1 receives the remote stack address computed above.
        ASSERT_CAST( unsigned int, remoteStack );
        remoteThreadState.r1 = (unsigned int) remoteStack;

        //	r3, r4 and r5 hand imageOffset, the remote parameter block and
        //	its size to the injected entry point.
        ASSERT_CAST( unsigned int, imageOffset );
        remoteThreadState.r3 = (unsigned int) imageOffset;

        ASSERT_CAST( unsigned int, remoteParamBlock );
        remoteThreadState.r4 = (unsigned int) remoteParamBlock;

        ASSERT_CAST( unsigned int, paramSize );
        remoteThreadState.r5 = (unsigned int) paramSize;

        //	NOTE(review): lr is set to a sentinel, presumably so that a
        //	return from the entry point faults at a recognizable address.
        ASSERT_CAST( unsigned int, 0xDEADBEEF );
        remoteThreadState.lr = (unsigned int) 0xDEADBEEF;

#if 0
        printf( "remoteCode start: %p\n", (void*) remoteCode );
        printf( "remoteCode size: %ld\n", imageSize );
        printf( "remoteCode pc: %p\n", (void*) remoteThreadState.srr0 );
        printf( "remoteCode end: %p\n",
                (void*) (((char*)remoteCode)+imageSize) );
        fflush(0);
#endif

        //	Create the thread in the remote task, already running, with the
        //	register state assembled above.
        err = thread_create_running( remoteTask, PPC_THREAD_STATE,
                                     (thread_state_t) &remoteThreadState, PPC_THREAD_STATE_COUNT,
                                     &remoteThread );
    }
コード例 #2
0
/*
 * Build the initial register state for the Mach thread behind `thread` so
 * that it starts in routine(thread) with its stack derived from `vsp`.
 *
 *   thread     - pthread whose kernel thread is configured or created
 *   routine    - function the thread starts in; it is passed `thread`
 *   vsp        - stack address the initial stack pointer is derived from
 *   suspended  - non-zero: the kernel thread already exists; its current
 *                state is fetched, patched and written back.
 *                zero: a new running kernel thread is created in this task
 *                and recorded on `thread`.
 *   needresume - only looked at when `suspended` is non-zero; if set, the
 *                kernel thread is resumed after its state has been written.
 *
 * The results of the Mach calls are intentionally discarded.
 */
void
_pthread_setup(pthread_t thread,
	       void (*routine)(pthread_t),
	       void *vsp,
	       int suspended,
	       int needresume)
{
	/* Pick the register-state type, flavor and word count for this CPU. */
#if defined(__i386__)
	i386_thread_state_t regs = {0};
	thread_state_flavor_t regs_flavor = x86_THREAD_STATE32;
	mach_msg_type_number_t regs_count = i386_THREAD_STATE_COUNT;
#elif defined(__x86_64__)
	x86_thread_state64_t regs = {0};
	thread_state_flavor_t regs_flavor = x86_THREAD_STATE64;
	mach_msg_type_number_t regs_count = x86_THREAD_STATE64_COUNT;
#elif defined(__arm__)
	arm_thread_state_t regs = {0};
	thread_state_flavor_t regs_flavor = ARM_THREAD_STATE;
	mach_msg_type_number_t regs_count = ARM_THREAD_STATE_COUNT;
#else
#error _pthread_setup not defined for this architecture
#endif

	/* An existing thread keeps whatever state we do not overwrite below. */
	if (suspended) {
		(void)thread_get_state(_pthread_kernel_thread(thread),
				       regs_flavor,
				       (thread_state_t)&regs,
				       &regs_count);
	}

#if defined(__i386__)
	/*
	 * Fake the frame a `call routine` would have left behind: three words
	 * of padding (keeps the frame 16-byte aligned with one pushed
	 * argument), then the argument, then a null return address on top.
	 */
	uintptr_t *frame = (uintptr_t *)vsp - 5;

	frame[1] = (uintptr_t)thread;	/* the single argument */
	frame[0] = 0;			/* fake return address */
	regs.__eip = (uintptr_t)routine;
	regs.__esp = (uintptr_t)frame;
#elif defined(__x86_64__)
	/*
	 * The argument travels in %rdi, so the only thing pushed is a null
	 * return address; the incoming stack is expected to be aligned
	 * already.
	 */
	uintptr_t *frame = (uintptr_t *)vsp - 1;

	regs.__rip = (uintptr_t)routine;
	regs.__rdi = (uintptr_t)thread;
	frame[0] = 0;			/* fake return address */
	regs.__rsp = (uintptr_t)frame;
#elif defined(__arm__)
	regs.__pc = (uintptr_t)routine;

	/* A set low bit in the entry address selects Thumb mode. */
	if ((regs.__pc & 1) != 0) {
	    regs.__cpsr |= 0x20; /* PSR_THUMB */
	    regs.__pc &= ~1;
	}

	regs.__r[0] = (uintptr_t)thread;
	regs.__sp = (uintptr_t)vsp - C_ARGSAVE_LEN - C_RED_ZONE;
#else
#error _pthread_setup not defined for this architecture
#endif

	if (!suspended) {
		/* No kernel thread yet: create one that is already running. */
		mach_port_t new_kernel_thread;

		(void)thread_create_running(mach_task_self(), regs_flavor, (thread_state_t)&regs, regs_count, &new_kernel_thread);
		_pthread_set_kernel_thread(thread, new_kernel_thread);
		return;
	}

	/* Existing thread: install the patched state, then optionally resume. */
	(void)thread_set_state(_pthread_kernel_thread(thread), regs_flavor, (thread_state_t)&regs, regs_count);
	if (needresume) {
		(void)thread_resume(_pthread_kernel_thread(thread));
	}
}
コード例 #3
0
	/**
	 * Copy the code image containing threadEntry into another process and
	 * start a PPC thread there that begins executing at threadEntry.
	 *
	 * Each step below is skipped once `err` is non-zero:
	 *   1. ask machImageForPointer() for the image that contains threadEntry;
	 *   2. turn targetProcess into a Mach task port with task_for_pid();
	 *   3. vm_allocate() a stack in the remote task;
	 *   4. vm_allocate() room for the image and vm_write() it verbatim;
	 *   5. if paramBlock is non-NULL and paramSize is non-zero, allocate and
	 *      copy the parameter block;
	 *   6. build a ppc_thread_state_t and call thread_create_running().
	 *
	 * @param threadEntry    entry point inside the image to inject.
	 * @param paramBlock     optional data copied into the remote task.
	 * @param paramSize      size in bytes of paramBlock.
	 * @param targetProcess  pid of the process to inject into.
	 * @param stackSize      remote stack size in bytes; 0 selects the 16 KiB default.
	 *
	 * NOTE(review): the argument checks at the top are commented out, and
	 * this excerpt ends before the function's cleanup/return, which is not
	 * visible here.
	 */
	mach_error_t
mach_inject(
		const mach_inject_entry	threadEntry,
		const void				*paramBlock,
		size_t					paramSize,
		pid_t					targetProcess,
		vm_size_t				stackSize ) {
	;//assertCodePtr( threadEntry );
	;//assertPtrIfNotNull( paramBlock );
	;//assertPositive( targetProcess );
	;//assertIsTrue( stackSize == 0 || stackSize > 1024 );
	
	//	Find the image.
	//	machImageForPointer() (defined elsewhere) fills in the image base and
	//	size; its result seeds `err`.
	const void		*image;
	unsigned long	imageSize;
	mach_error_t	err = machImageForPointer( threadEntry, &image, &imageSize );
	
	//	Initialize stackSize to default if requested.
	if( stackSize == 0 )
		/** @bug We only want an 8K default, fix the plop-in-the-middle code below. */
		stackSize = 16 * 1024;
	
	//	Convert PID to Mach Task ref.
	mach_port_t	remoteTask = 0;
	if( !err )
		err = task_for_pid( mach_task_self(), targetProcess, &remoteTask );
	
	/** @todo	Would be nice to just allocate one block for both the remote stack
				*and* the remoteCode (including the parameter data block once that's
				written).
	*/
	
	//	Allocate the remoteStack.
	//	The final argument (1) is vm_allocate()'s "anywhere" flag, so the
	//	kernel picks the address and stores it in remoteStack.
	vm_address_t remoteStack = 0;
	if( !err )
		err = vm_allocate( remoteTask, &remoteStack, stackSize, 1 );
	
	//	Allocate the code.
	//	The local image is copied byte-for-byte into the remote allocation.
	vm_address_t remoteCode = 0;
	if( !err )
		err = vm_allocate( remoteTask, &remoteCode, imageSize, 1 );
	if( !err ) {
		ASSERT_CAST( pointer_t, image );
		err = vm_write( remoteTask, remoteCode, (pointer_t) image, imageSize );
	}
	
	//	Allocate the paramBlock if specified.
	//	remoteParamBlock stays 0 when no parameter block is supplied.
	vm_address_t remoteParamBlock = 0;
	if( !err && paramBlock != NULL && paramSize ) {
		err = vm_allocate( remoteTask, &remoteParamBlock, paramSize, 1 );
		if( !err ) {
			ASSERT_CAST( pointer_t, paramBlock );
			err = vm_write( remoteTask, remoteParamBlock, (pointer_t) paramBlock, paramSize );
		}
	}
	
	//	Calculate offsets.
	//	threadEntryOffset: distance of the entry point from the start of the
	//	local image.  imageOffset: remote load address minus local image
	//	address.  Both are only assigned (and only read below) when !err.
	ptrdiff_t	threadEntryOffset, imageOffset;
	if( !err ) {
		;//assertIsWithinRange( threadEntry, image, image+imageSize );
		ASSERT_CAST( void*, threadEntry );
		threadEntryOffset = ((void*) threadEntry) - image;
		
		ASSERT_CAST( void*, remoteCode );
		imageOffset = ((void*) remoteCode) - image;
	}
	
	//	Allocate the thread.
	//	NOTE(review): the PPC thread state is used unconditionally here; there
	//	is no architecture guard in this version.
	thread_act_t remoteThread;
	if( !err ) {
		ppc_thread_state_t remoteThreadState;
		
		/** @bug Stack math should be more sophisticated than this (ala redzone). */
		//	The initial stack pointer is placed in the middle of the allocation.
		remoteStack += stackSize / 2;
		
		bzero( &remoteThreadState, sizeof(remoteThreadState) );
		
		//	Program counter: the entry point's position inside the remote copy.
		ASSERT_CAST( unsigned int, remoteCode );
		remoteThreadState.srr0 = (unsigned int) remoteCode;
		remoteThreadState.srr0 += threadEntryOffset;
		assert( remoteThreadState.srr0 < (remoteCode + imageSize) );
		
		//	r1 receives the remote stack address computed above.
		ASSERT_CAST( unsigned int, remoteStack );
		remoteThreadState.r1 = (unsigned int) remoteStack;
		
		//	r3, r4 and r5 hand imageOffset, the remote parameter block and
		//	its size to the injected entry point.
		ASSERT_CAST( unsigned int, imageOffset );
		remoteThreadState.r3 = (unsigned int) imageOffset;
		
		ASSERT_CAST( unsigned int, remoteParamBlock );
		remoteThreadState.r4 = (unsigned int) remoteParamBlock;
		
		ASSERT_CAST( unsigned int, paramSize );
		remoteThreadState.r5 = (unsigned int) paramSize;
		
		//	NOTE(review): lr is set to a sentinel, presumably so that a
		//	return from the entry point faults at a recognizable address.
		ASSERT_CAST( unsigned int, 0xDEADBEEF );
		remoteThreadState.lr = (unsigned int) 0xDEADBEEF;
		
		//printf( "remoteCode start: %p\n", (void*) remoteCode );
		//printf( "remoteCode size: %ld\n", imageSize );
		//printf( "remoteCode pc: %p\n", (void*) remoteThreadState.srr0 );
		//printf( "remoteCode end: %p\n", (void*) (((char*)remoteCode)+imageSize) );
		//	A null stream argument makes fflush() flush every open output
		//	stream; it is left over from the debug prints above.
		fflush(0);
		
		//	Create the thread in the remote task, already running, with the
		//	register state assembled above.
		err = thread_create_running( remoteTask, PPC_THREAD_STATE,
				(thread_state_t) &remoteThreadState, PPC_THREAD_STATE_COUNT,
				&remoteThread );
	}
コード例 #4
0
ファイル: inject.c プロジェクト: danzimm/mach_fun
/*
 * Usage: <program> <library-path> <pid|self>
 *
 * Makes the target task (a numeric pid, or this process when the second
 * argument is the literal "self") load the library at <library-path>.
 *
 * How it works: a small x86-64 stub and a hand-built stack are written into
 * the target, and a new thread is started at _pthread_set_self with its
 * stack pointer aimed at that stack. Every `ret` then pops the next entry:
 *
 *   stackContents[0] = code       -> stub loads dlopen's two arguments
 *   stackContents[1] = dlopen     -> dlopen(r_libname, 0x2)
 *   stackContents[2] = mach_thread_self
 *   stackContents[3] = code + 27  -> moves the previous result into %rdi
 *   stackContents[4] = thread_suspend
 *
 * The function addresses are taken from THIS process, so the scheme relies
 * on them being identical in the target.
 *
 * Returns -1 on bad arguments or when _pthread_set_self cannot be resolved,
 * 0 otherwise; failures of the Mach calls are handled by ENSURE_SUCCESS.
 */
int main(int argc, const char *argv[]) {
  if (argc < 3) {
    fprintf(stderr, "usage: %s <library-path> <pid|self>\n",
            (argc > 0 && argv[0]) ? argv[0] : "inject");
    return -1;
  }
  // NOTE(review): `ret` is not used directly in this function; it is kept
  // because the ENSURE_SUCCESS macro may refer to it -- confirm before removing.
  kern_return_t ret;
  // Start the out-parameters of vm_allocate() at 0 rather than passing
  // indeterminate values to the kernel.
  vm_address_t r_libname = 0;
  vm_address_t stack = 0;
  vm_address_t code = 0;
  thread_t thread;
  x86_thread_state64_t state;
  mach_port_t task;
  const char *libname = argv[1];
  const size_t libname_size = strlen(libname) + 1; // includes the NUL
  const int inject_self = strcmp(argv[2], "self") == 0;
  unsigned long long stackContents[5], stack_size, i; // the stack contents has to be an odd number of ull's for some reason (some aligning issue) in dlopen
  unsigned char codeContents[38];

  // Resolve the thread entry point before touching the target: starting a
  // remote thread at address 0 would just crash it.
  void *set_self = dlsym(RTLD_DEFAULT, "_pthread_set_self");
  if (set_self == NULL) {
    const char *why = dlerror();
    fprintf(stderr, "dlsym(\"_pthread_set_self\") failed: %s\n",
            why ? why : "symbol not found");
    return -1;
  }

  bzero(codeContents, sizeof(codeContents));
  bzero(stackContents, sizeof(stackContents));
  codeContents[0] = 0x55; // push rbp
  codeContents[1] = 0x48;
  codeContents[2] = 0x89;
  codeContents[3] = 0xe5; // mov %rsp, %rbp
  codeContents[4] = 0x48;
  codeContents[5] = 0xbf; // mov r_libname, %rdi (8-byte immediate filled in below)
  stackContents[1] = (unsigned long long)dlopen;
  stackContents[2] = (unsigned long long)mach_thread_self;
  stackContents[4] = (unsigned long long)thread_suspend;
  stack_size = 65536;

  if (inject_self) {
    task = mach_task_self();
  } else {
    // atoi() would turn garbage into pid 0 and accept negative numbers;
    // insist on a clean, positive decimal value that fits in an int.
    char *end = NULL;
    const long parsed = strtol(argv[2], &end, 10);
    const int pid = (int)parsed;
    if (end == argv[2] || *end != '\0' || parsed <= 0 || (long)pid != parsed) {
      fprintf(stderr, "invalid pid '%s' (expected a positive integer or \"self\")\n", argv[2]);
      return -1;
    }
    ENSURE_SUCCESS(task_for_pid(mach_task_self(), pid, &task));
  }
  ENSURE_SUCCESS(vm_allocate(task, &r_libname, libname_size, true));
  ENSURE_SUCCESS(vm_allocate(task, &stack, stack_size, true));
  ENSURE_SUCCESS(vm_allocate(task, &code, sizeof(codeContents), true));

  // The return chain lives at the very top of the remote stack.
  const unsigned long long remote_sp = stack + stack_size - sizeof(stackContents);

  stackContents[0] = code;
  stackContents[3] = (unsigned long long)code + 27; // second stub, see codeContents[27]
  ENSURE_SUCCESS(vm_write(task, r_libname, (vm_offset_t)libname, libname_size));
  ENSURE_SUCCESS(vm_write(task, remote_sp, (vm_offset_t)stackContents, sizeof(stackContents)));

  memcpy(&codeContents[6], &r_libname, sizeof(unsigned long long));
  codeContents[14] = 0x48;
  codeContents[15] = 0xbe;
  codeContents[16] = 0x2;  // mov 0x2, %rsi (remaining immediate bytes stay zero)
  codeContents[24] = 0x5d; // pop %rbp
  codeContents[25] = 0x90; // nop / int 3 depending if im debugging
  codeContents[26] = 0xc3; // ret
  codeContents[27] = 0x48;
  codeContents[28] = 0x89;
  codeContents[29] = 0xc7; // mov %rax, %rdi
  codeContents[30] = 0xc3; // ret

  ENSURE_SUCCESS(vm_write(task, code, (vm_offset_t)codeContents, sizeof(codeContents)));
  // The stub is made read+execute only after it has been written.
  ENSURE_SUCCESS(vm_protect(task, code, sizeof(codeContents), false, VM_PROT_EXECUTE | VM_PROT_READ));
  printf("Created code region at %p:\n", (void *)code);
  for (i = 0; i < sizeof(codeContents); i++) {
    printf("0x%02x ", codeContents[i]);
  }
  puts("");
  printf("Created stack at %p with top of stack at %p\n", (void*)stack, (void*)(stack + stack_size));
  for (i = 0; i < sizeof(stackContents) / sizeof(stackContents[0]); i++) {
    printf("0x%02llx:\t0x%02llx\n", (remote_sp + (i * sizeof(unsigned long long))), stackContents[i]);
  }
  bzero(&state, sizeof(state));
  state.__rip = (uint64_t)set_self;
  state.__rdi = stack;      // argument handed to _pthread_set_self
  state.__rsp = remote_sp;  // end of stack minus returns
  state.__rbp = state.__rsp;
  printf("Found _pthread_set_self at %p\n", (void *)state.__rip);

  ENSURE_SUCCESS(thread_create_running(task, x86_THREAD_STATE64, (thread_state_t)(&state), x86_THREAD_STATE64_COUNT, &thread));

  if (inject_self) {
    int rv = pthread_join(*(pthread_t *)stack, NULL);
    if (rv) {
      fprintf(stderr, "pthread_join: (%d) %s\n", rv, strerror(rv));
      // Using the value at `stack` as a pthread_t does not work for
      // pthread_join, and no semaphore is set up, so simply give the other
      // thread a moment to finish loading the dylib.
      sleep(1);
    }
  }
  return 0;
}