Example #1
0
int ckpt_restore_mem(char *node, pid_t gpid, ckpt_desc_t desc)
{
    int i;
    int ret;
    ckpt_mm_t memory;
    struct mm_struct *mm = current->mm;

    log_restore_mem("restoring mem ...");
    if (ckpt_read(desc, &memory, sizeof(ckpt_mm_t)) != sizeof(ckpt_mm_t)) {
        log_err("failed to get mem");
        return -EIO;
    }

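    /* Tear down every existing mapping below TASK_SIZE so the address
     * space can be rebuilt from the checkpoint image. */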
    while (mm->mmap) {
        struct vm_area_struct *vma = mm->mmap;

        if (vma->vm_start >= TASK_SIZE)
            break;

        ret = do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
        if (ret) {
            log_err("failed to unmap");
            return ret;
        }
    }

    down_write(&mm->mmap_sem);
    mm->task_size = TASK_SIZE;
    arch_pick_mmap_layout(mm);
    mm->mmap_base = memory.mmap_base;
    mm->free_area_cache = memory.mmap_base;
    mm->cached_hole_size = ~0UL;
    mm->start_code = memory.start_code;
    mm->end_code = memory.end_code;
    mm->start_data = memory.start_data;
    mm->end_data = memory.end_data;
    mm->start_brk = memory.start_brk;
    mm->brk = memory.brk;
    mm->start_stack = memory.start_stack;
    mm->arg_start = memory.arg_start;
    mm->arg_end = memory.arg_end;
    mm->env_start = memory.env_start;
    mm->env_end = memory.env_end;
    up_write(&mm->mmap_sem);

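    /* Re-create each of the map_count saved VMAs from the checkpoint stream. */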
    for (i = 0; i < memory.map_count; i++) {
        ret = ckpt_restore_vma(node, gpid, desc);
        if (ret) {
            log_err("failed to restore vma");
            return ret;
        }
    }
    log_restore_pos(desc);
    return 0;
}
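
The checkpoint record consumed by ckpt_read() above has to carry every field that is copied back into the mm. The real ckpt_mm_t definition is not part of this listing, so the struct below is only a hypothetical reconstruction inferred from the fields the restore path touches (including map_count, which drives the VMA loop); a minimal compilable sketch, not the project's actual header.

/* Hypothetical reconstruction of ckpt_mm_t, inferred from the restore code
 * above. The real checkpoint header is not shown in this example. */
#include <stdio.h>

typedef struct ckpt_mm {
    unsigned long mmap_base;                /* also seeds free_area_cache */
    unsigned long start_code, end_code;
    unsigned long start_data, end_data;
    unsigned long start_brk, brk;
    unsigned long start_stack;
    unsigned long arg_start, arg_end;
    unsigned long env_start, env_end;
    int map_count;                          /* number of VMA records that follow */
} ckpt_mm_t;

int main(void)
{
    /* Image layout implied above: one ckpt_mm_t, then map_count VMA records. */
    printf("mm record: %zu bytes\n", sizeof(ckpt_mm_t));
    return 0;
}
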
static int CVE_2010_0307_linux2_6_27_31_load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
 	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char * elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}
	
	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op||!bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_ph;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX || 
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the 
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, 0);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality  */
		SET_PERSONALITY(loc->elf_ex, 0);
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* OK, This is the point of no return */
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, 0);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	
	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory. */
	for(i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;
	            
			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk (elf_bss + load_bias,
					  elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
			load_bias = 0;
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long uninitialized_var(interp_map_addr);

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}
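
For reference, the header consistency checks at the top of load_elf_binary() (ELF magic, e_type, e_phentsize, and the e_phnum bound that keeps the program header table under 64 KiB) can be reproduced in user space. The sketch below uses the standard <elf.h> Elf64 types and is only an illustration of the same validation logic, not the kernel's code path.

/* Userspace sketch of the ELF header sanity checks performed by the loader
 * above. Illustration only; assumes a native 64-bit ELF file in argv[1]. */
#include <elf.h>
#include <stdio.h>
#include <string.h>

static int check_ehdr(const Elf64_Ehdr *eh)
{
    if (memcmp(eh->e_ident, ELFMAG, SELFMAG) != 0)
        return -1;                          /* not an ELF file */
    if (eh->e_type != ET_EXEC && eh->e_type != ET_DYN)
        return -1;                          /* neither executable nor shared object */
    if (eh->e_phentsize != sizeof(Elf64_Phdr))
        return -1;                          /* unexpected program header entry size */
    if (eh->e_phnum < 1 || eh->e_phnum > 65536U / sizeof(Elf64_Phdr))
        return -1;                          /* program header table exceeds 64 KiB */
    return 0;
}

int main(int argc, char **argv)
{
    Elf64_Ehdr eh;
    FILE *f;

    if (argc < 2 || !(f = fopen(argv[1], "rb")))
        return 1;
    if (fread(&eh, sizeof(eh), 1, f) != 1)
        return 1;
    printf("%s: %s\n", argv[1], check_ehdr(&eh) ? "rejected" : "looks loadable");
    fclose(f);
    return 0;
}
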
Example #3
0
static int load_exeso_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
    struct elfhdr *elf_ex;
    struct elf_phdr *elf_phdata = NULL;
    struct mm_struct *mm;
    unsigned long load_addr = 0;
    unsigned long error;
    int retval = 0;
    unsigned long pe_entry, ntdll_load_addr = 0;
    unsigned long start_code, end_code, start_data, end_data;
    unsigned long ntdll_entry;
    int executable_stack = EXSTACK_DEFAULT;
    unsigned long def_flags = 0;
    unsigned long stack_top;
#ifdef NTDLL_SO
    unsigned long	interp_load_addr;
    unsigned long	interp_entry;
#endif
    struct eprocess	*process;
    struct ethread	*thread;
    PRTL_USER_PROCESS_PARAMETERS	ppb;
    OBJECT_ATTRIBUTES	ObjectAttributes;
    INITIAL_TEB	init_teb;

    BOOLEAN is_win32=FALSE;
    struct startup_info *info=NULL;
    struct eprocess	*parent_eprocess=NULL;
    struct ethread	*parent_ethread=NULL;
    struct w32process* child_w32process =NULL;
    struct w32process* parent_w32process =NULL;

    elf_ex = (struct elfhdr *)bprm->buf;
    retval = -ENOEXEC;
    /* First of all, some simple consistency checks */
    if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
        goto out;
    if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
        goto out;
    if (!elf_check_arch(elf_ex))
        goto out;
    if (!bprm->file->f_op||!bprm->file->f_op->mmap)
        goto out;

    if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
        goto out;
    if (elf_ex->e_phnum < 1 ||
            elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
        goto out;

    if(!check_exeso(bprm))
        goto out;

    start_code = ~0UL;
    end_code = 0;
    start_data = 0;
    end_data = 0;

    if(current->parent->ethread)
    {
        is_win32 = TRUE;
        parent_ethread = current->parent->ethread;
        parent_eprocess = parent_ethread->threads_process;
    }

    /* Flush all traces of the currently running executable */
    retval = flush_old_exec(bprm);
    if (retval) {
        goto out;
    }

    /* OK, This is the point of no return */
    mm = current->mm;
    current->flags &= ~PF_FORKNOEXEC;
    mm->def_flags = def_flags;

    current->signal->rlim[RLIMIT_STACK].rlim_cur = WIN32_STACK_LIMIT;
    current->signal->rlim[RLIMIT_STACK].rlim_max = WIN32_STACK_LIMIT;
    current->personality |= ADDR_COMPAT_LAYOUT;
    arch_pick_mmap_layout(mm);

    /* Do this so that we can load the ntdll, if need be.  We will
       change some of these later */
    mm->free_area_cache = mm->mmap_base = WIN32_UNMAPPED_BASE;
    mm->cached_hole_size = 0;
    stack_top = WIN32_STACK_LIMIT + WIN32_LOWEST_ADDR;
    retval = setup_arg_pages(bprm, stack_top, executable_stack);
    if (retval < 0)
        goto out_free_file;

    down_write(&mm->mmap_sem);
    /* reserve first 0x100000 */
    do_mmap_pgoff(NULL, 0, WIN32_LOWEST_ADDR, PROT_NONE, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, 0);
    /* reserve 0x7fff0000 - 0x80000000 */
    do_mmap_pgoff(NULL, WIN32_TASK_SIZE - 0x10000, 0x10000,
                  PROT_NONE, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, 0);
    /* reserve 0x81000000 - 0xc0000000;
     * 0x80000000 - 0x81000000 is used for the wine SYSTEM_HEAP */
    do_mmap_pgoff(NULL, WIN32_TASK_SIZE + WIN32_SYSTEM_HEAP_SIZE,
                  TASK_SIZE - WIN32_TASK_SIZE - WIN32_SYSTEM_HEAP_SIZE,
                  PROT_NONE, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, 0);
    up_write(&mm->mmap_sem);

#ifdef NTDLL_SO
    /* search ntdll.dll.so in $PATH, default is /usr/local/lib/wine/ntdll.dll.so */
    if (!*ntdll_name)
        search_ntdll();

    /* map ntdll.dll.so */
    map_system_dll(current, ntdll_name, &ntdll_load_addr, &interp_load_addr);

    pe_entry = get_pe_entry();
    ntdll_entry = get_ntdll_entry();
    interp_entry = get_interp_entry();
#endif

    set_binfmt(&exeso_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
    retval = arch_setup_additional_pages(bprm, executable_stack);
    if (retval < 0) {
        goto out_free_file;
    }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

    install_exec_creds(bprm);
    current->flags &= ~PF_FORKNOEXEC;

#ifdef NTDLL_SO
    /* copy argv, env, and auxvec to stack, all for interpreter */
    create_elf_tables_aux(bprm,
                          ntdll_load_addr, ntdll_phoff, ntdll_phnum, get_ntdll_start_thunk(),
                          load_addr, elf_ex->e_phoff, elf_ex->e_phnum, 0,
                          interp_load_addr, interp_entry, 0);
#endif

    mm->end_code = end_code;
    mm->start_code = start_code;
    mm->start_data = start_data;
    mm->end_data = end_data;
    mm->start_stack = bprm->p;

    if (current->personality & MMAP_PAGE_ZERO) {
        /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
           and some applications "depend" upon this behavior.
           Since we do not have the power to recompile these, we
           emulate the SVr4 behavior.  Sigh.  */
        down_write(&mm->mmap_sem);
        error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE, 0);
        up_write(&mm->mmap_sem);
    }


    /* create win-related structure */
    INIT_OBJECT_ATTR(&ObjectAttributes, NULL, 0, NULL, NULL);

    /* Create EPROCESS */
    retval = create_object(KernelMode,
                           process_object_type,
                           &ObjectAttributes,
                           KernelMode,
                           NULL,
                           sizeof(struct eprocess),
                           0,
                           0,
                           (PVOID *)&process);
    if (retval != STATUS_SUCCESS) {
        goto out_free_file;
    }

    /* init eprocess */
    eprocess_init(NULL, FALSE, process);
    process->unique_processid = create_cid_handle(process, process_object_type);
    if (!process->unique_processid)
        goto out_free_eproc;

    /* initialize EProcess and KProcess */
    process->section_base_address = (void *)load_addr;

    /* FIXME: PsCreateCidHandle */

    /* Create PEB */
    if ((retval = create_peb(process)))
        goto out_free_process_cid;

    /* Create PPB */
    if(is_win32 == FALSE)
    {
        create_ppb(&ppb, process, bprm, bprm->filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
        ((PEB *)process->peb)->ProcessParameters = ppb;
    }
    /* allocate a Win32 thread object */
    retval = create_object(KernelMode,
                           thread_object_type,
                           &ObjectAttributes,
                           KernelMode,
                           NULL,
                           sizeof(struct ethread),
                           0,
                           0,
                           (PVOID *)&thread);
    if (retval) {
        goto out_free_process_cid;
    }

    thread->cid.unique_thread = create_cid_handle(thread, thread_object_type);
    thread->cid.unique_process = process->unique_processid;
    if (!thread->cid.unique_thread)
        goto out_free_ethread;

    /* set the teb */
    init_teb.StackBase = (PVOID)(bprm->p);
    init_teb.StackLimit = (PVOID)WIN32_LOWEST_ADDR + PAGE_SIZE;
    thread->tcb.teb = create_teb(process, (PCLIENT_ID)&thread->cid, &init_teb);
    if (IS_ERR(thread->tcb.teb)) {
        retval = PTR_ERR(thread->tcb.teb);
        goto out_free_thread_cid;
    }

    /* Init KThread */
    ethread_init(thread, process, current);

    sema_init(&thread->exec_semaphore,0);
    if (is_win32 == TRUE) //parent is a windows process
    {
        down(&thread->exec_semaphore);  //wait for the parent

        child_w32process = process->win32process;
        parent_w32process = parent_eprocess->win32process;
        info = child_w32process->startup_info;

        //now parent has finished its work
        if(thread->inherit_all)
        {
            create_handle_table(parent_eprocess, TRUE, process);
            child_w32process = create_w32process(parent_w32process, TRUE, process);
        }
    }

    deref_object(process);
    deref_object(thread);

    set_teb_selector(current, (long)thread->tcb.teb);

    thread->start_address = (void *)pe_entry;	/* FIXME */

    /* save current trap frame */
    thread->tcb.trap_frame = (struct ktrap_frame *)regs;

    /* init apc, to call LdrInitializeThunk */
#if 0
    thread_apc = kmalloc(sizeof(KAPC), GFP_KERNEL);
    if (!thread_apc) {
        retval = -ENOMEM;
        goto out_free_thread_cid;
    }
    apc_init(thread_apc,
             &thread->tcb,
             OriginalApcEnvironment,
             thread_special_apc,
             NULL,
             (PKNORMAL_ROUTINE)ntdll_entry,
             UserMode,
             (void *)(bprm->p + 12));
    insert_queue_apc(thread_apc, (void *)interp_entry, (void *)extra_page, IO_NO_INCREMENT);
#ifndef TIF_APC
#define	TIF_APC	13
#endif
    set_tsk_thread_flag(current, TIF_APC);
#endif

#ifdef ELF_PLAT_INIT
    /*
     * The ABI may specify that certain registers be set up in special
     * ways (on i386 %edx is the address of a DT_FINI function, for
     * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
     * that the e_entry field is the address of the function descriptor
     * for the startup routine, rather than the address of the startup
     * routine itself.  This macro performs whatever initialization to
     * the regs structure is required as well as any relocations to the
     * function descriptor entries when executing dynamically linked apps.
     */
    ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

    start_thread(regs, interp_entry, bprm->p);
    if (unlikely(current->ptrace & PT_PTRACED)) {
        if (current->ptrace & PT_TRACE_EXEC)
            ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
        else
            send_sig(SIGTRAP, current, 0);
    }

    retval = 0;

    try_module_get(THIS_MODULE);

    /* return from w32syscall_exit, not syscall_exit */
    ((unsigned long *)regs)[-1] = (unsigned long)w32syscall_exit;
    regs->fs = TEB_SELECTOR;

out:
    if(elf_phdata)
        kfree(elf_phdata);
    return retval;

    /* error cleanup */
out_free_thread_cid:
    delete_cid_handle(thread->cid.unique_thread, thread_object_type);
out_free_ethread:
    deref_object(thread);
out_free_process_cid:
    delete_cid_handle(process->unique_processid, process_object_type);
out_free_eproc:
    deref_object(process);
out_free_file:
    send_sig(SIGKILL, current, 0);
    goto out;
}
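
The three do_mmap_pgoff() calls above pin down address ranges with PROT_NONE so nothing else can be mapped there. The same reserve-then-commit pattern works from user space; the sketch below is a generic illustration with arbitrary sizes, not the WIN32_* layout this module enforces.

/* Userspace sketch of reserving address space with PROT_NONE and committing
 * part of it later, mirroring the reservation pattern used above. The 64 MiB
 * size is arbitrary and only for illustration. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    size_t reserve = 64UL << 20;            /* reserve 64 MiB of address space */
    long page = sysconf(_SC_PAGESIZE);
    void *base = mmap(NULL, reserve, PROT_NONE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (base == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    /* Commit the first page in place; the rest of the range stays reserved. */
    if (mprotect(base, page, PROT_READ | PROT_WRITE) != 0) {
        perror("mprotect");
        return 1;
    }
    memset(base, 0, page);
    printf("reserved %zu bytes at %p, first page committed\n", reserve, base);
    munmap(base, reserve);
    return 0;
}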