/*
 * Create a new mm_struct and populate it with a temporary stack
 * vm_area_struct.  We don't have enough context at this point to set the stack
 * flags, permissions, and offset, so we use temporary values.  We'll update
 * them later in setup_arg_pages().
 */
int bprm_mm_init(struct linux_binprm *bprm)
{
	int err;
	struct mm_struct *mm = NULL;

	bprm->mm = mm = mm_alloc();
	err = -ENOMEM;
	if (!mm)
		goto err;

	err = init_new_context(current, mm);
	if (err)
		goto err;

	err = __bprm_mm_init(bprm);
	if (err)
		goto err;

	return 0;

err:
	if (mm) {
		bprm->mm = NULL;
		mmdrop(mm);
	}

	return err;
}
예제 #2
0
파일: async_pf.c 프로젝트: 908626950/linux
static void async_pf_execute(struct work_struct *work)
{
	struct kvm_async_pf *apf =
		container_of(work, struct kvm_async_pf, work);
	struct mm_struct *mm = apf->mm;
	struct kvm_vcpu *vcpu = apf->vcpu;
	unsigned long addr = apf->addr;
	gva_t gva = apf->gva;

	might_sleep();

	use_mm(mm);
	down_read(&mm->mmap_sem);
	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
	up_read(&mm->mmap_sem);
	kvm_async_page_present_sync(vcpu, apf);
	unuse_mm(mm);

	spin_lock(&vcpu->async_pf.lock);
	list_add_tail(&apf->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	/*
	 * apf may be freed by kvm_check_async_pf_completion() after
	 * this point
	 */

	trace_kvm_async_pf_completed(addr, gva);

	if (waitqueue_active(&vcpu->wq))
		wake_up_interruptible(&vcpu->wq);

	mmdrop(mm);
	kvm_put_kvm(vcpu->kvm);
}
int mm_remove_object (void *object,
		      struct kddm_set *set,
		      objid_t objid)
{
	struct mm_struct *mm = object;

	/* Ensure that no thread uses this signal_struct copy */
	down_write(&mm->remove_sem);
	up_write(&mm->remove_sem);

	/* Take the mmap_sem to avoid race condition with clean_up_mm_struct */

	atomic_inc(&mm->mm_count);
	down_write(&mm->mmap_sem);

	mmput(mm);

	up_write(&mm->mmap_sem);

	mm->mm_id = 0;
	krgnodes_clear(mm->copyset);

	mmdrop(mm);

	return 0;
}
예제 #4
0
파일: async_pf.c 프로젝트: 908626950/linux
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
	/* cancel outstanding work queue item */
	while (!list_empty(&vcpu->async_pf.queue)) {
		struct kvm_async_pf *work =
			list_entry(vcpu->async_pf.queue.next,
				   typeof(*work), queue);
		list_del(&work->queue);

#ifdef CONFIG_KVM_ASYNC_PF_SYNC
		flush_work(&work->work);
#else
		if (cancel_work_sync(&work->work)) {
			mmdrop(work->mm);
			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
			kmem_cache_free(async_pf_cache, work);
		}
#endif
	}

	spin_lock(&vcpu->async_pf.lock);
	while (!list_empty(&vcpu->async_pf.done)) {
		struct kvm_async_pf *work =
			list_entry(vcpu->async_pf.done.next,
				   typeof(*work), link);
		list_del(&work->link);
		kmem_cache_free(async_pf_cache, work);
	}
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued = 0;
}
예제 #5
0
파일: async_pf.c 프로젝트: cgvarela/fvm
static void async_pf_execute(struct work_struct *work)
{
	struct page *page = NULL;
	struct vmmr0_async_pf *apf =
		container_of(work, struct vmmr0_async_pf, work);
	struct mm_struct *mm = apf->mm;
	struct vmmr0_vcpu *vcpu = apf->vcpu;
	unsigned long addr = apf->addr;

	might_sleep();

	vmmr0_use_mm(mm);
	down_read(&mm->mmap_sem);
	get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
	up_read(&mm->mmap_sem);
	vmmr0_unuse_mm(mm);

	spin_lock(&vcpu->async_pf.lock);
	list_add_tail(&apf->link, &vcpu->async_pf.done);
	apf->page = page;
	apf->done = true;
	spin_unlock(&vcpu->async_pf.lock);

	if (waitqueue_active(&vcpu->wq))
		wake_up_interruptible(&vcpu->wq);

	mmdrop(mm);
	vmmr0_put_vm(vcpu->pvm);
}
예제 #6
0
static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
	i915_mmu_notifier_free(mm->mn, mm->mm);
	mmdrop(mm->mm);
	kfree(mm);
}
예제 #7
0
static int map_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct proc_maps_private *priv = seq->private;

	if (priv->mm)
		mmdrop(priv->mm);

	return seq_release_private(inode, file);
}
예제 #8
0
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
		list_del(&mm->mmlist);
		mmlist_nr--;
		spin_unlock(&mmlist_lock);
		exit_aio(mm);
		exit_mmap(mm);
		mmdrop(mm);
	}
}
예제 #9
0
void end_lazy_tlb(struct mm_struct *mm)
{
	struct mm_struct *active_mm = current->active_mm;

	current->mm = mm;
	if (mm != active_mm) {
		current->active_mm = mm;
		activate_mm(active_mm, mm);
	}
	mmdrop(active_mm);
}
예제 #10
0
/* Ensure we run on the idle task page tables so that we will
   switch page tables before running user space. This is needed
   on architectures with separate kernel and user page tables
   because the user page table pointer is not saved/restored. */
static void switch_idle_mm(void)
{
	struct mm_struct *mm = current->active_mm;

	if (mm == &init_mm)
		return;

	atomic_inc(&init_mm.mm_count);
	switch_mm(mm, &init_mm, current);
	current->active_mm = &init_mm;
	mmdrop(mm);
}
예제 #11
0
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_aio(mm);
		exit_mmap(mm);
		if (!list_empty(&mm->mmlist)) {
			spin_lock(&mmlist_lock);
			list_del(&mm->mmlist);
			spin_unlock(&mmlist_lock);
		}
		put_swap_token(mm);
		mmdrop(mm);
	}
}
예제 #12
0
void end_lazy_tlb(struct mm_struct *mm)
{
    struct mm_struct *active_mm = current->active_mm;

#ifdef CONFIG_PREEMPT
    if (preempt_is_disabled() == 0)
        BUG();
#endif
    current->mm = mm;
    if (mm != active_mm) {
        current->active_mm = mm;
        activate_mm(active_mm, mm);
    }
    mmdrop(active_mm);
}
예제 #13
0
파일: async_pf.c 프로젝트: cgvarela/fvm
int vmmr0_setup_async_pf(struct vmmr0_vcpu *vcpu, gva_t gva, gfn_t gfn,
		       struct vmmr0_arch_async_pf *arch)
{
	struct vmmr0_async_pf *work;

	if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
		return 0;

	/* setup delayed work */

	/*
	 * do alloc nowait since if we are going to sleep anyway we
	 * may as well sleep faulting in page
	 */
	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
	if (!work)
		return 0;

	work->page = NULL;
	work->done = false;
	work->vcpu = vcpu;
	work->gva = gva;
	work->addr = mmu_gfn_to_hva(vcpu->pvm, gfn);
	work->arch = *arch;
	work->mm = current->mm;
	atomic_inc(&work->mm->mm_count);
	vmmr0_get_vm(work->vcpu->pvm);

	/* this can't really happen otherwise mmu_gfn_to_pfn_async
	   would succeed */
	if (unlikely(vmmr0_is_error_hva(work->addr)))
		goto retry_sync;

	INIT_WORK(&work->work, async_pf_execute);
	if (!schedule_work(&work->work))
		goto retry_sync;

	list_add_tail(&work->queue, &vcpu->async_pf.queue);
	vcpu->async_pf.queued++;
	vmmr0_arch_async_page_not_present(vcpu, work);
	return 1;
retry_sync:
	vmmr0_put_vm(work->vcpu->pvm);
	mmdrop(work->mm);
	kmem_cache_free(async_pf_cache, work);
	return 0;
}
예제 #14
0
/*
 * use_mm
 *	Makes the calling kernel thread take on the specified
 *	mm context.
 *	Called by the retry thread execute retries within the
 *	iocb issuer's mm context, so that copy_from/to_user
 *	operations work seamlessly for aio.
 *	(Note: this routine is intended to be called only
 *	from a kernel thread context)
 */
void use_mm(struct mm_struct *mm)
{
	struct mm_struct *active_mm;
	struct task_struct *tsk = current;

	task_lock(tsk);
	active_mm = tsk->active_mm;
	if (active_mm != mm) {
		atomic_inc(&mm->mm_count);
		tsk->active_mm = mm;
	}
	tsk->mm = mm;
	switch_mm(active_mm, mm, tsk);
	task_unlock(tsk);

	if (active_mm != mm)
		mmdrop(active_mm);
}
예제 #15
0
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	might_sleep();

	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_aio(mm);
		ksm_exit(mm);
		exit_mmap(mm);
		set_mm_exe_file(mm, NULL);
		if (!list_empty(&mm->mmlist)) {
			spin_lock(&mmlist_lock);
			list_del(&mm->mmlist);
			spin_unlock(&mmlist_lock);
		}
		put_swap_token(mm);
		mmdrop(mm);
	}
}
예제 #16
0
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	unsigned long pid = mm->context.id;

	if (current->mm == mm)
		return; /* Local CPU */

	if (current->active_mm == mm) {
		/*
		 * Must be a kernel thread because sender is single-threaded.
		 */
		BUG_ON(current->mm);
		mmgrab(&init_mm);
		switch_mm(mm, &init_mm, current);
		current->active_mm = &init_mm;
		mmdrop(mm);
	}
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
}
예제 #17
0
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
	might_sleep();

	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_aio(mm);
		ksm_exit(mm);
		khugepaged_exit(mm); /* must run before exit_mmap */
		exit_mmap(mm);
		set_mm_exe_file(mm, NULL);
		if (!list_empty(&mm->mmlist)) {
			spin_lock(&mmlist_lock);
			list_del(&mm->mmlist);
			spin_unlock(&mmlist_lock);
		}
		if (mm->binfmt)
			module_put(mm->binfmt->module);
		mmdrop(mm);
	}
}
예제 #18
0
/*
 * use_mm
 *	Makes the calling kernel thread take on the specified
 *	mm context.
 *	Called by the retry thread execute retries within the
 *	iocb issuer's mm context, so that copy_from/to_user
 *	operations work seamlessly for aio.
 *	(Note: this routine is intended to be called only
 *	from a kernel thread context)
 */
void use_mm(struct mm_struct *mm)
{
	struct mm_struct *active_mm;
	struct task_struct *tsk = current;
	unsigned long flags;

	task_lock(tsk);
	active_mm = tsk->active_mm;
 	ipipe_mm_switch_protect(flags);
	if (active_mm != mm) {
		atomic_inc(&mm->mm_count);
		tsk->active_mm = mm;
	}
	tsk->mm = mm;
	__switch_mm(active_mm, mm, tsk);
 	ipipe_mm_switch_unprotect(flags);
	task_unlock(tsk);

	if (active_mm != mm)
		mmdrop(active_mm);
}
예제 #19
0
int mmput(struct mm_struct *mm)
{
	int mm_freed = 0;
	might_sleep();

	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_aio(mm);
		ksm_exit(mm);
		khugepaged_exit(mm); 
		exit_mmap(mm);
		set_mm_exe_file(mm, NULL);
		if (!list_empty(&mm->mmlist)) {
			spin_lock(&mmlist_lock);
			list_del(&mm->mmlist);
			spin_unlock(&mmlist_lock);
		}
		if (mm->binfmt)
			module_put(mm->binfmt->module);
		mmdrop(mm);
		mm_freed = 1;
	}
	return mm_freed;
}
예제 #20
0
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags).  The actual kick-off is left to the caller.
 */
struct task_struct *copy_process(unsigned long clone_flags,
			         unsigned long stack_start,
			         struct pt_regs *regs,
			         unsigned long stack_size,
			         int *parent_tidptr,
			         int *child_tidptr)
{
	int retval;
	struct task_struct *p = NULL;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);
	if ((clone_flags & CLONE_DETACHED) && !(clone_flags & CLONE_THREAD))
		return ERR_PTR(-EINVAL);
	if (!(clone_flags & CLONE_DETACHED) && (clone_flags & CLONE_THREAD))
		return ERR_PTR(-EINVAL);

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	p->tux_info = NULL;

	retval = -EAGAIN;

	/*
	 * Increment user->__count before the rlimit test so that it would
	 * be correct if we take the bad_fork_free failure path.
	 */
	atomic_inc(&p->user->__count);
	if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
			goto bad_fork_free;
	}

	atomic_inc(&p->user->processes);

	/*
	 * Counter increases are protected by
	 * the kernel lock so nr_threads can't
	 * increase under us (but it may decrease).
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;
	
	get_exec_domain(p->exec_domain);

	if (p->binfmt && p->binfmt->module)
		__MOD_INC_USE_COUNT(p->binfmt->module);

	p->did_exec = 0;
	p->swappable = 0;
	p->state = TASK_UNINTERRUPTIBLE;

	copy_flags(clone_flags, p);
	if (clone_flags & CLONE_IDLETASK)
		p->pid = 0;
	else {
		p->pid = alloc_pidmap();
		if (p->pid == -1)
			goto bad_fork_cleanup;
	}
	
	retval = -EFAULT;
	if (clone_flags & CLONE_PARENT_SETTID)
		if (put_user(p->pid, parent_tidptr))
			goto bad_fork_cleanup;

	INIT_LIST_HEAD(&p->run_list);

	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	init_waitqueue_head(&p->wait_chldexit);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);
	spin_lock_init(&p->switch_lock);

	p->sigpending = 0;
	init_sigpending(&p->pending);

	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->leader = 0;		/* session leadership doesn't inherit */
	p->tty_old_pgrp = 0;
	memset(&p->utime, 0, sizeof(p->utime));
	memset(&p->stime, 0, sizeof(p->stime));
	memset(&p->cutime, 0, sizeof(p->cutime));
	memset(&p->cstime, 0, sizeof(p->cstime));
	memset(&p->group_utime, 0, sizeof(p->group_utime));
	memset(&p->group_stime, 0, sizeof(p->group_stime));
	memset(&p->group_cutime, 0, sizeof(p->group_cutime));
	memset(&p->group_cstime, 0, sizeof(p->group_cstime));

#ifdef CONFIG_SMP
	memset(&p->per_cpu_utime, 0, sizeof(p->per_cpu_utime));
	memset(&p->per_cpu_stime, 0, sizeof(p->per_cpu_stime));
#endif

	memset(&p->timing_state, 0, sizeof(p->timing_state));
	p->timing_state.type = PROCESS_TIMING_USER;
	p->last_sigxcpu = 0;
	p->array = NULL;
	p->lock_depth = -1;		/* -1 = no lock */
	p->start_time = jiffies;

	retval = -ENOMEM;
	/* copy all the process information */
	if (copy_files(clone_flags, p))
		goto bad_fork_cleanup;
	if (copy_fs(clone_flags, p))
		goto bad_fork_cleanup_files;
	if (copy_sighand(clone_flags, p))
		goto bad_fork_cleanup_fs;
	if (copy_signal(clone_flags, p))
		goto bad_fork_cleanup_sighand;
	if (copy_mm(clone_flags, p))
		goto bad_fork_cleanup_signal;
	if (copy_namespace(clone_flags, p))
		goto bad_fork_cleanup_mm;
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_namespace;
	p->semundo = NULL;

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID)
		? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID)
		? child_tidptr : NULL;

	/* Our parent execution domain becomes current domain
	   These must match for thread signalling to apply */
	   
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
	p->swappable = 1;
	if (clone_flags & CLONE_DETACHED)
		p->exit_signal = -1;
	else
		p->exit_signal = clone_flags & CSIGNAL;
	p->pdeath_signal = 0;

	/*
	 * Share the timeslice between parent and child, thus the
	 * total amount of pending timeslices in the system doesnt change,
	 * resulting in more scheduling fairness.
	 */
	local_irq_disable();
	p->time_slice = (current->time_slice + 1) >> 1;
	p->first_time_slice = 1;
	/*
	 * The remainder of the first timeslice might be recovered by
	 * the parent if the child exits early enough.
	 */
	current->time_slice >>= 1;
	p->last_run = jiffies;
	if (!current->time_slice) {
		/*
		 * This case is rare, it happens when the parent has only
		 * a single jiffy left from its timeslice. Taking the
		 * runqueue lock is not a problem.
		 */
		current->time_slice = 1;
		scheduler_tick(0 /* don't update the time stats */);
	}
	local_irq_enable();

	if ((int)current->time_slice <= 0)
		BUG();
	if ((int)p->time_slice <= 0)
		BUG();

	/*
	 * Ok, add it to the run-queues and make it
	 * visible to the rest of the system.
	 *
	 * Let it rip!
	 */
	p->tgid = p->pid;
	p->group_leader = p;
	INIT_LIST_HEAD(&p->ptrace_children);
	INIT_LIST_HEAD(&p->ptrace_list);

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);
	/*
	 * Check for pending SIGKILL! The new thread should not be allowed
	 * to slip out of an OOM kill. (or normal SIGKILL.)
	 */
	if (sigismember(&current->pending.signal, SIGKILL)) {
		write_unlock_irq(&tasklist_lock);
		retval = -EINTR;
		goto bad_fork_cleanup_namespace;
	}

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
		p->real_parent = current->real_parent;
	else
		p->real_parent = current;
	p->parent = p->real_parent;

	if (clone_flags & CLONE_THREAD) {
		spin_lock(&current->sighand->siglock);
		/*
		 * Important: if an exit-all has been started then
		 * do not create this new thread - the whole thread
		 * group is supposed to exit anyway.
		 */
		if (current->signal->group_exit) {
			spin_unlock(&current->sighand->siglock);
			write_unlock_irq(&tasklist_lock);
			retval = -EINTR;
			goto bad_fork_cleanup_namespace;
		}
		p->tgid = current->tgid;
		p->group_leader = current->group_leader;

		if (current->signal->group_stop_count > 0) {
			/*
			 * There is an all-stop in progress for the group.
			 * We ourselves will stop as soon as we check signals.
			 * Make the new thread part of that group stop too.
			 */
			current->signal->group_stop_count++;
			p->sigpending = 1;
		}

		spin_unlock(&current->sighand->siglock);
	}

	SET_LINKS(p);
	if (p->ptrace & PT_PTRACED)
		__ptrace_link(p, current->parent);

	attach_pid(p, PIDTYPE_PID, p->pid);
	if (thread_group_leader(p)) {
		attach_pid(p, PIDTYPE_TGID, p->tgid);
		attach_pid(p, PIDTYPE_PGID, p->pgrp);
		attach_pid(p, PIDTYPE_SID, p->session);
	} else {
		link_pid(p, p->pids + PIDTYPE_TGID,
			&p->group_leader->pids[PIDTYPE_TGID].pid);
	}

	/* clear controlling tty of new task if parent's was just cleared */
	if (!current->tty && p->tty)
		p->tty = NULL;

	nr_threads++;
	write_unlock_irq(&tasklist_lock);
	retval = 0;

fork_out:
	if (retval)
		return ERR_PTR(retval);
	return p;

bad_fork_cleanup_namespace:
	exit_namespace(p);
bad_fork_cleanup_mm:
	exit_mm(p);
	if (p->active_mm)
		mmdrop(p->active_mm);
bad_fork_cleanup_signal:
	exit_signal(p);
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup:
	if (p->pid > 0)
		free_pidmap(p->pid);
	put_exec_domain(p->exec_domain);
	if (p->binfmt && p->binfmt->module)
		__MOD_DEC_USE_COUNT(p->binfmt->module);
bad_fork_cleanup_count:
	atomic_dec(&p->user->processes);
bad_fork_free:
	p->state = TASK_ZOMBIE; /* debug */
	atomic_dec(&p->usage);
	put_task_struct(p);
	goto fork_out;
}
예제 #21
0
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
struct task_struct *copy_process(unsigned long clone_flags,
				 unsigned long stack_start,
				 struct pt_regs *regs,
				 unsigned long stack_size,
				 int __user *parent_tidptr,
				 int __user *child_tidptr)
{
	int retval;
	struct task_struct *p = NULL;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	retval = -EAGAIN;
	if (atomic_read(&p->user->processes) >=
			p->rlim[RLIMIT_NPROC].rlim_cur) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
				p->user != &root_user)
			goto bad_fork_free;
	}

	atomic_inc(&p->user->__count);
	atomic_inc(&p->user->processes);
	get_group_info(p->group_info);

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(p->thread_info->exec_domain->module))
		goto bad_fork_cleanup_count;

	if (p->binfmt && !try_module_get(p->binfmt->module))
		goto bad_fork_cleanup_put_domain;

	p->did_exec = 0;
	copy_flags(clone_flags, p);
	if (clone_flags & CLONE_IDLETASK)
		p->pid = 0;
	else {
		p->pid = alloc_pidmap();
		if (p->pid == -1)
			goto bad_fork_cleanup;
	}
	retval = -EFAULT;
	if (clone_flags & CLONE_PARENT_SETTID)
		if (put_user(p->pid, parent_tidptr))
			goto bad_fork_cleanup;

	p->proc_dentry = NULL;

	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	init_waitqueue_head(&p->wait_chldexit);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);
	spin_lock_init(&p->proc_lock);

	clear_tsk_thread_flag(p, TIF_SIGPENDING);
	init_sigpending(&p->pending);

	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->utime = p->stime = 0;
	p->cutime = p->cstime = 0;
	p->lock_depth = -1;		/* -1 = no lock */
	p->start_time = get_jiffies_64();
	p->security = NULL;
	p->io_context = NULL;
	p->audit_context = NULL;
#ifdef CONFIG_NUMA
 	p->mempolicy = mpol_copy(p->mempolicy);
 	if (IS_ERR(p->mempolicy)) {
 		retval = PTR_ERR(p->mempolicy);
 		p->mempolicy = NULL;
 		goto bad_fork_cleanup;
 	}
#endif

	retval = -ENOMEM;
	if ((retval = security_task_alloc(p)))
		goto bad_fork_cleanup_policy;
	if ((retval = audit_alloc(p)))
		goto bad_fork_cleanup_security;
	/* copy all the process information */
	if ((retval = copy_semundo(clone_flags, p)))
		goto bad_fork_cleanup_audit;
	if ((retval = copy_files(clone_flags, p)))
		goto bad_fork_cleanup_semundo;
	if ((retval = copy_fs(clone_flags, p)))
		goto bad_fork_cleanup_files;
	if ((retval = copy_sighand(clone_flags, p)))
		goto bad_fork_cleanup_fs;
	if ((retval = copy_signal(clone_flags, p)))
		goto bad_fork_cleanup_sighand;
	if ((retval = copy_mm(clone_flags, p)))
		goto bad_fork_cleanup_signal;
	if ((retval = copy_namespace(clone_flags, p)))
		goto bad_fork_cleanup_mm;
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_namespace;

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;

	/*
	 * Syscall tracing should be turned off in the child regardless
	 * of CLONE_PTRACE.
	 */
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

	/* Our parent execution domain becomes current domain
	   These must match for thread signalling to apply */
	   
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
	p->pdeath_signal = 0;

	/* Perform scheduler related setup */
	sched_fork(p);

	/*
	 * Ok, make it visible to the rest of the system.
	 * We dont wake it up yet.
	 */
	p->tgid = p->pid;
	p->group_leader = p;
	INIT_LIST_HEAD(&p->ptrace_children);
	INIT_LIST_HEAD(&p->ptrace_list);

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);
	/*
	 * Check for pending SIGKILL! The new thread should not be allowed
	 * to slip out of an OOM kill. (or normal SIGKILL.)
	 */
	if (sigismember(&current->pending.signal, SIGKILL)) {
		write_unlock_irq(&tasklist_lock);
		retval = -EINTR;
		goto bad_fork_cleanup_namespace;
	}

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & CLONE_PARENT)
		p->real_parent = current->real_parent;
	else
		p->real_parent = current;
	p->parent = p->real_parent;

	if (clone_flags & CLONE_THREAD) {
		spin_lock(&current->sighand->siglock);
		/*
		 * Important: if an exit-all has been started then
		 * do not create this new thread - the whole thread
		 * group is supposed to exit anyway.
		 */
		if (current->signal->group_exit) {
			spin_unlock(&current->sighand->siglock);
			write_unlock_irq(&tasklist_lock);
			retval = -EAGAIN;
			goto bad_fork_cleanup_namespace;
		}
		p->tgid = current->tgid;
		p->group_leader = current->group_leader;

		if (current->signal->group_stop_count > 0) {
			/*
			 * There is an all-stop in progress for the group.
			 * We ourselves will stop as soon as we check signals.
			 * Make the new thread part of that group stop too.
			 */
			current->signal->group_stop_count++;
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

		spin_unlock(&current->sighand->siglock);
	}

	SET_LINKS(p);
	if (p->ptrace & PT_PTRACED)
		__ptrace_link(p, current->parent);

	attach_pid(p, PIDTYPE_PID, p->pid);
	if (thread_group_leader(p)) {
		attach_pid(p, PIDTYPE_TGID, p->tgid);
		attach_pid(p, PIDTYPE_PGID, process_group(p));
		attach_pid(p, PIDTYPE_SID, p->signal->session);
		if (p->pid)
			__get_cpu_var(process_counts)++;
	} else
		link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);

	nr_threads++;
	write_unlock_irq(&tasklist_lock);
	retval = 0;

fork_out:
	if (retval)
		return ERR_PTR(retval);
	return p;

bad_fork_cleanup_namespace:
	exit_namespace(p);
bad_fork_cleanup_mm:
	exit_mm(p);
	if (p->active_mm)
		mmdrop(p->active_mm);
bad_fork_cleanup_signal:
	exit_signal(p);
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_security:
	security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
	mpol_free(p->mempolicy);
#endif
bad_fork_cleanup:
	if (p->pid > 0)
		free_pidmap(p->pid);
	if (p->binfmt)
		module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
	module_put(p->thread_info->exec_domain->module);
bad_fork_cleanup_count:
	put_group_info(p->group_info);
	atomic_dec(&p->user->processes);
	free_uid(p->user);
bad_fork_free:
	free_task(p);
	goto fork_out;
}
예제 #22
0
파일: vmscan.c 프로젝트: davidbau/davej
static int swap_out(unsigned int priority, int gfp_mask, unsigned long idle_time)
{
	struct task_struct * p;
	int counter;
	int __ret = 0;

	lock_kernel();
	/* 
	 * We make one or two passes through the task list, indexed by 
	 * assign = {0, 1}:
	 *   Pass 1: select the swappable task with maximal RSS that has
	 *         not yet been swapped out. 
	 *   Pass 2: re-assign rss swap_cnt values, then select as above.
	 *
	 * With this approach, there's no need to remember the last task
	 * swapped out.  If the swap-out fails, we clear swap_cnt so the 
	 * task won't be selected again until all others have been tried.
	 *
	 * Think of swap_cnt as a "shadow rss" - it tells us which process
	 * we want to page out (always try largest first).
	 */
	counter = (nr_threads << SWAP_SHIFT) >> priority;
	if (counter < 1)
		counter = 1;

	for (; counter >= 0; counter--) {
		unsigned long max_cnt = 0;
		struct mm_struct *best = NULL;
		int pid = 0;
		int assign = 0;
		int found_task = 0;
	select:
		read_lock(&tasklist_lock);
		p = init_task.next_task;
		for (; p != &init_task; p = p->next_task) {
			struct mm_struct *mm = p->mm;
			if (!p->swappable || !mm)
				continue;
	 		if (mm->rss <= 0)
				continue;
			/* Skip tasks which haven't slept long enough yet when idle-swapping. */
			if (idle_time && !assign && (!(p->state & TASK_INTERRUPTIBLE) ||
					time_after(p->sleep_time + idle_time * HZ, jiffies)))
				continue;
			found_task++;
			/* Refresh swap_cnt? */
			if (assign == 1) {
				mm->swap_cnt = (mm->rss >> SWAP_SHIFT);
				if (mm->swap_cnt < SWAP_MIN)
					mm->swap_cnt = SWAP_MIN;
			}
			if (mm->swap_cnt > max_cnt) {
				max_cnt = mm->swap_cnt;
				best = mm;
				pid = p->pid;
			}
		}
		read_unlock(&tasklist_lock);
		if (!best) {
			if (!assign && found_task > 0) {
				assign = 1;
				goto select;
			}
			goto out;
		} else {
			int ret;

			atomic_inc(&best->mm_count);
			ret = swap_out_mm(best, gfp_mask);
			mmdrop(best);

			__ret = 1;
			goto out;
		}
	}
예제 #23
0
파일: oom_kill.c 프로젝트: avagin/linux
static void __oom_kill_process(struct task_struct *victim, const char *message)
{
	struct task_struct *p;
	struct mm_struct *mm;
	bool can_oom_reap = true;

	p = find_lock_task_mm(victim);
	if (!p) {
		put_task_struct(victim);
		return;
	} else if (victim != p) {
		get_task_struct(p);
		put_task_struct(victim);
		victim = p;
	}

	/* Get a reference to safely compare mm after task_unlock(victim) */
	mm = victim->mm;
	mmgrab(mm);

	/* Raise event before sending signal: task reaper must see this */
	count_vm_event(OOM_KILL);
	memcg_memory_event_mm(mm, MEMCG_OOM_KILL);

	/*
	 * We should send SIGKILL before granting access to memory reserves
	 * in order to prevent the OOM victim from depleting the memory
	 * reserves from the user space under its control.
	 */
	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
	mark_oom_victim(victim);
	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
		message, task_pid_nr(victim), victim->comm,
		K(victim->mm->total_vm),
		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
	task_unlock(victim);

	/*
	 * Kill all user processes sharing victim->mm in other thread groups, if
	 * any.  They don't get access to memory reserves, though, to avoid
	 * depletion of all memory.  This prevents mm->mmap_sem livelock when an
	 * oom killed thread cannot exit because it requires the semaphore and
	 * its contended by another thread trying to allocate memory itself.
	 * That thread will now get access to memory reserves since it has a
	 * pending fatal signal.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm))
			continue;
		if (same_thread_group(p, victim))
			continue;
		if (is_global_init(p)) {
			can_oom_reap = false;
			set_bit(MMF_OOM_SKIP, &mm->flags);
			pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
					task_pid_nr(victim), victim->comm,
					task_pid_nr(p), p->comm);
			continue;
		}
		/*
		 * No use_mm() user needs to read from the userspace so we are
		 * ok to reap it.
		 */
		if (unlikely(p->flags & PF_KTHREAD))
			continue;
		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
	}
	rcu_read_unlock();

	if (can_oom_reap)
		wake_oom_reaper(victim);

	mmdrop(mm);
	put_task_struct(victim);
}