Beispiel #1
0
int notify_change(struct dentry * dentry, struct iattr * attr)
{
	struct inode *inode = dentry->d_inode;
	int error;
	struct timespec now;
	unsigned int ia_valid = attr->ia_valid;
	mode_t ia_mode = attr->ia_mode;

	now = current_fs_time(inode->i_sb);

	attr->ia_ctime = now;
	if (!(ia_valid & ATTR_ATIME_SET))
		attr->ia_atime = now;
	if (!(ia_valid & ATTR_MTIME_SET))
		attr->ia_mtime = now;
	if (ia_valid & ATTR_KILL_SUID) {
		ia_valid &= ~ATTR_KILL_SUID;
		if (inode->i_mode & S_ISUID) {
			if (!(ia_valid & ATTR_MODE)) {
				ia_valid |= ATTR_MODE;
				ia_mode = inode->i_mode;
			}
			ia_mode &= ~S_ISUID;
		}
	}
	if (ia_valid & ATTR_KILL_SGID) {
		ia_valid &= ~ATTR_KILL_SGID;
		if ((inode->i_mode & (S_ISGID | S_IXGRP)) ==
		    (S_ISGID | S_IXGRP)) {
			if (!(ia_valid & ATTR_MODE)) {
				ia_valid |= ATTR_MODE;
				ia_mode = inode->i_mode;
			}
			ia_mode &= ~S_ISGID;
		}
	}

	if (!ia_valid)
		return 0;

	/*
	 * For RHEL, we've added the S_NOATTRKILL flag to allow filesystems
	 * to opt-out of ATTR_KILL_S*ID processing. The ATTR_KILL_S*ID bits
	 * are now handled in two stages. First, we calculate what the
	 * ia_valid and the ia_mode would look like if we were to allow the
	 * ATTR_KILL_S*ID bits to modify them. We then make the decision of
	 * whether to allow the modification to occur. We could just skip
	 * all of the ATTR_KILL_S*ID processing altogether, but we need it
	 * for inotify. If a process is watching for mode changes, we want
	 * it to be notified if we suspect that the server will be doing the
	 * mode change for us.
	 */
	if ((ia_valid & ATTR_MODE) && !(inode->i_flags & S_NOATTRKILL)) {
		attr->ia_valid = ia_valid;
		attr->ia_mode = ia_mode;
	}

	if (ia_valid & ATTR_SIZE)
		down_write(&dentry->d_inode->i_alloc_sem);

	if (inode->i_op && inode->i_op->setattr) {
		error = security_inode_setattr(dentry, attr);
		if (!error)
			error = inode->i_op->setattr(dentry, attr);
	} else {
		error = inode_change_ok(inode, attr);
		if (!error)
			error = security_inode_setattr(dentry, attr);
		if (!error) {
			if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
			    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
				error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
			if (!error)
				error = inode_setattr(inode, attr);
		}
	}

	if (ia_valid & ATTR_SIZE)
		up_write(&dentry->d_inode->i_alloc_sem);

	if (!error)
		fsnotify_change(dentry, ia_valid);

	return error;
}
Beispiel #2
0
dotraplinkage void __kprobes
do_general_protection(struct pt_regs *regs, long error_code)
{
	struct task_struct *tsk;

	conditional_sti(regs);

#ifdef CONFIG_X86_32
	if (v8086_mode(regs))
		goto gp_in_vm86;
#endif

	tsk = current;
	if (!user_mode_novm(regs))
		goto gp_in_kernel;

#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC)
	if (!nx_enabled && tsk->mm && (tsk->mm->pax_flags & MF_PAX_PAGEEXEC)) {
		struct mm_struct *mm = tsk->mm;
		unsigned long limit;

		down_write(&mm->mmap_sem);
		limit = mm->context.user_cs_limit;
		if (limit < TASK_SIZE) {
			track_exec_limit(mm, limit, TASK_SIZE, VM_EXEC);
			up_write(&mm->mmap_sem);
			return;
		}
		up_write(&mm->mmap_sem);
	}
#endif

	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 13;

	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
			printk_ratelimit()) {
		printk(KERN_INFO
			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
			tsk->comm, task_pid_nr(tsk),
			regs->ip, regs->sp, error_code);
		print_vma_addr(" in ", regs->ip);
		printk("\n");
	}

	force_sig(SIGSEGV, tsk);
	return;

#ifdef CONFIG_X86_32
gp_in_vm86:
	local_irq_enable();
	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
	return;
#endif

gp_in_kernel:
	if (fixup_exception(regs))
		return;

	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 13;
	if (notify_die(DIE_GPF, "general protection fault", regs,
				error_code, 13, SIGSEGV) == NOTIFY_STOP)
		return;

#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
	if ((regs->cs & 0xFFFF) == __KERNEL_CS)
		die("PAX: suspicious general protection fault", regs, error_code);
	else
#endif

	die("general protection fault", regs, error_code);
}
Beispiel #3
0
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
	struct rb_node **rb_link, *rb_parent;
	int retval;
	unsigned long charge;
	struct mempolicy *pol;

	uprobe_start_dup_mmap();
	down_write(&oldmm->mmap_sem);
	flush_cache_dup_mm(oldmm);
	uprobe_dup_mmap(oldmm, mm);
	/*
	 * Not linked in yet - no deadlock potential:
	 */
	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);

	mm->locked_vm = 0;
	mm->mmap = NULL;
	mm->mmap_cache = NULL;
	mm->free_area_cache = oldmm->mmap_base;
	mm->cached_hole_size = ~0UL;
	mm->map_count = 0;
	cpumask_clear(mm_cpumask(mm));
	mm->mm_rb = RB_ROOT;
	rb_link = &mm->mm_rb.rb_node;
	rb_parent = NULL;
	pprev = &mm->mmap;
	retval = ksm_fork(mm, oldmm);
	if (retval)
		goto out;
	retval = khugepaged_fork(mm, oldmm);
	if (retval)
		goto out;

	prev = NULL;
	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
		struct file *file;

		if (mpnt->vm_flags & VM_DONTCOPY) {
			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
							-vma_pages(mpnt));
			continue;
		}
		charge = 0;
		if (mpnt->vm_flags & VM_ACCOUNT) {
			unsigned long len = vma_pages(mpnt);

			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
				goto fail_nomem;
			charge = len;
		}
		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (!tmp)
			goto fail_nomem;
		*tmp = *mpnt;
		INIT_LIST_HEAD(&tmp->anon_vma_chain);
		pol = mpol_dup(vma_policy(mpnt));
		retval = PTR_ERR(pol);
		if (IS_ERR(pol))
			goto fail_nomem_policy;
		vma_set_policy(tmp, pol);
		tmp->vm_mm = mm;
		if (anon_vma_fork(tmp, mpnt))
			goto fail_nomem_anon_vma_fork;
		tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_next = tmp->vm_prev = NULL;
		file = tmp->vm_file;
		if (file) {
			struct inode *inode = file->f_path.dentry->d_inode;
			struct address_space *mapping = file->f_mapping;

			get_file(file);
			if (tmp->vm_flags & VM_DENYWRITE)
				atomic_dec(&inode->i_writecount);
			mutex_lock(&mapping->i_mmap_mutex);
			if (tmp->vm_flags & VM_SHARED)
				mapping->i_mmap_writable++;
			flush_dcache_mmap_lock(mapping);
			/* insert tmp into the share list, just after mpnt */
			if (unlikely(tmp->vm_flags & VM_NONLINEAR))
				vma_nonlinear_insert(tmp,
						&mapping->i_mmap_nonlinear);
			else
				vma_interval_tree_insert_after(tmp, mpnt,
							&mapping->i_mmap);
			flush_dcache_mmap_unlock(mapping);
			mutex_unlock(&mapping->i_mmap_mutex);
		}

		/*
		 * Clear hugetlb-related page reserves for children. This only
		 * affects MAP_PRIVATE mappings. Faults generated by the child
		 * are not guaranteed to succeed, even if read-only
		 */
		if (is_vm_hugetlb_page(tmp))
			reset_vma_resv_huge_pages(tmp);

		/*
		 * Link in the new vma and copy the page table entries.
		 */
		*pprev = tmp;
		pprev = &tmp->vm_next;
		tmp->vm_prev = prev;
		prev = tmp;

		__vma_link_rb(mm, tmp, rb_link, rb_parent);
		rb_link = &tmp->vm_rb.rb_right;
		rb_parent = &tmp->vm_rb;

		mm->map_count++;
		retval = copy_page_range(mm, oldmm, mpnt);

		if (tmp->vm_ops && tmp->vm_ops->open)
			tmp->vm_ops->open(tmp);

		if (retval)
			goto out;
	}
	/* a new mm has just been created */
	arch_dup_mmap(oldmm, mm);
	retval = 0;
out:
	up_write(&mm->mmap_sem);
	flush_tlb_mm(oldmm);
	up_write(&oldmm->mmap_sem);
	uprobe_end_dup_mmap();
	return retval;
fail_nomem_anon_vma_fork:
	mpol_put(pol);
fail_nomem_policy:
	kmem_cache_free(vm_area_cachep, tmp);
fail_nomem:
	retval = -ENOMEM;
	vm_unacct_memory(charge);
	goto out;
}
Beispiel #4
0
static void gameport_release_driver(struct gameport *gameport)
{
	down_write(&gameport_bus.subsys.rwsem);
	device_release_driver(&gameport->dev);
	up_write(&gameport_bus.subsys.rwsem);
}
void
ia64_elf32_init (struct pt_regs *regs)
{
	struct vm_area_struct *vma;

	/*
	 * Map GDT and TSS below 4GB, where the processor can find them.  We need to map
	 * it with privilege level 3 because the IVE uses non-privileged accesses to these
	 * tables.  IA-32 segmentation is used to protect against IA-32 accesses to them.
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (vma) {
		vma->vm_mm = current->mm;
		vma->vm_start = IA32_GDT_OFFSET;
		vma->vm_end = vma->vm_start + max(PAGE_SIZE, 2*IA32_PAGE_SIZE);
		vma->vm_page_prot = PAGE_SHARED;
		vma->vm_flags = VM_READ|VM_MAYREAD;
		vma->vm_ops = &ia32_shared_page_vm_ops;
		vma->vm_pgoff = 0;
		vma->vm_file = NULL;
		vma->vm_private_data = NULL;
		down_write(&current->mm->mmap_sem);
		{
			insert_vm_struct(current->mm, vma);
		}
		up_write(&current->mm->mmap_sem);
	}

	/*
	 * Install LDT as anonymous memory.  This gives us all-zero segment descriptors
	 * until a task modifies them via modify_ldt().
	 */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (vma) {
		vma->vm_mm = current->mm;
		vma->vm_start = IA32_LDT_OFFSET;
		vma->vm_end = vma->vm_start + PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
		vma->vm_page_prot = PAGE_SHARED;
		vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE;
		vma->vm_ops = NULL;
		vma->vm_pgoff = 0;
		vma->vm_file = NULL;
		vma->vm_private_data = NULL;
		down_write(&current->mm->mmap_sem);
		{
			insert_vm_struct(current->mm, vma);
		}
		up_write(&current->mm->mmap_sem);
	}

	ia64_psr(regs)->ac = 0;		/* turn off alignment checking */
	regs->loadrs = 0;
	/*
	 *  According to the ABI %edx points to an `atexit' handler.  Since we don't have
	 *  one we'll set it to 0 and initialize all the other registers just to make
	 *  things more deterministic, ala the i386 implementation.
	 */
	regs->r8 = 0;	/* %eax */
	regs->r11 = 0;	/* %ebx */
	regs->r9 = 0;	/* %ecx */
	regs->r10 = 0;	/* %edx */
	regs->r13 = 0;	/* %ebp */
	regs->r14 = 0;	/* %esi */
	regs->r15 = 0;	/* %edi */

	current->thread.eflag = IA32_EFLAG;
	current->thread.fsr = IA32_FSR_DEFAULT;
	current->thread.fcr = IA32_FCR_DEFAULT;
	current->thread.fir = 0;
	current->thread.fdr = 0;

	/*
	 * Setup GDTD.  Note: GDTD is the descrambled version of the pseudo-descriptor
	 * format defined by Figure 3-11 "Pseudo-Descriptor Format" in the IA-32
	 * architecture manual. Also note that the only fields that are not ignored are
	 * `base', `limit', 'G', `P' (must be 1) and `S' (must be 0).
	 */
	regs->r31 = IA32_SEG_UNSCRAMBLE(IA32_SEG_DESCRIPTOR(IA32_GDT_OFFSET, IA32_PAGE_SIZE - 1,
							    0, 0, 0, 1, 0, 0, 0));
	/* Setup the segment selectors */
	regs->r16 = (__USER_DS << 16) | __USER_DS; /* ES == DS, GS, FS are zero */
	regs->r17 = (__USER_DS << 16) | __USER_CS; /* SS, CS; ia32_load_state() sets TSS and LDT */

	ia32_load_segment_descriptors(current);
	ia32_load_state(current);
}
Beispiel #6
0
/**
 * nilfs_resize_fs - resize the filesystem
 * @sb: super block instance
 * @newsize: new size of the filesystem (in bytes)
 */
int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_super_block **sbp;
	__u64 devsize, newnsegs;
	loff_t sb2off;
	int ret;

	ret = -ERANGE;
	devsize = i_size_read(sb->s_bdev->bd_inode);
	if (newsize > devsize)
		goto out;

	/*
	 * Write lock is required to protect some functions depending
	 * on the number of segments, the number of reserved segments,
	 * and so forth.
	 */
	down_write(&nilfs->ns_segctor_sem);

	sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
	newnsegs = sb2off >> nilfs->ns_blocksize_bits;
	do_div(newnsegs, nilfs->ns_blocks_per_segment);

	ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
	up_write(&nilfs->ns_segctor_sem);
	if (ret < 0)
		goto out;

	ret = nilfs_construct_segment(sb);
	if (ret < 0)
		goto out;

	down_write(&nilfs->ns_sem);
	nilfs_move_2nd_super(sb, sb2off);
	ret = -EIO;
	sbp = nilfs_prepare_super(sb, 0);
	if (likely(sbp)) {
		nilfs_set_log_cursor(sbp[0], nilfs);
		/*
		 * Drop NILFS_RESIZE_FS flag for compatibility with
		 * mount-time resize which may be implemented in a
		 * future release.
		 */
		sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
					      ~NILFS_RESIZE_FS);
		sbp[0]->s_dev_size = cpu_to_le64(newsize);
		sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
		if (sbp[1])
			memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
		ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
	}
	up_write(&nilfs->ns_sem);

	/*
	 * Reset the range of allocatable segments last.  This order
	 * is important in the case of expansion because the secondary
	 * superblock must be protected from log write until migration
	 * completes.
	 */
	if (!ret)
		nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
out:
	return ret;
}
Beispiel #7
0
void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
{
	handle_t *handle;
	int inline_size, value_len, needed_blocks;
	size_t i_size;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};


	needed_blocks = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
	if (IS_ERR(handle))
		return;

	down_write(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		*has_inline = 0;
		ext4_journal_stop(handle);
		return;
	}

	if (ext4_orphan_add(handle, inode))
		goto out;

	if (ext4_get_inode_loc(inode, &is.iloc))
		goto out;

	down_write(&EXT4_I(inode)->i_data_sem);
	i_size = inode->i_size;
	inline_size = ext4_get_inline_size(inode);
	EXT4_I(inode)->i_disksize = i_size;

	if (i_size < inline_size) {
		/* Clear the content in the xattr space. */
		if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) {
			if (ext4_xattr_ibody_find(inode, &i, &is))
				goto out_error;

			BUG_ON(is.s.not_found);

			value_len = le32_to_cpu(is.s.here->e_value_size);
			value = kmalloc(value_len, GFP_NOFS);
			if (!value)
				goto out_error;

			if (ext4_xattr_ibody_get(inode, i.name_index, i.name,
						value, value_len))
				goto out_error;

			i.value = value;
			i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
					i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
			if (ext4_xattr_ibody_inline_set(handle, inode, &i, &is))
				goto out_error;
		}

		/* Clear the content within i_blocks. */
		if (i_size < EXT4_MIN_INLINE_DATA_SIZE) {
			void *p = (void *) ext4_raw_inode(&is.iloc)->i_block;
			memset(p + i_size, 0,
			       EXT4_MIN_INLINE_DATA_SIZE - i_size);
		}

		EXT4_I(inode)->i_inline_size = i_size <
					EXT4_MIN_INLINE_DATA_SIZE ?
					EXT4_MIN_INLINE_DATA_SIZE : i_size;
	}

out_error:
	up_write(&EXT4_I(inode)->i_data_sem);
out:
	brelse(is.iloc.bh);
	up_write(&EXT4_I(inode)->xattr_sem);
	kfree(value);
	if (inode->i_nlink)
		ext4_orphan_del(handle, inode);

	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	ext4_journal_stop(handle);
	return;
}
static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
	struct exec ex;
	unsigned long error;
	unsigned long fd_offset;
	unsigned long rlim;
	int retval;

	ex = *((struct exec *) bprm->buf);		/* exec-header */
	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
	     N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
	    N_TRSIZE(ex) || N_DRSIZE(ex) ||
	    bprm->file->f_dentry->d_inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
		return -ENOEXEC;
	}

	fd_offset = N_TXTOFF(ex);

	/* Check initial limits. This avoids letting people circumvent
	 * size limits imposed on them by creating programs with large
	 * arrays in the data or bss.
	 */
	rlim = current->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim >= RLIM_INFINITY)
		rlim = ~0;
	if (ex.a_data + ex.a_bss > rlim)
		return -ENOMEM;

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		return retval;

	/* OK, This is the point of no return */
#if defined(__alpha__)
	SET_AOUT_PERSONALITY(bprm, ex);
#elif defined(__sparc__)
	set_personality(PER_SUNOS);
#if !defined(__sparc_v9__)
	memcpy(&current->thread.core_exec, &ex, sizeof(struct exec));
#endif
#else
	set_personality(PER_LINUX);
#endif

	current->mm->end_code = ex.a_text +
		(current->mm->start_code = N_TXTADDR(ex));
	current->mm->end_data = ex.a_data +
		(current->mm->start_data = N_DATADDR(ex));
	current->mm->brk = ex.a_bss +
		(current->mm->start_brk = N_BSSADDR(ex));

	current->mm->rss = 0;
	current->mm->mmap = NULL;

#ifdef CONFIG_ARM_FASS
	arch_new_mm(current, current->mm);
#endif

	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
#ifdef __sparc__
	if (N_MAGIC(ex) == NMAGIC) {
		loff_t pos = fd_offset;
		/* F**k me plenty... */
		/* <AOL></AOL> */
		down_write(&current->mm->mmap_sem);
		error = do_brk(N_TXTADDR(ex), ex.a_text);
		up_write(&current->mm->mmap_sem);
		bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
			  ex.a_text, &pos);
		down_write(&current->mm->mmap_sem);
		error = do_brk(N_DATADDR(ex), ex.a_data);
		up_write(&current->mm->mmap_sem);
		bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
			  ex.a_data, &pos);
		goto beyond_if;
	}
#endif

	if (N_MAGIC(ex) == OMAGIC) {
		unsigned long text_addr, map_size;
		loff_t pos;

		text_addr = N_TXTADDR(ex);

#if defined(__alpha__) || defined(__sparc__)
		pos = fd_offset;
		map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
#else
		pos = 32;
		map_size = ex.a_text+ex.a_data;
#endif
		down_write(&current->mm->mmap_sem);
		error = do_brk(text_addr & PAGE_MASK, map_size);
		up_write(&current->mm->mmap_sem);
		if (error != (text_addr & PAGE_MASK)) {
			send_sig(SIGKILL, current, 0);
			return error;
		}

		error = bprm->file->f_op->read(bprm->file, (char *)text_addr,
			  ex.a_text+ex.a_data, &pos);
		if ((signed long)error < 0) {
			send_sig(SIGKILL, current, 0);
			return error;
		}
			 
		flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
	} else {
		static unsigned long error_time, error_time2;
		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
		    (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
		{
			printk(KERN_NOTICE "executable not page aligned\n");
			error_time2 = jiffies;
		}

		if ((fd_offset & ~PAGE_MASK) != 0 &&
		    (jiffies-error_time) > 5*HZ)
		{
			printk(KERN_WARNING 
			       "fd_offset is not page aligned. Please convert program: %s\n",
			       bprm->file->f_dentry->d_name.name);
			error_time = jiffies;
		}

		if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
			loff_t pos = fd_offset;
			down_write(&current->mm->mmap_sem);
			do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
			up_write(&current->mm->mmap_sem);
			bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
					ex.a_text+ex.a_data, &pos);
			flush_icache_range((unsigned long) N_TXTADDR(ex),
					   (unsigned long) N_TXTADDR(ex) +
					   ex.a_text+ex.a_data);
			goto beyond_if;
		}

		down_write(&current->mm->mmap_sem);
		error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
			PROT_READ | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
			fd_offset);
		up_write(&current->mm->mmap_sem);

		if (error != N_TXTADDR(ex)) {
			send_sig(SIGKILL, current, 0);
			return error;
		}

		down_write(&current->mm->mmap_sem);
 		error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
				PROT_READ | PROT_WRITE | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
				fd_offset + ex.a_text);
		up_write(&current->mm->mmap_sem);
		if (error != N_DATADDR(ex)) {
			send_sig(SIGKILL, current, 0);
			return error;
		}
	}
beyond_if:
	set_binfmt(&aout_format);

	retval = set_brk(current->mm->start_brk, current->mm->brk);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		return retval;
	}

	retval = setup_arg_pages(bprm, STACK_TOP); 
	if (retval < 0) { 
		/* Someone check-me: is this error path enough? */ 
		send_sig(SIGKILL, current, 0); 
		return retval;
	}

	current->mm->start_stack =
		(unsigned long) create_aout_tables((char *) bprm->p, bprm);
#ifdef __alpha__
	regs->gp = ex.a_gpvalue;
#endif
	start_thread(regs, ex.a_entry, current->mm->start_stack);
	if (current->ptrace & PT_PTRACED)
		send_sig(SIGTRAP, current, 0);
#ifndef __arm__
	return 0;
#else
	return regs->ARM_r0;
#endif
}
static int load_aout_library(struct file *file)
{
	struct inode * inode;
	unsigned long bss, start_addr, len;
	unsigned long error;
	int retval;
	struct exec ex;

	inode = file->f_dentry->d_inode;

	retval = -ENOEXEC;
	error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
	if (error != sizeof(ex))
		goto out;

	/* We come in here for the regular a.out style of shared libraries */
	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
	    N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
	    inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
		goto out;
	}

	if (N_FLAGS(ex))
		goto out;

	/* For  QMAGIC, the starting address is 0x20 into the page.  We mask
	   this off to get the starting address for the page */
#ifndef __arm__
	start_addr =  ex.a_entry & 0xfffff000;
#else
	start_addr = ex.a_entry & 0xffff8000;
#endif

	if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
		static unsigned long error_time;
		loff_t pos = N_TXTOFF(ex);

		if ((jiffies-error_time) > 5*HZ)
		{
			printk(KERN_WARNING 
			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
			       file->f_dentry->d_name.name);
			error_time = jiffies;
		}
		
		down_write(&current->mm->mmap_sem);
		do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
		up_write(&current->mm->mmap_sem);
		
		file->f_op->read(file, (char *)start_addr,
			ex.a_text + ex.a_data, &pos);
		flush_icache_range((unsigned long) start_addr,
				   (unsigned long) start_addr + ex.a_text + ex.a_data);

		retval = 0;
		goto out;
	}
	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			N_TXTOFF(ex));
	up_write(&current->mm->mmap_sem);
	retval = error;
	if (error != start_addr)
		goto out;

	len = PAGE_ALIGN(ex.a_text + ex.a_data);
	bss = ex.a_text + ex.a_data + ex.a_bss;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(start_addr + len, bss - len);
		up_write(&current->mm->mmap_sem);

		retval = error;
		if (error != start_addr + len)
			goto out;
	}
	retval = 0;
out:
	return retval;
}
Beispiel #10
0
/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
int dm_suspend(struct mapped_device *md, int do_lockfs)
{
	struct dm_table *map = NULL;
	DECLARE_WAITQUEUE(wait, current);
	struct bio *def;
	int r = -EINVAL;

	down(&md->suspend_lock);

	if (dm_suspended(md))
		goto out;

	map = dm_get_table(md);

	/* This does not get reverted if there's an error later. */
	dm_table_presuspend_targets(map);

	md->suspended_bdev = bdget_disk(md->disk, 0);
	if (!md->suspended_bdev) {
		DMWARN("bdget failed in dm_suspend");
		r = -ENOMEM;
		goto out;
	}

	/* Flush I/O to the device. */
	if (do_lockfs) {
		r = lock_fs(md);
		if (r)
			goto out;
	}

	/*
	 * First we set the BLOCK_IO flag so no more ios will be mapped.
	 */
	down_write(&md->io_lock);
	set_bit(DMF_BLOCK_IO, &md->flags);

	add_wait_queue(&md->wait, &wait);
	up_write(&md->io_lock);

	/* unplug */
	if (map)
		dm_table_unplug_all(map);

	/*
	 * Then we wait for the already mapped ios to
	 * complete.
	 */
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (!atomic_read(&md->pending) || signal_pending(current))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	down_write(&md->io_lock);
	remove_wait_queue(&md->wait, &wait);

	/* were we interrupted ? */
	r = -EINTR;
	if (atomic_read(&md->pending)) {
		clear_bit(DMF_BLOCK_IO, &md->flags);
		def = bio_list_get(&md->deferred);
		__flush_deferred_io(md, def);
		up_write(&md->io_lock);
		unlock_fs(md);
		goto out;
	}
	up_write(&md->io_lock);

	dm_table_postsuspend_targets(map);

	set_bit(DMF_SUSPENDED, &md->flags);

	r = 0;

out:
	if (r && md->suspended_bdev) {
		bdput(md->suspended_bdev);
		md->suspended_bdev = NULL;
	}

	dm_table_put(map);
	up(&md->suspend_lock);
	return r;
}
Beispiel #11
0
/*
 * inode->i_mutex: down
 */
int
reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
			  struct inode *inode, const char *name,
			  const void *buffer, size_t buffer_size, int flags)
{
	int err = 0;
	struct dentry *dentry;
	struct page *page;
	char *data;
	size_t file_pos = 0;
	size_t buffer_pos = 0;
	size_t new_size;
	__u32 xahash = 0;

	if (get_inode_sd_version(inode) == STAT_DATA_V1)
		return -EOPNOTSUPP;

	if (!buffer) {
		err = lookup_and_delete_xattr(inode, name);
		return err;
	}

	dentry = xattr_lookup(inode, name, flags);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	down_write(&REISERFS_I(inode)->i_xattr_sem);

	xahash = xattr_hash(buffer, buffer_size);
	while (buffer_pos < buffer_size || buffer_pos == 0) {
		size_t chunk;
		size_t skip = 0;
		size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
		if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
			chunk = PAGE_CACHE_SIZE;
		else
			chunk = buffer_size - buffer_pos;

		page = reiserfs_get_page(dentry->d_inode, file_pos);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto out_unlock;
		}

		lock_page(page);
		data = page_address(page);

		if (file_pos == 0) {
			struct reiserfs_xattr_header *rxh;
			skip = file_pos = sizeof(struct reiserfs_xattr_header);
			if (chunk + skip > PAGE_CACHE_SIZE)
				chunk = PAGE_CACHE_SIZE - skip;
			rxh = (struct reiserfs_xattr_header *)data;
			rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC);
			rxh->h_hash = cpu_to_le32(xahash);
		}

		reiserfs_write_lock(inode->i_sb);
		err = __reiserfs_write_begin(page, page_offset, chunk + skip);
		if (!err) {
			if (buffer)
				memcpy(data + skip, buffer + buffer_pos, chunk);
			err = reiserfs_commit_write(NULL, page, page_offset,
						    page_offset + chunk +
						    skip);
		}
		reiserfs_write_unlock(inode->i_sb);
		unlock_page(page);
		reiserfs_put_page(page);
		buffer_pos += chunk;
		file_pos += chunk;
		skip = 0;
		if (err || buffer_size == 0 || !buffer)
			break;
	}

	new_size = buffer_size + sizeof(struct reiserfs_xattr_header);
	if (!err && new_size < i_size_read(dentry->d_inode)) {
		struct iattr newattrs = {
			.ia_ctime = current_fs_time(inode->i_sb),
			.ia_size = new_size,
			.ia_valid = ATTR_SIZE | ATTR_CTIME,
		};

		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
		inode_dio_wait(dentry->d_inode);

		err = reiserfs_setattr(dentry, &newattrs);
		mutex_unlock(&dentry->d_inode->i_mutex);
	} else
		update_ctime(inode);
out_unlock:
	up_write(&REISERFS_I(inode)->i_xattr_sem);
	dput(dentry);
	return err;
}
Beispiel #12
0
static void genl_unlock_all(void)
{
	genl_unlock();
	up_write(&cb_lock);
}
Beispiel #13
0
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
                        struct inode *tmp_inode)
{
    int retval;
    __le32    i_data[3];
    struct ext4_inode_info *ei = EXT4_I(inode);
    struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);

    /*
     * One credit accounted for writing the
     * i_data field of the original inode
     */
    retval = ext4_journal_extend(handle, 1);
    if (retval) {
        retval = ext4_journal_restart(handle, 1);
        if (retval)
            goto err_out;
    }

    i_data[0] = ei->i_data[EXT4_IND_BLOCK];
    i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
    i_data[2] = ei->i_data[EXT4_TIND_BLOCK];

    down_write(&EXT4_I(inode)->i_data_sem);
    /*
     * if EXT4_EXT_MIGRATE is cleared a block allocation
     * happened after we started the migrate. We need to
     * fail the migrate
     */
    if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
        retval = -EAGAIN;
        up_write(&EXT4_I(inode)->i_data_sem);
        goto err_out;
    } else
        EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
                            ~EXT4_EXT_MIGRATE;
    /*
     * We have the extent map build with the tmp inode.
     * Now copy the i_data across
     */
    ei->i_flags |= EXT4_EXTENTS_FL;
    memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));

    /*
     * Update i_blocks with the new blocks that got
     * allocated while adding extents for extent index
     * blocks.
     *
     * While converting to extents we need not
     * update the orignal inode i_blocks for extent blocks
     * via quota APIs. The quota update happened via tmp_inode already.
     */
    spin_lock(&inode->i_lock);
    inode->i_blocks += tmp_inode->i_blocks;
    spin_unlock(&inode->i_lock);
    up_write(&EXT4_I(inode)->i_data_sem);

    /*
     * We mark the inode dirty after, because we decrement the
     * i_blocks when freeing the indirect meta-data blocks
     */
    retval = free_ind_block(handle, inode, i_data);
    ext4_mark_inode_dirty(handle, inode);

err_out:
    return retval;
}
/*
 * Allocate a new mm structure and copy contents from the
 * mm structure of the passed in task structure.
 */
struct mm_struct *dup_mm(struct task_struct *tsk)
{
	struct mm_struct *mm, *oldmm = current->mm;
	int err;

	if (!oldmm)
		return NULL;

	mm = allocate_mm();
	if (!mm)
		goto fail_nomem;

	memcpy(mm, oldmm, sizeof(*mm));
	mm_init_cpumask(mm);

	/* Initializing for Swap token stuff */
	mm->token_priority = 0;
	mm->last_interval = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	mm->pmd_huge_pte = NULL;
#endif
	uprobe_reset_state(mm);

	if (!mm_init(mm, tsk))
		goto fail_nomem;

	if (init_new_context(tsk, mm))
		goto fail_nocontext;

	dup_mm_exe_file(oldmm, mm);

	err = dup_mmap(mm, oldmm);
	if (err)
		goto free_pt;

#ifdef CONFIG_HOMECACHE
	{
		/* Reset vm_pid on all vmas.  In the new mm_struct, we
		 * want to switch anything that was associated with
		 * the parent to be associated with the child, and
		 * clear everything else.
		 */
		struct vm_area_struct *mpnt;
		down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
		for (mpnt = mm->mmap; mpnt; mpnt = mpnt->vm_next) {
			if (mpnt->vm_pid == current->pid)
				mpnt->vm_pid = tsk->pid;
			else
				mpnt->vm_pid = 0;
		}
		up_write(&mm->mmap_sem);
	}
#endif

	mm->hiwater_rss = get_mm_rss(mm);
	mm->hiwater_vm = mm->total_vm;

	if (mm->binfmt && !try_module_get(mm->binfmt->module))
		goto free_pt;

	return mm;

free_pt:
	/* don't put binfmt in mmput, we haven't got module yet */
	mm->binfmt = NULL;
	mmput(mm);

fail_nomem:
	return NULL;

fail_nocontext:
	/*
	 * If init_new_context() failed, we cannot use mmput() to free the mm
	 * because it calls destroy_context()
	 */
	mm_free_pgd(mm);
	free_mm(mm);
	return NULL;
}
Beispiel #15
0
/**
 * nilfs_fill_super() - initialize a super block instance
 * @sb: super_block
 * @data: mount options
 * @silent: silent mode flag
 *
 * This function is called exclusively by nilfs->ns_mount_mutex.
 * So, the recovery process is protected from other simultaneous mounts.
 */
static int
nilfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct the_nilfs *nilfs;
	struct nilfs_root *fsroot;
	__u64 cno;
	int err;

	nilfs = alloc_nilfs(sb);
	if (!nilfs)
		return -ENOMEM;

	sb->s_fs_info = nilfs;

	err = init_nilfs(nilfs, sb, (char *)data);
	if (err)
		goto failed_nilfs;

	sb->s_op = &nilfs_sops;
	sb->s_export_op = &nilfs_export_ops;
	sb->s_root = NULL;
	sb->s_time_gran = 1;
	sb->s_max_links = NILFS_LINK_MAX;

	sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;

	err = load_nilfs(nilfs, sb);
	if (err)
		goto failed_nilfs;

	cno = nilfs_last_cno(nilfs);
	err = nilfs_attach_checkpoint(sb, cno, true, &fsroot);
	if (err) {
		nilfs_msg(sb, KERN_ERR,
			  "error %d while loading last checkpoint (checkpoint number=%llu)",
			  err, (unsigned long long)cno);
		goto failed_unload;
	}

	if (!(sb->s_flags & MS_RDONLY)) {
		err = nilfs_attach_log_writer(sb, fsroot);
		if (err)
			goto failed_checkpoint;
	}

	err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root);
	if (err)
		goto failed_segctor;

	nilfs_put_root(fsroot);

	if (!(sb->s_flags & MS_RDONLY)) {
		down_write(&nilfs->ns_sem);
		nilfs_setup_super(sb, true);
		up_write(&nilfs->ns_sem);
	}

	return 0;

 failed_segctor:
	nilfs_detach_log_writer(sb);

 failed_checkpoint:
	nilfs_put_root(fsroot);

 failed_unload:
	iput(nilfs->ns_sufile);
	iput(nilfs->ns_cpfile);
	iput(nilfs->ns_dat);

 failed_nilfs:
	destroy_nilfs(nilfs);
	return err;
}
Beispiel #16
0
/*
 * Garbage collector for unused keys.
 *
 * This is done in process context so that we don't have to disable interrupts
 * all over the place.  key_put() schedules this rather than trying to do the
 * cleanup itself, which means key_put() doesn't have to sleep.
 */
static void key_garbage_collector(struct work_struct *work)
{
	static LIST_HEAD(graveyard);
	static u8 gc_state;		/* Internal persistent state */
#define KEY_GC_REAP_AGAIN	0x01	/* - Need another cycle */
#define KEY_GC_REAPING_LINKS	0x02	/* - We need to reap links */
#define KEY_GC_SET_TIMER	0x04	/* - We need to restart the timer */
#define KEY_GC_REAPING_DEAD_1	0x10	/* - We need to mark dead keys */
#define KEY_GC_REAPING_DEAD_2	0x20	/* - We need to reap dead key links */
#define KEY_GC_REAPING_DEAD_3	0x40	/* - We need to reap dead keys */
#define KEY_GC_FOUND_DEAD_KEY	0x80	/* - We found at least one dead key */

	struct rb_node *cursor;
	struct key *key;
	time_t new_timer, limit;

	kenter("[%lx,%x]", key_gc_flags, gc_state);

	limit = current_kernel_time().tv_sec;
	if (limit > key_gc_delay)
		limit -= key_gc_delay;
	else
		limit = key_gc_delay;

	/* Work out what we're going to be doing in this pass */
	gc_state &= KEY_GC_REAPING_DEAD_1 | KEY_GC_REAPING_DEAD_2;
	gc_state <<= 1;
	if (test_and_clear_bit(KEY_GC_KEY_EXPIRED, &key_gc_flags))
		gc_state |= KEY_GC_REAPING_LINKS | KEY_GC_SET_TIMER;

	if (test_and_clear_bit(KEY_GC_REAP_KEYTYPE, &key_gc_flags))
		gc_state |= KEY_GC_REAPING_DEAD_1;
	kdebug("new pass %x", gc_state);

	new_timer = LONG_MAX;

	/* As only this function is permitted to remove things from the key
	 * serial tree, if cursor is non-NULL then it will always point to a
	 * valid node in the tree - even if lock got dropped.
	 */
	spin_lock(&key_serial_lock);
	cursor = rb_first(&key_serial_tree);

continue_scanning:
	while (cursor) {
		key = rb_entry(cursor, struct key, serial_node);
		cursor = rb_next(cursor);

		if (atomic_read(&key->usage) == 0)
			goto found_unreferenced_key;

		if (unlikely(gc_state & KEY_GC_REAPING_DEAD_1)) {
			if (key->type == key_gc_dead_keytype) {
				gc_state |= KEY_GC_FOUND_DEAD_KEY;
				set_bit(KEY_FLAG_DEAD, &key->flags);
				key->perm = 0;
				goto skip_dead_key;
			}
		}

		if (gc_state & KEY_GC_SET_TIMER) {
			if (key->expiry > limit && key->expiry < new_timer) {
				kdebug("will expire %x in %ld",
				       key_serial(key), key->expiry - limit);
				new_timer = key->expiry;
			}
		}

		if (unlikely(gc_state & KEY_GC_REAPING_DEAD_2))
			if (key->type == key_gc_dead_keytype)
				gc_state |= KEY_GC_FOUND_DEAD_KEY;

		if ((gc_state & KEY_GC_REAPING_LINKS) ||
		    unlikely(gc_state & KEY_GC_REAPING_DEAD_2)) {
			if (key->type == &key_type_keyring)
				goto found_keyring;
		}

		if (unlikely(gc_state & KEY_GC_REAPING_DEAD_3))
			if (key->type == key_gc_dead_keytype)
				goto destroy_dead_key;

	skip_dead_key:
		if (spin_is_contended(&key_serial_lock) || need_resched())
			goto contended;
	}

contended:
	spin_unlock(&key_serial_lock);

maybe_resched:
	if (cursor) {
		cond_resched();
		spin_lock(&key_serial_lock);
		goto continue_scanning;
	}

	/* We've completed the pass.  Set the timer if we need to and queue a
	 * new cycle if necessary.  We keep executing cycles until we find one
	 * where we didn't reap any keys.
	 */
	kdebug("pass complete");

	if (gc_state & KEY_GC_SET_TIMER && new_timer != (time_t)LONG_MAX) {
		new_timer += key_gc_delay;
		key_schedule_gc(new_timer);
	}

	if (unlikely(gc_state & KEY_GC_REAPING_DEAD_2) ||
	    !list_empty(&graveyard)) {
		/* Make sure that all pending keyring payload destructions are
		 * fulfilled and that people aren't now looking at dead or
		 * dying keys that they don't have a reference upon or a link
		 * to.
		 */
		kdebug("gc sync");
		synchronize_rcu();
	}

	if (!list_empty(&graveyard)) {
		kdebug("gc keys");
		key_gc_unused_keys(&graveyard);
	}

	if (unlikely(gc_state & (KEY_GC_REAPING_DEAD_1 |
				 KEY_GC_REAPING_DEAD_2))) {
		if (!(gc_state & KEY_GC_FOUND_DEAD_KEY)) {
			/* No remaining dead keys: short circuit the remaining
			 * keytype reap cycles.
			 */
			kdebug("dead short");
			gc_state &= ~(KEY_GC_REAPING_DEAD_1 | KEY_GC_REAPING_DEAD_2);
			gc_state |= KEY_GC_REAPING_DEAD_3;
		} else {
			gc_state |= KEY_GC_REAP_AGAIN;
		}
	}

	if (unlikely(gc_state & KEY_GC_REAPING_DEAD_3)) {
		kdebug("dead wake");
		smp_mb();
		clear_bit(KEY_GC_REAPING_KEYTYPE, &key_gc_flags);
		wake_up_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE);
	}

	if (gc_state & KEY_GC_REAP_AGAIN)
		schedule_work(&key_gc_work);
	kleave(" [end %x]", gc_state);
	return;

	/* We found an unreferenced key - once we've removed it from the tree,
	 * we can safely drop the lock.
	 */
found_unreferenced_key:
	kdebug("unrefd key %d", key->serial);
	rb_erase(&key->serial_node, &key_serial_tree);
	spin_unlock(&key_serial_lock);

	list_add_tail(&key->graveyard_link, &graveyard);
	gc_state |= KEY_GC_REAP_AGAIN;
	goto maybe_resched;

	/* We found a keyring and we need to check the payload for links to
	 * dead or expired keys.  We don't flag another reap immediately as we
	 * have to wait for the old payload to be destroyed by RCU before we
	 * can reap the keys to which it refers.
	 */
found_keyring:
	spin_unlock(&key_serial_lock);
	kdebug("scan keyring %d", key->serial);
	key_gc_keyring(key, limit);
	goto maybe_resched;

	/* We found a dead key that is still referenced.  Reset its type and
	 * destroy its payload with its semaphore held.
	 */
destroy_dead_key:
	spin_unlock(&key_serial_lock);
	kdebug("destroy key %d", key->serial);
	down_write(&key->sem);
	key->type = &key_type_dead;
	if (key_gc_dead_keytype->destroy)
		key_gc_dead_keytype->destroy(key);
	memset(&key->payload, KEY_DESTROY, sizeof(key->payload));
	up_write(&key->sem);
	goto maybe_resched;
}
Beispiel #17
0
static int nilfs_remount(struct super_block *sb, int *flags, char *data)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	unsigned long old_sb_flags;
	unsigned long old_mount_opt;
	int err;

	sync_filesystem(sb);
	old_sb_flags = sb->s_flags;
	old_mount_opt = nilfs->ns_mount_opt;

	if (!parse_options(data, sb, 1)) {
		err = -EINVAL;
		goto restore_opts;
	}
	sb->s_flags = (sb->s_flags & ~MS_POSIXACL);

	err = -EINVAL;

	if (!nilfs_valid_fs(nilfs)) {
		nilfs_msg(sb, KERN_WARNING,
			  "couldn't remount because the filesystem is in an incomplete recovery state");
		goto restore_opts;
	}

	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
		goto out;
	if (*flags & MS_RDONLY) {
		/* Shutting down log writer */
		nilfs_detach_log_writer(sb);
		sb->s_flags |= MS_RDONLY;

		/*
		 * Remounting a valid RW partition RDONLY, so set
		 * the RDONLY flag and then mark the partition as valid again.
		 */
		down_write(&nilfs->ns_sem);
		nilfs_cleanup_super(sb);
		up_write(&nilfs->ns_sem);
	} else {
		__u64 features;
		struct nilfs_root *root;

		/*
		 * Mounting a RDONLY partition read-write, so reread and
		 * store the current valid flag.  (It may have been changed
		 * by fsck since we originally mounted the partition.)
		 */
		down_read(&nilfs->ns_sem);
		features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) &
			~NILFS_FEATURE_COMPAT_RO_SUPP;
		up_read(&nilfs->ns_sem);
		if (features) {
			nilfs_msg(sb, KERN_WARNING,
				  "couldn't remount RDWR because of unsupported optional features (%llx)",
				  (unsigned long long)features);
			err = -EROFS;
			goto restore_opts;
		}

		sb->s_flags &= ~MS_RDONLY;

		root = NILFS_I(d_inode(sb->s_root))->i_root;
		err = nilfs_attach_log_writer(sb, root);
		if (err)
			goto restore_opts;

		down_write(&nilfs->ns_sem);
		nilfs_setup_super(sb, true);
		up_write(&nilfs->ns_sem);
	}
 out:
	return 0;

 restore_opts:
	sb->s_flags = old_sb_flags;
	nilfs->ns_mount_opt = old_mount_opt;
	return err;
}
Beispiel #18
0
int notify_change(struct dentry * dentry, struct iattr * attr)
{
	struct inode *inode = dentry->d_inode;
	mode_t mode = inode->i_mode;
	int error;
	struct timespec now;
	unsigned int ia_valid = attr->ia_valid;

	if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
			return -EPERM;
	}

	now = current_fs_time(inode->i_sb);

	attr->ia_ctime = now;
	if (!(ia_valid & ATTR_ATIME_SET))
		attr->ia_atime = now;
	if (!(ia_valid & ATTR_MTIME_SET))
		attr->ia_mtime = now;
	if (ia_valid & ATTR_KILL_PRIV) {
		attr->ia_valid &= ~ATTR_KILL_PRIV;
		ia_valid &= ~ATTR_KILL_PRIV;
		error = security_inode_need_killpriv(dentry);
		if (error > 0)
			error = security_inode_killpriv(dentry);
		if (error)
			return error;
	}

	/*
	 * We now pass ATTR_KILL_S*ID to the lower level setattr function so
	 * that the function has the ability to reinterpret a mode change
	 * that's due to these bits. This adds an implicit restriction that
	 * no function will ever call notify_change with both ATTR_MODE and
	 * ATTR_KILL_S*ID set.
	 */
	if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) &&
	    (ia_valid & ATTR_MODE))
		BUG();

	if (ia_valid & ATTR_KILL_SUID) {
		if (mode & S_ISUID) {
			ia_valid = attr->ia_valid |= ATTR_MODE;
			attr->ia_mode = (inode->i_mode & ~S_ISUID);
		}
	}
	if (ia_valid & ATTR_KILL_SGID) {
		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
			if (!(ia_valid & ATTR_MODE)) {
				ia_valid = attr->ia_valid |= ATTR_MODE;
				attr->ia_mode = inode->i_mode;
			}
			attr->ia_mode &= ~S_ISGID;
		}
	}
	if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
		return 0;

	error = security_inode_setattr(dentry, attr);
	if (error)
		return error;

	if (ia_valid & ATTR_SIZE)
		down_write(&dentry->d_inode->i_alloc_sem);

	if (inode->i_op->setattr)
		error = inode->i_op->setattr(dentry, attr);
	else
		error = simple_setattr(dentry, attr);

	if (ia_valid & ATTR_SIZE)
		up_write(&dentry->d_inode->i_alloc_sem);

	if (!error)
		fsnotify_change(dentry, ia_valid);

	return error;
}
Beispiel #19
0
/*
 * Try to add the new entry to the inline data.
 * If succeeds, return 0. If not, extended the inline dir and copied data to
 * the new created block.
 */
int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
			      struct inode *inode)
{
	int ret, inline_size;
	void *inline_start;
	struct ext4_iloc iloc;
	struct inode *dir = dentry->d_parent->d_inode;

	ret = ext4_get_inode_loc(dir, &iloc);
	if (ret)
		return ret;

	down_write(&EXT4_I(dir)->xattr_sem);
	if (!ext4_has_inline_data(dir))
		goto out;

	inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
						 EXT4_INLINE_DOTDOT_SIZE;
	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;

	ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
					inline_start, inline_size);
	if (ret != -ENOSPC)
		goto out;

	/* check whether it can be inserted to inline xattr space. */
	inline_size = EXT4_I(dir)->i_inline_size -
			EXT4_MIN_INLINE_DATA_SIZE;
	if (!inline_size) {
		/* Try to use the xattr space.*/
		ret = ext4_update_inline_dir(handle, dir, &iloc);
		if (ret && ret != -ENOSPC)
			goto out;

		inline_size = EXT4_I(dir)->i_inline_size -
				EXT4_MIN_INLINE_DATA_SIZE;
	}

	if (inline_size) {
		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);

		ret = ext4_add_dirent_to_inline(handle, dentry, inode, &iloc,
						inline_start, inline_size);

		if (ret != -ENOSPC)
			goto out;
	}

	/*
	 * The inline space is filled up, so create a new block for it.
	 * As the extent tree will be created, we have to save the inline
	 * dir first.
	 */
	ret = ext4_convert_inline_data_nolock(handle, dir, &iloc);

out:
	ext4_mark_inode_dirty(handle, dir);
	up_write(&EXT4_I(dir)->xattr_sem);
	brelse(iloc.bh);
	return ret;
}
Beispiel #20
0
/**
 * nilfs_fill_super() - initialize a super block instance
 * @sb: super_block
 * @data: mount options
 * @silent: silent mode flag
 * @nilfs: the_nilfs struct
 *
 * This function is called exclusively by bd_mount_mutex.
 * So, the recovery process is protected from other simultaneous mounts.
 */
static int
nilfs_fill_super(struct super_block *sb, void *data, int silent,
		 struct the_nilfs *nilfs)
{
	struct nilfs_sb_info *sbi;
	struct inode *root;
	__u64 cno;
	int err;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	sb->s_fs_info = sbi;

	get_nilfs(nilfs);
	sbi->s_nilfs = nilfs;
	sbi->s_super = sb;

	err = init_nilfs(nilfs, sbi, (char *)data);
	if (err)
		goto failed_sbi;

	spin_lock_init(&sbi->s_inode_lock);
	INIT_LIST_HEAD(&sbi->s_dirty_files);
	INIT_LIST_HEAD(&sbi->s_list);

	/*
	 * Following initialization is overlapped because
	 * nilfs_sb_info structure has been cleared at the beginning.
	 * But we reserve them to keep our interest and make ready
	 * for the future change.
	 */
	get_random_bytes(&sbi->s_next_generation,
			 sizeof(sbi->s_next_generation));
	spin_lock_init(&sbi->s_next_gen_lock);

	sb->s_op = &nilfs_sops;
	sb->s_export_op = &nilfs_export_ops;
	sb->s_root = NULL;
	sb->s_time_gran = 1;

	if (!nilfs_loaded(nilfs)) {
		err = load_nilfs(nilfs, sbi);
		if (err)
			goto failed_sbi;
	}
	cno = nilfs_last_cno(nilfs);

	if (sb->s_flags & MS_RDONLY) {
		if (nilfs_test_opt(sbi, SNAPSHOT)) {
			err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile,
						       sbi->s_snapshot_cno);
			if (err < 0)
				goto failed_sbi;
			if (!err) {
				printk(KERN_ERR
				       "NILFS: The specified checkpoint is "
				       "not a snapshot "
				       "(checkpoint number=%llu).\n",
				       (unsigned long long)sbi->s_snapshot_cno);
				err = -EINVAL;
				goto failed_sbi;
			}
			cno = sbi->s_snapshot_cno;
		} else
			/* Read-only mount */
			sbi->s_snapshot_cno = cno;
	}

	err = nilfs_attach_checkpoint(sbi, cno);
	if (err) {
		printk(KERN_ERR "NILFS: error loading a checkpoint"
		       " (checkpoint number=%llu).\n", (unsigned long long)cno);
		goto failed_sbi;
	}

	if (!(sb->s_flags & MS_RDONLY)) {
		err = nilfs_attach_segment_constructor(sbi);
		if (err)
			goto failed_checkpoint;
	}

	root = nilfs_iget(sb, NILFS_ROOT_INO);
	if (IS_ERR(root)) {
		printk(KERN_ERR "NILFS: get root inode failed\n");
		err = PTR_ERR(root);
		goto failed_segctor;
	}
	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
		iput(root);
		printk(KERN_ERR "NILFS: corrupt root inode.\n");
		err = -EINVAL;
		goto failed_segctor;
	}
	sb->s_root = d_alloc_root(root);
	if (!sb->s_root) {
		iput(root);
		printk(KERN_ERR "NILFS: get root dentry failed\n");
		err = -ENOMEM;
		goto failed_segctor;
	}

	if (!(sb->s_flags & MS_RDONLY)) {
		down_write(&nilfs->ns_sem);
		nilfs_setup_super(sbi);
		up_write(&nilfs->ns_sem);
	}

	err = nilfs_mark_recovery_complete(sbi);
	if (unlikely(err)) {
		printk(KERN_ERR "NILFS: recovery failed.\n");
		goto failed_root;
	}

	return 0;

 failed_root:
	dput(sb->s_root);
	sb->s_root = NULL;

 failed_segctor:
	nilfs_detach_segment_constructor(sbi);

 failed_checkpoint:
	nilfs_detach_checkpoint(sbi);

 failed_sbi:
	put_nilfs(nilfs);
	sb->s_fs_info = NULL;
	kfree(sbi);
	return err;
}
Beispiel #21
0
static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
					      struct inode *inode,
					      unsigned flags)
{
	int ret, needed_blocks;
	handle_t *handle = NULL;
	int retries = 0, sem_held = 0;
	struct page *page = NULL;
	unsigned from, to;
	struct ext4_iloc iloc;

	if (!ext4_has_inline_data(inode)) {
		/*
		 * clear the flag so that no new write
		 * will trap here again.
		 */
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		return 0;
	}

	needed_blocks = ext4_writepage_trans_blocks(inode);

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry:
	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	/* We cannot recurse into the filesystem as the transaction is already
	 * started */
	flags |= AOP_FLAG_NOFS;

	page = grab_cache_page_write_begin(mapping, 0, flags);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	down_write(&EXT4_I(inode)->xattr_sem);
	sem_held = 1;
	/* If some one has already done this for us, just exit. */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out;
	}

	from = 0;
	to = ext4_get_inline_size(inode);
	if (!PageUptodate(page)) {
		ret = ext4_read_inline_page(inode, page);
		if (ret < 0)
			goto out;
	}

	ret = ext4_destroy_inline_data_nolock(handle, inode);
	if (ret)
		goto out;

	if (ext4_should_dioread_nolock(inode))
		ret = __block_write_begin(page, from, to, ext4_get_block_write);
	else
		ret = __block_write_begin(page, from, to, ext4_get_block);

	if (!ret && ext4_should_journal_data(inode)) {
		ret = ext4_walk_page_buffers(handle, page_buffers(page),
					     from, to, NULL,
					     do_journal_get_write_access);
	}

	if (ret) {
		unlock_page(page);
		page_cache_release(page);
		page = NULL;
		ext4_orphan_add(handle, inode);
		up_write(&EXT4_I(inode)->xattr_sem);
		sem_held = 0;
		ext4_journal_stop(handle);
		handle = NULL;
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might
		 * still be on the orphan list; we need to
		 * make sure the inode is removed from the
		 * orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	if (page)
		block_commit_write(page, from, to);
out:
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}
	if (sem_held)
		up_write(&EXT4_I(inode)->xattr_sem);
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}
Beispiel #22
0
static int nilfs_remount(struct super_block *sb, int *flags, char *data)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_super_block *sbp;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	unsigned long old_sb_flags;
	struct nilfs_mount_options old_opts;
	int err;

	old_sb_flags = sb->s_flags;
	old_opts.mount_opt = sbi->s_mount_opt;
	old_opts.snapshot_cno = sbi->s_snapshot_cno;

	if (!parse_options(data, sb)) {
		err = -EINVAL;
		goto restore_opts;
	}
	sb->s_flags = (sb->s_flags & ~MS_POSIXACL);

	if ((*flags & MS_RDONLY) &&
	    sbi->s_snapshot_cno != old_opts.snapshot_cno) {
		printk(KERN_WARNING "NILFS (device %s): couldn't "
		       "remount to a different snapshot. \n",
		       sb->s_id);
		err = -EINVAL;
		goto restore_opts;
	}

	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
		goto out;
	if (*flags & MS_RDONLY) {
		/* Shutting down the segment constructor */
		nilfs_detach_segment_constructor(sbi);
		sb->s_flags |= MS_RDONLY;

		sbi->s_snapshot_cno = nilfs_last_cno(nilfs);
		/* nilfs_set_opt(sbi, SNAPSHOT); */

		/*
		 * Remounting a valid RW partition RDONLY, so set
		 * the RDONLY flag and then mark the partition as valid again.
		 */
		down_write(&nilfs->ns_sem);
		sbp = nilfs->ns_sbp[0];
		if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) &&
		    (nilfs->ns_mount_state & NILFS_VALID_FS))
			sbp->s_state = cpu_to_le16(nilfs->ns_mount_state);
		sbp->s_mtime = cpu_to_le64(get_seconds());
		nilfs_commit_super(sbi, 1);
		up_write(&nilfs->ns_sem);
	} else {
		/*
		 * Mounting a RDONLY partition read-write, so reread and
		 * store the current valid flag.  (It may have been changed
		 * by fsck since we originally mounted the partition.)
		 */
		down(&sb->s_bdev->bd_mount_sem);
		/* Check existing RW-mount */
		if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) {
			printk(KERN_WARNING "NILFS (device %s): couldn't "
			       "remount because a RW-mount exists.\n",
			       sb->s_id);
			err = -EBUSY;
			goto rw_remount_failed;
		}
		if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
			printk(KERN_WARNING "NILFS (device %s): couldn't "
			       "remount because the current RO-mount is not "
			       "the latest one.\n",
			       sb->s_id);
			err = -EINVAL;
			goto rw_remount_failed;
		}
		sb->s_flags &= ~MS_RDONLY;
		nilfs_clear_opt(sbi, SNAPSHOT);
		sbi->s_snapshot_cno = 0;

		err = nilfs_attach_segment_constructor(sbi);
		if (err)
			goto rw_remount_failed;

		down_write(&nilfs->ns_sem);
		nilfs_setup_super(sbi);
		up_write(&nilfs->ns_sem);

		up(&sb->s_bdev->bd_mount_sem);
	}
 out:
	return 0;

 rw_remount_failed:
	up(&sb->s_bdev->bd_mount_sem);
 restore_opts:
	sb->s_flags = old_sb_flags;
	sbi->s_mount_opt = old_opts.mount_opt;
	sbi->s_snapshot_cno = old_opts.snapshot_cno;
	return err;
}
Beispiel #23
0
/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
int dm_suspend(struct mapped_device *md)
{
	struct dm_table *map;
	DECLARE_WAITQUEUE(wait, current);

	/* Flush I/O to the device. */
	down_read(&md->lock);
	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
		up_read(&md->lock);
		return -EINVAL;
	}

	map = dm_get_table(md);
	if (map)
		dm_table_presuspend_targets(map);
	__lock_fs(md);

	up_read(&md->lock);

	/*
	 * First we set the BLOCK_IO flag so no more ios will be
	 * mapped.
	 */
	down_write(&md->lock);
	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
		/*
		 * If we get here we know another thread is
		 * trying to suspend as well, so we leave the fs
		 * locked for this thread.
		 */
		up_write(&md->lock);
		return -EINVAL;
	}

	set_bit(DMF_BLOCK_IO, &md->flags);
	add_wait_queue(&md->wait, &wait);
	up_write(&md->lock);

	/* unplug */
	if (map) {
		dm_table_unplug_all(map);
		dm_table_put(map);
	}

	/*
	 * Then we wait for the already mapped ios to
	 * complete.
	 */
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (!atomic_read(&md->pending) || signal_pending(current))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	down_write(&md->lock);
	remove_wait_queue(&md->wait, &wait);

	/* were we interrupted ? */
	if (atomic_read(&md->pending)) {
		__unlock_fs(md);
		clear_bit(DMF_BLOCK_IO, &md->flags);
		up_write(&md->lock);
		return -EINTR;
	}

	set_bit(DMF_SUSPENDED, &md->flags);

	map = dm_get_table(md);
	if (map)
		dm_table_postsuspend_targets(map);
	dm_table_put(map);
	up_write(&md->lock);

	return 0;
}
Beispiel #24
0
int
osi_UFSTruncate(struct osi_file *afile, afs_int32 asize)
{
    afs_int32 code;
    struct osi_stat tstat;
    struct iattr newattrs;
    struct inode *inode = OSIFILE_INODE(afile);
    AFS_STATCNT(osi_Truncate);

    /* This routine only shrinks files, and most systems
     * have very slow truncates, even when the file is already
     * small enough.  Check now and save some time.
     */
    code = afs_osi_Stat(afile, &tstat);
    if (code || tstat.size <= asize)
	return code;
    ObtainWriteLock(&afs_xosi, 321);
    AFS_GUNLOCK();
#ifdef STRUCT_INODE_HAS_I_ALLOC_SEM
    down_write(&inode->i_alloc_sem);
#endif
#ifdef STRUCT_INODE_HAS_I_MUTEX
    mutex_lock(&inode->i_mutex);
#else
    down(&inode->i_sem);
#endif
    newattrs.ia_size = asize;
    newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
#if defined(AFS_LINUX24_ENV)
    newattrs.ia_ctime = CURRENT_TIME;

    /* avoid notify_change() since it wants to update dentry->d_parent */
    lock_kernel();
    code = inode_change_ok(inode, &newattrs);
    if (!code) {
#ifdef INODE_SETATTR_NOT_VOID
	code = inode_setattr(inode, &newattrs);
#else
        inode_setattr(inode, &newattrs);
#endif
    }
    unlock_kernel();
    if (!code)
	truncate_inode_pages(&inode->i_data, asize);
#else
    i_size_write(inode, asize);
    if (inode->i_sb->s_op && inode->i_sb->s_op->notify_change) {
	code = inode->i_sb->s_op->notify_change(&afile->dentry, &newattrs);
    }
    if (!code) {
	truncate_inode_pages(inode, asize);
	if (inode->i_op && inode->i_op->truncate)
	    inode->i_op->truncate(inode);
    }
#endif
    code = -code;
#ifdef STRUCT_INODE_HAS_I_MUTEX
    mutex_unlock(&inode->i_mutex);
#else
    up(&inode->i_sem);
#endif
#ifdef STRUCT_INODE_HAS_I_ALLOC_SEM
    up_write(&inode->i_alloc_sem);
#endif
    AFS_GLOCK();
    ReleaseWriteLock(&afs_xosi);
    return code;
}
Beispiel #25
0
struct dentry *f2fs_get_parent(struct dentry *child)
{
	struct qstr dotdot = {.len = 2, .name = ".."};
	unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot);
	if (!ino)
		return ERR_PTR(-ENOENT);
	return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
}

static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
					struct nameidata *nd)
{
	struct inode *inode = NULL;
	struct f2fs_dir_entry *de;
	struct page *page;

	if (dentry->d_name.len > F2FS_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	de = f2fs_find_entry(dir, &dentry->d_name, &page, nd ? nd->flags : 0);
	if (de) {
		nid_t ino = le32_to_cpu(de->ino);
		if (!f2fs_has_inline_dentry(dir))
			kunmap(page);
		f2fs_put_page(page, 0);

		inode = f2fs_iget(dir->i_sb, ino);
		if (IS_ERR(inode))
			return ERR_CAST(inode);
	}

	return d_splice_alias(inode, dentry);
}

static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode = dentry->d_inode;
	struct f2fs_dir_entry *de;
	struct page *page;
	int err = -ENOENT;

	trace_f2fs_unlink_enter(dir, dentry);
	f2fs_balance_fs(sbi);

	de = f2fs_find_entry(dir, &dentry->d_name, &page, 0);
	if (!de)
		goto fail;

	f2fs_lock_op(sbi);
	err = acquire_orphan_inode(sbi);
	if (err) {
		f2fs_unlock_op(sbi);
		if (!f2fs_has_inline_dentry(dir))
			kunmap(page);
		f2fs_put_page(page, 0);
		goto fail;
	}
	f2fs_delete_entry(de, page, dir, inode);
	f2fs_unlock_op(sbi);

	/* In order to evict this inode, we set it dirty */
	mark_inode_dirty(inode);
fail:
	trace_f2fs_unlink_exit(inode, err);
	return err;
}

static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
					const char *symname)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode;
	size_t symlen = strlen(symname) + 1;
	int err;

	f2fs_balance_fs(sbi);

	inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = &f2fs_symlink_inode_operations;
	inode->i_mapping->a_ops = &f2fs_dblock_aops;

	f2fs_lock_op(sbi);
	err = f2fs_add_link(dentry, inode);
	if (err)
		goto out;
	f2fs_unlock_op(sbi);

	err = page_symlink(inode, symname, symlen);
	alloc_nid_done(sbi, inode->i_ino);

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);
	return err;
out:
	handle_failed_inode(inode);
	return err;
}

static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode;
	int err;

	f2fs_balance_fs(sbi);

	inode = f2fs_new_inode(dir, S_IFDIR | mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = &f2fs_dir_inode_operations;
	inode->i_fop = &f2fs_dir_operations;
	inode->i_mapping->a_ops = &f2fs_dblock_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);

	set_inode_flag(F2FS_I(inode), FI_INC_LINK);
	f2fs_lock_op(sbi);
	err = f2fs_add_link(dentry, inode);
	if (err)
		goto out_fail;
	f2fs_unlock_op(sbi);

	stat_inc_inline_dir(inode);
	alloc_nid_done(sbi, inode->i_ino);

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);

	return 0;

out_fail:
	clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
	handle_failed_inode(inode);
	return err;
}

static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	if (f2fs_empty_dir(inode))
		return f2fs_unlink(dir, dentry);
	return -ENOTEMPTY;
}

static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
				umode_t mode, dev_t rdev)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
	struct inode *inode;
	int err = 0;

	if (!new_valid_dev(rdev))
		return -EINVAL;

	f2fs_balance_fs(sbi);

	inode = f2fs_new_inode(dir, mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	init_special_inode(inode, inode->i_mode, rdev);
	inode->i_op = &f2fs_special_inode_operations;

	f2fs_lock_op(sbi);
	err = f2fs_add_link(dentry, inode);
	if (err)
		goto out;
	f2fs_unlock_op(sbi);

	alloc_nid_done(sbi, inode->i_ino);
	d_instantiate(dentry, inode);
	unlock_new_inode(inode);
	return 0;
out:
	handle_failed_inode(inode);
	return err;
}

static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
	struct inode *old_inode = old_dentry->d_inode;
	struct inode *new_inode = new_dentry->d_inode;
	struct page *old_dir_page;
	struct page *old_page, *new_page;
	struct f2fs_dir_entry *old_dir_entry = NULL;
	struct f2fs_dir_entry *old_entry;
	struct f2fs_dir_entry *new_entry;
	int err = -ENOENT;

	f2fs_balance_fs(sbi);

	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page, 0);
	if (!old_entry)
		goto out;

	if (S_ISDIR(old_inode->i_mode)) {
		err = -EIO;
		old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page);
		if (!old_dir_entry)
			goto out_old;
	}

	if (new_inode) {

		err = -ENOTEMPTY;
		if (old_dir_entry && !f2fs_empty_dir(new_inode))
			goto out_dir;

		err = -ENOENT;
		new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name,
						&new_page, 0);
		if (!new_entry)
			goto out_dir;

		f2fs_lock_op(sbi);

		err = acquire_orphan_inode(sbi);
		if (err)
			goto put_out_dir;

		if (update_dent_inode(old_inode, &new_dentry->d_name)) {
			release_orphan_inode(sbi);
			goto put_out_dir;
		}

		f2fs_set_link(new_dir, new_entry, new_page, old_inode);

		new_inode->i_ctime = CURRENT_TIME;
		down_write(&F2FS_I(new_inode)->i_sem);
		if (old_dir_entry)
			drop_nlink(new_inode);
		drop_nlink(new_inode);
		up_write(&F2FS_I(new_inode)->i_sem);

		mark_inode_dirty(new_inode);

		if (!new_inode->i_nlink)
			add_orphan_inode(sbi, new_inode->i_ino);
		else
			release_orphan_inode(sbi);

		update_inode_page(old_inode);
		update_inode_page(new_inode);
	} else {
		f2fs_lock_op(sbi);

		err = f2fs_add_link(new_dentry, old_inode);
		if (err) {
			f2fs_unlock_op(sbi);
			goto out_dir;
		}

		if (old_dir_entry) {
			inc_nlink(new_dir);
			update_inode_page(new_dir);
		}
	}

	down_write(&F2FS_I(old_inode)->i_sem);
	file_lost_pino(old_inode);
	up_write(&F2FS_I(old_inode)->i_sem);

	old_inode->i_ctime = CURRENT_TIME;
	mark_inode_dirty(old_inode);

	f2fs_delete_entry(old_entry, old_page, old_dir, NULL);

	if (old_dir_entry) {
		if (old_dir != new_dir) {
			f2fs_set_link(old_inode, old_dir_entry,
						old_dir_page, new_dir);
			update_inode_page(old_inode);
		} else {
			if (!f2fs_has_inline_dentry(old_inode))
				kunmap(old_dir_page);
			f2fs_put_page(old_dir_page, 0);
		}
		drop_nlink(old_dir);
		mark_inode_dirty(old_dir);
		update_inode_page(old_dir);
	}

	f2fs_unlock_op(sbi);
	return 0;

put_out_dir:
	f2fs_unlock_op(sbi);
	if (!f2fs_has_inline_dentry(new_dir))
		kunmap(new_page);
	f2fs_put_page(new_page, 0);
out_dir:
	if (old_dir_entry) {
		if (!f2fs_has_inline_dentry(old_inode))
			kunmap(old_dir_page);
		f2fs_put_page(old_dir_page, 0);
	}
out_old:
	if (!f2fs_has_inline_dentry(old_dir))
		kunmap(old_page);
	f2fs_put_page(old_page, 0);
out:
	return err;
}

const struct inode_operations f2fs_dir_inode_operations = {
	.create		= f2fs_create,
	.lookup		= f2fs_lookup,
	.link		= f2fs_link,
	.unlink		= f2fs_unlink,
	.symlink	= f2fs_symlink,
	.mkdir		= f2fs_mkdir,
	.rmdir		= f2fs_rmdir,
	.mknod		= f2fs_mknod,
	.rename		= f2fs_rename,
	.getattr	= f2fs_getattr,
	.setattr	= f2fs_setattr,
	.get_acl	= f2fs_get_acl,
#ifdef CONFIG_F2FS_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= f2fs_listxattr,
	.removexattr	= generic_removexattr,
#endif
};

const struct inode_operations f2fs_symlink_inode_operations = {
	.readlink       = generic_readlink,
	.follow_link    = page_follow_link_light,
	.put_link       = page_put_link,
	.getattr	= f2fs_getattr,
	.setattr	= f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= f2fs_listxattr,
	.removexattr	= generic_removexattr,
#endif
};

const struct inode_operations f2fs_special_inode_operations = {
	.getattr	= f2fs_getattr,
	.setattr        = f2fs_setattr,
	.get_acl	= f2fs_get_acl,
#ifdef CONFIG_F2FS_FS_XATTR
	.setxattr       = generic_setxattr,
	.getxattr       = generic_getxattr,
	.listxattr	= f2fs_listxattr,
	.removexattr    = generic_removexattr,
#endif
};
Beispiel #26
0
static int
islpci_reset_if(islpci_private *priv)
{
	long remaining;
	int result = -ETIME;
	int count;

	DEFINE_WAIT(wait);
	prepare_to_wait(&priv->reset_done, &wait, TASK_UNINTERRUPTIBLE);
	
	/* now the last step is to reset the interface */
	isl38xx_interface_reset(priv->device_base, priv->device_host_address);
	islpci_set_state(priv, PRV_STATE_PREINIT);

        for(count = 0; count < 2 && result; count++) {
		/* The software reset acknowledge needs about 220 msec here.
		 * Be conservative and wait for up to one second. */
	
		set_current_state(TASK_UNINTERRUPTIBLE);
		remaining = schedule_timeout(HZ);

		if(remaining > 0) {
			result = 0;
			break;
		}

		/* If we're here it's because our IRQ hasn't yet gone through. 
		 * Retry a bit more...
		 */
		printk(KERN_ERR "%s: no 'reset complete' IRQ seen - retrying\n",
			priv->ndev->name);
	}

	finish_wait(&priv->reset_done, &wait);

	if (result) {
		printk(KERN_ERR "%s: interface reset failure\n", priv->ndev->name);
		return result;
	}

	islpci_set_state(priv, PRV_STATE_INIT);

	/* Now that the device is 100% up, let's allow
	 * for the other interrupts --
	 * NOTE: this is not *yet* true since we've only allowed the 
	 * INIT interrupt on the IRQ line. We can perhaps poll
	 * the IRQ line until we know for sure the reset went through */
	isl38xx_enable_common_interrupts(priv->device_base);

	down_write(&priv->mib_sem);
	result = mgt_commit(priv);
	if (result) {
		printk(KERN_ERR "%s: interface reset failure\n", priv->ndev->name);
		up_write(&priv->mib_sem);
		return result;
	}
	up_write(&priv->mib_sem);

	islpci_set_state(priv, PRV_STATE_READY);

	printk(KERN_DEBUG "%s: interface reset complete\n", priv->ndev->name);
	return 0;
}
Beispiel #27
0
void ocfs2_clear_inode(struct inode *inode)
{
	int status;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	mlog_entry_void();

	if (!inode)
		goto bail;

	mlog(0, "Clearing inode: %llu, nlink = %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink);

	mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
			"Inode=%lu\n", inode->i_ino);

	/* To preven remote deletes we hold open lock before, now it
	 * is time to unlock PR and EX open locks. */
	ocfs2_open_unlock(inode);

	/* Do these before all the other work so that we don't bounce
	 * the downconvert thread while waiting to destroy the locks. */
	ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
	ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
	ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);

	/* We very well may get a clear_inode before all an inodes
	 * metadata has hit disk. Of course, we can't drop any cluster
	 * locks until the journal has finished with it. The only
	 * exception here are successfully wiped inodes - their
	 * metadata can now be considered to be part of the system
	 * inodes from which it came. */
	if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED))
		ocfs2_checkpoint_inode(inode);

	mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
			"Clear inode of %llu, inode has io markers\n",
			(unsigned long long)oi->ip_blkno);

	ocfs2_extent_map_trunc(inode, 0);

	status = ocfs2_drop_inode_locks(inode);
	if (status < 0)
		mlog_errno(status);

	ocfs2_lock_res_free(&oi->ip_rw_lockres);
	ocfs2_lock_res_free(&oi->ip_inode_lockres);
	ocfs2_lock_res_free(&oi->ip_open_lockres);

	ocfs2_metadata_cache_purge(inode);

	mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached,
			"Clear inode of %llu, inode has %u cache items\n",
			(unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);

	mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
			"Clear inode of %llu, inode has a bad flag\n",
			(unsigned long long)oi->ip_blkno);

	mlog_bug_on_msg(spin_is_locked(&oi->ip_lock),
			"Clear inode of %llu, inode is locked\n",
			(unsigned long long)oi->ip_blkno);

	mlog_bug_on_msg(!mutex_trylock(&oi->ip_io_mutex),
			"Clear inode of %llu, io_mutex is locked\n",
			(unsigned long long)oi->ip_blkno);
	mutex_unlock(&oi->ip_io_mutex);

	/*
	 * down_trylock() returns 0, down_write_trylock() returns 1
	 * kernel 1, world 0
	 */
	mlog_bug_on_msg(!down_write_trylock(&oi->ip_alloc_sem),
			"Clear inode of %llu, alloc_sem is locked\n",
			(unsigned long long)oi->ip_blkno);
	up_write(&oi->ip_alloc_sem);

	mlog_bug_on_msg(oi->ip_open_count,
			"Clear inode of %llu has open count %d\n",
			(unsigned long long)oi->ip_blkno, oi->ip_open_count);

	/* Clear all other flags. */
	oi->ip_flags = OCFS2_INODE_CACHE_INLINE;
	oi->ip_created_trans = 0;
	oi->ip_last_trans = 0;
	oi->ip_dir_start_lookup = 0;
	oi->ip_blkno = 0ULL;

	/*
	 * ip_jinode is used to track txns against this inode. We ensure that
	 * the journal is flushed before journal shutdown. Thus it is safe to
	 * have inodes get cleaned up after journal shutdown.
	 */
	jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
				       &oi->ip_jinode);

bail:
	mlog_exit_void();
}
Beispiel #28
0
static void serio_release_driver(struct serio *serio)
{
	down_write(&serio_bus.subsys.rwsem);
	device_release_driver(&serio->dev);
	up_write(&serio_bus.subsys.rwsem);
}
Beispiel #29
0
/* SunOS is completely broken... it returns 0 on success, otherwise
 * ENOMEM.  For sys_sbrk() it wants the old brk value as a return
 * on success and ENOMEM as before on failure.
 */
asmlinkage int sunos_brk(unsigned long brk)
{
    int freepages, retval = -ENOMEM;
    unsigned long rlim;
    unsigned long newbrk, oldbrk;

    down_write(&current->mm->mmap_sem);
    if (ARCH_SUN4C_SUN4) {
        if (brk >= 0x20000000 && brk < 0xe0000000) {
            goto out;
        }
    }

    if (brk < current->mm->end_code)
        goto out;

    newbrk = PAGE_ALIGN(brk);
    oldbrk = PAGE_ALIGN(current->mm->brk);
    retval = 0;
    if (oldbrk == newbrk) {
        current->mm->brk = brk;
        goto out;
    }

    /*
     * Always allow shrinking brk
     */
    if (brk <= current->mm->brk) {
        current->mm->brk = brk;
        do_munmap(current->mm, newbrk, oldbrk-newbrk);
        goto out;
    }
    /*
     * Check against rlimit and stack..
     */
    retval = -ENOMEM;
    rlim = current->rlim[RLIMIT_DATA].rlim_cur;
    if (rlim >= RLIM_INFINITY)
        rlim = ~0;
    if (brk - current->mm->end_code > rlim)
        goto out;

    /*
     * Check against existing mmap mappings.
     */
    if (find_vma_intersection(current->mm, oldbrk, newbrk+PAGE_SIZE))
        goto out;

    /*
     * stupid algorithm to decide if we have enough memory: while
     * simple, it hopefully works in most obvious cases.. Easy to
     * fool it, but this should catch most mistakes.
     */
    freepages = get_page_cache_size();
    freepages >>= 1;
    freepages += nr_free_pages();
    freepages += nr_swap_pages;
    freepages -= num_physpages >> 4;
    freepages -= (newbrk-oldbrk) >> PAGE_SHIFT;
    if (freepages < 0)
        goto out;
    /*
     * Ok, we have probably got enough memory - let it rip.
     */
    current->mm->brk = brk;
    do_brk(oldbrk, newbrk-oldbrk);
    retval = 0;
out:
    up_write(&current->mm->mmap_sem);
    return retval;
}
Beispiel #30
0
/* This is really VOP_SETATTR() in sheep's clothing */
int
vnode_iop_notify_change(
    DENT_T *dent_p,
    struct iattr * iattr_p
)
{
    VNODE_T *vp;
    VATTR_T *vap;
    VNODE_T *cvp;
    int err = 0;
    DENT_T *rdent;
    CALL_DATA_T cd;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
    mdki_boolean_t tooksem = FALSE;
#endif

    if (iattr_p->ia_valid & ATTR_SIZE) {
        ASSERT_I_SEM_MINE(dent_p->d_inode);
    }

    if (MDKI_INOISMVFS(dent_p->d_inode)) {
        vap = VATTR_ALLOC();
	if (vap != NULL) {
            vnode_iop_iattr2vattr(iattr_p, vap);

            /* reject attempts to use setattr to change object type */
            vap->va_mask &= ~AT_TYPE;
            mdki_linux_init_call_data(&cd);
            vp = ITOV(dent_p->d_inode);
            err = VOP_SETATTR(vp, vap, 0, &cd);
            err = mdki_errno_unix_to_linux(err);
            /* Any underlying cleartxt got its inode truncated via changeattr
             * if there's a need to change its size.
             */
            if (!err)
                mdki_linux_vattr_pullup(vp, vap, vap->va_mask);
            VATTR_FREE(vap);
            mdki_linux_destroy_call_data(&cd);
	} else {
	    err = -ENOMEM;
	}
    } else {
        rdent = REALDENTRY_LOCKED(dent_p, &cvp);
        VNODE_DGET(rdent);
        if (rdent && rdent->d_inode) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
            err = inode_setattr(dent_p->d_inode, iattr_p);
            if (err == 0) {
                if (iattr_p->ia_valid & ATTR_SIZE) {
                    LOCK_INODE(rdent->d_inode);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
#if !defined RHEL_UPDATE || RHEL_UPDATE < 5
                    down_write(&rdent->d_inode->i_alloc_sem);
#endif
#endif
                    /*
                     * be paranoid and record the 'taken'ness in case
                     * the called function squashes ia_valid (as is
                     * done in nfs_setattr).
                     */
                    tooksem = TRUE;
                }
                err = MDKI_NOTIFY_CHANGE(rdent, CVN_TO_VFSMNT(cvp), iattr_p);
                if (tooksem) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
#if !defined(RHEL_UPDATE) || RHEL_UPDATE < 5
                    up_write(&rdent->d_inode->i_alloc_sem);
#endif
#endif
                    UNLOCK_INODE(rdent->d_inode);
                }
            }
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) */
            err = simple_setattr(dent_p, iattr_p);
            if (err == 0)
                err = MDKI_NOTIFY_CHANGE(rdent, CVN_TO_VFSMNT(cvp), iattr_p);
#endif /* else LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) */
	} else {
            /* It looks as though someone removed the realdentry on us.
	     * I am not sure why this should happen.
	     */
            err = -ENOENT;
        }
        if (rdent) {
            VNODE_DPUT(rdent);
	    REALDENTRY_UNLOCK(dent_p, cvp);
        }
    }
    return err;
}