Example #1
/**
 * ntfs_pagein - read a range of pages into memory
 * @ni:		ntfs inode whose data to read into the page range
 * @attr_ofs:	byte offset in the inode at which to start
 * @size:	number of bytes to read from the inode
 * @upl:	page list describing destination page range
 * @upl_ofs:	byte offset into page list at which to start
 * @flags:	flags further describing the pagein request
 *
 * Read @size bytes from the ntfs inode @ni, starting at byte offset @attr_ofs
 * into the inode, into the range of pages specified by the page list @upl,
 * starting at byte offset @upl_ofs into the page list.
 *
 * The @flags further describe the pagein request.  The following pagein flags
 * are currently defined in the OS X kernel:
 *	UPL_IOSYNC	- Perform synchronous i/o.
 *	UPL_NOCOMMIT	- Do not commit/abort the page range.
 *	UPL_NORDAHEAD	- Do not perform any speculative read-ahead.
 *	IO_PASSIVE	- This is background i/o so do not throttle other i/o.
 *
 * Inside the ntfs driver we need to perform pageins whilst the inode is
 * locked for writing (@ni->lock), thus we cheat and set UPL_NESTED_PAGEOUT
 * in @flags when this is the case.  We make sure to clear it in @flags before
 * calling into the cluster layer so we do not accidentally cause confusion.
 *
 * For encrypted attributes we abort for now as we do not support them yet.
 *
 * For non-resident, non-compressed attributes we use cluster_pagein_ext()
 * which deals with both normal and multi sector transfer protected attributes.
 *
 * For resident attributes and non-resident, compressed attributes we read the
 * data ourselves by mapping the page list.  In the resident case we then map
 * the mft record, look up the attribute in it, copy the requested data from
 * the mapped attribute into the page list, and unmap the mft record again.
 * For non-resident, compressed attributes we instead get the raw inode and
 * use it with ntfs_read_compressed() to read and decompress the data into our
 * mapped page list.  We then unmap the page list and finally, if UPL_NOCOMMIT
 * is not specified, we commit (on success) or abort (on error) the page
 * range.
 *
 * Return 0 on success and errno on error.
 *
 * Note the pages in the page list are marked busy on entry and the busy bit is
 * cleared when we commit the page range.  Thus it is perfectly safe for us to
 * fill the pages with encrypted or mst protected data and to decrypt or mst
 * deprotect in place before committing the page range.
 *
 * Adapted from cluster_pagein_ext().
 *
 * Locking: - Caller must hold an iocount reference on the vnode of @ni.
 *	    - Caller must not hold @ni->lock or if it is held it must be for
 *	      reading unless UPL_NESTED_PAGEOUT is set in @flags in which case
 *	      the caller must hold @ni->lock for reading or writing.
 */
int ntfs_pagein(ntfs_inode *ni, s64 attr_ofs, unsigned size, upl_t upl,
		upl_offset_t upl_ofs, int flags)
{
	s64 attr_size;
	u8 *kaddr;
	kern_return_t kerr;
	unsigned to_read;
	int err;
	BOOL locked = FALSE;

	ntfs_debug("Entering for mft_no 0x%llx, offset 0x%llx, size 0x%x, "
			"pagein flags 0x%x, page list offset 0x%llx.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)attr_ofs, size, flags,
			(unsigned long long)upl_ofs);
	/*
	 * Without a page list there is nothing we can do, and we cannot even
	 * issue an abort because we do not have a upl nor do we know its
	 * size, so simply return an error.
	 */
	if (!upl) {
		ntfs_error(ni->vol->mp, "NULL page list passed in (error "
				"EINVAL).");
		return EINVAL;
	}
	if (S_ISDIR(ni->mode)) {
		ntfs_error(ni->vol->mp, "Called for directory vnode.");
		err = EISDIR;
		goto err;
	}
	/*
	 * Protect against changes in initialized_size and thus also against
	 * truncation, unless UPL_NESTED_PAGEOUT is set in which case the
	 * caller has already taken @ni->lock for exclusive access.  We simply
	 * leave @locked as FALSE in this case so we do not try to drop the
	 * lock later on.
	 *
	 * If UPL_NESTED_PAGEOUT is set we clear it in @flags to ensure we do
	 * not cause confusion in the cluster layer or the VM.
	 */
	if (flags & UPL_NESTED_PAGEOUT)
		flags &= ~UPL_NESTED_PAGEOUT;
	else {
		locked = TRUE;
		lck_rw_lock_shared(&ni->lock);
	}
	/* Do not allow messing with the inode once it has been deleted. */
	if (NInoDeleted(ni)) {
		/* Remove the inode from the name cache. */
		cache_purge(ni->vn);
		err = ENOENT;
		goto err;
	}
retry_pagein:
	/*
	 * We guarantee that the size in the ubc is smaller than or equal to
	 * the size in the ntfs inode, thus there is no need to check
	 * @ni->data_size.
	 */
	attr_size = ubc_getsize(ni->vn);
	/*
	 * Only $DATA attributes can be encrypted/compressed.  Index root can
	 * have the flags set but this means to create compressed/encrypted
	 * files, not that the attribute is compressed/encrypted.  Note we need
	 * to check for AT_INDEX_ALLOCATION since this is the attribute type of
	 * directory index inodes.
	 */
	if (ni->type != AT_INDEX_ALLOCATION) {
		/* TODO: Deny access to encrypted attributes, just like NT4. */
		if (NInoEncrypted(ni)) {
			if (ni->type != AT_DATA)
				panic("%s(): Encrypted non-data attribute.\n",
						__FUNCTION__);
			ntfs_warning(ni->vol->mp, "Denying access to "
					"encrypted attribute (EACCES).");
			err = EACCES;
			goto err;
		}
		/* Compressed data streams need special handling. */
		if (NInoNonResident(ni) && NInoCompressed(ni) && !NInoRaw(ni)) {
			if (ni->type != AT_DATA)
				panic("%s(): Compressed non-data attribute.\n",
						__FUNCTION__);
			goto compressed;
		}
	}
	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		int (*callback)(buf_t, void *);

		callback = NULL;
		if (NInoMstProtected(ni) || NInoEncrypted(ni))
			callback = ntfs_cluster_iodone;
		/* Non-resident, possibly mst protected, attribute. */
		err = cluster_pagein_ext(ni->vn, upl, upl_ofs, attr_ofs, size,
				attr_size, flags, callback, NULL);
		if (!err)
			ntfs_debug("Done (cluster_pagein_ext()).");
		else
			ntfs_error(ni->vol->mp, "Failed (cluster_pagein_ext(), "
					"error %d).", err);
		if (locked)
			lck_rw_unlock_shared(&ni->lock);
		return err;
	}
compressed:
	/*
	 * The attribute is resident and/or compressed.
	 *
	 * Cannot pagein from a negative offset, from an offset at or beyond
	 * the end of the attribute, or if the attribute offset or the page
	 * list offset is not page aligned or the requested size is not a
	 * multiple of PAGE_SIZE.
	 */
	if (attr_ofs < 0 || attr_ofs >= attr_size || attr_ofs & PAGE_MASK_64 ||
			size & PAGE_MASK || upl_ofs & PAGE_MASK) {
		err = EINVAL;
		goto err;
	}
	to_read = size;
	attr_size -= attr_ofs;
	if (to_read > attr_size)
		to_read = attr_size;
	/*
	 * We do not need @attr_size any more, so reuse it to hold the number
	 * of bytes available in the attribute starting at offset @attr_ofs,
	 * up to a maximum of the requested number of bytes rounded up to a
	 * multiple of the system page size.
	 */
	attr_size = (to_read + PAGE_MASK) & ~PAGE_MASK;
	/* Abort any pages outside the end of the attribute. */
	if (size > attr_size && !(flags & UPL_NOCOMMIT)) {
		ubc_upl_abort_range(upl, upl_ofs + attr_size, size - attr_size,
				UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
		/* Update @size. */
		size = attr_size;
	}
	/* To access the page list contents, we need to map the page list. */
	kerr = ubc_upl_map(upl, (vm_offset_t*)&kaddr);
	if (kerr != KERN_SUCCESS) {
		ntfs_error(ni->vol->mp, "ubc_upl_map() failed (error %d).",
				(int)kerr);
		err = EIO;
		goto err;
	}
	if (!NInoNonResident(ni)) {
		/*
		 * Read the data from the resident attribute into the page
		 * list.
		 */
		err = ntfs_resident_attr_read(ni, attr_ofs, size,
				kaddr + upl_ofs);
		if (err && err != EAGAIN)
			ntfs_error(ni->vol->mp, "ntfs_resident_attr_read() "
					"failed (error %d).", err);
	} else {
		ntfs_inode *raw_ni;
		int ioflags;

		/*
		 * Get the raw inode.  We take the inode lock shared to protect
		 * against concurrent writers as the compressed data is invalid
		 * whilst a write is in progress.
		 */
		err = ntfs_raw_inode_get(ni, LCK_RW_TYPE_SHARED, &raw_ni);
		if (err)
			ntfs_error(ni->vol->mp, "Failed to get raw inode "
					"(error %d).", err);
		else {
			if (!NInoRaw(raw_ni))
				panic("%s(): Requested raw inode but got "
						"non-raw one.\n", __FUNCTION__);
			ioflags = 0;
			if (vnode_isnocache(ni->vn) ||
					vnode_isnocache(raw_ni->vn))
				ioflags |= IO_NOCACHE;
			if (vnode_isnoreadahead(ni->vn) ||
					vnode_isnoreadahead(raw_ni->vn))
				ioflags |= IO_RAOFF;
			err = ntfs_read_compressed(ni, raw_ni, attr_ofs, size,
					kaddr + upl_ofs, NULL, ioflags);
			if (err)
				ntfs_error(ni->vol->mp,
						"ntfs_read_compressed() "
						"failed (error %d).", err);
			lck_rw_unlock_shared(&raw_ni->lock);
			(void)vnode_put(raw_ni->vn);
		}
	}
	kerr = ubc_upl_unmap(upl);
	if (kerr != KERN_SUCCESS) {
		ntfs_error(ni->vol->mp, "ubc_upl_unmap() failed (error %d).",
				(int)kerr);
		if (!err)
			err = EIO;
	}
	if (!err) {
		if (!(flags & UPL_NOCOMMIT)) {
			/* Commit the page range we brought up to date. */
			ubc_upl_commit_range(upl, upl_ofs, size,
					UPL_COMMIT_FREE_ON_EMPTY);
		}
		ntfs_debug("Done (%s).", !NInoNonResident(ni) ?
				"ntfs_resident_attr_read()" :
				"ntfs_read_compressed()");
	} else /* if (err) */ {
		/*
		 * If the attribute was converted to non-resident under our
		 * nose, retry the pagein.
		 *
		 * TODO: This may no longer be able to happen now that we lock
		 * against changes in initialized size and thus truncation...
		 * Revisit this issue when the write code has been written and
		 * remove the check + goto if appropriate.
		 */
		if (err == EAGAIN)
			goto retry_pagein;
err:
		if (!(flags & UPL_NOCOMMIT)) {
			int upl_flags = UPL_ABORT_FREE_ON_EMPTY;
			if (err != ENOMEM)
				upl_flags |= UPL_ABORT_ERROR;
			ubc_upl_abort_range(upl, upl_ofs, size, upl_flags);
		}
		ntfs_error(ni->vol->mp, "Failed (error %d).", err);
	}
	if (locked)
		lck_rw_unlock_shared(&ni->lock);
	return err;
}
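
/*
 * A minimal usage sketch (not part of the driver code above): this is roughly
 * how a VNOP pagein entry point could forward to ntfs_pagein().  The
 * vnop_pagein_args field names follow the standard XNU <sys/vnode_if.h>
 * definition; NTFS_I() as the vnode-to-ntfs_inode accessor is an assumption
 * made here for illustration.
 */
static int ntfs_vnop_pagein_sketch(struct vnop_pagein_args *a)
{
	ntfs_inode *ni = NTFS_I(a->a_vp);	/* hypothetical accessor */

	if (!ni) {
		/* No ntfs inode attached: abort the pages and bail out. */
		if (!(a->a_flags & UPL_NOCOMMIT))
			ubc_upl_abort_range(a->a_pl, a->a_pl_offset,
					(upl_size_t)a->a_size,
					UPL_ABORT_FREE_ON_EMPTY |
					UPL_ABORT_ERROR);
		return EINVAL;
	}
	/* The VM holds an iocount on the vnode across the VNOP call. */
	return ntfs_pagein(ni, a->a_f_offset, (unsigned)a->a_size, a->a_pl,
			a->a_pl_offset, a->a_flags);
}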
Example #2
/**
 * ntfs_page_map_ext - map a page of a vnode into memory
 * @ni:		ntfs inode of which to map a page
 * @ofs:	byte offset into @ni of which to map a page
 * @upl:	destination page list for the page
 * @pl:		destination array of pages containing the page itself
 * @kaddr:	destination pointer for the address of the mapped page contents
 * @uptodate:	if true return an uptodate page and if false return it as is
 * @rw:		if true we intend to modify the page and if false we do not
 *
 * Map the page corresponding to byte offset @ofs into the ntfs inode @ni into
 * memory and return the page list in @upl, the array of pages containing the
 * page in @pl and the address of the mapped page contents in @kaddr.
 *
 * If @uptodate is true the page is returned uptodate, i.e. if the page is
 * currently not valid, it will be brought uptodate via a call to ntfs_pagein()
 * before it is returned.  If @uptodate is false, the page is returned as is,
 * i.e. it may or may not be uptodate.
 *
 * The caller must set @rw to true if the page is going to be modified and to
 * false otherwise.
 *
 * Note: @ofs must be page aligned.
 *
 * Locking: - Caller must hold an iocount reference on the vnode of @ni.
 *	    - Caller must hold @ni->lock for reading or writing.
 *
 * Return 0 on success and errno on error, in which case *@upl is set to NULL.
 */
errno_t ntfs_page_map_ext(ntfs_inode *ni, s64 ofs, upl_t *upl,
		upl_page_info_array_t *pl, u8 **kaddr, const BOOL uptodate,
		const BOOL rw)
{
	s64 size;
	kern_return_t kerr;
	int abort_flags;
	errno_t err;

	ntfs_debug("Entering for inode 0x%llx, offset 0x%llx, rw is %s.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)ofs,
			rw ? "true" : "false");
	if (ofs & PAGE_MASK)
		panic("%s() called with non page aligned offset (0x%llx).",
				__FUNCTION__, (unsigned long long)ofs);
	lck_spin_lock(&ni->size_lock);
	size = ubc_getsize(ni->vn);
	if (size > ni->data_size)
		size = ni->data_size;
	lck_spin_unlock(&ni->size_lock);
	if (ofs > size) {
		ntfs_error(ni->vol->mp, "Offset 0x%llx is outside the end of "
				"the attribute (0x%llx).",
				(unsigned long long)ofs,
				(unsigned long long)size);
		err = EINVAL;
		goto err;
	}
	/* Create a page list for the wanted page. */
	kerr = ubc_create_upl(ni->vn, ofs, PAGE_SIZE, upl, pl, UPL_SET_LITE |
			(rw ? UPL_WILL_MODIFY : 0));
	if (kerr != KERN_SUCCESS)
		panic("%s(): Failed to get page (error %d).\n", __FUNCTION__,
				(int)kerr);
	/*
	 * If the page is not valid, we need to read it in from the vnode now,
	 * thus making it valid.
	 *
	 * We set UPL_NESTED_PAGEOUT to let ntfs_pagein() know that we already
	 * have the inode locked (@ni->lock is held by the caller).
	 */
	if (uptodate && !upl_valid_page(*pl, 0)) {
		ntfs_debug("Reading page as it was not valid.");
		err = ntfs_pagein(ni, ofs, PAGE_SIZE, *upl, 0, UPL_IOSYNC |
				UPL_NOCOMMIT | UPL_NESTED_PAGEOUT);
		if (err) {
			ntfs_error(ni->vol->mp, "Failed to read page (error "
					"%d).", err);
			goto pagein_err;
		}
	}
	/* Map the page into the kernel's address space. */
	kerr = ubc_upl_map(*upl, (vm_offset_t*)kaddr);
	if (kerr == KERN_SUCCESS) {
		ntfs_debug("Done.");
		return 0;
	}
	ntfs_error(ni->vol->mp, "Failed to map page (error %d).",
			(int)kerr);
	err = EIO;
pagein_err:
	abort_flags = UPL_ABORT_FREE_ON_EMPTY;
	if (!upl_valid_page(*pl, 0) ||
			(vnode_isnocache(ni->vn) && !upl_dirty_page(*pl, 0)))
		abort_flags |= UPL_ABORT_DUMP_PAGES;
	ubc_upl_abort_range(*upl, 0, PAGE_SIZE, abort_flags);
err:
	*upl = NULL;
	return err;
}
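
/*
 * A minimal sketch of the intended map/modify/unmap pattern around
 * ntfs_page_map_ext(), assuming a matching ntfs_page_unmap() helper that
 * unmaps the page and commits it, optionally marking it dirty; that helper's
 * name and signature are an assumption here, not taken from the code above.
 */
static errno_t ntfs_zero_first_page_sketch(ntfs_inode *ni)
{
	upl_t upl;
	upl_page_info_array_t pl;
	u8 *kaddr;
	errno_t err;

	/* Caller must hold an iocount on ni->vn and ni->lock (see above). */
	err = ntfs_page_map_ext(ni, 0, &upl, &pl, &kaddr, TRUE /* uptodate */,
			TRUE /* rw */);
	if (err)
		return err;
	bzero(kaddr, PAGE_SIZE);	/* modify the mapped page contents */
	/* Unmap and commit the page, marking it dirty (assumed helper). */
	ntfs_page_unmap(ni, upl, pl, TRUE /* mark dirty */);
	return 0;
}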
Example #3
/* relies on v1 paging */
static int
nullfs_pagein(struct vnop_pagein_args * ap)
{
	int error = EIO;
	struct vnode *vp, *lvp;

	NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);

	vp  = ap->a_vp;
	lvp = NULLVPTOLOWERVP(vp);

	if (vnode_vtype(vp) != VREG) {
		return ENOTSUP;
	}

	/*
	 * Ask VM/UBC/VFS to do our bidding
	 */
	if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) {
		vm_offset_t ioaddr;
		uio_t auio;
		kern_return_t kret;
		off_t bytes_to_commit;
		off_t lowersize;
		upl_t upl      = ap->a_pl;
		user_ssize_t bytes_remaining = 0;

		auio = uio_create(1, ap->a_f_offset, UIO_SYSSPACE, UIO_READ);
		if (auio == NULL) {
			error = EIO;
			goto exit_no_unmap;
		}

		kret = ubc_upl_map(upl, &ioaddr);
		if (KERN_SUCCESS != kret) {
			panic("nullfs_pagein: ubc_upl_map() failed with (%d)", kret);
		}

		ioaddr += ap->a_pl_offset;

		error = uio_addiov(auio, (user_addr_t)ioaddr, ap->a_size);
		if (error) {
			goto exit;
		}

		lowersize = ubc_getsize(lvp);
		if (lowersize != ubc_getsize(vp)) {
			(void)ubc_setsize(vp, lowersize); /* ignore failures, nothing can be done */
		}

		error = VNOP_READ(lvp, auio, ((ap->a_flags & UPL_IOSYNC) ? IO_SYNC : 0), ap->a_context);

		bytes_remaining = uio_resid(auio);
		if (bytes_remaining > 0 && bytes_remaining <= (user_ssize_t)ap->a_size)
		{
			/* zero the bytes that weren't read into the upl */
			bzero((void*)((uintptr_t)(ioaddr + ap->a_size - bytes_remaining)), (size_t) bytes_remaining);
		}

	exit:
		kret = ubc_upl_unmap(upl);
		if (KERN_SUCCESS != kret) {
			panic("nullfs_pagein: ubc_upl_unmap() failed with (%d)", kret);
		}

		if (auio != NULL) {
			uio_free(auio);
		}

	exit_no_unmap:
		if ((ap->a_flags & UPL_NOCOMMIT) == 0) {
			if (!error && (bytes_remaining >= 0) && (bytes_remaining <= (user_ssize_t)ap->a_size)) {
				/* only commit what was read in (page aligned) */
				bytes_to_commit = ap->a_size - bytes_remaining;
				if (bytes_to_commit)
				{
					/* make sure bytes_to_commit and bytes_remaining are page aligned before calling ubc_upl_commit_range() */
					if (bytes_to_commit & PAGE_MASK)
					{
						bytes_to_commit = (bytes_to_commit & (~PAGE_MASK)) + (PAGE_MASK + 1);
						assert(bytes_to_commit <= (off_t)ap->a_size);

						bytes_remaining = ap->a_size - bytes_to_commit;
					}
					ubc_upl_commit_range(upl, ap->a_pl_offset, (upl_size_t)bytes_to_commit, UPL_COMMIT_FREE_ON_EMPTY);
				}
				
				/* abort anything that's left */
				if (bytes_remaining) {
					ubc_upl_abort_range(upl, ap->a_pl_offset + bytes_to_commit, (upl_size_t)bytes_remaining, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
				}
			} else {
				ubc_upl_abort_range(upl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
			}
		}
		vnode_put(lvp);
	} else if((ap->a_flags & UPL_NOCOMMIT) == 0) {
		ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
	}
	return error;
}
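
/*
 * The commit-size rounding in nullfs_pagein() above is just "round up to the
 * next page boundary".  A minimal standalone illustration of that arithmetic,
 * assuming the usual XNU relationship PAGE_MASK == PAGE_SIZE - 1; the helper
 * name is made up for this sketch:
 */
static off_t round_up_to_page(off_t bytes)
{
	if (bytes & PAGE_MASK)
		bytes = (bytes & ~((off_t)PAGE_MASK)) + (PAGE_MASK + 1);
	/* e.g. 1 -> PAGE_SIZE, PAGE_SIZE -> PAGE_SIZE (already aligned) */
	return bytes;
}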