/**
 * ntfs_file_fsync - sync a file to disk
 * @filp:	file to be synced
 * @dentry:	dentry describing the file to sync
 * @datasync:	if non-zero only flush user data and not metadata
 *
 * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync
 * system calls. This function is inspired by fs/buffer.c::file_fsync().
 *
 * If @datasync is false, write the mft record and all associated extent mft
 * records as well as the $DATA attribute and then sync the block device.
 *
 * If @datasync is true and the attribute is non-resident, we skip the writing
 * of the mft record and all associated extent mft records (this might still
 * happen due to the write_inode_now() call).
 *
 * Also, if @datasync is true, we do not wait on the inode to be written out
 * but we always wait on the page cache pages to be written out.
 *
 * Note: In the past @filp could be NULL so we ignore it as we don't need it
 * anyway.
 *
 * Locking: Caller must hold i_sem on the inode.
 *
 * TODO: We should probably also write all attribute/index inodes associated
 * with this inode but since we have no simple way of getting to them we
 * ignore this problem for now.
 */
static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
		int datasync)
{
	struct inode *vi = dentry->d_inode;
	int err, ret = 0;

	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
	BUG_ON(S_ISDIR(vi->i_mode));
	if (!datasync || !NInoNonResident(NTFS_I(vi)))
		ret = ntfs_write_inode(vi, 1);
	write_inode_now(vi, !datasync);
	/*
	 * NOTE: If we were to use mapping->private_list (see ext2 and
	 * fs/buffer.c) for dirty blocks then we could optimize the below to
	 * be sync_mapping_buffers(vi->i_mapping).
	 */
	err = sync_blockdev(vi->i_sb->s_bdev);
	if (unlikely(err && !ret))
		ret = err;
	if (likely(!ret))
		ntfs_debug("Done.");
	else
		ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
				"%u.", datasync ? "data" : "", vi->i_ino,
				-ret);
	return ret;
}
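/*
 * For context: a handler like the above is normally exposed to the VFS
 * through the driver's file_operations table. The sketch below shows such a
 * registration; the table name and the other members are illustrative
 * assumptions, not necessarily the driver's real table.
 */
static struct file_operations ntfs_file_ops_sketch = {
	.llseek	= generic_file_llseek,	/* assumed generic helpers */
	.read	= generic_file_read,
	.mmap	= generic_file_mmap,
	.fsync	= ntfs_file_fsync,	/* the handler defined above */
};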
/**
 * ntfs_page_unmap - unmap a page belonging to a vnode from memory
 * @ni:		ntfs inode to which the page belongs
 * @upl:	page list of the page
 * @pl:		array of pages containing the page itself
 * @mark_dirty:	mark the page dirty
 *
 * Unmap the page belonging to the ntfs inode @ni from memory releasing it
 * back to the vm.
 *
 * The page is described by the page list @upl and the array of pages
 * containing the page @pl.
 *
 * If @mark_dirty is TRUE, tell the vm to mark the page dirty when releasing
 * the page.
 *
 * Locking: Caller must hold an iocount reference on the vnode of @ni.
 */
void ntfs_page_unmap(ntfs_inode *ni, upl_t upl, upl_page_info_array_t pl,
		const BOOL mark_dirty)
{
	kern_return_t kerr;
	BOOL was_valid, was_dirty;

	was_valid = upl_valid_page(pl, 0);
	/* The page dirty bit is only valid if the page was valid. */
	was_dirty = (was_valid && upl_dirty_page(pl, 0));
	ntfs_debug("Entering for inode 0x%llx, page was %svalid %s %sdirty%s.",
			(unsigned long long)ni->mft_no,
			was_valid ? "" : "not ",
			(int)was_valid ^ (int)was_dirty ? "but" : "and",
			was_dirty ? "" : "not ",
			mark_dirty ? ", marking it dirty" : "");
	/* Unmap the page from the kernel's address space. */
	kerr = ubc_upl_unmap(upl);
	if (kerr != KERN_SUCCESS)
		ntfs_warning(ni->vol->mp, "ubc_upl_unmap() failed (error %d).",
				(int)kerr);
	/*
	 * If the page was valid and dirty or is being made dirty or if
	 * caching for the vnode is enabled (as will usually be the case for
	 * all metadata files), commit it thus releasing it into the vm,
	 * taking care to preserve the dirty state and marking the page dirty
	 * if requested when committing the page.
	 *
	 * If the page was not valid or was valid but not dirty, it is not
	 * being marked dirty, and caching is disabled on the vnode, dump the
	 * page.
	 */
	if (was_dirty || mark_dirty || !vnode_isnocache(ni->vn)) {
		int commit_flags;

		commit_flags = UPL_COMMIT_FREE_ON_EMPTY |
				UPL_COMMIT_INACTIVATE;
		if (!was_valid && !mark_dirty)
			commit_flags |= UPL_COMMIT_CLEAR_DIRTY;
		else if (was_dirty || mark_dirty)
			commit_flags |= UPL_COMMIT_SET_DIRTY;
		ubc_upl_commit_range(upl, 0, PAGE_SIZE, commit_flags);
		ntfs_debug("Done (committed page).");
	} else {
		ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES |
				UPL_ABORT_FREE_ON_EMPTY);
		ntfs_debug("Done (dumped page).");
	}
}
/**
 * ntfs_page_dump - discard a page belonging to a vnode from memory
 * @ni:		ntfs inode to which the page belongs
 * @upl:	page list of the page
 * @pl:		array of pages containing the page itself
 *
 * Unmap the page belonging to the ntfs inode @ni from memory, throwing it
 * away. Note that if the page is dirty all changes to the page will be lost
 * as it will be discarded, so use this function with extreme caution.
 *
 * The page is described by the page list @upl and the array of pages
 * containing the page @pl.
 *
 * Locking: Caller must hold an iocount reference on the vnode of @ni.
 */
void ntfs_page_dump(ntfs_inode *ni, upl_t upl,
		upl_page_info_array_t pl __unused)
{
	kern_return_t kerr;

	ntfs_debug("Entering for inode 0x%llx, page is %svalid, %sdirty.",
			(unsigned long long)ni->mft_no,
			upl_valid_page(pl, 0) ? "" : "not ",
			upl_dirty_page(pl, 0) ? "" : "not ");
	/* Unmap the page from the kernel's address space. */
	kerr = ubc_upl_unmap(upl);
	if (kerr != KERN_SUCCESS)
		ntfs_warning(ni->vol->mp, "ubc_upl_unmap() failed (error %d).",
				(int)kerr);
	/* Dump the page. */
	ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES |
			UPL_ABORT_FREE_ON_EMPTY);
	ntfs_debug("Done.");
}
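/*
 * ntfs_page_unmap() and ntfs_page_dump() are the release half of a
 * map/modify/release cycle. The sketch below shows the typical pairing; the
 * ntfs_page_map() helper and its signature are assumptions based on the
 * naming convention here, not confirmed by this excerpt.
 */
static errno_t ntfs_example_touch_page(ntfs_inode *ni, s64 ofs)
{
	upl_t upl;
	upl_page_info_array_t pl;
	u8 *kaddr;
	errno_t err;

	/* Hypothetical: map the page at byte offset @ofs into the kernel. */
	err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, TRUE);
	if (err)
		return err;
	kaddr[0] ^= 0xff;		/* Modify the mapped page contents. */
	/* Release the page back to the vm, marking it dirty. */
	ntfs_page_unmap(ni, upl, pl, TRUE);
	return 0;
}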
/**
 * ntfs_pagein - read a range of pages into memory
 * @ni:		ntfs inode whose data to read into the page range
 * @attr_ofs:	byte offset in the inode at which to start
 * @size:	number of bytes to read from the inode
 * @upl:	page list describing destination page range
 * @upl_ofs:	byte offset into page list at which to start
 * @flags:	flags further describing the pagein request
 *
 * Read @size bytes from the ntfs inode @ni, starting at byte offset @attr_ofs
 * into the inode, into the range of pages specified by the page list @upl,
 * starting at byte offset @upl_ofs into the page list.
 *
 * The @flags further describe the pagein request. The following pagein flags
 * are currently defined in the OSX kernel:
 *	UPL_IOSYNC	- Perform synchronous i/o.
 *	UPL_NOCOMMIT	- Do not commit/abort the page range.
 *	UPL_NORDAHEAD	- Do not perform any speculative read-ahead.
 *	IO_PASSIVE	- This is background i/o so do not throttle other i/o.
 *
 * Inside the ntfs driver we need to perform pageins whilst the inode is
 * locked for writing (@ni->lock) thus we cheat and set UPL_NESTED_PAGEOUT in
 * @flags when this is the case. We make sure to clear it in @flags before
 * calling into the cluster layer so we do not accidentally cause confusion.
 *
 * For encrypted attributes we abort for now as we do not support them yet.
 *
 * For non-resident, non-compressed attributes we use cluster_pagein_ext()
 * which deals with both normal and multi sector transfer protected
 * attributes.
 *
 * For resident attributes and non-resident, compressed attributes we read the
 * data ourselves by mapping the page list, and in the resident case, mapping
 * the mft record, looking up the attribute in it, and copying the requested
 * data from the mapped attribute into the page list, then unmapping the mft
 * record, whilst for non-resident, compressed attributes, we get the raw
 * inode and use it with ntfs_read_compressed() to read and decompress the
 * data into our mapped page list. We then unmap the page list and finally,
 * if UPL_NOCOMMIT is not specified, we commit (success) or abort (error) the
 * page range.
 *
 * Return 0 on success and errno on error.
 *
 * Note the pages in the page list are marked busy on entry and the busy bit
 * is cleared when we commit the page range. Thus it is perfectly safe for us
 * to fill the pages with encrypted or mst protected data and to decrypt or
 * mst deprotect in place before committing the page range.
 *
 * Adapted from cluster_pagein_ext().
 *
 * Locking: - Caller must hold an iocount reference on the vnode of @ni.
 *	    - Caller must not hold @ni->lock or if it is held it must be for
 *	      reading unless UPL_NESTED_PAGEOUT is set in @flags in which case
 *	      the caller must hold @ni->lock for reading or writing.
 */
int ntfs_pagein(ntfs_inode *ni, s64 attr_ofs, unsigned size, upl_t upl,
		upl_offset_t upl_ofs, int flags)
{
	s64 attr_size;
	u8 *kaddr;
	kern_return_t kerr;
	unsigned to_read;
	int err;
	BOOL locked = FALSE;

	ntfs_debug("Entering for mft_no 0x%llx, offset 0x%llx, size 0x%x, "
			"pagein flags 0x%x, page list offset 0x%llx.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)attr_ofs, size, flags,
			(unsigned long long)upl_ofs);
	/*
	 * If the caller did not specify any i/o, then we are done. We cannot
	 * issue an abort because we do not have a upl or we do not know its
	 * size.
	 */
	if (!upl) {
		ntfs_error(ni->vol->mp, "NULL page list passed in (error "
				"EINVAL).");
		return EINVAL;
	}
	if (S_ISDIR(ni->mode)) {
		ntfs_error(ni->vol->mp, "Called for directory vnode.");
		err = EISDIR;
		goto err;
	}
	/*
	 * Protect against changes in initialized_size and thus against
	 * truncation also, unless UPL_NESTED_PAGEOUT is set in which case the
	 * caller has already taken @ni->lock for exclusive access. We simply
	 * leave @locked to be FALSE in this case so we do not try to drop the
	 * lock later on.
	 *
	 * If UPL_NESTED_PAGEOUT is set we clear it in @flags to ensure we do
	 * not cause confusion in the cluster layer or the VM.
	 */
	if (flags & UPL_NESTED_PAGEOUT)
		flags &= ~UPL_NESTED_PAGEOUT;
	else {
		locked = TRUE;
		lck_rw_lock_shared(&ni->lock);
	}
	/* Do not allow messing with the inode once it has been deleted. */
	if (NInoDeleted(ni)) {
		/* Remove the inode from the name cache. */
		cache_purge(ni->vn);
		err = ENOENT;
		goto err;
	}
retry_pagein:
	/*
	 * We guarantee that the size in the ubc will be smaller or equal to
	 * the size in the ntfs inode thus no need to check @ni->data_size.
	 */
	attr_size = ubc_getsize(ni->vn);
	/*
	 * Only $DATA attributes can be encrypted/compressed. Index root can
	 * have the flags set but this means to create compressed/encrypted
	 * files, not that the attribute is compressed/encrypted. Note we need
	 * to check for AT_INDEX_ALLOCATION since this is the type of
	 * directory index inodes.
	 */
	if (ni->type != AT_INDEX_ALLOCATION) {
		/* TODO: Deny access to encrypted attributes, just like NT4. */
		if (NInoEncrypted(ni)) {
			if (ni->type != AT_DATA)
				panic("%s(): Encrypted non-data attribute.\n",
						__FUNCTION__);
			ntfs_warning(ni->vol->mp, "Denying access to "
					"encrypted attribute (EACCES).");
			err = EACCES;
			goto err;
		}
		/* Compressed data streams need special handling. */
		if (NInoNonResident(ni) && NInoCompressed(ni) &&
				!NInoRaw(ni)) {
			if (ni->type != AT_DATA)
				panic("%s(): Compressed non-data attribute.\n",
						__FUNCTION__);
			goto compressed;
		}
	}
	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		int (*callback)(buf_t, void *);

		callback = NULL;
		if (NInoMstProtected(ni) || NInoEncrypted(ni))
			callback = ntfs_cluster_iodone;
		/* Non-resident, possibly mst protected, attribute. */
		err = cluster_pagein_ext(ni->vn, upl, upl_ofs, attr_ofs, size,
				attr_size, flags, callback, NULL);
		if (!err)
			ntfs_debug("Done (cluster_pagein_ext()).");
		else
			ntfs_error(ni->vol->mp, "Failed "
					"(cluster_pagein_ext(), error %d).",
					err);
		if (locked)
			lck_rw_unlock_shared(&ni->lock);
		return err;
	}
compressed:
	/*
	 * The attribute is resident and/or compressed.
	 *
	 * Cannot pagein from a negative offset or if we are starting beyond
	 * the end of the attribute or if the attribute offset is not page
	 * aligned or the size requested is not a multiple of PAGE_SIZE.
	 */
	if (attr_ofs < 0 || attr_ofs >= attr_size || attr_ofs & PAGE_MASK_64 ||
			size & PAGE_MASK || upl_ofs & PAGE_MASK) {
		err = EINVAL;
		goto err;
	}
	to_read = size;
	attr_size -= attr_ofs;
	if (to_read > attr_size)
		to_read = attr_size;
	/*
	 * We do not need @attr_size any more so reuse it to hold the number
	 * of bytes available in the attribute starting at offset @attr_ofs
	 * up to a maximum of the requested number of bytes rounded up to a
	 * multiple of the system page size.
	 */
	attr_size = (to_read + PAGE_MASK) & ~PAGE_MASK;
	/* Abort any pages outside the end of the attribute. */
	if (size > attr_size && !(flags & UPL_NOCOMMIT)) {
		ubc_upl_abort_range(upl, upl_ofs + attr_size,
				size - attr_size, UPL_ABORT_FREE_ON_EMPTY |
				UPL_ABORT_ERROR);
		/* Update @size. */
		size = attr_size;
	}
	/* To access the page list contents, we need to map the page list. */
	kerr = ubc_upl_map(upl, (vm_offset_t*)&kaddr);
	if (kerr != KERN_SUCCESS) {
		ntfs_error(ni->vol->mp, "ubc_upl_map() failed (error %d).",
				(int)kerr);
		err = EIO;
		goto err;
	}
	if (!NInoNonResident(ni)) {
		/*
		 * Read the data from the resident attribute into the page
		 * list.
		 */
		err = ntfs_resident_attr_read(ni, attr_ofs, size,
				kaddr + upl_ofs);
		if (err && err != EAGAIN)
			ntfs_error(ni->vol->mp, "ntfs_resident_attr_read() "
					"failed (error %d).", err);
	} else {
		ntfs_inode *raw_ni;
		int ioflags;

		/*
		 * Get the raw inode. We take the inode lock shared to protect
		 * against concurrent writers as the compressed data is
		 * invalid whilst a write is in progress.
		 */
		err = ntfs_raw_inode_get(ni, LCK_RW_TYPE_SHARED, &raw_ni);
		if (err)
			ntfs_error(ni->vol->mp, "Failed to get raw inode "
					"(error %d).", err);
		else {
			if (!NInoRaw(raw_ni))
				panic("%s(): Requested raw inode but got "
						"non-raw one.\n", __FUNCTION__);
			ioflags = 0;
			if (vnode_isnocache(ni->vn) ||
					vnode_isnocache(raw_ni->vn))
				ioflags |= IO_NOCACHE;
			if (vnode_isnoreadahead(ni->vn) ||
					vnode_isnoreadahead(raw_ni->vn))
				ioflags |= IO_RAOFF;
			err = ntfs_read_compressed(ni, raw_ni, attr_ofs, size,
					kaddr + upl_ofs, NULL, ioflags);
			if (err)
				ntfs_error(ni->vol->mp,
						"ntfs_read_compressed() "
						"failed (error %d).", err);
			lck_rw_unlock_shared(&raw_ni->lock);
			(void)vnode_put(raw_ni->vn);
		}
	}
	kerr = ubc_upl_unmap(upl);
	if (kerr != KERN_SUCCESS) {
		ntfs_error(ni->vol->mp, "ubc_upl_unmap() failed (error %d).",
				(int)kerr);
		if (!err)
			err = EIO;
	}
	if (!err) {
		if (!(flags & UPL_NOCOMMIT)) {
			/* Commit the page range we brought up to date. */
			ubc_upl_commit_range(upl, upl_ofs, size,
					UPL_COMMIT_FREE_ON_EMPTY);
		}
		ntfs_debug("Done (%s).", !NInoNonResident(ni) ?
				"ntfs_resident_attr_read()" :
				"ntfs_read_compressed()");
	} else /* if (err) */ {
		/*
		 * If the attribute was converted to non-resident under our
		 * nose, retry the pagein.
		 *
		 * TODO: This may no longer be possible to happen now that we
		 * lock against changes in initialized size and thus
		 * truncation... Revisit this issue when the write code has
		 * been written and remove the check + goto if appropriate.
		 */
		if (err == EAGAIN)
			goto retry_pagein;
err:
		if (!(flags & UPL_NOCOMMIT)) {
			int upl_flags = UPL_ABORT_FREE_ON_EMPTY;

			if (err != ENOMEM)
				upl_flags |= UPL_ABORT_ERROR;
			ubc_upl_abort_range(upl, upl_ofs, size, upl_flags);
		}
		ntfs_error(ni->vol->mp, "Failed (error %d).", err);
	}
	if (locked)
		lck_rw_unlock_shared(&ni->lock);
	return err;
}
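/*
 * ntfs_pagein() is the kind of helper a VNOP_PAGEIN entry point would call.
 * The sketch below shows a hypothetical wrapper; the struct vnop_pagein_args
 * members are the standard xnu ones, while the wrapper itself and the
 * NTFS_I() vnode-to-inode accessor are illustrative assumptions.
 */
static int ntfs_vnop_pagein_sketch(struct vnop_pagein_args *a)
{
	ntfs_inode *ni = NTFS_I(a->a_vp);	/* assumed accessor */

	if (!ni)
		return EINVAL;
	/* Pass the vnop arguments straight through to the helper above. */
	return ntfs_pagein(ni, a->a_f_offset, a->a_size, a->a_pl,
			a->a_pl_offset, a->a_flags);
}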
/**
 * write_mft_record_nolock - write out a mapped (extent) mft record
 * @ni:		ntfs inode describing the mapped (extent) mft record
 * @m:		mapped (extent) mft record to write
 * @sync:	if true, wait for i/o completion
 *
 * Write the mapped (extent) mft record @m described by the (regular or
 * extent) ntfs inode @ni to backing store. If the mft record @m has a
 * counterpart in the mft mirror, that is also updated.
 *
 * On success, clean the mft record and return 0. On error, leave the mft
 * record dirty and return -errno. The caller should call make_bad_inode() on
 * the base inode to ensure no more access happens to this inode. We do not
 * do it here as the caller may want to finish writing other extent mft
 * records first to minimize on-disk metadata inconsistencies.
 *
 * NOTE: We always perform synchronous i/o and ignore the @sync parameter.
 * However, if the mft record has a counterpart in the mft mirror and @sync is
 * true, we write the mft record, wait for i/o completion, and only then write
 * the mft mirror copy. This ensures that if the system crashes either the mft
 * or the mft mirror will contain a self-consistent mft record @m. If @sync is
 * false on the other hand, we start i/o on both and then wait for completion
 * on them. This provides a speedup but no longer guarantees that you will end
 * up with a self-consistent mft record in the case of a crash but if you
 * asked for asynchronous writing you probably do not care about that anyway.
 *
 * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just
 * schedule i/o via ->writepage or do it via kntfsd or whatever.
 */
int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
{
	ntfs_volume *vol = ni->vol;
	struct page *page = ni->page;
	unsigned int blocksize = vol->sb->s_blocksize;
	int max_bhs = vol->mft_record_size / blocksize;
	struct buffer_head *bhs[max_bhs];
	struct buffer_head *bh, *head;
	unsigned int block_start, block_end, m_start, m_end;
	int i_bhs, nr_bhs, err = 0;

	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
	BUG_ON(NInoAttr(ni));
	BUG_ON(!max_bhs);
	BUG_ON(!PageLocked(page));
	/*
	 * If the ntfs_inode is clean no need to do anything. If it is dirty,
	 * mark it as clean now so that it can be redirtied later on if
	 * needed. There is no danger of races since the caller is holding the
	 * locks for the mft record @m and the page it is in.
	 */
	if (!NInoTestClearDirty(ni))
		goto done;
	/* Make sure we have mapped buffers. */
	if (!page_has_buffers(page)) {
no_buffers_err_out:
		ntfs_error(vol->sb, "Writing mft records without existing "
				"buffers is not implemented yet. %s",
				ntfs_please_email);
		err = -EOPNOTSUPP;
		goto err_out;
	}
	bh = head = page_buffers(page);
	if (!bh)
		goto no_buffers_err_out;
	nr_bhs = 0;
	block_start = 0;
	m_start = ni->page_ofs;
	m_end = m_start + vol->mft_record_size;
	do {
		block_end = block_start + blocksize;
		/*
		 * If the buffer is outside the mft record, just skip it,
		 * clearing it if it is dirty to make sure it is not written
		 * out. It should never be marked dirty but better be safe.
		 */
		if ((block_end <= m_start) || (block_start >= m_end)) {
			if (buffer_dirty(bh)) {
				ntfs_warning(vol->sb, "Clearing dirty mft "
						"record page buffer. %s",
						ntfs_please_email);
				clear_buffer_dirty(bh);
			}
			continue;
		}
		if (!buffer_mapped(bh)) {
			ntfs_error(vol->sb, "Writing mft records without "
					"existing mapped buffers is not "
					"implemented yet. %s",
					ntfs_please_email);
			err = -EOPNOTSUPP;
			continue;
		}
		if (!buffer_uptodate(bh)) {
			ntfs_error(vol->sb, "Writing mft records without "
					"existing uptodate buffers is not "
					"implemented yet. %s",
					ntfs_please_email);
			err = -EOPNOTSUPP;
			continue;
		}
		BUG_ON(!nr_bhs && (m_start != block_start));
		BUG_ON(nr_bhs >= max_bhs);
		bhs[nr_bhs++] = bh;
		BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
	} while (block_start = block_end, (bh = bh->b_this_page) != head);
	if (unlikely(err))
		goto cleanup_out;
	/* Apply the mst protection fixups. */
	err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size);
	if (err) {
		ntfs_error(vol->sb, "Failed to apply mst fixups!");
		goto cleanup_out;
	}
	flush_dcache_mft_record_page(ni);
	/* Lock buffers and start synchronous write i/o on them. */
	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
		struct buffer_head *tbh = bhs[i_bhs];

		if (unlikely(test_set_buffer_locked(tbh)))
			BUG();
		BUG_ON(!buffer_uptodate(tbh));
		if (buffer_dirty(tbh))
			clear_buffer_dirty(tbh);
		get_bh(tbh);
		tbh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE, tbh);
	}
	/* Synchronize the mft mirror now if not @sync. */
	if (!sync && ni->mft_no < vol->mftmirr_size)
		sync_mft_mirror(ni, m, sync);
	/* Wait on i/o completion of buffers. */
	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
		struct buffer_head *tbh = bhs[i_bhs];

		wait_on_buffer(tbh);
		if (unlikely(!buffer_uptodate(tbh))) {
			err = -EIO;
			/*
			 * Set the buffer uptodate so the page & buffer states
			 * don't become out of sync.
			 */
			if (PageUptodate(page))
				set_buffer_uptodate(tbh);
		}
	}
	/* If @sync, now synchronize the mft mirror. */
	if (sync && ni->mft_no < vol->mftmirr_size)
		sync_mft_mirror(ni, m, sync);
	/* Remove the mst protection fixups again. */
	post_write_mst_fixup((NTFS_RECORD*)m);
	flush_dcache_mft_record_page(ni);
	if (unlikely(err)) {
		/* I/O error during writing. This is really bad! */
		ntfs_error(vol->sb, "I/O error while writing mft record "
				"0x%lx! Marking base inode as bad. You "
				"should unmount the volume and run chkdsk.",
				ni->mft_no);
		goto err_out;
	}
done:
	ntfs_debug("Done.");
	return 0;
cleanup_out:
	/* Clean the buffers. */
	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
		clear_buffer_dirty(bhs[i_bhs]);
err_out:
	/*
	 * Current state: all buffers are clean, unlocked, and uptodate.
	 * The caller should mark the base inode as bad so that no more i/o
	 * happens. ->clear_inode() will still be invoked so all extent
	 * inodes and other allocated memory will be freed.
	 */
	if (err == -ENOMEM) {
		ntfs_error(vol->sb, "Not enough memory to write mft record. "
				"Redirtying so the write is retried later.");
		mark_mft_record_dirty(ni);
		err = 0;
	}
	return err;
}
/**
 * sync_mft_mirror - synchronize an mft record to the mft mirror
 * @ni:		ntfs inode whose mft record to synchronize
 * @m:		mapped, mst protected (extent) mft record to synchronize
 * @sync:	if true, wait for i/o completion
 *
 * Write the mapped, mst protected (extent) mft record @m described by the
 * (regular or extent) ntfs inode @ni to the mft mirror ($MFTMirr).
 *
 * On success return 0. On error return -errno and set the volume errors flag
 * in the ntfs_volume to which @ni belongs.
 *
 * NOTE: We always perform synchronous i/o and ignore the @sync parameter.
 *
 * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just
 * schedule i/o via ->writepage or do it via kntfsd or whatever.
 */
static int sync_mft_mirror(ntfs_inode *ni, MFT_RECORD *m, int sync)
{
	ntfs_volume *vol = ni->vol;
	struct page *page;
	unsigned int blocksize = vol->sb->s_blocksize;
	int max_bhs = vol->mft_record_size / blocksize;
	struct buffer_head *bhs[max_bhs];
	struct buffer_head *bh, *head;
	u8 *kmirr;
	unsigned int block_start, block_end, m_start, m_end;
	int i_bhs, nr_bhs, err = 0;

	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
	BUG_ON(!max_bhs);
	if (unlikely(!vol->mftmirr_ino)) {
		/* This could happen during umount... */
		err = sync_mft_mirror_umount(ni, m);
		if (likely(!err))
			return err;
		goto err_out;
	}
	/* Get the page containing the mirror copy of the mft record @m. */
	page = ntfs_map_page(vol->mftmirr_ino->i_mapping, ni->mft_no >>
			(PAGE_CACHE_SHIFT - vol->mft_record_size_bits));
	if (unlikely(IS_ERR(page))) {
		ntfs_error(vol->sb, "Failed to map mft mirror page.");
		err = PTR_ERR(page);
		goto err_out;
	}
	/*
	 * Exclusion against other writers. This should never be a problem
	 * since the page in which the mft record @m resides is also locked
	 * and hence any other writers would be held up there but it is better
	 * to make sure no one is writing from elsewhere.
	 */
	lock_page(page);
	/* The address in the page of the mirror copy of the mft record @m. */
	kmirr = page_address(page) + ((ni->mft_no <<
			vol->mft_record_size_bits) & ~PAGE_CACHE_MASK);
	/* Copy the mst protected mft record to the mirror. */
	memcpy(kmirr, m, vol->mft_record_size);
	/* Make sure we have mapped buffers. */
	if (!page_has_buffers(page)) {
no_buffers_err_out:
		ntfs_error(vol->sb, "Writing mft mirror records without "
				"existing buffers is not implemented yet. %s",
				ntfs_please_email);
		err = -EOPNOTSUPP;
		goto unlock_err_out;
	}
	bh = head = page_buffers(page);
	if (!bh)
		goto no_buffers_err_out;
	nr_bhs = 0;
	block_start = 0;
	m_start = kmirr - (u8*)page_address(page);
	m_end = m_start + vol->mft_record_size;
	do {
		block_end = block_start + blocksize;
		/*
		 * If the buffer is outside the mft record, just skip it,
		 * clearing it if it is dirty to make sure it is not written
		 * out. It should never be marked dirty but better be safe.
		 */
		if ((block_end <= m_start) || (block_start >= m_end)) {
			if (buffer_dirty(bh)) {
				ntfs_warning(vol->sb, "Clearing dirty mft "
						"record page buffer. %s",
						ntfs_please_email);
				clear_buffer_dirty(bh);
			}
			continue;
		}
		if (!buffer_mapped(bh)) {
			ntfs_error(vol->sb, "Writing mft mirror records "
					"without existing mapped buffers is "
					"not implemented yet. %s",
					ntfs_please_email);
			err = -EOPNOTSUPP;
			continue;
		}
		if (!buffer_uptodate(bh)) {
			ntfs_error(vol->sb, "Writing mft mirror records "
					"without existing uptodate buffers is "
					"not implemented yet. %s",
					ntfs_please_email);
			err = -EOPNOTSUPP;
			continue;
		}
		BUG_ON(!nr_bhs && (m_start != block_start));
		BUG_ON(nr_bhs >= max_bhs);
		bhs[nr_bhs++] = bh;
		BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));
	} while (block_start = block_end, (bh = bh->b_this_page) != head);
	if (likely(!err)) {
		/* Lock buffers and start synchronous write i/o on them. */
		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
			struct buffer_head *tbh = bhs[i_bhs];

			if (unlikely(test_set_buffer_locked(tbh)))
				BUG();
			BUG_ON(!buffer_uptodate(tbh));
			if (buffer_dirty(tbh))
				clear_buffer_dirty(tbh);
			get_bh(tbh);
			tbh->b_end_io = end_buffer_write_sync;
			submit_bh(WRITE, tbh);
		}
		/* Wait on i/o completion of buffers. */
		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
			struct buffer_head *tbh = bhs[i_bhs];

			wait_on_buffer(tbh);
			if (unlikely(!buffer_uptodate(tbh))) {
				err = -EIO;
				/*
				 * Set the buffer uptodate so the page &
				 * buffer states don't become out of sync.
				 */
				if (PageUptodate(page))
					set_buffer_uptodate(tbh);
			}
		}
	} else /* if (unlikely(err)) */ {
		/* Clean the buffers. */
		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)
			clear_buffer_dirty(bhs[i_bhs]);
	}
unlock_err_out:
	/* Current state: all buffers are clean, unlocked, and uptodate. */
	/* Remove the mst protection fixups again. */
	post_write_mst_fixup((NTFS_RECORD*)kmirr);
	flush_dcache_page(page);
	unlock_page(page);
	ntfs_unmap_page(page);
	if (unlikely(err)) {
		/* I/O error during writing. This is really bad! */
		ntfs_error(vol->sb, "I/O error while writing mft mirror "
				"record 0x%lx! You should unmount the volume "
				"and run chkdsk or ntfsfix.", ni->mft_no);
		goto err_out;
	}
	ntfs_debug("Done.");
	return 0;
err_out:
	ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error code %i). "
			"Volume will be left marked dirty on umount. Run "
			"ntfsfix on the partition after umounting to correct "
			"this.", -err);
	/* We don't want to clear the dirty bit on umount. */
	NVolSetErrors(vol);
	return err;
}