/**
 * write_mft_record_nolock - write out a mapped (extent) mft record
 * @ni:		ntfs inode describing the mapped (extent) mft record
 * @m:		mapped (extent) mft record to write
 * @sync:	if true, wait for i/o completion
 *
 * Write the mapped (extent) mft record @m, described by the (regular or
 * extent) ntfs inode @ni, to backing store.  When @ni->mft_no is below the
 * volume's mftmirr_size the record is also handed to sync_mft_mirror() so
 * that its mft mirror counterpart is updated.
 *
 * The page containing the record must be locked and @ni must not be an
 * attribute inode; both conditions are enforced with BUG_ON().
 *
 * Return 0 on success, which includes the case where @ni was not dirty and
 * nothing had to be written.  On failure return -errno.  The caller should
 * then call make_bad_inode() on the base inode so that no further access
 * happens to it; this is not done here because the caller may want to finish
 * writing other extent mft records first to minimize on-disk metadata
 * inconsistencies.
 *
 * Dirty state: the dirty flag of @ni is cleared before any i/o is attempted.
 * It is set again, via mark_mft_record_dirty(), only when the error is
 * -ENOMEM, in which case 0 is returned so that the write is retried later.
 *
 * NOTE: The i/o is always synchronous regardless of @sync.  @sync only picks
 * the ordering relative to the mft mirror.  If @sync is true the mft record
 * is written and waited upon before the mirror copy is written, so that after
 * a crash either the mft or the mft mirror holds a self-consistent record.
 * If @sync is false the mirror write is issued before waiting on the mft
 * record buffers, which is faster but gives up that guarantee.
 *
 * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just
 * schedule i/o via ->writepage or do it via kntfsd or whatever.
 */
int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
{
	ntfs_volume *vol = ni->vol;
	struct page *page = ni->page;
	unsigned int blk_sz = vol->sb->s_blocksize;
	int bhs_cap = vol->mft_record_size / blk_sz;
	struct buffer_head *wr_bhs[bhs_cap];
	struct buffer_head *cur, *first;
	unsigned int blk_lo, blk_hi, rec_lo, rec_hi;
	int i, nr, ret = 0;

	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
	BUG_ON(NInoAttr(ni));
	BUG_ON(!bhs_cap);
	BUG_ON(!PageLocked(page));

	/*
	 * A clean ntfs_inode needs no work.  A dirty one is marked clean right
	 * away so that it can be redirtied later on if needed.  This cannot
	 * race because the caller holds the locks for the mft record @m and
	 * for the page it lives in.
	 */
	if (!NInoTestClearDirty(ni))
		goto done;

	/* The page must already carry buffers; we cannot create them here. */
	first = page_has_buffers(page) ? page_buffers(page) : NULL;
	if (!first) {
		ntfs_error(vol->sb, "Writing mft records without existing "
				"buffers is not implemented yet. %s",
				ntfs_please_email);
		ret = -EOPNOTSUPP;
		goto err_out;
	}

	/* Byte range of the mft record inside the page. */
	rec_lo = ni->page_ofs;
	rec_hi = rec_lo + vol->mft_record_size;

	/* Walk the page's buffer ring and collect those backing the record. */
	nr = 0;
	blk_lo = 0;
	cur = first;
	do {
		blk_hi = blk_lo + blk_sz;
		if ((blk_hi <= rec_lo) || (blk_lo >= rec_hi)) {
			/*
			 * Buffer lies outside the mft record.  It should never
			 * be dirty, but make sure it is not written out.
			 */
			if (buffer_dirty(cur)) {
				ntfs_warning(vol->sb, "Clearing dirty mft "
						"record page buffer. %s",
						ntfs_please_email);
				clear_buffer_dirty(cur);
			}
		} else if (!buffer_mapped(cur)) {
			ntfs_error(vol->sb, "Writing mft records without "
					"existing mapped buffers is not "
					"implemented yet. %s",
					ntfs_please_email);
			ret = -EOPNOTSUPP;
		} else if (!buffer_uptodate(cur)) {
			ntfs_error(vol->sb, "Writing mft records without "
					"existing uptodate buffers is not "
					"implemented yet. %s",
					ntfs_please_email);
			ret = -EOPNOTSUPP;
		} else {
			/* The record must start and end on buffer boundaries. */
			BUG_ON(!nr && (rec_lo != blk_lo));
			BUG_ON(nr >= bhs_cap);
			wr_bhs[nr++] = cur;
			BUG_ON((nr >= bhs_cap) && (rec_hi != blk_hi));
		}
		blk_lo = blk_hi;
		cur = cur->b_this_page;
	} while (cur != first);
	if (unlikely(ret))
		goto cleanup_out;

	/* Apply the mst protection fixups. */
	ret = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size);
	if (ret) {
		ntfs_error(vol->sb, "Failed to apply mst fixups!");
		goto cleanup_out;
	}
	flush_dcache_mft_record_page(ni);

	/* Lock the collected buffers and start synchronous write i/o. */
	for (i = 0; i < nr; i++) {
		struct buffer_head *wbh = wr_bhs[i];

		if (unlikely(test_set_buffer_locked(wbh)))
			BUG();
		BUG_ON(!buffer_uptodate(wbh));
		if (buffer_dirty(wbh))
			clear_buffer_dirty(wbh);
		get_bh(wbh);
		wbh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE, wbh);
	}

	/* Not @sync: write the mft mirror before waiting on our own i/o. */
	if (!sync && ni->mft_no < vol->mftmirr_size)
		sync_mft_mirror(ni, m, sync);

	/* Wait on i/o completion of buffers. */
	for (i = 0; i < nr; i++) {
		struct buffer_head *wbh = wr_bhs[i];

		wait_on_buffer(wbh);
		if (likely(buffer_uptodate(wbh)))
			continue;
		ret = -EIO;
		/*
		 * Set the buffer uptodate so the page & buffer states do not
		 * become out of sync.
		 */
		if (PageUptodate(page))
			set_buffer_uptodate(wbh);
	}

	/* @sync: write the mft mirror only now that our i/o has completed. */
	if (sync && ni->mft_no < vol->mftmirr_size)
		sync_mft_mirror(ni, m, sync);

	/* Remove the mst protection fixups again. */
	post_write_mst_fixup((NTFS_RECORD*)m);
	flush_dcache_mft_record_page(ni);
	if (unlikely(ret)) {
		/* I/O error during writing.  This is really bad! */
		ntfs_error(vol->sb, "I/O error while writing mft record "
				"0x%lx! Marking base inode as bad. You "
				"should unmount the volume and run chkdsk.",
				ni->mft_no);
		goto err_out;
	}
done:
	ntfs_debug("Done.");
	return 0;
cleanup_out:
	/* Clean the buffers. */
	for (i = 0; i < nr; i++)
		clear_buffer_dirty(wr_bhs[i]);
err_out:
	/*
	 * Current state: all buffers are clean, unlocked, and uptodate.
	 * The caller should mark the base inode as bad so that no more i/o
	 * happens.  ->clear_inode() will still be invoked so all extent inodes
	 * and other allocated memory will be freed.
	 */
	if (ret != -ENOMEM)
		return ret;
	/* Out of memory is treated as transient: redirty and report success. */
	ntfs_error(vol->sb, "Not enough memory to write mft record. "
			"Redirtying so the write is retried later.");
	mark_mft_record_dirty(ni);
	return 0;
}
/*
 * journal_submit_data_buffers - submit all the data buffers to disk
 * @journal:		journal supplying j_list_lock and the j_wbuf batch
 *			array (capacity j_wbufsize)
 * @commit_transaction:	transaction whose ->t_sync_datalist is processed
 *
 * Loops over @commit_transaction->t_sync_datalist until it is empty.  For each
 * buffer on the list one of three things happens:
 *  - it is dirty and we hold its buffer lock: the dirty bit is cleared, the
 *    buffer is stored in the batch array and refiled onto BJ_Locked; the batch
 *    is passed to journal_do_submit_data() when it is full;
 *  - we did not lock it but it is currently locked: it is refiled onto
 *    BJ_Locked without being added to the batch;
 *  - otherwise: it is unfiled and its journal head is removed.
 * Whatever remains in the batch array is submitted once the list is empty.
 *
 * j_list_lock is taken and released internally; it is not held on return.
 */
static void journal_submit_data_buffers(journal_t *journal,
				transaction_t *commit_transaction)
{
	struct journal_head *jh;
	struct buffer_head *bh;
	int locked;	/* nonzero when this pass holds the buffer lock on bh */
	int bufs = 0;	/* number of buffers currently batched in wbuf */
	struct buffer_head **wbuf = journal->j_wbuf;

	/*
	 * Whenever we unlock the journal and sleep, things can get added
	 * onto ->t_sync_datalist, so we have to keep looping back to
	 * write_out_data until we *know* that the list is empty.
	 *
	 * Cleanup any flushed data buffers from the data list.  Even in
	 * abort mode, we want to flush this out as soon as possible.
	 */
write_out_data:
	cond_resched();
	spin_lock(&journal->j_list_lock);

	while (commit_transaction->t_sync_datalist) {
		/* Always work on the current head of the list. */
		jh = commit_transaction->t_sync_datalist;
		bh = jh2bh(jh);
		locked = 0;

		/* Get reference just to make sure buffer does not disappear
		 * when we are forced to drop various locks */
		get_bh(bh);
		/* If the buffer is dirty, we need to submit IO and hence
		 * we need the buffer lock. We try to lock the buffer without
		 * blocking. If we fail, we need to drop j_list_lock and do
		 * blocking lock_buffer().
		 */
		if (buffer_dirty(bh)) {
			if (test_set_buffer_locked(bh)) {
				BUFFER_TRACE(bh, "needs blocking lock");
				spin_unlock(&journal->j_list_lock);
				/* Write out all data to prevent deadlocks */
				journal_do_submit_data(wbuf, bufs);
				bufs = 0;
				lock_buffer(bh);
				spin_lock(&journal->j_list_lock);
			}
			locked = 1;
		}
		/*
		 * We have to get bh_state lock. Again out of order, sigh.
		 * NOTE(review): presumably inverted_lock() attempts the bh
		 * state lock while j_list_lock is held and drops j_list_lock
		 * when it fails, since both locks are (re)taken here in the
		 * opposite order -- confirm against its definition.
		 */
		if (!inverted_lock(journal, bh)) {
			jbd_lock_bh_state(bh);
			spin_lock(&journal->j_list_lock);
		}
		/*
		 * Someone already cleaned up the buffer?  Locks were dropped
		 * above, so re-check that bh is still a journalled buffer on
		 * this transaction's BJ_SyncData list before touching it.
		 */
		if (!buffer_jbd(bh)
			|| jh->b_transaction != commit_transaction
			|| jh->b_jlist != BJ_SyncData) {
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			BUFFER_TRACE(bh, "already cleaned up");
			/* Drop the safety reference and look at the list again. */
			put_bh(bh);
			continue;
		}
		if (locked && test_clear_buffer_dirty(bh)) {
			/*
			 * Still dirty and locked by us: batch it for writeout.
			 * Neither the buffer lock nor the reference taken
			 * above is released in this branch.
			 * NOTE(review): presumably journal_do_submit_data() or
			 * i/o completion releases them -- confirm.
			 */
			BUFFER_TRACE(bh, "needs writeout, adding to array");
			wbuf[bufs++] = bh;
			__jbd2_journal_file_buffer(jh, commit_transaction,
						BJ_Locked);
			jbd_unlock_bh_state(bh);
			/* Batch array full: submit it and rescan the list. */
			if (bufs == journal->j_wbufsize) {
				spin_unlock(&journal->j_list_lock);
				journal_do_submit_data(wbuf, bufs);
				bufs = 0;
				goto write_out_data;
			}
		} else if (!locked && buffer_locked(bh)) {
			/*
			 * Not locked by us but locked by someone else: move it
			 * to BJ_Locked and drop our safety reference.
			 */
			__jbd2_journal_file_buffer(jh, commit_transaction,
						BJ_Locked);
			jbd_unlock_bh_state(bh);
			put_bh(bh);
		} else {
			BUFFER_TRACE(bh, "writeout complete: unfile");
			__jbd2_journal_unfile_buffer(jh);
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			jbd2_journal_remove_journal_head(bh);
			/* Once for our safety reference, once for
			 * jbd2_journal_remove_journal_head() */
			put_bh(bh);
			put_bh(bh);
		}

		/*
		 * Restart from the label when lock_need_resched() reports
		 * true for j_list_lock; the restart passes through
		 * cond_resched() and retakes the lock.
		 */
		if (lock_need_resched(&journal->j_list_lock)) {
			spin_unlock(&journal->j_list_lock);
			goto write_out_data;
		}
	}
	spin_unlock(&journal->j_list_lock);
	/* Submit whatever is left in the batch array. */
	journal_do_submit_data(wbuf, bufs);
}
/**
 * sync_mft_mirror - synchronize an mft record to the mft mirror
 * @ni:		ntfs inode whose mft record to synchronize
 * @m:		mapped, mst protected (extent) mft record to synchronize
 * @sync:	if true, wait for i/o completion
 *
 * Copy the mapped, mst protected (extent) mft record @m, described by the
 * (regular or extent) ntfs inode @ni, into the page of the mft mirror
 * ($MFTMirr) that holds its counterpart and write the buffers backing that
 * copy.  If the volume has no mft mirror inode (which could happen during
 * umount) the work is delegated to sync_mft_mirror_umount().
 *
 * Return 0 on success.  On error return -errno and set the volume errors flag
 * in the ntfs_volume to which @ni belongs.
 *
 * NOTE: We always perform synchronous i/o and ignore the @sync parameter.
 *
 * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just
 * schedule i/o via ->writepage or do it via kntfsd or whatever.
 */
static int sync_mft_mirror(ntfs_inode *ni, MFT_RECORD *m, int sync)
{
	ntfs_volume *vol = ni->vol;
	struct page *page;
	unsigned int blk_sz = vol->sb->s_blocksize;
	int bhs_cap = vol->mft_record_size / blk_sz;
	struct buffer_head *wr_bhs[bhs_cap];
	struct buffer_head *cur, *first;
	u8 *mirr_rec;
	unsigned int blk_lo, blk_hi, rec_lo, rec_hi;
	int i, nr, ret = 0;

	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
	BUG_ON(!bhs_cap);

	if (unlikely(!vol->mftmirr_ino)) {
		/* This could happen during umount... */
		ret = sync_mft_mirror_umount(ni, m);
		if (unlikely(ret))
			goto err_out;
		return 0;
	}

	/* Get the page containing the mirror copy of the mft record @m. */
	page = ntfs_map_page(vol->mftmirr_ino->i_mapping, ni->mft_no >>
			(PAGE_CACHE_SHIFT - vol->mft_record_size_bits));
	if (unlikely(IS_ERR(page))) {
		ntfs_error(vol->sb, "Failed to map mft mirror page.");
		ret = PTR_ERR(page);
		goto err_out;
	}

	/*
	 * Exclusion against other writers.  This should never be a problem
	 * since the page in which the mft record @m resides is also locked,
	 * so other writers would be held up there, but it is better to make
	 * sure no one is writing from elsewhere.
	 */
	lock_page(page);

	/* The address in the page of the mirror copy of the mft record @m. */
	mirr_rec = page_address(page) + ((ni->mft_no <<
			vol->mft_record_size_bits) & ~PAGE_CACHE_MASK);
	/* Copy the mst protected mft record to the mirror. */
	memcpy(mirr_rec, m, vol->mft_record_size);

	/* The page must already carry buffers; we cannot create them here. */
	first = page_has_buffers(page) ? page_buffers(page) : NULL;
	if (!first) {
		ntfs_error(vol->sb, "Writing mft mirror records without "
				"existing buffers is not implemented yet. %s",
				ntfs_please_email);
		ret = -EOPNOTSUPP;
		goto unlock_out;
	}

	/* Byte range of the mirror record inside the page. */
	rec_lo = mirr_rec - (u8*)page_address(page);
	rec_hi = rec_lo + vol->mft_record_size;

	/* Walk the page's buffer ring and collect those backing the record. */
	nr = 0;
	blk_lo = 0;
	cur = first;
	do {
		blk_hi = blk_lo + blk_sz;
		if ((blk_hi <= rec_lo) || (blk_lo >= rec_hi)) {
			/*
			 * Buffer lies outside the mft record.  It should never
			 * be dirty, but make sure it is not written out.
			 */
			if (buffer_dirty(cur)) {
				ntfs_warning(vol->sb, "Clearing dirty mft "
						"record page buffer. %s",
						ntfs_please_email);
				clear_buffer_dirty(cur);
			}
		} else if (!buffer_mapped(cur)) {
			ntfs_error(vol->sb, "Writing mft mirror records "
					"without existing mapped buffers is "
					"not implemented yet. %s",
					ntfs_please_email);
			ret = -EOPNOTSUPP;
		} else if (!buffer_uptodate(cur)) {
			ntfs_error(vol->sb, "Writing mft mirror records "
					"without existing uptodate buffers is "
					"not implemented yet. %s",
					ntfs_please_email);
			ret = -EOPNOTSUPP;
		} else {
			/* The record must start and end on buffer boundaries. */
			BUG_ON(!nr && (rec_lo != blk_lo));
			BUG_ON(nr >= bhs_cap);
			wr_bhs[nr++] = cur;
			BUG_ON((nr >= bhs_cap) && (rec_hi != blk_hi));
		}
		blk_lo = blk_hi;
		cur = cur->b_this_page;
	} while (cur != first);

	if (unlikely(ret)) {
		/* Nothing will be written: clean the collected buffers. */
		for (i = 0; i < nr; i++)
			clear_buffer_dirty(wr_bhs[i]);
		goto unlock_out;
	}

	/* Lock the collected buffers and start synchronous write i/o. */
	for (i = 0; i < nr; i++) {
		struct buffer_head *wbh = wr_bhs[i];

		if (unlikely(test_set_buffer_locked(wbh)))
			BUG();
		BUG_ON(!buffer_uptodate(wbh));
		if (buffer_dirty(wbh))
			clear_buffer_dirty(wbh);
		get_bh(wbh);
		wbh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE, wbh);
	}

	/* Wait on i/o completion of buffers. */
	for (i = 0; i < nr; i++) {
		struct buffer_head *wbh = wr_bhs[i];

		wait_on_buffer(wbh);
		if (likely(buffer_uptodate(wbh)))
			continue;
		ret = -EIO;
		/*
		 * Set the buffer uptodate so the page & buffer states do not
		 * become out of sync.
		 */
		if (PageUptodate(page))
			set_buffer_uptodate(wbh);
	}

unlock_out:
	/* Current state: all buffers are clean, unlocked, and uptodate. */
	/* Remove the mst protection fixups again. */
	post_write_mst_fixup((NTFS_RECORD*)mirr_rec);
	flush_dcache_page(page);
	unlock_page(page);
	ntfs_unmap_page(page);
	if (likely(!ret)) {
		ntfs_debug("Done.");
		return 0;
	}
	/* I/O error during writing.  This is really bad! */
	ntfs_error(vol->sb, "I/O error while writing mft mirror "
			"record 0x%lx! You should unmount the volume "
			"and run chkdsk or ntfsfix.", ni->mft_no);
err_out:
	ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error code %i). "
			"Volume will be left marked dirty on umount. Run "
			"ntfsfix on the partition after umounting to correct "
			"this.", -ret);
	/* We don't want to clear the dirty bit on umount. */
	NVolSetErrors(vol);
	return ret;
}