Example #1
// corresponds to hammer_vop_strategy_read
int hammerfs_readpage(struct file *file, struct page *page) 
{
    void *page_addr = NULL;    /* lets the error path know whether kmap() ran */
    hammer_mount_t hmp;
    struct buffer_head *bh;
    struct super_block *sb;
    struct hammer_transaction trans;
    struct hammer_cursor cursor;
    struct inode *inode;
    struct hammer_inode *ip;
    hammer_base_elm_t base;
    hammer_off_t disk_offset;
    int64_t rec_offset;
    int64_t file_offset;
    int error = 0;
    int boff;
    int roff;
    int n;
    sector_t block_num;
    int block_offset;
    int bytes_read;
    int64_t sb_offset;
    hammer_off_t zone2_offset;
    int vol_no;
    hammer_volume_t volume;

    printk(KERN_DEBUG "hammerfs_readpage(page->index=%lu)\n",
           (unsigned long) page->index);

    inode = file->f_path.dentry->d_inode;
    ip = (struct hammer_inode *)inode->i_private;
    sb = inode->i_sb;
    hmp = (hammer_mount_t)sb->s_fs_info;
    hammer_simple_transaction(&trans, ip->hmp);
    hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
    file_offset = (int64_t) page->index * PAGE_SIZE;  /* promote before multiply */

    /* defensive: the VFS should not ask for pages beyond EOF */
    if (file_offset > inode->i_size) {
        error = -EINVAL;
        goto failed;    /* the cursor and transaction must still be torn down */
    }

    /*
     * Map the page; SetPageUptodate() is deferred until the page has
     * actually been filled.
     */
    page_addr = kmap(page);

    if (!page_addr) {
        error = -ENOMEM;
        goto failed;
    }

   /*
    * Key range (begin and end inclusive) to scan.  Note that the keys
    * stored in the actual records represent BASE+LEN, not BASE.  The
    * first record containing file_offset will have a key > file_offset.
    */
    cursor.key_beg.localization = ip->obj_localization +
                                  HAMMER_LOCALIZE_MISC;
    cursor.key_beg.obj_id = ip->obj_id;
    cursor.key_beg.create_tid = 0;
    cursor.key_beg.delete_tid = 0;
    cursor.key_beg.obj_type = 0;
    cursor.key_beg.key = file_offset + 1;
    cursor.asof = ip->obj_asof;
    cursor.flags |= HAMMER_CURSOR_ASOF;

    cursor.key_end = cursor.key_beg;
    KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);

    cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
    cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
    cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
    cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

    error = hammer_ip_first(&cursor);
    boff = 0;

    while(error == 0) {
       /*
        * Get the base file offset of the record.  The key for
        * data records is (base + bytes) rather than (base).
        */
        base = &cursor.leaf->base;
        rec_offset = base->key - cursor.leaf->data_len;

       /*
        * Calculate the gap, if any, and zero-fill it.
        *
        * n is the offset of the start of the record versus our
        * current seek offset in the page.
        */
        n = (int)(rec_offset - (file_offset + boff));
        if (n > 0) {
            if (n > PAGE_SIZE - boff)
                n = PAGE_SIZE - boff;
            bzero((char *)page_addr + boff, n);
            boff += n;
            n = 0;
        }

       /*
        * Calculate the data offset in the record and the number
        * of bytes we can copy.
        *
        * There are two degenerate cases.  First, boff may already
        * be at PAGE_SIZE.  Secondly, the data offset within
        * the record may exceed the record's size.
        */
        roff = -n;
        rec_offset += roff;
        n = cursor.leaf->data_len - roff;
        if (n <= 0) {
            printk(KERN_WARNING "hammerfs_readpage: bad n=%d roff=%d\n", n, roff);
            n = 0;
        } else if (n > PAGE_SIZE - boff) {
            n = PAGE_SIZE - boff;
        }

       /*
        * Deal with cached truncations.  This cool bit of code
        * allows truncate()/ftruncate() to avoid having to sync
        * the file.
        *
        * If the frontend is truncated then all backend records are
        * subject to the frontend's truncation.
        *
        * If the backend is truncated then backend records on-disk
        * (but not in-memory) are subject to the backend's
        * truncation.  In-memory records owned by the backend
        * represent data written after the truncation point on the
        * backend and must not be truncated.
        *
        * Truncate operations deal with frontend buffer cache
        * buffers and frontend-owned in-memory records synchronously.
        */
        if (ip->flags & HAMMER_INODE_TRUNCATED) {
            if (hammer_cursor_ondisk(&cursor) ||
                cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
                if (ip->trunc_off <= rec_offset)
                    n = 0;
                else if (ip->trunc_off < rec_offset + n)
                    n = (int)(ip->trunc_off - rec_offset);
            }
        }
        if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
            if (hammer_cursor_ondisk(&cursor)) {
                if (ip->sync_trunc_off <= rec_offset)
                    n = 0;
                else if (ip->sync_trunc_off < rec_offset + n)
                    n = (int)(ip->sync_trunc_off - rec_offset);
            }
        }

       /*
        * Translate the zone offset of the record's data to a zone-2
        * (physical) offset, and that to a byte offset relative to the
        * start of the volume.
        */
        disk_offset = cursor.leaf->data_offset + roff;

        // move this to hammerfs_direct_io_read
        zone2_offset = hammer_blockmap_lookup(hmp, disk_offset, &error);
        if (error)
            break;
        vol_no = HAMMER_VOL_DECODE(zone2_offset);
        volume = hammer_get_volume(hmp, vol_no, &error);
        if (error)
            break;

        // n is the number of bytes we should read, sb_offset the
        // byte offset on disk
        sb_offset = volume->ondisk->vol_buf_beg +
                    (zone2_offset & HAMMER_OFF_SHORT_MASK);

        while (n > 0 && boff != PAGE_SIZE) {
            block_num = sb_offset / BLOCK_SIZE;
            block_offset = sb_offset % BLOCK_SIZE;

            // copy no more than what is left in this block, in the
            // page, and in the record
            bytes_read = min_t(int, BLOCK_SIZE - block_offset,
                               PAGE_SIZE - boff);
            if (bytes_read > n)
                bytes_read = n;

            bh = sb_bread(sb, block_num);
            if (!bh) {
                hammer_rel_volume(volume, 0);
                error = -EIO;
                goto failed;
            }
            memcpy((char *)page_addr + boff,
                   (char *)bh->b_data + block_offset, bytes_read);
            brelse(bh);

            sb_offset += bytes_read;    // advance the on-disk position
            n -= bytes_read;
            boff += bytes_read;
            roff += bytes_read;
        }
        hammer_rel_volume(volume, 0);

       /*
        * Iterate until we have filled the request.
        */
        if (boff == PAGE_SIZE)
            break;
        error = hammer_ip_next(&cursor);
    }

    /*
     * ENOENT just means we ran out of records: zero-fill whatever is
     * left of the page (sparse regions and the tail past EOF).
     */
    if (error == ENOENT) {
        bzero((char *)page_addr + boff, PAGE_SIZE - boff);
        error = 0;
    }

failed:
    hammer_done_cursor(&cursor);
    hammer_done_transaction(&trans);

    /*
     * HAMMER routines return positive (BSD-style) errnos; the Linux
     * VFS expects negative ones.
     */
    if (error > 0)
        error = -error;

    if (page_addr) {
        if (error == 0)
            SetPageUptodate(page);
        kunmap(page);
    }
    if (PageLocked(page))
        unlock_page(page);
    return error;
}
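
For orientation: the VFS never calls a readpage routine directly; it finds it
through the inode's struct address_space_operations. The wiring below is a
minimal sketch, not part of the original port; hammerfs_aops and
hammerfs_init_inode are hypothetical names, and only hammerfs_readpage itself
comes from the example above.

/* Sketch (assumed names): hook hammerfs_readpage into the page cache. */
static const struct address_space_operations hammerfs_aops = {
    .readpage = hammerfs_readpage,   /* fills one page cache page */
};

/* hypothetical inode-setup helper for this port */
static void hammerfs_init_inode(struct inode *inode)
{
    inode->i_mapping->a_ops = &hammerfs_aops;
}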
Example #2
static int
free_callback(hammer_transaction_t trans, hammer_volume_t volume __unused,
	hammer_buffer_t *bufferp,
	struct hammer_blockmap_layer1 *layer1,
	struct hammer_blockmap_layer2 *layer2,
	hammer_off_t phys_off,
	hammer_off_t block_off __unused,
	void *data)
{
	struct bigblock_stat *stat = (struct bigblock_stat*)data;

	/*
	 * No modifications to ondisk structures
	 */
	int testonly = (stat == NULL);

	if (layer1) {
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			/*
			 * This layer1 entry is already free.
			 */
			return 0;
		}

		KKASSERT((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
			trans->hmp->volume_to_remove);

		if (testonly)
			return 0;

		/*
		 * Free the L1 entry
		 */
		hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(*bufferp);

		return 0;
	} else if (layer2) {
		if (layer2->zone == HAMMER_ZONE_UNAVAIL_INDEX) {
			return 0;
		}

		if (layer2->zone == HAMMER_ZONE_FREEMAP_INDEX) {
			if (stat) {
				++stat->total_bigblocks;
			}
			return 0;
		}

		if (layer2->append_off == 0 &&
		    layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
			if (stat) {
				++stat->total_bigblocks;
				++stat->total_free_bigblocks;
			}
			return 0;
		}

		/*
		 * We found a layer2 entry that is not empty!
		 */
		return EBUSY;
	} else {
		KKASSERT(0);
	}

	return EINVAL;
}
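
free_callback has the shape of the per-entry callback that HAMMER's freemap
iterator runs over every layer1 and layer2 entry of a volume that is being
removed. The sketch below shows the two-pass usage implied by the testonly
flag; it is illustrative only: hammer_iterate_l1l2_entries is paraphrased
from DragonFly's hammer_volume.c (check the exact signature there), and
hammer_free_freemap_example is a hypothetical wrapper name.

static int
hammer_free_freemap_example(hammer_transaction_t trans,
			hammer_volume_t volume, struct bigblock_stat *stat)
{
	int error;

	/*
	 * Pass 1, data == NULL: test only, no ondisk modifications.
	 * free_callback returns EBUSY if any big-block is still in use.
	 */
	error = hammer_iterate_l1l2_entries(trans, volume, free_callback, NULL);
	if (error)
		return error;

	/*
	 * Pass 2: actually free the layer1/layer2 entries and count
	 * big-blocks into *stat.
	 */
	return hammer_iterate_l1l2_entries(trans, volume, free_callback, stat);
}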
Example #3
/*
 * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
 *
 * XXX We have no visibility into internal B-Tree nodes at the moment,
 * only leaf nodes.
 */
static int
hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_mount_t hmp;
	hammer_off_t tmp_offset;
	hammer_node_ondisk_t ondisk;
	struct hammer_btree_leaf_elm leaf;
	int error;
	int bytes;
	int cur;
	int iocflags;

	error = 0;
	hmp = cursor->trans->hmp;

	/*
	 * Reblock data.  Note that data embedded in a record is reblocked
	 * by the record reblock code.  Data processing only occurs at leaf
	 * nodes and for RECORD element types.
	 */
	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
		goto skip;
	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
		return(EINVAL);
	tmp_offset = elm->leaf.data_offset;
	if (tmp_offset == 0)
		goto skip;

	/*
	 * If reblock->vol_no is specified we only want to reblock data
	 * in that volume and ignore everything else.
	 */
	if (reblock->vol_no != -1 &&
	    reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset))
		goto skip;

	/*
	 * NOTE: Localization restrictions may also have been set-up, we can't
	 *	 just set the match flags willy-nilly here.
	 */
	switch(elm->leaf.base.rec_type) {
	case HAMMER_RECTYPE_INODE:
	case HAMMER_RECTYPE_SNAPSHOT:
	case HAMMER_RECTYPE_CONFIG:
		iocflags = HAMMER_IOC_DO_INODES;
		break;
	case HAMMER_RECTYPE_EXT:
	case HAMMER_RECTYPE_FIX:
	case HAMMER_RECTYPE_PFS:
	case HAMMER_RECTYPE_DIRENTRY:
		iocflags = HAMMER_IOC_DO_DIRS;
		break;
	case HAMMER_RECTYPE_DATA:
	case HAMMER_RECTYPE_DB:
		iocflags = HAMMER_IOC_DO_DATA;
		break;
	default:
		iocflags = 0;
		break;
	}
	if (reblock->head.flags & iocflags) {
		++reblock->data_count;
		reblock->data_byte_count += elm->leaf.data_len;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			hdkprintf("D %6d/%d\n", bytes, reblock->free_level);
		/*
		 * Start data reblock if
		 * 1. there is no error
		 * 2. the data offset and the allocator offset are not in
		 *    the same big-block, or the free level threshold is 0
		 * 3. the free bytes in the data's big-block are at least
		 *    the free level threshold (so a threshold of 0 reblocks
		 *    unconditionally).
		 */
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			/*
			 * This is nasty, the uncache code may have to get
			 * vnode locks and because of that we can't hold
			 * the cursor locked.
			 *
			 * WARNING: See warnings in hammer_unlock_cursor()
			 *	    function.
			 */
			leaf = elm->leaf;
			hammer_unlock_cursor(cursor);
			hammer_io_direct_uncache(hmp, &leaf);
			hammer_lock_cursor(cursor);

			/*
			 * elm may have become stale or invalid, reload it.
			 * The ondisk variable is temporary only.  Note that
			 * cursor->node and thus cursor->node->ondisk may
			 * also have changed.
			 */
			ondisk = cursor->node->ondisk;
			elm = &ondisk->elms[cursor->index];
			if (cursor->flags & HAMMER_CURSOR_RETEST) {
				hkprintf("debug: retest on reblocker uncache\n");
				error = EDEADLK;
			} else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF ||
				   cursor->index >= ondisk->count) {
				hkprintf("debug: shifted on reblocker uncache\n");
				error = EDEADLK;
			} else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) {
				hkprintf("debug: changed on reblocker uncache\n");
				error = EDEADLK;
			}
			if (error == 0)
				error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				KKASSERT(cursor->index < ondisk->count);
				error = hammer_reblock_data(reblock,
							    cursor, elm);
			}
			if (error == 0) {
				++reblock->data_moves;
				reblock->data_byte_moves += elm->leaf.data_len;
			}
		}
	}

skip:
	/*
	 * Reblock a B-Tree internal or leaf node.  A leaf node is reblocked
	 * on initial entry only (element 0).  An internal node is reblocked
	 * when entered upward from its first leaf node only (also element 0,
	 * see hammer_btree_iterate() where cursor moves up and may return).
	 * Further revisits of the internal node (index > 0) are ignored.
	 */
	tmp_offset = cursor->node->node_offset;

	/*
	 * If reblock->vol_no is specified we only want to reblock data
	 * in that volume and ignore everything else.
	 */
	if (reblock->vol_no != -1 &&
	    reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset))
		goto end;

	if (cursor->index == 0 &&
	    error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
		++reblock->btree_count;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			hdkprintf("B %6d/%d\n", bytes, reblock->free_level);
		/*
		 * Start node reblock if
		 * 1. there is no error
		 * 2. the node offset and the allocator offset are not in
		 *    the same big-block, or the free level threshold is 0
		 * 3. the free bytes in the node's big-block are at least
		 *    the free level threshold (so a threshold of 0 reblocks
		 *    unconditionally).
		 */
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				if (cursor->parent) {
					KKASSERT(cursor->parent_index <
						 cursor->parent->ondisk->count);
					elm = &cursor->parent->ondisk->elms[cursor->parent_index];
				} else {
					elm = NULL;
				}
				switch(cursor->node->ondisk->type) {
				case HAMMER_BTREE_TYPE_LEAF:
					error = hammer_reblock_leaf_node(
							reblock, cursor, elm);
					break;
				case HAMMER_BTREE_TYPE_INTERNAL:
					error = hammer_reblock_int_node(
							reblock, cursor, elm);
					break;
				default:
					hpanic("Illegal B-Tree node type");
				}
			}
			if (error == 0) {
				++reblock->btree_moves;
			}
		}
	}
end:
	hammer_cursor_downgrade(cursor);
	return(error);
}
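
hammer_reblock_helper() is called once per B-Tree element from the reblock
ioctl's scan loop. The sketch below condenses that loop to its core; it is
illustrative only, since the real hammer_ioc_reblock() in DragonFly's
hammer_reblock.c additionally handles cursor setup, EDEADLK/EWOULDBLOCK
retries, periodic cursor unlocks, and signal checks. reblock_scan_example
is a hypothetical name.

static int
reblock_scan_example(struct hammer_ioc_reblock *reblock, hammer_cursor_t cursor)
{
	hammer_btree_elm_t elm;
	int error;

	error = hammer_btree_iterate(cursor);
	while (error == 0) {
		elm = &cursor->node->ondisk->elms[cursor->index];

		/*
		 * EDEADLK from the helper means the element shifted
		 * underneath us and the caller must retry.
		 */
		error = hammer_reblock_helper(reblock, cursor, elm);
		if (error == 0)
			error = hammer_btree_iterate(cursor);
	}
	if (error == ENOENT)	/* ran off the end of the key range */
		error = 0;
	return error;
}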