// corresponds to hammer_vfs_vget
struct inode *hammerfs_iget(struct super_block *sb, ino_t ino)
{
	struct hammer_transaction trans;
	struct hammer_mount *hmp = (void *)sb->s_fs_info;
	struct hammer_inode *ip;
	struct inode *inode;
	int error = 0;

	hammer_simple_transaction(&trans, hmp);

	/*
	 * Lookup the requested HAMMER inode. The structure must be
	 * left unlocked while we manipulate the related vnode to avoid
	 * a deadlock.
	 */
	ip = hammer_get_inode(&trans, NULL, ino, hmp->asof,
			      HAMMER_DEF_LOCALIZATION, 0, &error);
	if (ip == NULL) {
		hammer_done_transaction(&trans);
		/*
		 * hammer_get_inode() reports positive (BSD-style)
		 * errnos, so negate for ERR_PTR(). Do not touch the
		 * still-uninitialized inode pointer here.
		 */
		return ERR_PTR(error ? -error : -ENOENT);
	}

	error = hammerfs_get_inode(sb, ip, &inode);
	// hammer_rel_inode(ip, 0);
	hammer_done_transaction(&trans);
	if (error)
		return ERR_PTR(error);
	return inode;
}
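/*
 * hammerfs_get_inode() is called above but not shown in this section.
 * The sketch below illustrates what such a helper might look like under
 * the usual iget_locked() pattern; the body is an assumption, not the
 * port's actual implementation, and the truncation of the 64-bit obj_id
 * to the ino_t hash key is glossed over.
 */
int hammerfs_get_inode(struct super_block *sb, struct hammer_inode *ip,
		       struct inode **inodep)
{
	struct inode *inode;

	inode = iget_locked(sb, (unsigned long)ip->obj_id);
	if (inode == NULL)
		return -ENOMEM;
	if (!(inode->i_state & I_NEW)) {
		/* already cached and initialized */
		*inodep = inode;
		return 0;
	}

	/* Populate the VFS inode from HAMMER's inode data. */
	inode->i_private = ip;
	inode->i_size = ip->ino_data.size;
	/* i_mode, ownership, timestamps etc. omitted in this sketch */

	unlock_new_inode(inode);
	*inodep = inode;
	return 0;
}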
/*
 * Obtain a vnode for the specified inode number. An exclusively locked
 * vnode is returned.
 */
int
hammer_vfs_vget(struct mount *mp, struct vnode *dvp,
		ino_t ino, struct vnode **vpp)
{
	struct hammer_transaction trans;
	struct hammer_mount *hmp = (void *)mp->mnt_data;
	struct hammer_inode *ip;
	int error;
	u_int32_t localization;

	lwkt_gettoken(&hmp->fs_token);
	hammer_simple_transaction(&trans, hmp);

	/*
	 * If a directory vnode is supplied (mainly NFS) then we can acquire
	 * the PFS domain from it. Otherwise we would only be able to vget
	 * inodes in the root PFS.
	 */
	if (dvp) {
		localization = HAMMER_DEF_LOCALIZATION +
			       VTOI(dvp)->obj_localization;
	} else {
		localization = HAMMER_DEF_LOCALIZATION;
	}

	/*
	 * Lookup the requested HAMMER inode. The structure must be
	 * left unlocked while we manipulate the related vnode to avoid
	 * a deadlock.
	 */
	ip = hammer_get_inode(&trans, NULL, ino,
			      hmp->asof, localization, 0, &error);
	if (ip == NULL) {
		*vpp = NULL;
	} else {
		error = hammer_get_vnode(ip, vpp);
		hammer_rel_inode(ip, 0);
	}
	hammer_done_transaction(&trans);
	lwkt_reltoken(&hmp->fs_token);
	return (error);
}
/*
 * Convert a file handle back to a vnode.
 *
 * Use rootvp to enforce PFS isolation when a PFS is exported via a
 * null mount.
 */
static int
hammer_vfs_fhtovp(struct mount *mp, struct vnode *rootvp,
		  struct fid *fhp, struct vnode **vpp)
{
	hammer_mount_t hmp = (void *)mp->mnt_data;
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct hammer_inode_info info;
	int error;
	u_int32_t localization;

	bcopy(fhp->fid_data + 0, &info.obj_id, sizeof(info.obj_id));
	bcopy(fhp->fid_data + 8, &info.obj_asof, sizeof(info.obj_asof));
	if (rootvp)
		localization = VTOI(rootvp)->obj_localization;
	else
		localization = (u_int32_t)fhp->fid_ext << 16;

	lwkt_gettoken(&hmp->fs_token);
	hammer_simple_transaction(&trans, hmp);

	/*
	 * Get/allocate the hammer_inode structure. The structure must be
	 * unlocked while we manipulate the related vnode to avoid a
	 * deadlock.
	 */
	ip = hammer_get_inode(&trans, NULL, info.obj_id,
			      info.obj_asof, localization, 0, &error);
	if (ip) {
		error = hammer_get_vnode(ip, vpp);
		hammer_rel_inode(ip, 0);
	} else {
		*vpp = NULL;
	}
	hammer_done_transaction(&trans);
	lwkt_reltoken(&hmp->fs_token);
	return (error);
}
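/*
 * For reference, the inverse operation packs the layout that the bcopy()
 * calls above unpack: obj_id at fid_data+0, obj_asof at fid_data+8, and
 * the upper 16 bits of the localization in fid_ext. A minimal sketch of
 * that encoder (DragonFly's hammer_vfs_vptofh is the authoritative
 * version):
 */
static int
hammer_vptofh_sketch(struct vnode *vp, struct fid *fhp)
{
	hammer_inode_t ip = VTOI(vp);

	fhp->fid_len = offsetof(struct fid, fid_data[16]);
	fhp->fid_ext = ip->obj_localization >> 16;
	bcopy(&ip->obj_id, fhp->fid_data + 0, sizeof(ip->obj_id));
	bcopy(&ip->obj_asof, fhp->fid_data + 8, sizeof(ip->obj_asof));
	return (0);
}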
int
hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
	     struct ucred *cred)
{
	struct hammer_transaction trans;
	struct hammer_mount *hmp;
	int error;

	error = priv_check_cred(cred, PRIV_HAMMER_IOCTL, 0);

	hmp = ip->hmp;
	hammer_start_transaction(&trans, hmp);

	switch(com) {
	case HAMMERIOC_PRUNE:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_prune(&trans, ip,
					(struct hammer_ioc_prune *)data);
		}
		break;
	case HAMMERIOC_GETHISTORY:
		error = hammer_ioc_gethistory(&trans, ip,
					(struct hammer_ioc_history *)data);
		break;
	case HAMMERIOC_REBLOCK:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_reblock(&trans, ip,
					(struct hammer_ioc_reblock *)data);
		}
		break;
	case HAMMERIOC_REBALANCE:
		/*
		 * Rebalancing needs to lock a lot of B-Tree nodes (the
		 * children and children's children). Systems with very
		 * little memory will not be able to do it.
		 */
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0 && nbuf < HAMMER_REBALANCE_MIN_BUFS) {
			hkprintf("System has insufficient buffers "
				 "to rebalance the tree.  nbuf < %d\n",
				 HAMMER_REBALANCE_MIN_BUFS);
			error = ENOSPC;
		}
		if (error == 0) {
			error = hammer_ioc_rebalance(&trans, ip,
					(struct hammer_ioc_rebalance *)data);
		}
		break;
	case HAMMERIOC_SYNCTID:
		error = hammer_ioc_synctid(&trans, ip,
					(struct hammer_ioc_synctid *)data);
		break;
	case HAMMERIOC_GET_PSEUDOFS:
		error = hammer_ioc_get_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		break;
	case HAMMERIOC_SET_PSEUDOFS:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_set_pseudofs(&trans, ip, cred,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_UPG_PSEUDOFS:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_upgrade_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_DGD_PSEUDOFS:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_downgrade_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_RMR_PSEUDOFS:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_destroy_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_WAI_PSEUDOFS:
		if (error == 0) {
			error = hammer_ioc_wait_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		}
		break;
	case HAMMERIOC_MIRROR_READ:
		if (error == 0) {
			error = hammer_ioc_mirror_read(&trans, ip,
				    (struct hammer_ioc_mirror_rw *)data);
		}
		break;
	case HAMMERIOC_MIRROR_WRITE:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_mirror_write(&trans, ip,
				    (struct hammer_ioc_mirror_rw *)data);
		}
		break;
	case HAMMERIOC_GET_VERSION:
		error = hammer_ioc_get_version(&trans, ip,
				    (struct hammer_ioc_version *)data);
		break;
	case HAMMERIOC_GET_INFO:
		error = hammer_ioc_get_info(&trans,
				    (struct hammer_ioc_info *)data);
		break;
	case HAMMERIOC_SET_VERSION:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_set_version(&trans, ip,
				    (struct hammer_ioc_version *)data);
		}
		break;
	case HAMMERIOC_ADD_VOLUME:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = priv_check_cred(cred, PRIV_HAMMER_VOLUME, 0);
			if (error == 0)
				error = hammer_ioc_volume_add(&trans, ip,
					    (struct hammer_ioc_volume *)data);
		}
		break;
	case HAMMERIOC_DEL_VOLUME:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = priv_check_cred(cred, PRIV_HAMMER_VOLUME, 0);
			if (error == 0)
				error = hammer_ioc_volume_del(&trans, ip,
					    (struct hammer_ioc_volume *)data);
		}
		break;
	case HAMMERIOC_LIST_VOLUMES:
		error = hammer_ioc_volume_list(&trans, ip,
				    (struct hammer_ioc_volume_list *)data);
		break;
	case HAMMERIOC_ADD_SNAPSHOT:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_add_snapshot(&trans, ip,
				    (struct hammer_ioc_snapshot *)data);
		}
		break;
	case HAMMERIOC_DEL_SNAPSHOT:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_del_snapshot(&trans, ip,
				    (struct hammer_ioc_snapshot *)data);
		}
		break;
	case HAMMERIOC_GET_SNAPSHOT:
		error = hammer_ioc_get_snapshot(&trans, ip,
				    (struct hammer_ioc_snapshot *)data);
		break;
	case HAMMERIOC_GET_CONFIG:
		error = hammer_ioc_get_config(&trans, ip,
				    (struct hammer_ioc_config *)data);
		break;
	case HAMMERIOC_SET_CONFIG:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_set_config(&trans, ip,
				    (struct hammer_ioc_config *)data);
		}
		break;
	case HAMMERIOC_DEDUP:
		if (error == 0 && hmp->ronly)
			error = EROFS;
		if (error == 0) {
			error = hammer_ioc_dedup(&trans, ip,
				    (struct hammer_ioc_dedup *)data);
		}
		break;
	case HAMMERIOC_GET_DATA:
		if (error == 0) {
			error = hammer_ioc_get_data(&trans, ip,
				    (struct hammer_ioc_data *)data);
		}
		break;
	case HAMMERIOC_SCAN_PSEUDOFS:
		error = hammer_ioc_scan_pseudofs(&trans, ip,
				    (struct hammer_ioc_pseudofs_rw *)data);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	hammer_done_transaction(&trans);
	return (error);
}
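/*
 * From userspace these commands are issued with ioctl(2) on a descriptor
 * for any file or directory inside the HAMMER mount. A minimal sketch for
 * HAMMERIOC_GET_VERSION; the cur_version field name follows
 * <vfs/hammer/hammer_ioctl.h>, but verify against your headers:
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <vfs/hammer/hammer_ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
print_hammer_version(const char *path)
{
	struct hammer_ioc_version ver;
	int fd;

	if ((fd = open(path, O_RDONLY)) < 0)
		return (-1);
	memset(&ver, 0, sizeof(ver));
	if (ioctl(fd, HAMMERIOC_GET_VERSION, &ver) < 0) {
		close(fd);
		return (-1);
	}
	printf("HAMMER filesystem version %u\n", ver.cur_version);
	close(fd);
	return (0);
}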
// corresponds to hammer_vop_strategy_read
int hammerfs_readpage(struct file *file, struct page *page)
{
	void *page_addr;
	hammer_mount_t hmp;
	struct buffer_head *bh;
	struct super_block *sb;
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct inode *inode;
	struct hammer_inode *ip;
	hammer_base_elm_t base;
	hammer_off_t disk_offset;
	int64_t rec_offset;
	int64_t file_offset;
	int error = 0;
	int boff;
	int roff;
	int n;
	int block_num;
	int block_offset;
	int bytes_read;
	int64_t sb_offset;
	hammer_off_t zone2_offset;
	int vol_no;
	hammer_volume_t volume;

	printk("hammerfs_readpage(page->index=%d)\n", (int)page->index);

	inode = file->f_path.dentry->d_inode;
	ip = (struct hammer_inode *)inode->i_private;
	sb = inode->i_sb;
	hmp = (hammer_mount_t)sb->s_fs_info;

	/*
	 * Use a 64-bit shift: page->index * PAGE_SIZE overflows a
	 * 32-bit int for large files.
	 */
	file_offset = (int64_t)page->index << PAGE_SHIFT;
	if (file_offset >= inode->i_size) {
		/*
		 * Reading past EOF is not an error: hand back a zeroed,
		 * uptodate page and release the page lock.
		 */
		zero_user(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	hammer_simple_transaction(&trans, ip->hmp);
	hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);

	page_addr = kmap(page);	/* kmap() cannot fail for a valid page */

	/*
	 * Key range (begin and end inclusive) to scan. Note that the keys
	 * stored in the actual records represent BASE+LEN, not BASE. The
	 * first record containing bio_offset will have a key > bio_offset.
	 */
	cursor.key_beg.localization = ip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = file_offset + 1;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	cursor.key_end = cursor.key_beg;
	KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);

	cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
	cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
	cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	error = hammer_ip_first(&cursor);
	boff = 0;

	while (error == 0) {
		/*
		 * Get the base file offset of the record. The key for
		 * data records is (base + bytes) rather than (base).
		 */
		base = &cursor.leaf->base;
		rec_offset = base->key - cursor.leaf->data_len;

		/*
		 * Calculate the gap, if any, and zero-fill it.
		 *
		 * n is the offset of the start of the record versus our
		 * current seek offset in the bio.
		 */
		n = (int)(rec_offset - (file_offset + boff));
		if (n > 0) {
			if (n > PAGE_SIZE - boff)
				n = PAGE_SIZE - boff;
			bzero((char *)page_addr + boff, n);
			boff += n;
			n = 0;
		}

		/*
		 * Calculate the data offset in the record and the number
		 * of bytes we can copy.
		 *
		 * There are two degenerate cases. First, boff may already
		 * be at bp->b_bufsize. Secondly, the data offset within
		 * the record may exceed the record's size.
		 */
		roff = -n;
		rec_offset += roff;
		n = cursor.leaf->data_len - roff;
		if (n <= 0) {
			printk("hammerfs_readpage: bad n=%d roff=%d\n",
			       n, roff);
			n = 0;
		} else if (n > PAGE_SIZE - boff) {
			n = PAGE_SIZE - boff;
		}

		/*
		 * Deal with cached truncations. This cool bit of code
		 * allows truncate()/ftruncate() to avoid having to sync
		 * the file.
		 *
		 * If the frontend is truncated then all backend records are
		 * subject to the frontend's truncation.
		 *
		 * If the backend is truncated then backend records on-disk
		 * (but not in-memory) are subject to the backend's
		 * truncation. In-memory records owned by the backend
		 * represent data written after the truncation point on the
		 * backend and must not be truncated.
		 *
		 * Truncate operations deal with frontend buffer cache
		 * buffers and frontend-owned in-memory records synchronously.
		 */
		if (ip->flags & HAMMER_INODE_TRUNCATED) {
			if (hammer_cursor_ondisk(&cursor) ||
			    cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
				if (ip->trunc_off <= rec_offset)
					n = 0;
				else if (ip->trunc_off < rec_offset + n)
					n = (int)(ip->trunc_off - rec_offset);
			}
		}
		if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
			if (hammer_cursor_ondisk(&cursor)) {
				if (ip->sync_trunc_off <= rec_offset)
					n = 0;
				else if (ip->sync_trunc_off < rec_offset + n)
					n = (int)(ip->sync_trunc_off -
						  rec_offset);
			}
		}

		/*
		 * Translate the record's data offset to a zone-2 offset,
		 * and from there to a byte offset on the backing volume.
		 */
		disk_offset = cursor.leaf->data_offset + roff;

		// move this to hammerfs_direct_io_read
		zone2_offset = hammer_blockmap_lookup(hmp, disk_offset,
						      &error);
		if (error)
			break;
		vol_no = HAMMER_VOL_DECODE(zone2_offset);
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (error)
			break;

		// n is the number of bytes we should read, sb_offset the
		// offset on disk
		sb_offset = volume->ondisk->vol_buf_beg +
			    (zone2_offset & HAMMER_OFF_SHORT_MASK);

		while (n > 0 && boff != PAGE_SIZE) {
			block_num = sb_offset / BLOCK_SIZE;
			block_offset = sb_offset % BLOCK_SIZE;

			/*
			 * Read at most one block's worth, capped by what
			 * the record still holds (n) and what the page
			 * can still take.
			 */
			bytes_read = min_t(int, BLOCK_SIZE - block_offset,
					   (int)PAGE_SIZE - boff);
			if (bytes_read > n)
				bytes_read = n;

			bh = sb_bread(sb, block_num);
			if (!bh) {
				error = -EIO;
				break;
			}
			/*
			 * Copy into the page at the current page offset
			 * (boff), from the current offset within the
			 * block, then advance the disk offset.
			 */
			memcpy((char *)page_addr + boff,
			       (char *)bh->b_data + block_offset, bytes_read);
			brelse(bh);

			n -= bytes_read;
			boff += bytes_read;
			roff += bytes_read;
			sb_offset += bytes_read;
		}
		hammer_rel_volume(volume, 0);
		if (error)
			break;

		/*
		 * Iterate until we have filled the request.
		 */
		if (boff == PAGE_SIZE)
			break;
		error = hammer_ip_next(&cursor);
	}

	/*
	 * ENOENT from the cursor just means we ran out of records; there
	 * may have been a gap after the last record, so zero-fill the
	 * remainder of the page.
	 */
	if (error == ENOENT)
		error = 0;
	if (error == 0 && boff != PAGE_SIZE) {
		bzero((char *)page_addr + boff, PAGE_SIZE - boff);
		boff = PAGE_SIZE;
	}
	if (error == 0)
		SetPageUptodate(page);	/* only once the data is in place */

	hammer_done_cursor(&cursor);
	hammer_done_transaction(&trans);

	kunmap(page);
	if (PageLocked(page))
		unlock_page(page);
	return error;
}
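/*
 * The cursor keying above deserves a worked example: HAMMER data records
 * are keyed by BASE+LEN (the file offset just past the record's data),
 * not by BASE. The first record covering byte `off` is therefore the
 * first record whose key is greater than `off`, which is why key_beg.key
 * is set to file_offset + 1 and why rec_offset is recovered as
 * key - data_len. A self-contained illustration with made-up values:
 */
#include <assert.h>

struct demo_rec {
	long long key;		/* base + data_len */
	int data_len;
};

static void
keying_example(void)
{
	/* a record holding file bytes [65536, 81920) */
	struct demo_rec r = { .key = 81920, .data_len = 16384 };
	long long file_offset = 65536;	/* start of the page being read */

	/* base file offset of the record, as computed in readpage */
	long long rec_offset = r.key - r.data_len;
	assert(rec_offset == file_offset);

	/*
	 * Scanning from file_offset + 1 finds this record, since its key
	 * (81920) is > 65537; scanning from file_offset itself could stop
	 * at a record that ends exactly at file_offset.
	 */
	assert(r.key > file_offset + 1);
}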
/*
 * Flush the next sequence number until an open flush group is encountered
 * or we reach (next). Not all sequence numbers will have flush groups
 * associated with them. These require that the UNDO/REDO FIFO still be
 * flushed since it can take at least one additional run to synchronize
 * the FIFO, and more to also synchronize the reserve structures.
 */
static int
hammer_flusher_flush(hammer_mount_t hmp, int *nomorep)
{
	hammer_flusher_info_t info;
	hammer_flush_group_t flg;
	hammer_reserve_t resv;
	int count;
	int seq;

	/*
	 * Just in case there's a flush race on mount. Seq number
	 * does not change.
	 */
	if (TAILQ_FIRST(&hmp->flusher.ready_list) == NULL) {
		*nomorep = 1;
		return (hmp->flusher.done);
	}
	*nomorep = 0;

	/*
	 * Flush the next sequence number. Sequence numbers can exist
	 * without an assigned flush group, indicating that just a FIFO flush
	 * should occur.
	 */
	seq = hmp->flusher.done + 1;
	flg = TAILQ_FIRST(&hmp->flush_group_list);
	if (flg == NULL) {
		if (seq == hmp->flusher.next) {
			*nomorep = 1;
			return (hmp->flusher.done);
		}
	} else if (seq == flg->seq) {
		if (flg->closed) {
			KKASSERT(flg->running == 0);
			flg->running = 1;
			if (hmp->fill_flush_group == flg) {
				hmp->fill_flush_group =
					TAILQ_NEXT(flg, flush_entry);
			}
		} else {
			*nomorep = 1;
			return (hmp->flusher.done);
		}
	} else {
		/*
		 * Sequence number problems can only happen if a critical
		 * filesystem error occurred which forced the filesystem into
		 * read-only mode.
		 */
		KKASSERT(flg->seq - seq > 0 || hmp->ronly >= 2);
		flg = NULL;
	}

	/*
	 * We only do one flg but we may have to loop/retry.
	 *
	 * Due to various races it is possible to come across a flush
	 * group which has not yet been closed.
	 */
	count = 0;
	while (flg && flg->running) {
		++count;
		if (hammer_debug_general & 0x0001) {
			hdkprintf("%d ttl=%d recs=%d\n",
				  flg->seq, flg->total_count, flg->refs);
		}
		if (hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR)
			break;
		hammer_start_transaction_fls(&hmp->flusher.trans, hmp);

		/*
		 * If the previous flush cycle just about exhausted our
		 * UNDO space we may have to do a dummy cycle to move the
		 * first_offset up before actually digging into a new cycle,
		 * or the new cycle will not have sufficient undo space.
		 */
		if (hammer_flusher_undo_exhausted(&hmp->flusher.trans, 3))
			hammer_flusher_finalize(&hmp->flusher.trans, 0);

		KKASSERT(hmp->next_flush_group != flg);

		/*
		 * Place the flg in the flusher structure and start the
		 * slaves running. The slaves will compete for inodes
		 * to flush.
		 *
		 * Make a per-thread copy of the transaction.
		 */
		while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) != NULL) {
			TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
			info->flg = flg;
			info->runstate = 1;
			info->trans = hmp->flusher.trans;
			TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
			wakeup(&info->runstate);
		}

		/*
		 * Wait for all slaves to finish running.
		 */
		while (TAILQ_FIRST(&hmp->flusher.run_list) != NULL)
			tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);

		/*
		 * Do the final finalization, clean up.
		 */
		hammer_flusher_finalize(&hmp->flusher.trans, 1);
		hmp->flusher.tid = hmp->flusher.trans.tid;

		hammer_done_transaction(&hmp->flusher.trans);

		/*
		 * Loop up on the same flg. If the flg is done clean it up
		 * and break out. We only flush one flg.
		 */
		if (RB_EMPTY(&flg->flush_tree)) {
			KKASSERT(flg->refs == 0);
			TAILQ_REMOVE(&hmp->flush_group_list, flg, flush_entry);
			kfree(flg, hmp->m_misc);
			break;
		}
		KKASSERT(TAILQ_FIRST(&hmp->flush_group_list) == flg);
	}

	/*
	 * We may have pure meta-data to flush, or we may have to finish
	 * cycling the UNDO FIFO, even if there were no flush groups.
	 */
	if (count == 0 && hammer_flusher_haswork(hmp)) {
		hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
		hammer_flusher_finalize(&hmp->flusher.trans, 1);
		hammer_done_transaction(&hmp->flusher.trans);
	}

	/*
	 * Clean up any freed big-blocks (typically zone-2).
	 * resv->flush_group is typically set several flush groups ahead
	 * of the free to ensure that the freed block is not reused until
	 * it can no longer be reused.
	 */
	while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) {
		if (resv->flg_no - seq > 0)
			break;
		hammer_reserve_clrdelay(hmp, resv);
	}
	return (seq);
}
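/*
 * Both sequence tests above (flg->seq - seq > 0 and resv->flg_no - seq > 0)
 * subtract and compare against zero instead of comparing the two values
 * directly, so the ordering stays correct when the sequence counter wraps.
 * A self-contained demonstration of the idiom, using explicitly unsigned
 * arithmetic to keep the wraparound well-defined:
 */
#include <assert.h>
#include <stdint.h>

static void
seq_compare_example(void)
{
	uint32_t older = 0xfffffffeu;	/* counter just before wrapping */
	uint32_t newer = older + 4;	/* wraps around to 2 */

	/* a direct comparison now gives the wrong answer ... */
	assert(!(newer > older));
	/* ... but the subtract-and-test idiom still orders them correctly */
	assert((int32_t)(newer - older) > 0);
}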
static inline int
_vnode_validate(hammer_dedup_cache_t dcp, void *data, int *errorp)
{
	struct hammer_transaction trans;
	hammer_inode_t ip;
	struct vnode *vp;
	struct buf *bp;
	/* off_t dooffset; */
	int result, error;

	result = error = 0;
	*errorp = 0;

	hammer_simple_transaction(&trans, dcp->hmp);

	ip = hammer_get_inode(&trans, NULL, dcp->obj_id,
			      HAMMER_MAX_TID, dcp->localization, 0, &error);
	if (ip == NULL) {
		kprintf("dedup: unable to find objid %016llx:%08x\n",
			(long long)dcp->obj_id, dcp->localization);
		*errorp = 1;
		goto failed2;
	}

	error = hammer_get_vnode(ip, &vp);
	if (error) {
		kprintf("dedup: unable to acquire vnode for %016llx:%08x\n",
			(long long)dcp->obj_id, dcp->localization);
		*errorp = 2;
		goto failed;
	}

	if ((bp = findblk(ip->vp, dcp->file_offset, FINDBLK_NBLOCK)) != NULL) {
		/*
		 * Take the buffer off the free queues while we examine
		 * it, as DragonFly's original does; bp must not be
		 * released until after b_flags and b_data have been
		 * inspected below.
		 */
		bremfree(bp);

		/* XXX if (mapped to userspace) goto done, *errorp = 4 */
		if ((bp->b_flags & B_CACHE) == 0 || bp->b_flags & B_DIRTY) {
			*errorp = 5;
			goto done;
		}

		/* XXX
		if (bp->b_bio2.bio_offset != dcp->data_offset) {
			error = VOP_BMAP(ip->vp, dcp->file_offset,
					 &dooffset, NULL, NULL, BUF_CMD_READ);
			if (error) {
				*errorp = 6;
				goto done;
			}
			if (dooffset != dcp->data_offset) {
				*errorp = 7;
				goto done;
			}
			hammer_live_dedup_bmap_saves++;
		}
		*/

		if (bcmp(data, bp->b_data, dcp->bytes) == 0)
			result = 1;

done:
		dfly_brelse(bp); /* XXX should requeue, not free: bqrelse(bp) */
	} else {
		*errorp = 3;
	}

	vput(vp);
failed:
	hammer_rel_inode(ip, 0);
failed2:
	hammer_done_transaction(&trans);
	return (result);
}
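/*
 * A sketch of how a caller might consume _vnode_validate(): a CRC match
 * in the dedup cache is only a hint, so candidate data is byte-compared
 * against what the vnode's buffer actually holds before a block is
 * deduplicated. The wrapper name below is an assumption for illustration:
 */
static int
dedup_candidate_ok(hammer_dedup_cache_t dcp, void *data)
{
	int error;

	if (_vnode_validate(dcp, data, &error))
		return (1);	/* buffer verified byte-identical */
	/* error codes 1-7 identify why validation failed or was skipped */
	return (0);
}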