/*
 * Translate opaque file-handle bits into a dentry.
 *
 * The handle is handed to VFS_VGET() to obtain a held vnode; the vnode is
 * then run through mfs_rebind_vpp() (a rebind is only logged) and mapped to
 * a dentry with vnlayer_find_dentry().  The vnode hold is always dropped
 * before returning.
 *
 * Returns whatever vnlayer_find_dentry() produced, or an ERR_PTR() carrying
 * the Linux errno when VFS_VGET() fails.
 */
struct dentry *
vnlayer_get_dentry(
    SUPER_T *sb,
    void *fhbits
)
{
    VFS_T *vfsp = SBTOVFS(sb);
    MDKI_FID_T *fidp = fhbits;
    VNODE_T *vp;
    DENT_T *result;
    CALL_DATA_T cd;
    int rc;

    mdki_linux_init_call_data(&cd);

    rc = VFS_VGET(vfsp, &vp, fidp, &cd);
    if (rc != 0) {
        result = ERR_PTR(mdki_errno_unix_to_linux(rc));
    } else {
        /* the vnode may need rebinding; just note it when that happens */
        if (mfs_rebind_vpp(1, &vp, &cd)) {
            MDKI_VFS_LOG(VFS_LOG_ESTALE, "%s: vp %p rebound\n",
                         __FUNCTION__, vp);
        }
        result = vnlayer_find_dentry(vp);
        /* the hold from VFS_VGET is released unconditionally */
        VN_RELE(vp);
    }

    mdki_linux_destroy_call_data(&cd);
    return result;
}
/* object has an underlying thing to poll */
        if (realfp->f_op && realfp->f_op->poll) {
            mask = (*realfp->f_op->poll)(realfp, pt_p);
        }
    }
    return mask;
}

/*
 * ioctl file operation: forward the request to VOP_IOCTL() on the vnode
 * behind the file and return the result as a Linux errno.
 *
 * Kernels newer than 2.6.36 get the (file, cmd, arg) form returning long;
 * older kernels get the (inode, file, cmd, arg) form returning int.  In the
 * newer form the inode is reached through file_p->f_path.dentry.
 *
 * The ioctl context records the file pointer and marks the caller's word
 * size as not yet determined (caller_is_32bit = 0).
 */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36)
long
vnode_fop_ioctl(
    FILE_T *file_p,
    uint cmd,
    ulong arg
)
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36) */
int
vnode_fop_ioctl(
    INODE_T *ino_p,
    FILE_T *file_p,
    uint cmd,
    ulong arg
)
#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36) */
{
    int err;
    int rval;                           /* unused */
    CALL_DATA_T cd;
    struct ioctl_ctx ctx;

    ASSERT_KERNEL_LOCKED();
    mdki_linux_init_call_data(&cd);
    ctx.filp = file_p;
    ctx.caller_is_32bit = 0;    /* unknown as of yet */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36)
    err = VOP_IOCTL(ITOV(file_p->f_path.dentry->d_inode), cmd, (void *)arg, 0, &cd, &rval, NULL, &ctx);
#else
    err = VOP_IOCTL(ITOV(ino_p), cmd, (void *)arg, 0, &cd, &rval, NULL, &ctx);
#endif
    /* only the error code is propagated; rval is discarded */
    err = mdki_errno_unix_to_linux(err);
    mdki_linux_destroy_call_data(&cd);
    return err;
}
struct dentry * vnlayer_get_parent(struct dentry *child) { VNODE_T *parentvp; struct dentry *rdentp; struct lookup_ctx ctx = {0}; CALL_DATA_T cd; int err; if (!MDKI_INOISMVFS(child->d_inode)) return ERR_PTR(-ESTALE); mdki_linux_init_call_data(&cd); err = VOP_LOOKUP(ITOV(child->d_inode), "..", &parentvp, NULL, VNODE_LF_LOOKUP, NULL, &cd, &ctx); mdki_linux_destroy_call_data(&cd); if (err == 0) { ASSERT(ctx.dentrypp == NULL); ASSERT(parentvp != NULL); if (!MDKI_INOISMVFS(VTOI(parentvp))) { rdentp = ERR_PTR(-ESTALE); } else { rdentp = vnlayer_find_dentry(parentvp); } /* always drop vnode's refcount */ VN_RELE(parentvp); } else { rdentp = ERR_PTR(mdki_errno_unix_to_linux(err)); } return rdentp; }
/*
 * Fetch the complete attribute set (AT_ALL) for the object behind a dentry.
 *
 * The request is issued with GETATTR_FLAG_PULLUP_ATTRS so the attributes are
 * pulled up to the vnode as well as returned in *vap.
 *
 * Returns 0 or a negative Linux errno.
 */
STATIC int
vnlayer_getattr(
    DENT_T *dentry,
    VATTR_T *vap
)
{
    VNODE_T *vp;
    CALL_DATA_T cd;
    int rc;

    ASSERT_KERNEL_UNLOCKED();

    mdki_linux_init_call_data(&cd);
    vp = ITOV(dentry->d_inode);

    /* ask for everything */
    VATTR_SET_MASK(vap, AT_ALL);
    rc = VOP_GETATTR(vp, vap, GETATTR_FLAG_PULLUP_ATTRS, &cd);
    rc = mdki_errno_unix_to_linux(rc);

    mdki_linux_destroy_call_data(&cd);
    return rc;
}
/*
 * fsync file operation: flush the object through VOP_FSYNC().
 *
 * NOTE(review): the opening #if and the alternate signature(s) that pair
 * with the #endif below are outside this excerpt; the body still refers to
 * dentry_p, which only the pre-2.6.35 signature can provide.
 *
 * datasync == 0 is passed down as FLAG_NODATASYNC, anything else as
 * FLAG_DATASYNC.  From 2.6.35 on the file pointer (and, unless MRG is
 * defined, the start/end range) travels in an fsync_ctx; before that the
 * file pointer itself is passed as the context.
 *
 * Returns 0 or a negative Linux errno.  Inodes that fail MDKI_INOISMVFS()
 * are logged and reported as success.
 */
extern int
vnode_fop_fsync(
    FILE_T *file_p,
    loff_t start,
    loff_t end,
    int datasync
)
#endif
{
    INODE_T *ip;
    int err;
    CALL_DATA_T cd;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
    fsync_ctx ctx;
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
    if (file_p == NULL) {
        /* NFSD sometimes calls with null file_p and dentry_p filled in. */
        ASSERT(dentry_p != NULL);
        ip = dentry_p->d_inode;
    } else
#endif
        ip = file_p->f_dentry->d_inode;

    ASSERT_I_SEM_MINE(ip);
    ASSERT(MDKI_INOISOURS(ip));
    if (!MDKI_INOISMVFS(ip)) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
        MDKI_VFS_LOG(VFS_LOG_ERR, "%s shouldn't be called? (files swapped " "at open): file_p=%p dp=%p\n", __func__, file_p, dentry_p);
#else
        MDKI_VFS_LOG(VFS_LOG_ERR, "%s shouldn't be called? (files swapped " "at open): file_p=%p dp=%p\n", __func__, file_p, file_p->f_dentry);
#endif
        return 0;               /* don't fail the operation, though */
    }

    mdki_linux_init_call_data(&cd);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
    err = VOP_FSYNC(ITOV(ip), datasync == 0 ? FLAG_NODATASYNC : FLAG_DATASYNC, &cd, (file_ctx *)file_p);
#else
    ctx.file_p = file_p;
#if !defined (MRG)
    /* the byte range is only carried in the context on non-MRG builds */
    ctx.start = start;
    ctx.end = end;
#endif /* !defined (MRG) */
    err = VOP_FSYNC(ITOV(ip), datasync == 0 ? FLAG_NODATASYNC : FLAG_DATASYNC, &cd, &ctx);
#endif /* else LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) */
    err = mdki_errno_unix_to_linux(err);
    mdki_linux_destroy_call_data(&cd);
    return err;
}
/*
 * follow_link inode operation: read the symlink target with VOP_READLINK()
 * into a PATH_MAX kernel buffer and hand it to the kernel's link walker.
 *
 * NOTE(review): the opening #if and the alternate return type that pair
 * with the #endif below are outside this excerpt.  Judging by the return
 * statements, 2.6.13+ returns a pointer (the buffer, or ERR_PTR) and older
 * kernels return an int errno.
 *
 * On 2.6.13+ a successful call leaves the buffer allocated and returns it;
 * every other path frees the buffer here.  A read that fills the whole
 * PATH_MAX-1 window (uio_resid == 0) is reported as -ENAMETOOLONG.
 */
int
#endif
vnode_iop_follow_link(
    DENT_T *dentry,            /* link */
    struct nameidata *nd       /* link resolution */
)
{
    INODE_T *ip;
    struct uio uio;
    iovec_t iov;
    int err = 0;
    char *buf = KMEM_ALLOC(PATH_MAX, KM_SLEEP);
    CALL_DATA_T cd;

    if (buf == NULL)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
        return ERR_PTR(-ENOMEM);
#else
        return -ENOMEM;
#endif
    uio.uio_iov = &iov;
    /* leave one byte of the buffer free for the terminating NUL */
    mfs_uioset(&uio, buf, PATH_MAX-1, 0, UIO_SYSSPACE);

    mdki_linux_init_call_data(&cd);
    ip = dentry->d_inode;

    ASSERT_KERNEL_UNLOCKED();
    ASSERT_I_SEM_NOT_MINE(ip);

    err = VOP_READLINK(ITOV(ip), &uio, &cd);
    err = mdki_errno_unix_to_linux(err);
    mdki_linux_destroy_call_data(&cd);
    if (err == 0) {
        if (uio.uio_resid == 0)
            err = -ENAMETOOLONG;
        else {
            /* readlink doesn't copy a NUL at the end, we must do it */
            buf[uio.uio_offset] = '\0';
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
            /* follow the link */
            err = vfs_follow_link(nd, buf);
#else
            nd_set_link(nd, buf);
            return(buf); /* vnop_iop_put_link() will free this buf. */
#endif
        }
    }
    /* error paths (and the pre-2.6.13 path) release the buffer here */
    KMEM_FREE(buf, PATH_MAX);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
    return ERR_PTR(err);
#else
    return(err);
#endif
}
/*
 * llseek file operation.
 *
 * Computes the candidate position from origin (0 = SEEK_SET, 1 = SEEK_CUR,
 * 2 = SEEK_END), then lets VOP_SEEK() validate or adjust it.  The seek
 * context carries the file, the raw offset and the origin; if the callee
 * sets ctx.done the file position is left untouched here.
 *
 * Returns the resulting position, -EINVAL for an unknown origin, or the
 * negative Linux errno produced by VOP_SEEK().
 */
loff_t
vnode_fop_llseek(
    FILE_T *file_p,
    loff_t offset,
    int origin
)
{
    INODE_T *ip = file_p->f_dentry->d_inode;
    struct seek_ctx ctx;
    MOFFSET_T mresult;
    loff_t newpos;
    int rc;

    ASSERT(MDKI_INOISMVFS(ip));

    if (origin == 0) {                  /* SEEK_SET */
        newpos = offset;
    } else if (origin == 1) {           /* SEEK_CUR */
        newpos = offset + file_p->f_pos;
    } else if (origin == 2) {           /* SEEK_END */
        newpos = offset + READ_I_SIZE(ip);
    } else {
#ifdef MVFS_DEBUG
        MDKI_VFS_LOG(VFS_LOG_INFO, "%s: invalid origin %d, ra=%p\n",
                     __func__, origin, mdki_getmycaller());
#endif
        return -EINVAL;
    }

    ctx.filep = file_p;
    ctx.done = FALSE;
    ctx.offset = offset;
    ctx.origin = origin;

    mresult = newpos;
    rc = VOP_SEEK(ITOV(ip), file_p->f_pos, &mresult, &ctx);
    rc = mdki_errno_unix_to_linux(rc);
    if (rc) {
        ASSERT(rc < 0);
        return rc;
    }
    newpos = mresult;

    /* nothing to record if the callee already did it or nothing moved */
    if (ctx.done || newpos == file_p->f_pos) {
        return newpos;
    }

    file_p->f_pos = newpos;
    file_p->f_version = 0;  /* See default_llseek() in fs/read_write.c */
    return newpos;
}
/*
 * Common read/write path for file operations.
 *
 * For inodes that pass MDKI_INOISMVFS() a single-iovec uio over the user
 * buffer is built and handed to VOP_RDWR().  With O_APPEND set the I/O flag
 * is FAPPEND, and a write additionally starts at the current inode size
 * instead of *off_p.
 *
 * Returns the number of bytes transferred (and updates *off_p from the uio)
 * on success, a negative Linux errno on failure, or -ENOSYS for inodes that
 * are not handled here.
 */
ssize_t
vnode_fop_rdwr(
    FILE_T *file_p,
    char * buf,
    size_t buflen,
    loff_t *off_p,
    uio_rw_t dir
)
{
    INODE_T *ip;
    CALL_DATA_T cd;
    struct uio uio;
    iovec_t iov;
    loff_t start_off;
    int appending;
    int ioflag;
    int rval;

    ip = file_p->f_dentry->d_inode;
    ASSERT(MDKI_INOISOURS(ip));

    if (!MDKI_INOISMVFS(ip)) {
        MDKI_TRACE(TRACE_RDWR,"shadow rdwr? fp=%p ip=%p dir=%d\n", file_p, ip, dir);
        return -ENOSYS;
    }

    uio.uio_iov = &iov;

    appending = (file_p->f_flags & O_APPEND) != 0;
    ioflag = appending ? FAPPEND : 0;
    if (appending && dir == UIO_WRITE) {
        /* appending writes always begin at end of file */
        start_off = READ_I_SIZE(file_p->f_dentry->d_inode);
    } else {
        start_off = *off_p;
    }

    mdki_linux_uioset(&uio, buf, buflen, start_off, UIO_USERSPACE);

    mdki_linux_init_call_data(&cd);
    rval = VOP_RDWR(ITOV(ip), &uio, dir, ioflag, NULL, &cd,
                    (file_ctx *)file_p);
    rval = mdki_errno_unix_to_linux(rval);
    mdki_linux_destroy_call_data(&cd);

    if (rval == 0) {
        /* count of transferred bytes */
        rval = buflen - uio.uio_resid;
        /* underlying FS sets it after write */
        *off_p = uio.uio_offset;
    }
    return rval;
}
/* This is VOP_ACCESS().
 * permtype = bitwise-OR of MAY_READ, MAY_WRITE, MAY_EXEC
 * For 2.6.27 and beyond we may need to handle other
 * permission requests than the traditional MAY_[RWX], like
 * MAY_ACCESS.
 *
 * The parameter list depends on the kernel version: a nameidata pointer
 * before 2.6.27, nothing extra for 2.6.27..2.6.32, and a flags word after
 * 2.6.32.  Whichever extra argument exists is forwarded to VOP_ACCESS() as
 * its context argument (NULL when there is none).
 *
 * Returns 0 or a negative Linux errno; -ECHILD is returned for RCU-mode
 * requests on 2.6.38+ so the kernel retries without RCU.
 */
extern int
vnode_iop_permission(
    INODE_T *ip,
    int permtype
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
    , struct nameidata *nd
#endif
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,32)
    , unsigned int flags
#endif
)
{
    int err;
    CALL_DATA_T cd;

    ASSERT_I_SEM_NOT_MINE(ip);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38)
    /* We can't deal with RCU lookups, the lookup will happen after
     * a rcu_read_lock call, which means we can't block. Additionally,
     * vfsmount_lock is locked, which means we can't call mntput
     * (and other functions). We use the permission callback to detect
     * and refuse RCU operations, which are then retried without using RCU.
     */
    if (flags & IPERM_FLAG_RCU)
        return -ECHILD;
#endif

    mdki_linux_init_call_data(&cd);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
    /* we are not dealing with MAY_ACCESS and MAY_OPEN */
    permtype &= (MAY_READ | MAY_WRITE | MAY_EXEC);
#endif
    /*
     * Vnode core wants the mode test bits to be in the user position, not the
     * low bits.  Bits are in same order as standard UNIX rwx.
     */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
    err = VOP_ACCESS(ITOV(ip), permtype << 6, 0, &cd, (nameidata_ctx *) nd);
#elif LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,32)
    err = VOP_ACCESS(ITOV(ip), permtype << 6, 0, &cd, NULL);
#else
    /* the flags word is smuggled through the pointer-typed context slot */
    err = VOP_ACCESS(ITOV(ip), permtype << 6, 0, &cd, (nameidata_ctx *) (unsigned long) flags);
#endif

    err = mdki_errno_unix_to_linux(err);
    mdki_linux_destroy_call_data(&cd);
    return err;
}
/*
 * mmap file operation: validate the requested window and pass the mapping
 * request to VOP_MMAP().
 *
 * fp     - file being mapped
 * mem_p  - VMA describing the mapping (page offset, start/end addresses)
 *
 * The byte offset and length must lie below MVFS_MAXOFF_T, or below
 * MAX_NON_LFS when the file was opened without O_LARGEFILE.
 *
 * Returns 0 or a negative Linux errno:
 *   -EINVAL     the byte offset is negative
 *   -EOVERFLOW  the offset, or offset + length, reaches the limit
 *   -ENODEV     the vnode is not a regular file (VREG)
 *   otherwise   the translated result of VOP_MMAP()
 */
int
vnode_fop_mmap(
    FILE_T *fp,
    struct vm_area_struct *mem_p
)
{
    INODE_T *ip = fp->f_dentry->d_inode;
    int err = 0;
    loff_t len;
    /*
     * Widen the page offset BEFORE shifting.  vm_pgoff is an unsigned long,
     * so on a 32-bit kernel shifting it first would silently drop the high
     * bits of any offset at or beyond 4GB.
     */
    loff_t offset = (loff_t)mem_p->vm_pgoff << PAGE_CACHE_SHIFT;
    loff_t maxoffset = MVFS_MAXOFF_T;
    VNODE_T *vp;
    struct mmap_ctx ctx;
    CALL_DATA_T cd;

    /* make sure offset, len are within our range */
    if (!(fp->f_flags & O_LARGEFILE))
        maxoffset = MAX_NON_LFS;
    len = mem_p->vm_end - mem_p->vm_start;
    if (offset < 0)
        return -EINVAL;
    /*
     * Range check written so that it cannot overflow: with
     * 0 <= offset < maxoffset, "offset + len >= maxoffset" is the same as
     * "len >= maxoffset - offset", and the subtraction always fits.
     */
    if (offset >= maxoffset || len < 0 || len >= maxoffset - offset) {
        return -EOVERFLOW;
    }

    ASSERT(MDKI_INOISMVFS(ip));

    vp = ITOV(ip);
    /* only regular files can be mapped */
    if (vp->v_type != VREG)
        return -ENODEV;

    ctx.file = fp;
    ctx.mem = mem_p;
    mdki_linux_init_call_data(&cd);
    err = VOP_MMAP(vp, vma_to_sharing(mem_p), vma_to_rwx(mem_p), &cd, &ctx);
    mdki_linux_destroy_call_data(&cd);

    MDKI_TRACE(TRACE_MAP, "vp=%p mflags=%x prot=%x error=%d\n",
               vp, vma_to_sharing(mem_p), vma_to_rwx(mem_p), err);

    return mdki_errno_unix_to_linux(err);
}
/*
 * unlink inode operation: remove the name dent from directory dir through
 * VOP_REMOVE().
 *
 * The target vnode is passed along only when the dentry has an inode that
 * passes MDKI_INOISMVFS(); otherwise NULL is passed.  No extra reference is
 * taken on it.  If the dentry has a peer (per vnlayer_dentry_peer()), the
 * peer is unhashed after a successful removal and its reference is dropped
 * either way.
 *
 * Returns 0 or a negative Linux errno (-ENOTDIR if dir is not a directory).
 */
extern int
vnode_iop_unlink(
    INODE_T *dir,
    DENT_T *dent
)
{
    struct unlink_ctx ctx;
    struct dentry *peer;
    VNODE_T *victim;
    CALL_DATA_T cd;
    int rc;

    ASSERT_I_SEM_MINE(dent->d_inode);
    ASSERT_I_SEM_MINE(dir);
    ASSERT(MDKI_INOISMVFS(dir));

    if (!S_ISDIR(dir->i_mode)) {
        /* bogus */
        return -ENOTDIR;
    }

    peer = vnlayer_dentry_peer(dent);

    mdki_linux_init_call_data(&cd);

    ctx.dentry = dent;
    ctx.done = FALSE;

    /* no extra reference (be careful) */
    victim = NULL;
    if (dent->d_inode && MDKI_INOISMVFS(dent->d_inode)) {
        victim = ITOV(dent->d_inode);
    }

    rc = VOP_REMOVE(ITOV(dir), victim, (char *)dent->d_name.name, &cd, &ctx);
    /* XXX pullup attributes on removed object, if it's not gone yet? */
    rc = mdki_errno_unix_to_linux(rc);
    mdki_linux_destroy_call_data(&cd);

    /* XXX Don't d_delete(dentry), our caller will do that */
    if (peer == NULL) {
        return rc;
    }

    /*
     * unhash peer name in other mode.  Don't d_delete in case
     * we're racing
     */
    if (rc == 0) {
        d_drop(peer);
    }
    VNODE_DPUT(peer);
    return rc;
}
/*
 * open file operation.
 *
 * Runs generic_file_open() first, then VOP_OPEN() on the inode's vnode.
 * VOP_OPEN() takes the vnode by reference; if it comes back pointing at a
 * different vnode the situation is treated as fatal (BUG()).
 *
 * Returns 0 or a negative Linux errno; -ENOSYS if called on an inode that
 * does not pass MDKI_INOISMVFS().
 */
int
vnode_fop_open(
    INODE_T *ino_p,
    FILE_T *file_p
)
{
    VNODE_T *requested_vp;
    VNODE_T *vp;
    CALL_DATA_T cd;
    int rc;

    /* No asserts on BKL; locking protocol is changing */

    ASSERT(MDKI_INOISOURS(ino_p));
    if (!MDKI_INOISMVFS(ino_p)) {
        MDKI_VFS_LOG(VFS_LOG_ERR,
                     "%s shouldn't be called on shadow?"
                     " (files swapped at open): vp %p fp %p\n",
                     __func__, ino_p, file_p);
        return -ENOSYS;
    }

    rc = generic_file_open(ino_p, file_p);
    if (rc) {
        return rc;
    }

    requested_vp = ITOV(ino_p);
    vp = requested_vp;

    mdki_linux_init_call_data(&cd);
    rc = VOP_OPEN(&vp, vnlayer_filep_to_flags(file_p), &cd,
                  (file_ctx *)file_p);
    rc = mdki_errno_unix_to_linux(rc);
    mdki_linux_destroy_call_data(&cd);

    MDKI_TRACE(TRACE_OPEN, "%s opened vp=%p fp=%p pvt=%p pcnt=%ld\n",
               __func__, vp, file_p, REALFILE(file_p),
               REALFILE(file_p) ? (long)F_COUNT(REALFILE(file_p)) : 0);

    /* the open must not have substituted another vnode */
    if (requested_vp != vp) {
        printk("switcheroo on open? %p became %p\n", requested_vp, vp); /* XXX */
        BUG();
    }
    return rc;
}
/* * This is VOP_CLOSE(). Called when a file pointer is being cleaned * up--guaranteed only once! */ int vnode_fop_release( INODE_T *ino_p, FILE_T *file_p ) { int status = 0; VNODE_T *vp; MOFFSET_T off = 0; CALL_DATA_T cd; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) || defined(SLES10SP2) mdki_vop_close_ctx_t ctx; #endif ASSERT_KERNEL_UNLOCKED(); ASSERT(MDKI_INOISOURS(ino_p)); if (!MDKI_INOISMVFS(ino_p)) { MDKI_TRACE(TRACE_CLOSE, "shadow no-op fp=%p ip=%p\n", file_p, ino_p); return 0; /* XXX shadow something? */ } mdki_linux_init_call_data(&cd); vp = ITOV(ino_p); MDKI_TRACE(TRACE_CLOSE, "%s: fp=%p vp=%p fcount=%ld pvt=%p rfcount=%ld pid=%ld\n", __func__, file_p, vp, (long)F_COUNT(file_p), REALFILE(file_p), REALFILE(file_p) ? (long)F_COUNT(REALFILE(file_p)) : 0, (long)mdki_curpid()); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) || defined(SLES10SP2) ctx.file_p = file_p; ctx.owner_id = NULL; status = VOP_CLOSE(vp, vnlayer_filep_to_flags(file_p), VNODE_LASTCLOSE_COUNT, off, &cd, (file_ctx *)&ctx); #else status = VOP_CLOSE(vp, vnlayer_filep_to_flags(file_p), VNODE_LASTCLOSE_COUNT, off, &cd, (file_ctx *)file_p); #endif status = mdki_errno_unix_to_linux(status); mdki_linux_destroy_call_data(&cd); return status; }
/*
 * lock file operation.
 *
 * History: this routine once called back into mvfs_locktl_ctx to validate
 * the vnode, confirm an MFS_LOOPCLAS or MFS_VOBCLAS file, and fetch the
 * cleartext vnode via mvfs_getcleartext.  Any failure there meant the lock
 * operation was never attempted.  For an unlock issued from close that
 * would panic the system, because locks_remove_flock() would find dangling
 * Posix locks.
 *
 * Checking for that error and then calling mvop_linux_lockctl directly was
 * one option, but the cleartext vnode was only used to check for a regular
 * file, so there was no remaining reason to go through common code at all.
 * The file is open, so the cleartext is assumed good.  mvop_linux_lockctl
 * verifies that a realfp exists, which stands in for the mnode class check
 * since only loopback or vob files have a realfp.
 *
 * Returns 0 or a negative Linux errno.
 */
int
vnode_fop_lock(
    FILE_T *fp,
    int cmd,
    struct file_lock *lock_p
)
{
    INODE_T *ip = fp->f_dentry->d_inode;
    CALL_DATA_T cd;
    int rc;

    ASSERT(MDKI_INOISMVFS(ip));

    mdki_linux_init_call_data(&cd);
    rc = mvop_linux_lockctl(ITOV(ip), lock_p, cmd, &cd, (file_ctx *)fp);
    mdki_linux_destroy_call_data(&cd);

    return mdki_errno_unix_to_linux(rc);
}
/* reset the file position */
        file_p->f_pos = (loff_t)uios.uio_offset;
        dir_ctx->pos = (loff_t) uios.uio_offset;
    }
    return err;
}
#else /* LINUX_VERSION_CODE > KERNEL_VERSION(3,10,0) */
/*
 * readdir file operation (older-kernel variant taking a filldir callback).
 *
 * A zeroed uio is primed with the current file position, a residual of
 * MVFS_LINUX_MAXRPCDATA, and the caller's dirent cookie and filldir
 * function, then passed to VOP_READDIR().  Unless the callee sets ctx.done,
 * the file position is updated from the uio afterwards -- this happens
 * whether or not VOP_READDIR() reported an error.
 *
 * Returns 0 or a negative Linux errno.
 */
int
vnode_fop_readdir(
    FILE_T *file_p,
    void *dirent_p,
    filldir_t filldir_func
)
{
    uio_t uios;
    INODE_T *inode;
    DENT_T *dentry;
    int err;
    CALL_DATA_T cd;
    struct readdir_ctx ctx;

    dentry = file_p->f_dentry;
    inode = dentry->d_inode;

    ASSERT(MDKI_INOISMVFS(inode));

    ctx.file = file_p;
    ctx.done = FALSE;

    BZERO(&uios, sizeof(uios));
    uios.uio_offset = (loff_t)file_p->f_pos;
    /* This value cannot be larger than value in the view_v4_procinfo table
     * It is used for maximum size of the return data.
     */
    uios.uio_resid = MVFS_LINUX_MAXRPCDATA;
    /* the kernel's opaque dirent cookie and callback ride in the uio */
    uios.uio_buff = dirent_p;
    uios.uio_func = filldir_func;

    mdki_linux_init_call_data(&cd);
    err = VOP_READDIR(ITOV(inode), &uios, &cd, NULL, &ctx);
    err = mdki_errno_unix_to_linux(err);
    mdki_linux_destroy_call_data(&cd);

    if (!ctx.done)
        /* reset the file position */
        file_p->f_pos = (loff_t)uios.uio_offset;
    return err;
}
/*
 * Invoke the file system's VFS_MOUNT() method for a freshly built VFS_T.
 *
 * The mount is issued with the vfs's own flag word, the caller-supplied
 * mount data, and a zero-filled caller-info structure.
 *
 * Returns 0 or a negative Linux errno.
 */
extern int
vnlayer_linux_mount(
    VFS_T *vfsp,
    void *data_p
)
{
    MVFS_CALLER_INFO_STRUCT caller_info;
    CALL_DATA_T cd;
    int rc;

    mdki_linux_init_call_data(&cd);
    BZERO(&caller_info, sizeof(caller_info));

    /* VFS_MOUNT method must detect 32- or 64-bit caller, if necessary */
    rc = VFS_MOUNT(vfsp, NULL, NULL, vfsp->vfs_flag, data_p, 0, &cd,
                   &caller_info);
    rc = mdki_errno_unix_to_linux(rc);

    mdki_linux_destroy_call_data(&cd);
    return rc;
}
/*
 * Invoke the file system's VFS_MOUNT() method for a freshly built VFS_T
 * (variant that passes a credential rather than call data).
 *
 * A credential is obtained with MDKI_GET_UCRED() for the duration of the
 * call and released with MDKI_CRFREE() afterwards.  The caller-info
 * structure is passed zero-filled.
 *
 * Returns 0 or a negative Linux errno.
 */
extern int
vnlayer_linux_mount(
    VFS_T *vfsp,
    void *data_p
)
{
    MVFS_CALLER_INFO_STRUCT caller_info;
    CRED_T *cred;
    int rc;

    cred = MDKI_GET_UCRED();
    BZERO(&caller_info, sizeof(caller_info));

    /* VFS_MOUNT method must detect 32- or 64-bit caller, if necessary */
    rc = VFS_MOUNT(vfsp, NULL, NULL, vfsp->vfs_flag, data_p, 0, cred,
                   &caller_info);
    rc = mdki_errno_unix_to_linux(rc);

    MDKI_CRFREE(cred);
    return rc;
}
/* They rewrote sync_supers so that it won't proceed through their loop
     * until the dirty bit is cleared.
     */
    super_p->s_dirt = 0;

    if (err != 0)
        VFS_LOG(SBTOVFS(super_p), VFS_LOG_ERR, "%s: error %d syncing\n", __func__, err);
    return /* mdki_errno_unix_to_linux(err) */;
}
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) */
/*
 * Sync the super block through VFS_SYNC().
 *
 * A non-waiting request (wait == 0) is a no-op that returns 0.  A failure
 * is logged and then returned as a negative Linux errno.
 */
int
vnlayer_sync_super(
    struct super_block *super_p,
    int wait
)
{
    CALL_DATA_T cd;
    int err;

    /* if wait is not allowed, do nothing */
    if (wait == 0) {
        return 0;
    }

    ASSERT_SB_LOCKED(super_p);

    mdki_linux_init_call_data(&cd);
    err = VFS_SYNC(SBTOVFS(super_p), SBTOVFS(super_p), 0, &cd);
    mdki_linux_destroy_call_data(&cd);

    if (err != 0) {
        /* the log shows the untranslated error value */
        VFS_LOG(SBTOVFS(super_p), VFS_LOG_ERR, "%s: error %d syncing\n", __func__, err);
    }
    return mdki_errno_unix_to_linux(err);
}
/*
 * flush file operation: issue a VOP_CLOSE() with the file's current
 * reference count (asserted not to equal VNODE_LASTCLOSE_COUNT) and offset 0.
 *
 * On 2.6.18+ (or SLES10SP2) the kernel supplies a lock-owner id, which is
 * forwarded together with the file pointer in a close context; on older
 * kernels the file pointer itself is the context argument.
 *
 * Returns 0 or a negative Linux errno.  Inodes that fail MDKI_INOISMVFS()
 * are logged and reported as success.
 */
extern int
vnode_fop_flush(
    FILE_T *fp
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) || defined(SLES10SP2)
    , fl_owner_t id
#endif
)
{
    INODE_T *ip = fp->f_dentry->d_inode;
    int err;
    CALL_DATA_T cd;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) || defined(SLES10SP2)
    mdki_vop_close_ctx_t ctx;
#endif

    ASSERT(MDKI_INOISOURS(ip));
    if (!MDKI_INOISMVFS(ip)) {
        MDKI_VFS_LOG(VFS_LOG_ERR, "%s shouldn't be called? (files swapped at open): fp %p\n", __func__, fp);
        return 0;                       /* don't fail the operation, though */
    }

    mdki_linux_init_call_data(&cd);
    ASSERT(F_COUNT(fp) != VNODE_LASTCLOSE_COUNT);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) || defined(SLES10SP2)
    ctx.file_p = fp;
    ctx.owner_id = id;
    err = VOP_CLOSE(ITOV(ip), vnlayer_filep_to_flags(fp), F_COUNT(fp), (MOFFSET_T) 0, &cd, (file_ctx *)&ctx);
#else
    err = VOP_CLOSE(ITOV(ip), vnlayer_filep_to_flags(fp), F_COUNT(fp), (MOFFSET_T) 0, &cd, (file_ctx *)fp);
#endif

    err = mdki_errno_unix_to_linux(err);
    mdki_linux_destroy_call_data(&cd);
    return err;
}
/*
 * readlink inode operation: copy the link target into the caller's user
 * buffer through VOP_READLINK().
 *
 * Shadow objects never reach this routine; they need special handling and
 * are served by shadow_readlink.
 *
 * Returns the number of bytes placed in buf, or a negative Linux errno.
 */
extern int
vnode_iop_readlink(
    DENT_T *dentry,
    char * buf,
    int buflen
)
{
    INODE_T *ip;
    struct uio uio;
    iovec_t iov;
    CALL_DATA_T cd;
    int rc;

    uio.uio_iov = &iov;
    mdki_linux_uioset(&uio, buf, buflen, 0, UIO_USERSPACE);

    mdki_linux_init_call_data(&cd);
    ip = dentry->d_inode;

    ASSERT_KERNEL_UNLOCKED();
    ASSERT_I_SEM_NOT_MINE(ip);

    rc = VOP_READLINK(ITOV(ip), &uio, &cd);
    rc = mdki_errno_unix_to_linux(rc);
    mdki_linux_destroy_call_data(&cd);

    if (rc != 0) {
        return(rc);
    }

    /* return count of bytes */
    return(buflen - uio.uio_resid);
}
/*
 * create inode operation: make a regular file named by dentry in directory
 * parent through VOP_CREATE().
 *
 * Only type (VREG), mode (without the S_IFMT bits) and size (0) are sent
 * down in the attribute structure.  The dentry and parent inode travel in
 * the create context.
 *
 * On success, if the callee left dentry->d_inode NULL, the new vnode's
 * inode is instantiated on the dentry (consuming the hold returned in
 * newvp) and its attributes are pulled up; if the callee already attached
 * an inode, the extra hold in newvp is released instead.
 *
 * Returns 0 or a negative Linux errno (-ENOMEM if the attribute structure
 * cannot be allocated).
 */
extern int
vnode_iop_create(
    INODE_T * parent,
    struct dentry * dentry,
    int mode,
    struct nameidata *nd
)
{
    int err = 0;
    VATTR_T *vap;
    VNODE_T *newvp;
    struct create_ctx ctx;
    CALL_DATA_T cd;

    ASSERT_I_SEM_MINE(parent);
    ASSERT(MDKI_INOISMVFS(parent));

    vap = VATTR_ALLOC();
    if (vap == NULL)
        return -ENOMEM;
    VATTR_NULL(vap);
    mdki_linux_init_call_data(&cd);
    /*
     * Solaris sends only type, mode, size, so we will too.
     */
    vap->va_type = VREG;
    vap->va_mode = mode & ~S_IFMT;
    vap->va_size = 0;
    vap->va_mask = AT_TYPE|AT_MODE|AT_SIZE;
    newvp = NULL;
    /* cleared so the post-create check can tell whether the callee attached an inode */
    dentry->d_inode = NULL;
    ctx.dentry = dentry;
    ctx.parent = parent;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38)
    /* break any rcu-walk in progress */
# if defined(MRG)
    write_seqlock_barrier(&dentry->d_lock);
# else /* defined (MRG) */
    write_seqcount_barrier(&dentry->d_seq);
# endif /* else defined (MRG) */
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38) */
    err = VOP_CREATE(ITOV(parent),
                     (/* drop const */ char *) dentry->d_name.name,
                     vap,
                     NONEXCL, /* XXX handled by generic layer? */
                     mode, /* not used except for passthrough, see vap->va_mode */
                     &newvp,
                     &cd,
                     &ctx);
    err = mdki_errno_unix_to_linux(err);

    /* dentry reference uses the hold count from a successful create */
    if (!err) {
        if (dentry->d_inode == NULL) {
            /* Not a shadow object */
            ASSERT(newvp != NULL);
            ASSERT(VTOI(newvp) != NULL);
            VNODE_D_INSTANTIATE(dentry, VTOI(newvp));
            VATTR_SET_MASK(vap, AT_ALL);
            /* best effort: a getattr failure does not fail the create */
            if (VOP_GETATTR(newvp, vap, 0, &cd) == 0)
                mdki_linux_vattr_pullup(newvp, vap, AT_ALL);
        } else {
            /* drop the extra ref returned in newvp */
            VN_RELE(newvp);
        }
        /* I nuked the code checking not VCHR, VREG--we are always VREG */
    } else {
        ASSERT(!dentry->d_inode);
        ASSERT(!newvp);
    }
    VATTR_FREE(vap);
    mdki_linux_destroy_call_data(&cd);
    return(err);
}
/*
 * Fill in a new super block at mount time.
 *
 * Steps, in order:
 *   1. refuse if no VFS operation vector is registered (-ENODATA) or if
 *      vnlayer_get_urdir_inode() is one of our own inodes (-EINVAL);
 *   2. set the fixed super block fields (block size, max bytes, ops, magic);
 *   3. allocate and zero a VFS_T, cross-link it with the super block and
 *      copy the read-only / nosuid mount flags;
 *   4. call vnlayer_linux_mount();
 *   5. fetch the root vnode with VFS_ROOT(), prime its attributes, and
 *      allocate the root dentry, choosing its dentry ops by whether this
 *      super block is the one holding vnlayer_looproot_vp.
 *
 * Failures after step 4 succeeded also call VFS_UNMOUNT() before the common
 * bailout, which unlinks and frees the VFS_T.
 *
 * Returns 0 or a negative Linux errno.  The silent argument is not used.
 */
int
vnlayer_fill_super(
    SUPER_T *super_p,
    void *data_p,
    int silent
)
{
    INODE_T *ino_p;
    VNODE_T *rootvp;
    VATTR_T va;
    VFS_T *vfsp;
    int err = 0;
    CALL_DATA_T cd;

    ASSERT_KERNEL_LOCKED();             /* sys_mount() */
    ASSERT_SB_MOUNT_LOCKED_W(super_p);

    /* can't assert on mount_sem, we don't have access to it. */

    if (vnlayer_vfs_opvec == NULL) {
        MDKI_VFS_LOG(VFS_LOG_ERR, "%s: VFS operation not set yet " "(no file system module loaded?)\n", __func__);
        err = -ENODATA;
        goto return_NULL;
    }

    if (MDKI_INOISOURS(vnlayer_get_urdir_inode())) {
        /* can't handle this case */
        MDKI_VFS_LOG(VFS_LOG_ERR, "%s: can't handle mounts inside setview.\n", __func__);
        err = -EINVAL;
        goto return_NULL;
    }

    /*
     * The only fields we have coming in are s_type and s_flags.
     */
    /* Verify this */

    super_p->s_blocksize = MVFS_DEF_BLKSIZE;
    super_p->s_blocksize_bits = MVFS_DEF_BLKSIZE_BITS;
    super_p->s_maxbytes = MVFS_DEF_MAX_FILESIZE;
    super_p->s_op = &mvfs_super_ops;
    super_p->s_export_op = &vnlayer_export_ops;
    super_p->dq_op = NULL;
    super_p->s_magic = MVFS_SUPER_MAGIC;

    /*
     * XXX This module is currently restricted to one client file system
     * type at a time, as registered via the vnlayer_vfs_opvec.
     */
    vfsp = KMEM_ALLOC(sizeof(*vfsp), KM_SLEEP);
    if (vfsp == NULL) {
        MDKI_VFS_LOG(VFS_LOG_ERR, "%s failed: no memory\n", __func__);
        SET_SBTOVFS(super_p, NULL);
        err = -ENOMEM;
        goto return_NULL;
    }
    BZERO(vfsp, sizeof(*vfsp));
    SET_VFSTOSB(vfsp, super_p);
    SET_SBTOVFS(super_p, vfsp);
    vfsp->vfs_op = vnlayer_vfs_opvec;
    /* XXX fill in more of vfsp (flag?) */
    if (super_p->s_flags & MS_RDONLY)
        vfsp->vfs_flag |= VFS_RDONLY;
    if (super_p->s_flags & MS_NOSUID)
        vfsp->vfs_flag |= VFS_NOSUID;

    err = vnlayer_linux_mount(vfsp, data_p);

    if (err) {
        goto bailout;
    }

    /*
     * Now create our dentry and set that up in the superblock.  Get
     * the inode from the vnode at the root of the file system, and
     * attach it to a new dentry.
     */
    mdki_linux_init_call_data(&cd);
    err = VFS_ROOT(SBTOVFS(super_p), &rootvp);
    if (err) {
        err = mdki_errno_unix_to_linux(err);
        /* the mount succeeded, so it has to be undone before bailing out */
        (void) VFS_UNMOUNT(vfsp,&cd);
        mdki_linux_destroy_call_data(&cd);
        goto bailout;
    }

    ino_p = VTOI(rootvp);

#ifdef CONFIG_FS_POSIX_ACL
    /* If the system supports ACLs, we set the flag in the superblock
     * depending on the ability of the underlying filesystem
     */
    if (vfsp->vfs_flag & VFS_POSIXACL) {
        super_p->s_flags |= MS_POSIXACL;
    }
#endif
    /*
     * Call getattr() to prime this inode with real attributes via the
     * callback to mdki_linux_vattr_pullup()
     */
    VATTR_NULL(&va);
    /* ignore error code, we're committed */
    (void) VOP_GETATTR(rootvp, &va, 0, &cd);

    /* This will allocate a dentry with a name of /, which is
     * what Linux uses in all filesystem roots.  The dentry is
     * also not put on the hash chains because Linux does not
     * hash file system roots.  It finds them through the super
     * blocks.
     */
    super_p->s_root = VNODE_D_ALLOC_ROOT(ino_p);

    if (super_p->s_root) {
        /*
         * NOTE(review): vnlayer_looproot_vp is dereferenced here without a
         * NULL check -- confirm it is always set by the time any mount
         * reaches this point.
         */
        if (VFSTOSB(vnlayer_looproot_vp->v_vfsp) == super_p) {
            /* loopback names are done with regular dentry ops */
            MDKI_SET_DOPS(super_p->s_root, &vnode_dentry_ops);
        } else {
            /*
             * setview names come in via VOB mounts, they're marked
             * with setview dentry ops
             */
            MDKI_SET_DOPS(super_p->s_root, &vnode_setview_dentry_ops);
        }
        super_p->s_root->d_fsdata = NULL;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
        atomic_set(&super_p->s_root->d_count, 1);
#endif
        /* d_alloc_root assumes that the caller will take care of
         * bumping the inode count for the dentry.  So we will oblige
         */
        igrab(ino_p);
    } else {
        /* no root dentry: release the root vnode and undo the mount */
        VN_RELE(rootvp);
        (void) VFS_UNMOUNT(vfsp,&cd);
        mdki_linux_destroy_call_data(&cd);
        err = -ENOMEM;
        goto bailout;
    }
    mdki_linux_destroy_call_data(&cd);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0)
    super_p->s_dirt = 1;            /* we want to be called on
                                       write_super/sync() */
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) */

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38)
    /* write back is delegated to the underlying fs */
    super_p->s_bdi = &noop_backing_dev_info;
#endif
    /*
     * release reference on rootvp--super block holds appropriate
     * references now
     */
    VN_RELE(rootvp);
    return(0);

  bailout:
    /* common failure exit once the VFS_T exists: log, unlink, free */
    MDKI_VFS_LOG(VFS_LOG_ERR, "%s failed: error %d\n", __func__, vnlayer_errno_linux_to_unix(err));
    SET_SBTOVFS(super_p, NULL);
    KMEM_FREE(vfsp, sizeof(*vfsp));
  return_NULL:
    return(err);
}
/* This is really VOP_SETATTR() in sheep's clothing
 *
 * Two paths:
 *  - inodes passing MDKI_INOISMVFS(): the iattr is converted to a vattr
 *    (with AT_TYPE stripped), applied with VOP_SETATTR(), and on success
 *    the attributes are pulled up into the inode;
 *  - all other inodes: the change is applied to our inode and then
 *    forwarded to the real dentry obtained from REALDENTRY_LOCKED(), which
 *    is referenced for the duration and unlocked afterwards.
 *
 * Returns 0 or a negative Linux errno (-ENOMEM if the vattr cannot be
 * allocated, -ENOENT if there is no real dentry or it has no inode).
 */
int
vnode_iop_notify_change(
    DENT_T *dent_p,
    struct iattr * iattr_p
)
{
    VNODE_T *vp;
    VATTR_T *vap;
    VNODE_T *cvp;
    int err = 0;
    DENT_T *rdent;
    CALL_DATA_T cd;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
    mdki_boolean_t tooksem = FALSE;
#endif

    if (iattr_p->ia_valid & ATTR_SIZE) {
        ASSERT_I_SEM_MINE(dent_p->d_inode);
    }

    if (MDKI_INOISMVFS(dent_p->d_inode)) {
        vap = VATTR_ALLOC();
        if (vap != NULL) {
            vnode_iop_iattr2vattr(iattr_p, vap);

            /* reject attempts to use setattr to change object type */
            vap->va_mask &= ~AT_TYPE;
            mdki_linux_init_call_data(&cd);
            vp = ITOV(dent_p->d_inode);
            err = VOP_SETATTR(vp, vap, 0, &cd);
            err = mdki_errno_unix_to_linux(err);
            /* Any underlying cleartxt got its inode truncated via changeattr
             * if there's a need to change its size.
             */
            if (!err)
                mdki_linux_vattr_pullup(vp, vap, vap->va_mask);
            VATTR_FREE(vap);
            mdki_linux_destroy_call_data(&cd);
        } else {
            err = -ENOMEM;
        }
    } else {
        rdent = REALDENTRY_LOCKED(dent_p, &cvp);
        /* NOTE(review): called before rdent is NULL-checked; presumably NULL-safe -- verify */
        VNODE_DGET(rdent);
        if (rdent && rdent->d_inode) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
            err = inode_setattr(dent_p->d_inode, iattr_p);
            if (err == 0) {
                if (iattr_p->ia_valid & ATTR_SIZE) {
                    LOCK_INODE(rdent->d_inode);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
#if !defined RHEL_UPDATE || RHEL_UPDATE < 5
                    down_write(&rdent->d_inode->i_alloc_sem);
#endif
#endif
                    /*
                     * be paranoid and record the 'taken'ness in case
                     * the called function squashes ia_valid (as is
                     * done in nfs_setattr).
                     */
                    tooksem = TRUE;
                }
                err = MDKI_NOTIFY_CHANGE(rdent, CVN_TO_VFSMNT(cvp), iattr_p);
                if (tooksem) {
                    /* release in the reverse order of acquisition */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
#if !defined(RHEL_UPDATE) || RHEL_UPDATE < 5
                    up_write(&rdent->d_inode->i_alloc_sem);
#endif
#endif
                    UNLOCK_INODE(rdent->d_inode);
                }
            }
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) */
            err = simple_setattr(dent_p, iattr_p);

            if (err == 0)
                err = MDKI_NOTIFY_CHANGE(rdent, CVN_TO_VFSMNT(cvp), iattr_p);
#endif /* else LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) */
        } else {
            /* It looks as though someone removed the realdentry on us.
             * I am not sure why this should happen.
             */
            err = -ENOENT;
        }
        if (rdent) {
            VNODE_DPUT(rdent);
            REALDENTRY_UNLOCK(dent_p, cvp);
        }
    }
    return err;
}
DENT_T * vnode_iop_lookup( INODE_T *dir, struct dentry *dent, struct nameidata *nd ) { char *name; mdki_boolean_t rele = FALSE; int err; VNODE_T *dvp; VNODE_T *rt_vnode; /* returned vnode */ INODE_T *rt_inode = NULL; /* returned inode ptr */ DENT_T * real_dentry; DENT_T *found_dentry = dent; VATTR_T *vap; struct lookup_ctx ctx; CALL_DATA_T cd; ASSERT_I_SEM_MINE(dir); /* We can find our parent entry via the dentry provided to us. */ ASSERT(dent->d_parent->d_inode == dir); if (dent->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); name = /* drop the const */(char *) dent->d_name.name; mdki_linux_init_call_data(&cd); /* We pass along the dentry, as well as the parent inode so that * mvop_linux_lookup_* has everything it needs, even if it is passed in * the realvp, and it gets back a negative dentry. */ dvp = ITOV(dir); ctx.dentrypp = &found_dentry; ctx.flags = LOOKUP_CTX_VALID; err = VOP_LOOKUP(dvp, name, &rt_vnode, (struct pathname *)NULL, VNODE_LF_LOOKUP, NULL, &cd, &ctx); err = mdki_errno_unix_to_linux(err); if (!err) { ASSERT(rt_vnode != NULL); if (MDKI_INOISCLRVN(VTOI(rt_vnode))) { /* unwrap to the real object */ ASSERT(CVN_TO_DENT(rt_vnode)); rt_inode = CVN_TO_INO(rt_vnode); if (MDKI_INOISMVFS(rt_inode)) { VN_HOLD(ITOV(rt_inode)); VN_RELE(rt_vnode); rt_vnode = ITOV(rt_inode); } else { igrab(rt_inode); VN_RELE(rt_vnode); rt_vnode = NULL; } } else rt_inode = VTOI(rt_vnode); } if (!err && (found_dentry != dent)) { mdki_linux_destroy_call_data(&cd); /* The hold was granted in makeloopnode() in the 'nocover' case. */ if (rt_vnode != NULL) VN_RELE(rt_vnode); else iput(rt_inode); /* * found_dentry is the real socket/block/char device node's dentry. * See mvop_linux_lookup_component(). * * For sockets, we use a dentry in our tree (we fill in the * provided dentry "dent") linked to the inode of the real * object. 
This lets file name operations work in our * namespace, and lets socket connections all work (as they're * keyed off of the inode address) from inside to outside & * v.v. * * We also do this for VCHR, VBLK devices, and it seems to work OK * (e.g. make a node the same as /dev/tty, you can write to it) */ switch (found_dentry->d_inode->i_mode & S_IFMT) { case S_IFSOCK: case S_IFCHR: case S_IFBLK: ASSERT(dent->d_inode == NULL); MDKI_SET_DOPS(dent, &vnode_shadow_dentry_ops); igrab(found_dentry->d_inode); VNODE_D_ADD(dent, found_dentry->d_inode); VNODE_DPUT(found_dentry); found_dentry = NULL; /* tell caller to use original dentry */ break; default: /* use returned dentry */ break; } return(found_dentry); } /* We need to pass back dentry ops even for negative dentries, I think. * Shadow inodes will have been taken care of in lookup_component. */ if (dent->d_op != &vnode_shadow_dentry_ops) { if (dent->d_parent->d_op == &vnode_setview_dentry_ops) MDKI_SET_DOPS(dent, &vnode_setview_dentry_ops); else MDKI_SET_DOPS(dent, &vnode_dentry_ops); } vap = VATTR_ALLOC(); if (vap == NULL) { err = -ENOMEM; goto alloc_err; } if (!err && MDKI_INOISMVFS(rt_inode)) { /* fetch attributes & place in inode */ VATTR_SET_MASK(vap, AT_ALL); err = VOP_GETATTR(rt_vnode, vap, GETATTR_FLAG_UPDATE_ATTRS, &cd); err = mdki_errno_unix_to_linux(err); if (err == -EOPNOTSUPP) /* ignore it */ err = 0; else if (err) rele = TRUE; else if ((rt_vnode->v_flag & VLOOPROOT) != 0 && rt_inode == vnlayer_get_urdir_inode()) { /* return the real root */ VN_RELE(rt_vnode); VATTR_FREE(vap); mdki_linux_destroy_call_data(&cd); return VNODE_DGET(vnlayer_get_root_dentry()); } else if (vnlayer_looproot_vp != NULL && rt_vnode == vnlayer_looproot_vp && (real_dentry = MVOP_DENT(rt_inode, &vnode_dentry_ops)) != NULL) { /* return the real /view */ VN_RELE(rt_vnode); VATTR_FREE(vap); mdki_linux_destroy_call_data(&cd); return real_dentry; } } VATTR_FREE(vap); alloc_err: mdki_linux_destroy_call_data(&cd); /* It's an mnode-based 
object, set up a dentry for it */ /* We don't return ENOENT. For Linux, the negative dentry is enough */ switch (err) { case -ENOENT: err = 0; ASSERT(rt_inode == NULL); VNODE_D_ADD(dent, rt_inode); break; case 0: /* We will consume the count on rt_inode as a reference for dent */ /* * For VOB vnodes, we maintain two separate dentry trees for * the vnodes. One tree is for setview-mode names (process * sets to a view context, then looks directly at the VOB * mountpoint without any cover vnodes in the path). The * other tree is for view-extended naming into a VOB, with * dentries starting at the view tag and covering non-VOB * objects until crossing a mount point into a VOB. * * Mostly the system doesn't care, as long as it goes down the * tree from parent to child, since it will be traversing only one * of the dentry trees. But when the cache misses, the system calls * this lookup method and wants to get a dentry in return. * There are standard interfaces ( d_splice_alias() in 2.6) * which can find a good dentry referencing the inode returned * by the file system's lookup method, but these methods don't * work right when we have VOB directory vnodes with both setview * and view-extended dentries. We implement our own function * [vnlayer_inode2dentry_internal()] which knows the * distinctions and the rules for determining that an existing * attached dentry is valid for the lookup request. * * We have our own d_compare() function which forces all VOB * lookups to come to the inode lookup method (this function), * and then we get to choose the right dentry to return. We * have our own lookup cache inside MVFS so we don't care that * the dentry cache is always missing on our names. * * If we have to make a new dentry, we may need to merge it * with an NFS-created temporary dentry using d_move() * (d_splice_alias() would do this for us, but we can't use it * for reasons listed above). 
*/ /* * We want to find the "right" dentry (if there is one), so * look for one that has a d_parent with the same dentry ops * (indicating it's in the same dentry tree). */ if (S_ISDIR(rt_inode->i_mode)) { /* * It has been empirically shown that we have to check the * parent of the dentry. If the parent has been checked out * it is possible for the cache lookup to return an inode * from the tree below the old parent directory. If this * happens on a rename, the system will panic because the * Linux rename code checks the parent of the returned * dentry to see that it matches what it has for a parent. */ found_dentry = vnlayer_inode2dentry_internal(rt_inode, dent->d_parent, NULL, dent->d_op); } else { /* * For non-directories, we also need to consider the * parent & the requested name so that * vnlayer_inode2dentry_internal() finds the right dentry. * (There may be multiple hard links; we want the one in * the same directory with the same name) */ found_dentry = vnlayer_inode2dentry_internal(rt_inode, dent->d_parent, &dent->d_name, dent->d_op); } if (found_dentry != NULL) { ASSERT(found_dentry->d_inode == rt_inode); /* * If the existing one is a disconnected dentry, we need * to move the old one to the new one (just like * d_splice_alias) to get the proper name/parent attached * in the dcache. */ if ((found_dentry->d_flags & DCACHE_DISCONNECTED) != 0) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) ASSERT((dent->d_flags & DCACHE_UNHASHED) != 0); #else ASSERT((dent->d_vfs_flags & DCACHE_UNHASHED) != 0); #endif d_rehash(dent); d_move(found_dentry, dent); } /* Release our count. found_dentry also references inode. */ iput(rt_inode); return found_dentry; } /* * Nothing suitable, wire it up to the proposed dentry. */ VNODE_D_ADD(dent, rt_inode); break; default: /* some other error case */ if (rele) VN_RELE(rt_vnode); break; } if (err) return ERR_PTR(err); else return NULL; }
/*
 * link inode operation: create the hard link newdent in directory parent
 * pointing at olddent's object.
 *
 * If VOP_REALVP() succeeds on the parent, the link is a shadow link and is
 * made directly with vnlayer_do_linux_link(), bypassing the file system.
 * Otherwise the request goes through VOP_LINK().  Unless the callee marks
 * the context done, the source inode is grabbed and instantiated on
 * newdent, and its attributes are refreshed on a best-effort basis.
 *
 * Returns 0 or a negative Linux errno; -EXDEV when the source is not
 * eligible, is not one of our inodes, or does not pass MDKI_INOISMVFS().
 */
extern int
vnode_iop_link(
    DENT_T * olddent,
    INODE_T * parent,
    DENT_T * newdent
)
{
    struct link_ctx ctx;
    VNODE_T *parentvp;
    VATTR_T *vap;
    INODE_T *inode;
    CALL_DATA_T cd;
    int rc;

    ASSERT_I_SEM_MINE(olddent->d_inode);
    ASSERT_I_SEM_MINE(parent);
    ASSERT(MDKI_INOISMVFS(parent));

    if (!vnlayer_link_eligible(olddent)) {
        return -EXDEV;
    }

    /* VOP_REALVP will check that the parent is a loopback directory and
     * return EINVAL if it isn't.
     */
    if (VOP_REALVP(ITOV(parent), &parentvp) == 0) {
        /* We are creating a shadow link so bypass the mvfs for the rest */
        rc = vnlayer_do_linux_link(parentvp, olddent, parent, newdent);
        rc = mdki_errno_unix_to_linux(rc);
        return rc;
    }

    /* This needs to be passed on to the mvfs to deal with */
    if (!MDKI_INOISOURS(olddent->d_inode)) {
        return -EXDEV;
    }

    ctx.parent = parent;
    ctx.newdent = newdent;
    ctx.olddent = olddent;
    ctx.done = FALSE;

    mdki_linux_init_call_data(&cd);

    if (!MDKI_INOISMVFS(olddent->d_inode)) {
        rc = -EXDEV;
    } else {
        rc = VOP_LINK(ITOV(parent), ITOV(olddent->d_inode),
                      (char *)newdent->d_name.name, &cd, &ctx);
        rc = mdki_errno_unix_to_linux(rc);
        if (rc == 0 && !ctx.done) {
            /* Again, a heavy handed way of bumping the inode count and
             * handling the locking (This will use the inode lock)
             */
            inode = igrab(olddent->d_inode);
            VNODE_D_INSTANTIATE(newdent, inode);
            vap = VATTR_ALLOC();
            if (vap != NULL) {
                VATTR_SET_MASK(vap, AT_ALL);
                if (VOP_GETATTR(ITOV(inode), vap, 0, &cd) == 0) {
                    mdki_linux_vattr_pullup(ITOV(inode), vap, AT_ALL);
                }
                VATTR_FREE(vap);
            }
        }
    }

    mdki_linux_destroy_call_data(&cd);
    return rc;
}