/*
 * Get lvp's parent, if possible, even if it isn't set.
 *
 * lvp is expected to have an iocount before and after this call.
 * If a dvpp is populated the returned vnode has an iocount.
 */
static int
null_get_lowerparent(vnode_t lvp, vnode_t * dvpp, vfs_context_t ctx)
{
    int error = 0;
    struct vnode_attr va;
    mount_t mp  = vnode_mount(lvp);
    vnode_t dvp = vnode_parent(lvp);

    if (dvp) {
        error = vnode_get(dvp);
        goto end;
    }

    error = ENOENT;
    if (!(mp->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
        goto end;
    }

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_parentid);

    error = vnode_getattr(lvp, &va, ctx);
    if (error || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
        goto end;
    }

    error = VFS_VGET(mp, (ino64_t)va.va_parentid, &dvp, ctx);

end:
    if (error == 0) {
        *dvpp = dvp;
    }
    return error;
}
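/*
 * Illustrative caller (an assumption for this write-up, not part of the
 * original nullfs source): it follows the contract documented above -- the
 * caller already holds an iocount on lvp, and the parent returned through
 * dvpp carries an iocount that the caller must drop with vnode_put().
 */
static int
null_use_lowerparent(vnode_t lvp, vfs_context_t ctx)
{
    vnode_t dvp = NULLVP;
    int error = null_get_lowerparent(lvp, &dvp, ctx);

    if (error)
        return error;
    /* ... work with the lower parent here ... */
    vnode_put(dvp);    /* release the iocount taken on our behalf */
    return 0;
}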
ino_t
VnodeToIno(vnode_t avp)
{
    unsigned long ret;

#ifndef AFS_DARWIN80_ENV
    if (afs_CacheFSType == AFS_APPL_UFS_CACHE) {
        struct inode *ip = VTOI(avp);
        ret = ip->i_number;
    } else if (afs_CacheFSType == AFS_APPL_HFS_CACHE) {
#endif
#if defined(AFS_DARWIN80_ENV)
        struct vattr va;
        VATTR_INIT(&va);
        VATTR_WANTED(&va, va_fileid);
        if (vnode_getattr(avp, &va, afs_osi_ctxtp))
            osi_Panic("VOP_GETATTR failed in VnodeToIno\n");
        if (!VATTR_ALL_SUPPORTED(&va))
            osi_Panic("VOP_GETATTR unsupported fileid in VnodeToIno\n");
        ret = va.va_fileid;
#elif !defined(VTOH)
        struct vattr va;
        if (VOP_GETATTR(avp, &va, &afs_osi_cred, current_proc()))
            osi_Panic("VOP_GETATTR failed in VnodeToIno\n");
        ret = va.va_fileid;
#else
        struct hfsnode *hp = VTOH(avp);
        ret = H_FILEID(hp);
#endif
#ifndef AFS_DARWIN80_ENV
    } else
        osi_Panic("VnodeToIno called before cacheops initialized\n");
#endif
    return ret;
}
int
afs_osi_Stat(struct osi_file *afile, struct osi_stat *astat)
{
    afs_int32 code;
    struct vattr tvattr;

    AFS_STATCNT(osi_Stat);
    ObtainWriteLock(&afs_xosi, 320);
    AFS_GUNLOCK();
#ifdef AFS_DARWIN80_ENV
    VATTR_INIT(&tvattr);
    VATTR_WANTED(&tvattr, va_size);
    VATTR_WANTED(&tvattr, va_blocksize);
    VATTR_WANTED(&tvattr, va_mtime);
    VATTR_WANTED(&tvattr, va_atime);
    code = vnode_getattr(afile->vnode, &tvattr, afs_osi_ctxtp);
    if (code == 0 && !VATTR_ALL_SUPPORTED(&tvattr))
        code = EINVAL;
#else
    code = VOP_GETATTR(afile->vnode, &tvattr, &afs_osi_cred, current_proc());
#endif
    AFS_GLOCK();
    if (code == 0) {
        astat->size = tvattr.va_size;
        astat->mtime = tvattr.va_mtime.tv_sec;
        astat->atime = tvattr.va_atime.tv_sec;
    }
    ReleaseWriteLock(&afs_xosi);
    return code;
}
int
osi_UFSTruncate(struct osi_file *afile, afs_int32 asize)
{
    afs_ucred_t *oldCred;
    struct vattr tvattr;
    afs_int32 code;
    struct osi_stat tstat;

    AFS_STATCNT(osi_Truncate);

    /* This routine only shrinks files, and most systems
     * have very slow truncates, even when the file is already
     * small enough.  Check now and save some time.
     */
    code = afs_osi_Stat(afile, &tstat);
    if (code || tstat.size <= asize)
        return code;
    ObtainWriteLock(&afs_xosi, 321);
    AFS_GUNLOCK();
#ifdef AFS_DARWIN80_ENV
    VATTR_INIT(&tvattr);
    VATTR_SET(&tvattr, va_size, asize);
    code = vnode_setattr(afile->vnode, &tvattr, afs_osi_ctxtp);
#else
    VATTR_NULL(&tvattr);
    tvattr.va_size = asize;
    code = VOP_SETATTR(afile->vnode, &tvattr, &afs_osi_cred, current_proc());
#endif
    AFS_GLOCK();
    ReleaseWriteLock(&afs_xosi);
    return code;
}
dev_t
VnodeToDev(vnode_t avp)
{
#ifndef AFS_DARWIN80_ENV
    if (afs_CacheFSType == AFS_APPL_UFS_CACHE) {
        struct inode *ip = VTOI(avp);
        return ip->i_dev;
    } else if (afs_CacheFSType == AFS_APPL_HFS_CACHE) {
#endif
#if defined(AFS_DARWIN80_ENV)
        struct vattr va;
        VATTR_INIT(&va);
        VATTR_WANTED(&va, va_fsid);
        if (vnode_getattr(avp, &va, afs_osi_ctxtp))
            osi_Panic("VOP_GETATTR failed in VnodeToDev\n");
        if (!VATTR_ALL_SUPPORTED(&va))
            osi_Panic("VOP_GETATTR unsupported fsid in VnodeToDev\n");
        return va.va_fsid;	/* XXX they say it's the dev.... */
#elif !defined(VTOH)
        struct vattr va;
        if (VOP_GETATTR(avp, &va, &afs_osi_cred, current_proc()))
            osi_Panic("VOP_GETATTR failed in VnodeToDev\n");
        return va.va_fsid;	/* XXX they say it's the dev.... */
#else
        struct hfsnode *hp = VTOH(avp);
        return H_DEV(hp);
#endif
#ifndef AFS_DARWIN80_ENV
    } else
        osi_Panic("VnodeToDev called before cacheops initialized\n");
#endif
}
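/*
 * Hedged sketch (not taken from the sources above): on the Darwin 8.0+ KPI the
 * two lookups performed by VnodeToIno() and VnodeToDev() can be batched into a
 * single vnode_getattr() call by requesting both attributes at once.  The
 * helper name and error handling are illustrative only.
 */
static int
vnode_fileid_and_fsid(vnode_t vp, ino64_t *inop, dev_t *devp, vfs_context_t ctx)
{
    struct vnode_attr va;
    int error;

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_fileid);
    VATTR_WANTED(&va, va_fsid);
    error = vnode_getattr(vp, &va, ctx);
    if (error)
        return error;
    if (!VATTR_IS_SUPPORTED(&va, va_fileid) || !VATTR_IS_SUPPORTED(&va, va_fsid))
        return ENOTSUP;
    *inop = va.va_fileid;
    *devp = (dev_t)va.va_fsid;
    return 0;
}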
/* vnode_size() is not exported */
static errno_t
vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx)
{
    struct vnode_attr va;
    int error;

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_data_size);
    error = vnode_getattr(vp, &va, ctx);
    if (!error)
        *sizep = va.va_data_size;
    return (error);
}
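/*
 * Illustrative caller (an assumption, not from the original source): using the
 * vnode_size() helper above to reject backing files that are shorter than a
 * required length, e.g. before mapping or reading a fixed-size region.
 */
static int
vnode_check_min_size(vnode_t vp, off_t need, vfs_context_t ctx)
{
    off_t size = 0;
    int error = vnode_size(vp, &size, ctx);

    if (error)
        return error;
    return (size < need) ? ENOSPC : 0;
}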
bool VNodeDiskDeviceClass::setupVNode()
{
    int vapError = -1;
    struct vnode_attr vap;

    if (m_vnode != NULL)
        return true;

    vfs_context_t vfsContext = vfs_context_create((vfs_context_t) 0);

    int vnodeError = vnode_open(m_filePath->getCStringNoCopy(), (FREAD | FWRITE),
        0, 0, &m_vnode, vfsContext);
    if (vnodeError || m_vnode == NULL) {
        IOLog("Error when opening file %s: error %d\n",
            m_filePath->getCStringNoCopy(), vnodeError);
        goto failure;
    }

    if (!vnode_isreg(m_vnode)) {
        IOLog("Error when opening file %s: not a regular file\n",
            m_filePath->getCStringNoCopy());
        vnode_close(m_vnode, (FREAD | FWRITE), vfsContext);
        goto failure;
    }

    VATTR_INIT(&vap);
    VATTR_WANTED(&vap, va_data_size);
    vapError = vnode_getattr(m_vnode, &vap, vfsContext);
    if (vapError) {
        IOLog("Error when retrieving vnode's attributes with error code %d\n", vapError);
        goto failure;
    }

    if (vap.va_data_size < m_blockSize * m_blockNum) {
        IOLog("Error file %s is too small, actual size is %llu\n",
            m_filePath->getCStringNoCopy(), vap.va_data_size);
        goto failure;
    }

    vfs_context_rele(vfsContext);
    return true;

failure:
    vfs_context_rele(vfsContext);
    return false;
}
#ifdef AFS_DARWIN80_ENV
osi_DisableAtimes(vnode_t avp)
#else
osi_DisableAtimes(struct vnode *avp)
#endif
{
#ifdef AFS_DARWIN80_ENV
    struct vnode_attr vap;

    VATTR_INIT(&vap);
    VATTR_CLEAR_SUPPORTED(&vap, va_access_time);
    vnode_setattr(avp, &vap, afs_osi_ctxtp);
#else
    if (afs_CacheFSType == AFS_APPL_UFS_CACHE) {
        struct inode *ip = VTOI(avp);
        ip->i_flag &= ~IN_ACCESS;
    }
#ifdef VTOH			/* can't do this without internals */
    else if (afs_CacheFSType == AFS_APPL_HFS_CACHE) {
        struct hfsnode *hp = VTOH(avp);
        hp->h_nodeflags &= ~IN_ACCESS;
    }
#endif
#endif
}
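/*
 * Complementary sketch (an assumption, not lifted from any one function below):
 * the inverse of osi_DisableAtimes() -- the POSIX-style access-time bump used
 * by the mmap/map_fd/shared-region paths later in this section, guarded by
 * MNT_NOATIME so filesystems mounted "noatime" are left alone.
 */
static void
touch_atime_if_allowed(vnode_t vp, vfs_context_t ctx)
{
    struct vnode_attr va;

    if (vnode_vfsvisflags(vp) & MNT_NOATIME)
        return;

    VATTR_INIT(&va);
    nanotime(&va.va_access_time);
    VATTR_SET_ACTIVE(&va, va_access_time);
    vnode_setattr(vp, &va, ctx);
}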
struct kern_direct_file_io_ref_t * kern_open_file_for_direct_io(const char * name, uint32_t iflags, kern_get_file_extents_callback_t callback, void * callback_ref, off_t set_file_size, off_t fs_free_size, off_t write_file_offset, void * write_file_addr, size_t write_file_len, dev_t * partition_device_result, dev_t * image_device_result, uint64_t * partitionbase_result, uint64_t * maxiocount_result, uint32_t * oflags) { struct kern_direct_file_io_ref_t * ref; proc_t p; struct vnode_attr va; dk_apfs_wbc_range_t wbc_range; int error; off_t f_offset; uint64_t fileblk; size_t filechunk; uint64_t physoffset, minoffset; dev_t device; dev_t target = 0; int isssd = 0; uint32_t flags = 0; uint32_t blksize; off_t maxiocount, count, segcount, wbctotal; boolean_t locked = FALSE; int fmode, cmode; struct nameidata nd; u_int32_t ndflags; off_t mpFree; int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); void * p1 = NULL; void * p2 = NULL; error = EFAULT; ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t)); if (!ref) { error = EFAULT; goto out; } bzero(ref, sizeof(*ref)); p = kernproc; ref->ctx = vfs_context_kernel(); fmode = (kIOPolledFileCreate & iflags) ? (O_CREAT | FWRITE) : FWRITE; cmode = S_IRUSR | S_IWUSR; ndflags = NOFOLLOW; NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ref->ctx); VATTR_INIT(&va); VATTR_SET(&va, va_mode, cmode); VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D); if ((error = vn_open_auth(&nd, &fmode, &va))) { kprintf("vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n", fmode, cmode, error); goto out; } ref->vp = nd.ni_vp; if (ref->vp->v_type == VREG) { vnode_lock_spin(ref->vp); SET(ref->vp->v_flag, VSWAP); vnode_unlock(ref->vp); } if (write_file_addr && write_file_len) { if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, IO_SKIP_ENCRYPTION))) { kprintf("kern_write_file() failed with error: %d\n", error); goto out; } } VATTR_INIT(&va); VATTR_WANTED(&va, va_rdev); VATTR_WANTED(&va, va_fsid); VATTR_WANTED(&va, va_devid); VATTR_WANTED(&va, va_data_size); VATTR_WANTED(&va, va_data_alloc); VATTR_WANTED(&va, va_nlink); error = EFAULT; if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out; wbctotal = 0; mpFree = freespace_mb(ref->vp); mpFree <<= 20; kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n", name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size); if (ref->vp->v_type == VREG) { /* Don't dump files with links. */ if (va.va_nlink != 1) goto out; device = (VATTR_IS_SUPPORTED(&va, va_devid)) ? 
va.va_devid : va.va_fsid; ref->filelength = va.va_data_size; p1 = &device; p2 = p; do_ioctl = &file_ioctl; if (kIOPolledFileHibernate & iflags) { error = do_ioctl(p1, p2, DKIOCAPFSGETWBCRANGE, (caddr_t) &wbc_range); ref->wbcranged = (error == 0); } if (ref->wbcranged) { uint32_t idx; assert(wbc_range.count <= (sizeof(wbc_range.extents) / sizeof(wbc_range.extents[0]))); for (idx = 0; idx < wbc_range.count; idx++) wbctotal += wbc_range.extents[idx].length; kprintf("kern_direct_file(%s): wbc %qd\n", name, wbctotal); if (wbctotal) target = wbc_range.dev; } if (set_file_size) { if (wbctotal) { if (wbctotal >= set_file_size) set_file_size = HIBERNATE_MIN_FILE_SIZE; else { set_file_size -= wbctotal; if (set_file_size < HIBERNATE_MIN_FILE_SIZE) set_file_size = HIBERNATE_MIN_FILE_SIZE; } } if (fs_free_size) { mpFree += va.va_data_alloc; if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size)) { error = ENOSPC; goto out; } } error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx); if (error) goto out; ref->filelength = set_file_size; } } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { /* Partition. */ device = va.va_rdev; p1 = ref->vp; p2 = ref->ctx; do_ioctl = &device_ioctl; } else { /* Don't dump to non-regular files. */ error = EFAULT; goto out; } ref->device = device; // probe for CF dk_corestorage_info_t cs_info; memset(&cs_info, 0, sizeof(dk_corestorage_info_t)); error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info); ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES); // get block size error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize); if (error) goto out; minoffset = HIBERNATE_MIN_PHYSICAL_LBA * ref->blksize; if (ref->vp->v_type != VREG) { error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk); if (error) goto out; ref->filelength = fileblk * ref->blksize; } // pin logical extents, CS version error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength); if (error && (ENOTTY != error)) goto out; ref->pinned = (error == 0); // pin logical extents, apfs version error = VNOP_IOCTL(ref->vp, FSCTL_FREEZE_EXTENTS, NULL, 0, ref->ctx); if (error && (ENOTTY != error)) goto out; ref->frozen = (error == 0); // generate the block list error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL); if (error) goto out; locked = TRUE; f_offset = 0; for (; f_offset < ref->filelength; f_offset += filechunk) { if (ref->vp->v_type == VREG) { filechunk = 1*1024*1024*1024; daddr64_t blkno; error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, VNODE_WRITE | VNODE_BLOCKMAP_NO_TRACK, NULL); if (error) goto out; if (-1LL == blkno) continue; fileblk = blkno * ref->blksize; } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { fileblk = f_offset; filechunk = f_offset ? 
            0 : ref->filelength;
        }

        physoffset = 0;
        while (physoffset < filechunk)
        {
            dk_physical_extent_t getphysreq;
            bzero(&getphysreq, sizeof(getphysreq));

            getphysreq.offset = fileblk + physoffset;
            getphysreq.length = (filechunk - physoffset);
            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
            if (error) goto out;
            if (!target)
            {
                target = getphysreq.dev;
            }
            else if (target != getphysreq.dev)
            {
                error = ENOTSUP;
                goto out;
            }

            assert(getphysreq.offset >= minoffset);

#if HIBFRAGMENT
            uint64_t rev;
            for (rev = 4096; rev <= getphysreq.length; rev += 4096)
            {
                callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096);
            }
#else
            callback(callback_ref, getphysreq.offset, getphysreq.length);
#endif
            physoffset += getphysreq.length;
        }
    }
    if (ref->wbcranged)
    {
        uint32_t idx;
        for (idx = 0; idx < wbc_range.count; idx++)
        {
            assert(wbc_range.extents[idx].offset >= minoffset);
            callback(callback_ref, wbc_range.extents[idx].offset, wbc_range.extents[idx].length);
        }
    }
    callback(callback_ref, 0ULL, 0ULL);

    if (ref->vp->v_type == VREG)
    {
        p1 = &target;
    }
    else
    {
        p1 = &target;
        p2 = p;
        do_ioctl = &file_ioctl;
    }

    // get partition base

    if (partitionbase_result)
    {
        error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
        if (error) goto out;
    }

    // get block size & constraints

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
    if (error) goto out;

    maxiocount = 1*1024*1024*1024;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
    if (error) count = 0;
    count *= blksize;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
    if (error) count = 0;
    count *= blksize;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
    if (error) count = 0;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
    if (error) count = 0;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
    if (!error) error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount);
    if (error) count = segcount = 0;
    count *= segcount;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
    if (!error) error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount);
    if (error) count = segcount = 0;
    count *= segcount;
    if (count && (count < maxiocount)) maxiocount = count;

    kprintf("max io 0x%qx bytes\n", maxiocount);
    if (maxiocount_result) *maxiocount_result = maxiocount;

    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
    if (!error && isssd) flags |= kIOPolledFileSSD;

    if (partition_device_result) *partition_device_result = device;
    if (image_device_result)     *image_device_result = target;
    if (oflags)                  *oflags = flags;

    if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
        vnode_close(ref->vp, FWRITE, ref->ctx);
        ref->vp = NULLVP;
        ref->ctx = NULL;
    }

out:
    printf("kern_open_file_for_direct_io(%p, %d)\n", ref, error);

    if (error && locked)
    {
        p1 = &device;
        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
    }

    if (error && ref)
    {
        if (ref->vp)
        {
            (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (ref->pinned && ref->cf) ?
                ref->filelength : 0);
            if (ref->frozen)
            {
                (void) VNOP_IOCTL(ref->vp, FSCTL_THAW_EXTENTS, NULL, 0, ref->ctx);
            }
            if (ref->wbcranged)
            {
                (void) do_ioctl(p1, p2, DKIOCAPFSRELEASEWBCRANGE, (caddr_t) NULL);
            }
            vnode_close(ref->vp, FWRITE, ref->ctx);
            ref->vp = NULLVP;
        }
        ref->ctx = NULL;
        kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
        ref = NULL;
    }
    return (ref);
}
static int process_cred_label_update_execvew(kauth_cred_t old_cred, kauth_cred_t new_cred, struct proc *p, struct vnode *vp, off_t offset, struct vnode *scriptvp, struct label *vnodelabel, struct label *scriptvnodelabel, struct label *execlabel, u_int *csflags, void *macpolicyattr, size_t macpolicyattrlen, int *disjointp) { int path_len = MAXPATHLEN; if (!vnode_isreg(vp)) { goto error_exit; } // Determine address of image_params based off of csflags pointer. (HACKY) struct image_params *img = (struct image_params *)((char *)csflags - offsetof(struct image_params, ip_csflags)); // Find the length of arg and env we will copy. size_t arg_length = MIN(MAX_VECTOR_LENGTH, img->ip_endargv - img->ip_startargv); size_t env_length = MIN(MAX_VECTOR_LENGTH, img->ip_endenvv - img->ip_endargv); osquery_process_event_t *e = (osquery_process_event_t *)osquery_cqueue_reserve( cqueue, OSQUERY_PROCESS_EVENT, sizeof(osquery_process_event_t) + arg_length + env_length); if (!e) { goto error_exit; } // Copy the arg and env vectors. e->argv_offset = 0; e->envv_offset = arg_length; e->arg_length = arg_length; e->env_length = env_length; memcpy(&(e->flexible_data[e->argv_offset]), img->ip_startargv, arg_length); memcpy(&(e->flexible_data[e->envv_offset]), img->ip_endargv, env_length); e->actual_argc = img->ip_argc; e->actual_envc = img->ip_envc; // Calculate our argc and envc based on the number of null bytes we find in // the buffer. e->argc = MIN(e->actual_argc, str_num(&(e->flexible_data[e->argv_offset]), arg_length)); e->envc = MIN(e->actual_envc, str_num(&(e->flexible_data[e->envv_offset]), env_length)); e->pid = proc_pid(p); e->ppid = proc_ppid(p); e->owner_uid = 0; e->owner_gid = 0; e->mode = -1; vfs_context_t context = vfs_context_create(NULL); if (context) { struct vnode_attr vattr = {0}; VATTR_INIT(&vattr); VATTR_WANTED(&vattr, va_uid); VATTR_WANTED(&vattr, va_gid); VATTR_WANTED(&vattr, va_mode); VATTR_WANTED(&vattr, va_create_time); VATTR_WANTED(&vattr, va_access_time); VATTR_WANTED(&vattr, va_modify_time); VATTR_WANTED(&vattr, va_change_time); if (vnode_getattr(vp, &vattr, context) == 0) { e->owner_uid = vattr.va_uid; e->owner_gid = vattr.va_gid; e->mode = vattr.va_mode; e->create_time = vattr.va_create_time.tv_sec; e->access_time = vattr.va_access_time.tv_sec; e->modify_time = vattr.va_modify_time.tv_sec; e->change_time = vattr.va_change_time.tv_sec; } vfs_context_rele(context); } e->uid = kauth_cred_getruid(new_cred); e->euid = kauth_cred_getuid(new_cred); e->gid = kauth_cred_getrgid(new_cred); e->egid = kauth_cred_getgid(new_cred); vn_getpath(vp, e->path, &path_len); osquery_cqueue_commit(cqueue, e); error_exit: return 0; }
/* works like PFlushVolumeData */ void darwin_notify_perms(struct unixuser *auser, int event) { int i; struct afs_q *tq, *uq = NULL; struct vcache *tvc, *hnext; int isglock = ISAFS_GLOCK(); struct vnode *vp; struct vnode_attr va; int isctxtowner = 0; if (!afs_darwin_fsevents) return; VATTR_INIT(&va); VATTR_SET(&va, va_mode, 0777); if (event & UTokensObtained) VATTR_SET(&va, va_uid, auser->uid); else VATTR_SET(&va, va_uid, -2); /* nobody */ if (!isglock) AFS_GLOCK(); if (!(vfs_context_owner == current_thread())) { get_vfs_context(); isctxtowner = 1; } loop: ObtainReadLock(&afs_xvcache); for (i = 0; i < VCSIZE; i++) { for (tq = afs_vhashTV[i].prev; tq != &afs_vhashTV[i]; tq = uq) { uq = QPrev(tq); tvc = QTOVH(tq); if (tvc->f.states & CDeadVnode) { /* we can afford to be best-effort */ continue; } /* no per-file acls, so only notify on directories */ if (!(vp = AFSTOV(tvc)) || !vnode_isdir(AFSTOV(tvc))) continue; /* dynroot object. no callbacks. anonymous ACL. just no. */ if (afs_IsDynrootFid(&tvc->f.fid)) continue; /* no fake fsevents on mount point sources. leaks refs */ if (tvc->mvstat == 1) continue; /* if it's being reclaimed, just pass */ if (vnode_get(vp)) continue; if (vnode_ref(vp)) { AFS_GUNLOCK(); vnode_put(vp); AFS_GLOCK(); continue; } ReleaseReadLock(&afs_xvcache); /* Avoid potentially re-entering on this lock */ if (0 == NBObtainWriteLock(&tvc->lock, 234)) { tvc->f.states |= CEvent; AFS_GUNLOCK(); vnode_setattr(vp, &va, afs_osi_ctxtp); tvc->f.states &= ~CEvent; vnode_put(vp); AFS_GLOCK(); ReleaseWriteLock(&tvc->lock); } ObtainReadLock(&afs_xvcache); uq = QPrev(tq); /* our tvc ptr is still good until now */ AFS_FAST_RELE(tvc); } } ReleaseReadLock(&afs_xvcache); if (isctxtowner) put_vfs_context(); if (!isglock) AFS_GUNLOCK(); }
kern_return_t map_fd_funneled( int fd, vm_object_offset_t offset, vm_offset_t *va, boolean_t findspace, vm_size_t size) { kern_return_t result; struct fileproc *fp; struct vnode *vp; void * pager; vm_offset_t map_addr=0; vm_size_t map_size; int err=0; vm_map_t my_map; proc_t p = current_proc(); struct vnode_attr vattr; /* * Find the inode; verify that it's a regular file. */ err = fp_lookup(p, fd, &fp, 0); if (err) return(err); if (fp->f_fglob->fg_type != DTYPE_VNODE){ err = KERN_INVALID_ARGUMENT; goto bad; } if (!(fp->f_fglob->fg_flag & FREAD)) { err = KERN_PROTECTION_FAILURE; goto bad; } vp = (struct vnode *)fp->f_fglob->fg_data; err = vnode_getwithref(vp); if(err != 0) goto bad; if (vp->v_type != VREG) { (void)vnode_put(vp); err = KERN_INVALID_ARGUMENT; goto bad; } AUDIT_ARG(vnpath, vp, ARG_VNODE1); /* * POSIX: mmap needs to update access time for mapped files */ if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { VATTR_INIT(&vattr); nanotime(&vattr.va_access_time); VATTR_SET_ACTIVE(&vattr, va_access_time); vnode_setattr(vp, &vattr, vfs_context_current()); } if (offset & PAGE_MASK_64) { printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm); (void)vnode_put(vp); err = KERN_INVALID_ARGUMENT; goto bad; } map_size = round_page(size); /* * Allow user to map in a zero length file. */ if (size == 0) { (void)vnode_put(vp); err = KERN_SUCCESS; goto bad; } /* * Map in the file. */ pager = (void *)ubc_getpager(vp); if (pager == NULL) { (void)vnode_put(vp); err = KERN_FAILURE; goto bad; } my_map = current_map(); result = vm_map_64( my_map, &map_addr, map_size, (vm_offset_t)0, VM_FLAGS_ANYWHERE, pager, offset, TRUE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (result != KERN_SUCCESS) { (void)vnode_put(vp); err = result; goto bad; } if (!findspace) { vm_offset_t dst_addr; vm_map_copy_t tmp; if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) || trunc_page_32(dst_addr) != dst_addr) { (void) vm_map_remove( my_map, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS); (void)vnode_put(vp); err = KERN_INVALID_ADDRESS; goto bad; } result = vm_map_copyin(my_map, (vm_map_address_t)map_addr, (vm_map_size_t)map_size, TRUE, &tmp); if (result != KERN_SUCCESS) { (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), VM_MAP_NO_FLAGS); (void)vnode_put(vp); err = result; goto bad; } result = vm_map_copy_overwrite(my_map, (vm_map_address_t)dst_addr, tmp, FALSE); if (result != KERN_SUCCESS) { vm_map_copy_discard(tmp); (void)vnode_put(vp); err = result; goto bad; } } else { if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) { (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), VM_MAP_NO_FLAGS); (void)vnode_put(vp); err = KERN_INVALID_ADDRESS; goto bad; } } ubc_setthreadcred(vp, current_proc(), current_thread()); (void)ubc_map(vp, (PROT_READ | PROT_EXEC)); (void)vnode_put(vp); err = 0; bad: fp_drop(p, fd, fp, 0); return (err); }
/* * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct * XXX usage is PROT_* from an interface perspective. Thus the values of * XXX VM_PROT_* and PROT_* need to correspond. */ int mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) { /* * Map in special device (must be SHARED) or file */ struct fileproc *fp; register struct vnode *vp; int flags; int prot, file_prot; int err=0; vm_map_t user_map; kern_return_t result; mach_vm_offset_t user_addr; mach_vm_size_t user_size; vm_object_offset_t pageoff; vm_object_offset_t file_pos; int alloc_flags=0; boolean_t docow; vm_prot_t maxprot; void *handle; vm_pager_t pager; int mapanon=0; int fpref=0; int error =0; int fd = uap->fd; user_addr = (mach_vm_offset_t)uap->addr; user_size = (mach_vm_size_t) uap->len; AUDIT_ARG(addr, user_addr); AUDIT_ARG(len, user_size); AUDIT_ARG(fd, uap->fd); prot = (uap->prot & VM_PROT_ALL); #if 3777787 /* * Since the hardware currently does not support writing without * read-before-write, or execution-without-read, if the request is * for write or execute access, we must imply read access as well; * otherwise programs expecting this to work will fail to operate. */ if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; #endif /* radar 3777787 */ flags = uap->flags; vp = NULLVP; /* * The vm code does not have prototypes & compiler doesn't do the' * the right thing when you cast 64bit value and pass it in function * call. So here it is. */ file_pos = (vm_object_offset_t)uap->pos; /* make sure mapping fits into numeric range etc */ if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64) return (EINVAL); /* * Align the file position to a page boundary, * and save its page offset component. */ pageoff = (file_pos & PAGE_MASK); file_pos -= (vm_object_offset_t)pageoff; /* Adjust size for rounding (on both ends). */ user_size += pageoff; /* low end... */ user_size = mach_vm_round_page(user_size); /* hi end */ /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { /* * The specified address must have the same remainder * as the file offset taken modulo PAGE_SIZE, so it * should be aligned after adjustment by pageoff. */ user_addr -= pageoff; if (user_addr & PAGE_MASK) return (EINVAL); } #ifdef notyet /* DO not have apis to get this info, need to wait till then*/ /* * XXX for non-fixed mappings where no hint is provided or * the hint would fall in the potential heap space, * place it after the end of the largest possible heap. * * There should really be a pmap call to determine a reasonable * location. */ else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ)) addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ); #endif alloc_flags = 0; if (flags & MAP_ANON) { /* * Mapping blank space is trivial. Use positive fds as the alias * value for memory tracking. */ if (fd != -1) { /* * Use "fd" to pass (some) Mach VM allocation flags, * (see the VM_FLAGS_* definitions). */ alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_PURGABLE); if (alloc_flags != fd) { /* reject if there are any extra flags */ return EINVAL; } } handle = NULL; maxprot = VM_PROT_ALL; file_pos = 0; mapanon = 1; } else { struct vnode_attr va; vfs_context_t ctx = vfs_context_current(); /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. 
*/ err = fp_lookup(p, fd, &fp, 0); if (err) return(err); fpref = 1; if(fp->f_fglob->fg_type == DTYPE_PSXSHM) { uap->addr = (user_addr_t)user_addr; uap->len = (user_size_t)user_size; uap->prot = prot; uap->flags = flags; uap->pos = file_pos; error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff); goto bad; } if (fp->f_fglob->fg_type != DTYPE_VNODE) { error = EINVAL; goto bad; } vp = (struct vnode *)fp->f_fglob->fg_data; error = vnode_getwithref(vp); if(error != 0) goto bad; if (vp->v_type != VREG && vp->v_type != VCHR) { (void)vnode_put(vp); error = EINVAL; goto bad; } AUDIT_ARG(vnpath, vp, ARG_VNODE1); /* * POSIX: mmap needs to update access time for mapped files */ if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { VATTR_INIT(&va); nanotime(&va.va_access_time); VATTR_SET_ACTIVE(&va, va_access_time); vnode_setattr(vp, &va, ctx); } /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR || vp->v_type == VSTR) { (void)vnode_put(vp); error = ENODEV; goto bad; } else { /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? */ if (fp->f_fglob->fg_flag & FREAD) maxprot |= VM_PROT_READ; else if (prot & PROT_READ) { (void)vnode_put(vp); error = EACCES; goto bad; } /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character * device mappings), and we are trying to get write * permission although we opened it without asking * for it, bail out. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_fglob->fg_flag & FWRITE) != 0) { /* * check for write access * * Note that we already made this check when granting FWRITE * against the file, so it seems redundant here. */ error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx); /* if not granted for any reason, but we wanted it, bad */ if ((prot & PROT_WRITE) && (error != 0)) { vnode_put(vp); goto bad; } /* if writable, remember */ if (error == 0) maxprot |= VM_PROT_WRITE; } else if ((prot & PROT_WRITE) != 0) { (void)vnode_put(vp); error = EACCES; goto bad; } } else maxprot |= VM_PROT_WRITE; handle = (void *)vp; #if CONFIG_MACF error = mac_file_check_mmap(vfs_context_ucred(ctx), fp->f_fglob, prot, flags, &maxprot); if (error) { (void)vnode_put(vp); goto bad; } #endif /* MAC */ } } if (user_size == 0) { if (!mapanon) (void)vnode_put(vp); error = 0; goto bad; } /* * We bend a little - round the start and end addresses * to the nearest page boundary. */ user_size = mach_vm_round_page(user_size); if (file_pos & PAGE_MASK_64) { if (!mapanon) (void)vnode_put(vp); error = EINVAL; goto bad; } user_map = current_map(); if ((flags & MAP_FIXED) == 0) { alloc_flags |= VM_FLAGS_ANYWHERE; user_addr = mach_vm_round_page(user_addr); } else { if (user_addr != mach_vm_trunc_page(user_addr)) { if (!mapanon) (void)vnode_put(vp); error = EINVAL; goto bad; } /* * mmap(MAP_FIXED) will replace any existing mappings in the * specified range, if the new mapping is successful. * If we just deallocate the specified address range here, * another thread might jump in and allocate memory in that * range before we get a chance to establish the new mapping, * and we won't have a chance to restore the old mappings. 
* So we use VM_FLAGS_OVERWRITE to let Mach VM know that it * has to deallocate the existing mappings and establish the * new ones atomically. */ alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; } if (flags & MAP_NOCACHE) alloc_flags |= VM_FLAGS_NO_CACHE; /* * Lookup/allocate object. */ if (handle == NULL) { pager = NULL; #ifdef notyet /* Hmm .. */ #if defined(VM_PROT_READ_IS_EXEC) if (prot & VM_PROT_READ) prot |= VM_PROT_EXECUTE; if (maxprot & VM_PROT_READ) maxprot |= VM_PROT_EXECUTE; #endif #endif #if 3777787 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) maxprot |= VM_PROT_READ; #endif /* radar 3777787 */ result = vm_map_enter_mem_object(user_map, &user_addr, user_size, 0, alloc_flags, IPC_PORT_NULL, 0, FALSE, prot, maxprot, (flags & MAP_SHARED) ? VM_INHERIT_SHARE : VM_INHERIT_DEFAULT); if (result != KERN_SUCCESS) goto out; } else { pager = (vm_pager_t)ubc_getpager(vp); if (pager == NULL) { (void)vnode_put(vp); error = ENOMEM; goto bad; } /* * Set credentials: * FIXME: if we're writing the file we need a way to * ensure that someone doesn't replace our R/W creds * with ones that only work for read. */ ubc_setthreadcred(vp, p, current_thread()); docow = FALSE; if ((flags & (MAP_ANON|MAP_SHARED)) == 0) { docow = TRUE; } #ifdef notyet /* Hmm .. */ #if defined(VM_PROT_READ_IS_EXEC) if (prot & VM_PROT_READ) prot |= VM_PROT_EXECUTE; if (maxprot & VM_PROT_READ) maxprot |= VM_PROT_EXECUTE; #endif #endif /* notyet */ #if 3777787 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) maxprot |= VM_PROT_READ; #endif /* radar 3777787 */ result = vm_map_enter_mem_object(user_map, &user_addr, user_size, 0, alloc_flags, (ipc_port_t)pager, file_pos, docow, prot, maxprot, (flags & MAP_SHARED) ? VM_INHERIT_SHARE : VM_INHERIT_DEFAULT); if (result != KERN_SUCCESS) { (void)vnode_put(vp); goto out; } file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC); if (docow) { /* private mapping: won't write to the file */ file_prot &= ~PROT_WRITE; } (void) ubc_map(vp, file_prot); } if (!mapanon) (void)vnode_put(vp); out: switch (result) { case KERN_SUCCESS: *retval = user_addr + pageoff; error = 0; break; case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: error = ENOMEM; break; case KERN_PROTECTION_FAILURE: error = EACCES; break; default: error = EINVAL; break; } bad: if (fpref) fp_drop(p, fd, fp, 0); KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0); KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32), (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0); return(error); }
/* kauth file scope listener * this allows to detect files written to the filesystem * arg2 contains a flag KAUTH_FILEOP_CLOSE which is set if a modified file is being closed * this way we don't need to trace every close(), only the ones writing to the filesystem */ static int fileop_scope_listener(kauth_cred_t credential, void * idata, kauth_action_t action, uintptr_t arg0, /* vnode reference */ uintptr_t arg1, /* full path to file being closed */ uintptr_t arg2, /* flags */ uintptr_t arg3) { /* ignore all actions except FILE_CLOSE */ if (action != KAUTH_FILEOP_CLOSE) { return KAUTH_RESULT_DEFER; } /* ignore operations with bad data */ if (credential == NULL || (vnode_t)arg0 == NULL || (char*)arg1 == NULL) { ERROR_MSG("Arguments contain null pointers!"); return KAUTH_RESULT_DEFER; } /* ignore closes on folders, character and block devices */ switch ( vnode_vtype((vnode_t)arg0) ) { case VDIR: case VCHR: case VBLK: return KAUTH_RESULT_DEFER; default: break; } /* we are only interested when a modified file is being closed */ if ((int)arg2 != KAUTH_FILEOP_CLOSE_MODIFIED) { return KAUTH_RESULT_DEFER; } char *file_path = (char*)arg1; /* get information from current proc trying to write to the vnode */ proc_t proc = current_proc(); pid_t mypid = proc_pid(proc); char myprocname[MAXCOMLEN+1] = {0}; proc_name(mypid, myprocname, sizeof(myprocname)); /* retrieve the vnode attributes, we can get a lot of vnode information from here */ struct vnode_attr vap = {0}; vfs_context_t context = vfs_context_create(NULL); /* initialize the structure fields we are interested in * reference vn_stat_noauth() xnu/bsd/vfs/vfs_vnops.c */ VATTR_INIT(&vap); VATTR_WANTED(&vap, va_mode); VATTR_WANTED(&vap, va_type); VATTR_WANTED(&vap, va_uid); VATTR_WANTED(&vap, va_gid); VATTR_WANTED(&vap, va_data_size); VATTR_WANTED(&vap, va_flags); int attr_ok = 1; if ( vnode_getattr((vnode_t)arg0, &vap, context) != 0 ) { /* in case of error permissions and filesize will be bogus */ ERROR_MSG("failed to vnode_getattr"); attr_ok = 0; } /* release the context we created, else kab00m! 
*/ vfs_context_rele(context); int error = 0; /* make sure we : * - were able to read the attributes * - file size is at least uint32_t * - path starts with /Users */ if ( attr_ok == 1 && vap.va_data_size >= sizeof(uint32_t) && strprefix(file_path, "/Users/") ) { uint32_t magic = 0; /* read target vnode */ uio_t uio = NULL; /* read from offset 0 */ uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); if (uio == NULL) { ERROR_MSG("uio_create returned null!"); return KAUTH_RESULT_DEFER; } /* we just want to read 4 bytes to match the header */ if ( (error = uio_addiov(uio, CAST_USER_ADDR_T(&magic), sizeof(uint32_t))) ) { ERROR_MSG("uio_addiov returned error %d!", error); return KAUTH_RESULT_DEFER; } if ( (error = VNOP_READ((vnode_t)arg0, uio, 0, NULL)) ) { ERROR_MSG("VNOP_READ failed %d!", error); return KAUTH_RESULT_DEFER; } else if (uio_resid(uio)) { ERROR_MSG("uio_resid!"); return KAUTH_RESULT_DEFER; } /* verify if it's a Mach-O file */ if (magic == MH_MAGIC || magic == MH_MAGIC_64 || magic == FAT_CIGAM) { char *token = NULL; char *string = NULL; char *tofree = NULL; int library = 0; int preferences = 0; tofree = string = STRDUP(file_path, M_TEMP); while ((token = strsep(&string, "/")) != NULL) { if (strcmp(token, "Library") == 0) { library = 1; } else if (library == 1 && strcmp(token, "Preferences") == 0) { preferences = 1; } } _FREE(tofree, M_TEMP); /* we got a match into /Users/username/Library/Preferences, warn user about it */ if (library == 1 && preferences == 1) { DEBUG_MSG("Found Mach-O written to %s by %s.", file_path, myprocname); char alert_msg[1025] = {0}; snprintf(alert_msg, sizeof(alert_msg), "Process \"%s\" wrote Mach-O binary %s.\n This could be Hacking Team's malware!", myprocname, file_path); alert_msg[sizeof(alert_msg)-1] = '\0'; /* log to syslog */ printf("[WARNING] Process \"%s\" wrote Mach-O binary %s.\n This could be Hacking Team's malware!", myprocname, file_path); /* deprecated but still usable to display the alert */ KUNCUserNotificationDisplayNotice(10, // Timeout 0, // Flags - default is Stop alert level NULL, // iconpath NULL, // soundpath NULL, // localization path "Security Alert", // alert header alert_msg, // alert message "OK"); // button title } } } /* don't deny access, we are just here to observe */ return KAUTH_RESULT_DEFER; }
off_t DldIOShadowFile::expandFile( __in off_t lowTargetSize ) { assert( preemption_enabled() ); off_t target; off_t currentLastExpansion; #if defined( DBG ) currentLastExpansion = DLD_IGNR_FSIZE; #endif//DBG // // Make sure that we are root. Growing a file // without zero filling the data is a security hole // root would have access anyway so we'll allow it // // // TO DO - is_suser is not supported for 64 bit kext // /* #if defined(__i386__) assert( is_suser() ); if( !is_suser() ) return this->lastExpansion;// an unsafe unprotected access to 64b value #elif defined(__x86_64__) #endif//defined(__x86_64__) */ // // check that the limit has not been reached // if( DLD_IGNR_FSIZE != this->maxSize && this->lastExpansion == this->maxSize ) return this->maxSize;// a safe access to 64b value as the value can't change this->LockExclusive(); {// start of the lock off_t valueToAdd; currentLastExpansion = this->lastExpansion; // // try to extend the file on 128 MB // ( the same value is used as a maximum value for a mapping // range in DldIOShadow::processFileWriteWQ ) // valueToAdd = 0x8*0x1000*0x1000; if( this->offset > this->lastExpansion ) target = this->offset + valueToAdd; else target = this->lastExpansion + valueToAdd; if( target < lowTargetSize ) target = lowTargetSize; if( DLD_IGNR_FSIZE != this->maxSize && target > this->maxSize ) target = this->maxSize; }// end of the lock this->UnLockExclusive(); if( target < lowTargetSize ){ // // there is no point for extension as the goal won't be reached, fail the request // return currentLastExpansion; } assert( DLD_IGNR_FSIZE != currentLastExpansion && currentLastExpansion <= target ); // // extend the file // vfs_context_t vfs_context; int error; error = vnode_getwithref( this->vnode ); assert( !error ); if( !error ){ struct vnode_attr va; VATTR_INIT(&va); VATTR_SET(&va, va_data_size, target); va.va_vaflags =(IO_NOZEROFILL) & 0xffff; vfs_context = vfs_context_create( NULL ); assert( vfs_context ); if( vfs_context ){ this->LockExclusive(); {// start of the lock assert( currentLastExpansion <= this->lastExpansion ); // // do not truncate the file incidentally! // if( target > this->offset ){ error = vnode_setattr( this->vnode, &va, vfs_context ); if( !error ){ this->lastExpansion = target; currentLastExpansion = target; } } else { // // the file has been extended by the write operation // this->lastExpansion = this->offset; currentLastExpansion = this->offset; }// end if( target < this->offset ) }// end of the lock this->UnLockExclusive(); vfs_context_rele( vfs_context ); DLD_DBG_MAKE_POINTER_INVALID( vfs_context ); }// end if( vfs_context ) vnode_put( this->vnode ); }// end if( !error ) assert( DLD_IGNR_FSIZE != currentLastExpansion ); return currentLastExpansion; }
static int
nullfs_special_getattr(struct vnop_getattr_args * args)
{
    mount_t mp = vnode_mount(args->a_vp);
    struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);

    ino_t ino = NULL_ROOT_INO;
    struct vnode_attr covered_rootattr;
    vnode_t checkvp = null_mp->nullm_lowerrootvp;

    VATTR_INIT(&covered_rootattr);
    VATTR_WANTED(&covered_rootattr, va_uid);
    VATTR_WANTED(&covered_rootattr, va_gid);
    VATTR_WANTED(&covered_rootattr, va_create_time);
    VATTR_WANTED(&covered_rootattr, va_modify_time);
    VATTR_WANTED(&covered_rootattr, va_access_time);

    /* prefer to get this from the lower root vp, but if not (i.e. forced unmount
     * of lower fs) try the mount point covered vnode */
    if (vnode_getwithvid(checkvp, null_mp->nullm_lowerrootvid)) {
        checkvp = vfs_vnodecovered(mp);
        if (checkvp == NULL) {
            return EIO;
        }
    }

    int error = vnode_getattr(checkvp, &covered_rootattr, args->a_context);

    vnode_put(checkvp);
    if (error) {
        /* we should have been able to get attributes for one of the two choices so
         * fail if we didn't */
        return error;
    }

    /* we got the attributes of the vnode we cover so plow ahead */
    if (args->a_vp == null_mp->nullm_secondvp) {
        ino = NULL_SECOND_INO;
    }

    VATTR_RETURN(args->a_vap, va_type, vnode_vtype(args->a_vp));
    VATTR_RETURN(args->a_vap, va_rdev, 0);
    VATTR_RETURN(args->a_vap, va_nlink, 3);      /* always just ., .., and the child */
    VATTR_RETURN(args->a_vap, va_total_size, 0); // hoping this is ok
    VATTR_RETURN(args->a_vap, va_data_size, 0);  // hoping this is ok
    VATTR_RETURN(args->a_vap, va_data_alloc, 0);
    VATTR_RETURN(args->a_vap, va_iosize, vfs_statfs(mp)->f_iosize);
    VATTR_RETURN(args->a_vap, va_fileid, ino);
    VATTR_RETURN(args->a_vap, va_linkid, ino);
    VATTR_RETURN(args->a_vap, va_fsid, vfs_statfs(mp)->f_fsid.val[0]); // return the fsid of the mount point
    VATTR_RETURN(args->a_vap, va_filerev, 0);
    VATTR_RETURN(args->a_vap, va_gen, 0);
    VATTR_RETURN(args->a_vap, va_flags, UF_HIDDEN); /* mark our fake directories as hidden. People shouldn't be
                                                       encouraged to poke around in them */

    if (ino == NULL_SECOND_INO) {
        VATTR_RETURN(args->a_vap, va_parentid, NULL_ROOT_INO); /* no parent at the root, so the only other vnode that
                                                                  goes through this path is second and its parent is 1. */
    }

    if (VATTR_IS_ACTIVE(args->a_vap, va_mode)) {
        /* force dr_xr_xr_x */
        VATTR_RETURN(args->a_vap, va_mode,
            S_IFDIR | S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
    }
    if (VATTR_IS_ACTIVE(args->a_vap, va_uid)) {
        VATTR_RETURN(args->a_vap, va_uid, covered_rootattr.va_uid);
    }
    if (VATTR_IS_ACTIVE(args->a_vap, va_gid)) {
        VATTR_RETURN(args->a_vap, va_gid, covered_rootattr.va_gid);
    }

    if (VATTR_IS_ACTIVE(args->a_vap, va_create_time)) {
        VATTR_SET_SUPPORTED(args->a_vap, va_create_time);
        args->a_vap->va_create_time.tv_sec = covered_rootattr.va_create_time.tv_sec;
        args->a_vap->va_create_time.tv_nsec = covered_rootattr.va_create_time.tv_nsec;
    }
    if (VATTR_IS_ACTIVE(args->a_vap, va_modify_time)) {
        VATTR_SET_SUPPORTED(args->a_vap, va_modify_time);
        args->a_vap->va_modify_time.tv_sec = covered_rootattr.va_modify_time.tv_sec;
        args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_modify_time.tv_nsec;
    }
    if (VATTR_IS_ACTIVE(args->a_vap, va_access_time)) {
        VATTR_SET_SUPPORTED(args->a_vap, va_access_time);
        args->a_vap->va_access_time.tv_sec = covered_rootattr.va_access_time.tv_sec;
        args->a_vap->va_access_time.tv_nsec = covered_rootattr.va_access_time.tv_nsec;
    }

    return 0;
}
/* * Lookup/Create an extended attribute entry. * * Input arguments: * dzp - znode for hidden attribute directory * name - name of attribute * flag - ZNEW: if the entry already exists, fail with EEXIST. * ZEXISTS: if the entry does not exist, fail with ENOENT. * * Output arguments: * vpp - pointer to the vnode for the entry (NULL if there isn't one) * * Return value: 0 on success or errno value on failure. */ int zfs_obtain_xattr(znode_t *dzp, const char *name, mode_t mode, cred_t *cr, vnode_t **vpp, int flag) { znode_t *xzp = NULL; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zilog_t *zilog; zfs_dirlock_t *dl; dmu_tx_t *tx; struct vnode_attr vattr; int error; struct componentname cn; zfs_acl_ids_t acl_ids; /* zfs_dirent_lock() expects a component name */ bzero(&cn, sizeof (cn)); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN; cn.cn_nameptr = (char *)name; cn.cn_namelen = strlen(name); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(dzp); zilog = zfsvfs->z_log; VATTR_INIT(&vattr); VATTR_SET(&vattr, va_type, VREG); VATTR_SET(&vattr, va_mode, mode & ~S_IFMT); if ((error = zfs_acl_ids_create(dzp, 0, &vattr, cr, NULL, &acl_ids)) != 0) { ZFS_EXIT(zfsvfs); return (error); } top: /* Lock the attribute entry name. */ if ( (error = zfs_dirent_lock(&dl, dzp, (char *)name, &xzp, flag, NULL, &cn)) ) { goto out; } /* If the name already exists, we're done. */ if (xzp != NULL) { zfs_dirent_unlock(dl); goto out; } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); //dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); //dmu_tx_hold_bonus(tx, dzp->z_id); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, (char *)name); #if 1 // FIXME if (dzp->z_pflags & ZFS_INHERIT_ACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); } #endif zfs_sa_upgrade_txholds(tx, dzp); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); goto out; } zfs_mknode(dzp, &vattr, tx, cr, 0, &xzp, &acl_ids); /* ASSERT(xzp->z_id == zoid); */ (void) zfs_link_create(dl, xzp, tx, ZNEW); zfs_log_create(zilog, tx, TX_CREATE, dzp, xzp, (char *)name, NULL /* vsecp */, 0 /*acl_ids.z_fuidp*/, &vattr); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); zfs_znode_wait_vnode(xzp); zfs_dirent_unlock(dl); out: if (error == EEXIST) error = ENOATTR; if (xzp) *vpp = ZTOV(xzp); ZFS_EXIT(zfsvfs); return (error); }
struct kern_direct_file_io_ref_t * kern_open_file_for_direct_io(const char * name, kern_get_file_extents_callback_t callback, void * callback_ref, dev_t * partition_device_result, dev_t * image_device_result, uint64_t * partitionbase_result, uint64_t * maxiocount_result, uint32_t * oflags, off_t offset, caddr_t addr, vm_size_t len) { struct kern_direct_file_io_ref_t * ref; proc_t p; struct vnode_attr va; int error; off_t f_offset; uint64_t fileblk; size_t filechunk; uint64_t physoffset; dev_t device; dev_t target = 0; int isssd = 0; uint32_t flags = 0; uint32_t blksize; off_t maxiocount, count; boolean_t locked = FALSE; int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); void * p1 = NULL; void * p2 = NULL; error = EFAULT; ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t)); if (!ref) { error = EFAULT; goto out; } bzero(ref, sizeof(*ref)); p = kernproc; ref->ctx = vfs_context_create(vfs_context_current()); if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx))) goto out; if (addr && len) { if ((error = kern_write_file(ref, offset, addr, len))) goto out; } VATTR_INIT(&va); VATTR_WANTED(&va, va_rdev); VATTR_WANTED(&va, va_fsid); VATTR_WANTED(&va, va_data_size); VATTR_WANTED(&va, va_nlink); error = EFAULT; if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out; kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev)); kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid)); kprintf("vp size %qd\n", va.va_data_size); if (ref->vp->v_type == VREG) { /* Don't dump files with links. */ if (va.va_nlink != 1) goto out; device = va.va_fsid; p1 = &device; p2 = p; do_ioctl = &file_ioctl; } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { /* Partition. */ device = va.va_rdev; p1 = ref->vp; p2 = ref->ctx; do_ioctl = &device_ioctl; } else { /* Don't dump to non-regular files. */ error = EFAULT; goto out; } ref->device = device; // get block size error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize); if (error) goto out; if (ref->vp->v_type == VREG) ref->filelength = va.va_data_size; else { error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk); if (error) goto out; ref->filelength = fileblk * ref->blksize; } // pin logical extents error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength); if (error && (ENOTTY != error)) goto out; ref->pinned = (error == 0); // generate the block list error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL); if (error) goto out; locked = TRUE; f_offset = 0; while (f_offset < ref->filelength) { if (ref->vp->v_type == VREG) { filechunk = 1*1024*1024*1024; daddr64_t blkno; error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, 0, NULL); if (error) goto out; fileblk = blkno * ref->blksize; } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { fileblk = f_offset; filechunk = f_offset ? 
            0 : ref->filelength;
        }
        physoffset = 0;
        while (physoffset < filechunk)
        {
            dk_physical_extent_t getphysreq;
            bzero(&getphysreq, sizeof(getphysreq));

            getphysreq.offset = fileblk + physoffset;
            getphysreq.length = (filechunk - physoffset);
            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
            if (error) goto out;
            if (!target)
            {
                target = getphysreq.dev;
            }
            else if (target != getphysreq.dev)
            {
                error = ENOTSUP;
                goto out;
            }
            callback(callback_ref, getphysreq.offset, getphysreq.length);
            physoffset += getphysreq.length;
        }
        f_offset += filechunk;
    }
    callback(callback_ref, 0ULL, 0ULL);

    if (ref->vp->v_type == VREG)
        p1 = &target;

    // get partition base

    error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
    if (error) goto out;

    // get block size & constraints

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
    if (error) goto out;

    maxiocount = 1*1024*1024*1024;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
    if (error) count = 0;
    count *= blksize;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
    if (error) count = 0;
    count *= blksize;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
    if (error) count = 0;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
    if (error) count = 0;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
    if (error) count = 0;
    if (count && (count < maxiocount)) maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
    if (error) count = 0;
    if (count && (count < maxiocount)) maxiocount = count;

    kprintf("max io 0x%qx bytes\n", maxiocount);
    if (maxiocount_result) *maxiocount_result = maxiocount;

    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
    if (!error && isssd) flags |= kIOHibernateOptionSSD;

    if (partition_device_result) *partition_device_result = device;
    if (image_device_result)     *image_device_result = target;
    if (flags)                   *oflags = flags;

out:
    kprintf("kern_open_file_for_direct_io(%d)\n", error);

    if (error && locked)
    {
        p1 = &device;
        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
    }

    if (error && ref)
    {
        if (ref->vp)
        {
            vnode_close(ref->vp, FWRITE, ref->ctx);
            ref->vp = NULLVP;
        }
        vfs_context_rele(ref->ctx);
        kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
        ref = NULL;
    }
    return (ref);
}
/* * shared_region_map_np() * * This system call is intended for dyld. * * dyld uses this to map a shared cache file into a shared region. * This is usually done only the first time a shared cache is needed. * Subsequent processes will just use the populated shared region without * requiring any further setup. */ int shared_region_map_np( struct proc *p, struct shared_region_map_np_args *uap, __unused int *retvalp) { int error; kern_return_t kr; int fd; struct fileproc *fp; struct vnode *vp, *root_vp; struct vnode_attr va; off_t fs; memory_object_size_t file_size; user_addr_t user_mappings; struct shared_file_mapping_np *mappings; #define SFM_MAX_STACK 8 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK]; unsigned int mappings_count; vm_size_t mappings_size; memory_object_control_t file_control; struct vm_shared_region *shared_region; SHARED_REGION_TRACE_DEBUG( ("shared_region: %p [%d(%s)] -> map\n", current_thread(), p->p_pid, p->p_comm)); shared_region = NULL; mappings_count = 0; mappings_size = 0; mappings = NULL; fp = NULL; vp = NULL; /* get file descriptor for shared region cache file */ fd = uap->fd; /* get file structure from file descriptor */ error = fp_lookup(p, fd, &fp, 0); if (error) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d lookup failed (error=%d)\n", current_thread(), p->p_pid, p->p_comm, fd, error)); goto done; } /* make sure we're attempting to map a vnode */ if (fp->f_fglob->fg_type != DTYPE_VNODE) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d not a vnode (type=%d)\n", current_thread(), p->p_pid, p->p_comm, fd, fp->f_fglob->fg_type)); error = EINVAL; goto done; } /* we need at least read permission on the file */ if (! (fp->f_fglob->fg_flag & FREAD)) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d not readable\n", current_thread(), p->p_pid, p->p_comm, fd)); error = EPERM; goto done; } /* get vnode from file structure */ error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data); if (error) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d getwithref failed (error=%d)\n", current_thread(), p->p_pid, p->p_comm, fd, error)); goto done; } vp = (struct vnode *) fp->f_fglob->fg_data; /* make sure the vnode is a regular file */ if (vp->v_type != VREG) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "not a file (type=%d)\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name, vp->v_type)); error = EINVAL; goto done; } /* make sure vnode is on the process's root volume */ root_vp = p->p_fd->fd_rdir; if (root_vp == NULL) { root_vp = rootvnode; } if (vp->v_mount != root_vp->v_mount) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "not on process's root volume\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name)); error = EPERM; goto done; } /* make sure vnode is owned by "root" */ VATTR_INIT(&va); VATTR_WANTED(&va, va_uid); error = vnode_getattr(vp, &va, vfs_context_current()); if (error) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "vnode_getattr(%p) failed (error=%d)\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name, vp, error)); goto done; } if (va.va_uid != 0) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "owned by uid=%d instead of 0\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name, va.va_uid)); error = EPERM; goto done; } /* get vnode size */ error = vnode_size(vp, &fs, vfs_context_current()); if (error) { SHARED_REGION_TRACE_ERROR( 
("shared_region: %p [%d(%s)] map(%p:'%s'): " "vnode_size(%p) failed (error=%d)\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name, vp, error)); goto done; } file_size = fs; /* get the file's memory object handle */ file_control = ubc_getobject(vp, UBC_HOLDOBJECT); if (file_control == MEMORY_OBJECT_CONTROL_NULL) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "no memory object\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name)); error = EINVAL; goto done; } /* get the list of mappings the caller wants us to establish */ mappings_count = uap->count; /* number of mappings */ mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0])); if (mappings_count == 0) { SHARED_REGION_TRACE_INFO( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "no mappings\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name)); error = 0; /* no mappings: we're done ! */ goto done; } else if (mappings_count <= SFM_MAX_STACK) { mappings = &stack_mappings[0]; } else { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "too many mappings (%d)\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name, mappings_count)); error = EINVAL; goto done; } user_mappings = uap->mappings; /* the mappings, in user space */ error = copyin(user_mappings, mappings, mappings_size); if (error) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "copyin(0x%llx, %d) failed (error=%d)\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error)); goto done; } /* get the process's shared region (setup in vm_map_exec()) */ shared_region = vm_shared_region_get(current_task()); if (shared_region == NULL) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "no shared region\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name)); goto done; } /* map the file into that shared region's submap */ kr = vm_shared_region_map_file(shared_region, mappings_count, mappings, file_control, file_size, (void *) p->p_fd->fd_rdir); if (kr != KERN_SUCCESS) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " "vm_shared_region_map_file() failed kr=0x%x\n", current_thread(), p->p_pid, p->p_comm, vp, vp->v_name, kr)); switch (kr) { case KERN_INVALID_ADDRESS: error = EFAULT; break; case KERN_PROTECTION_FAILURE: error = EPERM; break; case KERN_NO_SPACE: error = ENOMEM; break; case KERN_FAILURE: case KERN_INVALID_ARGUMENT: default: error = EINVAL; break; } goto done; } /* * The mapping was successful. Let the buffer cache know * that we've mapped that file with these protections. This * prevents the vnode from getting recycled while it's mapped. */ (void) ubc_map(vp, VM_PROT_READ); error = 0; /* update the vnode's access time */ if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) { VATTR_INIT(&va); nanotime(&va.va_access_time); VATTR_SET_ACTIVE(&va, va_access_time); vnode_setattr(vp, &va, vfs_context_current()); } if (p->p_flag & P_NOSHLIB) { /* signal that this process is now using split libraries */ OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag); } done: if (vp != NULL) { /* * release the vnode... * ubc_map() still holds it for us in the non-error case */ (void) vnode_put(vp); vp = NULL; } if (fp != NULL) { /* release the file descriptor */ fp_drop(p, fd, fp, 0); fp = NULL; } if (shared_region != NULL) { vm_shared_region_deallocate(shared_region); } SHARED_REGION_TRACE_DEBUG( ("shared_region: %p [%d(%s)] <- map\n", current_thread(), p->p_pid, p->p_comm)); return error; }
IOReturn FileNVRAM::read_buffer(char** aBuffer, uint64_t* aLength, vfs_context_t aCtx)
{
	IOReturn error = 0;
	struct vnode * vp;
	struct vnode_attr va;

	if (aCtx)
	{
		if ((error = vnode_open(FILE_NVRAM_PATH, (O_RDONLY | FREAD | O_NOFOLLOW), S_IRUSR, VNODE_LOOKUP_NOFOLLOW, &vp, aCtx)))
		{
			printf("failed opening vnode at path %s, errno %d\n", FILE_NVRAM_PATH, error);

			return error;
		}
		else
		{
			if (vnode_isreg(vp))
			{
				VATTR_INIT(&va);
				VATTR_WANTED(&va, va_data_size);	/* size in bytes of the fork managed by current vnode */

				// Determine size of vnode
				if ((error = vnode_getattr(vp, &va, aCtx)))
				{
					printf("FileNVRAM.kext: Error, failed to determine file size of %s, errno %d.\n", FILE_NVRAM_PATH, error);
				}
				else
				{
					if (aLength)
					{
						*aLength = va.va_data_size;
					}

					*aBuffer = (char *)IOMalloc((size_t)va.va_data_size);
					int len = (int)va.va_data_size;

					if (*aBuffer == NULL)
					{
						printf("FileNVRAM.kext: Error, IOMalloc(%d) failed!\n", len);
						error = ENOMEM;
					}
					else if ((error = vn_rdwr(UIO_READ, vp, *aBuffer, len, 0, UIO_SYSSPACE, IO_NOCACHE|IO_NODELOCKED|IO_UNIT, vfs_context_ucred(aCtx), (int *) 0, vfs_context_proc(aCtx))))
					{
						printf("FileNVRAM.kext: Error, reading from vnode(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
					}
				}
			}
			else
			{
				printf("FileNVRAM.kext: Error, %s is not a regular file!\n", FILE_NVRAM_PATH);
				error = EINVAL;
			}

			/* close the vnode on every path out of the open */
			int cerror = vnode_close(vp, 0, aCtx);

			if (cerror)
			{
				printf("FileNVRAM.kext: Error, vnode_close(%s) failed with error %d!\n", FILE_NVRAM_PATH, cerror);

				if (error == 0)
				{
					error = cerror;
				}
			}
		}
	}
	else
	{
		printf("FileNVRAM.kext: aCtx == NULL!\n");
		error = EINVAL;
	}

	return error;
}
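/*
 * read_buffer() above relies on the same attribute idiom that recurs
 * throughout these excerpts: VATTR_INIT, VATTR_WANTED(va_data_size),
 * vnode_getattr, then VATTR_IS_SUPPORTED before trusting the result.  A
 * minimal sketch of that idiom in isolation; the helper name is
 * hypothetical and the caller is assumed to already hold an iocount on vp.
 */
#include <sys/errno.h>
#include <sys/vnode.h>

static int
get_vnode_data_size(vnode_t vp, off_t *sizep, vfs_context_t ctx) /* hypothetical helper */
{
	struct vnode_attr va;
	int error;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_data_size);	/* bytes in the data fork */

	error = vnode_getattr(vp, &va, ctx);
	if (error)
		return error;
	if (!VATTR_IS_SUPPORTED(&va, va_data_size))
		return ENOTSUP;			/* filesystem didn't fill it in */

	*sizep = (off_t)va.va_data_size;
	return 0;
}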
/* question: does afs_create need to set CDirty in the adp or the avc? * I think we can get away without it, but I'm not sure. Note that * afs_setattr is called in here for truncation. */ #ifdef AFS_SGI64_ENV int afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, int flags, int amode, struct vcache **avcp, afs_ucred_t *acred) #else /* AFS_SGI64_ENV */ int afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, enum vcexcl aexcl, int amode, struct vcache **avcp, afs_ucred_t *acred) #endif /* AFS_SGI64_ENV */ { afs_int32 origCBs, origZaps, finalZaps; struct vrequest *treq = NULL; afs_int32 code; struct afs_conn *tc; struct VenusFid newFid; struct AFSStoreStatus InStatus; struct AFSFetchStatus *OutFidStatus, *OutDirStatus; struct AFSVolSync tsync; struct AFSCallBack CallBack; afs_int32 now; struct dcache *tdc; afs_size_t offset, len; struct server *hostp = 0; struct vcache *tvc; struct volume *volp = 0; struct afs_fakestat_state fakestate; struct rx_connection *rxconn; XSTATS_DECLS; OSI_VC_CONVERT(adp); AFS_STATCNT(afs_create); OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus)); OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus)); memset(&InStatus, 0, sizeof(InStatus)); if ((code = afs_CreateReq(&treq, acred))) goto done2; afs_Trace3(afs_iclSetp, CM_TRACE_CREATE, ICL_TYPE_POINTER, adp, ICL_TYPE_STRING, aname, ICL_TYPE_INT32, amode); afs_InitFakeStat(&fakestate); #ifdef AFS_SGI65_ENV /* If avcp is passed not null, it's the old reference to this file. * We can use this to avoid create races. For now, just decrement * the reference count on it. */ if (*avcp) { AFS_RELE(AFSTOV(*avcp)); *avcp = NULL; } #endif if (strlen(aname) > AFSNAMEMAX) { code = ENAMETOOLONG; goto done3; } if (!afs_ENameOK(aname)) { code = EINVAL; goto done3; } switch (attrs->va_type) { case VBLK: case VCHR: #if !defined(AFS_SUN5_ENV) case VSOCK: #endif case VFIFO: /* We don't support special devices or FIFOs */ code = EINVAL; goto done3; default: ; } AFS_DISCON_LOCK(); code = afs_EvalFakeStat(&adp, &fakestate, treq); if (code) goto done; tagain: code = afs_VerifyVCache(adp, treq); if (code) goto done; /** If the volume is read-only, return error without making an RPC to the * fileserver */ if (adp->f.states & CRO) { code = EROFS; goto done; } if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) { code = ENETDOWN; goto done; } tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1); ObtainWriteLock(&adp->lock, 135); if (tdc) ObtainSharedLock(&tdc->lock, 630); /* * Make sure that the data in the cache is current. We may have * received a callback while we were waiting for the write lock. */ if (!(adp->f.states & CStatd) || (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) { ReleaseWriteLock(&adp->lock); if (tdc) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } goto tagain; } if (tdc) { /* see if file already exists. 
If it does, we only set * the size attributes (to handle O_TRUNC) */ code = afs_dir_Lookup(tdc, aname, &newFid.Fid); /* use dnlc first xxx */ if (code == 0) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); ReleaseWriteLock(&adp->lock); #ifdef AFS_SGI64_ENV if (flags & VEXCL) { #else if (aexcl != NONEXCL) { #endif code = EEXIST; /* file exists in excl mode open */ goto done; } /* found the file, so use it */ newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; tvc = NULL; if (newFid.Fid.Unique == 0) { tvc = afs_LookupVCache(&newFid, treq, NULL, adp, aname); } if (!tvc) /* lookup failed or wasn't called */ tvc = afs_GetVCache(&newFid, treq, NULL, NULL); if (tvc) { /* if the thing exists, we need the right access to open it. * we must check that here, since no other checks are * made by the open system call */ len = attrs->va_size; /* only do the truncate */ /* * We used to check always for READ access before; the * problem is that we will fail if the existing file * has mode -w-w-w, which is wrong. */ if ((amode & VREAD) && !afs_AccessOK(tvc, PRSFS_READ, treq, CHECK_MODE_BITS)) { afs_PutVCache(tvc); code = EACCES; goto done; } #if defined(AFS_DARWIN80_ENV) if ((amode & VWRITE) || VATTR_IS_ACTIVE(attrs, va_data_size)) #elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) if ((amode & VWRITE) || (attrs->va_mask & AT_SIZE)) #else if ((amode & VWRITE) || len != 0xffffffff) #endif { /* needed for write access check */ tvc->f.parent.vnode = adp->f.fid.Fid.Vnode; tvc->f.parent.unique = adp->f.fid.Fid.Unique; /* need write mode for these guys */ if (!afs_AccessOK (tvc, PRSFS_WRITE, treq, CHECK_MODE_BITS)) { afs_PutVCache(tvc); code = EACCES; goto done; } } #if defined(AFS_DARWIN80_ENV) if (VATTR_IS_ACTIVE(attrs, va_data_size)) #elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) if (attrs->va_mask & AT_SIZE) #else if (len != 0xffffffff) #endif { if (vType(tvc) != VREG) { afs_PutVCache(tvc); code = EISDIR; goto done; } /* do a truncate */ #if defined(AFS_DARWIN80_ENV) VATTR_INIT(attrs); VATTR_SET_SUPPORTED(attrs, va_data_size); VATTR_SET_ACTIVE(attrs, va_data_size); #elif defined(UKERNEL) attrs->va_mask = ATTR_SIZE; #elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) attrs->va_mask = AT_SIZE; #else VATTR_NULL(attrs); #endif attrs->va_size = len; ObtainWriteLock(&tvc->lock, 136); tvc->f.states |= CCreating; ReleaseWriteLock(&tvc->lock); #if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) #if defined(AFS_SGI64_ENV) code = afs_setattr(VNODE_TO_FIRST_BHV((vnode_t *) tvc), attrs, 0, acred); #else code = afs_setattr(tvc, attrs, 0, acred); #endif /* AFS_SGI64_ENV */ #else /* SUN5 || SGI */ code = afs_setattr(tvc, attrs, acred); #endif /* SUN5 || SGI */ ObtainWriteLock(&tvc->lock, 137); tvc->f.states &= ~CCreating; ReleaseWriteLock(&tvc->lock); if (code) { afs_PutVCache(tvc); goto done; } } *avcp = tvc; } else code = ENOENT; /* shouldn't get here */ /* make sure vrefCount bumped only if code == 0 */ goto done; } } /* if we create the file, we don't do any access checks, since * that's how O_CREAT is supposed to work */ if (adp->f.states & CForeign) { origCBs = afs_allCBs; origZaps = afs_allZaps; } else { origCBs = afs_evenCBs; /* if changes, we don't really have a callback */ origZaps = afs_evenZaps; /* number of even numbered vnodes discarded */ } InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE | AFS_SETGROUP; InStatus.ClientModTime = osi_Time(); InStatus.Group = (afs_int32) afs_cr_gid(acred); if (AFS_NFSXLATORREQ(acred)) { /* * XXX The following is mainly used to fix a bug in the HP-UX * 
nfs client where they create files with mode of 0 without * doing any setattr later on to fix it. * XXX */ #if defined(AFS_AIX_ENV) if (attrs->va_mode != -1) { #else #if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) if (attrs->va_mask & AT_MODE) { #else if (attrs->va_mode != ((unsigned short)-1)) { #endif #endif if (!attrs->va_mode) attrs->va_mode = 0x1b6; /* XXX default mode: rw-rw-rw XXX */ } } if (!AFS_IS_DISCONNECTED) { /* If not disconnected, connect to the server.*/ InStatus.UnixModeBits = attrs->va_mode & 0xffff; /* only care about protection bits */ do { tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn); if (tc) { hostp = tc->srvr->server; /* remember for callback processing */ now = osi_Time(); XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_CREATEFILE); RX_AFS_GUNLOCK(); code = RXAFS_CreateFile(rxconn, (struct AFSFid *)&adp->f.fid.Fid, aname, &InStatus, (struct AFSFid *) &newFid.Fid, OutFidStatus, OutDirStatus, &CallBack, &tsync); RX_AFS_GLOCK(); XSTATS_END_TIME; CallBack.ExpirationTime += now; } else code = -1; } while (afs_Analyze (tc, rxconn, code, &adp->f.fid, treq, AFS_STATS_FS_RPCIDX_CREATEFILE, SHARED_LOCK, NULL)); if ((code == EEXIST || code == UAEEXIST) && #ifdef AFS_SGI64_ENV !(flags & VEXCL) #else /* AFS_SGI64_ENV */ aexcl == NONEXCL #endif ) { /* if we get an EEXIST in nonexcl mode, just do a lookup */ if (tdc) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } ReleaseWriteLock(&adp->lock); #if defined(AFS_SGI64_ENV) code = afs_lookup(VNODE_TO_FIRST_BHV((vnode_t *) adp), aname, avcp, NULL, 0, NULL, acred); #elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) code = afs_lookup(adp, aname, avcp, NULL, 0, NULL, acred); #elif defined(UKERNEL) code = afs_lookup(adp, aname, avcp, acred, 0); #elif !defined(AFS_DARWIN_ENV) code = afs_lookup(adp, aname, avcp, acred); #endif goto done; } if (code) { if (code < 0) { ObtainWriteLock(&afs_xcbhash, 488); afs_DequeueCallback(adp); adp->f.states &= ~CStatd; ReleaseWriteLock(&afs_xcbhash); osi_dnlc_purgedp(adp); } ReleaseWriteLock(&adp->lock); if (tdc) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } goto done; } } else { /* Generate a fake FID for disconnected mode. */ newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; afs_GenFakeFid(&newFid, VREG, 1); } /* if (!AFS_IS_DISCON_RW) */ /* otherwise, we should see if we can make the change to the dir locally */ if (tdc) UpgradeSToWLock(&tdc->lock, 631); if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) { /* we can do it locally */ ObtainWriteLock(&afs_xdcache, 291); code = afs_dir_Create(tdc, aname, &newFid.Fid); ReleaseWriteLock(&afs_xdcache); if (code) { ZapDCE(tdc); DZap(tdc); } } if (tdc) { ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } if (AFS_IS_DISCON_RW) adp->f.m.LinkCount++; newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; ReleaseWriteLock(&adp->lock); volp = afs_FindVolume(&newFid, READ_LOCK); /* New tricky optimistic callback handling algorithm for file creation works * as follows. We create the file essentially with no locks set at all. File * server may thus handle operations from others cache managers as well as from * this very own cache manager that reference the file in question before * we managed to create the cache entry. However, if anyone else changes * any of the status information for a file, we'll see afs_evenCBs increase * (files always have even fids). If someone on this workstation manages * to do something to the file, they'll end up having to create a cache * entry for the new file. 
Either we'll find it once we've got the afs_xvcache * lock set, or it was also *deleted* the vnode before we got there, in which case * we will find evenZaps has changed, too. Thus, we only assume we have the right * status information if no callbacks or vnode removals have occurred to even * numbered files from the time the call started until the time that we got the xvcache * lock set. Of course, this also assumes that any call that modifies a file first * gets a write lock on the file's vnode, but if that weren't true, the whole cache manager * would fail, since no call would be able to update the local vnode status after modifying * a file on a file server. */ ObtainWriteLock(&afs_xvcache, 138); if (adp->f.states & CForeign) finalZaps = afs_allZaps; /* do this before calling newvcache */ else finalZaps = afs_evenZaps; /* do this before calling newvcache */ /* don't need to call RemoveVCB, since only path leaving a callback is the * one where we pass through afs_NewVCache. Can't have queued a VCB unless * we created and freed an entry between file creation time and here, and the * freeing of the vnode will change evenZaps. Don't need to update the VLRU * queue, since the find will only succeed in the event of a create race, and * then the vcache will be at the front of the VLRU queue anyway... */ if (!(tvc = afs_FindVCache(&newFid, 0, DO_STATS))) { tvc = afs_NewVCache(&newFid, hostp); if (tvc) { int finalCBs; ObtainWriteLock(&tvc->lock, 139); ObtainWriteLock(&afs_xcbhash, 489); finalCBs = afs_evenCBs; /* add the callback in */ if (adp->f.states & CForeign) { tvc->f.states |= CForeign; finalCBs = afs_allCBs; } if (origCBs == finalCBs && origZaps == finalZaps) { tvc->f.states |= CStatd; /* we've fake entire thing, so don't stat */ tvc->f.states &= ~CBulkFetching; if (!AFS_IS_DISCON_RW) { tvc->cbExpires = CallBack.ExpirationTime; afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime), volp); } } else { afs_DequeueCallback(tvc); tvc->f.states &= ~(CStatd | CUnique); tvc->callback = 0; if (tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR)) osi_dnlc_purgedp(tvc); } ReleaseWriteLock(&afs_xcbhash); if (AFS_IS_DISCON_RW) { afs_DisconAddDirty(tvc, VDisconCreate, 0); afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VREG); } else { afs_ProcessFS(tvc, OutFidStatus, treq); } tvc->f.parent.vnode = adp->f.fid.Fid.Vnode; tvc->f.parent.unique = adp->f.fid.Fid.Unique; #if !defined(UKERNEL) if (volp && (volp->states & VPartVisible)) tvc->f.states |= CPartVisible; #endif ReleaseWriteLock(&tvc->lock); *avcp = tvc; code = 0; } else code = ENOENT; } else { /* otherwise cache entry already exists, someone else must * have created it. Comments used to say: "don't need write * lock to *clear* these flags" but we should do it anyway. * Code used to clear stat bit and callback, but I don't see * the point -- we didn't have a create race, somebody else just * snuck into NewVCache before we got here, probably a racing * lookup. 
*/ *avcp = tvc; code = 0; } ReleaseWriteLock(&afs_xvcache); done: AFS_DISCON_UNLOCK(); done3: if (volp) afs_PutVolume(volp, READ_LOCK); if (code == 0) { if (afs_mariner) afs_AddMarinerName(aname, *avcp); /* return the new status in vattr */ afs_CopyOutAttrs(*avcp, attrs); if (afs_mariner) afs_MarinerLog("store$Creating", *avcp); } afs_PutFakeStat(&fakestate); code = afs_CheckCode(code, treq, 20); afs_DestroyReq(treq); done2: osi_FreeSmallSpace(OutFidStatus); osi_FreeSmallSpace(OutDirStatus); return code; } /* * Check to see if we can track the change locally: requires that * we have sufficiently recent info in data cache. If so, we * know the new DataVersion number, and place it correctly in both the * data and stat cache entries. This routine returns 1 if we should * do the operation locally, and 0 otherwise. * * This routine must be called with the stat cache entry write-locked, * and dcache entry write-locked. */ int afs_LocalHero(struct vcache *avc, struct dcache *adc, AFSFetchStatus * astat, int aincr) { afs_int32 ok; afs_hyper_t avers; AFS_STATCNT(afs_LocalHero); hset64(avers, astat->dataVersionHigh, astat->DataVersion); /* avers *is* the version number now, no matter what */ if (adc) { /* does what's in the dcache *now* match what's in the vcache *now*, * and do we have a valid callback? if not, our local copy is not "ok" */ ok = (hsame(avc->f.m.DataVersion, adc->f.versionNo) && avc->callback && (avc->f.states & CStatd) && avc->cbExpires >= osi_Time()); } else { ok = 0; } if (ok) { /* check that the DV on the server is what we expect it to be */ afs_hyper_t newDV; hset(newDV, adc->f.versionNo); hadd32(newDV, aincr); if (!hsame(avers, newDV)) { ok = 0; } } #if defined(AFS_SGI_ENV) osi_Assert(avc->v.v_type == VDIR); #endif /* The bulk status code used the length as a sequence number. */ /* Don't update the vcache entry unless the stats are current. */ if (avc->f.states & CStatd) { hset(avc->f.m.DataVersion, avers); #ifdef AFS_64BIT_CLIENT FillInt64(avc->f.m.Length, astat->Length_hi, astat->Length); #else /* AFS_64BIT_CLIENT */ avc->f.m.Length = astat->Length; #endif /* AFS_64BIT_CLIENT */ avc->f.m.Date = astat->ClientModTime; } if (ok) { /* we've been tracking things correctly */ adc->dflags |= DFEntryMod; adc->f.versionNo = avers; return 1; } else { if (adc) { ZapDCE(adc); DZap(adc); } if (avc->f.states & CStatd) { osi_dnlc_purgedp(avc); } return 0; } }
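/*
 * The core of afs_LocalHero() is the data-version check: take the version we
 * have cached, add the number of changes we just made (aincr), and require
 * that this equals the version the server reports.  Restated with plain
 * 64-bit integers for illustration; the real code uses the afs_hyper_t
 * macros hset/hadd32/hsame so it also builds where no native 64-bit type is
 * available, and the function name here is hypothetical.
 */
static int
local_copy_still_current(afs_uint64 cached_dv, afs_uint64 server_dv, int aincr)
{
	/* our cached data is usable only if applying our own change to the
	 * cached version yields exactly what the server now reports */
	return (cached_dv + (afs_uint64)aincr) == server_dv;
}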
static int vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { vdev_file_t *vf; #ifdef __APPLE__ struct vnode *vp, *rootdir; struct vnode_attr vattr; vfs_context_t context; #else vnode_t *vp; vattr_t vattr; #endif int error; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (EINVAL); } vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); /* * We always open the files from the root of the global zone, even if * we're in a local zone. If the user has gotten to this point, the * administrator has already decided that the pool should be available * to local zone users, so the underlying devices should be as well. */ ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); #ifdef __APPLE__ rootdir = getrootdir(); #endif error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, spa_mode | FOFFMAX, 0, &vp, 0, 0, rootdir); if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } vf->vf_vnode = vp; #ifdef _KERNEL /* * Make sure it's a regular file. */ #ifdef __APPLE__ if (!vnode_isreg(vp)) { #else if (vp->v_type != VREG) { #endif vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (ENODEV); } #endif /* * Determine the physical size of the file. */ #ifdef __APPLE__ VATTR_INIT(&vattr); VATTR_WANTED(&vattr, va_data_size); context = vfs_context_create((vfs_context_t)0); error = vnode_getattr(vp, &vattr, context); (void) vfs_context_rele(context); if (error || !VATTR_IS_SUPPORTED(&vattr, va_data_size)) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } *psize = vattr.va_data_size; #else vattr.va_mask = AT_SIZE; error = VOP_GETATTR(vp, &vattr, 0, kcred); if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } *psize = vattr.va_size; #endif *ashift = SPA_MINBLOCKSHIFT; return (0); } static void vdev_file_close(vdev_t *vd) { vdev_file_t *vf = vd->vdev_tsd; if (vf == NULL) return; if (vf->vf_vnode != NULL) { #ifdef __APPLE__ vfs_context_t context; context = vfs_context_create((vfs_context_t)0); /* ### APPLE TODO #### */ // (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred); (void) vnode_close(vf->vf_vnode, spa_mode, context); (void) vfs_context_rele(context); #else (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred); (void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred); VN_RELE(vf->vf_vnode); #endif } kmem_free(vf, sizeof (vdev_file_t)); vd->vdev_tsd = NULL; }
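/*
 * Both vdev_file_open() and vdev_file_close() above bracket a single VFS
 * operation with a temporary vfs_context on the Darwin side: create a
 * context from the current thread, use it once, release it.  A minimal
 * sketch of that lifecycle; the helper name and the flag choice are
 * illustrative assumptions (the real close path passes spa_mode).
 */
#ifdef __APPLE__
static int
close_vnode_with_temp_context(struct vnode *vp, int openflags) /* hypothetical helper */
{
	vfs_context_t context;
	int error;

	context = vfs_context_create((vfs_context_t)0);	/* current thread's credentials */
	error = vnode_close(vp, openflags, context);		/* flags should mirror the open mode */
	(void) vfs_context_rele(context);

	return (error);
}
#endif /* __APPLE__ */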