Esempio n. 1
0
/* get lvp's parent, if possible, even if it isn't set.

   lvp is expected to have an iocount before and after this call.

   if a dvpp is populated the returned vnode has an iocount. */
static int
null_get_lowerparent(vnode_t lvp, vnode_t * dvpp, vfs_context_t ctx)
{
	int error = 0;
	struct vnode_attr va;
	mount_t mp  = vnode_mount(lvp);
	vnode_t dvp = vnode_parent(lvp);

	if (dvp) {
		error = vnode_get(dvp);
		goto end;
	}

	error = ENOENT;
	if (!(mp->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
		goto end;
	}

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_parentid);

	error = vnode_getattr(lvp, &va, ctx);

	if (error || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
		goto end;
	}

	error = VFS_VGET(mp, (ino64_t)va.va_parentid, &dvp, ctx);

end:
	if (error == 0) {
		*dvpp = dvp;
	}
	return error;
}
Esempio n. 2
0
ino_t
VnodeToIno(vnode_t avp)
{
    unsigned long ret;

#ifndef AFS_DARWIN80_ENV
    if (afs_CacheFSType == AFS_APPL_UFS_CACHE) {
	struct inode *ip = VTOI(avp);
	ret = ip->i_number;
    } else if (afs_CacheFSType == AFS_APPL_HFS_CACHE) {
#endif
#if defined(AFS_DARWIN80_ENV) 
	struct vattr va;
	VATTR_INIT(&va);
        VATTR_WANTED(&va, va_fileid);
	if (vnode_getattr(avp, &va, afs_osi_ctxtp))
	    osi_Panic("VOP_GETATTR failed in VnodeToIno\n");
        if (!VATTR_ALL_SUPPORTED(&va))
	    osi_Panic("VOP_GETATTR unsupported fileid in VnodeToIno\n");
	ret = va.va_fileid;
#elif !defined(VTOH)
	struct vattr va;
	if (VOP_GETATTR(avp, &va, &afs_osi_cred, current_proc()))
	    osi_Panic("VOP_GETATTR failed in VnodeToIno\n");
	ret = va.va_fileid;
#else
	struct hfsnode *hp = VTOH(avp);
	ret = H_FILEID(hp);
#endif
#ifndef AFS_DARWIN80_ENV
    } else
	osi_Panic("VnodeToIno called before cacheops initialized\n");
#endif
    return ret;
}
Esempio n. 3
0
int
afs_osi_Stat(struct osi_file *afile, struct osi_stat *astat)
{
    afs_int32 code;
    struct vattr tvattr;
    AFS_STATCNT(osi_Stat);
    ObtainWriteLock(&afs_xosi, 320);
    AFS_GUNLOCK();
#ifdef AFS_DARWIN80_ENV
    VATTR_INIT(&tvattr);
    VATTR_WANTED(&tvattr, va_size);
    VATTR_WANTED(&tvattr, va_blocksize);
    VATTR_WANTED(&tvattr, va_mtime);
    VATTR_WANTED(&tvattr, va_atime);
    code = vnode_getattr(afile->vnode, &tvattr, afs_osi_ctxtp);
    if (code == 0 && !VATTR_ALL_SUPPORTED(&tvattr))
       code = EINVAL;
#else
    code = VOP_GETATTR(afile->vnode, &tvattr, &afs_osi_cred, current_proc());
#endif
    AFS_GLOCK();
    if (code == 0) {
	astat->size = tvattr.va_size;
	astat->mtime = tvattr.va_mtime.tv_sec;
	astat->atime = tvattr.va_atime.tv_sec;
    }
    ReleaseWriteLock(&afs_xosi);
    return code;
}
Esempio n. 4
0
int
osi_UFSTruncate(struct osi_file *afile, afs_int32 asize)
{
    afs_ucred_t *oldCred;
    struct vattr tvattr;
    afs_int32 code;
    struct osi_stat tstat;
    AFS_STATCNT(osi_Truncate);

    /* This routine only shrinks files, and most systems
     * have very slow truncates, even when the file is already
     * small enough.  Check now and save some time.
     */
    code = afs_osi_Stat(afile, &tstat);
    if (code || tstat.size <= asize)
	return code;
    ObtainWriteLock(&afs_xosi, 321);
    AFS_GUNLOCK();
#ifdef AFS_DARWIN80_ENV
    VATTR_INIT(&tvattr);
    VATTR_SET(&tvattr, va_size, asize);
    code = vnode_setattr(afile->vnode, &tvattr, afs_osi_ctxtp);
#else
    VATTR_NULL(&tvattr);
    tvattr.va_size = asize;
    code = VOP_SETATTR(afile->vnode, &tvattr, &afs_osi_cred, current_proc());
#endif
    AFS_GLOCK();
    ReleaseWriteLock(&afs_xosi);
    return code;
}
Esempio n. 5
0
dev_t
VnodeToDev(vnode_t avp)
{
#ifndef AFS_DARWIN80_ENV
    if (afs_CacheFSType == AFS_APPL_UFS_CACHE) {
	struct inode *ip = VTOI(avp);
	return ip->i_dev;
    } else if (afs_CacheFSType == AFS_APPL_HFS_CACHE) {
#endif
#if defined(AFS_DARWIN80_ENV) 
	struct vattr va;
        VATTR_INIT(&va);
        VATTR_WANTED(&va, va_fsid);
	if (vnode_getattr(avp, &va, afs_osi_ctxtp))
	    osi_Panic("VOP_GETATTR failed in VnodeToDev\n");
        if (!VATTR_ALL_SUPPORTED(&va))
	    osi_Panic("VOP_GETATTR unsupported fsid in VnodeToIno\n");
	return va.va_fsid;	/* XXX they say it's the dev.... */
#elif !defined(VTOH)
	struct vattr va;
	if (VOP_GETATTR(avp, &va, &afs_osi_cred, current_proc()))
	    osi_Panic("VOP_GETATTR failed in VnodeToDev\n");
	return va.va_fsid;	/* XXX they say it's the dev.... */
#else
	struct hfsnode *hp = VTOH(avp);
	return H_DEV(hp);
#endif
#ifndef AFS_DARWIN80_ENV
    } else
	osi_Panic("VnodeToDev called before cacheops initialized\n");
#endif
}
Esempio n. 6
0
/* vnode_size() is not exported */
static errno_t
vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx)
{
        struct vnode_attr       va;
        int                     error; 
        
        VATTR_INIT(&va);
        VATTR_WANTED(&va, va_data_size);
        error = vnode_getattr(vp, &va, ctx);
        if (!error)
                *sizep = va.va_data_size;
        return(error);
}
bool VNodeDiskDeviceClass::setupVNode() {
  int vapError = -1;
  struct vnode_attr vap;

  if (m_vnode != NULL)
    return true;

  vfs_context_t vfsContext = vfs_context_create((vfs_context_t) 0);

  int vnodeError = vnode_open(m_filePath->getCStringNoCopy(), (FREAD | FWRITE), 0, 0,
    &m_vnode, vfsContext);

  if (vnodeError || m_vnode == NULL) {
    IOLog("Error when opening file %s: error %d\n", 
      m_filePath->getCStringNoCopy(), vnodeError);
    goto failure;
  }

  if (!vnode_isreg(m_vnode)) {
    IOLog("Error when opening file %s: not a regular file\n", 
      m_filePath->getCStringNoCopy());
    vnode_close(m_vnode, (FREAD | FWRITE), vfsContext);
    goto failure;
  }

  VATTR_INIT(&vap);
  VATTR_WANTED(&vap, va_data_size);
  vapError = vnode_getattr(m_vnode, &vap, vfsContext);
  if (vapError) {
    IOLog("Error when retrieving vnode's attributes with error code %d\n", vapError);
    goto failure;
  }

  if (vap.va_data_size < m_blockSize * m_blockNum) {
    IOLog("Error file %s is too small, actual size is %llu\n", 
      m_filePath->getCStringNoCopy(), vap.va_data_size);
    goto failure;
  }

  vfs_context_rele(vfsContext);
  return true;

failure:
  vfs_context_rele(vfsContext);
  return false;
}
Esempio n. 8
0
osi_DisableAtimes(struct vnode *avp)
#endif
{
#ifdef AFS_DARWIN80_ENV
    struct vnode_attr vap;

    VATTR_INIT(&vap);
    VATTR_CLEAR_SUPPORTED(&vap, va_access_time);
    vnode_setattr(avp, &vap, afs_osi_ctxtp);
#else
    if (afs_CacheFSType == AFS_APPL_UFS_CACHE) {
	struct inode *ip = VTOI(avp);
	ip->i_flag &= ~IN_ACCESS;
    }
#ifdef VTOH			/* can't do this without internals */
    else if (afs_CacheFSType == AFS_APPL_HFS_CACHE) {
	struct hfsnode *hp = VTOH(avp);
	hp->h_nodeflags &= ~IN_ACCESS;
    }
#endif
#endif
}
Esempio n. 9
0
struct kern_direct_file_io_ref_t *
kern_open_file_for_direct_io(const char * name, 
			     uint32_t iflags,
			     kern_get_file_extents_callback_t callback, 
			     void * callback_ref,
                             off_t set_file_size,
                             off_t fs_free_size,
                             off_t write_file_offset,
                             void * write_file_addr,
                             size_t write_file_len,
			     dev_t * partition_device_result,
			     dev_t * image_device_result,
                             uint64_t * partitionbase_result,
                             uint64_t * maxiocount_result,
                             uint32_t * oflags)
{
    struct kern_direct_file_io_ref_t * ref;

    proc_t            p;
    struct vnode_attr va;
    dk_apfs_wbc_range_t wbc_range;
    int               error;
    off_t             f_offset;
    uint64_t          fileblk;
    size_t            filechunk;
    uint64_t          physoffset, minoffset;
    dev_t             device;
    dev_t             target = 0;
    int               isssd = 0;
    uint32_t          flags = 0;
    uint32_t          blksize;
    off_t             maxiocount, count, segcount, wbctotal;
    boolean_t         locked = FALSE;
    int               fmode, cmode;
    struct            nameidata nd;
    u_int32_t         ndflags;
    off_t             mpFree;

    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
    void * p1 = NULL;
    void * p2 = NULL;

    error = EFAULT;

    ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
    if (!ref)
    {
	error = EFAULT;
    	goto out;
    }

    bzero(ref, sizeof(*ref));
    p = kernproc;
    ref->ctx = vfs_context_kernel();

    fmode  = (kIOPolledFileCreate & iflags) ? (O_CREAT | FWRITE) : FWRITE;
    cmode =  S_IRUSR | S_IWUSR;
    ndflags = NOFOLLOW;
    NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ref->ctx);
    VATTR_INIT(&va);
    VATTR_SET(&va, va_mode, cmode);
    VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
    VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D);
    if ((error = vn_open_auth(&nd, &fmode, &va))) {
	kprintf("vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n", fmode, cmode, error);
	goto out;
    }

    ref->vp = nd.ni_vp;
    if (ref->vp->v_type == VREG)
    {
        vnode_lock_spin(ref->vp);
        SET(ref->vp->v_flag, VSWAP);
        vnode_unlock(ref->vp);
    }

    if (write_file_addr && write_file_len)
    {
        if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, IO_SKIP_ENCRYPTION))) {
            kprintf("kern_write_file() failed with error: %d\n", error);
            goto out;
        }
    }

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_rdev);
    VATTR_WANTED(&va, va_fsid);
    VATTR_WANTED(&va, va_devid);
    VATTR_WANTED(&va, va_data_size);
    VATTR_WANTED(&va, va_data_alloc);
    VATTR_WANTED(&va, va_nlink);
    error = EFAULT;
    if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out;

    wbctotal = 0;
    mpFree = freespace_mb(ref->vp);
    mpFree <<= 20;
    kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n", 
    		name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size);

    if (ref->vp->v_type == VREG)
    {
        /* Don't dump files with links. */
        if (va.va_nlink != 1) goto out;

        device = (VATTR_IS_SUPPORTED(&va, va_devid)) ? va.va_devid : va.va_fsid;
        ref->filelength = va.va_data_size;

        p1 = &device;
        p2 = p;
        do_ioctl = &file_ioctl;

        if (kIOPolledFileHibernate & iflags)
        {
            error = do_ioctl(p1, p2, DKIOCAPFSGETWBCRANGE, (caddr_t) &wbc_range);
            ref->wbcranged = (error == 0);
        }
        if (ref->wbcranged)
        {
            uint32_t idx;
            assert(wbc_range.count <= (sizeof(wbc_range.extents) / sizeof(wbc_range.extents[0])));
            for (idx = 0; idx < wbc_range.count; idx++) wbctotal += wbc_range.extents[idx].length;
            kprintf("kern_direct_file(%s): wbc %qd\n", name, wbctotal);
            if (wbctotal) target = wbc_range.dev;
        }

        if (set_file_size)
        {
            if (wbctotal)
            {
                if (wbctotal >= set_file_size) set_file_size = HIBERNATE_MIN_FILE_SIZE;
                else
                {
                    set_file_size -= wbctotal;
                    if (set_file_size < HIBERNATE_MIN_FILE_SIZE) set_file_size = HIBERNATE_MIN_FILE_SIZE;
                }
            }
            if (fs_free_size)
            {
		mpFree += va.va_data_alloc;
		if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size))
		{
		    error = ENOSPC;
		    goto out;
		}
	    }
	    error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx);
	    if (error) goto out;
	    ref->filelength = set_file_size;
        }
    }
    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
	/* Partition. */
        device = va.va_rdev;

        p1 = ref->vp;
        p2 = ref->ctx;
        do_ioctl = &device_ioctl;
    }
    else
    {
	/* Don't dump to non-regular files. */
        error = EFAULT;
        goto out;
    }
    ref->device = device;

    // probe for CF
    dk_corestorage_info_t cs_info;
    memset(&cs_info, 0, sizeof(dk_corestorage_info_t));
    error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info);
    ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES);

    // get block size

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
    if (error)
        goto out;

    minoffset = HIBERNATE_MIN_PHYSICAL_LBA * ref->blksize;

    if (ref->vp->v_type != VREG)
    {
        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
        if (error) goto out;
	ref->filelength = fileblk * ref->blksize;    
    }

    // pin logical extents, CS version

    error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
    if (error && (ENOTTY != error)) goto out;
    ref->pinned = (error == 0);

    // pin logical extents, apfs version

    error = VNOP_IOCTL(ref->vp, FSCTL_FREEZE_EXTENTS, NULL, 0, ref->ctx);
    if (error && (ENOTTY != error)) goto out;
    ref->frozen = (error == 0);

    // generate the block list

    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
    if (error) goto out;
    locked = TRUE;

    f_offset = 0;
    for (; f_offset < ref->filelength; f_offset += filechunk)
    {
        if (ref->vp->v_type == VREG)
        {
            filechunk = 1*1024*1024*1024;
            daddr64_t blkno;

            error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno,
								  &filechunk, NULL, VNODE_WRITE | VNODE_BLOCKMAP_NO_TRACK, NULL);
            if (error) goto out;
            if (-1LL == blkno) continue;
            fileblk = blkno * ref->blksize;
        }
        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
        {
            fileblk = f_offset;
            filechunk = f_offset ? 0 : ref->filelength;
        }

        physoffset = 0;
        while (physoffset < filechunk)
        {
            dk_physical_extent_t getphysreq;
            bzero(&getphysreq, sizeof(getphysreq));

            getphysreq.offset = fileblk + physoffset;
            getphysreq.length = (filechunk - physoffset);
            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
            if (error) goto out;
            if (!target)
            {
                target = getphysreq.dev;
            }
            else if (target != getphysreq.dev)
            {
                error = ENOTSUP;
                goto out;
            }

            assert(getphysreq.offset >= minoffset);

#if HIBFRAGMENT
	    uint64_t rev;
	    for (rev = 4096; rev <= getphysreq.length; rev += 4096)
	    {
		callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096);
	    }
#else
            callback(callback_ref, getphysreq.offset, getphysreq.length);
#endif
            physoffset += getphysreq.length;
        }
    }
    if (ref->wbcranged)
    {
        uint32_t idx;
        for (idx = 0; idx < wbc_range.count; idx++)
        {
            assert(wbc_range.extents[idx].offset >= minoffset);
            callback(callback_ref, wbc_range.extents[idx].offset, wbc_range.extents[idx].length);
        }
    }
    callback(callback_ref, 0ULL, 0ULL);

    if (ref->vp->v_type == VREG) p1 = &target;
    else
    {
	p1 = &target;
	p2 = p;
	do_ioctl = &file_ioctl;
    }

    // get partition base

    if (partitionbase_result) 
    {
        error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
        if (error)
            goto out;
    }

    // get block size & constraints

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
    if (error)
        goto out;

    maxiocount = 1*1024*1024*1024;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
    if (!error)
	error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount);
    if (error)
        count = segcount = 0;
    count *= segcount;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
    if (!error)
	error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount);
    if (error)
        count = segcount = 0;
    count *= segcount;
    if (count && (count < maxiocount))
        maxiocount = count;

    kprintf("max io 0x%qx bytes\n", maxiocount);
    if (maxiocount_result)
        *maxiocount_result = maxiocount;

    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
    if (!error && isssd)
        flags |= kIOPolledFileSSD;

    if (partition_device_result)
        *partition_device_result = device;
    if (image_device_result)
        *image_device_result = target;
    if (oflags)
        *oflags = flags;

    if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
        vnode_close(ref->vp, FWRITE, ref->ctx);
        ref->vp = NULLVP;
	ref->ctx = NULL;
    }

out:
    printf("kern_open_file_for_direct_io(%p, %d)\n", ref, error);


    if (error && locked)
    {
        p1 = &device;
        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
    }

    if (error && ref)
    {
        if (ref->vp)
        {
            (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (ref->pinned && ref->cf) ? ref->filelength : 0);

            if (ref->frozen)
            {
                (void) VNOP_IOCTL(ref->vp, FSCTL_THAW_EXTENTS, NULL, 0, ref->ctx);
            }
            if (ref->wbcranged)
            {
                (void) do_ioctl(p1, p2, DKIOCAPFSRELEASEWBCRANGE, (caddr_t) NULL);
            }
            vnode_close(ref->vp, FWRITE, ref->ctx);
            ref->vp = NULLVP;
        }
        ref->ctx = NULL;
        kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
        ref = NULL;
    }

    return(ref);
}
Esempio n. 10
0
static int process_cred_label_update_execvew(kauth_cred_t old_cred,
                                             kauth_cred_t new_cred,
                                             struct proc *p,
                                             struct vnode *vp,
                                             off_t offset,
                                             struct vnode *scriptvp,
                                             struct label *vnodelabel,
                                             struct label *scriptvnodelabel,
                                             struct label *execlabel,
                                             u_int *csflags,
                                             void *macpolicyattr,
                                             size_t macpolicyattrlen,
                                             int *disjointp) {
  int path_len = MAXPATHLEN;

  if (!vnode_isreg(vp)) {
    goto error_exit;
  }

  // Determine address of image_params based off of csflags pointer. (HACKY)
  struct image_params *img =
      (struct image_params *)((char *)csflags -
                              offsetof(struct image_params, ip_csflags));

  // Find the length of arg and env we will copy.
  size_t arg_length =
      MIN(MAX_VECTOR_LENGTH, img->ip_endargv - img->ip_startargv);
  size_t env_length = MIN(MAX_VECTOR_LENGTH, img->ip_endenvv - img->ip_endargv);

  osquery_process_event_t *e =
      (osquery_process_event_t *)osquery_cqueue_reserve(
          cqueue,
          OSQUERY_PROCESS_EVENT,
          sizeof(osquery_process_event_t) + arg_length + env_length);

  if (!e) {
    goto error_exit;
  }
  // Copy the arg and env vectors.
  e->argv_offset = 0;
  e->envv_offset = arg_length;

  e->arg_length = arg_length;
  e->env_length = env_length;

  memcpy(&(e->flexible_data[e->argv_offset]), img->ip_startargv, arg_length);
  memcpy(&(e->flexible_data[e->envv_offset]), img->ip_endargv, env_length);

  e->actual_argc = img->ip_argc;
  e->actual_envc = img->ip_envc;

  // Calculate our argc and envc based on the number of null bytes we find in
  // the buffer.
  e->argc = MIN(e->actual_argc,
                str_num(&(e->flexible_data[e->argv_offset]), arg_length));
  e->envc = MIN(e->actual_envc,
                str_num(&(e->flexible_data[e->envv_offset]), env_length));

  e->pid = proc_pid(p);
  e->ppid = proc_ppid(p);
  e->owner_uid = 0;
  e->owner_gid = 0;
  e->mode = -1;
  vfs_context_t context = vfs_context_create(NULL);
  if (context) {
    struct vnode_attr vattr = {0};
    VATTR_INIT(&vattr);
    VATTR_WANTED(&vattr, va_uid);
    VATTR_WANTED(&vattr, va_gid);
    VATTR_WANTED(&vattr, va_mode);
    VATTR_WANTED(&vattr, va_create_time);
    VATTR_WANTED(&vattr, va_access_time);
    VATTR_WANTED(&vattr, va_modify_time);
    VATTR_WANTED(&vattr, va_change_time);

    if (vnode_getattr(vp, &vattr, context) == 0) {
      e->owner_uid = vattr.va_uid;
      e->owner_gid = vattr.va_gid;
      e->mode = vattr.va_mode;
      e->create_time = vattr.va_create_time.tv_sec;
      e->access_time = vattr.va_access_time.tv_sec;
      e->modify_time = vattr.va_modify_time.tv_sec;
      e->change_time = vattr.va_change_time.tv_sec;
    }

    vfs_context_rele(context);
  }

  e->uid = kauth_cred_getruid(new_cred);
  e->euid = kauth_cred_getuid(new_cred);

  e->gid = kauth_cred_getrgid(new_cred);
  e->egid = kauth_cred_getgid(new_cred);

  vn_getpath(vp, e->path, &path_len);

  osquery_cqueue_commit(cqueue, e);
error_exit:

  return 0;
}
Esempio n. 11
0
/* works like PFlushVolumeData */
void
darwin_notify_perms(struct unixuser *auser, int event)
{
    int i;
    struct afs_q *tq, *uq = NULL;
    struct vcache *tvc, *hnext;
    int isglock = ISAFS_GLOCK();
    struct vnode *vp;
    struct vnode_attr va;
    int isctxtowner = 0;

    if (!afs_darwin_fsevents)
	return;

    VATTR_INIT(&va);
    VATTR_SET(&va, va_mode, 0777);
    if (event & UTokensObtained)
	VATTR_SET(&va, va_uid, auser->uid);
    else
	VATTR_SET(&va, va_uid, -2); /* nobody */

    if (!isglock)
	AFS_GLOCK();
    if (!(vfs_context_owner == current_thread())) {
	get_vfs_context();
	isctxtowner = 1;
    }
loop:
    ObtainReadLock(&afs_xvcache);
    for (i = 0; i < VCSIZE; i++) {
	for (tq = afs_vhashTV[i].prev; tq != &afs_vhashTV[i]; tq = uq) {
	    uq = QPrev(tq);
	    tvc = QTOVH(tq);
	    if (tvc->f.states & CDeadVnode) {
		/* we can afford to be best-effort */
		continue;
	    }
	    /* no per-file acls, so only notify on directories */
	    if (!(vp = AFSTOV(tvc)) || !vnode_isdir(AFSTOV(tvc)))
		continue;
	    /* dynroot object. no callbacks. anonymous ACL. just no. */
	    if (afs_IsDynrootFid(&tvc->f.fid))
		continue;
	    /* no fake fsevents on mount point sources. leaks refs */
	    if (tvc->mvstat == 1)
		continue;
	    /* if it's being reclaimed, just pass */
	    if (vnode_get(vp))
		continue;
	    if (vnode_ref(vp)) {
		AFS_GUNLOCK();
		vnode_put(vp);
		AFS_GLOCK();
		continue;
	    }
	    ReleaseReadLock(&afs_xvcache);
	    /* Avoid potentially re-entering on this lock */
	    if (0 == NBObtainWriteLock(&tvc->lock, 234)) {
		tvc->f.states |= CEvent;
		AFS_GUNLOCK();
		vnode_setattr(vp, &va, afs_osi_ctxtp);
		tvc->f.states &= ~CEvent;
		vnode_put(vp);
		AFS_GLOCK();
		ReleaseWriteLock(&tvc->lock);
	    }
	    ObtainReadLock(&afs_xvcache);
	    uq = QPrev(tq);
	    /* our tvc ptr is still good until now */
	    AFS_FAST_RELE(tvc);
	}
    }
    ReleaseReadLock(&afs_xvcache);
    if (isctxtowner)
	put_vfs_context();
    if (!isglock)
	AFS_GUNLOCK();
}
Esempio n. 12
0
kern_return_t
map_fd_funneled(
	int			fd,
	vm_object_offset_t	offset,
	vm_offset_t		*va,
	boolean_t		findspace,
	vm_size_t		size)
{
	kern_return_t	result;
	struct fileproc	*fp;
	struct vnode	*vp;
	void *	pager;
	vm_offset_t	map_addr=0;
	vm_size_t	map_size;
	int		err=0;
	vm_map_t	my_map;
	proc_t		p = current_proc();
	struct vnode_attr vattr;

	/*
	 *	Find the inode; verify that it's a regular file.
	 */

	err = fp_lookup(p, fd, &fp, 0);
	if (err)
		return(err);
	
	if (fp->f_fglob->fg_type != DTYPE_VNODE){
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	if (!(fp->f_fglob->fg_flag & FREAD)) {
		err = KERN_PROTECTION_FAILURE;
		goto bad;
	}

	vp = (struct vnode *)fp->f_fglob->fg_data;
	err = vnode_getwithref(vp);
	if(err != 0) 
		goto bad;

	if (vp->v_type != VREG) {
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/*
	 * POSIX: mmap needs to update access time for mapped files
	 */
	if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
		VATTR_INIT(&vattr);
		nanotime(&vattr.va_access_time);
		VATTR_SET_ACTIVE(&vattr, va_access_time);
		vnode_setattr(vp, &vattr, vfs_context_current());
	}
	
	if (offset & PAGE_MASK_64) {
		printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm);
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}
	map_size = round_page(size);

	/*
	 * Allow user to map in a zero length file.
	 */
	if (size == 0) {
		(void)vnode_put(vp);
		err = KERN_SUCCESS;
		goto bad;
	}
	/*
	 *	Map in the file.
	 */
	pager = (void *)ubc_getpager(vp);
	if (pager == NULL) {
		(void)vnode_put(vp);
		err = KERN_FAILURE;
		goto bad;
	}


	my_map = current_map();

	result = vm_map_64(
			my_map,
			&map_addr, map_size, (vm_offset_t)0, 
			VM_FLAGS_ANYWHERE, pager, offset, TRUE,
			VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
	if (result != KERN_SUCCESS) {
		(void)vnode_put(vp);
		err = result;
		goto bad;
	}


	if (!findspace) {
		vm_offset_t	dst_addr;
		vm_map_copy_t	tmp;

		if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr))	||
					trunc_page_32(dst_addr) != dst_addr) {
			(void) vm_map_remove(
					my_map,
					map_addr, map_addr + map_size,
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}

		result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
				       (vm_map_size_t)map_size, TRUE, &tmp);
		if (result != KERN_SUCCESS) {
			
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}

		result = vm_map_copy_overwrite(my_map,
					(vm_map_address_t)dst_addr, tmp, FALSE);
		if (result != KERN_SUCCESS) {
			vm_map_copy_discard(tmp);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}
	} else {
		if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) {
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}
	}

	ubc_setthreadcred(vp, current_proc(), current_thread());
	(void)ubc_map(vp, (PROT_READ | PROT_EXEC));
	(void)vnode_put(vp);
	err = 0;
bad:
	fp_drop(p, fd, fp, 0);
	return (err);
}
Esempio n. 13
0
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct		vnode *vp;
	int			flags;
	int			prot, file_prot;
	int			err=0;
	vm_map_t		user_map;
	kern_return_t		result;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t		user_size;
	vm_object_offset_t	pageoff;
	vm_object_offset_t	file_pos;
	int			alloc_flags=0;
	boolean_t		docow;
	vm_prot_t		maxprot;
	void 			*handle;
	vm_pager_t		pager;
	int 			mapanon=0;
	int 			fpref=0;
	int error =0;
	int fd = uap->fd;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The vm code does not have prototypes & compiler doesn't do the'
	 * the right thing when you cast 64bit value and pass it in function 
	 * call. So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;			/* low end... */
	user_size = mach_vm_round_page(user_size);	/* hi end */


	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
		return (EINVAL);
	}
#ifdef notyet
	/* DO not have apis to get this info, need to wait till then*/
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking. 
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
					    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}
			
		handle = NULL;
		maxprot = VM_PROT_ALL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		if(fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if(error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);
		
		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}
		
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out. 
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
 					/*
 					 * check for write access
 					 *
 					 * Note that we already made this check when granting FWRITE
 					 * against the file, so it seems redundant here.
 					 */
 					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
 
 					/* if not granted for any reason, but we wanted it, bad */
 					if ((prot & PROT_WRITE) && (error != 0)) {
 						vnode_put(vp);
  						goto bad;
  					}
 
 					/* if writable, remember */
 					if (error == 0)
  						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0)  {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
		        if (!mapanon)
			        (void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);
		if (result != KERN_SUCCESS) 
				goto out;
	} else {
		pager = (vm_pager_t)ubc_getpager(vp);
		
		if (pager == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 * 	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 (ipc_port_t)pager, file_pos,
						 docow, prot, maxprot, 
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);

		if (result != KERN_SUCCESS)  {
				(void)vnode_put(vp);
				goto out;
		}

		file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
		if (docow) {
			/* private mapping: won't write to the file */
			file_prot &= ~PROT_WRITE;
		}
		(void) ubc_map(vp, file_prot);
	}

	if (!mapanon)
		(void)vnode_put(vp);

out:
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error =  ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error =  EACCES;
		break;
	default:
		error =  EINVAL;
		break;
	}
bad:
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

	return(error);
}
Esempio n. 14
0
/* kauth file scope listener 
 * this allows to detect files written to the filesystem
 * arg2 contains a flag KAUTH_FILEOP_CLOSE which is set if a modified file is being closed
 * this way we don't need to trace every close(), only the ones writing to the filesystem
 */
static int
fileop_scope_listener(kauth_cred_t    credential,
                      void *          idata,
                      kauth_action_t  action,
                      uintptr_t       arg0,     /* vnode reference */
                      uintptr_t       arg1,     /* full path to file being closed */
                      uintptr_t       arg2,     /* flags */
                      uintptr_t       arg3)
{
    /* ignore all actions except FILE_CLOSE */
    if (action != KAUTH_FILEOP_CLOSE)
    {
        return KAUTH_RESULT_DEFER;
    }
    
    /* ignore operations with bad data */
    if (credential == NULL || (vnode_t)arg0 == NULL || (char*)arg1 == NULL)
    {
        ERROR_MSG("Arguments contain null pointers!");
        return KAUTH_RESULT_DEFER;
    }
    
    /* ignore closes on folders, character and block devices */
    switch ( vnode_vtype((vnode_t)arg0) )
    {
        case VDIR:
        case VCHR:
        case VBLK:
            return KAUTH_RESULT_DEFER;
        default:
            break;
    }
    
    /* we are only interested when a modified file is being closed */
    if ((int)arg2 != KAUTH_FILEOP_CLOSE_MODIFIED)
    {
        return KAUTH_RESULT_DEFER;
    }
    
    char *file_path = (char*)arg1;
    /* get information from current proc trying to write to the vnode */
    proc_t proc = current_proc();
    pid_t mypid = proc_pid(proc);
    char myprocname[MAXCOMLEN+1] = {0};
    proc_name(mypid, myprocname, sizeof(myprocname));

    /* retrieve the vnode attributes, we can get a lot of vnode information from here */
    struct vnode_attr vap = {0};
    vfs_context_t context = vfs_context_create(NULL);
    /* initialize the structure fields we are interested in
     * reference vn_stat_noauth() xnu/bsd/vfs/vfs_vnops.c
     */
    VATTR_INIT(&vap);
    VATTR_WANTED(&vap, va_mode);
    VATTR_WANTED(&vap, va_type);
    VATTR_WANTED(&vap, va_uid);
    VATTR_WANTED(&vap, va_gid);
    VATTR_WANTED(&vap, va_data_size);
    VATTR_WANTED(&vap, va_flags);
    int attr_ok = 1;
    if ( vnode_getattr((vnode_t)arg0, &vap, context) != 0 )
    {
        /* in case of error permissions and filesize will be bogus */
        ERROR_MSG("failed to vnode_getattr");
        attr_ok = 0;
    }
    /* release the context we created, else kab00m! */
    vfs_context_rele(context);
    
    int error = 0;
    /* make sure we :
     * - were able to read the attributes
     * - file size is at least uint32_t 
     * - path starts with /Users
     */
    if ( attr_ok == 1 &&
         vap.va_data_size >= sizeof(uint32_t) &&
         strprefix(file_path, "/Users/") )
    {
        uint32_t magic = 0;
        /* read target vnode */
        uio_t uio = NULL;
        /* read from offset 0 */
        uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
        if (uio == NULL)
        {
            ERROR_MSG("uio_create returned null!");
            return KAUTH_RESULT_DEFER;
        }
        /* we just want to read 4 bytes to match the header */
        if ( (error = uio_addiov(uio, CAST_USER_ADDR_T(&magic), sizeof(uint32_t))) )
        {
            ERROR_MSG("uio_addiov returned error %d!", error);
            return KAUTH_RESULT_DEFER;
        }
        if ( (error = VNOP_READ((vnode_t)arg0, uio, 0, NULL)) )
        {
            ERROR_MSG("VNOP_READ failed %d!", error);
            return KAUTH_RESULT_DEFER;
        }
        else if (uio_resid(uio))
        {
            ERROR_MSG("uio_resid!");
            return KAUTH_RESULT_DEFER;
        }
        
        /* verify if it's a Mach-O file */
        if (magic == MH_MAGIC || magic == MH_MAGIC_64 || magic == FAT_CIGAM)
        {
            char *token = NULL;
            char *string = NULL;
            char *tofree = NULL;
            int library = 0;
            int preferences = 0;
            
            tofree = string = STRDUP(file_path, M_TEMP);
            while ((token = strsep(&string, "/")) != NULL)
            {
                if (strcmp(token, "Library") == 0)
                {
                    library = 1;
                }
                else if (library == 1 && strcmp(token, "Preferences") == 0)
                {
                    preferences = 1;
                }
            }
            _FREE(tofree, M_TEMP);
            /* we got a match into /Users/username/Library/Preferences, warn user about it */
            if (library == 1 && preferences == 1)
            {
                DEBUG_MSG("Found Mach-O written to %s by %s.", file_path, myprocname);
                char alert_msg[1025] = {0};
                snprintf(alert_msg, sizeof(alert_msg), "Process \"%s\" wrote Mach-O binary %s.\n This could be Hacking Team's malware!", myprocname, file_path);
                alert_msg[sizeof(alert_msg)-1] = '\0';
                
                /* log to syslog */
                printf("[WARNING] Process \"%s\" wrote Mach-O binary %s.\n This could be Hacking Team's malware!", myprocname, file_path);
                /* deprecated but still usable to display the alert */
                KUNCUserNotificationDisplayNotice(10,		// Timeout
                                                  0,		// Flags - default is Stop alert level
                                                  NULL,     // iconpath
                                                  NULL,     // soundpath
                                                  NULL,     // localization path
                                                  "Security Alert", // alert header
                                                  alert_msg, // alert message
                                                  "OK");	// button title
            }
        }
    }
    /* don't deny access, we are just here to observe */
    return KAUTH_RESULT_DEFER;
}
off_t DldIOShadowFile::expandFile( __in off_t lowTargetSize )
{
    assert( preemption_enabled() );
    
    off_t             target;
    off_t             currentLastExpansion;
#if defined( DBG )
    currentLastExpansion = DLD_IGNR_FSIZE;
#endif//DBG
    
    //
    // Make sure that we are root.  Growing a file
    // without zero filling the data is a security hole 
    // root would have access anyway so we'll allow it
    //
    
    //
    // TO DO - is_suser is not supported for 64 bit kext
    //

/*
#if defined(__i386__)
    assert( is_suser() );
    if( !is_suser() )
        return this->lastExpansion;// an unsafe unprotected access to 64b value
#elif defined(__x86_64__)
#endif//defined(__x86_64__)
*/

    
    //
    // check that the limit has not been reached
    //
    if( DLD_IGNR_FSIZE != this->maxSize  && this->lastExpansion == this->maxSize )
        return this->maxSize;// a safe access to 64b value as the value can't change
    
    
    this->LockExclusive();
    {// start of the lock
        
        off_t   valueToAdd;
        
        currentLastExpansion = this->lastExpansion;
        
        //
        // try to extend the file on 128 MB
        // ( the same value is used as a maximum value for a mapping
        //   range in DldIOShadow::processFileWriteWQ )
        //
        valueToAdd = 0x8*0x1000*0x1000;
        
        
        if( this->offset > this->lastExpansion )
            target = this->offset + valueToAdd;
        else
            target = this->lastExpansion + valueToAdd;
        
        if( target < lowTargetSize )
            target  = lowTargetSize;
        
        if( DLD_IGNR_FSIZE != this->maxSize && target > this->maxSize )
            target = this->maxSize;
        
    }// end of the lock
    this->UnLockExclusive();
    
    
    if( target < lowTargetSize ){
        
        //
        // there is no point for extension as the goal won't be reached, fail the request
        //
        return currentLastExpansion;
    }
    
    assert( DLD_IGNR_FSIZE != currentLastExpansion && currentLastExpansion <= target );
    
    //
    // extend the file
    //
    vfs_context_t       vfs_context;
    int                 error;
    
    error = vnode_getwithref( this->vnode );
    assert( !error );
    if( !error ){
        
        struct vnode_attr	va;
        
        VATTR_INIT(&va);
        VATTR_SET(&va, va_data_size, target);
        va.va_vaflags =(IO_NOZEROFILL) & 0xffff;
        
        vfs_context = vfs_context_create( NULL );
        assert( vfs_context );
        if( vfs_context ){
            
            this->LockExclusive();
            {// start of the lock
                
                assert( currentLastExpansion <= this->lastExpansion );
                //
                // do not truncate the file incidentally!
                //
                if( target > this->offset ){
                    
                    error = vnode_setattr( this->vnode, &va, vfs_context );
                    if( !error ){
                        
                        this->lastExpansion = target;
                        currentLastExpansion = target;
                    }
                    
                } else {
                  
                    //
                    // the file has been extended by the write operation
                    //
                    this->lastExpansion = this->offset;
                    currentLastExpansion = this->offset;

                }// end if( target < this->offset )
                
            }// end of the lock
            this->UnLockExclusive();
            
            vfs_context_rele( vfs_context );
            DLD_DBG_MAKE_POINTER_INVALID( vfs_context );
            
        }// end if( vfs_context )
        
        vnode_put( this->vnode );
        
    }// end if( !error )
    
    assert( DLD_IGNR_FSIZE != currentLastExpansion );
    
    return currentLastExpansion;
}
Esempio n. 16
0
static int
nullfs_special_getattr(struct vnop_getattr_args * args)
{
	mount_t mp                  = vnode_mount(args->a_vp);
	struct null_mount * null_mp = MOUNTTONULLMOUNT(mp);

	ino_t ino = NULL_ROOT_INO;
	struct vnode_attr covered_rootattr;
	vnode_t checkvp = null_mp->nullm_lowerrootvp;

	VATTR_INIT(&covered_rootattr);
	VATTR_WANTED(&covered_rootattr, va_uid);
	VATTR_WANTED(&covered_rootattr, va_gid);
	VATTR_WANTED(&covered_rootattr, va_create_time);
	VATTR_WANTED(&covered_rootattr, va_modify_time);
	VATTR_WANTED(&covered_rootattr, va_access_time);

	/* prefer to get this from the lower root vp, but if not (i.e. forced unmount
	 * of lower fs) try the mount point covered vnode */
	if (vnode_getwithvid(checkvp, null_mp->nullm_lowerrootvid)) {
		checkvp = vfs_vnodecovered(mp);
		if (checkvp == NULL) {
			return EIO;
		}
	}

	int error = vnode_getattr(checkvp, &covered_rootattr, args->a_context);

	vnode_put(checkvp);
	if (error) {
		/* we should have been able to get attributes fore one of the two choices so
		 * fail if we didn't */
		return error;
	}

	/* we got the attributes of the vnode we cover so plow ahead */
	if (args->a_vp == null_mp->nullm_secondvp) {
		ino = NULL_SECOND_INO;
	}

	VATTR_RETURN(args->a_vap, va_type, vnode_vtype(args->a_vp));
	VATTR_RETURN(args->a_vap, va_rdev, 0);
	VATTR_RETURN(args->a_vap, va_nlink, 3);      /* always just ., .., and the child */
	VATTR_RETURN(args->a_vap, va_total_size, 0); // hoping this is ok

	VATTR_RETURN(args->a_vap, va_data_size, 0); // hoping this is ok
	VATTR_RETURN(args->a_vap, va_data_alloc, 0);
	VATTR_RETURN(args->a_vap, va_iosize, vfs_statfs(mp)->f_iosize);
	VATTR_RETURN(args->a_vap, va_fileid, ino);
	VATTR_RETURN(args->a_vap, va_linkid, ino);
	VATTR_RETURN(args->a_vap, va_fsid, vfs_statfs(mp)->f_fsid.val[0]); // return the fsid of the mount point
	VATTR_RETURN(args->a_vap, va_filerev, 0);
	VATTR_RETURN(args->a_vap, va_gen, 0);
	VATTR_RETURN(args->a_vap, va_flags, UF_HIDDEN); /* mark our fake directories as hidden. People
	                                                   shouldn't be enocouraged to poke around in them */

	if (ino == NULL_SECOND_INO) {
		VATTR_RETURN(args->a_vap, va_parentid, NULL_ROOT_INO); /* no parent at the root, so
		                                                          the only other vnode that
		                                                          goes through this path is
		                                                          second and its parent is
		                                                          1.*/
	}

	if (VATTR_IS_ACTIVE(args->a_vap, va_mode)) {
		/* force dr_xr_xr_x */
		VATTR_RETURN(args->a_vap, va_mode, S_IFDIR | S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
	}
	if (VATTR_IS_ACTIVE(args->a_vap, va_uid)) {
		VATTR_RETURN(args->a_vap, va_uid, covered_rootattr.va_uid);
	}
	if (VATTR_IS_ACTIVE(args->a_vap, va_gid)) {
		VATTR_RETURN(args->a_vap, va_gid, covered_rootattr.va_gid);
	}

	if (VATTR_IS_ACTIVE(args->a_vap, va_create_time)) {
		VATTR_SET_SUPPORTED(args->a_vap, va_create_time);
		args->a_vap->va_create_time.tv_sec  = covered_rootattr.va_create_time.tv_sec;
		args->a_vap->va_create_time.tv_nsec = covered_rootattr.va_create_time.tv_nsec;
	}
	if (VATTR_IS_ACTIVE(args->a_vap, va_modify_time)) {
		VATTR_SET_SUPPORTED(args->a_vap, va_modify_time);
		args->a_vap->va_modify_time.tv_sec  = covered_rootattr.va_modify_time.tv_sec;
		args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_modify_time.tv_nsec;
	}
	if (VATTR_IS_ACTIVE(args->a_vap, va_access_time)) {
		VATTR_SET_SUPPORTED(args->a_vap, va_access_time);
		args->a_vap->va_modify_time.tv_sec  = covered_rootattr.va_access_time.tv_sec;
		args->a_vap->va_modify_time.tv_nsec = covered_rootattr.va_access_time.tv_nsec;
	}

	return 0;
}
Esempio n. 17
0
/*
 * Lookup/Create an extended attribute entry.
 *
 * Input arguments:
 *	dzp	- znode for hidden attribute directory
 *	name	- name of attribute
 *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
 *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
 *
 * Output arguments:
 *	vpp	- pointer to the vnode for the entry (NULL if there isn't one)
 *
 * Return value: 0 on success or errno value on failure.
 */
int
zfs_obtain_xattr(znode_t *dzp, const char *name, mode_t mode, cred_t *cr,
                 vnode_t **vpp, int flag)
{
	znode_t  *xzp = NULL;
	zfsvfs_t  *zfsvfs = dzp->z_zfsvfs;
	zilog_t  *zilog;
	zfs_dirlock_t  *dl;
	dmu_tx_t  *tx;
	struct vnode_attr  vattr;
	int error;
	struct componentname cn;
	zfs_acl_ids_t	acl_ids;

	/* zfs_dirent_lock() expects a component name */
	bzero(&cn, sizeof (cn));
	cn.cn_nameiop = LOOKUP;
	cn.cn_flags = ISLASTCN;
	cn.cn_nameptr = (char *)name;
	cn.cn_namelen = strlen(name);

    ZFS_ENTER(zfsvfs);
    ZFS_VERIFY_ZP(dzp);
    zilog = zfsvfs->z_log;

	VATTR_INIT(&vattr);
	VATTR_SET(&vattr, va_type, VREG);
	VATTR_SET(&vattr, va_mode, mode & ~S_IFMT);

	if ((error = zfs_acl_ids_create(dzp, 0,
                                    &vattr, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
 top:
	/* Lock the attribute entry name. */
	if ( (error = zfs_dirent_lock(&dl, dzp, (char *)name, &xzp, flag,
                                  NULL, &cn)) ) {
		goto out;
	}
	/* If the name already exists, we're done. */
	if (xzp != NULL) {
		zfs_dirent_unlock(dl);
		goto out;
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	//dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
	//dmu_tx_hold_bonus(tx, dzp->z_id);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, (char *)name);

#if 1 // FIXME
	if (dzp->z_pflags & ZFS_INHERIT_ACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
	}
#endif
    zfs_sa_upgrade_txholds(tx, dzp);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		goto out;
	}

	zfs_mknode(dzp, &vattr, tx, cr, 0, &xzp, &acl_ids);

    /*
      ASSERT(xzp->z_id == zoid);
    */
	(void) zfs_link_create(dl, xzp, tx, ZNEW);
	zfs_log_create(zilog, tx, TX_CREATE, dzp, xzp, (char *)name,
                   NULL /* vsecp */, 0 /*acl_ids.z_fuidp*/, &vattr);
    zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);
    zfs_znode_wait_vnode(xzp);

	zfs_dirent_unlock(dl);
 out:
	if (error == EEXIST)
		error = ENOATTR;
	if (xzp)
		*vpp = ZTOV(xzp);

    ZFS_EXIT(zfsvfs);
	return (error);
}
Esempio n. 18
0
struct kern_direct_file_io_ref_t *
kern_open_file_for_direct_io(const char * name, 
			     kern_get_file_extents_callback_t callback, 
			     void * callback_ref,
			     dev_t * partition_device_result,
			     dev_t * image_device_result,
                             uint64_t * partitionbase_result,
                             uint64_t * maxiocount_result,
                             uint32_t * oflags,
                             off_t offset,
                             caddr_t addr,
                             vm_size_t len)
{
    struct kern_direct_file_io_ref_t * ref;

    proc_t			p;
    struct vnode_attr		va;
    int				error;
    off_t			f_offset;
    uint64_t                    fileblk;
    size_t                      filechunk;
    uint64_t                    physoffset;
    dev_t			device;
    dev_t			target = 0;
    int			        isssd = 0;
    uint32_t                    flags = 0;
    uint32_t			blksize;
    off_t 			maxiocount, count;
    boolean_t                   locked = FALSE;

    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
    void * p1 = NULL;
    void * p2 = NULL;

    error = EFAULT;

    ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
    if (!ref)
    {
	error = EFAULT;
    	goto out;
    }

    bzero(ref, sizeof(*ref));
    p = kernproc;
    ref->ctx = vfs_context_create(vfs_context_current());

    if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx)))
        goto out;

    if (addr && len)
    {
	if ((error = kern_write_file(ref, offset, addr, len)))
	    goto out;
    }

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_rdev);
    VATTR_WANTED(&va, va_fsid);
    VATTR_WANTED(&va, va_data_size);
    VATTR_WANTED(&va, va_nlink);
    error = EFAULT;
    if (vnode_getattr(ref->vp, &va, ref->ctx))
    	goto out;

    kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev));
    kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid));
    kprintf("vp size %qd\n", va.va_data_size);

    if (ref->vp->v_type == VREG)
    {
	/* Don't dump files with links. */
	if (va.va_nlink != 1)
	    goto out;

        device = va.va_fsid;
        p1 = &device;
        p2 = p;
        do_ioctl = &file_ioctl;
    }
    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
	/* Partition. */
        device = va.va_rdev;

        p1 = ref->vp;
        p2 = ref->ctx;
        do_ioctl = &device_ioctl;
    }
    else
    {
	/* Don't dump to non-regular files. */
	error = EFAULT;
        goto out;
    }
    ref->device = device;

    // get block size

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
    if (error)
        goto out;

    if (ref->vp->v_type == VREG)
        ref->filelength = va.va_data_size;
    else
    {
        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
        if (error)
            goto out;
	ref->filelength = fileblk * ref->blksize;    
    }

    // pin logical extents

    error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
    if (error && (ENOTTY != error)) goto out;
    ref->pinned = (error == 0);

    // generate the block list

    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
    if (error)
        goto out;
    locked = TRUE;

    f_offset = 0;
    while (f_offset < ref->filelength) 
    {
        if (ref->vp->v_type == VREG)
        {
            filechunk = 1*1024*1024*1024;
            daddr64_t blkno;

            error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, 0, NULL);
            if (error)
                goto out;

            fileblk = blkno * ref->blksize;
        }
        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
        {
            fileblk = f_offset;
            filechunk = f_offset ? 0 : ref->filelength;
        }

        physoffset = 0;
        while (physoffset < filechunk)
        {
            dk_physical_extent_t getphysreq;
            bzero(&getphysreq, sizeof(getphysreq));

            getphysreq.offset = fileblk + physoffset;
            getphysreq.length = (filechunk - physoffset);
            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
            if (error)
                goto out;
            if (!target)
            {
                target = getphysreq.dev;
            }
            else if (target != getphysreq.dev)
            {
                error = ENOTSUP;
                goto out;
            }
            callback(callback_ref, getphysreq.offset, getphysreq.length);
            physoffset += getphysreq.length;
        }
        f_offset += filechunk;
    }
    callback(callback_ref, 0ULL, 0ULL);

    if (ref->vp->v_type == VREG)
        p1 = &target;

    // get partition base

    error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
    if (error)
        goto out;

    // get block size & constraints

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
    if (error)
        goto out;

    maxiocount = 1*1024*1024*1024;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    kprintf("max io 0x%qx bytes\n", maxiocount);
    if (maxiocount_result)
        *maxiocount_result = maxiocount;

    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
    if (!error && isssd)
        flags |= kIOHibernateOptionSSD;

    if (partition_device_result)
        *partition_device_result = device;
    if (image_device_result)
        *image_device_result = target;
    if (flags)
        *oflags = flags;

out:
    kprintf("kern_open_file_for_direct_io(%d)\n", error);

    if (error && locked)
    {
        p1 = &device;
        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
    }

    if (error && ref)
    {
	if (ref->vp)
	{
	    vnode_close(ref->vp, FWRITE, ref->ctx);
	    ref->vp = NULLVP;
	}
	vfs_context_rele(ref->ctx);
	kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
	ref = NULL;
    }
    return(ref);
}
Esempio n. 19
0
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
shared_region_map_np(
	struct proc				*p,
	struct shared_region_map_np_args	*uap,
	__unused int				*retvalp)
{
	int				error;
	kern_return_t			kr;
	int				fd;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	user_addr_t			user_mappings;
	struct shared_file_mapping_np	*mappings;
#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	unsigned int			mappings_count;
	vm_size_t			mappings_size;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	mappings_count = 0;
	mappings_size = 0;
	mappings = NULL;
	fp = NULL;
	vp = NULL;

	/* get file descriptor for shared region cache file */
	fd = uap->fd;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	}
	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}
			 
	/* get the list of mappings the caller wants us to establish */
	mappings_count = uap->count;	/* number of mappings */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = 0;	/* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mappings_count));
		error = EINVAL;
		goto done;
	}

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
		goto done;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	/*
	 * The mapping was successful.  Let the buffer cache know
	 * that we've mapped that file with these protections.  This
	 * prevents the vnode from getting recycled while it's mapped.
	 */
	(void) ubc_map(vp, VM_PROT_READ);
	error = 0;

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}
Esempio n. 20
0
IOReturn FileNVRAM::read_buffer(char** aBuffer, uint64_t* aLength, vfs_context_t aCtx)
{
	IOReturn error = 0;

	struct vnode * vp;
	struct vnode_attr va;

	if (aCtx)
	{
		if ((error = vnode_open(FILE_NVRAM_PATH, (O_RDONLY | FREAD | O_NOFOLLOW), S_IRUSR, VNODE_LOOKUP_NOFOLLOW, &vp, aCtx)))
		{
			printf("failed opening vnode at path %s, errno %d\n", FILE_NVRAM_PATH, error);
			
			return error;
		}
		else
		{
			if ((error = vnode_isreg(vp)) == VREG)
			{
				VATTR_INIT(&va);
				VATTR_WANTED(&va, va_data_size);	/* size in bytes of the fork managed by current vnode */

				// Determine size of vnode
				if ((error = vnode_getattr(vp, &va, aCtx)))
				{
					printf("FileNVRAM.kext: Error, failed to determine file size of %s, errno %d.\n", FILE_NVRAM_PATH, error);
				}
				else
				{
					if (aLength)
					{
						*aLength = va.va_data_size;
					}
					
					*aBuffer = (char *)IOMalloc((size_t)va.va_data_size);
					int len = (int)va.va_data_size;
					
					if ((error = vn_rdwr(UIO_READ, vp, *aBuffer, len, 0, UIO_SYSSPACE, IO_NOCACHE|IO_NODELOCKED|IO_UNIT, vfs_context_ucred(aCtx), (int *) 0, vfs_context_proc(aCtx))))
					{
						printf("FileNVRAM.kext: Error, writing to vnode(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
					}
				}
				
				if ((error = vnode_close(vp, 0, aCtx)))
				{
					printf("FileNVRAM.kext: Error, vnode_close(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
				}
			}
			else
			{
				printf("FileNVRAM.kext: Error, vnode_isreg(%s) failed with error %d!\n", FILE_NVRAM_PATH, error);
			}
		}
	}
	else
	{
		printf("FileNVRAM.kext: aCtx == NULL!\n");
		error = 0xFFFF; // EINVAL;
	}
	
	return error;
}
Esempio n. 21
0
/* question: does afs_create need to set CDirty in the adp or the avc?
 * I think we can get away without it, but I'm not sure.  Note that
 * afs_setattr is called in here for truncation.
 */
#ifdef AFS_SGI64_ENV
int
afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, int flags,
	   int amode, struct vcache **avcp, afs_ucred_t *acred)
#else /* AFS_SGI64_ENV */
int
afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs,
	   enum vcexcl aexcl, int amode, struct vcache **avcp,
	   afs_ucred_t *acred)
#endif				/* AFS_SGI64_ENV */
{
    afs_int32 origCBs, origZaps, finalZaps;
    struct vrequest *treq = NULL;
    afs_int32 code;
    struct afs_conn *tc;
    struct VenusFid newFid;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus *OutFidStatus, *OutDirStatus;
    struct AFSVolSync tsync;
    struct AFSCallBack CallBack;
    afs_int32 now;
    struct dcache *tdc;
    afs_size_t offset, len;
    struct server *hostp = 0;
    struct vcache *tvc;
    struct volume *volp = 0;
    struct afs_fakestat_state fakestate;
    struct rx_connection *rxconn;
    XSTATS_DECLS;
    OSI_VC_CONVERT(adp);

    AFS_STATCNT(afs_create);

    OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    memset(&InStatus, 0, sizeof(InStatus));

    if ((code = afs_CreateReq(&treq, acred)))
	goto done2;

    afs_Trace3(afs_iclSetp, CM_TRACE_CREATE, ICL_TYPE_POINTER, adp,
	       ICL_TYPE_STRING, aname, ICL_TYPE_INT32, amode);

    afs_InitFakeStat(&fakestate);

#ifdef AFS_SGI65_ENV
    /* If avcp is passed not null, it's the old reference to this file.
     * We can use this to avoid create races. For now, just decrement
     * the reference count on it.
     */
    if (*avcp) {
	AFS_RELE(AFSTOV(*avcp));
	*avcp = NULL;
    }
#endif

    if (strlen(aname) > AFSNAMEMAX) {
	code = ENAMETOOLONG;
	goto done3;
    }

    if (!afs_ENameOK(aname)) {
	code = EINVAL;
	goto done3;
    }
    switch (attrs->va_type) {
    case VBLK:
    case VCHR:
#if	!defined(AFS_SUN5_ENV)
    case VSOCK:
#endif
    case VFIFO:
	/* We don't support special devices or FIFOs */
	code = EINVAL;
	goto done3;
    default:
	;
    }
    AFS_DISCON_LOCK();

    code = afs_EvalFakeStat(&adp, &fakestate, treq);
    if (code)
	goto done;
  tagain:
    code = afs_VerifyVCache(adp, treq);
    if (code)
	goto done;

    /** If the volume is read-only, return error without making an RPC to the
      * fileserver
      */
    if (adp->f.states & CRO) {
	code = EROFS;
	goto done;
    }

    if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
        code = ENETDOWN;
        goto done;
    }

    tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock, 135);
    if (tdc)
	ObtainSharedLock(&tdc->lock, 630);

    /*
     * Make sure that the data in the cache is current. We may have
     * received a callback while we were waiting for the write lock.
     */
    if (!(adp->f.states & CStatd)
	|| (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) {
	ReleaseWriteLock(&adp->lock);
	if (tdc) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	}
	goto tagain;
    }
    if (tdc) {
	/* see if file already exists.  If it does, we only set 
	 * the size attributes (to handle O_TRUNC) */
	code = afs_dir_Lookup(tdc, aname, &newFid.Fid);	/* use dnlc first xxx */
	if (code == 0) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	    ReleaseWriteLock(&adp->lock);
#ifdef AFS_SGI64_ENV
	    if (flags & VEXCL) {
#else
	    if (aexcl != NONEXCL) {
#endif
		code = EEXIST;	/* file exists in excl mode open */
		goto done;
	    }
	    /* found the file, so use it */
	    newFid.Cell = adp->f.fid.Cell;
	    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	    tvc = NULL;
	    if (newFid.Fid.Unique == 0) {
		tvc = afs_LookupVCache(&newFid, treq, NULL, adp, aname);
	    }
	    if (!tvc)		/* lookup failed or wasn't called */
		tvc = afs_GetVCache(&newFid, treq, NULL, NULL);

	    if (tvc) {
		/* if the thing exists, we need the right access to open it.
		 * we must check that here, since no other checks are
		 * made by the open system call */
		len = attrs->va_size;	/* only do the truncate */
		/*
		 * We used to check always for READ access before; the
		 * problem is that we will fail if the existing file
		 * has mode -w-w-w, which is wrong.
		 */
		if ((amode & VREAD)
		    && !afs_AccessOK(tvc, PRSFS_READ, treq, CHECK_MODE_BITS)) {
		    afs_PutVCache(tvc);
		    code = EACCES;
		    goto done;
		}
#if defined(AFS_DARWIN80_ENV)
		if ((amode & VWRITE) || VATTR_IS_ACTIVE(attrs, va_data_size))
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		if ((amode & VWRITE) || (attrs->va_mask & AT_SIZE))
#else
		if ((amode & VWRITE) || len != 0xffffffff)
#endif
		{
		    /* needed for write access check */
		    tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
		    tvc->f.parent.unique = adp->f.fid.Fid.Unique;
		    /* need write mode for these guys */
		    if (!afs_AccessOK
			(tvc, PRSFS_WRITE, treq, CHECK_MODE_BITS)) {
			afs_PutVCache(tvc);
			code = EACCES;
			goto done;
		    }
		}
#if defined(AFS_DARWIN80_ENV)
		if (VATTR_IS_ACTIVE(attrs, va_data_size))
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		if (attrs->va_mask & AT_SIZE)
#else
		if (len != 0xffffffff)
#endif
		{
		    if (vType(tvc) != VREG) {
			afs_PutVCache(tvc);
			code = EISDIR;
			goto done;
		    }
		    /* do a truncate */
#if defined(AFS_DARWIN80_ENV)
		    VATTR_INIT(attrs);
		    VATTR_SET_SUPPORTED(attrs, va_data_size);
		    VATTR_SET_ACTIVE(attrs, va_data_size);
#elif defined(UKERNEL)
		    attrs->va_mask = ATTR_SIZE;
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		    attrs->va_mask = AT_SIZE;
#else
		    VATTR_NULL(attrs);
#endif
		    attrs->va_size = len;
		    ObtainWriteLock(&tvc->lock, 136);
		    tvc->f.states |= CCreating;
		    ReleaseWriteLock(&tvc->lock);
#if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
#if defined(AFS_SGI64_ENV)
		    code =
			afs_setattr(VNODE_TO_FIRST_BHV((vnode_t *) tvc),
				    attrs, 0, acred);
#else
		    code = afs_setattr(tvc, attrs, 0, acred);
#endif /* AFS_SGI64_ENV */
#else /* SUN5 || SGI */
		    code = afs_setattr(tvc, attrs, acred);
#endif /* SUN5 || SGI */
		    ObtainWriteLock(&tvc->lock, 137);
		    tvc->f.states &= ~CCreating;
		    ReleaseWriteLock(&tvc->lock);
		    if (code) {
			afs_PutVCache(tvc);
			goto done;
		    }
		}
		*avcp = tvc;
	    } else
		code = ENOENT;	/* shouldn't get here */
	    /* make sure vrefCount bumped only if code == 0 */
	    goto done;
	}
    }
    
    /* if we create the file, we don't do any access checks, since
     * that's how O_CREAT is supposed to work */
    if (adp->f.states & CForeign) {
	origCBs = afs_allCBs;
	origZaps = afs_allZaps;
    } else {
	origCBs = afs_evenCBs;	/* if changes, we don't really have a callback */
	origZaps = afs_evenZaps;	/* number of even numbered vnodes discarded */
    }
    InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE | AFS_SETGROUP;
    InStatus.ClientModTime = osi_Time();
    InStatus.Group = (afs_int32) afs_cr_gid(acred);
    if (AFS_NFSXLATORREQ(acred)) {
	/*
	 * XXX The following is mainly used to fix a bug in the HP-UX
	 * nfs client where they create files with mode of 0 without
	 * doing any setattr later on to fix it.  * XXX
	 */
#if	defined(AFS_AIX_ENV)
	if (attrs->va_mode != -1) {
#else
#if	defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
	if (attrs->va_mask & AT_MODE) {
#else
	if (attrs->va_mode != ((unsigned short)-1)) {
#endif
#endif
	    if (!attrs->va_mode)
		attrs->va_mode = 0x1b6;	/* XXX default mode: rw-rw-rw XXX */
	}
    }

    if (!AFS_IS_DISCONNECTED) {
	/* If not disconnected, connect to the server.*/

    	InStatus.UnixModeBits = attrs->va_mode & 0xffff;	/* only care about protection bits */
    	do {
	    tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn);
	    if (tc) {
	    	hostp = tc->srvr->server;	/* remember for callback processing */
	    	now = osi_Time();
	    	XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_CREATEFILE);
	    	RX_AFS_GUNLOCK();
	    	code =
		    RXAFS_CreateFile(rxconn, (struct AFSFid *)&adp->f.fid.Fid,
				 aname, &InStatus, (struct AFSFid *)
				 &newFid.Fid, OutFidStatus, OutDirStatus,
				 &CallBack, &tsync);
	    	RX_AFS_GLOCK();
	    	XSTATS_END_TIME;
	    	CallBack.ExpirationTime += now;
	    } else
	    	code = -1;
    	} while (afs_Analyze
	         (tc, rxconn, code, &adp->f.fid, treq, AFS_STATS_FS_RPCIDX_CREATEFILE,
	          SHARED_LOCK, NULL));

	if ((code == EEXIST || code == UAEEXIST) &&
#ifdef AFS_SGI64_ENV
    	!(flags & VEXCL)
#else /* AFS_SGI64_ENV */
    	aexcl == NONEXCL
#endif
    	) {
	    /* if we get an EEXIST in nonexcl mode, just do a lookup */
	    if (tdc) {
	    	ReleaseSharedLock(&tdc->lock);
	    	afs_PutDCache(tdc);
	    }
	    ReleaseWriteLock(&adp->lock);


#if defined(AFS_SGI64_ENV)
	    code = afs_lookup(VNODE_TO_FIRST_BHV((vnode_t *) adp), aname, avcp,
				  NULL, 0, NULL, acred);
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
	    code = afs_lookup(adp, aname, avcp, NULL, 0, NULL, acred);
#elif defined(UKERNEL)
	    code = afs_lookup(adp, aname, avcp, acred, 0);
#elif !defined(AFS_DARWIN_ENV)
	    code = afs_lookup(adp, aname, avcp, acred);
#endif
	goto done;
        }

	if (code) {
	    if (code < 0) {
	    	ObtainWriteLock(&afs_xcbhash, 488);
	    	afs_DequeueCallback(adp);
	    	adp->f.states &= ~CStatd;
	    	ReleaseWriteLock(&afs_xcbhash);
	    	osi_dnlc_purgedp(adp);
	    }
	    ReleaseWriteLock(&adp->lock);
	    if (tdc) {
	    	ReleaseSharedLock(&tdc->lock);
	    	afs_PutDCache(tdc);
	    }
	goto done;
	}

    } else {
	/* Generate a fake FID for disconnected mode. */
	newFid.Cell = adp->f.fid.Cell;
	newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	afs_GenFakeFid(&newFid, VREG, 1);
    }				/* if (!AFS_IS_DISCON_RW) */

    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc)
	UpgradeSToWLock(&tdc->lock, 631);
    if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) {
	/* we can do it locally */
	ObtainWriteLock(&afs_xdcache, 291);
	code = afs_dir_Create(tdc, aname, &newFid.Fid);
	ReleaseWriteLock(&afs_xdcache);
	if (code) {
	    ZapDCE(tdc);
	    DZap(tdc);
	}
    }
    if (tdc) {
	ReleaseWriteLock(&tdc->lock);
	afs_PutDCache(tdc);
    }
    if (AFS_IS_DISCON_RW)
	adp->f.m.LinkCount++;

    newFid.Cell = adp->f.fid.Cell;
    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);
    volp = afs_FindVolume(&newFid, READ_LOCK);

    /* New tricky optimistic callback handling algorithm for file creation works
     * as follows.  We create the file essentially with no locks set at all.  File
     * server may thus handle operations from others cache managers as well as from
     * this very own cache manager that reference the file in question before
     * we managed to create the cache entry.  However, if anyone else changes
     * any of the status information for a file, we'll see afs_evenCBs increase
     * (files always have even fids).  If someone on this workstation manages
     * to do something to the file, they'll end up having to create a cache
     * entry for the new file.  Either we'll find it once we've got the afs_xvcache
     * lock set, or it was also *deleted* the vnode before we got there, in which case
     * we will find evenZaps has changed, too.  Thus, we only assume we have the right
     * status information if no callbacks or vnode removals have occurred to even
     * numbered files from the time the call started until the time that we got the xvcache
     * lock set.  Of course, this also assumes that any call that modifies a file first
     * gets a write lock on the file's vnode, but if that weren't true, the whole cache manager
     * would fail, since no call would be able to update the local vnode status after modifying
     * a file on a file server. */
    ObtainWriteLock(&afs_xvcache, 138);
    if (adp->f.states & CForeign)
	finalZaps = afs_allZaps;	/* do this before calling newvcache */
    else
	finalZaps = afs_evenZaps;	/* do this before calling newvcache */
    /* don't need to call RemoveVCB, since only path leaving a callback is the
     * one where we pass through afs_NewVCache.  Can't have queued a VCB unless
     * we created and freed an entry between file creation time and here, and the
     * freeing of the vnode will change evenZaps.  Don't need to update the VLRU
     * queue, since the find will only succeed in the event of a create race, and 
     * then the vcache will be at the front of the VLRU queue anyway...  */
    if (!(tvc = afs_FindVCache(&newFid, 0, DO_STATS))) {
	tvc = afs_NewVCache(&newFid, hostp);
	if (tvc) {
	    int finalCBs;
	    ObtainWriteLock(&tvc->lock, 139);

	    ObtainWriteLock(&afs_xcbhash, 489);
	    finalCBs = afs_evenCBs;
	    /* add the callback in */
	    if (adp->f.states & CForeign) {
		tvc->f.states |= CForeign;
		finalCBs = afs_allCBs;
	    }
	    if (origCBs == finalCBs && origZaps == finalZaps) {
		tvc->f.states |= CStatd;	/* we've fake entire thing, so don't stat */
		tvc->f.states &= ~CBulkFetching;
		if (!AFS_IS_DISCON_RW) {
		    tvc->cbExpires = CallBack.ExpirationTime;
		    afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime), volp);
		}
	    } else {
		afs_DequeueCallback(tvc);
		tvc->f.states &= ~(CStatd | CUnique);
		tvc->callback = 0;
		if (tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR))
		    osi_dnlc_purgedp(tvc);
	    }
	    ReleaseWriteLock(&afs_xcbhash);
	    if (AFS_IS_DISCON_RW) {
		afs_DisconAddDirty(tvc, VDisconCreate, 0);
		afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VREG);
	    } else {
		afs_ProcessFS(tvc, OutFidStatus, treq);
	    }

	    tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
	    tvc->f.parent.unique = adp->f.fid.Fid.Unique;
#if !defined(UKERNEL)
            if (volp && (volp->states & VPartVisible))
                tvc->f.states |= CPartVisible;
#endif
	    ReleaseWriteLock(&tvc->lock);
	    *avcp = tvc;
	    code = 0;
	} else
	    code = ENOENT;
    } else {
	/* otherwise cache entry already exists, someone else must
	 * have created it.  Comments used to say:  "don't need write
	 * lock to *clear* these flags" but we should do it anyway.
	 * Code used to clear stat bit and callback, but I don't see 
	 * the point -- we didn't have a create race, somebody else just
	 * snuck into NewVCache before we got here, probably a racing 
	 * lookup.
	 */
	*avcp = tvc;
	code = 0;
    }
    ReleaseWriteLock(&afs_xvcache);

  done:
    AFS_DISCON_UNLOCK();

  done3:
    if (volp)
	afs_PutVolume(volp, READ_LOCK);

    if (code == 0) {
	if (afs_mariner)
	    afs_AddMarinerName(aname, *avcp);
	/* return the new status in vattr */
	afs_CopyOutAttrs(*avcp, attrs);
	if (afs_mariner)
	    afs_MarinerLog("store$Creating", *avcp);
    }

    afs_PutFakeStat(&fakestate);
    code = afs_CheckCode(code, treq, 20);
    afs_DestroyReq(treq);

  done2:
    osi_FreeSmallSpace(OutFidStatus);
    osi_FreeSmallSpace(OutDirStatus);
    return code;
}


/*
 * Check to see if we can track the change locally: requires that
 * we have sufficiently recent info in data cache.  If so, we
 * know the new DataVersion number, and place it correctly in both the
 * data and stat cache entries.  This routine returns 1 if we should
 * do the operation locally, and 0 otherwise.
 *
 * This routine must be called with the stat cache entry write-locked,
 * and dcache entry write-locked.
 */
int
afs_LocalHero(struct vcache *avc, struct dcache *adc,
	      AFSFetchStatus * astat, int aincr)
{
    afs_int32 ok;
    afs_hyper_t avers;

    AFS_STATCNT(afs_LocalHero);
    hset64(avers, astat->dataVersionHigh, astat->DataVersion);
    /* avers *is* the version number now, no matter what */

    if (adc) {
	/* does what's in the dcache *now* match what's in the vcache *now*,
	 * and do we have a valid callback? if not, our local copy is not "ok" */
	ok = (hsame(avc->f.m.DataVersion, adc->f.versionNo) && avc->callback
	      && (avc->f.states & CStatd) && avc->cbExpires >= osi_Time());
    } else {
	ok = 0;
    }
    if (ok) {
	/* check that the DV on the server is what we expect it to be */
	afs_hyper_t newDV;
	hset(newDV, adc->f.versionNo);
	hadd32(newDV, aincr);
	if (!hsame(avers, newDV)) {
	    ok = 0;
	}
    }
#if defined(AFS_SGI_ENV)
    osi_Assert(avc->v.v_type == VDIR);
#endif
    /* The bulk status code used the length as a sequence number.  */
    /* Don't update the vcache entry unless the stats are current. */
    if (avc->f.states & CStatd) {
	hset(avc->f.m.DataVersion, avers);
#ifdef AFS_64BIT_CLIENT
	FillInt64(avc->f.m.Length, astat->Length_hi, astat->Length);
#else /* AFS_64BIT_CLIENT */
	avc->f.m.Length = astat->Length;
#endif /* AFS_64BIT_CLIENT */
	avc->f.m.Date = astat->ClientModTime;
    }
    if (ok) {
	/* we've been tracking things correctly */
	adc->dflags |= DFEntryMod;
	adc->f.versionNo = avers;
	return 1;
    } else {
	if (adc) {
	    ZapDCE(adc);
	    DZap(adc);
	}
	if (avc->f.states & CStatd) {
	    osi_dnlc_purgedp(avc);
	}
	return 0;
    }
}
Esempio n. 22
0
static int
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
	vdev_file_t *vf;
#ifdef __APPLE__
	struct vnode *vp, *rootdir;
	struct vnode_attr vattr;
	vfs_context_t context;
#else
	vnode_t *vp;
	vattr_t vattr;
#endif
	int error;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);

	/*
	 * We always open the files from the root of the global zone, even if
	 * we're in a local zone.  If the user has gotten to this point, the
	 * administrator has already decided that the pool should be available
	 * to local zone users, so the underlying devices should be as well.
	 */
	ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
#ifdef __APPLE__
	rootdir = getrootdir();
#endif
	error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, spa_mode | FOFFMAX,
	    0, &vp, 0, 0, rootdir);
	
	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	vf->vf_vnode = vp;

#ifdef _KERNEL
	/*
	 * Make sure it's a regular file.
	 */
#ifdef __APPLE__
	if (!vnode_isreg(vp)) {
#else
	if (vp->v_type != VREG) {
#endif
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (ENODEV);
	}
#endif

	/*
	 * Determine the physical size of the file.
	 */
#ifdef __APPLE__
	VATTR_INIT(&vattr);
	VATTR_WANTED(&vattr, va_data_size);

	context = vfs_context_create((vfs_context_t)0);
	error = vnode_getattr(vp, &vattr, context);
	(void) vfs_context_rele(context);

	if (error || !VATTR_IS_SUPPORTED(&vattr, va_data_size)) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	*psize = vattr.va_data_size;
#else
	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, kcred);
	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	*psize = vattr.va_size;
#endif
	*ashift = SPA_MINBLOCKSHIFT;

	return (0);
}

static void
vdev_file_close(vdev_t *vd)
{
	vdev_file_t *vf = vd->vdev_tsd;

	if (vf == NULL)
		return;

	if (vf->vf_vnode != NULL) {
#ifdef __APPLE__
		vfs_context_t context;

		context = vfs_context_create((vfs_context_t)0);
		/* ### APPLE TODO #### */
	//	(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred);
		(void) vnode_close(vf->vf_vnode, spa_mode, context);
		(void) vfs_context_rele(context);
#else
		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred);
		(void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred);
		VN_RELE(vf->vf_vnode);
#endif
	}

	kmem_free(vf, sizeof (vdev_file_t));
	vd->vdev_tsd = NULL;
}