Example #1
0
/*
 *	Gets the first page of kernel binary at disk into a buffer
 *	Uses KPI VFS functions and a ripped uio_createwithbuffer() from XNU
 */
static kern_return_t get_k_mh(void *buffer, vnode_t k_vnode, struct kernel_info *kinfo)
{
	uio_t uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
	
	if(uio == NULL) {
		return KERN_FAILURE;
	}
	
	/* imitate the kernel and read a single page from the header */
	if(uio_addiov(uio, CAST_USER_ADDR_T(buffer), HEADER_SIZE) != 0) {
		return KERN_FAILURE;
	}
	
	/* read kernel vnode into the buffer */
	if(VNOP_READ(k_vnode, uio, 0, NULL) != 0) {
		return KERN_FAILURE;
	} else if(uio_resid(uio)) {
		return KERN_FAILURE;
	}
	
	/* process the header */
	uint32_t magic = *(uint32_t *)buffer;
	
	if(magic == FAT_CIGAM) {
		struct fat_header *fh = (struct fat_header *)buffer;
		struct fat_arch *fa = (struct fat_arch *)(buffer + sizeof(struct fat_header));
		
		uint32_t file_off = 0;
		
		for(uint32_t i = 0; i  < ntohl(fh->nfat_arch); i++) {
			if(sizeof(void *) == 8 && ntohl(fa->cputype) == CPU_TYPE_X86_64) {
				file_off = ntohl(fa->offset);
				
				break;
			} else if(sizeof(void *) == 4 && ntohl(fa->cputype) == CPU_TYPE_X86) {
				file_off = ntohl(fa->offset);
				
				break;
			}
			
			fa++;
		}
		
		/* read again */
		uio = uio_create(1, file_off, UIO_SYSSPACE, UIO_READ);

		uio_addiov(uio, CAST_USER_ADDR_T(buffer), HEADER_SIZE);
		VNOP_READ(k_vnode, uio, 0, NULL);
		
		kinfo->fat_offset = file_off;
	} else {
		kinfo->fat_offset = 0;
	}
	
	return KERN_SUCCESS;
}
Example #2
0
int readFile(char *file, uint8_t *buffer, off_t offset, user_size_t size) {
    int res = EIO;

    vfs_context_t vfsContext = vfs_context_create(NULL);
    if (vfsContext == NULL) {
        return EIO;
    }

    vnode_t fileVnode = NULLVP;
    if (vnode_lookup(file, 0, &fileVnode, vfsContext) == 0) {
        uio_t uio = uio_create(1, offset, UIO_SYSSPACE, UIO_READ);
        if (uio == NULL)
            goto exit;

        if (uio_addiov(uio, CAST_USER_ADDR_T(buffer), size))
            goto exit;

        if (VNOP_READ(fileVnode, uio, 0, vfsContext))
            goto exit;

        if (uio_resid(uio))
            goto exit;

        res = 0;
    } else {
        vfs_context_rele(vfsContext);
        return ENOENT;
    }

exit:
    vnode_put(fileVnode);
    vfs_context_rele(vfsContext);
    return res;
}
Example #3
0
static int
vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, 
	      vfs_context_t ctx)
{
	u_int32_t		blocksize = vn->sc_secsize;
	int 		error = 0;
	off_t		offset;
	user_ssize_t	resid;
	off_t		orig_offset;
	user_ssize_t	orig_resid;

	orig_resid = resid = uio_resid(uio);
	orig_offset = offset = uio_offset(uio);

	while (resid > 0) {
		u_int32_t		remainder;
		u_int32_t		this_block_number;
		u_int32_t		this_block_count;
		off_t		this_offset;
		user_ssize_t	this_resid;
		struct vnode *	vp;

		/* figure out which blocks to read */
		remainder = block_remainder(offset, blocksize);
		if (shadow_map_read(vn->sc_shadow_map,
				    block_truncate(offset, blocksize),
				    block_round(resid + remainder, blocksize),
				    &this_block_number, &this_block_count)) {
			vp = vn->sc_shadow_vp;
		}
		else {
			vp = vn->sc_vp;
		}

		/* read the blocks (or parts thereof) */
		this_offset = (off_t)this_block_number * blocksize + remainder;
		uio_setoffset(uio, this_offset);
		this_resid = this_block_count * blocksize - remainder;
		if (this_resid > resid) {
			this_resid = resid;
		}
		uio_setresid(uio, this_resid);
		error = VNOP_READ(vp, uio, ioflag, ctx);
		if (error) {
			break;
		}

		/* figure out how much we actually read */
		this_resid -= uio_resid(uio);
		if (this_resid == 0) {
			printf("vn device: vnread_shadow zero length read\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
	}
	uio_setresid(uio, resid);
	uio_setoffset(uio, offset);
	return (error);
}
Example #4
0
/*
 * retrieve the first page of kernel binary at disk into a buffer
 * version that uses KPI VFS functions and a ripped uio_createwithbuffer() from XNU
 */
kern_return_t get_mach_header(void *buffer, vnode_t vnode, vfs_context_t ctxt) {
	int error = 0;
	uio_t uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
	if (uio == NULL) {
		return KERN_FAILURE;
	}
	// imitate the kernel and read a single page from the header
	error = uio_addiov(uio, CAST_USER_ADDR_T(buffer), HEADER_SIZE);
	if (error) {
		return error;
	}
	// read kernel vnode into the buffer
	error = VNOP_READ(vnode, uio, 0, ctxt);
	
	if (error) {
		return error;
	}
	else if (uio_resid(uio)) return EINVAL;
	
	// process the header
	uint32_t magic = *(uint32_t*)buffer;
	if (magic != MH_MAGIC_64) {
		return KERN_FAILURE;
	}
	
	return KERN_SUCCESS;
}
Example #5
0
int zfs_vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, ssize_t len,
                offset_t offset, enum uio_seg seg, int ioflag, rlim64_t ulimit,
                cred_t *cr, ssize_t *residp)
{
    uio_t *auio;
    int spacetype;
    int error=0;
    vfs_context_t vctx;

    spacetype = UIO_SEG_IS_USER_SPACE(seg) ? UIO_USERSPACE32 : UIO_SYSSPACE;

    vctx = vfs_context_create((vfs_context_t)0);
    auio = uio_create(1, 0, spacetype, rw);
    uio_reset(auio, offset, spacetype, rw);
    uio_addiov(auio, (uint64_t)(uintptr_t)base, len);

    if (rw == UIO_READ) {
        error = VNOP_READ(vp, auio, ioflag, vctx);
    } else {
        error = VNOP_WRITE(vp, auio, ioflag, vctx);
    }

    if (residp) {
        *residp = uio_resid(auio);
    } else {
        if (uio_resid(auio) && error == 0)
            error = EIO;
    }

    uio_free(auio);
    vfs_context_rele(vctx);

    return (error);
}
Example #6
0
/* Generic read interface */
int
afs_osi_Read(struct osi_file *afile, int offset, void *aptr,
	     afs_int32 asize)
{
    afs_ucred_t *oldCred;
    afs_size_t resid;
    afs_int32 code;
#ifdef AFS_DARWIN80_ENV
    uio_t uio;
#endif
    AFS_STATCNT(osi_Read);

    /**
      * If the osi_file passed in is NULL, panic only if AFS is not shutting
      * down. No point in crashing when we are already shutting down
      */
    if (!afile) {
	if (afs_shuttingdown == AFS_RUNNING)
	    osi_Panic("osi_Read called with null param");
	else
	    return -EIO;
    }

    if (offset != -1)
	afile->offset = offset;
    AFS_GUNLOCK();
#ifdef AFS_DARWIN80_ENV
    uio=uio_create(1, afile->offset, AFS_UIOSYS, UIO_READ);
    uio_addiov(uio, CAST_USER_ADDR_T(aptr), asize);
    code = VNOP_READ(afile->vnode, uio, IO_UNIT, afs_osi_ctxtp);
    resid = AFS_UIO_RESID(uio);
    uio_free(uio);
#else
    code =
	gop_rdwr(UIO_READ, afile->vnode, (caddr_t) aptr, asize, afile->offset,
		 AFS_UIOSYS, IO_UNIT, &afs_osi_cred, &resid);
#endif
    AFS_GLOCK();
    if (code == 0) {
	code = asize - resid;
	afile->offset += code;
	osi_DisableAtimes(afile->vnode);
    } else {
	afs_Trace2(afs_iclSetp, CM_TRACE_READFAILED, ICL_TYPE_INT32, resid,
		   ICL_TYPE_INT32, code);
	if (code > 0) {
	    code *= -1;
	}
    }
    return code;
}
Example #7
0
int
cttyread(__unused dev_t dev, struct uio *uio, int flag)
{
	vnode_t ttyvp = cttyvp(current_proc());
	struct vfs_context context;
	int error;

	if (ttyvp == NULL)
		return (EIO);

	context.vc_thread = current_thread();
	context.vc_ucred = NOCRED;

	error = VNOP_READ(ttyvp, uio, flag, &context);
	vnode_put(ttyvp);

	return (error);
}
Example #8
0
/*
 * retrieve the whole linkedit segment into target buffer from kernel binary at disk
 * we keep this buffer until we don't need to solve symbols anymore
 */
static kern_return_t get_kernel_linkedit(vnode_t kernel_vnode, vfs_context_t ctxt, kernel_info *kinfo) {
	int error = 0;
	uio_t uio = uio_create(1, kinfo->linkedit_fileoff, UIO_SYSSPACE, UIO_READ);
	if (uio == NULL) {
		return KERN_FAILURE;
	}
	error = uio_addiov(uio, CAST_USER_ADDR_T(kinfo->linkedit_buf), kinfo->linkedit_size);
	if (error) return error;
	
	error = VNOP_READ(kernel_vnode, uio, 0, ctxt);
	
	if (error) {
		return error;
	} else if (uio_resid(uio)) {
		return EINVAL;
	}
	
	return KERN_SUCCESS;
}
Example #9
0
static int
nullfs_read(struct vnop_read_args * ap)
{
	int error = EIO;

	struct vnode *vp, *lvp;

	NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);

	if (nullfs_checkspecialvp(ap->a_vp)) {
		return ENOTSUP; /* the special vnodes can't be read */
	}

	vp  = ap->a_vp;
	lvp = NULLVPTOLOWERVP(vp);

	/*
	 * First some house keeping
	 */
	if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) {
		if (!vnode_isreg(lvp) && !vnode_islnk(lvp)) {
			error = EPERM;
			goto end;
		}

		if (uio_resid(ap->a_uio) == 0) {
			error = 0;
			goto end;
		}

		/*
		 * Now ask VM/UBC/VFS to do our bidding
		 */

		error = VNOP_READ(lvp, ap->a_uio, ap->a_ioflag, ap->a_context);
		if (error) {
			NULLFSDEBUG("VNOP_READ failed: %d\n", error);
		}
	end:
		vnode_put(lvp);
	}
	return error;
}
Example #10
0
/*
 *	Get the whole __LINKEDIT segment into target buffer from kernel binary at disk
 *	This buffer is keeped until we don't need to solve symbols anymore
 */
static kern_return_t get_k_linkedit(vnode_t k_vnode, struct kernel_info *kinfo)
{
	uio_t uio = uio_create(1, kinfo->linkedit_fileoff, UIO_SYSSPACE, UIO_READ);
	
	if(uio == NULL) {
		return KERN_FAILURE;
	}
	
	if(uio_addiov(uio, CAST_USER_ADDR_T(kinfo->linkedit_buf), kinfo->linkedit_size) != 0) {
		return KERN_FAILURE;
	}
	
	if(VNOP_READ(k_vnode, uio, 0, NULL) != 0) {
		return KERN_FAILURE;
	} else if(uio_resid(uio)) {
		return KERN_FAILURE;
	}
	
	return KERN_SUCCESS;
}
Example #11
0
static int
file_io(struct vnode * vp, vfs_context_t ctx, 
	enum uio_rw op, char * base, off_t offset, user_ssize_t count,
	user_ssize_t * resid)
{
	uio_t 		auio;
	int		error;
	char		uio_buf[UIO_SIZEOF(1)];
	
	auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, op, 
				    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(base), count);
	if (op == UIO_READ)
		error = VNOP_READ(vp, auio, IO_SYNC, ctx);
	else
		error = VNOP_WRITE(vp, auio, IO_SYNC, ctx);

	if (resid != NULL) {
		*resid = uio_resid(auio);
	}
	return (error);
}
Example #12
0
/* kauth file scope listener 
 * this allows to detect files written to the filesystem
 * arg2 contains a flag KAUTH_FILEOP_CLOSE which is set if a modified file is being closed
 * this way we don't need to trace every close(), only the ones writing to the filesystem
 */
static int
fileop_scope_listener(kauth_cred_t    credential,
                      void *          idata,
                      kauth_action_t  action,
                      uintptr_t       arg0,     /* vnode reference */
                      uintptr_t       arg1,     /* full path to file being closed */
                      uintptr_t       arg2,     /* flags */
                      uintptr_t       arg3)
{
    /* ignore all actions except FILE_CLOSE */
    if (action != KAUTH_FILEOP_CLOSE)
    {
        return KAUTH_RESULT_DEFER;
    }
    
    /* ignore operations with bad data */
    if (credential == NULL || (vnode_t)arg0 == NULL || (char*)arg1 == NULL)
    {
        ERROR_MSG("Arguments contain null pointers!");
        return KAUTH_RESULT_DEFER;
    }
    
    /* ignore closes on folders, character and block devices */
    switch ( vnode_vtype((vnode_t)arg0) )
    {
        case VDIR:
        case VCHR:
        case VBLK:
            return KAUTH_RESULT_DEFER;
        default:
            break;
    }
    
    /* we are only interested when a modified file is being closed */
    if ((int)arg2 != KAUTH_FILEOP_CLOSE_MODIFIED)
    {
        return KAUTH_RESULT_DEFER;
    }
    
    char *file_path = (char*)arg1;
    /* get information from current proc trying to write to the vnode */
    proc_t proc = current_proc();
    pid_t mypid = proc_pid(proc);
    char myprocname[MAXCOMLEN+1] = {0};
    proc_name(mypid, myprocname, sizeof(myprocname));

    /* retrieve the vnode attributes, we can get a lot of vnode information from here */
    struct vnode_attr vap = {0};
    vfs_context_t context = vfs_context_create(NULL);
    /* initialize the structure fields we are interested in
     * reference vn_stat_noauth() xnu/bsd/vfs/vfs_vnops.c
     */
    VATTR_INIT(&vap);
    VATTR_WANTED(&vap, va_mode);
    VATTR_WANTED(&vap, va_type);
    VATTR_WANTED(&vap, va_uid);
    VATTR_WANTED(&vap, va_gid);
    VATTR_WANTED(&vap, va_data_size);
    VATTR_WANTED(&vap, va_flags);
    int attr_ok = 1;
    if ( vnode_getattr((vnode_t)arg0, &vap, context) != 0 )
    {
        /* in case of error permissions and filesize will be bogus */
        ERROR_MSG("failed to vnode_getattr");
        attr_ok = 0;
    }
    /* release the context we created, else kab00m! */
    vfs_context_rele(context);
    
    int error = 0;
    /* make sure we :
     * - were able to read the attributes
     * - file size is at least uint32_t 
     * - path starts with /Users
     */
    if ( attr_ok == 1 &&
         vap.va_data_size >= sizeof(uint32_t) &&
         strprefix(file_path, "/Users/") )
    {
        uint32_t magic = 0;
        /* read target vnode */
        uio_t uio = NULL;
        /* read from offset 0 */
        uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
        if (uio == NULL)
        {
            ERROR_MSG("uio_create returned null!");
            return KAUTH_RESULT_DEFER;
        }
        /* we just want to read 4 bytes to match the header */
        if ( (error = uio_addiov(uio, CAST_USER_ADDR_T(&magic), sizeof(uint32_t))) )
        {
            ERROR_MSG("uio_addiov returned error %d!", error);
            return KAUTH_RESULT_DEFER;
        }
        if ( (error = VNOP_READ((vnode_t)arg0, uio, 0, NULL)) )
        {
            ERROR_MSG("VNOP_READ failed %d!", error);
            return KAUTH_RESULT_DEFER;
        }
        else if (uio_resid(uio))
        {
            ERROR_MSG("uio_resid!");
            return KAUTH_RESULT_DEFER;
        }
        
        /* verify if it's a Mach-O file */
        if (magic == MH_MAGIC || magic == MH_MAGIC_64 || magic == FAT_CIGAM)
        {
            char *token = NULL;
            char *string = NULL;
            char *tofree = NULL;
            int library = 0;
            int preferences = 0;
            
            tofree = string = STRDUP(file_path, M_TEMP);
            while ((token = strsep(&string, "/")) != NULL)
            {
                if (strcmp(token, "Library") == 0)
                {
                    library = 1;
                }
                else if (library == 1 && strcmp(token, "Preferences") == 0)
                {
                    preferences = 1;
                }
            }
            _FREE(tofree, M_TEMP);
            /* we got a match into /Users/username/Library/Preferences, warn user about it */
            if (library == 1 && preferences == 1)
            {
                DEBUG_MSG("Found Mach-O written to %s by %s.", file_path, myprocname);
                char alert_msg[1025] = {0};
                snprintf(alert_msg, sizeof(alert_msg), "Process \"%s\" wrote Mach-O binary %s.\n This could be Hacking Team's malware!", myprocname, file_path);
                alert_msg[sizeof(alert_msg)-1] = '\0';
                
                /* log to syslog */
                printf("[WARNING] Process \"%s\" wrote Mach-O binary %s.\n This could be Hacking Team's malware!", myprocname, file_path);
                /* deprecated but still usable to display the alert */
                KUNCUserNotificationDisplayNotice(10,		// Timeout
                                                  0,		// Flags - default is Stop alert level
                                                  NULL,     // iconpath
                                                  NULL,     // soundpath
                                                  NULL,     // localization path
                                                  "Security Alert", // alert header
                                                  alert_msg, // alert message
                                                  "OK");	// button title
            }
        }
    }
    /* don't deny access, we are just here to observe */
    return KAUTH_RESULT_DEFER;
}
Example #13
0
/* relies on v1 paging */
static int
nullfs_pagein(struct vnop_pagein_args * ap)
{
	int error = EIO;
	struct vnode *vp, *lvp;

	NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);

	vp  = ap->a_vp;
	lvp = NULLVPTOLOWERVP(vp);

	if (vnode_vtype(vp) != VREG) {
		return ENOTSUP;
	}

	/*
	 * Ask VM/UBC/VFS to do our bidding
	 */
	if (vnode_getwithvid(lvp, NULLVPTOLOWERVID(vp)) == 0) {
		vm_offset_t ioaddr;
		uio_t auio;
		kern_return_t kret;
		off_t bytes_to_commit;
		off_t lowersize;
		upl_t upl      = ap->a_pl;
		user_ssize_t bytes_remaining = 0;

		auio = uio_create(1, ap->a_f_offset, UIO_SYSSPACE, UIO_READ);
		if (auio == NULL) {
			error = EIO;
			goto exit_no_unmap;
		}

		kret = ubc_upl_map(upl, &ioaddr);
		if (KERN_SUCCESS != kret) {
			panic("nullfs_pagein: ubc_upl_map() failed with (%d)", kret);
		}

		ioaddr += ap->a_pl_offset;

		error = uio_addiov(auio, (user_addr_t)ioaddr, ap->a_size);
		if (error) {
			goto exit;
		}

		lowersize = ubc_getsize(lvp);
		if (lowersize != ubc_getsize(vp)) {
			(void)ubc_setsize(vp, lowersize); /* ignore failures, nothing can be done */
		}

		error = VNOP_READ(lvp, auio, ((ap->a_flags & UPL_IOSYNC) ? IO_SYNC : 0), ap->a_context);

		bytes_remaining = uio_resid(auio);
		if (bytes_remaining > 0 && bytes_remaining <= (user_ssize_t)ap->a_size)
		{
			/* zero bytes that weren't read in to the upl */
			bzero((void*)((uintptr_t)(ioaddr + ap->a_size - bytes_remaining)), (size_t) bytes_remaining);
		}

	exit:
		kret = ubc_upl_unmap(upl);
		if (KERN_SUCCESS != kret) {
			panic("nullfs_pagein: ubc_upl_unmap() failed with (%d)", kret);
		}

		if (auio != NULL) {
			uio_free(auio);
		}

	exit_no_unmap:
		if ((ap->a_flags & UPL_NOCOMMIT) == 0) {
			if (!error && (bytes_remaining >= 0) && (bytes_remaining <= (user_ssize_t)ap->a_size)) {
				/* only commit what was read in (page aligned)*/
				bytes_to_commit = ap->a_size - bytes_remaining;
				if (bytes_to_commit)
				{
					/* need to make sure bytes_to_commit and byte_remaining are page aligned before calling ubc_upl_commit_range*/
					if (bytes_to_commit & PAGE_MASK)
					{
						bytes_to_commit = (bytes_to_commit & (~PAGE_MASK)) + (PAGE_MASK + 1);
						assert(bytes_to_commit <= (off_t)ap->a_size);

						bytes_remaining = ap->a_size - bytes_to_commit;
					}
					ubc_upl_commit_range(upl, ap->a_pl_offset, (upl_size_t)bytes_to_commit, UPL_COMMIT_FREE_ON_EMPTY);
				}
				
				/* abort anything thats left */
				if (bytes_remaining) {
					ubc_upl_abort_range(upl, ap->a_pl_offset + bytes_to_commit, (upl_size_t)bytes_remaining, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
				}
			} else {
				ubc_upl_abort_range(upl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
			}
		}
		vnode_put(lvp);
	} else if((ap->a_flags & UPL_NOCOMMIT) == 0) {
		ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, (upl_size_t)ap->a_size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
	}
	return error;
}
Example #14
0
static int 
vnread(dev_t dev, struct uio *uio, int ioflag)
{
	struct vfs_context  	context; 		
	int 			error = 0;
	off_t			offset;
	proc_t			p;
	user_ssize_t		resid;
	struct vn_softc *	vn;
	int 			unit;

	unit = vnunit(dev);
	if (vnunit(dev) >= NVNDEVICE) {
		return (ENXIO);
	}
	p = current_proc();
	vn = vn_table + unit;
	if ((vn->sc_flags & VNF_INITED) == 0) {
		error = ENXIO;
		goto done;
	}

	context.vc_thread = current_thread();
	context.vc_ucred = vn->sc_cred;

	error = vnode_getwithvid(vn->sc_vp, vn->sc_vid);
	if (error != 0) {
		/* the vnode is no longer available, abort */
		error = ENXIO;
		vnclear(vn, &context);
		goto done;
	}

	resid = uio_resid(uio);
	offset = uio_offset(uio);

	/*
	 * If out of bounds return an error.  If at the EOF point,
	 * simply read less.
	 */
	if (offset >= (off_t)vn->sc_fsize) {
		if (offset > (off_t)vn->sc_fsize) {
			error = EINVAL;
		}
		goto done;
	}
	/*
	 * If the request crosses EOF, truncate the request.
	 */
	if ((offset + resid) > (off_t)vn->sc_fsize) {
		resid = vn->sc_fsize - offset;
		uio_setresid(uio, resid);
	}

	if (vn->sc_shadow_vp != NULL) {
		error = vnode_getwithvid(vn->sc_shadow_vp,
					 vn->sc_shadow_vid);
		if (error != 0) {
			/* the vnode is no longer available, abort */
			error = ENXIO;
			vnode_put(vn->sc_vp);
			vnclear(vn, &context);
			goto done;
		}
		error = vnread_shadow(vn, uio, ioflag, &context);
		vnode_put(vn->sc_shadow_vp);
	} else {
		error = VNOP_READ(vn->sc_vp, uio, ioflag, &context);
	}
	vnode_put(vn->sc_vp);
 done:
	return (error);
}
Example #15
0
int
afs_MemRead(struct vcache *avc, struct uio *auio,
	    afs_ucred_t *acred, daddr_t albn, struct buf **abpp,
	    int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, tlen;
    afs_size_t len = 0;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error, trybusy = 1;
    afs_int32 code;
    struct vrequest *treq = NULL;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop = NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
    memset(&tuio, 0, sizeof(tuio));
#endif

    AFS_STATCNT(afs_MemRead);
    if (avc->vc_error)
	return EIO;

    /* check that we have the latest status info in the vnode cache */
    if ((code = afs_CreateReq(&treq, acred)))
	return code;
    if (!noLock) {
	code = afs_VerifyVCache(avc, treq);
	if (code) {
	    code = afs_CheckCode(code, treq, 8);	/* failed to get it */
	    afs_DestroyReq(treq);
	    return code;
	}
    }
#ifndef	AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
	if (!afs_AccessOK
	    (avc, PRSFS_READ, treq,
	     CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
	    code = afs_CheckCode(EACCES, treq, 9);
	    afs_DestroyReq(treq);
	    return code;
	}
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
    memset(tvec, 0, sizeof(struct iovec));
#endif
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
	       totalLength, ICL_TYPE_OFFSET,
	       ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
	ObtainReadLock(&avc->lock);
#if	defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
	hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    /*
     * Locks held:
     * avc->lock(R)
     */

    /* This bit is bogus. We're checking to see if the read goes past the
     * end of the file. If so, we should be zeroing out all of the buffers
     * that the client has passed into us (there is a danger that we may leak
     * kernel memory if we do not). However, this behaviour is disabled by
     * not setting len before this segment runs, and by setting len to 0
     * immediately we enter it. In addition, we also need to check for a read
     * which partially goes off the end of the file in the while loop below.
     */

    if (filePos >= avc->f.m.Length) {
	if (len > AFS_ZEROS)
	    len = sizeof(afs_zeros);	/* and in 0 buffer */
	len = 0;
#ifdef AFS_DARWIN80_ENV
	trimlen = len;
	tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
	afsio_copy(auio, &tuio, tvec);
	trimlen = len;
	afsio_trim(&tuio, trimlen);
#endif
	AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }

    while (avc->f.m.Length > 0 && totalLength > 0) {
	/* read all of the cached info */
	if (filePos >= avc->f.m.Length)
	    break;		/* all done */
	if (noLock) {
	    if (tdc) {
		ReleaseReadLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }
	    tdc = afs_FindDCache(avc, filePos);
	    if (tdc) {
		ObtainReadLock(&tdc->lock);
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		len = tdc->f.chunkBytes - offset;
	    }
	} else {
	    int versionOk;
	    /* a tricky question: does the presence of the DFFetching flag
	     * mean that we're fetching the latest version of the file?  No.
	     * The server could update the file as soon as the fetch responsible
	     * for the setting of the DFFetching flag completes.
	     *
	     * However, the presence of the DFFetching flag (visible under
	     * a dcache read lock since it is set and cleared only under a
	     * dcache write lock) means that we're fetching as good a version
	     * as was known to this client at the time of the last call to
	     * afs_VerifyVCache, since the latter updates the stat cache's
	     * m.DataVersion field under a vcache write lock, and from the
	     * time that the DFFetching flag goes on in afs_GetDCache (before
	     * the fetch starts), to the time it goes off (after the fetch
	     * completes), afs_GetDCache keeps at least a read lock on the
	     * vcache entry.
	     *
	     * This means that if the DFFetching flag is set, we can use that
	     * data for any reads that must come from the current version of
	     * the file (current == m.DataVersion).
	     *
	     * Another way of looking at this same point is this: if we're
	     * fetching some data and then try do an afs_VerifyVCache, the
	     * VerifyVCache operation will not complete until after the
	     * DFFetching flag is turned off and the dcache entry's f.versionNo
	     * field is updated.
	     *
	     * Note, by the way, that if DFFetching is set,
	     * m.DataVersion > f.versionNo (the latter is not updated until
	     * after the fetch completes).
	     */
	    if (tdc) {
		ReleaseReadLock(&tdc->lock);
		afs_PutDCache(tdc);	/* before reusing tdc */
	    }
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
    try_background:
#endif
	    tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2);
	    ObtainReadLock(&tdc->lock);
	    /* now, first try to start transfer, if we'll need the data.  If
	     * data already coming, we don't need to do this, obviously.  Type
	     * 2 requests never return a null dcache entry, btw.
	     */
	    if (!(tdc->dflags & DFFetching)
		&& !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
		/* have cache entry, it is not coming in now,
		 * and we'll need new data */
	      tagain:
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) {
#else
		if (trybusy && !afs_BBusy()) {
#endif
		    struct brequest *bp;
		    /* daemon is not busy */
		    ObtainSharedLock(&tdc->mflock, 665);
		    if (!(tdc->mflags & DFFetchReq)) {
			int dontwait = B_DONTWAIT;
			/* start the daemon (may already be running, however) */
			UpgradeSToWLock(&tdc->mflock, 666);
			tdc->mflags |= DFFetchReq;
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
			if (afs_protocols & VICEP_ACCESS)
			    dontwait = 0;
#endif
			bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred,
					(afs_size_t) filePos, (afs_size_t) 0,
					tdc, (void *)0, (void *)0);
			if (!bp) {
			    tdc->mflags &= ~DFFetchReq;
			    trybusy = 0;	/* Avoid bkg daemon since they're too busy */
			    ReleaseWriteLock(&tdc->mflock);
			    goto tagain;
			}
			ConvertWToSLock(&tdc->mflock);
			/* don't use bp pointer! */
		    }
		    code = 0;
		    ConvertSToRLock(&tdc->mflock);
		    while (!code && tdc->mflags & DFFetchReq) {
			afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
				   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
				   __LINE__, ICL_TYPE_POINTER, tdc,
				   ICL_TYPE_INT32, tdc->dflags);
			/* don't need waiting flag on this one */
			ReleaseReadLock(&tdc->mflock);
			ReleaseReadLock(&tdc->lock);
			ReleaseReadLock(&avc->lock);
			code = afs_osi_SleepSig(&tdc->validPos);
			ObtainReadLock(&avc->lock);
			ObtainReadLock(&tdc->lock);
			ObtainReadLock(&tdc->mflock);
		    }
		    ReleaseReadLock(&tdc->mflock);
		    if (code) {
			error = code;
			break;
		    }
		}
	    }
	    /* now data may have started flowing in (if DFFetching is on).  If
	     * data is now streaming in, then wait for some interesting stuff.
	     */
	    code = 0;
	    while (!code && (tdc->dflags & DFFetching)
		   && tdc->validPos <= filePos) {
		/* too early: wait for DFFetching flag to vanish,
		 * or data to appear */
		afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
			   __FILE__, ICL_TYPE_INT32, __LINE__,
			   ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
			   tdc->dflags);
		ReleaseReadLock(&tdc->lock);
		ReleaseReadLock(&avc->lock);
		code = afs_osi_SleepSig(&tdc->validPos);
		ObtainReadLock(&avc->lock);
		ObtainReadLock(&tdc->lock);
	    }
	    if (code) {
		error = code;
		break;
	    }
	    /* fetching flag gone, data is here, or we never tried
	     * (BBusy for instance) */
	    len = tdc->validPos - filePos;
	    versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0;
	    if (tdc->dflags & DFFetching) {
		/* still fetching, some new data is here:
		 * compute length and offset */
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
	    } else {
		/* no longer fetching, verify data version
		 * (avoid new GetDCache call) */
		if (versionOk && len > 0) {
		    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		} else {
		    /* don't have current data, so get it below */
		    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
			       ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
			       ICL_TYPE_HYPER, &avc->f.m.DataVersion,
			       ICL_TYPE_HYPER, &tdc->f.versionNo);
#if 0
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
if (afs_protocols & VICEP_ACCESS) {
printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n", tdc->f.versionNo.low, avc->f.m.DataVersion.low, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
printf("afs_read: validPos %llu filePos %llu totalLength %lld m.Length %llu noLock %d\n", tdc->validPos, filePos, totalLength, avc->f.m.Length, noLock);
printf("afs_read: or len too low? %lld for %u.%u.%u\n", len, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
}
#endif
#endif
		    ReleaseReadLock(&tdc->lock);
		    afs_PutDCache(tdc);
		    tdc = NULL;
		}
	    }

	    if (!tdc) {
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		if (afs_protocols & VICEP_ACCESS) { /* avoid foreground fetch */
		    if (!versionOk) {
printf("afs_read: avoid forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
		        goto try_background;
		    }
#if 0
printf("afs_read: forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
#endif
		}
#endif
		/* If we get here, it was not possible to start the
		 * background daemon. With flag == 1 afs_GetDCache
		 * does the FetchData rpc synchronously.
		 */
		ReleaseReadLock(&avc->lock);
		tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1);
		ObtainReadLock(&avc->lock);
		if (tdc)
		    ObtainReadLock(&tdc->lock);
	    }
	}

	afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
		   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
		   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
	if (!tdc) {
	    error = EIO;
	    break;
	}

	/*
	 * Locks held:
	 * avc->lock(R)
	 * tdc->lock(R)
	 */

	if (len > totalLength)
	    len = totalLength;	/* will read len bytes */
	if (len <= 0) {		/* shouldn't get here if DFFetching is on */
	    /* read past the end of a chunk, may not be at next chunk yet, and yet
	     * also not at eof, so may have to supply fake zeros */
	    len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;	/* bytes left in chunk addr space */
	    if (len > totalLength)
		len = totalLength;	/* and still within xfr request */
	    tlen = avc->f.m.Length - offset;	/* and still within file */
	    if (len > tlen)
		len = tlen;
	    if (len > AFS_ZEROS)
		len = sizeof(afs_zeros);	/* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
	    trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
	    afsio_copy(auio, &tuio, tvec);
	    trimlen = len;
	    afsio_trim(&tuio, trimlen);
#endif
	        AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
	    if (code) {
		error = code;
		break;
	    }
	} else {
	    /* get the data from the mem cache */

	    /* mung uio structure to be right for this transfer */
#ifdef AFS_DARWIN80_ENV
	    trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
	    uio_setoffset(tuiop, offset);
#else
	    afsio_copy(auio, &tuio, tvec);
	    trimlen = len;
	    afsio_trim(&tuio, trimlen);
	    tuio.afsio_offset = offset;
#endif

	    code = afs_MemReadUIO(&tdc->f.inode, tuiop);

	    if (code) {
		error = code;
		break;
	    }
	}
	/* otherwise we've read some, fixup length, etc and continue with next seg */
	len = len - AFS_UIO_RESID(tuiop);	/* compute amount really transferred */
	trimlen = len;
	afsio_skip(auio, trimlen);	/* update input uio structure */
	totalLength -= len;
	transferLength += len;
	filePos += len;

	if (len <= 0)
	    break;		/* surprise eof */
#ifdef AFS_DARWIN80_ENV
	if (tuiop) {
	    uio_free(tuiop);
	    tuiop = 0;
	}
#endif
    }				/* the whole while loop */

    /*
     * Locks held:
     * avc->lock(R)
     * tdc->lock(R) if tdc
     */

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch.
     */
    if (tdc) {
	ReleaseReadLock(&tdc->lock);
	/*
         * try to queue prefetch, if needed. If DataVersion is zero there
	 * should not be any more: files with DV 0 never have been stored
	 * on the fileserver, symbolic links and directories never require
	 * more than a single chunk.
	 */
	if (!noLock && !(hiszero(avc->f.m.DataVersion)) &&
#ifndef AFS_VM_RDWR_ENV
	    afs_preCache
#else
	    1
#endif
	    ) {
	    afs_PrefetchChunk(avc, tdc, acred, treq);
	}
	afs_PutDCache(tdc);
    }
    if (!noLock)
	ReleaseReadLock(&avc->lock);
#ifdef AFS_DARWIN80_ENV
    if (tuiop)
       uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    error = afs_CheckCode(error, treq, 10);
    afs_DestroyReq(treq);
    return error;
}

/* called with the dcache entry triggering the fetch, the vcache entry involved,
 * and a vrequest for the read call.  Marks the dcache entry as having already
 * triggered a prefetch, starts the prefetch going and sets the DFFetchReq
 * flag in the prefetched block, so that the next call to read knows to wait
 * for the daemon to start doing things.
 *
 * This function must be called with the vnode at least read-locked, and
 * no locks on the dcache, because it plays around with dcache entries.
 */
void
afs_PrefetchChunk(struct vcache *avc, struct dcache *adc,
		  afs_ucred_t *acred, struct vrequest *areq)
{
    struct dcache *tdc;
    afs_size_t offset;
    afs_size_t j1, j2;		/* junk vbls for GetDCache to trash */

    offset = adc->f.chunk + 1;	/* next chunk we'll need */
    offset = AFS_CHUNKTOBASE(offset);	/* base of next chunk */
    ObtainReadLock(&adc->lock);
    ObtainSharedLock(&adc->mflock, 662);
    if (offset < avc->f.m.Length && !(adc->mflags & DFNextStarted)
	&& !afs_BBusy()) {
	struct brequest *bp;

	UpgradeSToWLock(&adc->mflock, 663);
	adc->mflags |= DFNextStarted;	/* we've tried to prefetch for this guy */
	ReleaseWriteLock(&adc->mflock);
	ReleaseReadLock(&adc->lock);

	tdc = afs_GetDCache(avc, offset, areq, &j1, &j2, 2);	/* type 2 never returns 0 */
        /*
         * In disconnected mode, type 2 can return 0 because it doesn't
         * make any sense to allocate a dcache we can never fill
         */
         if (tdc == NULL)
             return;

	ObtainSharedLock(&tdc->mflock, 651);
	if (!(tdc->mflags & DFFetchReq)) {
	    /* ask the daemon to do the work */
	    UpgradeSToWLock(&tdc->mflock, 652);
	    tdc->mflags |= DFFetchReq;	/* guaranteed to be cleared by BKG or GetDCache */
	    /* last parm (1) tells bkg daemon to do an afs_PutDCache when it is done,
	     * since we don't want to wait for it to finish before doing so ourselves.
	     */
	    bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred,
			    (afs_size_t) offset, (afs_size_t) 1, tdc,
			    (void *)0, (void *)0);
	    if (!bp) {
		/* Bkg table full; just abort non-important prefetching to avoid deadlocks */
		tdc->mflags &= ~DFFetchReq;
		ReleaseWriteLock(&tdc->mflock);
		afs_PutDCache(tdc);

		/*
		 * DCLOCKXXX: This is a little sketchy, since someone else
		 * could have already started a prefetch..  In practice,
		 * this probably doesn't matter; at most it would cause an
		 * extra slot in the BKG table to be used up when someone
		 * prefetches this for the second time.
		 */
		ObtainReadLock(&adc->lock);
		ObtainWriteLock(&adc->mflock, 664);
		adc->mflags &= ~DFNextStarted;
		ReleaseWriteLock(&adc->mflock);
		ReleaseReadLock(&adc->lock);
	    } else {
		ReleaseWriteLock(&tdc->mflock);
	    }
	} else {
	    ReleaseSharedLock(&tdc->mflock);
	    afs_PutDCache(tdc);
	}
    } else {
	ReleaseSharedLock(&adc->mflock);
	ReleaseReadLock(&adc->lock);
    }
}

int
afs_UFSRead(struct vcache *avc, struct uio *auio,
	    afs_ucred_t *acred, daddr_t albn, struct buf **abpp,
	    int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, tlen;
    afs_size_t len = 0;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error;
    struct osi_file *tfile;
    afs_int32 code;
    int trybusy = 1;
    struct vrequest *treq = NULL;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop=NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
    memset(&tuio, 0, sizeof(tuio));
#endif

    AFS_STATCNT(afs_UFSRead);
    if (avc && avc->vc_error)
	return EIO;

    AFS_DISCON_LOCK();

    /* check that we have the latest status info in the vnode cache */
    if ((code = afs_CreateReq(&treq, acred)))
	return code;
    if (!noLock) {
	if (!avc)
	    osi_Panic("null avc in afs_UFSRead");
	else {
	    code = afs_VerifyVCache(avc, treq);
	    if (code) {
		code = afs_CheckCode(code, treq, 11);	/* failed to get it */
		afs_DestroyReq(treq);
		AFS_DISCON_UNLOCK();
		return code;
	    }
	}
    }
#ifndef	AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
	if (!afs_AccessOK
	    (avc, PRSFS_READ, treq,
	     CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
	    AFS_DISCON_UNLOCK();
	    code = afs_CheckCode(EACCES, treq, 12);
	    afs_DestroyReq(treq);
	    return code;
	}
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
    memset(tvec, 0, sizeof(struct iovec));
#endif
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
	       totalLength, ICL_TYPE_OFFSET,
	       ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
	ObtainReadLock(&avc->lock);
#if	defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
	hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    /* This bit is bogus. We're checking to see if the read goes past the
     * end of the file. If so, we should be zeroing out all of the buffers
     * that the client has passed into us (there is a danger that we may leak
     * kernel memory if we do not). However, this behaviour is disabled by
     * not setting len before this segment runs, and by setting len to 0
     * immediately we enter it. In addition, we also need to check for a read
     * which partially goes off the end of the file in the while loop below.
     */

    if (filePos >= avc->f.m.Length) {
	if (len > AFS_ZEROS)
	    len = sizeof(afs_zeros);	/* and in 0 buffer */
	len = 0;
#ifdef AFS_DARWIN80_ENV
	trimlen = len;
	tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
	afsio_copy(auio, &tuio, tvec);
	trimlen = len;
	afsio_trim(&tuio, trimlen);
#endif
	AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }

    while (avc->f.m.Length > 0 && totalLength > 0) {
	/* read all of the cached info */
	if (filePos >= avc->f.m.Length)
	    break;		/* all done */
	if (noLock) {
	    if (tdc) {
		ReleaseReadLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }
	    tdc = afs_FindDCache(avc, filePos);
	    if (tdc) {
		ObtainReadLock(&tdc->lock);
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		len = tdc->validPos - filePos;
	    }
	} else {
	    int versionOk;
	    /* a tricky question: does the presence of the DFFetching flag
	     * mean that we're fetching the latest version of the file?  No.
	     * The server could update the file as soon as the fetch responsible
	     * for the setting of the DFFetching flag completes.
	     *
	     * However, the presence of the DFFetching flag (visible under
	     * a dcache read lock since it is set and cleared only under a
	     * dcache write lock) means that we're fetching as good a version
	     * as was known to this client at the time of the last call to
	     * afs_VerifyVCache, since the latter updates the stat cache's
	     * m.DataVersion field under a vcache write lock, and from the
	     * time that the DFFetching flag goes on in afs_GetDCache (before
	     * the fetch starts), to the time it goes off (after the fetch
	     * completes), afs_GetDCache keeps at least a read lock on the
	     * vcache entry.
	     *
	     * This means that if the DFFetching flag is set, we can use that
	     * data for any reads that must come from the current version of
	     * the file (current == m.DataVersion).
	     *
	     * Another way of looking at this same point is this: if we're
	     * fetching some data and then try do an afs_VerifyVCache, the
	     * VerifyVCache operation will not complete until after the
	     * DFFetching flag is turned off and the dcache entry's f.versionNo
	     * field is updated.
	     *
	     * Note, by the way, that if DFFetching is set,
	     * m.DataVersion > f.versionNo (the latter is not updated until
	     * after the fetch completes).
	     */
	    if (tdc) {
		ReleaseReadLock(&tdc->lock);
		afs_PutDCache(tdc);	/* before reusing tdc */
	    }
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
    try_background:
#endif
	    tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2);
	    if (!tdc) {
	        error = ENETDOWN;
	        break;
	    }

	    ObtainReadLock(&tdc->lock);
	    /* now, first try to start transfer, if we'll need the data.  If
	     * data already coming, we don't need to do this, obviously.  Type
	     * 2 requests never return a null dcache entry, btw. */
	    if (!(tdc->dflags & DFFetching)
		&& !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
		/* have cache entry, it is not coming in now, and we'll need new data */
	      tagain:
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) {
#else
		if (trybusy && !afs_BBusy()) {
#endif
		    struct brequest *bp;
		    /* daemon is not busy */
		    ObtainSharedLock(&tdc->mflock, 667);
		    if (!(tdc->mflags & DFFetchReq)) {
			int dontwait = B_DONTWAIT;
			UpgradeSToWLock(&tdc->mflock, 668);
			tdc->mflags |= DFFetchReq;
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
			if (afs_protocols & VICEP_ACCESS)
			    dontwait = 0;
#endif
			bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred,
					(afs_size_t) filePos, (afs_size_t) 0,
					tdc, (void *)0, (void *)0);
			if (!bp) {
			    /* Bkg table full; retry deadlocks */
			    tdc->mflags &= ~DFFetchReq;
			    trybusy = 0;	/* Avoid bkg daemon since they're too busy */
			    ReleaseWriteLock(&tdc->mflock);
			    goto tagain;
			}
			ConvertWToSLock(&tdc->mflock);
		    }
		    code = 0;
		    ConvertSToRLock(&tdc->mflock);
		    while (!code && tdc->mflags & DFFetchReq) {
			afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
				   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
				   __LINE__, ICL_TYPE_POINTER, tdc,
				   ICL_TYPE_INT32, tdc->dflags);
			/* don't need waiting flag on this one */
			ReleaseReadLock(&tdc->mflock);
			ReleaseReadLock(&tdc->lock);
			ReleaseReadLock(&avc->lock);
			code = afs_osi_SleepSig(&tdc->validPos);
			ObtainReadLock(&avc->lock);
			ObtainReadLock(&tdc->lock);
			ObtainReadLock(&tdc->mflock);
		    }
		    ReleaseReadLock(&tdc->mflock);
		    if (code) {
			error = code;
			break;
		    }
		}
	    }
	    /* now data may have started flowing in (if DFFetching is on).  If
	     * data is now streaming in, then wait for some interesting stuff.
	     */
	    code = 0;
	    while (!code && (tdc->dflags & DFFetching)
		   && tdc->validPos <= filePos) {
		/* too early: wait for DFFetching flag to vanish,
		 * or data to appear */
		afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
			   __FILE__, ICL_TYPE_INT32, __LINE__,
			   ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
			   tdc->dflags);
		ReleaseReadLock(&tdc->lock);
		ReleaseReadLock(&avc->lock);
		code = afs_osi_SleepSig(&tdc->validPos);
		ObtainReadLock(&avc->lock);
		ObtainReadLock(&tdc->lock);
	    }
	    if (code) {
		error = code;
		break;
	    }
	    /* fetching flag gone, data is here, or we never tried
	     * (BBusy for instance) */
	    len = tdc->validPos - filePos;
	    versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0;
	    if (tdc->dflags & DFFetching) {
		/* still fetching, some new data is here:
		 * compute length and offset */
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
	    } else {
		/* no longer fetching, verify data version (avoid new
		 * GetDCache call) */
		if (versionOk && len > 0) {
		    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		} else {
		    /* don't have current data, so get it below */
		    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
			       ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
			       ICL_TYPE_HYPER, &avc->f.m.DataVersion,
			       ICL_TYPE_HYPER, &tdc->f.versionNo);
#if 0
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
if (afs_protocols & VICEP_ACCESS) {
printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n", tdc->f.versionNo.low, avc->f.m.DataVersion.low, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
printf("afs_read: validPos %llu filePos %llu totalLength %d m.Length %llu noLock %d\n", tdc->validPos, filePos, totalLength, avc->f.m.Length, noLock);
printf("afs_read: or len too low? %lld for %u.%u.%u\n", len, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
}
#endif
#endif
		    ReleaseReadLock(&tdc->lock);
		    afs_PutDCache(tdc);
		    tdc = NULL;
		}
	    }

	    if (!tdc) {
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		if (afs_protocols & VICEP_ACCESS) { /* avoid foreground fetch */
		    if (!versionOk) {
printf("afs_read: avoid forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
			goto try_background;
		    }
		}
#endif
		/* If we get here, it was not possible to start the
		 * background daemon. With flag == 1 afs_GetDCache
		 * does the FetchData rpc synchronously.
		 */
		ReleaseReadLock(&avc->lock);
		tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1);
		ObtainReadLock(&avc->lock);
		if (tdc)
		    ObtainReadLock(&tdc->lock);
	    }
	}

	if (!tdc) {
	    error = EIO;
	    break;
	}
	len = tdc->validPos - filePos;
	afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
		   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
		   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
	if (len > totalLength)
	    len = totalLength;	/* will read len bytes */
	if (len <= 0) {		/* shouldn't get here if DFFetching is on */
	    afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER,
		       tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(tdc->validPos),
		       ICL_TYPE_INT32, tdc->f.chunkBytes, ICL_TYPE_INT32,
		       tdc->dflags);
	    /* read past the end of a chunk, may not be at next chunk yet, and yet
	     * also not at eof, so may have to supply fake zeros */
	    len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;	/* bytes left in chunk addr space */
	    if (len > totalLength)
		len = totalLength;	/* and still within xfr request */
	    tlen = avc->f.m.Length - offset;	/* and still within file */
	    if (len > tlen)
		len = tlen;
	    if (len > AFS_ZEROS)
		len = sizeof(afs_zeros);	/* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
	    trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
	    afsio_copy(auio, &tuio, tvec);
	    trimlen = len;
	    afsio_trim(&tuio, trimlen);
#endif
	    AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
	    if (code) {
		error = code;
		break;
	    }
	} else {
	    /* get the data from the file */
	    tfile = (struct osi_file *)osi_UFSOpen(&tdc->f.inode);
#ifdef AFS_DARWIN80_ENV
	    trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
	    uio_setoffset(tuiop, offset);
#else
	    /* mung uio structure to be right for this transfer */
	    afsio_copy(auio, &tuio, tvec);
	    trimlen = len;
	    afsio_trim(&tuio, trimlen);
	    tuio.afsio_offset = offset;
#endif

#if defined(AFS_AIX41_ENV)
	    AFS_GUNLOCK();
	    code =
		VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL,
			  NULL, afs_osi_credp);
	    AFS_GLOCK();
#elif defined(AFS_AIX32_ENV)
	    code =
		VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL);
	    /* Flush all JFS pages now for big performance gain in big file cases
	     * If we do something like this, must check to be sure that AFS file
	     * isn't mmapped... see afs_gn_map() for why.
	     */
/*
	  if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) {
 many different ways to do similar things:
   so far, the best performing one is #2, but #1 might match it if we
   straighten out the confusion regarding which pages to flush.  It
   really does matter.
   1.	    vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1);
   2.	    vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
			(len + PAGESIZE-1)/PAGESIZE);
   3.	    vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly
   4.  	    vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails
	    tfile->vnode->v_gnode->gn_seg = NULL;
   5.       deletep
   6.       ipgrlse
   7.       ifreeseg
          Unfortunately, this seems to cause frequent "cache corruption" episodes.
   	    vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
			(len + PAGESIZE-1)/PAGESIZE);
	  }
*/
#elif defined(AFS_AIX_ENV)
	    code =
		VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset,
			  &tuio, NULL, NULL, -1);
#elif defined(AFS_SUN5_ENV)
	    AFS_GUNLOCK();
#ifdef AFS_SUN510_ENV
	    VOP_RWLOCK(tfile->vnode, 0, NULL);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, NULL);
	    VOP_RWUNLOCK(tfile->vnode, 0, NULL);
#else
	    VOP_RWLOCK(tfile->vnode, 0);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_RWUNLOCK(tfile->vnode, 0);
#endif
	    AFS_GLOCK();
#elif defined(AFS_SGI_ENV)
	    AFS_GUNLOCK();
	    AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ);
	    AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp,
			 code);
	    AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ);
	    AFS_GLOCK();
#elif defined(AFS_HPUX100_ENV)
	    AFS_GUNLOCK();
	    code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
	    AFS_GLOCK();
#elif defined(AFS_LINUX20_ENV)
	    AFS_GUNLOCK();
	    code = osi_rdwr(tfile, &tuio, UIO_READ);
	    AFS_GLOCK();
#elif defined(AFS_DARWIN80_ENV)
	    AFS_GUNLOCK();
	    code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
	    AFS_GLOCK();
#elif defined(AFS_DARWIN_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0, current_proc());
	    AFS_GLOCK();
#elif defined(AFS_FBSD80_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0);
	    AFS_GLOCK();
#elif defined(AFS_FBSD_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0, curthread);
	    AFS_GLOCK();
#elif defined(AFS_NBSD_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0);
            AFS_GLOCK();

#elif defined(AFS_XBSD_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0, curproc);
	    AFS_GLOCK();
#else
	    code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
#endif
	    osi_UFSClose(tfile);

	    if (code) {
		error = code;
		break;
	    }
	}
	/* otherwise we've read some, fixup length, etc and continue with next seg */
	len = len - AFS_UIO_RESID(tuiop);	/* compute amount really transferred */
	trimlen = len;
	afsio_skip(auio, trimlen);	/* update input uio structure */
	totalLength -= len;
	transferLength += len;
	filePos += len;
	if (len <= 0)
	    break;		/* surprise eof */
#ifdef AFS_DARWIN80_ENV
	if (tuiop) {
	    uio_free(tuiop);
	    tuiop = 0;
	}
#endif
    }

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch, obviously.
     */
    if (tdc) {
	ReleaseReadLock(&tdc->lock);
#if !defined(AFS_VM_RDWR_ENV)
	/*
         * try to queue prefetch, if needed. If DataVersion is zero there
	 * should not be any more: files with DV 0 never have been stored
	 * on the fileserver, symbolic links and directories never require
	 * more than a single chunk.
	 */
	if (!noLock && !(hiszero(avc->f.m.DataVersion))) {
	    if (!(tdc->mflags & DFNextStarted))
		afs_PrefetchChunk(avc, tdc, acred, treq);
	}
#endif
	afs_PutDCache(tdc);
    }
    if (!noLock)
	ReleaseReadLock(&avc->lock);

#ifdef AFS_DARWIN80_ENV
    if (tuiop)
       uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    AFS_DISCON_UNLOCK();
    error = afs_CheckCode(error, treq, 13);
    afs_DestroyReq(treq);
    return error;
}
Example #16
0
int
vn_rdwr_64FromKernelCode(
    enum uio_rw rw,
    struct vnode *vp,
    uint64_t base,
    int64_t len,
    off_t offset,
    enum uio_seg segflg,
    int ioflg,
    int *aresid,
    proc_t p)
{
    uio_t auio;
    //int spacetype;
    vfs_context_t context;
    int error=0;
    //char uio_buf[ UIO_SIZEOF(1) ];

    //context.vc_thread = current_thread();
    //context.vc_ucred = cred;

//	if (UIO_SEG_IS_USER_SPACE(segflg)) {
//		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
//	}
//	else {
//		spacetype = UIO_SYSSPACE;
//	}
    auio = uio_create(1, offset, UIO_SYSSPACE, rw);
    uio_addiov(auio, base, len);

//#if CONFIG_MACF
//	/* XXXMAC
//	 * 	IO_NOAUTH should be re-examined.
// 	 *	Likely that mediation should be performed in caller.
//	 */
//	if ((ioflg & IO_NOAUTH) == 0) {
//		/* passed cred is fp->f_cred */
//		if (rw == UIO_READ)
//			error = mac_vnode_check_read(&context, cred, vp);
//		else
//			error = mac_vnode_check_write(&context, cred, vp);
//	}
//#endif


    if (rw == UIO_READ)
    {
        error = VNOP_READ(vp, auio, ioflg, context);
    }
    else
    {
        error = VNOP_WRITE(vp, auio, ioflg, context);
    }

    if (aresid)
        // LP64todo - fix this
        *aresid = uio_resid(auio);
    else if (uio_resid(auio) && error == 0)
        error = EIO;
    return (error);
}
Example #17
0
int
afs_UFSRead(register struct vcache *avc, struct uio *auio,
            struct AFS_UCRED *acred, daddr_t albn, struct buf **abpp,
            int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, len, tlen;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop=NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
#endif
    struct osi_file *tfile;
    afs_int32 code;
    int trybusy = 1;
    struct vrequest treq;

    AFS_STATCNT(afs_UFSRead);
    if (avc && avc->vc_error)
        return EIO;

    AFS_DISCON_LOCK();

    /* check that we have the latest status info in the vnode cache */
    if ((code = afs_InitReq(&treq, acred)))
        return code;
    if (!noLock) {
        if (!avc)
            osi_Panic("null avc in afs_UFSRead");
        else {
            code = afs_VerifyVCache(avc, &treq);
            if (code) {
                code = afs_CheckCode(code, &treq, 11);	/* failed to get it */
                AFS_DISCON_UNLOCK();
                return code;
            }
        }
    }
#ifndef	AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
        if (!afs_AccessOK
                (avc, PRSFS_READ, &treq,
                 CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
            AFS_DISCON_UNLOCK();
            return afs_CheckCode(EACCES, &treq, 12);
        }
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
#endif
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
               ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
               totalLength, ICL_TYPE_OFFSET,
               ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
        ObtainReadLock(&avc->lock);
#if	defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
        hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    if (filePos >= avc->f.m.Length) {
        if (len > AFS_ZEROS)
            len = sizeof(afs_zeros);	/* and in 0 buffer */
        len = 0;
#ifdef AFS_DARWIN80_ENV
        trimlen = len;
        tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
        afsio_copy(auio, &tuio, tvec);
        trimlen = len;
        afsio_trim(&tuio, trimlen);
#endif
        AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }

    while (avc->f.m.Length > 0 && totalLength > 0) {
        /* read all of the cached info */
        if (filePos >= avc->f.m.Length)
            break;		/* all done */
        if (noLock) {
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);
            }
            tdc = afs_FindDCache(avc, filePos);
            if (tdc) {
                ObtainReadLock(&tdc->lock);
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                len = tdc->validPos - filePos;
            }
        } else {
            /* a tricky question: does the presence of the DFFetching flag
             * mean that we're fetching the latest version of the file?  No.
             * The server could update the file as soon as the fetch responsible
             * for the setting of the DFFetching flag completes.
             *
             * However, the presence of the DFFetching flag (visible under
             * a dcache read lock since it is set and cleared only under a
             * dcache write lock) means that we're fetching as good a version
             * as was known to this client at the time of the last call to
             * afs_VerifyVCache, since the latter updates the stat cache's
             * m.DataVersion field under a vcache write lock, and from the
             * time that the DFFetching flag goes on in afs_GetDCache (before
             * the fetch starts), to the time it goes off (after the fetch
             * completes), afs_GetDCache keeps at least a read lock on the
             * vcache entry.
             *
             * This means that if the DFFetching flag is set, we can use that
             * data for any reads that must come from the current version of
             * the file (current == m.DataVersion).
             *
             * Another way of looking at this same point is this: if we're
             * fetching some data and then try do an afs_VerifyVCache, the
             * VerifyVCache operation will not complete until after the
             * DFFetching flag is turned off and the dcache entry's f.versionNo
             * field is updated.
             *
             * Note, by the way, that if DFFetching is set,
             * m.DataVersion > f.versionNo (the latter is not updated until
             * after the fetch completes).
             */
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);	/* before reusing tdc */
            }
            tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2);
#ifdef AFS_DISCON_ENV
            if (!tdc) {
                printf("Network down in afs_read");
                error = ENETDOWN;
                break;
            }
#endif /* AFS_DISCON_ENV */

            ObtainReadLock(&tdc->lock);
            /* now, first try to start transfer, if we'll need the data.  If
             * data already coming, we don't need to do this, obviously.  Type
             * 2 requests never return a null dcache entry, btw. */
            if (!(tdc->dflags & DFFetching)
                    && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
                /* have cache entry, it is not coming in now, and we'll need new data */
tagain:
                if (trybusy && !afs_BBusy()) {
                    struct brequest *bp;
                    /* daemon is not busy */
                    ObtainSharedLock(&tdc->mflock, 667);
                    if (!(tdc->mflags & DFFetchReq)) {
                        UpgradeSToWLock(&tdc->mflock, 668);
                        tdc->mflags |= DFFetchReq;
                        bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred,
                                        (afs_size_t) filePos, (afs_size_t) 0,
                                        tdc);
                        if (!bp) {
                            /* Bkg table full; retry deadlocks */
                            tdc->mflags &= ~DFFetchReq;
                            trybusy = 0;	/* Avoid bkg daemon since they're too busy */
                            ReleaseWriteLock(&tdc->mflock);
                            goto tagain;
                        }
                        ConvertWToSLock(&tdc->mflock);
                    }
                    code = 0;
                    ConvertSToRLock(&tdc->mflock);
                    while (!code && tdc->mflags & DFFetchReq) {
                        afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
                                   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
                                   __LINE__, ICL_TYPE_POINTER, tdc,
                                   ICL_TYPE_INT32, tdc->dflags);
                        /* don't need waiting flag on this one */
                        ReleaseReadLock(&tdc->mflock);
                        ReleaseReadLock(&tdc->lock);
                        ReleaseReadLock(&avc->lock);
                        code = afs_osi_SleepSig(&tdc->validPos);
                        ObtainReadLock(&avc->lock);
                        ObtainReadLock(&tdc->lock);
                        ObtainReadLock(&tdc->mflock);
                    }
                    ReleaseReadLock(&tdc->mflock);
                    if (code) {
                        error = code;
                        break;
                    }
                }
            }
            /* now data may have started flowing in (if DFFetching is on).  If
             * data is now streaming in, then wait for some interesting stuff.
             */
            code = 0;
            while (!code && (tdc->dflags & DFFetching)
                    && tdc->validPos <= filePos) {
                /* too early: wait for DFFetching flag to vanish,
                 * or data to appear */
                afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
                           __FILE__, ICL_TYPE_INT32, __LINE__,
                           ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
                           tdc->dflags);
                ReleaseReadLock(&tdc->lock);
                ReleaseReadLock(&avc->lock);
                code = afs_osi_SleepSig(&tdc->validPos);
                ObtainReadLock(&avc->lock);
                ObtainReadLock(&tdc->lock);
            }
            if (code) {
                error = code;
                break;
            }
            /* fetching flag gone, data is here, or we never tried
             * (BBusy for instance) */
            if (tdc->dflags & DFFetching) {
                /* still fetching, some new data is here:
                 * compute length and offset */
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                len = tdc->validPos - filePos;
            } else {
                /* no longer fetching, verify data version (avoid new
                 * GetDCache call) */
                if (hsame(avc->f.m.DataVersion, tdc->f.versionNo)
                        && ((len = tdc->validPos - filePos) > 0)) {
                    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                } else {
                    /* don't have current data, so get it below */
                    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
                               ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
                               ICL_TYPE_HYPER, &avc->f.m.DataVersion,
                               ICL_TYPE_HYPER, &tdc->f.versionNo);
                    ReleaseReadLock(&tdc->lock);
                    afs_PutDCache(tdc);
                    tdc = NULL;
                }
            }

            if (!tdc) {
                /* If we get, it was not possible to start the
                 * background daemon. With flag == 1 afs_GetDCache
                 * does the FetchData rpc synchronously.
                 */
                ReleaseReadLock(&avc->lock);
                tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1);
                ObtainReadLock(&avc->lock);
                if (tdc)
                    ObtainReadLock(&tdc->lock);
            }
        }

        if (!tdc) {
            error = EIO;
            break;
        }
        len = tdc->validPos - filePos;
        afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
        if (len > totalLength)
            len = totalLength;	/* will read len bytes */
        if (len <= 0) {		/* shouldn't get here if DFFetching is on */
            afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER,
                       tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(tdc->validPos),
                       ICL_TYPE_INT32, tdc->f.chunkBytes, ICL_TYPE_INT32,
                       tdc->dflags);
            /* read past the end of a chunk, may not be at next chunk yet, and yet
             * also not at eof, so may have to supply fake zeros */
            len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;	/* bytes left in chunk addr space */
            if (len > totalLength)
                len = totalLength;	/* and still within xfr request */
            tlen = avc->f.m.Length - offset;	/* and still within file */
            if (len > tlen)
                len = tlen;
            if (len > AFS_ZEROS)
                len = sizeof(afs_zeros);	/* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
#endif
            AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
            if (code) {
                error = code;
                break;
            }
        } else {
            /* get the data from the file */
#ifdef IHINT
            if (tfile = tdc->ihint) {
                if (tdc->f.inode != tfile->inum) {
                    afs_warn("afs_UFSRead: %x hint mismatch tdc %d inum %d\n",
                             tdc, tdc->f.inode, tfile->inum);
                    osi_UFSClose(tfile);
                    tdc->ihint = tfile = 0;
                    nihints--;
                }
            }
            if (tfile != 0) {
                usedihint++;
            } else
#endif /* IHINT */
#if defined(LINUX_USE_FH)
                tfile = (struct osi_file *)osi_UFSOpen_fh(&tdc->f.fh, tdc->f.fh_type);
#else
                tfile = (struct osi_file *)osi_UFSOpen(tdc->f.inode);
#endif
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
            uio_setoffset(tuiop, offset);
#else
            /* mung uio structure to be right for this transfer */
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
            tuio.afsio_offset = offset;
#endif

#if defined(AFS_AIX41_ENV)
            AFS_GUNLOCK();
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL,
                          NULL, afs_osi_credp);
            AFS_GLOCK();
#elif defined(AFS_AIX32_ENV)
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL);
            /* Flush all JFS pages now for big performance gain in big file cases
             * If we do something like this, must check to be sure that AFS file
             * isn't mmapped... see afs_gn_map() for why.
             */
            /*
            	  if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) {
             many different ways to do similar things:
               so far, the best performing one is #2, but #1 might match it if we
               straighten out the confusion regarding which pages to flush.  It
               really does matter.
               1.	    vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1);
               2.	    vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
            			(len + PAGESIZE-1)/PAGESIZE);
               3.	    vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly
               4.  	    vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails
            	    tfile->vnode->v_gnode->gn_seg = NULL;
               5.       deletep
               6.       ipgrlse
               7.       ifreeseg
                      Unfortunately, this seems to cause frequent "cache corruption" episodes.
               	    vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
            			(len + PAGESIZE-1)/PAGESIZE);
            	  }
            */
#elif defined(AFS_AIX_ENV)
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset,
                          &tuio, NULL, NULL, -1);
#elif defined(AFS_SUN5_ENV)
            AFS_GUNLOCK();
#ifdef AFS_SUN510_ENV
            {
                caller_context_t ct;

                VOP_RWLOCK(tfile->vnode, 0, &ct);
                code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, &ct);
                VOP_RWUNLOCK(tfile->vnode, 0, &ct);
            }
#else
            VOP_RWLOCK(tfile->vnode, 0);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_RWUNLOCK(tfile->vnode, 0);
#endif
            AFS_GLOCK();
#elif defined(AFS_SGI_ENV)
            AFS_GUNLOCK();
            AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ);
            AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp,
                         code);
            AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ);
            AFS_GLOCK();
#elif defined(AFS_OSF_ENV)
            tuio.uio_rw = UIO_READ;
            AFS_GUNLOCK();
            VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, code);
            AFS_GLOCK();
#elif defined(AFS_HPUX100_ENV)
            AFS_GUNLOCK();
            code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
            AFS_GLOCK();
#elif defined(AFS_LINUX20_ENV)
            AFS_GUNLOCK();
            code = osi_rdwr(tfile, &tuio, UIO_READ);
            AFS_GLOCK();
#elif defined(AFS_DARWIN80_ENV)
            AFS_GUNLOCK();
            code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
            AFS_GLOCK();
#elif defined(AFS_DARWIN_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, current_proc());
            AFS_GLOCK();
#elif defined(AFS_FBSD80_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0);
            AFS_GLOCK();
#elif defined(AFS_FBSD50_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, curthread);
            AFS_GLOCK();
#elif defined(AFS_XBSD_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, curproc);
            AFS_GLOCK();
#else
            code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
#endif

#ifdef IHINT
            if (!tdc->ihint && nihints < maxIHint) {
                tdc->ihint = tfile;
                nihints++;
            } else
#endif /* IHINT */
                osi_UFSClose(tfile);

            if (code) {
                error = code;
                break;
            }
        }
        /* otherwise we've read some, fixup length, etc and continue with next seg */
        len = len - AFS_UIO_RESID(tuiop);	/* compute amount really transferred */
        trimlen = len;
        afsio_skip(auio, trimlen);	/* update input uio structure */
        totalLength -= len;
        transferLength += len;
        filePos += len;
        if (len <= 0)
            break;		/* surprise eof */
#ifdef AFS_DARWIN80_ENV
        if (tuiop) {
            uio_free(tuiop);
            tuiop = 0;
        }
#endif
    }

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch, obviously.
     */
    if (tdc) {
        ReleaseReadLock(&tdc->lock);
#if !defined(AFS_VM_RDWR_ENV)
        /* try to queue prefetch, if needed */
        if (!noLock) {
            if (!(tdc->mflags & DFNextStarted))
                afs_PrefetchChunk(avc, tdc, acred, &treq);
        }
#endif
        afs_PutDCache(tdc);
    }
    if (!noLock)
        ReleaseReadLock(&avc->lock);

#ifdef AFS_DARWIN80_ENV
    if (tuiop)
        uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    AFS_DISCON_UNLOCK();
    error = afs_CheckCode(error, &treq, 13);
    return error;
}