Beispiel #1
0
int
afs_StoreAllSegments(struct vcache *avc, struct vrequest *areq,
		     int sync)
{
    struct dcache *tdc;
    afs_int32 code = 0;
    afs_int32 index;
    afs_int32 origCBs, foreign = 0;
    int hash;
    afs_hyper_t newDV, oldDV;	/* DV when we start, and finish, respectively */
    struct dcache **dcList;
    unsigned int i, j, minj, moredata, high, off;
    afs_size_t maxStoredLength;	/* highest offset we've written to server. */
    int safety, marineronce = 0;

    AFS_STATCNT(afs_StoreAllSegments);

    hset(oldDV, avc->f.m.DataVersion);
    hset(newDV, avc->f.m.DataVersion);
    hash = DVHash(&avc->f.fid);
    foreign = (avc->f.states & CForeign);
    dcList = osi_AllocLargeSpace(AFS_LRALLOCSIZ);
    afs_Trace2(afs_iclSetp, CM_TRACE_STOREALL, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length));
#if !defined(AFS_AIX32_ENV) && !defined(AFS_SGI65_ENV)
    /* In the aix vm implementation we need to do the vm_writep even
     * on the memcache case since that's we adjust the file's size
     * and finish flushing partial vm pages.
     */
    if ((cacheDiskType != AFS_FCACHE_TYPE_MEM) ||
	(sync & AFS_VMSYNC_INVAL) || (sync & AFS_VMSYNC) ||
	(sync & AFS_LASTSTORE))
#endif /* !AFS_AIX32_ENV && !AFS_SGI65_ENV */
    {
	/* If we're not diskless, reading a file may stress the VM
	 * system enough to cause a pageout, and this vnode would be
	 * locked when the pageout occurs.  We can prevent this problem
	 * by making sure all dirty pages are already flushed.  We don't
	 * do this when diskless because reading a diskless (i.e.
	 * memory-resident) chunk doesn't require using new VM, and we
	 * also don't want to dump more dirty data into a diskless cache,
	 * since they're smaller, and we might exceed its available
	 * space.
	 */
#if	defined(AFS_SUN5_ENV)
	if (sync & AFS_VMSYNC_INVAL)	/* invalidate VM pages */
	    osi_VM_TryToSmush(avc, CRED(), 1);
	else
#endif
	    osi_VM_StoreAllSegments(avc);
    }
    if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
	/* This will probably make someone sad ... */
	/*printf("Net down in afs_StoreSegments\n");*/
	return ENETDOWN;
    }
    ConvertWToSLock(&avc->lock);

    /*
     * Subsequent code expects a sorted list, and it expects all the
     * chunks in the list to be contiguous, so we need a sort and a
     * while loop in here, too - but this will work for a first pass...
     * 92.10.05 - OK, there's a sort in here now.  It's kind of a modified
     *            bin sort, I guess.  Chunk numbers start with 0
     *
     * - Have to get a write lock on xdcache because GetDSlot might need it (if
     *   the chunk doesn't have a dcache struct).
     *   This seems like overkill in most cases.
     * - I'm not sure that it's safe to do "index = .hvNextp", then unlock
     *   xdcache, then relock xdcache and try to use index.  It is done
     *   a lot elsewhere in the CM, but I'm not buying that argument.
     * - should be able to check IFDataMod without doing the GetDSlot (just
     *   hold afs_xdcache).  That way, it's easy to do this without the
     *   writelock on afs_xdcache, and we save unneccessary disk
     *   operations. I don't think that works, 'cuz the next pointers
     *   are still on disk.
     */
    origCBs = afs_allCBs;

    maxStoredLength = 0;
    minj = 0;

    do {
	memset(dcList, 0, NCHUNKSATONCE * sizeof(struct dcache *));
	high = 0;
	moredata = FALSE;

	/* lock and start over from beginning of hash chain
	 * in order to avoid a race condition. */
	ObtainWriteLock(&afs_xdcache, 284);
	index = afs_dvhashTbl[hash];

	for (j = 0; index != NULLIDX;) {
	    if ((afs_indexFlags[index] & IFDataMod)
		&& (afs_indexUnique[index] == avc->f.fid.Fid.Unique)) {
		tdc = afs_GetValidDSlot(index);	/* refcount+1. */
		if (!tdc) {
		    ReleaseWriteLock(&afs_xdcache);
		    code = EIO;
		    goto done;
		}
		ReleaseReadLock(&tdc->tlock);
		if (!FidCmp(&tdc->f.fid, &avc->f.fid) && tdc->f.chunk >= minj) {
		    off = tdc->f.chunk - minj;
		    if (off < NCHUNKSATONCE) {
			if (dcList[off])
			    osi_Panic("dclist slot already in use!");
			if (afs_mariner && !marineronce) {
			    /* first chunk only */
			    afs_MarinerLog("store$Storing", avc);
			    marineronce++;
			}
			dcList[off] = tdc;
			if (off > high)
			    high = off;
			j++;
			/* DCLOCKXXX: chunkBytes is protected by tdc->lock which we
			 * can't grab here, due to lock ordering with afs_xdcache.
			 * So, disable this shortcut for now.  -- kolya 2001-10-13
			 */
			/* shortcut: big win for little files */
			/* tlen -= tdc->f.chunkBytes;
			 * if (tlen <= 0)
			 *    break;
			 */
		    } else {
			moredata = TRUE;
			afs_PutDCache(tdc);
			if (j == NCHUNKSATONCE)
			    break;
		    }
		} else {
		    afs_PutDCache(tdc);
		}
	    }
	    index = afs_dvnextTbl[index];
	}
	ReleaseWriteLock(&afs_xdcache);

	/* this guy writes chunks, puts back dcache structs, and bumps newDV */
	/* "moredata" just says "there are more dirty chunks yet to come".
	 */
	if (j) {
	    code =
		afs_CacheStoreVCache(dcList, avc, areq, sync,
				   minj, high, moredata,
				   &newDV, &maxStoredLength);
	    /* Release any zero-length dcache entries in our interval
	     * that we locked but didn't store back above.
	     */
	    for (j = 0; j <= high; j++) {
		tdc = dcList[j];
		if (tdc) {
		    osi_Assert(tdc->f.chunkBytes == 0);
		    ReleaseSharedLock(&tdc->lock);
		    afs_PutDCache(tdc);
		}
	    }
	}
	/* if (j) */
	minj += NCHUNKSATONCE;
    } while (!code && moredata);

 done:
    UpgradeSToWLock(&avc->lock, 29);

    /* send a trivial truncation store if did nothing else */
    if (code == 0) {
	/*
	 * Call StoreMini if we haven't written enough data to extend the
	 * file at the fileserver to the client's notion of the file length.
	 */
	if ((avc->f.truncPos != AFS_NOTRUNC)
	    || ((avc->f.states & CExtendedFile)
		&& (maxStoredLength < avc->f.m.Length))) {
	    code = afs_StoreMini(avc, areq);
	    if (code == 0)
		hadd32(newDV, 1);	/* just bumped here, too */
	}
	avc->f.states &= ~CExtendedFile;
    }

    /*
     * Finally, turn off DWriting, turn on DFEntryMod,
     * update f.versionNo.
     * A lot of this could be integrated into the loop above
     */
    if (!code) {
	afs_hyper_t h_unset;
	hones(h_unset);

	minj = 0;

	do {
	    moredata = FALSE;
	    memset(dcList, 0,
		   NCHUNKSATONCE * sizeof(struct dcache *));

	    /* overkill, but it gets the lock in case GetDSlot needs it */
	    ObtainWriteLock(&afs_xdcache, 285);

	    for (j = 0, safety = 0, index = afs_dvhashTbl[hash];
		 index != NULLIDX && safety < afs_cacheFiles + 2;
	         index = afs_dvnextTbl[index]) {

		if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
		    tdc = afs_GetValidDSlot(index);
		    if (!tdc) {
			/* This is okay; since manipulating the dcaches at this
			 * point is best-effort. We only get a dcache here to
			 * increment the dv and turn off DWriting. If we were
			 * supposed to do that for a dcache, but could not
			 * due to an I/O error, it just means the dv won't
			 * be updated so we don't be able to use that cached
			 * chunk in the future. That's inefficient, but not
			 * an error. */
			continue;
		    }
		    ReleaseReadLock(&tdc->tlock);

		    if (!FidCmp(&tdc->f.fid, &avc->f.fid)
			&& tdc->f.chunk >= minj) {
			off = tdc->f.chunk - minj;
			if (off < NCHUNKSATONCE) {
			    /* this is the file, and the correct chunk range */
			    if (j >= NCHUNKSATONCE)
				osi_Panic
				    ("Too many dcache entries in range\n");
			    dcList[j++] = tdc;
			} else {
			    moredata = TRUE;
			    afs_PutDCache(tdc);
			    if (j == NCHUNKSATONCE)
				break;
			}
		    } else {
			afs_PutDCache(tdc);
		    }
		}
	    }
	    ReleaseWriteLock(&afs_xdcache);

	    for (i = 0; i < j; i++) {
		/* Iterate over the dcache entries we collected above */
		tdc = dcList[i];
		ObtainSharedLock(&tdc->lock, 677);

		/* was code here to clear IFDataMod, but it should only be done
		 * in storedcache and storealldcache.
		 */
		/* Only increase DV if we had up-to-date data to start with.
		 * Otherwise, we could be falsely upgrading an old chunk
		 * (that we never read) into one labelled with the current
		 * DV #.  Also note that we check that no intervening stores
		 * occurred, otherwise we might mislabel cache information
		 * for a chunk that we didn't store this time
		 */
		/* Don't update the version number if it's not yet set. */
		if (!hsame(tdc->f.versionNo, h_unset)
		    && hcmp(tdc->f.versionNo, oldDV) >= 0) {

		    if ((!(afs_dvhack || foreign)
			 && hsame(avc->f.m.DataVersion, newDV))
			|| ((afs_dvhack || foreign)
			    && (origCBs == afs_allCBs))) {
			/* no error, this is the DV */

			UpgradeSToWLock(&tdc->lock, 678);
			hset(tdc->f.versionNo, avc->f.m.DataVersion);
			tdc->dflags |= DFEntryMod;
			/* DWriting may not have gotten cleared above, if all
			 * we did was a StoreMini */
			tdc->f.states &= ~DWriting;
			ConvertWToSLock(&tdc->lock);
		    }
		}

		ReleaseSharedLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }

	    minj += NCHUNKSATONCE;

	} while (moredata);
    }

    if (code) {
	/*
	 * Invalidate chunks after an error for ccores files since
	 * afs_inactive won't be called for these and they won't be
	 * invalidated. Also discard data if it's a permanent error from the
	 * fileserver.
	 */
	if (areq->permWriteError || (avc->f.states & CCore)) {
	    afs_InvalidateAllSegments(avc);
	}
    }
    afs_Trace3(afs_iclSetp, CM_TRACE_STOREALLDONE, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_INT32, avc->f.m.Length, ICL_TYPE_INT32, code);
    /* would like a Trace5, but it doesn't exist... */
    afs_Trace3(afs_iclSetp, CM_TRACE_AVCLOCKER, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_INT32, avc->lock.wait_states, ICL_TYPE_INT32,
	       avc->lock.excl_locked);
    afs_Trace4(afs_iclSetp, CM_TRACE_AVCLOCKEE, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_INT32, avc->lock.wait_states, ICL_TYPE_INT32,
	       avc->lock.readers_reading, ICL_TYPE_INT32,
	       avc->lock.num_waiting);

    /*
     * Finally, if updated DataVersion matches newDV, we did all of the
     * stores.  If mapDV indicates that the page cache was flushed up
     * to when we started the store, then we can relabel them as flushed
     * as recently as newDV.
     * Turn off CDirty bit because the stored data is now in sync with server.
     */
    if (code == 0 && hcmp(avc->mapDV, oldDV) >= 0) {
	if ((!(afs_dvhack || foreign) && hsame(avc->f.m.DataVersion, newDV))
	    || ((afs_dvhack || foreign) && (origCBs == afs_allCBs))) {
	    hset(avc->mapDV, newDV);
	    avc->f.states &= ~CDirty;
	}
    }
    osi_FreeLargeSpace(dcList);

    /* If not the final write a temporary error is ok. */
    if (code && !areq->permWriteError && !(sync & AFS_LASTSTORE))
	code = 0;

    return code;

}				/*afs_StoreAllSegments (new 03/02/94) */
Beispiel #2
0
/* question: does afs_create need to set CDirty in the adp or the avc?
 * I think we can get away without it, but I'm not sure.  Note that
 * afs_setattr is called in here for truncation.
 */
#ifdef AFS_SGI64_ENV
int
afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, int flags,
	   int amode, struct vcache **avcp, afs_ucred_t *acred)
#else /* AFS_SGI64_ENV */
int
afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs,
	   enum vcexcl aexcl, int amode, struct vcache **avcp,
	   afs_ucred_t *acred)
#endif				/* AFS_SGI64_ENV */
{
    afs_int32 origCBs, origZaps, finalZaps;
    struct vrequest *treq = NULL;
    afs_int32 code;
    struct afs_conn *tc;
    struct VenusFid newFid;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus *OutFidStatus, *OutDirStatus;
    struct AFSVolSync tsync;
    struct AFSCallBack CallBack;
    afs_int32 now;
    struct dcache *tdc;
    afs_size_t offset, len;
    struct server *hostp = 0;
    struct vcache *tvc;
    struct volume *volp = 0;
    struct afs_fakestat_state fakestate;
    struct rx_connection *rxconn;
    XSTATS_DECLS;
    OSI_VC_CONVERT(adp);

    AFS_STATCNT(afs_create);

    OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    memset(&InStatus, 0, sizeof(InStatus));

    if ((code = afs_CreateReq(&treq, acred)))
	goto done2;

    afs_Trace3(afs_iclSetp, CM_TRACE_CREATE, ICL_TYPE_POINTER, adp,
	       ICL_TYPE_STRING, aname, ICL_TYPE_INT32, amode);

    afs_InitFakeStat(&fakestate);

#ifdef AFS_SGI65_ENV
    /* If avcp is passed not null, it's the old reference to this file.
     * We can use this to avoid create races. For now, just decrement
     * the reference count on it.
     */
    if (*avcp) {
	AFS_RELE(AFSTOV(*avcp));
	*avcp = NULL;
    }
#endif

    if (strlen(aname) > AFSNAMEMAX) {
	code = ENAMETOOLONG;
	goto done3;
    }

    if (!afs_ENameOK(aname)) {
	code = EINVAL;
	goto done3;
    }
    switch (attrs->va_type) {
    case VBLK:
    case VCHR:
#if	!defined(AFS_SUN5_ENV)
    case VSOCK:
#endif
    case VFIFO:
	/* We don't support special devices or FIFOs */
	code = EINVAL;
	goto done3;
    default:
	;
    }
    AFS_DISCON_LOCK();

    code = afs_EvalFakeStat(&adp, &fakestate, treq);
    if (code)
	goto done;
  tagain:
    code = afs_VerifyVCache(adp, treq);
    if (code)
	goto done;

    /** If the volume is read-only, return error without making an RPC to the
      * fileserver
      */
    if (adp->f.states & CRO) {
	code = EROFS;
	goto done;
    }

    if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
        code = ENETDOWN;
        goto done;
    }

    tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock, 135);
    if (tdc)
	ObtainSharedLock(&tdc->lock, 630);

    /*
     * Make sure that the data in the cache is current. We may have
     * received a callback while we were waiting for the write lock.
     */
    if (!(adp->f.states & CStatd)
	|| (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) {
	ReleaseWriteLock(&adp->lock);
	if (tdc) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	}
	goto tagain;
    }
    if (tdc) {
	/* see if file already exists.  If it does, we only set 
	 * the size attributes (to handle O_TRUNC) */
	code = afs_dir_Lookup(tdc, aname, &newFid.Fid);	/* use dnlc first xxx */
	if (code == 0) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	    ReleaseWriteLock(&adp->lock);
#ifdef AFS_SGI64_ENV
	    if (flags & VEXCL) {
#else
	    if (aexcl != NONEXCL) {
#endif
		code = EEXIST;	/* file exists in excl mode open */
		goto done;
	    }
	    /* found the file, so use it */
	    newFid.Cell = adp->f.fid.Cell;
	    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	    tvc = NULL;
	    if (newFid.Fid.Unique == 0) {
		tvc = afs_LookupVCache(&newFid, treq, NULL, adp, aname);
	    }
	    if (!tvc)		/* lookup failed or wasn't called */
		tvc = afs_GetVCache(&newFid, treq, NULL, NULL);

	    if (tvc) {
		/* if the thing exists, we need the right access to open it.
		 * we must check that here, since no other checks are
		 * made by the open system call */
		len = attrs->va_size;	/* only do the truncate */
		/*
		 * We used to check always for READ access before; the
		 * problem is that we will fail if the existing file
		 * has mode -w-w-w, which is wrong.
		 */
		if ((amode & VREAD)
		    && !afs_AccessOK(tvc, PRSFS_READ, treq, CHECK_MODE_BITS)) {
		    afs_PutVCache(tvc);
		    code = EACCES;
		    goto done;
		}
#if defined(AFS_DARWIN80_ENV)
		if ((amode & VWRITE) || VATTR_IS_ACTIVE(attrs, va_data_size))
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		if ((amode & VWRITE) || (attrs->va_mask & AT_SIZE))
#else
		if ((amode & VWRITE) || len != 0xffffffff)
#endif
		{
		    /* needed for write access check */
		    tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
		    tvc->f.parent.unique = adp->f.fid.Fid.Unique;
		    /* need write mode for these guys */
		    if (!afs_AccessOK
			(tvc, PRSFS_WRITE, treq, CHECK_MODE_BITS)) {
			afs_PutVCache(tvc);
			code = EACCES;
			goto done;
		    }
		}
#if defined(AFS_DARWIN80_ENV)
		if (VATTR_IS_ACTIVE(attrs, va_data_size))
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		if (attrs->va_mask & AT_SIZE)
#else
		if (len != 0xffffffff)
#endif
		{
		    if (vType(tvc) != VREG) {
			afs_PutVCache(tvc);
			code = EISDIR;
			goto done;
		    }
		    /* do a truncate */
#if defined(AFS_DARWIN80_ENV)
		    VATTR_INIT(attrs);
		    VATTR_SET_SUPPORTED(attrs, va_data_size);
		    VATTR_SET_ACTIVE(attrs, va_data_size);
#elif defined(UKERNEL)
		    attrs->va_mask = ATTR_SIZE;
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		    attrs->va_mask = AT_SIZE;
#else
		    VATTR_NULL(attrs);
#endif
		    attrs->va_size = len;
		    ObtainWriteLock(&tvc->lock, 136);
		    tvc->f.states |= CCreating;
		    ReleaseWriteLock(&tvc->lock);
#if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
#if defined(AFS_SGI64_ENV)
		    code =
			afs_setattr(VNODE_TO_FIRST_BHV((vnode_t *) tvc),
				    attrs, 0, acred);
#else
		    code = afs_setattr(tvc, attrs, 0, acred);
#endif /* AFS_SGI64_ENV */
#else /* SUN5 || SGI */
		    code = afs_setattr(tvc, attrs, acred);
#endif /* SUN5 || SGI */
		    ObtainWriteLock(&tvc->lock, 137);
		    tvc->f.states &= ~CCreating;
		    ReleaseWriteLock(&tvc->lock);
		    if (code) {
			afs_PutVCache(tvc);
			goto done;
		    }
		}
		*avcp = tvc;
	    } else
		code = ENOENT;	/* shouldn't get here */
	    /* make sure vrefCount bumped only if code == 0 */
	    goto done;
	}
    }
    
    /* if we create the file, we don't do any access checks, since
     * that's how O_CREAT is supposed to work */
    if (adp->f.states & CForeign) {
	origCBs = afs_allCBs;
	origZaps = afs_allZaps;
    } else {
	origCBs = afs_evenCBs;	/* if changes, we don't really have a callback */
	origZaps = afs_evenZaps;	/* number of even numbered vnodes discarded */
    }
    InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE | AFS_SETGROUP;
    InStatus.ClientModTime = osi_Time();
    InStatus.Group = (afs_int32) afs_cr_gid(acred);
    if (AFS_NFSXLATORREQ(acred)) {
	/*
	 * XXX The following is mainly used to fix a bug in the HP-UX
	 * nfs client where they create files with mode of 0 without
	 * doing any setattr later on to fix it.  * XXX
	 */
#if	defined(AFS_AIX_ENV)
	if (attrs->va_mode != -1) {
#else
#if	defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
	if (attrs->va_mask & AT_MODE) {
#else
	if (attrs->va_mode != ((unsigned short)-1)) {
#endif
#endif
	    if (!attrs->va_mode)
		attrs->va_mode = 0x1b6;	/* XXX default mode: rw-rw-rw XXX */
	}
    }

    if (!AFS_IS_DISCONNECTED) {
	/* If not disconnected, connect to the server.*/

    	InStatus.UnixModeBits = attrs->va_mode & 0xffff;	/* only care about protection bits */
    	do {
	    tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn);
	    if (tc) {
	    	hostp = tc->srvr->server;	/* remember for callback processing */
	    	now = osi_Time();
	    	XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_CREATEFILE);
	    	RX_AFS_GUNLOCK();
	    	code =
		    RXAFS_CreateFile(rxconn, (struct AFSFid *)&adp->f.fid.Fid,
				 aname, &InStatus, (struct AFSFid *)
				 &newFid.Fid, OutFidStatus, OutDirStatus,
				 &CallBack, &tsync);
	    	RX_AFS_GLOCK();
	    	XSTATS_END_TIME;
	    	CallBack.ExpirationTime += now;
	    } else
	    	code = -1;
    	} while (afs_Analyze
	         (tc, rxconn, code, &adp->f.fid, treq, AFS_STATS_FS_RPCIDX_CREATEFILE,
	          SHARED_LOCK, NULL));

	if ((code == EEXIST || code == UAEEXIST) &&
#ifdef AFS_SGI64_ENV
    	!(flags & VEXCL)
#else /* AFS_SGI64_ENV */
    	aexcl == NONEXCL
#endif
    	) {
	    /* if we get an EEXIST in nonexcl mode, just do a lookup */
	    if (tdc) {
	    	ReleaseSharedLock(&tdc->lock);
	    	afs_PutDCache(tdc);
	    }
	    ReleaseWriteLock(&adp->lock);


#if defined(AFS_SGI64_ENV)
	    code = afs_lookup(VNODE_TO_FIRST_BHV((vnode_t *) adp), aname, avcp,
				  NULL, 0, NULL, acred);
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
	    code = afs_lookup(adp, aname, avcp, NULL, 0, NULL, acred);
#elif defined(UKERNEL)
	    code = afs_lookup(adp, aname, avcp, acred, 0);
#elif !defined(AFS_DARWIN_ENV)
	    code = afs_lookup(adp, aname, avcp, acred);
#endif
	goto done;
        }

	if (code) {
	    if (code < 0) {
	    	ObtainWriteLock(&afs_xcbhash, 488);
	    	afs_DequeueCallback(adp);
	    	adp->f.states &= ~CStatd;
	    	ReleaseWriteLock(&afs_xcbhash);
	    	osi_dnlc_purgedp(adp);
	    }
	    ReleaseWriteLock(&adp->lock);
	    if (tdc) {
	    	ReleaseSharedLock(&tdc->lock);
	    	afs_PutDCache(tdc);
	    }
	goto done;
	}

    } else {
	/* Generate a fake FID for disconnected mode. */
	newFid.Cell = adp->f.fid.Cell;
	newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	afs_GenFakeFid(&newFid, VREG, 1);
    }				/* if (!AFS_IS_DISCON_RW) */

    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc)
	UpgradeSToWLock(&tdc->lock, 631);
    if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) {
	/* we can do it locally */
	ObtainWriteLock(&afs_xdcache, 291);
	code = afs_dir_Create(tdc, aname, &newFid.Fid);
	ReleaseWriteLock(&afs_xdcache);
	if (code) {
	    ZapDCE(tdc);
	    DZap(tdc);
	}
    }
    if (tdc) {
	ReleaseWriteLock(&tdc->lock);
	afs_PutDCache(tdc);
    }
    if (AFS_IS_DISCON_RW)
	adp->f.m.LinkCount++;

    newFid.Cell = adp->f.fid.Cell;
    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);
    volp = afs_FindVolume(&newFid, READ_LOCK);

    /* New tricky optimistic callback handling algorithm for file creation works
     * as follows.  We create the file essentially with no locks set at all.  File
     * server may thus handle operations from others cache managers as well as from
     * this very own cache manager that reference the file in question before
     * we managed to create the cache entry.  However, if anyone else changes
     * any of the status information for a file, we'll see afs_evenCBs increase
     * (files always have even fids).  If someone on this workstation manages
     * to do something to the file, they'll end up having to create a cache
     * entry for the new file.  Either we'll find it once we've got the afs_xvcache
     * lock set, or it was also *deleted* the vnode before we got there, in which case
     * we will find evenZaps has changed, too.  Thus, we only assume we have the right
     * status information if no callbacks or vnode removals have occurred to even
     * numbered files from the time the call started until the time that we got the xvcache
     * lock set.  Of course, this also assumes that any call that modifies a file first
     * gets a write lock on the file's vnode, but if that weren't true, the whole cache manager
     * would fail, since no call would be able to update the local vnode status after modifying
     * a file on a file server. */
    ObtainWriteLock(&afs_xvcache, 138);
    if (adp->f.states & CForeign)
	finalZaps = afs_allZaps;	/* do this before calling newvcache */
    else
	finalZaps = afs_evenZaps;	/* do this before calling newvcache */
    /* don't need to call RemoveVCB, since only path leaving a callback is the
     * one where we pass through afs_NewVCache.  Can't have queued a VCB unless
     * we created and freed an entry between file creation time and here, and the
     * freeing of the vnode will change evenZaps.  Don't need to update the VLRU
     * queue, since the find will only succeed in the event of a create race, and 
     * then the vcache will be at the front of the VLRU queue anyway...  */
    if (!(tvc = afs_FindVCache(&newFid, 0, DO_STATS))) {
	tvc = afs_NewVCache(&newFid, hostp);
	if (tvc) {
	    int finalCBs;
	    ObtainWriteLock(&tvc->lock, 139);

	    ObtainWriteLock(&afs_xcbhash, 489);
	    finalCBs = afs_evenCBs;
	    /* add the callback in */
	    if (adp->f.states & CForeign) {
		tvc->f.states |= CForeign;
		finalCBs = afs_allCBs;
	    }
	    if (origCBs == finalCBs && origZaps == finalZaps) {
		tvc->f.states |= CStatd;	/* we've fake entire thing, so don't stat */
		tvc->f.states &= ~CBulkFetching;
		if (!AFS_IS_DISCON_RW) {
		    tvc->cbExpires = CallBack.ExpirationTime;
		    afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime), volp);
		}
	    } else {
		afs_DequeueCallback(tvc);
		tvc->f.states &= ~(CStatd | CUnique);
		tvc->callback = 0;
		if (tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR))
		    osi_dnlc_purgedp(tvc);
	    }
	    ReleaseWriteLock(&afs_xcbhash);
	    if (AFS_IS_DISCON_RW) {
		afs_DisconAddDirty(tvc, VDisconCreate, 0);
		afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VREG);
	    } else {
		afs_ProcessFS(tvc, OutFidStatus, treq);
	    }

	    tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
	    tvc->f.parent.unique = adp->f.fid.Fid.Unique;
#if !defined(UKERNEL)
            if (volp && (volp->states & VPartVisible))
                tvc->f.states |= CPartVisible;
#endif
	    ReleaseWriteLock(&tvc->lock);
	    *avcp = tvc;
	    code = 0;
	} else
	    code = ENOENT;
    } else {
	/* otherwise cache entry already exists, someone else must
	 * have created it.  Comments used to say:  "don't need write
	 * lock to *clear* these flags" but we should do it anyway.
	 * Code used to clear stat bit and callback, but I don't see 
	 * the point -- we didn't have a create race, somebody else just
	 * snuck into NewVCache before we got here, probably a racing 
	 * lookup.
	 */
	*avcp = tvc;
	code = 0;
    }
    ReleaseWriteLock(&afs_xvcache);

  done:
    AFS_DISCON_UNLOCK();

  done3:
    if (volp)
	afs_PutVolume(volp, READ_LOCK);

    if (code == 0) {
	if (afs_mariner)
	    afs_AddMarinerName(aname, *avcp);
	/* return the new status in vattr */
	afs_CopyOutAttrs(*avcp, attrs);
	if (afs_mariner)
	    afs_MarinerLog("store$Creating", *avcp);
    }

    afs_PutFakeStat(&fakestate);
    code = afs_CheckCode(code, treq, 20);
    afs_DestroyReq(treq);

  done2:
    osi_FreeSmallSpace(OutFidStatus);
    osi_FreeSmallSpace(OutDirStatus);
    return code;
}


/*
 * Check to see if we can track the change locally: requires that
 * we have sufficiently recent info in data cache.  If so, we
 * know the new DataVersion number, and place it correctly in both the
 * data and stat cache entries.  This routine returns 1 if we should
 * do the operation locally, and 0 otherwise.
 *
 * This routine must be called with the stat cache entry write-locked,
 * and dcache entry write-locked.
 */
int
afs_LocalHero(struct vcache *avc, struct dcache *adc,
	      AFSFetchStatus * astat, int aincr)
{
    afs_int32 ok;
    afs_hyper_t avers;

    AFS_STATCNT(afs_LocalHero);
    hset64(avers, astat->dataVersionHigh, astat->DataVersion);
    /* avers *is* the version number now, no matter what */

    if (adc) {
	/* does what's in the dcache *now* match what's in the vcache *now*,
	 * and do we have a valid callback? if not, our local copy is not "ok" */
	ok = (hsame(avc->f.m.DataVersion, adc->f.versionNo) && avc->callback
	      && (avc->f.states & CStatd) && avc->cbExpires >= osi_Time());
    } else {
	ok = 0;
    }
    if (ok) {
	/* check that the DV on the server is what we expect it to be */
	afs_hyper_t newDV;
	hset(newDV, adc->f.versionNo);
	hadd32(newDV, aincr);
	if (!hsame(avers, newDV)) {
	    ok = 0;
	}
    }
#if defined(AFS_SGI_ENV)
    osi_Assert(avc->v.v_type == VDIR);
#endif
    /* The bulk status code used the length as a sequence number.  */
    /* Don't update the vcache entry unless the stats are current. */
    if (avc->f.states & CStatd) {
	hset(avc->f.m.DataVersion, avers);
#ifdef AFS_64BIT_CLIENT
	FillInt64(avc->f.m.Length, astat->Length_hi, astat->Length);
#else /* AFS_64BIT_CLIENT */
	avc->f.m.Length = astat->Length;
#endif /* AFS_64BIT_CLIENT */
	avc->f.m.Date = astat->ClientModTime;
    }
    if (ok) {
	/* we've been tracking things correctly */
	adc->dflags |= DFEntryMod;
	adc->f.versionNo = avers;
	return 1;
    } else {
	if (adc) {
	    ZapDCE(adc);
	    DZap(adc);
	}
	if (avc->f.states & CStatd) {
	    osi_dnlc_purgedp(avc);
	}
	return 0;
    }
}
Beispiel #3
0
/* rxi_ReadProc -- internal version.
 *
 * LOCKS USED -- called at netpri
 */
int
rxi_ReadProc(struct rx_call *call, char *buf,
	     int nbytes)
{
    struct rx_packet *cp = call->currentPacket;
    struct rx_packet *rp;
    int requestCount;
    unsigned int t;

/* XXXX took out clock_NewTime from here.  Was it needed? */
    requestCount = nbytes;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    do {
	if (call->nLeft == 0) {
	    /* Get next packet */
	    MUTEX_ENTER(&call->lock);
	    for (;;) {
		if (call->error || (call->mode != RX_MODE_RECEIVING)) {
		    if (call->error) {
                        call->mode = RX_MODE_ERROR;
			MUTEX_EXIT(&call->lock);
			return 0;
		    }
		    if (call->mode == RX_MODE_SENDING) {
                        MUTEX_EXIT(&call->lock);
			rxi_FlushWrite(call);
                        MUTEX_ENTER(&call->lock);
			continue;
		    }
		}
		if (queue_IsNotEmpty(&call->rq)) {
		    /* Check that next packet available is next in sequence */
		    rp = queue_First(&call->rq, rx_packet);
		    if (rp->header.seq == call->rnext) {
			afs_int32 error;
			struct rx_connection *conn = call->conn;
			queue_Remove(rp);
#ifdef RX_TRACK_PACKETS
			rp->flags &= ~RX_PKTFLAG_RQ;
#endif
#ifdef RXDEBUG_PACKET
                        call->rqc--;
#endif /* RXDEBUG_PACKET */

			/* RXS_CheckPacket called to undo RXS_PreparePacket's
			 * work.  It may reduce the length of the packet by up
			 * to conn->maxTrailerSize, to reflect the length of the
			 * data + the header. */
			if ((error =
			     RXS_CheckPacket(conn->securityObject, call,
					     rp))) {
			    /* Used to merely shut down the call, but now we
			     * shut down the whole connection since this may
			     * indicate an attempt to hijack it */

			    MUTEX_EXIT(&call->lock);
			    rxi_ConnectionError(conn, error);
			    MUTEX_ENTER(&conn->conn_data_lock);
			    rp = rxi_SendConnectionAbort(conn, rp, 0, 0);
			    MUTEX_EXIT(&conn->conn_data_lock);
			    rxi_FreePacket(rp);

			    return 0;
			}
			call->rnext++;
			cp = call->currentPacket = rp;
#ifdef RX_TRACK_PACKETS
			call->currentPacket->flags |= RX_PKTFLAG_CP;
#endif
			call->curvec = 1;	/* 0th vec is always header */
			/* begin at the beginning [ more or less ], continue
			 * on until the end, then stop. */
			call->curpos =
			    (char *)cp->wirevec[1].iov_base +
			    call->conn->securityHeaderSize;
			call->curlen =
			    cp->wirevec[1].iov_len -
			    call->conn->securityHeaderSize;

			/* Notice that this code works correctly if the data
			 * size is 0 (which it may be--no reply arguments from
			 * server, for example).  This relies heavily on the
			 * fact that the code below immediately frees the packet
			 * (no yields, etc.).  If it didn't, this would be a
			 * problem because a value of zero for call->nLeft
			 * normally means that there is no read packet */
			call->nLeft = cp->length;
			hadd32(call->bytesRcvd, cp->length);

			/* Send a hard ack for every rxi_HardAckRate+1 packets
			 * consumed. Otherwise schedule an event to send
			 * the hard ack later on.
			 */
			call->nHardAcks++;
			if (!(call->flags & RX_CALL_RECEIVE_DONE)) {
			    if (call->nHardAcks > (u_short) rxi_HardAckRate) {
				rxevent_Cancel(call->delayedAckEvent, call,
					       RX_CALL_REFCOUNT_DELAY);
				rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
			    } else {
				struct clock when, now;
				clock_GetTime(&now);
				when = now;
				/* Delay to consolidate ack packets */
				clock_Add(&when, &rx_hardAckDelay);
				if (!call->delayedAckEvent
				    || clock_Gt(&call->delayedAckEvent->
						eventTime, &when)) {
				    rxevent_Cancel(call->delayedAckEvent,
						   call,
						   RX_CALL_REFCOUNT_DELAY);
                                    MUTEX_ENTER(&rx_refcnt_mutex);
				    CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
                                    MUTEX_EXIT(&rx_refcnt_mutex);
                                    call->delayedAckEvent =
				      rxevent_PostNow(&when, &now,
						     rxi_SendDelayedAck, call,
						     0);
				}
			    }
			}
			break;
		    }
		}

                /*
                 * If we reach this point either we have no packets in the
                 * receive queue or the next packet in the queue is not the
                 * one we are looking for.  There is nothing else for us to
                 * do but wait for another packet to arrive.
                 */

		/* Are there ever going to be any more packets? */
		if (call->flags & RX_CALL_RECEIVE_DONE) {
		    MUTEX_EXIT(&call->lock);
		    return requestCount - nbytes;
		}
		/* Wait for in-sequence packet */
		call->flags |= RX_CALL_READER_WAIT;
		clock_NewTime();
		call->startWait = clock_Sec();
		while (call->flags & RX_CALL_READER_WAIT) {
#ifdef	RX_ENABLE_LOCKS
		    CV_WAIT(&call->cv_rq, &call->lock);
#else
		    osi_rxSleep(&call->rq);
#endif
		}
                cp = call->currentPacket;

		call->startWait = 0;
#ifdef RX_ENABLE_LOCKS
		if (call->error) {
		    MUTEX_EXIT(&call->lock);
		    return 0;
		}
#endif /* RX_ENABLE_LOCKS */
	    }
	    MUTEX_EXIT(&call->lock);
	} else
	    /* osi_Assert(cp); */
	    /* MTUXXX  this should be replaced by some error-recovery code before shipping */
	    /* yes, the following block is allowed to be the ELSE clause (or not) */
	    /* It's possible for call->nLeft to be smaller than any particular
	     * iov_len.  Usually, recvmsg doesn't change the iov_len, since it
	     * reflects the size of the buffer.  We have to keep track of the
	     * number of bytes read in the length field of the packet struct.  On
	     * the final portion of a received packet, it's almost certain that
	     * call->nLeft will be smaller than the final buffer. */
	    while (nbytes && cp) {
		t = MIN((int)call->curlen, nbytes);
		t = MIN(t, (int)call->nLeft);
		memcpy(buf, call->curpos, t);
		buf += t;
		nbytes -= t;
		call->curpos += t;
		call->curlen -= t;
		call->nLeft -= t;

		if (!call->nLeft) {
		    /* out of packet.  Get another one. */
#ifdef RX_TRACK_PACKETS
		    call->currentPacket->flags &= ~RX_PKTFLAG_CP;
#endif
		    rxi_FreePacket(cp);
		    cp = call->currentPacket = (struct rx_packet *)0;
		} else if (!call->curlen) {
		    /* need to get another struct iov */
		    if (++call->curvec >= cp->niovecs) {
			/* current packet is exhausted, get ready for another */
			/* don't worry about curvec and stuff, they get set somewhere else */
#ifdef RX_TRACK_PACKETS
			call->currentPacket->flags &= ~RX_PKTFLAG_CP;
#endif
			rxi_FreePacket(cp);
			cp = call->currentPacket = (struct rx_packet *)0;
			call->nLeft = 0;
		    } else {
			call->curpos =
			    (char *)cp->wirevec[call->curvec].iov_base;
			call->curlen = cp->wirevec[call->curvec].iov_len;
		    }
		}
	    }
	if (!nbytes) {
	    /* user buffer is full, return */
	    return requestCount;
	}

    } while (nbytes);

    return requestCount;
}
Beispiel #4
0
int
rxi_WriteProc(struct rx_call *call, char *buf,
	      int nbytes)
{
    struct rx_connection *conn = call->conn;
    struct rx_packet *cp = call->currentPacket;
    unsigned int t;
    int requestCount = nbytes;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode != RX_MODE_SENDING) {
	if ((conn->type == RX_SERVER_CONNECTION)
	    && (call->mode == RX_MODE_RECEIVING)) {
	    call->mode = RX_MODE_SENDING;
	    if (cp) {
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_CP;
#endif
		rxi_FreePacket(cp);
		cp = call->currentPacket = (struct rx_packet *)0;
		call->nLeft = 0;
		call->nFree = 0;
	    }
	} else {
	    return 0;
	}
    }

    /* Loop condition is checked at end, so that a write of 0 bytes
     * will force a packet to be created--specially for the case where
     * there are 0 bytes on the stream, but we must send a packet
     * anyway. */
    do {
	if (call->nFree == 0) {
	    MUTEX_ENTER(&call->lock);
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
            rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
            cp = call->currentPacket;
            if (call->error)
                call->mode = RX_MODE_ERROR;
	    if (!call->error && cp) {
                /* Clear the current packet now so that if
                 * we are forced to wait and drop the lock
                 * the packet we are planning on using
                 * cannot be freed.
                 */
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_CP;
#endif
		call->currentPacket = (struct rx_packet *)0;
		clock_NewTime();	/* Bogus:  need new time package */
		/* The 0, below, specifies that it is not the last packet:
		 * there will be others. PrepareSendPacket may
		 * alter the packet length by up to
		 * conn->securityMaxTrailerSize */
		hadd32(call->bytesSent, cp->length);
		rxi_PrepareSendPacket(call, cp, 0);
#ifdef RX_TRACK_PACKETS
		cp->flags |= RX_PKTFLAG_TQ;
#endif
		queue_Append(&call->tq, cp);
#ifdef RXDEBUG_PACKET
                call->tqc++;
#endif /* RXDEBUG_PACKET */
                cp = (struct rx_packet *)0;
		if (!
		    (call->
		     flags & (RX_CALL_FAST_RECOVER |
			      RX_CALL_FAST_RECOVER_WAIT))) {
		    rxi_Start(0, call, 0, 0);
		}
	    } else if (cp) {
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_CP;
#endif
		rxi_FreePacket(cp);
		cp = call->currentPacket = (struct rx_packet *)0;
	    }
	    /* Wait for transmit window to open up */
	    while (!call->error
		   && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
		clock_NewTime();
		call->startWait = clock_Sec();

#ifdef	RX_ENABLE_LOCKS
		CV_WAIT(&call->cv_twind, &call->lock);
#else
		call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
		osi_rxSleep(&call->twind);
#endif

		call->startWait = 0;
#ifdef RX_ENABLE_LOCKS
		if (call->error) {
                    call->mode = RX_MODE_ERROR;
		    MUTEX_EXIT(&call->lock);
		    return 0;
		}
#endif /* RX_ENABLE_LOCKS */
	    }
	    if ((cp = rxi_AllocSendPacket(call, nbytes))) {
#ifdef RX_TRACK_PACKETS
		cp->flags |= RX_PKTFLAG_CP;
#endif
		call->currentPacket = cp;
		call->nFree = cp->length;
		call->curvec = 1;	/* 0th vec is always header */
		/* begin at the beginning [ more or less ], continue
		 * on until the end, then stop. */
		call->curpos =
		    (char *)cp->wirevec[1].iov_base +
		    call->conn->securityHeaderSize;
		call->curlen =
		    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
	    }
	    if (call->error) {
                call->mode = RX_MODE_ERROR;
		if (cp) {
#ifdef RX_TRACK_PACKETS
		    cp->flags &= ~RX_PKTFLAG_CP;
#endif
		    rxi_FreePacket(cp);
		    call->currentPacket = NULL;
		}
		MUTEX_EXIT(&call->lock);
		return 0;
	    }
	    MUTEX_EXIT(&call->lock);
	}

	if (cp && (int)call->nFree < nbytes) {
	    /* Try to extend the current buffer */
	    int len, mud;
	    len = cp->length;
	    mud = rx_MaxUserDataSize(call);
	    if (mud > len) {
		int want;
		want = MIN(nbytes - (int)call->nFree, mud - len);
		rxi_AllocDataBuf(cp, want, RX_PACKET_CLASS_SEND_CBUF);
		if (cp->length > (unsigned)mud)
		    cp->length = mud;
		call->nFree += (cp->length - len);
	    }
	}

	/* If the remaining bytes fit in the buffer, then store them
	 * and return.  Don't ship a buffer that's full immediately to
	 * the peer--we don't know if it's the last buffer yet */

	if (!cp) {
	    call->nFree = 0;
	}

	while (nbytes && call->nFree) {

	    t = MIN((int)call->curlen, nbytes);
	    t = MIN((int)call->nFree, t);
	    memcpy(call->curpos, buf, t);
	    buf += t;
	    nbytes -= t;
	    call->curpos += t;
	    call->curlen -= (u_short)t;
	    call->nFree -= (u_short)t;

	    if (!call->curlen) {
		/* need to get another struct iov */
		if (++call->curvec >= cp->niovecs) {
		    /* current packet is full, extend or send it */
		    call->nFree = 0;
		} else {
		    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
		    call->curlen = cp->wirevec[call->curvec].iov_len;
		}
	    }
	}			/* while bytes to send and room to send them */

	/* might be out of space now */
	if (!nbytes) {
	    return requestCount;
	} else;			/* more data to send, so get another packet and keep going */
    } while (nbytes);

    return requestCount - nbytes;
}
Beispiel #5
0
/* rxi_FillReadVec
 *
 * Uses packets in the receive queue to fill in as much of the
 * current iovec as possible. Does not block if it runs out
 * of packets to complete the iovec. Return true if an ack packet
 * was sent, otherwise return false */
int
rxi_FillReadVec(struct rx_call *call, afs_uint32 serial)
{
    int didConsume = 0;
    int didHardAck = 0;
    unsigned int t;
    struct rx_packet *rp;
    struct rx_packet *curp;
    struct iovec *call_iov;
    struct iovec *cur_iov = NULL;

    curp = call->currentPacket;
    if (curp) {
	cur_iov = &curp->wirevec[call->curvec];
    }
    call_iov = &call->iov[call->iovNext];

    while (!call->error && call->iovNBytes && call->iovNext < call->iovMax) {
	if (call->nLeft == 0) {
	    /* Get next packet */
	    if (queue_IsNotEmpty(&call->rq)) {
		/* Check that next packet available is next in sequence */
		rp = queue_First(&call->rq, rx_packet);
		if (rp->header.seq == call->rnext) {
		    afs_int32 error;
		    struct rx_connection *conn = call->conn;
		    queue_Remove(rp);
#ifdef RX_TRACK_PACKETS
		    rp->flags &= ~RX_PKTFLAG_RQ;
#endif
#ifdef RXDEBUG_PACKET
                    call->rqc--;
#endif /* RXDEBUG_PACKET */

		    /* RXS_CheckPacket called to undo RXS_PreparePacket's
		     * work.  It may reduce the length of the packet by up
		     * to conn->maxTrailerSize, to reflect the length of the
		     * data + the header. */
		    if ((error =
			 RXS_CheckPacket(conn->securityObject, call, rp))) {
			/* Used to merely shut down the call, but now we
			 * shut down the whole connection since this may
			 * indicate an attempt to hijack it */

			MUTEX_EXIT(&call->lock);
			rxi_ConnectionError(conn, error);
			MUTEX_ENTER(&conn->conn_data_lock);
			rp = rxi_SendConnectionAbort(conn, rp, 0, 0);
			MUTEX_EXIT(&conn->conn_data_lock);
			rxi_FreePacket(rp);
			MUTEX_ENTER(&call->lock);

			return 1;
		    }
		    call->rnext++;
		    curp = call->currentPacket = rp;
#ifdef RX_TRACK_PACKETS
		    call->currentPacket->flags |= RX_PKTFLAG_CP;
#endif
		    call->curvec = 1;	/* 0th vec is always header */
		    cur_iov = &curp->wirevec[1];
		    /* begin at the beginning [ more or less ], continue
		     * on until the end, then stop. */
		    call->curpos =
			(char *)curp->wirevec[1].iov_base +
			call->conn->securityHeaderSize;
		    call->curlen =
			curp->wirevec[1].iov_len -
			call->conn->securityHeaderSize;

		    /* Notice that this code works correctly if the data
		     * size is 0 (which it may be--no reply arguments from
		     * server, for example).  This relies heavily on the
		     * fact that the code below immediately frees the packet
		     * (no yields, etc.).  If it didn't, this would be a
		     * problem because a value of zero for call->nLeft
		     * normally means that there is no read packet */
		    call->nLeft = curp->length;
		    hadd32(call->bytesRcvd, curp->length);

		    /* Send a hard ack for every rxi_HardAckRate+1 packets
		     * consumed. Otherwise schedule an event to send
		     * the hard ack later on.
		     */
		    call->nHardAcks++;
		    didConsume = 1;
		    continue;
		}
	    }
	    break;
	}

	/* It's possible for call->nLeft to be smaller than any particular
	 * iov_len.  Usually, recvmsg doesn't change the iov_len, since it
	 * reflects the size of the buffer.  We have to keep track of the
	 * number of bytes read in the length field of the packet struct.  On
	 * the final portion of a received packet, it's almost certain that
	 * call->nLeft will be smaller than the final buffer. */
	while (call->iovNBytes && call->iovNext < call->iovMax && curp) {

	    t = MIN((int)call->curlen, call->iovNBytes);
	    t = MIN(t, (int)call->nLeft);
	    call_iov->iov_base = call->curpos;
	    call_iov->iov_len = t;
	    call_iov++;
	    call->iovNext++;
	    call->iovNBytes -= t;
	    call->curpos += t;
	    call->curlen -= t;
	    call->nLeft -= t;

	    if (!call->nLeft) {
		/* out of packet.  Get another one. */
#ifdef RX_TRACK_PACKETS
                curp->flags &= ~RX_PKTFLAG_CP;
                curp->flags |= RX_PKTFLAG_IOVQ;
#endif
		queue_Append(&call->iovq, curp);
#ifdef RXDEBUG_PACKET
                call->iovqc++;
#endif /* RXDEBUG_PACKET */
		curp = call->currentPacket = (struct rx_packet *)0;
	    } else if (!call->curlen) {
		/* need to get another struct iov */
		if (++call->curvec >= curp->niovecs) {
		    /* current packet is exhausted, get ready for another */
		    /* don't worry about curvec and stuff, they get set somewhere else */
#ifdef RX_TRACK_PACKETS
		    curp->flags &= ~RX_PKTFLAG_CP;
		    curp->flags |= RX_PKTFLAG_IOVQ;
#endif
		    queue_Append(&call->iovq, curp);
#ifdef RXDEBUG_PACKET
                    call->iovqc++;
#endif /* RXDEBUG_PACKET */
		    curp = call->currentPacket = (struct rx_packet *)0;
		    call->nLeft = 0;
		} else {
		    cur_iov++;
		    call->curpos = (char *)cur_iov->iov_base;
		    call->curlen = cur_iov->iov_len;
		}
	    }
	}
    }

    /* If we consumed any packets then check whether we need to
     * send a hard ack. */
    if (didConsume && (!(call->flags & RX_CALL_RECEIVE_DONE))) {
	if (call->nHardAcks > (u_short) rxi_HardAckRate) {
	    rxevent_Cancel(call->delayedAckEvent, call,
			   RX_CALL_REFCOUNT_DELAY);
	    rxi_SendAck(call, 0, serial, RX_ACK_DELAY, 0);
	    didHardAck = 1;
	} else {
	    struct clock when, now;
	    clock_GetTime(&now);
	    when = now;
	    /* Delay to consolidate ack packets */
	    clock_Add(&when, &rx_hardAckDelay);
	    if (!call->delayedAckEvent
		|| clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
		rxevent_Cancel(call->delayedAckEvent, call,
			       RX_CALL_REFCOUNT_DELAY);
                MUTEX_ENTER(&rx_refcnt_mutex);
		CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
                MUTEX_EXIT(&rx_refcnt_mutex);
		call->delayedAckEvent =
		    rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
	    }
	}
    }
    return didHardAck;
}
Beispiel #6
0
/* Flush any buffered data to the stream, switch to read mode
 * (clients) or to EOF mode (servers)
 *
 * LOCKS HELD: called at netpri.
 */
void
rxi_FlushWrite(struct rx_call *call)
{
    struct rx_packet *cp = NULL;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode == RX_MODE_SENDING) {

	call->mode =
	    (call->conn->type ==
	     RX_CLIENT_CONNECTION ? RX_MODE_RECEIVING : RX_MODE_EOF);

#ifdef RX_KERNEL_TRACE
	{
	    int glockOwner = ISAFS_GLOCK();
	    if (!glockOwner)
		AFS_GLOCK();
	    afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
		       __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
		       call);
	    if (!glockOwner)
		AFS_GUNLOCK();
	}
#endif

        MUTEX_ENTER(&call->lock);
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
        rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
        if (call->error)
            call->mode = RX_MODE_ERROR;

        cp = call->currentPacket;

	if (cp) {
	    /* cp->length is only supposed to be the user's data */
	    /* cp->length was already set to (then-current)
	     * MaxUserDataSize or less. */
#ifdef RX_TRACK_PACKETS
	    cp->flags &= ~RX_PKTFLAG_CP;
#endif
	    cp->length -= call->nFree;
	    call->currentPacket = (struct rx_packet *)0;
	    call->nFree = 0;
	} else {
	    cp = rxi_AllocSendPacket(call, 0);
	    if (!cp) {
		/* Mode can no longer be MODE_SENDING */
		return;
	    }
	    cp->length = 0;
	    cp->niovecs = 2;	/* header + space for rxkad stuff */
	    call->nFree = 0;
	}

	/* The 1 specifies that this is the last packet */
	hadd32(call->bytesSent, cp->length);
	rxi_PrepareSendPacket(call, cp, 1);
#ifdef RX_TRACK_PACKETS
	cp->flags |= RX_PKTFLAG_TQ;
#endif
	queue_Append(&call->tq, cp);
#ifdef RXDEBUG_PACKET
        call->tqc++;
#endif /* RXDEBUG_PACKET */
	if (!
	    (call->
	     flags & (RX_CALL_FAST_RECOVER | RX_CALL_FAST_RECOVER_WAIT))) {
	    rxi_Start(0, call, 0, 0);
	}
        MUTEX_EXIT(&call->lock);
    }
}
Beispiel #7
0
/* rxi_WritevProc -- internal version.
 *
 * Send buffers allocated in rxi_WritevAlloc.
 *
 * LOCKS USED -- called at netpri.
 */
int
rxi_WritevProc(struct rx_call *call, struct iovec *iov, int nio, int nbytes)
{
    struct rx_packet *cp = NULL;
#ifdef RX_TRACK_PACKETS
    struct rx_packet *p, *np;
#endif
    int nextio;
    int requestCount;
    struct rx_queue tmpq;
#ifdef RXDEBUG_PACKET
    u_short tmpqc;
#endif

    requestCount = nbytes;
    nextio = 0;

    MUTEX_ENTER(&call->lock);
    if (call->error) {
        call->mode = RX_MODE_ERROR;
    } else if (call->mode != RX_MODE_SENDING) {
	call->error = RX_PROTOCOL_ERROR;
    }
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
    rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
    cp = call->currentPacket;

    if (call->error) {
        call->mode = RX_MODE_ERROR;
	MUTEX_EXIT(&call->lock);
	if (cp) {
#ifdef RX_TRACK_PACKETS
            cp->flags &= ~RX_PKTFLAG_CP;
            cp->flags |= RX_PKTFLAG_IOVQ;
#endif
	    queue_Prepend(&call->iovq, cp);
#ifdef RXDEBUG_PACKET
            call->iovqc++;
#endif /* RXDEBUG_PACKET */
	    call->currentPacket = (struct rx_packet *)0;
	}
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
	return 0;
    }

    /* Loop through the I/O vector adjusting packet pointers.
     * Place full packets back onto the iovq once they are ready
     * to send. Set RX_PROTOCOL_ERROR if any problems are found in
     * the iovec. We put the loop condition at the end to ensure that
     * a zero length write will push a short packet. */
    nextio = 0;
    queue_Init(&tmpq);
#ifdef RXDEBUG_PACKET
    tmpqc = 0;
#endif /* RXDEBUG_PACKET */
    do {
	if (call->nFree == 0 && cp) {
	    clock_NewTime();	/* Bogus:  need new time package */
	    /* The 0, below, specifies that it is not the last packet:
	     * there will be others. PrepareSendPacket may
	     * alter the packet length by up to
	     * conn->securityMaxTrailerSize */
	    hadd32(call->bytesSent, cp->length);
	    rxi_PrepareSendPacket(call, cp, 0);
	    queue_Append(&tmpq, cp);
#ifdef RXDEBUG_PACKET
            tmpqc++;
#endif /* RXDEBUG_PACKET */
            cp = call->currentPacket = (struct rx_packet *)0;

	    /* The head of the iovq is now the current packet */
	    if (nbytes) {
		if (queue_IsEmpty(&call->iovq)) {
                    MUTEX_EXIT(&call->lock);
		    call->error = RX_PROTOCOL_ERROR;
#ifdef RXDEBUG_PACKET
                    tmpqc -=
#endif /* RXDEBUG_PACKET */
                        rxi_FreePackets(0, &tmpq);
		    return 0;
		}
		cp = queue_First(&call->iovq, rx_packet);
		queue_Remove(cp);
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_IOVQ;
#endif
#ifdef RXDEBUG_PACKET
                call->iovqc--;
#endif /* RXDEBUG_PACKET */
#ifdef RX_TRACK_PACKETS
                cp->flags |= RX_PKTFLAG_CP;
#endif
		call->currentPacket = cp;
		call->nFree = cp->length;
		call->curvec = 1;
		call->curpos =
		    (char *)cp->wirevec[1].iov_base +
		    call->conn->securityHeaderSize;
		call->curlen =
		    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
	    }
	}

	if (nbytes) {
	    /* The next iovec should point to the current position */
	    if (iov[nextio].iov_base != call->curpos
		|| iov[nextio].iov_len > (int)call->curlen) {
		call->error = RX_PROTOCOL_ERROR;
                MUTEX_EXIT(&call->lock);
		if (cp) {
#ifdef RX_TRACK_PACKETS
		    cp->flags &= ~RX_PKTFLAG_CP;
#endif
                    queue_Prepend(&tmpq, cp);
#ifdef RXDEBUG_PACKET
                    tmpqc++;
#endif /* RXDEBUG_PACKET */
                    cp = call->currentPacket = (struct rx_packet *)0;
		}
#ifdef RXDEBUG_PACKET
                tmpqc -=
#endif /* RXDEBUG_PACKET */
                    rxi_FreePackets(0, &tmpq);
		return 0;
	    }
	    nbytes -= iov[nextio].iov_len;
	    call->curpos += iov[nextio].iov_len;
	    call->curlen -= iov[nextio].iov_len;
	    call->nFree -= iov[nextio].iov_len;
	    nextio++;
	    if (call->curlen == 0) {
		if (++call->curvec > cp->niovecs) {
		    call->nFree = 0;
		} else {
		    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
		    call->curlen = cp->wirevec[call->curvec].iov_len;
		}
	    }
	}
    } while (nbytes && nextio < nio);

    /* Move the packets from the temporary queue onto the transmit queue.
     * We may end up with more than call->twind packets on the queue. */

#ifdef RX_TRACK_PACKETS
    for (queue_Scan(&tmpq, p, np, rx_packet))
    {
        p->flags |= RX_PKTFLAG_TQ;
    }
#endif

    if (call->error)
        call->mode = RX_MODE_ERROR;

    queue_SpliceAppend(&call->tq, &tmpq);

    if (!(call->flags & (RX_CALL_FAST_RECOVER | RX_CALL_FAST_RECOVER_WAIT))) {
	rxi_Start(0, call, 0, 0);
    }

    /* Wait for the length of the transmit queue to fall below call->twind */
    while (!call->error && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
	clock_NewTime();
	call->startWait = clock_Sec();
#ifdef	RX_ENABLE_LOCKS
	CV_WAIT(&call->cv_twind, &call->lock);
#else
	call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
	osi_rxSleep(&call->twind);
#endif
	call->startWait = 0;
    }

    /* cp is no longer valid since we may have given up the lock */
    cp = call->currentPacket;

    if (call->error) {
        call->mode = RX_MODE_ERROR;
        call->currentPacket = NULL;
        MUTEX_EXIT(&call->lock);
	if (cp) {
#ifdef RX_TRACK_PACKETS
	    cp->flags &= ~RX_PKTFLAG_CP;
#endif
	    rxi_FreePacket(cp);
	}
	return 0;
    }
    MUTEX_EXIT(&call->lock);

    return requestCount - nbytes;
}