int
afs_StoreAllSegments(struct vcache *avc, struct vrequest *areq, int sync)
{
    struct dcache *tdc;
    afs_int32 code = 0;
    afs_int32 index;
    afs_int32 origCBs, foreign = 0;
    int hash;
    afs_hyper_t newDV, oldDV;	/* DV when we start, and finish, respectively */
    struct dcache **dcList;
    unsigned int i, j, minj, moredata, high, off;
    afs_size_t maxStoredLength;	/* highest offset we've written to server. */
    int safety, marineronce = 0;

    AFS_STATCNT(afs_StoreAllSegments);

    hset(oldDV, avc->f.m.DataVersion);
    hset(newDV, avc->f.m.DataVersion);
    hash = DVHash(&avc->f.fid);
    foreign = (avc->f.states & CForeign);
    dcList = osi_AllocLargeSpace(AFS_LRALLOCSIZ);
    afs_Trace2(afs_iclSetp, CM_TRACE_STOREALL, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length));
#if !defined(AFS_AIX32_ENV) && !defined(AFS_SGI65_ENV)
    /* In the aix vm implementation we need to do the vm_writep even
     * in the memcache case, since that's where we adjust the file's size
     * and finish flushing partial vm pages.
     */
    if ((cacheDiskType != AFS_FCACHE_TYPE_MEM) ||
	(sync & AFS_VMSYNC_INVAL) || (sync & AFS_VMSYNC) ||
	(sync & AFS_LASTSTORE))
#endif /* !AFS_AIX32_ENV && !AFS_SGI65_ENV */
    {
	/* If we're not diskless, reading a file may stress the VM
	 * system enough to cause a pageout, and this vnode would be
	 * locked when the pageout occurs.  We can prevent this problem
	 * by making sure all dirty pages are already flushed.  We don't
	 * do this when diskless because reading a diskless (i.e.
	 * memory-resident) chunk doesn't require using new VM, and we
	 * also don't want to dump more dirty data into a diskless cache,
	 * since they're smaller, and we might exceed its available
	 * space.
	 */
#if defined(AFS_SUN5_ENV)
	if (sync & AFS_VMSYNC_INVAL)	/* invalidate VM pages */
	    osi_VM_TryToSmush(avc, CRED(), 1);
	else
#endif
	    osi_VM_StoreAllSegments(avc);
    }
    if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
	/* This will probably make someone sad ... */
	/*printf("Net down in afs_StoreSegments\n");*/
	return ENETDOWN;
    }
    ConvertWToSLock(&avc->lock);

    /*
     * Subsequent code expects a sorted list, and it expects all the
     * chunks in the list to be contiguous, so we need a sort and a
     * while loop in here, too - but this will work for a first pass...
     * 92.10.05 - OK, there's a sort in here now.  It's kind of a modified
     *            bin sort, I guess.  Chunk numbers start with 0.
     *
     * - Have to get a write lock on xdcache because GetDSlot might need it (if
     *   the chunk doesn't have a dcache struct).
     *   This seems like overkill in most cases.
     * - I'm not sure that it's safe to do "index = .hvNextp", then unlock
     *   xdcache, then relock xdcache and try to use index.  It is done
     *   a lot elsewhere in the CM, but I'm not buying that argument.
     * - should be able to check IFDataMod without doing the GetDSlot (just
     *   hold afs_xdcache).  That way, it's easy to do this without the
     *   writelock on afs_xdcache, and we save unnecessary disk
     *   operations.  I don't think that works, 'cuz the next pointers
     *   are still on disk.
     */
    origCBs = afs_allCBs;

    maxStoredLength = 0;
    minj = 0;

    do {
	memset(dcList, 0, NCHUNKSATONCE * sizeof(struct dcache *));
	high = 0;
	moredata = FALSE;

	/* lock and start over from beginning of hash chain
	 * in order to avoid a race condition. */
	ObtainWriteLock(&afs_xdcache, 284);
	index = afs_dvhashTbl[hash];
	for (j = 0; index != NULLIDX;) {
	    if ((afs_indexFlags[index] & IFDataMod)
		&& (afs_indexUnique[index] == avc->f.fid.Fid.Unique)) {
		tdc = afs_GetValidDSlot(index);	/* refcount+1. */
		if (!tdc) {
		    ReleaseWriteLock(&afs_xdcache);
		    code = EIO;
		    goto done;
		}
		ReleaseReadLock(&tdc->tlock);
		if (!FidCmp(&tdc->f.fid, &avc->f.fid) && tdc->f.chunk >= minj) {
		    off = tdc->f.chunk - minj;
		    if (off < NCHUNKSATONCE) {
			if (dcList[off])
			    osi_Panic("dclist slot already in use!");
			if (afs_mariner && !marineronce) {
			    /* first chunk only */
			    afs_MarinerLog("store$Storing", avc);
			    marineronce++;
			}
			dcList[off] = tdc;
			if (off > high)
			    high = off;
			j++;
			/* DCLOCKXXX: chunkBytes is protected by tdc->lock which we
			 * can't grab here, due to lock ordering with afs_xdcache.
			 * So, disable this shortcut for now.  -- kolya 2001-10-13
			 */
			/* shortcut: big win for little files */
			/* tlen -= tdc->f.chunkBytes;
			 * if (tlen <= 0)
			 *     break;
			 */
		    } else {
			moredata = TRUE;
			afs_PutDCache(tdc);
			if (j == NCHUNKSATONCE)
			    break;
		    }
		} else {
		    afs_PutDCache(tdc);
		}
	    }
	    index = afs_dvnextTbl[index];
	}
	ReleaseWriteLock(&afs_xdcache);

	/* this guy writes chunks, puts back dcache structs, and bumps newDV */
	/* "moredata" just says "there are more dirty chunks yet to come". */
	if (j) {
	    code =
		afs_CacheStoreVCache(dcList, avc, areq, sync, minj, high,
				     moredata, &newDV, &maxStoredLength);
	    /* Release any zero-length dcache entries in our interval
	     * that we locked but didn't store back above.
	     */
	    for (j = 0; j <= high; j++) {
		tdc = dcList[j];
		if (tdc) {
		    osi_Assert(tdc->f.chunkBytes == 0);
		    ReleaseSharedLock(&tdc->lock);
		    afs_PutDCache(tdc);
		}
	    }
	}
	/* if (j) */
	minj += NCHUNKSATONCE;
    } while (!code && moredata);

  done:
    UpgradeSToWLock(&avc->lock, 29);

    /* send a trivial truncation store if did nothing else */
    if (code == 0) {
	/*
	 * Call StoreMini if we haven't written enough data to extend the
	 * file at the fileserver to the client's notion of the file length.
	 */
	if ((avc->f.truncPos != AFS_NOTRUNC)
	    || ((avc->f.states & CExtendedFile)
		&& (maxStoredLength < avc->f.m.Length))) {
	    code = afs_StoreMini(avc, areq);
	    if (code == 0)
		hadd32(newDV, 1);	/* just bumped here, too */
	}
	avc->f.states &= ~CExtendedFile;
    }

    /*
     * Finally, turn off DWriting, turn on DFEntryMod,
     * update f.versionNo.
     * A lot of this could be integrated into the loop above.
     */
    if (!code) {
	afs_hyper_t h_unset;
	hones(h_unset);

	minj = 0;

	do {
	    moredata = FALSE;
	    memset(dcList, 0, NCHUNKSATONCE * sizeof(struct dcache *));

	    /* overkill, but it gets the lock in case GetDSlot needs it */
	    ObtainWriteLock(&afs_xdcache, 285);

	    for (j = 0, safety = 0, index = afs_dvhashTbl[hash];
		 index != NULLIDX && safety < afs_cacheFiles + 2;
		 index = afs_dvnextTbl[index]) {

		if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
		    tdc = afs_GetValidDSlot(index);
		    if (!tdc) {
			/* This is okay, since manipulating the dcaches at this
			 * point is best-effort.  We only get a dcache here to
			 * increment the dv and turn off DWriting.  If we were
			 * supposed to do that for a dcache, but could not
			 * due to an I/O error, it just means the dv won't
			 * be updated, so we won't be able to use that cached
			 * chunk in the future.  That's inefficient, but not
			 * an error.
			 */
			continue;
		    }
		    ReleaseReadLock(&tdc->tlock);

		    if (!FidCmp(&tdc->f.fid, &avc->f.fid)
			&& tdc->f.chunk >= minj) {
			off = tdc->f.chunk - minj;
			if (off < NCHUNKSATONCE) {
			    /* this is the file, and the correct chunk range */
			    if (j >= NCHUNKSATONCE)
				osi_Panic
				    ("Too many dcache entries in range\n");
			    dcList[j++] = tdc;
			} else {
			    moredata = TRUE;
			    afs_PutDCache(tdc);
			    if (j == NCHUNKSATONCE)
				break;
			}
		    } else {
			afs_PutDCache(tdc);
		    }
		}
	    }
	    ReleaseWriteLock(&afs_xdcache);

	    for (i = 0; i < j; i++) {
		/* Iterate over the dcache entries we collected above */
		tdc = dcList[i];
		ObtainSharedLock(&tdc->lock, 677);

		/* was code here to clear IFDataMod, but it should only be done
		 * in storedcache and storealldcache.
		 */
		/* Only increase DV if we had up-to-date data to start with.
		 * Otherwise, we could be falsely upgrading an old chunk
		 * (that we never read) into one labelled with the current
		 * DV #.  Also note that we check that no intervening stores
		 * occurred, otherwise we might mislabel cache information
		 * for a chunk that we didn't store this time.
		 */
		/* Don't update the version number if it's not yet set. */
		if (!hsame(tdc->f.versionNo, h_unset)
		    && hcmp(tdc->f.versionNo, oldDV) >= 0) {

		    if ((!(afs_dvhack || foreign)
			 && hsame(avc->f.m.DataVersion, newDV))
			|| ((afs_dvhack || foreign)
			    && (origCBs == afs_allCBs))) {
			/* no error, this is the DV */

			UpgradeSToWLock(&tdc->lock, 678);
			hset(tdc->f.versionNo, avc->f.m.DataVersion);
			tdc->dflags |= DFEntryMod;
			/* DWriting may not have gotten cleared above, if all
			 * we did was a StoreMini */
			tdc->f.states &= ~DWriting;
			ConvertWToSLock(&tdc->lock);
		    }
		}

		ReleaseSharedLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }

	    minj += NCHUNKSATONCE;

	} while (moredata);
    }

    if (code) {
	/*
	 * Invalidate chunks after an error for CCore files, since
	 * afs_inactive won't be called for these and they won't be
	 * invalidated.  Also discard data if it's a permanent error from the
	 * fileserver.
	 */
	if (areq->permWriteError || (avc->f.states & CCore)) {
	    afs_InvalidateAllSegments(avc);
	}
    }
    afs_Trace3(afs_iclSetp, CM_TRACE_STOREALLDONE, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_INT32, avc->f.m.Length, ICL_TYPE_INT32, code);
    /* would like a Trace5, but it doesn't exist... */
    afs_Trace3(afs_iclSetp, CM_TRACE_AVCLOCKER, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_INT32, avc->lock.wait_states, ICL_TYPE_INT32,
	       avc->lock.excl_locked);
    afs_Trace4(afs_iclSetp, CM_TRACE_AVCLOCKEE, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_INT32, avc->lock.wait_states, ICL_TYPE_INT32,
	       avc->lock.readers_reading, ICL_TYPE_INT32,
	       avc->lock.num_waiting);

    /*
     * Finally, if the updated DataVersion matches newDV, we did all of the
     * stores.  If mapDV indicates that the page cache was flushed up
     * to when we started the store, then we can relabel them as flushed
     * as recently as newDV.
     * Turn off the CDirty bit because the stored data is now in sync with
     * the server.
     */
    if (code == 0 && hcmp(avc->mapDV, oldDV) >= 0) {
	if ((!(afs_dvhack || foreign)
	     && hsame(avc->f.m.DataVersion, newDV))
	    || ((afs_dvhack || foreign) && (origCBs == afs_allCBs))) {
	    hset(avc->mapDV, newDV);
	    avc->f.states &= ~CDirty;
	}
    }

    osi_FreeLargeSpace(dcList);

    /* If not the final write a temporary error is ok. */
    if (code && !areq->permWriteError && !(sync & AFS_LASTSTORE))
	code = 0;

    return code;
}				/*afs_StoreAllSegments (new 03/02/94) */
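/*
 * Illustration (not part of the original source): the store loop above
 * visits dirty chunks in windows of NCHUNKSATONCE chunk numbers, keyed
 * off minj.  A minimal sketch of the same windowing, assuming a
 * hypothetical set of dirty chunk numbers for one file:
 *
 *     minj = 0;
 *     do {
 *         moredata = 0;
 *         for (each dirty chunk number c of this file) {
 *             if (c < minj)
 *                 continue;                  // handled by an earlier pass
 *             if (c - minj < NCHUNKSATONCE)
 *                 batch[c - minj] = c;       // store on this pass
 *             else
 *                 moredata = 1;              // store on a later pass
 *         }
 *         store(batch);                      // afs_CacheStoreVCache above
 *         minj += NCHUNKSATONCE;
 *     } while (moredata);
 *
 * The second loop in the function repeats the same walk, but only to
 * stamp the new DataVersion into each chunk's dcache entry.
 */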
/* question:  does afs_create need to set CDirty in the adp or the avc?
 * I think we can get away without it, but I'm not sure.  Note that
 * afs_setattr is called in here for truncation.
 */
#ifdef AFS_SGI64_ENV
int
afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, int flags,
	   int amode, struct vcache **avcp, afs_ucred_t *acred)
#else /* AFS_SGI64_ENV */
int
afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs,
	   enum vcexcl aexcl, int amode, struct vcache **avcp,
	   afs_ucred_t *acred)
#endif /* AFS_SGI64_ENV */
{
    afs_int32 origCBs, origZaps, finalZaps;
    struct vrequest *treq = NULL;
    afs_int32 code;
    struct afs_conn *tc;
    struct VenusFid newFid;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus *OutFidStatus, *OutDirStatus;
    struct AFSVolSync tsync;
    struct AFSCallBack CallBack;
    afs_int32 now;
    struct dcache *tdc;
    afs_size_t offset, len;
    struct server *hostp = 0;
    struct vcache *tvc;
    struct volume *volp = 0;
    struct afs_fakestat_state fakestate;
    struct rx_connection *rxconn;
    XSTATS_DECLS;
    OSI_VC_CONVERT(adp);

    AFS_STATCNT(afs_create);

    OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    memset(&InStatus, 0, sizeof(InStatus));

    if ((code = afs_CreateReq(&treq, acred)))
	goto done2;

    afs_Trace3(afs_iclSetp, CM_TRACE_CREATE, ICL_TYPE_POINTER, adp,
	       ICL_TYPE_STRING, aname, ICL_TYPE_INT32, amode);

    afs_InitFakeStat(&fakestate);

#ifdef AFS_SGI65_ENV
    /* If avcp is passed not null, it's the old reference to this file.
     * We can use this to avoid create races.  For now, just decrement
     * the reference count on it.
     */
    if (*avcp) {
	AFS_RELE(AFSTOV(*avcp));
	*avcp = NULL;
    }
#endif

    if (strlen(aname) > AFSNAMEMAX) {
	code = ENAMETOOLONG;
	goto done3;
    }

    if (!afs_ENameOK(aname)) {
	code = EINVAL;
	goto done3;
    }
    switch (attrs->va_type) {
    case VBLK:
    case VCHR:
#if !defined(AFS_SUN5_ENV)
    case VSOCK:
#endif
    case VFIFO:
	/* We don't support special devices or FIFOs */
	code = EINVAL;
	goto done3;
    default:
	;
    }
    AFS_DISCON_LOCK();

    code = afs_EvalFakeStat(&adp, &fakestate, treq);
    if (code)
	goto done;
  tagain:
    code = afs_VerifyVCache(adp, treq);
    if (code)
	goto done;

    /* If the volume is read-only, return an error without making an RPC
     * to the fileserver.
     */
    if (adp->f.states & CRO) {
	code = EROFS;
	goto done;
    }

    if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
	code = ENETDOWN;
	goto done;
    }

    tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1);
    ObtainWriteLock(&adp->lock, 135);
    if (tdc)
	ObtainSharedLock(&tdc->lock, 630);

    /*
     * Make sure that the data in the cache is current.  We may have
     * received a callback while we were waiting for the write lock.
     */
    if (!(adp->f.states & CStatd)
	|| (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) {
	ReleaseWriteLock(&adp->lock);
	if (tdc) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	}
	goto tagain;
    }
    if (tdc) {
	/* see if file already exists.  If it does, we only set
	 * the size attributes (to handle O_TRUNC) */
	code = afs_dir_Lookup(tdc, aname, &newFid.Fid);	/* use dnlc first xxx */
	if (code == 0) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	    ReleaseWriteLock(&adp->lock);
#ifdef AFS_SGI64_ENV
	    if (flags & VEXCL) {
#else
	    if (aexcl != NONEXCL) {
#endif
		code = EEXIST;	/* file exists in excl mode open */
		goto done;
	    }
	    /* found the file, so use it */
	    newFid.Cell = adp->f.fid.Cell;
	    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	    tvc = NULL;
	    if (newFid.Fid.Unique == 0) {
		tvc = afs_LookupVCache(&newFid, treq, NULL, adp, aname);
	    }
	    if (!tvc)		/* lookup failed or wasn't called */
		tvc = afs_GetVCache(&newFid, treq, NULL, NULL);

	    if (tvc) {
		/* if the thing exists, we need the right access to open it.
		 * we must check that here, since no other checks are
		 * made by the open system call */
		len = attrs->va_size;	/* only do the truncate */
		/*
		 * We used to always check for READ access before; the
		 * problem is that we will fail if the existing file
		 * has mode -w-w-w, which is wrong.
		 */
		if ((amode & VREAD)
		    && !afs_AccessOK(tvc, PRSFS_READ, treq, CHECK_MODE_BITS)) {
		    afs_PutVCache(tvc);
		    code = EACCES;
		    goto done;
		}
#if defined(AFS_DARWIN80_ENV)
		if ((amode & VWRITE) || VATTR_IS_ACTIVE(attrs, va_data_size))
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		if ((amode & VWRITE) || (attrs->va_mask & AT_SIZE))
#else
		if ((amode & VWRITE) || len != 0xffffffff)
#endif
		{
		    /* needed for write access check */
		    tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
		    tvc->f.parent.unique = adp->f.fid.Fid.Unique;
		    /* need write mode for these guys */
		    if (!afs_AccessOK
			(tvc, PRSFS_WRITE, treq, CHECK_MODE_BITS)) {
			afs_PutVCache(tvc);
			code = EACCES;
			goto done;
		    }
		}
#if defined(AFS_DARWIN80_ENV)
		if (VATTR_IS_ACTIVE(attrs, va_data_size))
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		if (attrs->va_mask & AT_SIZE)
#else
		if (len != 0xffffffff)
#endif
		{
		    if (vType(tvc) != VREG) {
			afs_PutVCache(tvc);
			code = EISDIR;
			goto done;
		    }
		    /* do a truncate */
#if defined(AFS_DARWIN80_ENV)
		    VATTR_INIT(attrs);
		    VATTR_SET_SUPPORTED(attrs, va_data_size);
		    VATTR_SET_ACTIVE(attrs, va_data_size);
#elif defined(UKERNEL)
		    attrs->va_mask = ATTR_SIZE;
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		    attrs->va_mask = AT_SIZE;
#else
		    VATTR_NULL(attrs);
#endif
		    attrs->va_size = len;
		    ObtainWriteLock(&tvc->lock, 136);
		    tvc->f.states |= CCreating;
		    ReleaseWriteLock(&tvc->lock);
#if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
#if defined(AFS_SGI64_ENV)
		    code =
			afs_setattr(VNODE_TO_FIRST_BHV((vnode_t *) tvc),
				    attrs, 0, acred);
#else
		    code = afs_setattr(tvc, attrs, 0, acred);
#endif /* AFS_SGI64_ENV */
#else /* SUN5 || SGI */
		    code = afs_setattr(tvc, attrs, acred);
#endif /* SUN5 || SGI */
		    ObtainWriteLock(&tvc->lock, 137);
		    tvc->f.states &= ~CCreating;
		    ReleaseWriteLock(&tvc->lock);
		    if (code) {
			afs_PutVCache(tvc);
			goto done;
		    }
		}
		*avcp = tvc;
	    } else
		code = ENOENT;	/* shouldn't get here */
	    /* make sure vrefCount bumped only if code == 0 */
	    goto done;
	}
    }

    /* if we create the file, we don't do any access checks, since
     * that's how O_CREAT is supposed to work */
    if (adp->f.states & CForeign) {
	origCBs = afs_allCBs;
	origZaps = afs_allZaps;
    } else {
	origCBs = afs_evenCBs;	/* if changes, we don't really have a callback */
	origZaps = afs_evenZaps;	/* number of even numbered vnodes discarded */
    }
    InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE | AFS_SETGROUP;
    InStatus.ClientModTime = osi_Time();
    InStatus.Group = (afs_int32) afs_cr_gid(acred);
    if (AFS_NFSXLATORREQ(acred)) {
	/*
	 * XXX The following is mainly used to fix a bug in the HP-UX
	 * nfs client where they create files with mode of 0 without
	 * doing any setattr later on to fix it.  XXX
	 */
#if defined(AFS_AIX_ENV)
	if (attrs->va_mode != -1) {
#else
#if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
	if (attrs->va_mask & AT_MODE) {
#else
	if (attrs->va_mode != ((unsigned short)-1)) {
#endif
#endif
	    if (!attrs->va_mode)
		attrs->va_mode = 0x1b6;	/* XXX default mode: rw-rw-rw XXX */
	}
    }

    if (!AFS_IS_DISCONNECTED) {
	/* If not disconnected, connect to the server. */

	InStatus.UnixModeBits = attrs->va_mode & 0xffff;	/* only care about protection bits */
	do {
	    tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn);
	    if (tc) {
		hostp = tc->srvr->server;	/* remember for callback processing */
		now = osi_Time();
		XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_CREATEFILE);
		RX_AFS_GUNLOCK();
		code =
		    RXAFS_CreateFile(rxconn, (struct AFSFid *)&adp->f.fid.Fid,
				     aname, &InStatus, (struct AFSFid *)
				     &newFid.Fid, OutFidStatus, OutDirStatus,
				     &CallBack, &tsync);
		RX_AFS_GLOCK();
		XSTATS_END_TIME;
		CallBack.ExpirationTime += now;
	    } else
		code = -1;
	} while (afs_Analyze
		 (tc, rxconn, code, &adp->f.fid, treq,
		  AFS_STATS_FS_RPCIDX_CREATEFILE, SHARED_LOCK, NULL));

	if ((code == EEXIST || code == UAEEXIST) &&
#ifdef AFS_SGI64_ENV
	    !(flags & VEXCL)
#else /* AFS_SGI64_ENV */
	    aexcl == NONEXCL
#endif
	    ) {
	    /* if we get an EEXIST in nonexcl mode, just do a lookup */
	    if (tdc) {
		ReleaseSharedLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }
	    ReleaseWriteLock(&adp->lock);
#if defined(AFS_SGI64_ENV)
	    code =
		afs_lookup(VNODE_TO_FIRST_BHV((vnode_t *) adp), aname, avcp,
			   NULL, 0, NULL, acred);
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
	    code = afs_lookup(adp, aname, avcp, NULL, 0, NULL, acred);
#elif defined(UKERNEL)
	    code = afs_lookup(adp, aname, avcp, acred, 0);
#elif !defined(AFS_DARWIN_ENV)
	    code = afs_lookup(adp, aname, avcp, acred);
#endif
	    goto done;
	}

	if (code) {
	    if (code < 0) {
		ObtainWriteLock(&afs_xcbhash, 488);
		afs_DequeueCallback(adp);
		adp->f.states &= ~CStatd;
		ReleaseWriteLock(&afs_xcbhash);
		osi_dnlc_purgedp(adp);
	    }
	    ReleaseWriteLock(&adp->lock);
	    if (tdc) {
		ReleaseSharedLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }
	    goto done;
	}

    } else {
	/* Generate a fake FID for disconnected mode. */
	newFid.Cell = adp->f.fid.Cell;
	newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	afs_GenFakeFid(&newFid, VREG, 1);
    }				/* if (!AFS_IS_DISCON_RW) */

    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc)
	UpgradeSToWLock(&tdc->lock, 631);
    if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) {
	/* we can do it locally */
	ObtainWriteLock(&afs_xdcache, 291);
	code = afs_dir_Create(tdc, aname, &newFid.Fid);
	ReleaseWriteLock(&afs_xdcache);
	if (code) {
	    ZapDCE(tdc);
	    DZap(tdc);
	}
    }
    if (tdc) {
	ReleaseWriteLock(&tdc->lock);
	afs_PutDCache(tdc);
    }
    if (AFS_IS_DISCON_RW)
	adp->f.m.LinkCount++;

    newFid.Cell = adp->f.fid.Cell;
    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);
    volp = afs_FindVolume(&newFid, READ_LOCK);

    /* New tricky optimistic callback handling algorithm for file creation
     * works as follows.  We create the file essentially with no locks set
     * at all.  The file server may thus handle operations from other cache
     * managers, as well as from this very own cache manager, that reference
     * the file in question before we managed to create the cache entry.
     * However, if anyone else changes any of the status information for a
     * file, we'll see afs_evenCBs increase (files always have even fids).
     * If someone on this workstation manages to do something to the file,
     * they'll end up having to create a cache entry for the new file.
     * Either we'll find it once we've got the afs_xvcache lock set, or
     * whoever created it also *deleted* the vnode before we got there, in
     * which case we will find evenZaps has changed, too.  Thus, we only
     * assume we have the right status information if no callbacks or vnode
     * removals have occurred to even numbered files from the time the call
     * started until the time that we got the xvcache lock set.  Of course,
     * this also assumes that any call that modifies a file first gets a
     * write lock on the file's vnode, but if that weren't true, the whole
     * cache manager would fail, since no call would be able to update the
     * local vnode status after modifying a file on a file server.
     */
    ObtainWriteLock(&afs_xvcache, 138);
    if (adp->f.states & CForeign)
	finalZaps = afs_allZaps;	/* do this before calling newvcache */
    else
	finalZaps = afs_evenZaps;	/* do this before calling newvcache */
    /* don't need to call RemoveVCB, since the only path leaving a callback
     * is the one where we pass through afs_NewVCache.  Can't have queued a
     * VCB unless we created and freed an entry between file creation time
     * and here, and the freeing of the vnode will change evenZaps.  Don't
     * need to update the VLRU queue, since the find will only succeed in
     * the event of a create race, and then the vcache will be at the front
     * of the VLRU queue anyway... */
    if (!(tvc = afs_FindVCache(&newFid, 0, DO_STATS))) {
	tvc = afs_NewVCache(&newFid, hostp);
	if (tvc) {
	    int finalCBs;
	    ObtainWriteLock(&tvc->lock, 139);

	    ObtainWriteLock(&afs_xcbhash, 489);
	    finalCBs = afs_evenCBs;
	    /* add the callback in */
	    if (adp->f.states & CForeign) {
		tvc->f.states |= CForeign;
		finalCBs = afs_allCBs;
	    }
	    if (origCBs == finalCBs && origZaps == finalZaps) {
		tvc->f.states |= CStatd;	/* we've faked the entire thing, so don't stat */
		tvc->f.states &= ~CBulkFetching;
		if (!AFS_IS_DISCON_RW) {
		    tvc->cbExpires = CallBack.ExpirationTime;
		    afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime),
				      volp);
		}
	    } else {
		afs_DequeueCallback(tvc);
		tvc->f.states &= ~(CStatd | CUnique);
		tvc->callback = 0;
		if (tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR))
		    osi_dnlc_purgedp(tvc);
	    }
	    ReleaseWriteLock(&afs_xcbhash);
	    if (AFS_IS_DISCON_RW) {
		afs_DisconAddDirty(tvc, VDisconCreate, 0);
		afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VREG);
	    } else {
		afs_ProcessFS(tvc, OutFidStatus, treq);
	    }

	    tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
	    tvc->f.parent.unique = adp->f.fid.Fid.Unique;
#if !defined(UKERNEL)
	    if (volp && (volp->states & VPartVisible))
		tvc->f.states |= CPartVisible;
#endif
	    ReleaseWriteLock(&tvc->lock);
	    *avcp = tvc;
	    code = 0;
	} else
	    code = ENOENT;
    } else {
	/* otherwise cache entry already exists, someone else must
	 * have created it.  Comments used to say:  "don't need write
	 * lock to *clear* these flags" but we should do it anyway.
	 * Code used to clear stat bit and callback, but I don't see
	 * the point -- we didn't have a create race, somebody else just
	 * snuck into NewVCache before we got here, probably a racing
	 * lookup.
	 */
	*avcp = tvc;
	code = 0;
    }
    ReleaseWriteLock(&afs_xvcache);

  done:
    AFS_DISCON_UNLOCK();

  done3:
    if (volp)
	afs_PutVolume(volp, READ_LOCK);

    if (code == 0) {
	if (afs_mariner)
	    afs_AddMarinerName(aname, *avcp);
	/* return the new status in vattr */
	afs_CopyOutAttrs(*avcp, attrs);
	if (afs_mariner)
	    afs_MarinerLog("store$Creating", *avcp);
    }

    afs_PutFakeStat(&fakestate);

    code = afs_CheckCode(code, treq, 20);
    afs_DestroyReq(treq);

  done2:
    osi_FreeSmallSpace(OutFidStatus);
    osi_FreeSmallSpace(OutDirStatus);
    return code;
}


/*
 * Check to see if we can track the change locally: requires that
 * we have sufficiently recent info in the data cache.  If so, we
 * know the new DataVersion number, and place it correctly in both the
 * data and stat cache entries.  This routine returns 1 if we should
 * do the operation locally, and 0 otherwise.
 *
 * This routine must be called with the stat cache entry write-locked,
 * and the dcache entry write-locked.
 */
int
afs_LocalHero(struct vcache *avc, struct dcache *adc,
	      AFSFetchStatus * astat, int aincr)
{
    afs_int32 ok;
    afs_hyper_t avers;

    AFS_STATCNT(afs_LocalHero);
    hset64(avers, astat->dataVersionHigh, astat->DataVersion);
    /* avers *is* the version number now, no matter what */

    if (adc) {
	/* does what's in the dcache *now* match what's in the vcache *now*,
	 * and do we have a valid callback?  if not, our local copy is not
	 * "ok" */
	ok = (hsame(avc->f.m.DataVersion, adc->f.versionNo) && avc->callback
	      && (avc->f.states & CStatd) && avc->cbExpires >= osi_Time());
    } else {
	ok = 0;
    }
    if (ok) {
	/* check that the DV on the server is what we expect it to be */
	afs_hyper_t newDV;
	hset(newDV, adc->f.versionNo);
	hadd32(newDV, aincr);
	if (!hsame(avers, newDV)) {
	    ok = 0;
	}
    }
#if defined(AFS_SGI_ENV)
    osi_Assert(avc->v.v_type == VDIR);
#endif
    /* The bulk status code used the length as a sequence number. */
    /* Don't update the vcache entry unless the stats are current. */
    if (avc->f.states & CStatd) {
	hset(avc->f.m.DataVersion, avers);
#ifdef AFS_64BIT_CLIENT
	FillInt64(avc->f.m.Length, astat->Length_hi, astat->Length);
#else /* AFS_64BIT_CLIENT */
	avc->f.m.Length = astat->Length;
#endif /* AFS_64BIT_CLIENT */
	avc->f.m.Date = astat->ClientModTime;
    }
    if (ok) {
	/* we've been tracking things correctly */
	adc->dflags |= DFEntryMod;
	adc->f.versionNo = avers;
	return 1;
    } else {
	if (adc) {
	    ZapDCE(adc);
	    DZap(adc);
	}
	if (avc->f.states & CStatd) {
	    osi_dnlc_purgedp(avc);
	}
	return 0;
    }
}
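/*
 * Worked example (illustrative): suppose a directory's cached dcache and
 * vcache both hold DV 41, the callback is valid, and we apply one local
 * directory change (aincr == 1).  afs_LocalHero then expects the status
 * returned by the fileserver to carry DV 42; if it does, 42 is stamped
 * into both cache entries and the local copy is kept.  Any other DV means
 * some other client slipped in a change, so the dcache entry is zapped
 * and the caller must refetch.
 */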
/* rxi_ReadProc -- internal version.
 *
 * LOCKS USED -- called at netpri
 */
int
rxi_ReadProc(struct rx_call *call, char *buf, int nbytes)
{
    struct rx_packet *cp = call->currentPacket;
    struct rx_packet *rp;
    int requestCount;
    unsigned int t;

/* XXXX took out clock_NewTime from here.  Was it needed? */
    requestCount = nbytes;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
	call->iovqc -=
#endif /* RXDEBUG_PACKET */
	    rxi_FreePackets(0, &call->iovq);
    }

    do {
	if (call->nLeft == 0) {
	    /* Get next packet */
	    MUTEX_ENTER(&call->lock);
	    for (;;) {
		if (call->error || (call->mode != RX_MODE_RECEIVING)) {
		    if (call->error) {
			call->mode = RX_MODE_ERROR;
			MUTEX_EXIT(&call->lock);
			return 0;
		    }
		    if (call->mode == RX_MODE_SENDING) {
			MUTEX_EXIT(&call->lock);
			rxi_FlushWrite(call);
			MUTEX_ENTER(&call->lock);
			continue;
		    }
		}
		if (queue_IsNotEmpty(&call->rq)) {
		    /* Check that next packet available is next in sequence */
		    rp = queue_First(&call->rq, rx_packet);
		    if (rp->header.seq == call->rnext) {
			afs_int32 error;
			struct rx_connection *conn = call->conn;
			queue_Remove(rp);
#ifdef RX_TRACK_PACKETS
			rp->flags &= ~RX_PKTFLAG_RQ;
#endif
#ifdef RXDEBUG_PACKET
			call->rqc--;
#endif /* RXDEBUG_PACKET */

			/* RXS_CheckPacket called to undo RXS_PreparePacket's
			 * work.  It may reduce the length of the packet by up
			 * to conn->maxTrailerSize, to reflect the length of the
			 * data + the header. */
			if ((error =
			     RXS_CheckPacket(conn->securityObject, call,
					     rp))) {
			    /* Used to merely shut down the call, but now we
			     * shut down the whole connection since this may
			     * indicate an attempt to hijack it */

			    MUTEX_EXIT(&call->lock);
			    rxi_ConnectionError(conn, error);
			    MUTEX_ENTER(&conn->conn_data_lock);
			    rp = rxi_SendConnectionAbort(conn, rp, 0, 0);
			    MUTEX_EXIT(&conn->conn_data_lock);
			    rxi_FreePacket(rp);

			    return 0;
			}
			call->rnext++;
			cp = call->currentPacket = rp;
#ifdef RX_TRACK_PACKETS
			call->currentPacket->flags |= RX_PKTFLAG_CP;
#endif
			call->curvec = 1;	/* 0th vec is always header */
			/* begin at the beginning [ more or less ], continue
			 * on until the end, then stop. */
			call->curpos =
			    (char *)cp->wirevec[1].iov_base +
			    call->conn->securityHeaderSize;
			call->curlen =
			    cp->wirevec[1].iov_len -
			    call->conn->securityHeaderSize;

			/* Notice that this code works correctly if the data
			 * size is 0 (which it may be--no reply arguments from
			 * server, for example).  This relies heavily on the
			 * fact that the code below immediately frees the packet
			 * (no yields, etc.).  If it didn't, this would be a
			 * problem because a value of zero for call->nLeft
			 * normally means that there is no read packet */
			call->nLeft = cp->length;
			hadd32(call->bytesRcvd, cp->length);

			/* Send a hard ack for every rxi_HardAckRate+1 packets
			 * consumed.  Otherwise schedule an event to send
			 * the hard ack later on.
			 */
			call->nHardAcks++;
			if (!(call->flags & RX_CALL_RECEIVE_DONE)) {
			    if (call->nHardAcks > (u_short) rxi_HardAckRate) {
				rxevent_Cancel(call->delayedAckEvent, call,
					       RX_CALL_REFCOUNT_DELAY);
				rxi_SendAck(call, 0, 0, RX_ACK_DELAY, 0);
			    } else {
				struct clock when, now;
				clock_GetTime(&now);
				when = now;
				/* Delay to consolidate ack packets */
				clock_Add(&when, &rx_hardAckDelay);
				if (!call->delayedAckEvent
				    || clock_Gt(&call->delayedAckEvent->
						eventTime, &when)) {
				    rxevent_Cancel(call->delayedAckEvent,
						   call,
						   RX_CALL_REFCOUNT_DELAY);
				    MUTEX_ENTER(&rx_refcnt_mutex);
				    CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
				    MUTEX_EXIT(&rx_refcnt_mutex);
				    call->delayedAckEvent =
					rxevent_PostNow(&when, &now,
							rxi_SendDelayedAck,
							call, 0);
				}
			    }
			}
			break;
		    }
		}

		/*
		 * If we reach this point either we have no packets in the
		 * receive queue or the next packet in the queue is not the
		 * one we are looking for.  There is nothing else for us to
		 * do but wait for another packet to arrive.
		 */

		/* Are there ever going to be any more packets? */
		if (call->flags & RX_CALL_RECEIVE_DONE) {
		    MUTEX_EXIT(&call->lock);
		    return requestCount - nbytes;
		}
		/* Wait for in-sequence packet */
		call->flags |= RX_CALL_READER_WAIT;
		clock_NewTime();
		call->startWait = clock_Sec();
		while (call->flags & RX_CALL_READER_WAIT) {
#ifdef RX_ENABLE_LOCKS
		    CV_WAIT(&call->cv_rq, &call->lock);
#else
		    osi_rxSleep(&call->rq);
#endif
		}
		cp = call->currentPacket;

		call->startWait = 0;
#ifdef RX_ENABLE_LOCKS
		if (call->error) {
		    MUTEX_EXIT(&call->lock);
		    return 0;
		}
#endif /* RX_ENABLE_LOCKS */
	    }
	    MUTEX_EXIT(&call->lock);
	} else
	    /* osi_Assert(cp); */
	    /* MTUXXX  this should be replaced by some error-recovery code before shipping */
	    /* yes, the following block is allowed to be the ELSE clause (or not) */
	    /* It's possible for call->nLeft to be smaller than any particular
	     * iov_len.  Usually, recvmsg doesn't change the iov_len, since it
	     * reflects the size of the buffer.  We have to keep track of the
	     * number of bytes read in the length field of the packet struct.  On
	     * the final portion of a received packet, it's almost certain that
	     * call->nLeft will be smaller than the final buffer. */
	    while (nbytes && cp) {
		t = MIN((int)call->curlen, nbytes);
		t = MIN(t, (int)call->nLeft);
		memcpy(buf, call->curpos, t);
		buf += t;
		nbytes -= t;
		call->curpos += t;
		call->curlen -= t;
		call->nLeft -= t;

		if (!call->nLeft) {
		    /* out of packet.  Get another one. */
#ifdef RX_TRACK_PACKETS
		    call->currentPacket->flags &= ~RX_PKTFLAG_CP;
#endif
		    rxi_FreePacket(cp);
		    cp = call->currentPacket = (struct rx_packet *)0;
		} else if (!call->curlen) {
		    /* need to get another struct iov */
		    if (++call->curvec >= cp->niovecs) {
			/* current packet is exhausted, get ready for another */
			/* don't worry about curvec and stuff, they get set somewhere else */
#ifdef RX_TRACK_PACKETS
			call->currentPacket->flags &= ~RX_PKTFLAG_CP;
#endif
			rxi_FreePacket(cp);
			cp = call->currentPacket = (struct rx_packet *)0;
			call->nLeft = 0;
		    } else {
			call->curpos =
			    (char *)cp->wirevec[call->curvec].iov_base;
			call->curlen = cp->wirevec[call->curvec].iov_len;
		    }
		}
	    }
	if (!nbytes) {
	    /* user buffer is full, return */
	    return requestCount;
	}

    } while (nbytes);

    return requestCount;
}
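/*
 * Usage sketch (illustrative only; not part of the original source):
 * callers normally reach rxi_ReadProc through the exported rx_Read()
 * wrapper, which enters netpri and handles the in-packet fast path.
 * Reading a length-prefixed reply might look like:
 *
 *     afs_int32 len;
 *     char buf[4096];
 *     if (rx_Read(call, (char *)&len, sizeof(len)) != sizeof(len))
 *         return rx_Error(call);       // short read: call has errored
 *     len = ntohl(len);
 *     while (len > 0) {
 *         int n = rx_Read(call, buf, MIN(len, (int)sizeof(buf)));
 *         if (n <= 0)
 *             break;                   // 0 => error or premature EOF
 *         consume(buf, n);             // 'consume' is a placeholder
 *         len -= n;
 *     }
 */

/* rxi_WriteProc -- internal version.
 *
 * LOCKS USED -- called at netpri
 */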
int
rxi_WriteProc(struct rx_call *call, char *buf, int nbytes)
{
    struct rx_connection *conn = call->conn;
    struct rx_packet *cp = call->currentPacket;
    unsigned int t;
    int requestCount = nbytes;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
	call->iovqc -=
#endif /* RXDEBUG_PACKET */
	    rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode != RX_MODE_SENDING) {
	if ((conn->type == RX_SERVER_CONNECTION)
	    && (call->mode == RX_MODE_RECEIVING)) {
	    call->mode = RX_MODE_SENDING;
	    if (cp) {
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_CP;
#endif
		rxi_FreePacket(cp);
		cp = call->currentPacket = (struct rx_packet *)0;
		call->nLeft = 0;
		call->nFree = 0;
	    }
	} else {
	    return 0;
	}
    }

    /* Loop condition is checked at the end, so that a write of 0 bytes
     * will force a packet to be created--especially for the case where
     * there are 0 bytes on the stream, but we must send a packet
     * anyway. */
    do {
	if (call->nFree == 0) {
	    MUTEX_ENTER(&call->lock);
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
	    rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
	    cp = call->currentPacket;
	    if (call->error)
		call->mode = RX_MODE_ERROR;
	    if (!call->error && cp) {
		/* Clear the current packet now so that if
		 * we are forced to wait and drop the lock
		 * the packet we are planning on using
		 * cannot be freed.
		 */
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_CP;
#endif
		call->currentPacket = (struct rx_packet *)0;
		clock_NewTime();	/* Bogus:  need new time package */
		/* The 0, below, specifies that it is not the last packet:
		 * there will be others.  PrepareSendPacket may
		 * alter the packet length by up to
		 * conn->securityMaxTrailerSize */
		hadd32(call->bytesSent, cp->length);
		rxi_PrepareSendPacket(call, cp, 0);
#ifdef RX_TRACK_PACKETS
		cp->flags |= RX_PKTFLAG_TQ;
#endif
		queue_Append(&call->tq, cp);
#ifdef RXDEBUG_PACKET
		call->tqc++;
#endif /* RXDEBUG_PACKET */
		cp = (struct rx_packet *)0;
		if (!
		    (call->
		     flags & (RX_CALL_FAST_RECOVER |
			      RX_CALL_FAST_RECOVER_WAIT))) {
		    rxi_Start(0, call, 0, 0);
		}
	    } else if (cp) {
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_CP;
#endif
		rxi_FreePacket(cp);
		cp = call->currentPacket = (struct rx_packet *)0;
	    }
	    /* Wait for transmit window to open up */
	    while (!call->error
		   && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
		clock_NewTime();
		call->startWait = clock_Sec();

#ifdef RX_ENABLE_LOCKS
		CV_WAIT(&call->cv_twind, &call->lock);
#else
		call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
		osi_rxSleep(&call->twind);
#endif

		call->startWait = 0;
#ifdef RX_ENABLE_LOCKS
		if (call->error) {
		    call->mode = RX_MODE_ERROR;
		    MUTEX_EXIT(&call->lock);
		    return 0;
		}
#endif /* RX_ENABLE_LOCKS */
	    }
	    if ((cp = rxi_AllocSendPacket(call, nbytes))) {
#ifdef RX_TRACK_PACKETS
		cp->flags |= RX_PKTFLAG_CP;
#endif
		call->currentPacket = cp;
		call->nFree = cp->length;
		call->curvec = 1;	/* 0th vec is always header */
		/* begin at the beginning [ more or less ], continue
		 * on until the end, then stop. */
		call->curpos =
		    (char *)cp->wirevec[1].iov_base +
		    call->conn->securityHeaderSize;
		call->curlen =
		    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
	    }
	    if (call->error) {
		call->mode = RX_MODE_ERROR;
		if (cp) {
#ifdef RX_TRACK_PACKETS
		    cp->flags &= ~RX_PKTFLAG_CP;
#endif
		    rxi_FreePacket(cp);
		    call->currentPacket = NULL;
		}
		MUTEX_EXIT(&call->lock);
		return 0;
	    }
	    MUTEX_EXIT(&call->lock);
	}

	if (cp && (int)call->nFree < nbytes) {
	    /* Try to extend the current buffer */
	    int len, mud;
	    len = cp->length;
	    mud = rx_MaxUserDataSize(call);
	    if (mud > len) {
		int want;
		want = MIN(nbytes - (int)call->nFree, mud - len);
		rxi_AllocDataBuf(cp, want, RX_PACKET_CLASS_SEND_CBUF);
		if (cp->length > (unsigned)mud)
		    cp->length = mud;
		call->nFree += (cp->length - len);
	    }
	}

	/* If the remaining bytes fit in the buffer, then store them
	 * and return.  Don't ship a buffer that's full immediately to
	 * the peer--we don't know if it's the last buffer yet */

	if (!cp) {
	    call->nFree = 0;
	}

	while (nbytes && call->nFree) {

	    t = MIN((int)call->curlen, nbytes);
	    t = MIN((int)call->nFree, t);
	    memcpy(call->curpos, buf, t);
	    buf += t;
	    nbytes -= t;
	    call->curpos += t;
	    call->curlen -= (u_short)t;
	    call->nFree -= (u_short)t;

	    if (!call->curlen) {
		/* need to get another struct iov */
		if (++call->curvec >= cp->niovecs) {
		    /* current packet is full, extend or send it */
		    call->nFree = 0;
		} else {
		    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
		    call->curlen = cp->wirevec[call->curvec].iov_len;
		}
	    }
	}			/* while bytes to send and room to send them */

	/* might be out of space now */
	if (!nbytes) {
	    return requestCount;
	} else;			/* more data to send, so get another packet and keep going */
    } while (nbytes);

    return requestCount - nbytes;
}
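/*
 * Note on the window test above (illustrative): call->tfirst is the oldest
 * unacknowledged sequence number and call->tnext the next one to assign,
 * so "tnext + 1 > tfirst + (2 * twind)" blocks the writer once roughly
 * twice the transmit window of packets is queued or in flight; cv_twind
 * is signalled as acks drain the queue.
 */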
/* rxi_FillReadVec
 *
 * Uses packets in the receive queue to fill in as much of the
 * current iovec as possible.  Does not block if it runs out
 * of packets to complete the iovec.  Return true if an ack packet
 * was sent, otherwise return false */
int
rxi_FillReadVec(struct rx_call *call, afs_uint32 serial)
{
    int didConsume = 0;
    int didHardAck = 0;
    unsigned int t;
    struct rx_packet *rp;
    struct rx_packet *curp;
    struct iovec *call_iov;
    struct iovec *cur_iov = NULL;

    curp = call->currentPacket;
    if (curp) {
	cur_iov = &curp->wirevec[call->curvec];
    }
    call_iov = &call->iov[call->iovNext];

    while (!call->error && call->iovNBytes && call->iovNext < call->iovMax) {
	if (call->nLeft == 0) {
	    /* Get next packet */
	    if (queue_IsNotEmpty(&call->rq)) {
		/* Check that next packet available is next in sequence */
		rp = queue_First(&call->rq, rx_packet);
		if (rp->header.seq == call->rnext) {
		    afs_int32 error;
		    struct rx_connection *conn = call->conn;
		    queue_Remove(rp);
#ifdef RX_TRACK_PACKETS
		    rp->flags &= ~RX_PKTFLAG_RQ;
#endif
#ifdef RXDEBUG_PACKET
		    call->rqc--;
#endif /* RXDEBUG_PACKET */

		    /* RXS_CheckPacket called to undo RXS_PreparePacket's
		     * work.  It may reduce the length of the packet by up
		     * to conn->maxTrailerSize, to reflect the length of the
		     * data + the header. */
		    if ((error =
			 RXS_CheckPacket(conn->securityObject, call, rp))) {
			/* Used to merely shut down the call, but now we
			 * shut down the whole connection since this may
			 * indicate an attempt to hijack it */

			MUTEX_EXIT(&call->lock);
			rxi_ConnectionError(conn, error);
			MUTEX_ENTER(&conn->conn_data_lock);
			rp = rxi_SendConnectionAbort(conn, rp, 0, 0);
			MUTEX_EXIT(&conn->conn_data_lock);
			rxi_FreePacket(rp);
			MUTEX_ENTER(&call->lock);

			return 1;
		    }
		    call->rnext++;
		    curp = call->currentPacket = rp;
#ifdef RX_TRACK_PACKETS
		    call->currentPacket->flags |= RX_PKTFLAG_CP;
#endif
		    call->curvec = 1;	/* 0th vec is always header */
		    cur_iov = &curp->wirevec[1];
		    /* begin at the beginning [ more or less ], continue
		     * on until the end, then stop. */
		    call->curpos =
			(char *)curp->wirevec[1].iov_base +
			call->conn->securityHeaderSize;
		    call->curlen =
			curp->wirevec[1].iov_len -
			call->conn->securityHeaderSize;

		    /* Notice that this code works correctly if the data
		     * size is 0 (which it may be--no reply arguments from
		     * server, for example).  This relies heavily on the
		     * fact that the code below immediately frees the packet
		     * (no yields, etc.).  If it didn't, this would be a
		     * problem because a value of zero for call->nLeft
		     * normally means that there is no read packet */
		    call->nLeft = curp->length;
		    hadd32(call->bytesRcvd, curp->length);

		    /* Send a hard ack for every rxi_HardAckRate+1 packets
		     * consumed.  Otherwise schedule an event to send
		     * the hard ack later on.
		     */
		    call->nHardAcks++;
		    didConsume = 1;
		    continue;
		}
	    }
	    break;
	}

	/* It's possible for call->nLeft to be smaller than any particular
	 * iov_len.  Usually, recvmsg doesn't change the iov_len, since it
	 * reflects the size of the buffer.  We have to keep track of the
	 * number of bytes read in the length field of the packet struct.  On
	 * the final portion of a received packet, it's almost certain that
	 * call->nLeft will be smaller than the final buffer. */
	while (call->iovNBytes && call->iovNext < call->iovMax && curp) {
	    t = MIN((int)call->curlen, call->iovNBytes);
	    t = MIN(t, (int)call->nLeft);
	    call_iov->iov_base = call->curpos;
	    call_iov->iov_len = t;
	    call_iov++;
	    call->iovNext++;
	    call->iovNBytes -= t;
	    call->curpos += t;
	    call->curlen -= t;
	    call->nLeft -= t;

	    if (!call->nLeft) {
		/* out of packet.  Get another one. */
#ifdef RX_TRACK_PACKETS
		curp->flags &= ~RX_PKTFLAG_CP;
		curp->flags |= RX_PKTFLAG_IOVQ;
#endif
		queue_Append(&call->iovq, curp);
#ifdef RXDEBUG_PACKET
		call->iovqc++;
#endif /* RXDEBUG_PACKET */
		curp = call->currentPacket = (struct rx_packet *)0;
	    } else if (!call->curlen) {
		/* need to get another struct iov */
		if (++call->curvec >= curp->niovecs) {
		    /* current packet is exhausted, get ready for another */
		    /* don't worry about curvec and stuff, they get set somewhere else */
#ifdef RX_TRACK_PACKETS
		    curp->flags &= ~RX_PKTFLAG_CP;
		    curp->flags |= RX_PKTFLAG_IOVQ;
#endif
		    queue_Append(&call->iovq, curp);
#ifdef RXDEBUG_PACKET
		    call->iovqc++;
#endif /* RXDEBUG_PACKET */
		    curp = call->currentPacket = (struct rx_packet *)0;
		    call->nLeft = 0;
		} else {
		    cur_iov++;
		    call->curpos = (char *)cur_iov->iov_base;
		    call->curlen = cur_iov->iov_len;
		}
	    }
	}
    }

    /* If we consumed any packets then check whether we need to
     * send a hard ack. */
    if (didConsume && (!(call->flags & RX_CALL_RECEIVE_DONE))) {
	if (call->nHardAcks > (u_short) rxi_HardAckRate) {
	    rxevent_Cancel(call->delayedAckEvent, call,
			   RX_CALL_REFCOUNT_DELAY);
	    rxi_SendAck(call, 0, serial, RX_ACK_DELAY, 0);
	    didHardAck = 1;
	} else {
	    struct clock when, now;
	    clock_GetTime(&now);
	    when = now;
	    /* Delay to consolidate ack packets */
	    clock_Add(&when, &rx_hardAckDelay);
	    if (!call->delayedAckEvent
		|| clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
		rxevent_Cancel(call->delayedAckEvent, call,
			       RX_CALL_REFCOUNT_DELAY);
		MUTEX_ENTER(&rx_refcnt_mutex);
		CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
		MUTEX_EXIT(&rx_refcnt_mutex);
		call->delayedAckEvent =
		    rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
	    }
	}
    }
    return didHardAck;
}
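/*
 * Caller contract (as implemented above): the iovec entries filled in by
 * rxi_FillReadVec point directly into packet buffers, so consumed packets
 * are parked on call->iovq instead of being freed.  They are released
 * lazily by the next Read/Write call -- see the "Free any packets from the
 * last call to ReadvProc/WritevProc" preamble in those routines.
 */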
/* Flush any buffered data to the stream, switch to read mode
 * (clients) or to EOF mode (servers).
 *
 * LOCKS HELD: called at netpri.
 */
void
rxi_FlushWrite(struct rx_call *call)
{
    struct rx_packet *cp = NULL;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
	call->iovqc -=
#endif /* RXDEBUG_PACKET */
	    rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode == RX_MODE_SENDING) {

	call->mode =
	    (call->conn->type ==
	     RX_CLIENT_CONNECTION ? RX_MODE_RECEIVING : RX_MODE_EOF);

#ifdef RX_KERNEL_TRACE
	{
	    int glockOwner = ISAFS_GLOCK();
	    if (!glockOwner)
		AFS_GLOCK();
	    afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
		       __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
		       call);
	    if (!glockOwner)
		AFS_GUNLOCK();
	}
#endif

	MUTEX_ENTER(&call->lock);
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
	rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
	if (call->error)
	    call->mode = RX_MODE_ERROR;

	cp = call->currentPacket;

	if (cp) {
	    /* cp->length is only supposed to be the user's data */
	    /* cp->length was already set to (then-current)
	     * MaxUserDataSize or less. */
#ifdef RX_TRACK_PACKETS
	    cp->flags &= ~RX_PKTFLAG_CP;
#endif
	    cp->length -= call->nFree;
	    call->currentPacket = (struct rx_packet *)0;
	    call->nFree = 0;
	} else {
	    cp = rxi_AllocSendPacket(call, 0);
	    if (!cp) {
		/* Mode can no longer be MODE_SENDING */
		MUTEX_EXIT(&call->lock);
		return;
	    }
	    cp->length = 0;
	    cp->niovecs = 2;	/* header + space for rxkad stuff */
	    call->nFree = 0;
	}

	/* The 1 specifies that this is the last packet */
	hadd32(call->bytesSent, cp->length);
	rxi_PrepareSendPacket(call, cp, 1);
#ifdef RX_TRACK_PACKETS
	cp->flags |= RX_PKTFLAG_TQ;
#endif
	queue_Append(&call->tq, cp);
#ifdef RXDEBUG_PACKET
	call->tqc++;
#endif /* RXDEBUG_PACKET */
	if (!
	    (call->
	     flags & (RX_CALL_FAST_RECOVER | RX_CALL_FAST_RECOVER_WAIT))) {
	    rxi_Start(0, call, 0, 0);
	}
	MUTEX_EXIT(&call->lock);
    }
}
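/*
 * Design note (illustrative): a flush with no buffered data still queues a
 * zero-length packet, because the last-packet flag passed to
 * rxi_PrepareSendPacket (the 1 above) is what tells the peer the stream
 * has ended; without it the reader would wait forever for more data.
 */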
/* rxi_WritevProc -- internal version.
 *
 * Send buffers allocated in rxi_WritevAlloc.
 *
 * LOCKS USED -- called at netpri.
 */
int
rxi_WritevProc(struct rx_call *call, struct iovec *iov, int nio, int nbytes)
{
    struct rx_packet *cp = NULL;
#ifdef RX_TRACK_PACKETS
    struct rx_packet *p, *np;
#endif
    int nextio;
    int requestCount;
    struct rx_queue tmpq;
#ifdef RXDEBUG_PACKET
    u_short tmpqc;
#endif

    requestCount = nbytes;
    nextio = 0;

    MUTEX_ENTER(&call->lock);
    if (call->error) {
	call->mode = RX_MODE_ERROR;
    } else if (call->mode != RX_MODE_SENDING) {
	call->error = RX_PROTOCOL_ERROR;
    }
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
    rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
    cp = call->currentPacket;

    if (call->error) {
	call->mode = RX_MODE_ERROR;
	MUTEX_EXIT(&call->lock);
	if (cp) {
#ifdef RX_TRACK_PACKETS
	    cp->flags &= ~RX_PKTFLAG_CP;
	    cp->flags |= RX_PKTFLAG_IOVQ;
#endif
	    queue_Prepend(&call->iovq, cp);
#ifdef RXDEBUG_PACKET
	    call->iovqc++;
#endif /* RXDEBUG_PACKET */
	    call->currentPacket = (struct rx_packet *)0;
	}
#ifdef RXDEBUG_PACKET
	call->iovqc -=
#endif /* RXDEBUG_PACKET */
	    rxi_FreePackets(0, &call->iovq);
	return 0;
    }

    /* Loop through the I/O vector adjusting packet pointers.
     * Place full packets back onto the iovq once they are ready
     * to send.  Set RX_PROTOCOL_ERROR if any problems are found in
     * the iovec.  We put the loop condition at the end to ensure that
     * a zero length write will push a short packet. */
    nextio = 0;
    queue_Init(&tmpq);
#ifdef RXDEBUG_PACKET
    tmpqc = 0;
#endif /* RXDEBUG_PACKET */
    do {
	if (call->nFree == 0 && cp) {
	    clock_NewTime();	/* Bogus:  need new time package */
	    /* The 0, below, specifies that it is not the last packet:
	     * there will be others.  PrepareSendPacket may
	     * alter the packet length by up to
	     * conn->securityMaxTrailerSize */
	    hadd32(call->bytesSent, cp->length);
	    rxi_PrepareSendPacket(call, cp, 0);
	    queue_Append(&tmpq, cp);
#ifdef RXDEBUG_PACKET
	    tmpqc++;
#endif /* RXDEBUG_PACKET */
	    cp = call->currentPacket = (struct rx_packet *)0;

	    /* The head of the iovq is now the current packet */
	    if (nbytes) {
		if (queue_IsEmpty(&call->iovq)) {
		    call->error = RX_PROTOCOL_ERROR;
		    MUTEX_EXIT(&call->lock);
#ifdef RXDEBUG_PACKET
		    tmpqc -=
#endif /* RXDEBUG_PACKET */
			rxi_FreePackets(0, &tmpq);
		    return 0;
		}
		cp = queue_First(&call->iovq, rx_packet);
		queue_Remove(cp);
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_IOVQ;
#endif
#ifdef RXDEBUG_PACKET
		call->iovqc--;
#endif /* RXDEBUG_PACKET */
#ifdef RX_TRACK_PACKETS
		cp->flags |= RX_PKTFLAG_CP;
#endif
		call->currentPacket = cp;
		call->nFree = cp->length;
		call->curvec = 1;
		call->curpos =
		    (char *)cp->wirevec[1].iov_base +
		    call->conn->securityHeaderSize;
		call->curlen =
		    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
	    }
	}

	if (nbytes) {
	    /* The next iovec should point to the current position */
	    if (iov[nextio].iov_base != call->curpos
		|| iov[nextio].iov_len > (int)call->curlen) {
		call->error = RX_PROTOCOL_ERROR;
		MUTEX_EXIT(&call->lock);
		if (cp) {
#ifdef RX_TRACK_PACKETS
		    cp->flags &= ~RX_PKTFLAG_CP;
#endif
		    queue_Prepend(&tmpq, cp);
#ifdef RXDEBUG_PACKET
		    tmpqc++;
#endif /* RXDEBUG_PACKET */
		    cp = call->currentPacket = (struct rx_packet *)0;
		}
#ifdef RXDEBUG_PACKET
		tmpqc -=
#endif /* RXDEBUG_PACKET */
		    rxi_FreePackets(0, &tmpq);
		return 0;
	    }
	    nbytes -= iov[nextio].iov_len;
	    call->curpos += iov[nextio].iov_len;
	    call->curlen -= iov[nextio].iov_len;
	    call->nFree -= iov[nextio].iov_len;
	    nextio++;
	    if (call->curlen == 0) {
		if (++call->curvec >= cp->niovecs) {
		    call->nFree = 0;
		} else {
		    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
		    call->curlen = cp->wirevec[call->curvec].iov_len;
		}
	    }
	}
    } while (nbytes && nextio < nio);

    /* Move the packets from the temporary queue onto the transmit queue.
     * We may end up with more than call->twind packets on the queue. */

#ifdef RX_TRACK_PACKETS
    for (queue_Scan(&tmpq, p, np, rx_packet)) {
	p->flags |= RX_PKTFLAG_TQ;
    }
#endif

    if (call->error)
	call->mode = RX_MODE_ERROR;

    queue_SpliceAppend(&call->tq, &tmpq);

    if (!(call->flags & (RX_CALL_FAST_RECOVER | RX_CALL_FAST_RECOVER_WAIT))) {
	rxi_Start(0, call, 0, 0);
    }

    /* Wait for the length of the transmit queue to fall below call->twind */
    while (!call->error && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
	clock_NewTime();
	call->startWait = clock_Sec();
#ifdef RX_ENABLE_LOCKS
	CV_WAIT(&call->cv_twind, &call->lock);
#else
	call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
	osi_rxSleep(&call->twind);
#endif
	call->startWait = 0;
    }

    /* cp is no longer valid since we may have given up the lock */
    cp = call->currentPacket;

    if (call->error) {
	call->mode = RX_MODE_ERROR;
	call->currentPacket = NULL;
	MUTEX_EXIT(&call->lock);
	if (cp) {
#ifdef RX_TRACK_PACKETS
	    cp->flags &= ~RX_PKTFLAG_CP;
#endif
	    rxi_FreePacket(cp);
	}
	return 0;
    }
    MUTEX_EXIT(&call->lock);

    return requestCount - nbytes;
}
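/*
 * Usage sketch (illustrative only; the wrapper signatures below are the
 * author's assumption about the exported API, so treat them as such):
 * rxi_WritevProc expects buffers previously handed out by rxi_WritevAlloc,
 * reached through the rx_WritevAlloc()/rx_Writev() pair.  The iovecs must
 * be filled in place and passed back unchanged, since the code above
 * verifies each iov_base/iov_len against the packet layout:
 *
 *     struct iovec iov[RX_MAXIOVECS];
 *     int nio = 0;
 *     int nb = rx_WritevAlloc(call, iov, &nio, RX_MAXIOVECS, want);
 *     if (nb <= 0)
 *         return rx_Error(call);
 *     fill_buffers(iov, nio, nb);      // 'fill_buffers' is a placeholder
 *     if (rx_Writev(call, iov, nio, nb) != nb)
 *         return rx_Error(call);
 */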