void bq_heap_t::place(item_t *item, size_t i, bool flag) throw() { size_t j; assert(i > 0); for(; (j = i / 2); i = j) { item_t *_item = get(j); if(hcmp(_item, item)) break; put(i, _item); flag = false; } if(flag) { for(; (j = i * 2) <= count; i = j) { item_t *_item = get(j); if(j < count) { item_t *__item = get(j + 1); if(!hcmp(_item, __item)) { ++j; _item = __item; } } if(hcmp(item, _item)) break; put(i, _item); } } put(i, item); }
void bq_heap_t::validate() { #ifdef DEBUG_BQ for(size_t i = count; i > 1; --i) { if(!hcmp(get(i / 2), get(i))) fatal("bq_cont_heap validation failed"); } #endif }
void osi_FlushText_really(struct vcache *vp) { afs_hyper_t fdv; /* version before which we'll flush */ AFS_STATCNT(osi_FlushText); /* see if we've already flushed this data version */ if (hcmp(vp->f.m.DataVersion, vp->flushDV) <= 0) return; ObtainWriteLock(&afs_ftf, 317); hset(fdv, vp->f.m.DataVersion); /* why this disgusting code below? * xuntext, called by xrele, doesn't notice when it is called * with a freed text object. Sun continually calls xrele or xuntext * without any locking, as long as VTEXT is set on the * corresponding vnode. * But, if the text object is locked when you check the VTEXT * flag, several processes can wait in xuntext, waiting for the * text lock; when the second one finally enters xuntext's * critical region, the text object is already free, but the check * was already done by xuntext's caller. * Even worse, it turns out that xalloc locks the text object * before reading or stating a file via the vnode layer. Thus, we * could end up in getdcache, being asked to bring in a new * version of a file, but the corresponding text object could be * locked. We can't flush the text object without causing * deadlock, so now we just don't try to lock the text object * unless it is guaranteed to work. And we try to flush the text * when we need to a bit more often at the vnode layer. Sun * really blew the vm-cache flushing interface. */ #if defined (AFS_HPUX_ENV) if (vp->v.v_flag & VTEXT) { xrele(vp); if (vp->v.v_flag & VTEXT) { /* still has a text object? */ ReleaseWriteLock(&afs_ftf); return; } } #endif /* next do the stuff that need not check for deadlock problems */ mpurge(vp); /* finally, record that we've done it */ hset(vp->flushDV, fdv); ReleaseWriteLock(&afs_ftf); }
int afs_StoreAllSegments(struct vcache *avc, struct vrequest *areq, int sync) { struct dcache *tdc; afs_int32 code = 0; afs_int32 index; afs_int32 origCBs, foreign = 0; int hash; afs_hyper_t newDV, oldDV; /* DV when we start, and finish, respectively */ struct dcache **dcList; unsigned int i, j, minj, moredata, high, off; afs_size_t maxStoredLength; /* highest offset we've written to server. */ int safety, marineronce = 0; AFS_STATCNT(afs_StoreAllSegments); hset(oldDV, avc->f.m.DataVersion); hset(newDV, avc->f.m.DataVersion); hash = DVHash(&avc->f.fid); foreign = (avc->f.states & CForeign); dcList = osi_AllocLargeSpace(AFS_LRALLOCSIZ); afs_Trace2(afs_iclSetp, CM_TRACE_STOREALL, ICL_TYPE_POINTER, avc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); #if !defined(AFS_AIX32_ENV) && !defined(AFS_SGI65_ENV) /* In the aix vm implementation we need to do the vm_writep even * on the memcache case since that's we adjust the file's size * and finish flushing partial vm pages. */ if ((cacheDiskType != AFS_FCACHE_TYPE_MEM) || (sync & AFS_VMSYNC_INVAL) || (sync & AFS_VMSYNC) || (sync & AFS_LASTSTORE)) #endif /* !AFS_AIX32_ENV && !AFS_SGI65_ENV */ { /* If we're not diskless, reading a file may stress the VM * system enough to cause a pageout, and this vnode would be * locked when the pageout occurs. We can prevent this problem * by making sure all dirty pages are already flushed. We don't * do this when diskless because reading a diskless (i.e. * memory-resident) chunk doesn't require using new VM, and we * also don't want to dump more dirty data into a diskless cache, * since they're smaller, and we might exceed its available * space. */ #if defined(AFS_SUN5_ENV) if (sync & AFS_VMSYNC_INVAL) /* invalidate VM pages */ osi_VM_TryToSmush(avc, CRED(), 1); else #endif osi_VM_StoreAllSegments(avc); } if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) { /* This will probably make someone sad ... */ /*printf("Net down in afs_StoreSegments\n");*/ return ENETDOWN; } ConvertWToSLock(&avc->lock); /* * Subsequent code expects a sorted list, and it expects all the * chunks in the list to be contiguous, so we need a sort and a * while loop in here, too - but this will work for a first pass... * 92.10.05 - OK, there's a sort in here now. It's kind of a modified * bin sort, I guess. Chunk numbers start with 0 * * - Have to get a write lock on xdcache because GetDSlot might need it (if * the chunk doesn't have a dcache struct). * This seems like overkill in most cases. * - I'm not sure that it's safe to do "index = .hvNextp", then unlock * xdcache, then relock xdcache and try to use index. It is done * a lot elsewhere in the CM, but I'm not buying that argument. * - should be able to check IFDataMod without doing the GetDSlot (just * hold afs_xdcache). That way, it's easy to do this without the * writelock on afs_xdcache, and we save unneccessary disk * operations. I don't think that works, 'cuz the next pointers * are still on disk. */ origCBs = afs_allCBs; maxStoredLength = 0; minj = 0; do { memset(dcList, 0, NCHUNKSATONCE * sizeof(struct dcache *)); high = 0; moredata = FALSE; /* lock and start over from beginning of hash chain * in order to avoid a race condition. */ ObtainWriteLock(&afs_xdcache, 284); index = afs_dvhashTbl[hash]; for (j = 0; index != NULLIDX;) { if ((afs_indexFlags[index] & IFDataMod) && (afs_indexUnique[index] == avc->f.fid.Fid.Unique)) { tdc = afs_GetValidDSlot(index); /* refcount+1. */ if (!tdc) { ReleaseWriteLock(&afs_xdcache); code = EIO; goto done; } ReleaseReadLock(&tdc->tlock); if (!FidCmp(&tdc->f.fid, &avc->f.fid) && tdc->f.chunk >= minj) { off = tdc->f.chunk - minj; if (off < NCHUNKSATONCE) { if (dcList[off]) osi_Panic("dclist slot already in use!"); if (afs_mariner && !marineronce) { /* first chunk only */ afs_MarinerLog("store$Storing", avc); marineronce++; } dcList[off] = tdc; if (off > high) high = off; j++; /* DCLOCKXXX: chunkBytes is protected by tdc->lock which we * can't grab here, due to lock ordering with afs_xdcache. * So, disable this shortcut for now. -- kolya 2001-10-13 */ /* shortcut: big win for little files */ /* tlen -= tdc->f.chunkBytes; * if (tlen <= 0) * break; */ } else { moredata = TRUE; afs_PutDCache(tdc); if (j == NCHUNKSATONCE) break; } } else { afs_PutDCache(tdc); } } index = afs_dvnextTbl[index]; } ReleaseWriteLock(&afs_xdcache); /* this guy writes chunks, puts back dcache structs, and bumps newDV */ /* "moredata" just says "there are more dirty chunks yet to come". */ if (j) { code = afs_CacheStoreVCache(dcList, avc, areq, sync, minj, high, moredata, &newDV, &maxStoredLength); /* Release any zero-length dcache entries in our interval * that we locked but didn't store back above. */ for (j = 0; j <= high; j++) { tdc = dcList[j]; if (tdc) { osi_Assert(tdc->f.chunkBytes == 0); ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } } } /* if (j) */ minj += NCHUNKSATONCE; } while (!code && moredata); done: UpgradeSToWLock(&avc->lock, 29); /* send a trivial truncation store if did nothing else */ if (code == 0) { /* * Call StoreMini if we haven't written enough data to extend the * file at the fileserver to the client's notion of the file length. */ if ((avc->f.truncPos != AFS_NOTRUNC) || ((avc->f.states & CExtendedFile) && (maxStoredLength < avc->f.m.Length))) { code = afs_StoreMini(avc, areq); if (code == 0) hadd32(newDV, 1); /* just bumped here, too */ } avc->f.states &= ~CExtendedFile; } /* * Finally, turn off DWriting, turn on DFEntryMod, * update f.versionNo. * A lot of this could be integrated into the loop above */ if (!code) { afs_hyper_t h_unset; hones(h_unset); minj = 0; do { moredata = FALSE; memset(dcList, 0, NCHUNKSATONCE * sizeof(struct dcache *)); /* overkill, but it gets the lock in case GetDSlot needs it */ ObtainWriteLock(&afs_xdcache, 285); for (j = 0, safety = 0, index = afs_dvhashTbl[hash]; index != NULLIDX && safety < afs_cacheFiles + 2; index = afs_dvnextTbl[index]) { if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) { tdc = afs_GetValidDSlot(index); if (!tdc) { /* This is okay; since manipulating the dcaches at this * point is best-effort. We only get a dcache here to * increment the dv and turn off DWriting. If we were * supposed to do that for a dcache, but could not * due to an I/O error, it just means the dv won't * be updated so we don't be able to use that cached * chunk in the future. That's inefficient, but not * an error. */ continue; } ReleaseReadLock(&tdc->tlock); if (!FidCmp(&tdc->f.fid, &avc->f.fid) && tdc->f.chunk >= minj) { off = tdc->f.chunk - minj; if (off < NCHUNKSATONCE) { /* this is the file, and the correct chunk range */ if (j >= NCHUNKSATONCE) osi_Panic ("Too many dcache entries in range\n"); dcList[j++] = tdc; } else { moredata = TRUE; afs_PutDCache(tdc); if (j == NCHUNKSATONCE) break; } } else { afs_PutDCache(tdc); } } } ReleaseWriteLock(&afs_xdcache); for (i = 0; i < j; i++) { /* Iterate over the dcache entries we collected above */ tdc = dcList[i]; ObtainSharedLock(&tdc->lock, 677); /* was code here to clear IFDataMod, but it should only be done * in storedcache and storealldcache. */ /* Only increase DV if we had up-to-date data to start with. * Otherwise, we could be falsely upgrading an old chunk * (that we never read) into one labelled with the current * DV #. Also note that we check that no intervening stores * occurred, otherwise we might mislabel cache information * for a chunk that we didn't store this time */ /* Don't update the version number if it's not yet set. */ if (!hsame(tdc->f.versionNo, h_unset) && hcmp(tdc->f.versionNo, oldDV) >= 0) { if ((!(afs_dvhack || foreign) && hsame(avc->f.m.DataVersion, newDV)) || ((afs_dvhack || foreign) && (origCBs == afs_allCBs))) { /* no error, this is the DV */ UpgradeSToWLock(&tdc->lock, 678); hset(tdc->f.versionNo, avc->f.m.DataVersion); tdc->dflags |= DFEntryMod; /* DWriting may not have gotten cleared above, if all * we did was a StoreMini */ tdc->f.states &= ~DWriting; ConvertWToSLock(&tdc->lock); } } ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } minj += NCHUNKSATONCE; } while (moredata); } if (code) { /* * Invalidate chunks after an error for ccores files since * afs_inactive won't be called for these and they won't be * invalidated. Also discard data if it's a permanent error from the * fileserver. */ if (areq->permWriteError || (avc->f.states & CCore)) { afs_InvalidateAllSegments(avc); } } afs_Trace3(afs_iclSetp, CM_TRACE_STOREALLDONE, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, avc->f.m.Length, ICL_TYPE_INT32, code); /* would like a Trace5, but it doesn't exist... */ afs_Trace3(afs_iclSetp, CM_TRACE_AVCLOCKER, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, avc->lock.wait_states, ICL_TYPE_INT32, avc->lock.excl_locked); afs_Trace4(afs_iclSetp, CM_TRACE_AVCLOCKEE, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, avc->lock.wait_states, ICL_TYPE_INT32, avc->lock.readers_reading, ICL_TYPE_INT32, avc->lock.num_waiting); /* * Finally, if updated DataVersion matches newDV, we did all of the * stores. If mapDV indicates that the page cache was flushed up * to when we started the store, then we can relabel them as flushed * as recently as newDV. * Turn off CDirty bit because the stored data is now in sync with server. */ if (code == 0 && hcmp(avc->mapDV, oldDV) >= 0) { if ((!(afs_dvhack || foreign) && hsame(avc->f.m.DataVersion, newDV)) || ((afs_dvhack || foreign) && (origCBs == afs_allCBs))) { hset(avc->mapDV, newDV); avc->f.states &= ~CDirty; } } osi_FreeLargeSpace(dcList); /* If not the final write a temporary error is ok. */ if (code && !areq->permWriteError && !(sync & AFS_LASTSTORE)) code = 0; return code; } /*afs_StoreAllSegments (new 03/02/94) */
/* this call, unlike osi_FlushText, is supposed to discard caches that may contain invalid information if a file is written remotely, but that may contain valid information that needs to be written back if the file is being written locally. It doesn't subsume osi_FlushText, since the latter function may be needed to flush caches that are invalidated by local writes. avc->pvnLock is already held, avc->lock is guaranteed not to be held (by us, of course). */ void osi_FlushPages(struct vcache *avc, afs_ucred_t *credp) { afs_hyper_t origDV; #if defined(AFS_CACHE_BYPASS) /* The optimization to check DV under read lock below is identical a * change in CITI cache bypass work. The problem CITI found in 1999 * was that this code and background daemon doing prefetching competed * for the vcache entry shared lock. It's not clear to me from the * tech report, but it looks like CITI fixed the general prefetch code * path as a bonus when experimenting on prefetch for cache bypass, see * citi-tr-01-3. */ #endif if (vType(avc) == VDIR) { /* not applicable to directories; they're never mapped or stored in * pages */ return; } ObtainReadLock(&avc->lock); /* If we've already purged this version, or if we're the ones * writing this version, don't flush it (could lose the * data we're writing). */ if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0) || ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) { ReleaseReadLock(&avc->lock); return; } ReleaseReadLock(&avc->lock); ObtainWriteLock(&avc->lock, 10); /* Check again */ if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0) || ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) { ReleaseWriteLock(&avc->lock); return; } /* At this point, you might think that we can skip trying to flush pages * if mapDV is zero, since a file with a zero DV will not have any data in * it. However, some platforms (notably Linux 2.6.22+) will keep a page * full of zeroes around for an empty file. So play it safe and always * flush pages. */ AFS_STATCNT(osi_FlushPages); hset(origDV, avc->f.m.DataVersion); afs_Trace3(afs_iclSetp, CM_TRACE_FLUSHPAGES, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, origDV.low, ICL_TYPE_INT32, avc->f.m.Length); ReleaseWriteLock(&avc->lock); #ifndef AFS_FBSD70_ENV AFS_GUNLOCK(); #endif osi_VM_FlushPages(avc, credp); #ifndef AFS_FBSD70_ENV AFS_GLOCK(); #endif ObtainWriteLock(&avc->lock, 88); /* do this last, and to original version, since stores may occur * while executing above PUTPAGE call */ hset(avc->mapDV, origDV); ReleaseWriteLock(&avc->lock); }
/* this call, unlike osi_FlushText, is supposed to discard caches that may contain invalid information if a file is written remotely, but that may contain valid information that needs to be written back if the file is being written locally. It doesn't subsume osi_FlushText, since the latter function may be needed to flush caches that are invalidated by local writes. avc->pvnLock is already held, avc->lock is guaranteed not to be held (by us, of course). */ void osi_FlushPages(struct vcache *avc, afs_ucred_t *credp) { #ifdef AFS_FBSD70_ENV int vfslocked; #endif afs_hyper_t origDV; #if defined(AFS_CACHE_BYPASS) /* The optimization to check DV under read lock below is identical a * change in CITI cache bypass work. The problem CITI found in 1999 * was that this code and background daemon doing prefetching competed * for the vcache entry shared lock. It's not clear to me from the * tech report, but it looks like CITI fixed the general prefetch code * path as a bonus when experimenting on prefetch for cache bypass, see * citi-tr-01-3. */ #endif ObtainReadLock(&avc->lock); /* If we've already purged this version, or if we're the ones * writing this version, don't flush it (could lose the * data we're writing). */ if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0) || ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) { ReleaseReadLock(&avc->lock); return; } ReleaseReadLock(&avc->lock); ObtainWriteLock(&avc->lock, 10); /* Check again */ if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0) || ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) { ReleaseWriteLock(&avc->lock); return; } if (hiszero(avc->mapDV)) { hset(avc->mapDV, avc->f.m.DataVersion); ReleaseWriteLock(&avc->lock); return; } AFS_STATCNT(osi_FlushPages); hset(origDV, avc->f.m.DataVersion); afs_Trace3(afs_iclSetp, CM_TRACE_FLUSHPAGES, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, origDV.low, ICL_TYPE_INT32, avc->f.m.Length); ReleaseWriteLock(&avc->lock); #ifdef AFS_FBSD70_ENV vfslocked = VFS_LOCK_GIANT(AFSTOV(avc)->v_mount); #endif #ifndef AFS_FBSD70_ENV AFS_GUNLOCK(); #endif osi_VM_FlushPages(avc, credp); #ifndef AFS_FBSD70_ENV AFS_GLOCK(); #endif #ifdef AFS_FBSD70_ENV VFS_UNLOCK_GIANT(vfslocked); #endif ObtainWriteLock(&avc->lock, 88); /* do this last, and to original version, since stores may occur * while executing above PUTPAGE call */ hset(avc->mapDV, origDV); ReleaseWriteLock(&avc->lock); }