/*
 * DRead - look up (or read in) one page of a directory chunk and hand the
 * caller a buffer holding it.
 *
 * adc   - dcache entry identifying the directory chunk.
 * page  - page number within the chunk.
 * entry - out parameter; on success entry->buffer / entry->data reference
 *         the buffer.  The buffer's lockers count is raised on every
 *         return path that succeeds, so the caller is expected to drop it
 *         later (presumably via DRelease — confirm against callers).
 *
 * Returns 0 on success; EIO if no buffer slot can be recycled, the page is
 * beyond the chunk's valid bytes, or the disk read comes up short.
 */
int DRead(struct dcache *adc, int page, struct DirBuffer *entry)
{
    /* Read a page from the disk. */
    struct buffer *tb, *tb2;
    struct osi_file *tfile;
    int code;

    AFS_STATCNT(DRead);

    memset(entry, 0, sizeof(struct DirBuffer));

    /* Guard the buffer hash table while we search/recycle. */
    ObtainWriteLock(&afs_bufferLock, 256);

/* A buffer matches when it holds this page of this cache index. */
#define bufmatch(tb) (tb->page == page && tb->fid == adc->index)
/* Move *p to the front of the hash chain; parent is p's predecessor. */
#define buf_Front(head,parent,p) {(parent)->hashNext = (p)->hashNext; (p)->hashNext= *(head);*(head)=(p);}

    /* this apparently-complicated-looking code is simply an example of
     * a little bit of loop unrolling, and is a standard linked-list
     * traversal trick. It saves a few assignments at the expense
     * of larger code size.  This could be simplified by better use of
     * macros.
     */
    if ((tb = phTable[pHash(adc->index, page)])) {
        if (bufmatch(tb)) {
            /* Hit at the head of the chain: pin the buffer, then release
             * the table lock before touching per-buffer state. */
            ObtainWriteLock(&tb->lock, 257);
            tb->lockers++;
            ReleaseWriteLock(&afs_bufferLock);
            tb->accesstime = timecounter++;
            AFS_STATS(afs_stats_cmperf.bufHits++);
            ReleaseWriteLock(&tb->lock);
            entry->buffer = tb;
            entry->data = tb->data;
            return 0;
        } else {
            struct buffer **bufhead;
            bufhead = &(phTable[pHash(adc->index, page)]);
            /* Unrolled walk: examine two chain entries per iteration,
             * promoting a hit to the front of the chain. */
            while ((tb2 = tb->hashNext)) {
                if (bufmatch(tb2)) {
                    buf_Front(bufhead, tb, tb2);
                    ObtainWriteLock(&tb2->lock, 258);
                    tb2->lockers++;
                    ReleaseWriteLock(&afs_bufferLock);
                    tb2->accesstime = timecounter++;
                    AFS_STATS(afs_stats_cmperf.bufHits++);
                    ReleaseWriteLock(&tb2->lock);
                    entry->buffer = tb2;
                    entry->data = tb2->data;
                    return 0;
                }
                if ((tb = tb2->hashNext)) {
                    if (bufmatch(tb)) {
                        buf_Front(bufhead, tb2, tb);
                        ObtainWriteLock(&tb->lock, 259);
                        tb->lockers++;
                        ReleaseWriteLock(&afs_bufferLock);
                        tb->accesstime = timecounter++;
                        AFS_STATS(afs_stats_cmperf.bufHits++);
                        ReleaseWriteLock(&tb->lock);
                        entry->buffer = tb;
                        entry->data = tb->data;
                        return 0;
                    }
                } else
                    break;
            }
        }
    } else
        tb2 = NULL;

    AFS_STATS(afs_stats_cmperf.bufMisses++);
    /* can't find it */
    /* The last thing we looked at was either tb or tb2 (or nothing). That
     * is at least the oldest buffer on one particular hash chain, so it's
     * a pretty good place to start looking for the truly oldest buffer.
     */
    tb = afs_newslot(adc, page, (tb ? tb : tb2));
    if (!tb) {
        ReleaseWriteLock(&afs_bufferLock);
        return EIO;
    }
    /* Pin the recycled slot before releasing the table lock. */
    ObtainWriteLock(&tb->lock, 260);
    tb->lockers++;
    ReleaseWriteLock(&afs_bufferLock);
    /* Page must lie inside the chunk's valid bytes; otherwise invalidate
     * the slot and fail. */
    if (page * AFS_BUFFER_PAGESIZE >= adc->f.chunkBytes) {
        tb->fid = NULLIDX;
        afs_reset_inode(&tb->inode);
        tb->lockers--;
        ReleaseWriteLock(&tb->lock);
        return EIO;
    }
    tfile = afs_CFileOpen(&adc->f.inode);
    code = afs_CFileRead(tfile, tb->page * AFS_BUFFER_PAGESIZE, tb->data,
                         AFS_BUFFER_PAGESIZE);
    afs_CFileClose(tfile);
    /* A short read means the cache file doesn't hold a full page:
     * invalidate the slot rather than expose partial data. */
    if (code < AFS_BUFFER_PAGESIZE) {
        tb->fid = NULLIDX;
        afs_reset_inode(&tb->inode);
        tb->lockers--;
        ReleaseWriteLock(&tb->lock);
        return EIO;
    }
    /* Note that findslot sets the page field in the buffer equal to
     * what it is searching for.
     */
    ReleaseWriteLock(&tb->lock);
    entry->buffer = tb;
    entry->data = tb->data;
    return 0;
}
/*
 * afs_TruncateAllSegments
 *
 * Description:
 *	Truncate a cache file.
 *
 * Parameters:
 *	avc  : Ptr to vcache entry to truncate.
 *	alen : Number of bytes to make the file.
 *	areq : Ptr to request structure.
 *
 * Environment:
 *	Called with avc write-locked; in VFS40 systems, pvnLock is also
 *	held.
 *
 * Returns 0 on success, or EIO if a dcache slot cannot be validated
 * while scanning the hash chain.
 */
int
afs_TruncateAllSegments(struct vcache *avc, afs_size_t alen,
                        struct vrequest *areq, afs_ucred_t *acred)
{
    struct dcache *tdc;
    afs_int32 code;
    afs_int32 index;
    afs_size_t newSize;

    int dcCount, dcPos;
    struct dcache **tdcArray = NULL;

    AFS_STATCNT(afs_TruncateAllSegments);
    avc->f.m.Date = osi_Time();
    afs_Trace3(afs_iclSetp, CM_TRACE_TRUNCALL, ICL_TYPE_POINTER, avc,
               ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length),
               ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(alen));
    if (alen >= avc->f.m.Length) {
        /*
         * Special speedup since Sun's vm extends the file this way;
         * we've never written to the file thus we can just set the new
         * length and avoid the needless calls below.
         * Also used for ftruncate calls which can extend the file.
         * To completely minimize the possible extra StoreMini RPC, we really
         * should keep the ExtendedPos as well and clear this flag if we
         * truncate below that value before we store the file back.
         */
        avc->f.states |= CExtendedFile;
        avc->f.m.Length = alen;
        return 0;
    }
#if	(defined(AFS_SUN5_ENV))
    /* Zero unused portion of last page */
    osi_VM_PreTruncate(avc, alen, acred);
#endif

#if	(defined(AFS_SUN5_ENV))
    ObtainWriteLock(&avc->vlock, 546);
    avc->activeV++;		/* Block new getpages */
    ReleaseWriteLock(&avc->vlock);
#endif

    /* Drop the vcache lock and the global lock while the VM layer
     * flushes pages beyond the new end-of-file. */
    ReleaseWriteLock(&avc->lock);
    AFS_GUNLOCK();

    /* Flush pages beyond end-of-file. */
    osi_VM_Truncate(avc, alen, acred);

    AFS_GLOCK();
    ObtainWriteLock(&avc->lock, 79);

    avc->f.m.Length = alen;

    if (alen < avc->f.truncPos)
        avc->f.truncPos = alen;
    code = DVHash(&avc->f.fid);

    /* block out others from screwing with this table */
    ObtainWriteLock(&afs_xdcache, 287);

    /* First pass: count this file's dcache entries on the hash chain so
     * we know how large an array to allocate. */
    dcCount = 0;
    for (index = afs_dvhashTbl[code]; index != NULLIDX;) {
        if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
            tdc = afs_GetValidDSlot(index);
            if (!tdc) {
                ReleaseWriteLock(&afs_xdcache);
                code = EIO;
                goto done;
            }
            ReleaseReadLock(&tdc->tlock);
            if (!FidCmp(&tdc->f.fid, &avc->f.fid))
                dcCount++;
            afs_PutDCache(tdc);
        }
        index = afs_dvnextTbl[index];
    }

    /* Now allocate space where we can save those dcache entries, and
     * do a second pass over them..  Since we're holding xdcache, it
     * shouldn't be changing.
     */
    tdcArray = osi_Alloc(dcCount * sizeof(struct dcache *));
    dcPos = 0;

    for (index = afs_dvhashTbl[code]; index != NULLIDX;
         index = afs_dvnextTbl[index]) {
        if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
            tdc = afs_GetValidDSlot(index);
            if (!tdc) {
                /* make sure we put back all of the tdcArray members before
                 * bailing out */
                /* remember, the last valid tdc is at dcPos-1, so start at
                 * dcPos-1, not at dcPos itself. */
                for (dcPos = dcPos - 1; dcPos >= 0; dcPos--) {
                    tdc = tdcArray[dcPos];
                    afs_PutDCache(tdc);
                }
                code = EIO;
                goto done;
            }
            ReleaseReadLock(&tdc->tlock);
            if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
                /* same file, and modified, we'll store it back */
                /* dcPos should never exceed dcCount while xdcache is held,
                 * but guard against it anyway. */
                if (dcPos < dcCount) {
                    tdcArray[dcPos++] = tdc;
                } else {
                    afs_PutDCache(tdc);
                }
            } else {
                afs_PutDCache(tdc);
            }
        }
    }

    ReleaseWriteLock(&afs_xdcache);

    /* Now we loop over the array of dcache entries and truncate them */
    for (index = 0; index < dcPos; index++) {
        struct osi_file *tfile;

        tdc = tdcArray[index];

        /* Bytes of this chunk that remain valid after truncation.
         * NOTE(review): the "< 0" clamp assumes afs_size_t is signed;
         * if it is unsigned the subtraction wraps and the clamp never
         * fires — confirm the afs_size_t typedef. */
        newSize = alen - AFS_CHUNKTOBASE(tdc->f.chunk);
        if (newSize < 0)
            newSize = 0;
        ObtainSharedLock(&tdc->lock, 672);
        if (newSize < tdc->f.chunkBytes && newSize < MAX_AFS_UINT32) {
            UpgradeSToWLock(&tdc->lock, 673);
            tdc->f.states |= DWriting;
            tfile = afs_CFileOpen(&tdc->f.inode);
            afs_CFileTruncate(tfile, (afs_int32)newSize);
            afs_CFileClose(tfile);
            afs_AdjustSize(tdc, (afs_int32)newSize);
            if (alen < tdc->validPos) {
                if (alen < AFS_CHUNKTOBASE(tdc->f.chunk))
                    tdc->validPos = 0;
                else
                    tdc->validPos = alen;
            }
            ConvertWToSLock(&tdc->lock);
        }
        ReleaseSharedLock(&tdc->lock);
        afs_PutDCache(tdc);
    }

    code = 0;

 done:
    if (tdcArray) {
        osi_Free(tdcArray, dcCount * sizeof(struct dcache *));
    }
#if	(defined(AFS_SUN5_ENV))
    ObtainWriteLock(&avc->vlock, 547);
    if (--avc->activeV == 0 && (avc->vstates & VRevokeWait)) {
        avc->vstates &= ~VRevokeWait;
        afs_osi_Wakeup((char *)&avc->vstates);
    }
    ReleaseWriteLock(&avc->vlock);
#endif
    return code;
}
/*
 * osi_UFSOpen - open a UFS cache file identified by ainode and return an
 * osi_file handle for it.
 *
 * Panics if the cache is not of UFS type, or if the underlying
 * vnode_open/igetinode fails.  On success the returned handle has its
 * offset zeroed and its size filled in (from the vattr, or via
 * afs_osi_Stat in the vnode-path build).
 *
 * The caller owns the returned osi_file (allocated from small space).
 */
void *
osi_UFSOpen(afs_dcache_id_t *ainode)
{
    struct vnode *vp;
    struct vattr va;
    struct osi_file *afile = NULL;
    extern int cacheDiskType;
    afs_int32 code = 0;
    int dummy;
    /* NOTE(review): fname and tstat are only used in some build variants;
     * fname appears unused in the code visible here. */
    char fname[1024];
    struct osi_stat tstat;

    AFS_STATCNT(osi_UFSOpen);
    if (cacheDiskType != AFS_FCACHE_TYPE_UFS) {
        osi_Panic("UFSOpen called for non-UFS cache\n");
    }
    if (!afs_osicred_initialized) {
        /* valid for alpha_osf, SunOS, Ultrix */
        memset(&afs_osi_cred, 0, sizeof(afs_ucred_t));
        afs_osi_cred.cr_ref++;
#ifndef AFS_DARWIN110_ENV
        afs_osi_cred.cr_ngroups = 1;
#endif
        afs_osicred_initialized = 1;
    }
    afile = (struct osi_file *)osi_AllocSmallSpace(sizeof(struct osi_file));
    AFS_GUNLOCK();
#ifdef AFS_CACHE_VNODE_PATH
    if (!ainode->ufs) {
        osi_Panic("No cache inode\n");
    }

    code = vnode_open(ainode->ufs, O_RDWR, 0, 0, &vp, afs_osi_ctxtp);
#else
    /* The if/else here deliberately straddles the preprocessor blocks:
     * on pre-DARWIN80 builds the unguarded igetinode call is the
     * "else if (UFS)" body and the trailing panic is the final else;
     * on DARWIN80 builds only the unguarded call remains. */
#ifndef AFS_DARWIN80_ENV
    if (afs_CacheFSType == AFS_APPL_HFS_CACHE)
        code = igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, &ainode->ufs,
                         &vp, &va, &dummy);	/* XXX hfs is broken */
    else if (afs_CacheFSType == AFS_APPL_UFS_CACHE)
#endif
        code = igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev,
                         (ino_t) ainode->ufs, &vp, &va, &dummy);
#ifndef AFS_DARWIN80_ENV
    else
        panic("osi_UFSOpen called before cacheops initialized\n");
#endif
#endif
    AFS_GLOCK();
    if (code) {
        /* Free the handle before panicking so the allocation is not
         * leaked if the panic is survivable on this platform. */
        osi_FreeSmallSpace(afile);
        osi_Panic("UFSOpen: igetinode failed");
    }
    afile->vnode = vp;
    afile->offset = 0;
    afile->proc = (int (*)())0;
#ifndef AFS_CACHE_VNODE_PATH
    afile->size = va.va_size;
#else
    code = afs_osi_Stat(afile, &tstat);
    afile->size = tstat.size;
#endif
    return (void *)afile;
}
/* free socket allocated by osi_NetSocket */
/*
 * rxk_FreeSocket - stub: performs no teardown of asocket; it only bumps
 * the statistics counter and reports success.
 * NOTE(review): the counter is tagged osi_FreeSocket rather than
 * rxk_FreeSocket — presumably historical naming; confirm against the
 * statistics table.
 */
int
rxk_FreeSocket(struct socket *asocket)
{
    AFS_STATCNT(osi_FreeSocket);
    return 0;
}
/*
 * afs_StoreMini - issue a zero-length StoreData RPC to push a new file
 * length (truncation point) and client modification time to the file
 * server, without transferring any data.
 *
 * avc  - the vcache whose length/mtime should be stored; its truncPos
 *        is consumed (reset to AFS_NOTRUNC) and CExtendedFile cleared.
 * areq - request structure used for connection selection and retry
 *        analysis.
 *
 * Returns 0 on success (and refreshes status via afs_ProcessFS), or an
 * RPC/connection error code.
 */
static int
afs_StoreMini(struct vcache *avc, struct vrequest *areq)
{
    struct afs_conn *tc;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus OutStatus;
    struct AFSVolSync tsync;
    afs_int32 code;
    struct rx_call *tcall;
    struct rx_connection *rxconn;
    afs_size_t tlen, xlen = 0;
    XSTATS_DECLS;

    AFS_STATCNT(afs_StoreMini);
    afs_Trace2(afs_iclSetp, CM_TRACE_STOREMINI, ICL_TYPE_POINTER, avc,
               ICL_TYPE_INT32, avc->f.m.Length);
    /* The length we store is the smaller of the current length and any
     * pending truncation point. */
    tlen = avc->f.m.Length;
    if (avc->f.truncPos < tlen)
        tlen = avc->f.truncPos;
    avc->f.truncPos = AFS_NOTRUNC;
    avc->f.states &= ~CExtendedFile;

    do {
        tc = afs_Conn(&avc->f.fid, areq, SHARED_LOCK, &rxconn);
        if (tc) {
#ifdef AFS_64BIT_CLIENT
          retry:
#endif
            RX_AFS_GUNLOCK();
            tcall = rx_NewCall(rxconn);
            RX_AFS_GLOCK();
            /* Set the client mod time since we always want the file
             * to have the client's mod time and not the server's one
             * (to avoid problems with make, etc.)  It almost always
             * works fine with standard afs because the server/client
             * times are in sync and more importantly this storemini
             * is a special call that would typically be followed by
             * the proper store-data or store-status calls.
             */
            InStatus.Mask = AFS_SETMODTIME;
            InStatus.ClientModTime = avc->f.m.Date;
            XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_STOREDATA);
            afs_Trace4(afs_iclSetp, CM_TRACE_STOREDATA64, ICL_TYPE_FID,
                       &avc->f.fid.Fid, ICL_TYPE_OFFSET,
                       ICL_HANDLE_OFFSET(avc->f.m.Length), ICL_TYPE_OFFSET,
                       ICL_HANDLE_OFFSET(xlen), ICL_TYPE_OFFSET,
                       ICL_HANDLE_OFFSET(tlen));
            RX_AFS_GUNLOCK();
#ifdef AFS_64BIT_CLIENT
            if (!afs_serverHasNo64Bit(tc)) {
                code =
                    StartRXAFS_StoreData64(tcall,
                                           (struct AFSFid *)&avc->f.fid.Fid,
                                           &InStatus, avc->f.m.Length,
                                           (afs_size_t) 0, tlen);
            } else {
                /* Server only speaks 32-bit: refuse lengths that do not
                 * fit, then fall back to the old RPC. */
                afs_int32 l1, l2;
                l1 = avc->f.m.Length;
                l2 = tlen;
                if ((avc->f.m.Length > 0x7fffffff) ||
                    (tlen > 0x7fffffff) ||
                    ((0x7fffffff - tlen) < avc->f.m.Length)) {
                    code = EFBIG;
                    goto error;
                }
                code =
                    StartRXAFS_StoreData(tcall,
                                         (struct AFSFid *)&avc->f.fid.Fid,
                                         &InStatus, l1, 0, l2);
            }
#else /* AFS_64BIT_CLIENT */
            code =
                StartRXAFS_StoreData(tcall, (struct AFSFid *)&avc->f.fid.Fid,
                                     &InStatus, avc->f.m.Length, 0, tlen);
#endif /* AFS_64BIT_CLIENT */
            if (code == 0) {
                code = EndRXAFS_StoreData(tcall, &OutStatus, &tsync);
            }
#ifdef AFS_64BIT_CLIENT
          error:
#endif
            /* rx_EndCall folds our code together with transport errors. */
            code = rx_EndCall(tcall, code);
            RX_AFS_GLOCK();
            XSTATS_END_TIME;
#ifdef AFS_64BIT_CLIENT
            /* Old servers reject the 64-bit opcode: remember that and
             * retry once with the 32-bit variant. */
            if (code == RXGEN_OPCODE && !afs_serverHasNo64Bit(tc)) {
                afs_serverSetNo64Bit(tc);
                goto retry;
            }
#endif /* AFS_64BIT_CLIENT */
        } else
            code = -1;
        /* NOTE(review): this call passes rxconn as the second argument;
         * the afs_Analyze defined elsewhere in this tree takes (aconn,
         * acode, ...) without rxconn — confirm the prototypes agree for
         * this build. */
    } while (afs_Analyze
             (tc, rxconn, code, &avc->f.fid, areq,
              AFS_STATS_FS_RPCIDX_STOREDATA, SHARED_LOCK, NULL));

    if (code == 0)
        afs_ProcessFS(avc, &OutStatus, areq);

    return code;
}				/*afs_StoreMini */
/*
 * afs_sync - VFS-level sync entry point.
 *
 * Intentionally a no-op: only the statistics counter is bumped and
 * success is returned (the afsp argument is unused here).
 */
int
afs_sync(struct vfs *afsp)
{
    AFS_STATCNT(afs_sync);
    return 0;
}
afs_open(struct vcache **avcp, afs_int32 aflags, afs_ucred_t *acred)
#endif
{
    afs_int32 code;
    struct vrequest treq;
    struct vcache *tvc;
    int writing;
    struct afs_fakestat_state fakestate;

    AFS_STATCNT(afs_open);
    if ((code = afs_InitReq(&treq, acred)))
        return code;
#ifdef AFS_SGI64_ENV
    /* avcpp can be, but is not necessarily, bhp's vnode. */
    tvc = VTOAFS(BHV_TO_VNODE(bhv));
#else
    tvc = *avcp;
#endif
    afs_Trace2(afs_iclSetp, CM_TRACE_OPEN, ICL_TYPE_POINTER, tvc,
               ICL_TYPE_INT32, aflags);
    afs_InitFakeStat(&fakestate);

    AFS_DISCON_LOCK();

    /* Resolve fakestat mount points and make sure our status is current. */
    code = afs_EvalFakeStat(&tvc, &fakestate, &treq);
    if (code)
        goto done;
    code = afs_VerifyVCache(tvc, &treq);
    if (code)
        goto done;

    ObtainReadLock(&tvc->lock);

    /* Disconnected mode can only open files that are fully cached. */
    if (AFS_IS_DISCONNECTED && (afs_DCacheMissingChunks(tvc) != 0)) {
        ReleaseReadLock(&tvc->lock);
        /* printf("Network is down in afs_open: missing chunks\n"); */
        code = ENETDOWN;
        goto done;
    }

    ReleaseReadLock(&tvc->lock);

    if (aflags & (FWRITE | FTRUNC))
        writing = 1;
    else
        writing = 0;
    if (vType(tvc) == VDIR) {
        /* directory */
        if (writing) {
            code = EISDIR;
            goto done;
        } else {
            if (!afs_AccessOK
                (tvc, ((tvc->f.states & CForeign) ? PRSFS_READ : PRSFS_LOOKUP),
                 &treq, CHECK_MODE_BITS)) {
                code = EACCES;
                /* printf("afs_Open: no access for dir\n"); */
                goto done;
            }
        }
    } else {
#ifdef AFS_SUN5_ENV
        if (AFS_NFSXLATORREQ(acred) && (aflags & FREAD)) {
            if (!afs_AccessOK
                (tvc, PRSFS_READ, &treq,
                 CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
                code = EACCES;
                goto done;
            }
        }
#endif
#ifdef AFS_AIX41_ENV
        if (aflags & FRSHARE) {
            /*
             * Hack for AIX 4.1:
             *  Apparently it is possible for a file to get mapped without
             *  either VNOP_MAP or VNOP_RDWR being called, if (1) it is a
             *  sharable library, and (2) it has already been loaded.  We must
             *  ensure that the credp is up to date.  We detect the situation
             *  by checking for O_RSHARE at open time.
             */
            /*
             * We keep the caller's credentials since an async daemon will
             * handle the request at some point. We assume that the same
             * credentials will be used.
             */
            ObtainWriteLock(&tvc->lock, 140);
            if (!tvc->credp || (tvc->credp != acred)) {
                crhold(acred);
                if (tvc->credp) {
                    struct ucred *crp = tvc->credp;
                    tvc->credp = NULL;
                    crfree(crp);
                }
                tvc->credp = acred;
            }
            ReleaseWriteLock(&tvc->lock);
        }
#endif
        /* normal file or symlink */
        osi_FlushText(tvc);	/* only needed to flush text if text locked last time */
#ifdef AFS_BOZONLOCK_ENV
        afs_BozonLock(&tvc->pvnLock, tvc);
#endif
        osi_FlushPages(tvc, acred);
#ifdef AFS_BOZONLOCK_ENV
        afs_BozonUnlock(&tvc->pvnLock, tvc);
#endif
    }
    /* set date on file if open in O_TRUNC mode */
    if (aflags & FTRUNC) {
        /* this fixes touch */
        ObtainWriteLock(&tvc->lock, 123);
        tvc->f.m.Date = osi_Time();
        tvc->f.states |= CDirty;
        ReleaseWriteLock(&tvc->lock);
    }
    ObtainReadLock(&tvc->lock);
    if (writing)
        tvc->execsOrWriters++;
    tvc->opens++;
#if defined(AFS_SGI_ENV) || defined (AFS_LINUX26_ENV)
    if (writing && tvc->cred == NULL) {
        crhold(acred);
        tvc->cred = acred;
    }
#endif
    ReleaseReadLock(&tvc->lock);
    /* Optionally kick off a background prefetch of the first chunk for
     * read-only opens of plain files. */
    if ((afs_preCache != 0) && (writing == 0) && (vType(tvc) != VDIR)
        && (!afs_BBusy())) {
        struct dcache *tdc;
        afs_size_t offset, len;

        /* NOTE(review): tdc is used below without a NULL check; confirm
         * afs_GetDCache cannot return NULL on this path. */
        tdc = afs_GetDCache(tvc, 0, &treq, &offset, &len, 1);

        ObtainSharedLock(&tdc->mflock, 865);
        if (!(tdc->mflags & DFFetchReq)) {
            struct brequest *bp;
            /* start the daemon (may already be running, however) */
            UpgradeSToWLock(&tdc->mflock, 666);
            tdc->mflags |= DFFetchReq;	/* guaranteed to be cleared by BKG or GetDCache */
            /* last parm (1) tells bkg daemon to do an afs_PutDCache when it
             * is done, since we don't want to wait for it to finish before
             * doing so ourselves.
             */
            bp = afs_BQueue(BOP_FETCH, tvc, B_DONTWAIT, 0, acred,
                            (afs_size_t) 0, (afs_size_t) 1, tdc, (void *)0,
                            (void *)0);
            if (!bp) {
                /* Queueing failed: clear the flag ourselves since no
                 * daemon will. */
                tdc->mflags &= ~DFFetchReq;
            }
            ReleaseWriteLock(&tdc->mflock);
        } else {
            ReleaseSharedLock(&tdc->mflock);
        }
    }
  done:
    afs_PutFakeStat(&fakestate);
    AFS_DISCON_UNLOCK();

    code = afs_CheckCode(code, &treq, 4);	/* avoid AIX -O bug */

    afs_Trace2(afs_iclSetp, CM_TRACE_OPEN, ICL_TYPE_POINTER, tvc,
               ICL_TYPE_INT32, 999999);

    return code;
}
/**
 * Try setting up a connection to the server containing the specified fid.
 * Gets the volume, checks if it's up and does the connection by server address.
 *
 * @param afid Fid whose volume determines the candidate servers.
 * @param areq Request filled in by the caller; its idleError/tokenError
 *             and skipserver flags cause servers to be skipped, and its
 *             busyCount/volumeError fields may be updated here.
 *             NOTE(review): areq is NULL-checked once early on but
 *             dereferenced unconditionally afterwards — confirm callers
 *             never pass NULL.
 * @param locktype Type of lock that will be used.
 * @param rxconn Out: the underlying rx connection (NULL on failure).
 *
 * @return The conn struct, or NULL.
 */
struct afs_conn *
afs_Conn(struct VenusFid *afid, struct vrequest *areq,
         afs_int32 locktype, struct rx_connection **rxconn)
{
    u_short fsport = AFS_FSPORT;
    struct volume *tv;
    struct afs_conn *tconn = NULL;
    struct srvAddr *lowp = NULL;
    struct unixuser *tu;
    int notbusy;
    int i;
    struct srvAddr *sa1p;

    *rxconn = NULL;

    AFS_STATCNT(afs_Conn);
    /* Get fid's volume. */
    tv = afs_GetVolume(afid, areq, READ_LOCK);
    if (!tv) {
        if (areq) {
            afs_FinalizeReq(areq);
            areq->volumeError = 1;
        }
        return NULL;
    }

    /* Use the cell's file-server port when we know it. */
    if (tv->serverHost[0] && tv->serverHost[0]->cell) {
        fsport = tv->serverHost[0]->cell->fsport;
    } else {
        VNOSERVERS++;
    }

    /* First is always lowest rank, if it's up */
    if ((tv->status[0] == not_busy) && tv->serverHost[0]
        && !(tv->serverHost[0]->addr->sa_flags & SRVR_ISDOWN)
        && !(((areq->idleError > 0) || (areq->tokenError > 0))
             && (areq->skipserver[0] == 1)))
        lowp = tv->serverHost[0]->addr;

    /* Otherwise we look at all of them. There are seven levels of
     * not_busy. This means we will check a volume seven times before it
     * is marked offline. Ideally, we only need two levels, but this
     * serves a second purpose of waiting some number of seconds before
     * the client decides the volume is offline (ie: a clone could finish
     * in this time).
     */
    for (notbusy = not_busy; (!lowp && (notbusy <= end_not_busy)); notbusy++) {
        for (i = 0; i < AFS_MAXHOSTS && tv->serverHost[i]; i++) {
            /* Skip servers the request has already blacklisted. */
            if (((areq->tokenError > 0)||(areq->idleError > 0))
                && (areq->skipserver[i] == 1))
                continue;
            if (tv->status[i] != notbusy) {
                /* Record why this host is being passed over. */
                if (tv->status[i] == rd_busy || tv->status[i] == rdwr_busy) {
                    if (!areq->busyCount)
                        areq->busyCount++;
                } else if (tv->status[i] == offline) {
                    if (!areq->volumeError)
                        areq->volumeError = VOLMISSING;
                }
                continue;
            }
            /* Among this host's addresses pick the best-ranked one
             * that is up. */
            for (sa1p = tv->serverHost[i]->addr; sa1p; sa1p = sa1p->next_sa) {
                if (sa1p->sa_flags & SRVR_ISDOWN)
                    continue;
                if (!lowp || (lowp->sa_iprank > sa1p->sa_iprank))
                    lowp = sa1p;
            }
        }
    }
    afs_PutVolume(tv, READ_LOCK);

    if (lowp) {
        tu = afs_GetUser(areq->uid, afid->Cell, SHARED_LOCK);
        tconn = afs_ConnBySA(lowp, fsport, afid->Cell, tu, 0 /*!force */ ,
                             1 /*create */ , locktype, rxconn);

        afs_PutUser(tu, SHARED_LOCK);
    }

    return tconn;
}				/*afs_Conn */
/*
 * afspag_PSetTokens - install AFS tokens handed in through the SetTokens
 * pioctl in the PAG daemon.
 *
 * ain/ainSize - packed input buffer laid out as:
 *     int32 ticketLen | ticket bytes | int32 clearTokenLen |
 *     struct ClearToken | [ int32 primaryFlag | cell name ]
 *   The trailing flag/cell pair is optional; if absent the primary cell
 *   and primary id are assumed.
 * acred - caller credentials; may be switched to a fresh PAG when the
 *   0x8000 bit is set in the flag word.
 *
 * Returns 0 on success, EIO before init, EINVAL on malformed input,
 * ESRCH if the cell is unknown.
 */
int
afspag_PSetTokens(char *ain, afs_int32 ainSize, struct AFS_UCRED **acred)
{
    afs_int32 i;
    register struct unixuser *tu;
    struct afspag_cell *tcell;
    struct ClearToken clear;
    char *stp;
    int stLen;
    afs_int32 flag, set_parent_pag = 0;
    afs_int32 pag, uid;

    AFS_STATCNT(PSetTokens);
    if (!afs_resourceinit_flag) {
        return EIO;
    }
    memcpy((char *)&i, ain, sizeof(afs_int32));
    ain += sizeof(afs_int32);
    stp = ain;			/* remember where the ticket is */
    if (i < 0 || i > MAXKTCTICKETLEN)
        return EINVAL;		/* malloc may fail */
    stLen = i;
    ain += i;			/* skip over ticket */
    memcpy((char *)&i, ain, sizeof(afs_int32));
    ain += sizeof(afs_int32);
    if (i != sizeof(struct ClearToken)) {
        return EINVAL;
    }
    memcpy((char *)&clear, ain, sizeof(struct ClearToken));
    if (clear.AuthHandle == -1)
        clear.AuthHandle = 999;	/* more rxvab compat stuff */
    ain += sizeof(struct ClearToken);
    if (ainSize != 2 * sizeof(afs_int32) + stLen + sizeof(struct ClearToken)) {
        /* still stuff left?  we've got primary flag and cell name.  Set these */
        memcpy((char *)&flag, ain, sizeof(afs_int32));	/* primary id flag */
        ain += sizeof(afs_int32);	/* skip id field */
        /* rest is cell name, look it up */
        /* some versions of gcc appear to need != 0 in order to get this right */
        if ((flag & 0x8000) != 0) {	/* XXX Use Constant XXX */
            flag &= ~0x8000;
            set_parent_pag = 1;
        }
        tcell = afspag_GetCell(ain);
    } else {
        /* default to primary cell, primary id */
        flag = 1;		/* primary id */
        tcell = afspag_GetPrimaryCell();
    }
    if (!tcell)
        return ESRCH;
    if (set_parent_pag) {
        /* Caller asked for a new PAG in the parent process; the setpag
         * variants below differ per platform. */
#if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
#if defined(AFS_DARWIN_ENV)
        struct proc *p = current_proc();	/* XXX */
#else
        struct proc *p = curproc;	/* XXX */
#endif
#ifndef AFS_DARWIN80_ENV
        uprintf("Process %d (%s) tried to change pags in PSetTokens\n",
                p->p_pid, p->p_comm);
#endif
        setpag(p, acred, -1, &pag, 1);
#else
#ifdef	AFS_OSF_ENV
        setpag(u.u_procp, acred, -1, &pag, 1);	/* XXX u.u_procp is a no-op XXX */
#else
        setpag(acred, -1, &pag, 1);
#endif
#endif
    }
    pag = PagInCred(*acred);
    uid = (pag == NOPAG) ? (*acred)->cr_uid : pag;
    /* now we just set the tokens */
    tu = afs_GetUser(uid, tcell->cellnum, WRITE_LOCK);
    if (!tu->cellinfo)
        tu->cellinfo = (void *)tcell;
    tu->vid = clear.ViceId;
    /* Replace any previously-held secret token. */
    if (tu->stp != NULL) {
        afs_osi_Free(tu->stp, tu->stLen);
    }
    tu->stp = (char *)afs_osi_Alloc(stLen);
    tu->stLen = stLen;
    memcpy(tu->stp, stp, stLen);
    tu->ct = clear;
#ifndef AFS_NOSTATS
    afs_stats_cmfullperf.authent.TicketUpdates++;
    afs_ComputePAGStats();
#endif /* AFS_NOSTATS */
    tu->states |= UHasTokens;
    tu->states &= ~UTokensBad;
    afs_SetPrimary(tu, flag);
    tu->tokenTime = osi_Time();
    afs_PutUser(tu, WRITE_LOCK);
    return 0;
}
/*------------------------------------------------------------------------
 * EXPORTED afs_Analyze
 *
 * Description:
 *	Analyze the outcome of an RPC operation, taking whatever support
 *	actions are necessary.
 *
 * Arguments:
 *	aconn : Ptr to the relevant connection on which the call was made.
 *	acode : The return code experienced by the RPC.
 *	afid  : The FID of the file involved in the action.  This argument
 *		may be null if none was involved.
 *	areq  : The request record associated with this operation.
 *	op    : which RPC we are analyzing.
 *	cellp : pointer to a cell struct.  Must provide either fid or cell.
 *
 * Returns:
 *	Non-zero value if the related RPC operation should be retried,
 *	zero otherwise.
 *
 * Environment:
 *	This routine is typically called in a do-while loop, causing the
 *	embedded RPC operation to be called repeatedly if appropriate
 *	until whatever error condition (if any) is intolerable.
 *
 * Side Effects:
 *	As advertised.
 *
 * NOTE:
 *	The retry return value is used by afs_StoreAllSegments to determine
 *	if this is a temporary or permanent error.
 *------------------------------------------------------------------------*/
int
afs_Analyze(register struct afs_conn *aconn, afs_int32 acode,
            struct VenusFid *afid, register struct vrequest *areq, int op,
            afs_int32 locktype, struct cell *cellp)
{
    afs_int32 i;
    struct srvAddr *sa;
    struct server *tsp;
    struct volume *tvp;
    afs_int32 shouldRetry = 0;
    afs_int32 serversleft = 1;
    struct afs_stats_RPCErrors *aerrP;
    /* NOTE(review): markeddown is only consumed inside the #if 0 block
     * below, so it is effectively unused. */
    afs_int32 markeddown;

    if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) {
        /* On reconnection, act as connected. XXX: for now.... */
        /* SXW - This may get very tired after a while. We should try and
         *       intercept all RPCs before they get here ... */
        /*printf("afs_Analyze: disconnected\n");*/
        afs_FinalizeReq(areq);
        if (aconn) {
            /* SXW - I suspect that this will _never_ happen - we shouldn't
             *       get a connection because we're disconnected !!!*/
            afs_PutConn(aconn, locktype);
        }
        return 0;
    }

    AFS_STATCNT(afs_Analyze);
    afs_Trace4(afs_iclSetp, CM_TRACE_ANALYZE, ICL_TYPE_INT32, op,
               ICL_TYPE_POINTER, aconn, ICL_TYPE_INT32, acode, ICL_TYPE_LONG,
               areq->uid);

    aerrP = (struct afs_stats_RPCErrors *)0;

    if ((op >= 0) && (op < AFS_STATS_NUM_FS_RPC_OPS))
        aerrP = &(afs_stats_cmfullperf.rpc.fsRPCErrors[op]);

    afs_FinalizeReq(areq);
    if (!aconn && areq->busyCount) {	/* one RPC or more got VBUSY/VRESTARTING */

        tvp = afs_FindVolume(afid, READ_LOCK);
        if (tvp) {
            afs_warnuser("afs: Waiting for busy volume %u (%s) in cell %s\n",
                         (afid ? afid->Fid.Volume : 0),
                         (tvp->name ? tvp->name : ""),
                         ((tvp->serverHost[0]
                           && tvp->serverHost[0]->cell) ? tvp->serverHost[0]->
                          cell->cellName : ""));

            /* Reset every host that isn't offline back to not_busy so the
             * next pass will try them again. */
            for (i = 0; i < MAXHOSTS; i++) {
                if (tvp->status[i] != not_busy && tvp->status[i] != offline) {
                    tvp->status[i] = not_busy;
                }
                if (tvp->status[i] == not_busy)
                    shouldRetry = 1;
            }
            afs_PutVolume(tvp, READ_LOCK);
        } else {
            afs_warnuser("afs: Waiting for busy volume %u\n",
                         (afid ? afid->Fid.Volume : 0));
        }

        /* Give up after ~100 busy retries; otherwise wait and retry. */
        if (areq->busyCount > 100) {
            if (aerrP)
                (aerrP->err_Volume)++;
            areq->volumeError = VOLBUSY;
            shouldRetry = 0;
        } else {
            VSleep(afs_BusyWaitPeriod);	/* poll periodically */
        }
        if (shouldRetry != 0)
            areq->busyCount++;

        return shouldRetry;	/* should retry */
    }

    if (!aconn || !aconn->srvr) {
        /* No connection at all: decide between hard-mount waiting and
         * flagging a network error. */
        if (!areq->volumeError) {
            if (aerrP)
                (aerrP->err_Network)++;
            if (hm_retry_int && !(areq->flags & O_NONBLOCK) &&	/* "hard" mount */
                ((afid && afs_IsPrimaryCellNum(afid->Cell))
                 || (cellp && afs_IsPrimaryCell(cellp)))) {
                if (!afid) {
                    afs_warnuser
                        ("afs: hard-mount waiting for a vlserver to return to service\n");
                    VSleep(hm_retry_int);
                    afs_CheckServers(1, cellp);
                    shouldRetry = 1;
                } else {
                    tvp = afs_FindVolume(afid, READ_LOCK);
                    if (!tvp || (tvp->states & VRO)) {
                        shouldRetry = hm_retry_RO;
                    } else {
                        shouldRetry = hm_retry_RW;
                    }
                    if (tvp)
                        afs_PutVolume(tvp, READ_LOCK);
                    if (shouldRetry) {
                        afs_warnuser
                            ("afs: hard-mount waiting for volume %u\n",
                             afid->Fid.Volume);
                        VSleep(hm_retry_int);
                        afs_CheckServers(1, cellp);
                    }
                }
            } /* if (hm_retry_int ... */
            else {
                areq->networkError = 1;
            }
        }
        return shouldRetry;
    }

    /* Find server associated with this connection. */
    sa = aconn->srvr;
    tsp = sa->server;

    /* Before we do anything with acode, make sure we translate it back to
     * a system error */
    if ((acode & ~0xff) == ERROR_TABLE_BASE_uae)
        acode = et_to_sys_error(acode);

    if (acode == 0) {
        /* If we previously took an error, mark this volume not busy */
        if (areq->volumeError) {
            tvp = afs_FindVolume(afid, READ_LOCK);
            if (tvp) {
                for (i = 0; i < MAXHOSTS; i++) {
                    if (tvp->serverHost[i] == tsp) {
                        tvp->status[i] = not_busy;
                    }
                }
                afs_PutVolume(tvp, READ_LOCK);
            }
        }

        afs_PutConn(aconn, locktype);
        return 0;
    }

    /* If network troubles, mark server as having bogued out again. */
    /* VRESTARTING is < 0 because of backward compatibility issues
     * with 3.4 file servers and older cache managers */
#ifdef AFS_64BIT_CLIENT
    if (acode == -455)
        acode = 455;
#endif /* AFS_64BIT_CLIENT */
    if ((acode < 0) && (acode != VRESTARTING)) {
        if (acode == RX_CALL_TIMEOUT) {
            /* Idle timeout: blacklist the server for this request only. */
            serversleft = afs_BlackListOnce(areq, afid, tsp);
            areq->idleError++;
            if (serversleft) {
                shouldRetry = 1;
            } else {
                shouldRetry = 0;
            }
            /* By doing this, we avoid ever marking a server down
             * in an idle timeout case. That's because the server is
             * still responding and may only be letting a single vnode
             * time out. We otherwise risk having the server continually
             * be marked down, then up, then down again...
             */
            goto out;
        }
        markeddown = afs_ServerDown(sa);
        ForceNewConnections(sa); /**multi homed clients lock:afs_xsrvAddr? */
        if (aerrP)
            (aerrP->err_Server)++;
#if 0
        /* retry *once* when the server is timed out in case of NAT */
        if (markeddown && acode == RX_CALL_DEAD) {
            aconn->forceConnectFS = 1;
            shouldRetry = 1;
        }
#endif
    }

    if (acode == VBUSY || acode == VRESTARTING) {
        if (acode == VBUSY) {
            areq->busyCount++;
            if (aerrP)
                (aerrP->err_VolumeBusies)++;
        } else
            areq->busyCount = 1;

        tvp = afs_FindVolume(afid, READ_LOCK);
        if (tvp) {
            for (i = 0; i < MAXHOSTS; i++) {
                if (tvp->serverHost[i] == tsp) {
                    tvp->status[i] = rdwr_busy;	/* can't tell which yet */
                    /* to tell which, have to look at the op code. */
                }
            }
            afs_PutVolume(tvp, READ_LOCK);
        } else {
            afs_warnuser("afs: Waiting for busy volume %u in cell %s\n",
                         (afid ? afid->Fid.Volume : 0), tsp->cell->cellName);
            VSleep(afs_BusyWaitPeriod);	/* poll periodically */
        }
        shouldRetry = 1;
        acode = 0;
    } else if (acode == VICETOKENDEAD
               || (acode & ~0xff) == ERROR_TABLE_BASE_RXK) {
        /* any rxkad error is treated as token expiration */
        struct unixuser *tu;
        /*
         * I'm calling these errors protection errors, since they involve
         * faulty authentication.
         */
        if (aerrP)
            (aerrP->err_Protection)++;

        tu = afs_FindUser(areq->uid, tsp->cell->cellNum, READ_LOCK);
        if (tu) {
            if (acode == VICETOKENDEAD) {
                aconn->forceConnectFS = 1;
            } else if (acode == RXKADEXPIRED) {
                aconn->forceConnectFS = 0;	/* don't check until new tokens set */
                aconn->user->states |= UTokensBad;
                afs_warnuser
                    ("afs: Tokens for user of AFS id %d for cell %s have expired\n",
                     tu->vid, aconn->srvr->server->cell->cellName);
            } else {
                serversleft = afs_BlackListOnce(areq, afid, tsp);
                areq->tokenError++;

                if (serversleft) {
                    afs_warnuser
                        ("afs: Tokens for user of AFS id %d for cell %s: rxkad error=%d\n",
                         tu->vid, aconn->srvr->server->cell->cellName, acode);
                    shouldRetry = 1;
                } else {
                    areq->tokenError = 0;
                    aconn->forceConnectFS = 0;	/* don't check until new tokens set */
                    aconn->user->states |= UTokensBad;
                    afs_warnuser
                        ("afs: Tokens for user of AFS id %d for cell %s are discarded (rxkad error=%d)\n",
                         tu->vid, aconn->srvr->server->cell->cellName, acode);
                }
            }
            afs_PutUser(tu, READ_LOCK);
        } else {
            /* The else case shouldn't be possible and should probably be replaced by a panic? */
            if (acode == VICETOKENDEAD) {
                aconn->forceConnectFS = 1;
            } else if (acode == RXKADEXPIRED) {
                aconn->forceConnectFS = 0;	/* don't check until new tokens set */
                aconn->user->states |= UTokensBad;
                afs_warnuser
                    ("afs: Tokens for user %d for cell %s have expired\n",
                     areq->uid, aconn->srvr->server->cell->cellName);
            } else {
                aconn->forceConnectFS = 0;	/* don't check until new tokens set */
                aconn->user->states |= UTokensBad;
                afs_warnuser
                    ("afs: Tokens for user %d for cell %s are discarded (rxkad error = %d)\n",
                     areq->uid, aconn->srvr->server->cell->cellName, acode);
            }
        }
        shouldRetry = 1;	/* Try again (as root). */
    }
    /* Check for access violation. */
    else if (acode == EACCES) {
        /* should mark access error in non-existent per-user global structure */
        if (aerrP)
            (aerrP->err_Protection)++;
        areq->accessError = 1;
        if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
            areq->permWriteError = 1;
        shouldRetry = 0;
    }
    /* check for ubik errors; treat them like crashed servers */
    else if (acode >= ERROR_TABLE_BASE_U && acode < ERROR_TABLE_BASE_U + 255) {
        afs_ServerDown(sa);
        if (aerrP)
            (aerrP->err_Server)++;
        shouldRetry = 1;	/* retryable (maybe one is working) */
        VSleep(1);		/* just in case */
    }
    /* Check for bad volume data base / missing volume. */
    else if (acode == VSALVAGE || acode == VOFFLINE || acode == VNOVOL
             || acode == VNOSERVICE || acode == VMOVED) {
        struct cell *tcell;
        int same;

        shouldRetry = 1;
        areq->volumeError = VOLMISSING;
        if (aerrP)
            (aerrP->err_Volume)++;
        if (afid && (tcell = afs_GetCell(afid->Cell, 0))) {
            same = VLDB_Same(afid, areq);
            tvp = afs_FindVolume(afid, READ_LOCK);
            if (tvp) {
                for (i = 0; i < MAXHOSTS && tvp->serverHost[i]; i++) {
                    if (tvp->serverHost[i] == tsp) {
                        /* Escalate this host toward offline; other hosts
                         * are reset only if the VLDB entry changed. */
                        if (tvp->status[i] == end_not_busy)
                            tvp->status[i] = offline;
                        else
                            tvp->status[i]++;
                    } else if (!same) {
                        tvp->status[i] = not_busy;	/* reset the others */
                    }
                }
                afs_PutVolume(tvp, READ_LOCK);
            }
        }
    } else if (acode >= ERROR_TABLE_BASE_VL
               && acode <= ERROR_TABLE_BASE_VL + 255) {
        /* vlserver errors */
        shouldRetry = 0;
        areq->volumeError = VOLMISSING;
    } else if (acode >= 0) {
        if (aerrP)
            (aerrP->err_Other)++;
        if (op == AFS_STATS_FS_RPCIDX_STOREDATA)
            areq->permWriteError = 1;
        shouldRetry = 0;	/* Other random Vice error. */
    } else if (acode == RX_MSGSIZE) {	/* same meaning as EMSGSIZE... */
        /* NOTE(review): RX_MSGSIZE is negative, so this case is reached
         * only after the (acode < 0) block above already ran — confirm
         * the intended ordering. */
        VSleep(1);		/* Just a hack for desperate times. */
        if (aerrP)
            (aerrP->err_Other)++;
        shouldRetry = 1;	/* packet was too big, please retry call */
    }

    if (acode < 0 && acode != RX_MSGSIZE && acode != VRESTARTING) {
        /* If we get here, code < 0 and we have network/Server troubles.
         * areq->networkError is not set here, since we always
         * retry in case there is another server.  However, if we find
         * no connection (aconn == 0) we set the networkError flag.
         */
        afs_MarkServerUpOrDown(sa, SRVR_ISDOWN);
        if (aerrP)
            (aerrP->err_Server)++;
        VSleep(1);		/* Just a hack for desperate times. */
        shouldRetry = 1;
    }
  out:
    /* now unlock the connection and return */
    afs_PutConn(aconn, locktype);
    return (shouldRetry);
}				/*afs_Analyze */
/*
 * VLDB_Same: re-fetch the VLDB entry for afid's volume and decide whether
 * the locally-cached server list for that volume is still accurate.
 *
 * Returns SAME      - entry re-fetched, serverHost list unchanged
 *         DIFFERENT - entry re-fetched and the server list changed (or the
 *                     volume was not cached and had to be created)
 *         DUNNO     - could not build a request, or the VL RPC failed
 *
 * The three VL_GetEntryByName{O,N,U} variants correspond to successively
 * newer vldb entry formats; server capability flags (SNO_LHOSTS /
 * SYES_LHOSTS / SVLSRV_UUID) are probed via RXGEN_OPCODE fallback and
 * cached on the server structure so later calls skip the probing.
 */
static int
VLDB_Same(struct VenusFid *afid, struct vrequest *areq)
{
    struct vrequest treq;
    struct afs_conn *tconn;
    int i, type = 0;
    union {
	struct vldbentry tve;
	struct nvldbentry ntve;
	struct uvldbentry utve;
    } *v;
    struct volume *tvp;
    struct cell *tcell;
    char *bp, tbuf[CVBS];	/* biggest volume id is 2^32, ~ 4*10^9 */
    unsigned int changed;
    struct server *(oldhosts[NMAXNSERVERS]);

    AFS_STATCNT(CheckVLDB);
    afs_FinalizeReq(areq);

    /* build a fresh request for the VL traffic; DUNNO if we can't */
    if ((i = afs_InitReq(&treq, afs_osi_credp)))
	return DUNNO;
    v = afs_osi_Alloc(sizeof(*v));
    tcell = afs_GetCell(afid->Cell, READ_LOCK);
    bp = afs_cv2string(&tbuf[CVBS], afid->Fid.Volume);
    do {
	VSleep(2);		/* Better safe than sorry. */
	tconn =
	    afs_ConnByMHosts(tcell->cellHosts, tcell->vlport, tcell->cellNum,
			     &treq, SHARED_LOCK);
	if (tconn) {
	    if (tconn->srvr->server->flags & SNO_LHOSTS) {
		/* server known to speak only the oldest interface */
		type = 0;
		RX_AFS_GUNLOCK();
		i = VL_GetEntryByNameO(tconn->id, bp, &v->tve);
		RX_AFS_GLOCK();
	    } else if (tconn->srvr->server->flags & SYES_LHOSTS) {
		/* server known to speak the "N" interface */
		type = 1;
		RX_AFS_GUNLOCK();
		i = VL_GetEntryByNameN(tconn->id, bp, &v->ntve);
		RX_AFS_GLOCK();
	    } else {
		/* capabilities unknown: try newest first, fall back on
		 * RXGEN_OPCODE and remember what worked */
		type = 2;
		RX_AFS_GUNLOCK();
		i = VL_GetEntryByNameU(tconn->id, bp, &v->utve);
		RX_AFS_GLOCK();
		if (!(tconn->srvr->server->flags & SVLSRV_UUID)) {
		    if (i == RXGEN_OPCODE) {
			type = 1;
			RX_AFS_GUNLOCK();
			i = VL_GetEntryByNameN(tconn->id, bp, &v->ntve);
			RX_AFS_GLOCK();
			if (i == RXGEN_OPCODE) {
			    type = 0;
			    tconn->srvr->server->flags |= SNO_LHOSTS;
			    RX_AFS_GUNLOCK();
			    i = VL_GetEntryByNameO(tconn->id, bp, &v->tve);
			    RX_AFS_GLOCK();
			} else if (!i)
			    tconn->srvr->server->flags |= SYES_LHOSTS;
		    } else if (!i)
			tconn->srvr->server->flags |= SVLSRV_UUID;
		}
		/* NOTE(review): lastcode is presumably a file-scope global
		 * recording the last VL result -- confirm its declaration */
		lastcode = i;
	    }
	} else
	    i = -1;		/* no connection: let afs_Analyze decide */
    } while (afs_Analyze(tconn, i, NULL, &treq, -1,	/* no op code for this */
			 SHARED_LOCK, tcell));

    afs_PutCell(tcell, READ_LOCK);
    afs_Trace2(afs_iclSetp, CM_TRACE_CHECKVLDB, ICL_TYPE_FID, &afid,
	       ICL_TYPE_INT32, i);

    if (i) {
	afs_osi_Free(v, sizeof(*v));
	return DUNNO;
    }
    /* have info, copy into serverHost array */
    changed = 0;
    tvp = afs_FindVolume(afid, WRITE_LOCK);
    if (tvp) {
	ObtainWriteLock(&tvp->lock, 107);

	/* snapshot the old server list so we can detect changes below */
	for (i = 0; i < NMAXNSERVERS && tvp->serverHost[i]; i++) {
	    oldhosts[i] = tvp->serverHost[i];
	}

	/* install by the entry format we actually received; note tcell is
	 * still used here after afs_PutCell above -- NOTE(review): presumably
	 * the cell refcount semantics make this safe, confirm */
	if (type == 2) {
	    InstallUVolumeEntry(tvp, &v->utve, afid->Cell, tcell, &treq);
	} else if (type == 1) {
	    InstallNVolumeEntry(tvp, &v->ntve, afid->Cell);
	} else {
	    InstallVolumeEntry(tvp, &v->tve, afid->Cell);
	}

	/* changed if the list grew past the old length ... */
	if (i < NMAXNSERVERS && tvp->serverHost[i]) {
	    changed = 1;
	}
	/* ... or if any existing slot now differs */
	for (--i; !changed && i >= 0; i--) {
	    if (tvp->serverHost[i] != oldhosts[i]) {
		changed = 1;	/* also happens if prefs change.  big deal. */
	    }
	}

	ReleaseWriteLock(&tvp->lock);
	afs_PutVolume(tvp, WRITE_LOCK);
    } else {			/* can't find volume */
	tvp = afs_GetVolume(afid, &treq, WRITE_LOCK);
	if (tvp) {
	    afs_PutVolume(tvp, WRITE_LOCK);
	    afs_osi_Free(v, sizeof(*v));
	    return DIFFERENT;
	} else {
	    afs_osi_Free(v, sizeof(*v));
	    return DUNNO;
	}
    }

    afs_osi_Free(v, sizeof(*v));
    return (changed ? DIFFERENT : SAME);
}				/*VLDB_Same */
/* don't set CDirty in here because RPC is called synchronously */
/*
 * afs_symlink: create a symbolic link (or, when the target begins with
 * '#' or '%' and ends in '.', an AFS mount point) named aname in the
 * directory adp, pointing at atargetName.
 *
 * On success a new vcache for the link is created; if tvcp is non-NULL
 * the caller receives a referenced pointer to it, otherwise the reference
 * is dropped before returning.  Returns 0 or an errno-style/afs error.
 *
 * In disconnected-RW mode no RPC is issued: a fake fid is generated and
 * the operation is queued as dirty for later replay.
 */
int
afs_symlink(OSI_VC_DECL(adp), char *aname, struct vattr *attrs,
	    char *atargetName, struct vcache **tvcp, afs_ucred_t *acred)
{
    afs_uint32 now = 0;
    struct vrequest *treq = NULL;
    afs_int32 code = 0;
    struct afs_conn *tc;
    struct VenusFid newFid;
    struct dcache *tdc;
    afs_size_t offset, len;
    afs_int32 alen;
    struct server *hostp = 0;
    struct vcache *tvc;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus *OutFidStatus, *OutDirStatus;
    struct AFSCallBack CallBack;
    struct AFSVolSync tsync;
    struct volume *volp = 0;
    struct afs_fakestat_state fakestate;
    struct rx_connection *rxconn;
    XSTATS_DECLS;
    OSI_VC_CONVERT(adp);

    AFS_STATCNT(afs_symlink);
    afs_Trace2(afs_iclSetp, CM_TRACE_SYMLINK, ICL_TYPE_POINTER, adp,
	       ICL_TYPE_STRING, aname);

    OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    memset(&InStatus, 0, sizeof(InStatus));

    if ((code = afs_CreateReq(&treq, acred)))
	goto done2;

    afs_InitFakeStat(&fakestate);
    AFS_DISCON_LOCK();
    code = afs_EvalFakeStat(&adp, &fakestate, treq);
    if (code)
	goto done;

    if (strlen(aname) > AFSNAMEMAX || strlen(atargetName) > AFSPATHMAX) {
	code = ENAMETOOLONG;
	goto done;
    }

    /* dynroot is synthesized locally; never talk to a fileserver for it */
    if (afs_IsDynroot(adp)) {
	code = afs_DynrootVOPSymlink(adp, acred, aname, atargetName);
	goto done;
    }
    if (afs_IsDynrootMount(adp)) {
	code = EROFS;
	goto done;
    }

    code = afs_VerifyVCache(adp, treq);
    if (code) {
	code = afs_CheckCode(code, treq, 30);
	goto done;
    }

    /** If the volume is read-only, return error without making an RPC to the
     * fileserver */
    if (adp->f.states & CRO) {
	code = EROFS;
	goto done;
    }

    if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
	code = ENETDOWN;
	goto done;
    }

    InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE;
    InStatus.ClientModTime = osi_Time();
    alen = strlen(atargetName);	/* we want it to include the null */
    if ((*atargetName == '#' || *atargetName == '%') && alen > 1
	&& atargetName[alen - 1] == '.') {
	/* mount point: mode 0644 marks it; the trailing "." becomes the null */
	InStatus.UnixModeBits = 0644;	/* mt pt: null from "." at end */
	/* NOTE(review): dead branch -- alen > 1 was just required above */
	if (alen == 1)
	    alen++;		/* Empty string */
    } else {
	InStatus.UnixModeBits = 0755;
	alen++;			/* add in the null */
    }
    tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1);
    volp = afs_FindVolume(&adp->f.fid, READ_LOCK);	/*parent is also in same vol */
    ObtainWriteLock(&adp->lock, 156);
    if (tdc)
	ObtainWriteLock(&tdc->lock, 636);
    /* No further locks: if the SymLink succeeds, it does not matter what happens
     * to our local copy of the directory. If somebody tampers with it in the meantime,
     * the copy will be invalidated */
    if (!AFS_IS_DISCON_RW) {
	do {
	    tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn);
	    if (tc) {
		hostp = tc->parent->srvr->server;
		XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_SYMLINK);
		if (adp->f.states & CForeign) {
		    /* DFS server: symlink RPC also returns a callback */
		    now = osi_Time();
		    RX_AFS_GUNLOCK();
		    code =
			RXAFS_DFSSymlink(rxconn,
					 (struct AFSFid *)&adp->f.fid.Fid,
					 aname, atargetName, &InStatus,
					 (struct AFSFid *)&newFid.Fid,
					 OutFidStatus, OutDirStatus,
					 &CallBack, &tsync);
		    RX_AFS_GLOCK();
		} else {
		    RX_AFS_GUNLOCK();
		    code =
			RXAFS_Symlink(rxconn, (struct AFSFid *)&adp->f.fid.Fid,
				      aname, atargetName, &InStatus,
				      (struct AFSFid *)&newFid.Fid,
				      OutFidStatus, OutDirStatus, &tsync);
		    RX_AFS_GLOCK();
		}
		XSTATS_END_TIME;
	    } else
		code = -1;
	} while (afs_Analyze
		 (tc, rxconn, code, &adp->f.fid, treq,
		  AFS_STATS_FS_RPCIDX_SYMLINK, SHARED_LOCK, NULL));
    } else {
	/* disconnected-RW: fabricate a fid locally, replay to server later */
	newFid.Cell = adp->f.fid.Cell;
	newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	afs_GenFakeFid(&newFid, VREG, 0);
    }

    ObtainWriteLock(&afs_xvcache, 40);
    if (code) {
	if (code < 0) {
	    /* network-class error: our view of the dir may be stale */
	    afs_StaleVCache(adp);
	}
	ReleaseWriteLock(&adp->lock);
	ReleaseWriteLock(&afs_xvcache);
	if (tdc) {
	    ReleaseWriteLock(&tdc->lock);
	    afs_PutDCache(tdc);
	}
	goto done;
    }
    /* otherwise, we should see if we can make the change to the dir locally */
    if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) {
	/* we can do it locally */
	ObtainWriteLock(&afs_xdcache, 293);
	/* If the following fails because the name has been created in the meantime, the
	 * directory is out-of-date - the file server knows best! */
	code = afs_dir_Create(tdc, aname, &newFid.Fid);
	ReleaseWriteLock(&afs_xdcache);
	if (code && !AFS_IS_DISCON_RW) {
	    ZapDCE(tdc);	/* surprise error -- use invalid value */
	    DZap(tdc);
	}
    }
    if (tdc) {
	ReleaseWriteLock(&tdc->lock);
	afs_PutDCache(tdc);
    }
    newFid.Cell = adp->f.fid.Cell;
    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);

    /* now we're done with parent dir, create the link's entry.  Note that
     * no one can get a pointer to the new cache entry until we release
     * the xvcache lock. */
    tvc = afs_NewVCache(&newFid, hostp);
    if (!tvc) {
	code = -2;
	ReleaseWriteLock(&afs_xvcache);
	goto done;
    }
    ObtainWriteLock(&tvc->lock, 157);
    ObtainWriteLock(&afs_xcbhash, 500);
    tvc->f.states |= CStatd;	/* have valid info */
    tvc->f.states &= ~CBulkFetching;

    if (adp->f.states & CForeign) {
	tvc->f.states |= CForeign;
	/* We don't have to worry about losing the callback since we're doing it
	 * under the afs_xvcache lock actually, afs_NewVCache may drop the
	 * afs_xvcache lock, if it calls afs_FlushVCache */
	tvc->cbExpires = CallBack.ExpirationTime + now;
	afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime), volp);
    } else {
	tvc->cbExpires = 0x7fffffff;	/* never expires, they can't change */
	/* since it never expires, we don't have to queue the callback */
    }
    ReleaseWriteLock(&afs_xcbhash);

    if (AFS_IS_DISCON_RW) {
	attrs->va_mode = InStatus.UnixModeBits;
	afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VLNK);
	code = afs_DisconCreateSymlink(tvc, atargetName, treq);
	if (code) {
	    /* XXX - When this goes wrong, we need to tidy up the changes we made to
	     * the parent, and get rid of the vcache we just created */
	    ReleaseWriteLock(&tvc->lock);
	    ReleaseWriteLock(&afs_xvcache);
	    afs_PutVCache(tvc);
	    goto done;
	}
	afs_DisconAddDirty(tvc, VDisconCreate, 0);
    } else {
	afs_ProcessFS(tvc, OutFidStatus, treq);
    }

    /* cache the link target so readlink needn't hit the fileserver */
    if (!tvc->linkData) {
	tvc->linkData = afs_osi_Alloc(alen);
	osi_Assert(tvc->linkData != NULL);
	strncpy(tvc->linkData, atargetName, alen - 1);
	tvc->linkData[alen - 1] = 0;	/* guarantee NUL termination */
    }
    ReleaseWriteLock(&tvc->lock);
    ReleaseWriteLock(&afs_xvcache);
    if (tvcp)
	*tvcp = tvc;		/* hand the reference to the caller */
    else
	afs_PutVCache(tvc);
    code = 0;
  done:
    afs_PutFakeStat(&fakestate);
    if (volp)
	afs_PutVolume(volp, READ_LOCK);
    AFS_DISCON_UNLOCK();
    code = afs_CheckCode(code, treq, 31);
    afs_DestroyReq(treq);
  done2:
    osi_FreeSmallSpace(OutFidStatus);
    osi_FreeSmallSpace(OutDirStatus);
    return code;
}
int afs_UFSHandleLink(struct vcache *avc, struct vrequest *areq) { struct dcache *tdc; char *tp, *rbuf; void *tfile; afs_size_t offset, len; afs_int32 tlen, alen; afs_int32 code; /* two different formats, one for links protected 644, have a "." at the * end of the file name, which we turn into a null. Others, protected * 755, we add a null to the end of */ AFS_STATCNT(afs_UFSHandleLink); if (!avc->linkData) { tdc = afs_GetDCache(avc, (afs_size_t) 0, areq, &offset, &len, 0); afs_Trace3(afs_iclSetp, CM_TRACE_UFSLINK, ICL_TYPE_POINTER, avc, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); if (!tdc) { if (AFS_IS_DISCONNECTED) return ENETDOWN; else return EIO; } /* otherwise we have the data loaded, go for it */ if (len > 1024) { afs_PutDCache(tdc); return EFAULT; } if (avc->f.m.Mode & 0111) alen = len + 1; /* regular link */ else alen = len; /* mt point */ rbuf = osi_AllocLargeSpace(AFS_LRALLOCSIZ); tlen = len; ObtainReadLock(&tdc->lock); tfile = osi_UFSOpen(&tdc->f.inode); if (!tfile) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); osi_FreeLargeSpace(rbuf); return EIO; } code = afs_osi_Read(tfile, -1, rbuf, tlen); osi_UFSClose(tfile); ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); rbuf[alen - 1] = '\0'; alen = strlen(rbuf) + 1; tp = afs_osi_Alloc(alen); /* make room for terminating null */ osi_Assert(tp != NULL); memcpy(tp, rbuf, alen); osi_FreeLargeSpace(rbuf); if (code != tlen) { afs_osi_Free(tp, alen); return EIO; } avc->linkData = tp; } return 0; }
/* lp is pointer to a fairly-old buffer */
/*
 * afs_newslot: pick (or allocate) a directory-buffer slot for page apage
 * of dcache adc, flush it if dirty, stamp it with the new identity and
 * rehash it.  Called with afs_bufferLock write-held (callers in DRead).
 *
 * Returns the chosen buffer, or NULL when every buffer is locked and the
 * pool cannot grow any further.
 */
static struct buffer *
afs_newslot(struct dcache *adc, afs_int32 apage, struct buffer *lp)
{
    /* Find a usable buffer slot */
    afs_int32 i;
    afs_int32 lt = 0;
    struct buffer *tp;
    struct osi_file *tfile;

    AFS_STATCNT(afs_newslot);
    /* we take a pointer here to a buffer which was at the end of an
     * LRU hash chain.  Odds are, it's one of the older buffers, not
     * one of the newer.  Having an older buffer to start with may
     * permit us to avoid a few of the assignments in the "typical
     * case" for loop below.
     */
    if (lp && (lp->lockers == 0)) {
	lt = lp->accesstime;
    } else {
	lp = NULL;		/* hint unusable: it's locked */
    }

    /* timecounter might have wrapped, if machine is very very busy
     * and stays up for a long time.  Timecounter mustn't wrap twice
     * (positive->negative->positive) before calling newslot, but that
     * would require 2 billion consecutive cache hits... Anyway, the
     * penalty is only that the cache replacement policy will be
     * almost MRU for the next ~2 billion DReads...  newslot doesn't
     * get called nearly as often as DRead, so in order to avoid the
     * performance penalty of using the hypers, it's worth doing the
     * extra check here every time.  It's probably cheaper than doing
     * hcmp, anyway.  There is a little performance hit resulting from
     * resetting all the access times to 0, but it only happens once
     * every month or so, and the access times will rapidly sort
     * themselves back out after just a few more DReads.
     */
    if (timecounter < 0) {
	/* wrap recovery: zero every access time, grab any unlocked slot */
	timecounter = 1;
	tp = Buffers;
	for (i = 0; i < nbuffers; i++, tp++) {
	    tp->accesstime = 0;
	    if (!lp && !tp->lockers)	/* one is as good as the rest, I guess */
		lp = tp;
	}
    } else {
	/* this is the typical case: linear scan for the LRU unlocked slot */
	tp = Buffers;
	for (i = 0; i < nbuffers; i++, tp++) {
	    if (tp->lockers == 0) {
		if (!lp || tp->accesstime < lt) {
		    lp = tp;
		    lt = tp->accesstime;
		}
	    }
	}
    }

    if (lp == 0) {
	/* No unlocked buffers.  If still possible, allocate a new increment */
	if (nbuffers + NPB > afs_max_buffers) {
	    /* There are no unlocked buffers -- this used to panic, but that
	     * seems extreme.  To the best of my knowledge, all the callers
	     * of DRead are prepared to handle a zero return.  Some of them
	     * just panic directly, but not all of them. */
	    afs_warn("afs: all buffers locked\n");
	    return 0;
	}

	/* grow the pool by NPB slots; NOTE(review): Buffers[] itself is
	 * presumably sized for afs_max_buffers elsewhere -- confirm */
	BufferData = afs_osi_Alloc(AFS_BUFFER_PAGESIZE * NPB);
	osi_Assert(BufferData != NULL);
	for (i = 0; i < NPB; i++) {
	    /* Fill in each buffer with an empty indication. */
	    tp = &Buffers[i + nbuffers];
	    tp->fid = NULLIDX;
	    afs_reset_inode(&tp->inode);
	    tp->accesstime = 0;
	    tp->lockers = 0;
	    tp->data = &BufferData[AFS_BUFFER_PAGESIZE * i];
	    tp->hashIndex = 0;
	    tp->dirty = 0;
	    AFS_RWLOCK_INIT(&tp->lock, "buffer lock");
	}
	lp = &Buffers[nbuffers];
	nbuffers += NPB;
    }

    if (lp->dirty) {
	/* see DFlush for rationale for not getting and locking the dcache */
	tfile = afs_CFileOpen(&lp->inode);
	afs_CFileWrite(tfile, lp->page * AFS_BUFFER_PAGESIZE, lp->data,
		       AFS_BUFFER_PAGESIZE);
	lp->dirty = 0;
	afs_CFileClose(tfile);
	AFS_STATS(afs_stats_cmperf.bufFlushDirty++);
    }

    /* Now fill in the header. */
    lp->fid = adc->index;
    afs_copy_inode(&lp->inode, &adc->f.inode);
    lp->page = apage;
    lp->accesstime = timecounter++;
    FixupBucket(lp);		/* move to the right hash bucket */

    return lp;
}
/* Mounting AFS as the root filesystem is unsupported here; always EINVAL. */
int
afs_mountroot(void)
{
    AFS_STATCNT(afs_mountroot);
    return EINVAL;
}
/*
 * afs_fid: build an NFS-exportable file handle for vcache avc.
 *
 * Two encodings are produced: a SmallFid (volume, vnode, packed
 * cell-index+uniquifier) when the fid fits, or a translator "magic"
 * address pair pointing at the vcache itself when afs_NFSRootOnly is set
 * and avc is not the global root.  Depending on platform, the handle is
 * written into a caller-supplied fid (AIX/Solaris) or freshly allocated.
 *
 * NOTE(review): the matching #if/#ifdef for the #endif below (alternate
 * AIX 4.1 signature) precedes this chunk and is not visible here.
 */
afs_fid(OSI_VC_DECL(avc), struct fid **fidpp)
#endif				/* AFS_AIX41_ENV */
{
    struct SmallFid Sfid;
    long addr[2];
    register struct cell *tcell;
    extern struct vcache *afs_globalVp;
    int SizeOfSmallFid = SIZEOF_SMALLFID;
    int rootvp = 0;
    OSI_VC_CONVERT(avc);

    AFS_STATCNT(afs_fid);

    if (afs_shuttingdown)
	return EIO;

    if (afs_NFSRootOnly && (avc == afs_globalVp))
	rootvp = 1;
    if (!afs_NFSRootOnly || rootvp
#ifdef AFS_AIX41_ENV
	|| USE_SMALLFID(credp)
#endif
	) {
	/* SmallFid encoding: cell index in the top byte, uniquifier in the
	 * low 24 bits; overflows are only counted, not rejected */
	tcell = afs_GetCell(avc->f.fid.Cell, READ_LOCK);
	Sfid.Volume = avc->f.fid.Fid.Volume;
	Sfid.Vnode = avc->f.fid.Fid.Vnode;
	Sfid.CellAndUnique =
	    ((tcell->cellIndex << 24) + (avc->f.fid.Fid.Unique & 0xffffff));
	afs_PutCell(tcell, READ_LOCK);
	if (avc->f.fid.Fid.Vnode > 0xffff)
	    afs_fid_vnodeoverflow++;
	if (avc->f.fid.Fid.Unique > 0xffffff)
	    afs_fid_uniqueoverflow++;
    } else {
	/* translator encoding: magic marker + raw vcache pointer */
#if defined(AFS_SUN57_64BIT_ENV) || (defined(AFS_SGI61_ENV) && (_MIPS_SZPTR == 64))
	addr[1] = (long)AFS_XLATOR_MAGIC << 48;
#else /* defined(AFS_SGI61_ENV) && (_MIPS_SZPTR == 64) */
	addr[1] = AFS_XLATOR_MAGIC;
	SizeOfSmallFid = sizeof(addr);
#endif /* defined(AFS_SGI61_ENV) && (_MIPS_SZPTR == 64) */
	addr[0] = (long)avc;
#ifndef AFS_AIX41_ENV
	/* No post processing, so don't hold ref count. */
	AFS_FAST_HOLD(avc);
#endif
    }
#if defined(AFS_AIX_ENV) || defined(AFS_SUN54_ENV)
    /* Use the fid pointer passed to us. */
    fidpp->fid_len = SizeOfSmallFid;

    if (afs_NFSRootOnly) {
	if (rootvp
#ifdef AFS_AIX41_ENV
	    || USE_SMALLFID(credp)
#endif
	    ) {
	    memcpy(fidpp->fid_data, (caddr_t) & Sfid, SizeOfSmallFid);
	} else {
	    memcpy(fidpp->fid_data, (caddr_t) addr, SizeOfSmallFid);
	}
    } else {
	memcpy(fidpp->fid_data, (caddr_t) & Sfid, SizeOfSmallFid);
    }
#else
    /* malloc a fid pointer ourselves. */
    *fidpp = (struct fid *)AFS_KALLOC(SizeOfSmallFid + 2);
    (*fidpp)->fid_len = SizeOfSmallFid;
    if (afs_NFSRootOnly) {
	if (rootvp) {
	    memcpy((*fidpp)->fid_data, (char *)&Sfid, SizeOfSmallFid);
	} else {
	    memcpy((*fidpp)->fid_data, (char *)addr, SizeOfSmallFid);
	}
    } else {
	memcpy((*fidpp)->fid_data, (char *)&Sfid, SizeOfSmallFid);
    }
#endif
    return (0);
}
/* Swapping to an AFS vnode is unsupported here; always EINVAL. */
int
afs_swapvp(void)
{
    AFS_STATCNT(afs_swapvp);
    return EINVAL;
}
/*!
 * Create or update a cell entry.
 * \param acellName Name of cell.
 * \param acellHosts Array of hosts that this cell has.
 * \param aflags Cell flags.
 * \param linkedcname
 * \param fsport File server port.
 * \param vlport Volume server port.
 * \param timeout Cell timeout value, 0 means static AFSDB entry.
 * \return 0 on success; EEXIST when an AFSDB update would override a
 *         static entry; EINVAL/ENOENT for bad linked-cell arguments.
 */
afs_int32
afs_NewCell(char *acellName, afs_int32 * acellHosts, int aflags,
	    char *linkedcname, u_short fsport, u_short vlport, int timeout)
{
    struct cell *tc, *tcl = 0;
    afs_int32 i, newc = 0, code = 0;

    AFS_STATCNT(afs_NewCell);

    ObtainWriteLock(&afs_xcell, 103);

    tc = afs_FindCellByName_nl(acellName, READ_LOCK);
    if (tc) {
	/* existing cell keeps its suid standing */
	aflags &= ~CNoSUID;
    } else {
	/* brand-new cell: allocate and initialize; not yet visible to
	 * anyone else until linked into the LRU below */
	tc = afs_osi_Alloc(sizeof(struct cell));
	osi_Assert(tc != NULL);
	memset(tc, 0, sizeof(*tc));
	tc->cellName = afs_strdup(acellName);
	tc->fsport = AFS_FSPORT;
	tc->vlport = AFS_VLPORT;
	AFS_MD5_String(tc->cellHandle, tc->cellName, strlen(tc->cellName));
	AFS_RWLOCK_INIT(&tc->lock, "cell lock");
	newc = 1;
	aflags |= CNoSUID;
    }
    ObtainWriteLock(&tc->lock, 688);

    /* If the cell we've found has the correct name but no timeout,
     * and we're called with a non-zero timeout, bail out:  never
     * override static configuration entries with AFSDB ones.
     * One exception: if the original cell entry had no servers,
     * it must get servers from AFSDB.
     */
    if (timeout && !tc->timeout && tc->cellHosts[0]) {
	code = EEXIST;		/* This code is checked for in afs_LookupAFSDB */
	goto bad;
    }

    /* we don't want to keep pinging old vlservers which were down,
     * since they don't matter any more.  It's easier to do this than
     * to remove the server from its various hash tables. */
    for (i = 0; i < AFS_MAXCELLHOSTS; i++) {
	if (!tc->cellHosts[i])
	    break;
	tc->cellHosts[i]->flags &= ~SRVR_ISDOWN;
	tc->cellHosts[i]->flags |= SRVR_ISGONE;
    }

    if (fsport)
	tc->fsport = fsport;
    if (vlport)
	tc->vlport = vlport;

    if (aflags & CLinkedCell) {
	if (!linkedcname) {
	    code = EINVAL;
	    goto bad;
	}
	tcl = afs_FindCellByName_nl(linkedcname, READ_LOCK);
	if (!tcl) {
	    code = ENOENT;
	    goto bad;
	}
	if (tcl->lcellp) {	/* XXX Overwriting if one existed before! XXX */
	    tcl->lcellp->lcellp = (struct cell *)0;
	    tcl->lcellp->states &= ~CLinkedCell;
	}
	tc->lcellp = tcl;
	tcl->lcellp = tc;
    }
    tc->states |= aflags;
    tc->timeout = timeout;

    memset(tc->cellHosts, 0, sizeof(tc->cellHosts));
    for (i = 0; i < AFS_MAXCELLHOSTS; i++) {
	/* Get server for each host and link this cell in.*/
	struct server *ts;
	afs_uint32 temp = acellHosts[i];
	if (!temp)
	    break;
	ts = afs_GetServer(&temp, 1, 0, tc->vlport, WRITE_LOCK, NULL, 0);
	ts->cell = tc;
	ts->flags &= ~SRVR_ISGONE;
	/* Set the server as a host of the new cell. */
	tc->cellHosts[i] = ts;
	afs_PutServer(ts, WRITE_LOCK);
    }
    afs_SortServers(tc->cellHosts, AFS_MAXCELLHOSTS);	/* randomize servers */

    /* New cell: Build and add to LRU cell queue. */
    if (newc) {
	struct cell_name *cn;

	cn = afs_cellname_lookup_name(acellName);
	if (!cn)
	    cn = afs_cellname_new(acellName, 0);

	tc->cnamep = cn;
	tc->cellNum = cn->cellnum;
	tc->cellIndex = afs_cellindex++;
	afs_stats_cmperf.numCellsVisible++;
	QAdd(&CellLRU, &tc->lruq);
    }

    ReleaseWriteLock(&tc->lock);
    ReleaseWriteLock(&afs_xcell);
    afs_PutCell(tc, 0);
    if (!(aflags & CHush))
	afs_DynrootInvalidate();
    return 0;

  bad:
    /* BUGFIX: release tc->lock BEFORE freeing the cell.  The original
     * code freed tc (for a newly allocated cell) and then called
     * ReleaseWriteLock(&tc->lock) on the freed memory -- a use-after-free.
     * A new cell is safe to destroy here: it was never linked into the
     * LRU or any hash table, so no one else can hold a reference. */
    ReleaseWriteLock(&tc->lock);
    if (newc) {
	afs_osi_FreeStr(tc->cellName);
	afs_osi_Free(tc, sizeof(struct cell));
    }
    ReleaseWriteLock(&afs_xcell);
    return code;
}
/*
 * osi_NetSend: transmit an Rx packet described by the iovec array dvec
 * (nvecs entries, alength total bytes) to addr over kernel socket so.
 *
 * Drops the AFS global lock (if held) around the actual send.  On Darwin
 * 8+ the KPI sock_send() path is used; otherwise classic BSD sosend().
 * Returns 0 or an error from the socket layer; -1 if the client is
 * shutting down.  istack is unused here.
 */
int
osi_NetSend(osi_socket so, struct sockaddr_in *addr, struct iovec *dvec,
	    int nvecs, afs_int32 alength, int istack)
{
#ifdef AFS_DARWIN80_ENV
    socket_t asocket = (socket_t)so;
    struct msghdr msg;
    size_t slen;
#else
    struct socket *asocket = (struct socket *)so;
    struct uio u;
#endif
    afs_int32 code;
    int i;
    struct iovec iov[RX_MAXIOVECS];
    int haveGlock = ISAFS_GLOCK();

    AFS_STATCNT(osi_NetSend);
    if (nvecs > RX_MAXIOVECS)
	osi_Panic("osi_NetSend: %d: Too many iovecs.\n", nvecs);

    /* copy the caller's iovecs; the socket layer may consume them */
    for (i = 0; i < nvecs; i++)
	iov[i] = dvec[i];

    addr->sin_len = sizeof(struct sockaddr_in);

    /* refuse to send once shutdown of the Rx listener has begun */
    if ((afs_termState == AFSOP_STOP_RXK_LISTENER)
	|| (afs_termState == AFSOP_STOP_COMPLETE))
	return -1;

    if (haveGlock)
	AFS_GUNLOCK();

#if defined(KERNEL_FUNNEL)
    thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
#endif
#ifdef AFS_DARWIN80_ENV
    memset(&msg, 0, sizeof(struct msghdr));
    msg.msg_name = addr;
    msg.msg_namelen = ((struct sockaddr *)addr)->sa_len;
    msg.msg_iov = &iov[0];
    msg.msg_iovlen = nvecs;
    code = sock_send(asocket, &msg, 0, &slen);
#else
    u.uio_iov = &iov[0];
    u.uio_iovcnt = nvecs;
    u.uio_offset = 0;
    u.uio_resid = alength;
    u.uio_segflg = UIO_SYSSPACE;
    u.uio_rw = UIO_WRITE;
    u.uio_procp = NULL;
    code = sosend(asocket, (struct sockaddr *)addr, &u, NULL, NULL, 0);
#endif
#if defined(KERNEL_FUNNEL)
    thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
#endif
    if (haveGlock)
	AFS_GLOCK();
    return code;
}
/*
 * afs_root: FreeBSD VFS "get root vnode" entry point for an AFS mount.
 * Returns (via *vpp) a vget'ed, VROOT-flagged vnode for the AFS root,
 * caching it in afs_globalVp while its status is valid.
 *
 * NOTE(review): the matching #if for the #endif below (alternate
 * signature for another FreeBSD version) precedes this chunk and is not
 * visible here.  Also note the vget-failure path loops back to tryagain
 * with no retry bound.
 */
afs_root(struct mount *mp, struct vnode **vpp)
#endif
{
    int error;
    struct vrequest treq;
    register struct vcache *tvp = 0;
#ifdef AFS_FBSD50_ENV
#ifndef AFS_FBSD53_ENV
    struct thread *td = curthread;
#endif
    struct ucred *cr = td->td_ucred;
#else
    struct proc *p = curproc;
    struct ucred *cr = p->p_cred->pc_ucred;
#endif

    AFS_GLOCK();
    AFS_STATCNT(afs_root);
    crhold(cr);
    if (afs_globalVp && (afs_globalVp->f.states & CStatd)) {
	/* cached root still has valid status: reuse it */
	tvp = afs_globalVp;
	error = 0;
    } else {
      tryagain:
#ifndef AFS_FBSD80_ENV
	if (afs_globalVp) {
	    afs_PutVCache(afs_globalVp);
	    /* vrele() needed here or not? */
	    afs_globalVp = NULL;
	}
#endif
	if (!(error = afs_InitReq(&treq, cr)) && !(error = afs_CheckInit())) {
	    tvp = afs_GetVCache(&afs_rootFid, &treq, NULL, NULL);
	    /* we really want this to stay around */
	    if (tvp)
		afs_globalVp = tvp;
	    else
		error = ENOENT;
	}
    }
    if (tvp) {
	struct vnode *vp = AFSTOV(tvp);

#ifdef AFS_FBSD50_ENV
	ASSERT_VI_UNLOCKED(vp, "afs_root");
#endif
	AFS_GUNLOCK();
	/*
	 * I'm uncomfortable about this.  Shouldn't this happen at a
	 * higher level, and shouldn't we busy the top-level directory
	 * to prevent recycling?
	 */
#ifdef AFS_FBSD50_ENV
	error = vget(vp, LK_EXCLUSIVE | LK_RETRY, td);
	vp->v_vflag |= VV_ROOT;
#else
	error = vget(vp, LK_EXCLUSIVE | LK_RETRY, p);
	vp->v_flag |= VROOT;
#endif
	AFS_GLOCK();
	if (error != 0)
	    goto tryagain;

	afs_globalVFS = mp;
	*vpp = vp;
    }
    afs_Trace2(afs_iclSetp, CM_TRACE_VFSROOT, ICL_TYPE_POINTER,
	       tvp ? AFSTOV(tvp) : NULL, ICL_TYPE_INT32, error);
    AFS_GUNLOCK();
    crfree(cr);
    return error;
}
/*
 * afs_MemRead: satisfy a read of avc described by auio from the
 * memory-backed cache, chunk by chunk, waiting on (or kicking off
 * background) fetches as needed.
 *
 * \param avc    vnode being read
 * \param auio   caller's uio; consumed as data is transferred
 * \param acred  caller's credentials
 * \param albn   unused block number (interface compatibility)
 * \param abpp   unused buf pointer (interface compatibility)
 * \param noLock nonzero => caller already holds the needed vcache lock;
 *               only already-cached data is used (afs_FindDCache path)
 * \return 0 or an error code filtered through afs_CheckCode.
 */
int
afs_MemRead(struct vcache *avc, struct uio *auio, afs_ucred_t *acred,
	    daddr_t albn, struct buf **abpp, int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, tlen;
    afs_size_t len = 0;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error, trybusy = 1;
    afs_int32 code;
    struct vrequest *treq = NULL;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop = NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
    memset(&tuio, 0, sizeof(tuio));
#endif

    AFS_STATCNT(afs_MemRead);
    if (avc->vc_error)
	return EIO;

    /* check that we have the latest status info in the vnode cache */
    if ((code = afs_CreateReq(&treq, acred)))
	return code;

    if (!noLock) {
	code = afs_VerifyVCache(avc, treq);
	if (code) {
	    code = afs_CheckCode(code, treq, 8);	/* failed to get it */
	    afs_DestroyReq(treq);
	    return code;
	}
    }
#ifndef AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
	/* NFS translator requests get an explicit access check */
	if (!afs_AccessOK
	    (avc, PRSFS_READ, treq, CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
	    code = afs_CheckCode(EACCES, treq, 9);
	    afs_DestroyReq(treq);
	    return code;
	}
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
    memset(tvec, 0, sizeof(struct iovec));
#endif
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
	       totalLength, ICL_TYPE_OFFSET,
	       ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
	ObtainReadLock(&avc->lock);
#if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
	hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    /*
     * Locks held:
     * avc->lock(R)
     */

    /* This bit is bogus. We're checking to see if the read goes past the
     * end of the file. If so, we should be zeroing out all of the buffers
     * that the client has passed into us (there is a danger that we may leak
     * kernel memory if we do not). However, this behaviour is disabled by
     * not setting len before this segment runs, and by setting len to 0
     * immediately we enter it. In addition, we also need to check for a read
     * which partially goes off the end of the file in the while loop below.
     */
    if (filePos >= avc->f.m.Length) {
	if (len > AFS_ZEROS)
	    len = sizeof(afs_zeros);	/* and in 0 buffer */
	len = 0;		/* deliberately disables the zero-fill (see above) */
#ifdef AFS_DARWIN80_ENV
	trimlen = len;
	tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
	afsio_copy(auio, &tuio, tvec);
	trimlen = len;
	afsio_trim(&tuio, trimlen);
#endif
	AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }
    while (avc->f.m.Length > 0 && totalLength > 0) {
	/* read all of the cached info */
	if (filePos >= avc->f.m.Length)
	    break;		/* all done */
	if (noLock) {
	    /* lock-free path: only use data already in the cache */
	    if (tdc) {
		ReleaseReadLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }
	    tdc = afs_FindDCache(avc, filePos);
	    if (tdc) {
		ObtainReadLock(&tdc->lock);
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		len = tdc->f.chunkBytes - offset;
	    }
	} else {
	    int versionOk;
	    /* a tricky question: does the presence of the DFFetching flag
	     * mean that we're fetching the latest version of the file?  No.
	     * The server could update the file as soon as the fetch responsible
	     * for the setting of the DFFetching flag completes.
	     *
	     * However, the presence of the DFFetching flag (visible under
	     * a dcache read lock since it is set and cleared only under a
	     * dcache write lock) means that we're fetching as good a version
	     * as was known to this client at the time of the last call to
	     * afs_VerifyVCache, since the latter updates the stat cache's
	     * m.DataVersion field under a vcache write lock, and from the
	     * time that the DFFetching flag goes on in afs_GetDCache (before
	     * the fetch starts), to the time it goes off (after the fetch
	     * completes), afs_GetDCache keeps at least a read lock on the
	     * vcache entry.
	     *
	     * This means that if the DFFetching flag is set, we can use that
	     * data for any reads that must come from the current version of
	     * the file (current == m.DataVersion).
	     *
	     * Another way of looking at this same point is this: if we're
	     * fetching some data and then try do an afs_VerifyVCache, the
	     * VerifyVCache operation will not complete until after the
	     * DFFetching flag is turned off and the dcache entry's f.versionNo
	     * field is updated.
	     *
	     * Note, by the way, that if DFFetching is set,
	     * m.DataVersion > f.versionNo (the latter is not updated until
	     * after the fetch completes).
	     */
	    if (tdc) {
		ReleaseReadLock(&tdc->lock);
		afs_PutDCache(tdc);	/* before reusing tdc */
	    }
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
	  try_background:
#endif
	    tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2);
	    ObtainReadLock(&tdc->lock);
	    /* now, first try to start transfer, if we'll need the data.  If
	     * data already coming, we don't need to do this, obviously.  Type
	     * 2 requests never return a null dcache entry, btw.
	     */
	    if (!(tdc->dflags & DFFetching)
		&& !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
		/* have cache entry, it is not coming in now,
		 * and we'll need new data */
	      tagain:
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) {
#else
		if (trybusy && !afs_BBusy()) {
#endif
		    struct brequest *bp;
		    /* daemon is not busy */
		    ObtainSharedLock(&tdc->mflock, 665);
		    if (!(tdc->mflags & DFFetchReq)) {
			int dontwait = B_DONTWAIT;
			/* start the daemon (may already be running, however) */
			UpgradeSToWLock(&tdc->mflock, 666);
			tdc->mflags |= DFFetchReq;
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
			if (afs_protocols & VICEP_ACCESS)
			    dontwait = 0;
#endif
			bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred,
					(afs_size_t) filePos, (afs_size_t) 0,
					tdc, (void *)0, (void *)0);
			if (!bp) {
			    /* Bkg table full: retry without the daemon */
			    tdc->mflags &= ~DFFetchReq;
			    trybusy = 0;	/* Avoid bkg daemon since they're too busy */
			    ReleaseWriteLock(&tdc->mflock);
			    goto tagain;
			}
			ConvertWToSLock(&tdc->mflock);
			/* don't use bp pointer! */
		    }
		    code = 0;
		    ConvertSToRLock(&tdc->mflock);
		    while (!code && tdc->mflags & DFFetchReq) {
			afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
				   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
				   __LINE__, ICL_TYPE_POINTER, tdc,
				   ICL_TYPE_INT32, tdc->dflags);
			/* don't need waiting flag on this one */
			ReleaseReadLock(&tdc->mflock);
			ReleaseReadLock(&tdc->lock);
			ReleaseReadLock(&avc->lock);
			code = afs_osi_SleepSig(&tdc->validPos);
			ObtainReadLock(&avc->lock);
			ObtainReadLock(&tdc->lock);
			ObtainReadLock(&tdc->mflock);
		    }
		    ReleaseReadLock(&tdc->mflock);
		    if (code) {
			error = code;	/* interrupted sleep */
			break;
		    }
		}
	    }
	    /* now data may have started flowing in (if DFFetching is on).  If
	     * data is now streaming in, then wait for some interesting stuff.
	     */
	    code = 0;
	    while (!code && (tdc->dflags & DFFetching)
		   && tdc->validPos <= filePos) {
		/* too early: wait for DFFetching flag to vanish,
		 * or data to appear */
		afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
			   __FILE__, ICL_TYPE_INT32, __LINE__,
			   ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags);
		ReleaseReadLock(&tdc->lock);
		ReleaseReadLock(&avc->lock);
		code = afs_osi_SleepSig(&tdc->validPos);
		ObtainReadLock(&avc->lock);
		ObtainReadLock(&tdc->lock);
	    }
	    if (code) {
		error = code;
		break;
	    }
	    /* fetching flag gone, data is here, or we never tried
	     * (BBusy for instance) */
	    len = tdc->validPos - filePos;
	    versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0;
	    if (tdc->dflags & DFFetching) {
		/* still fetching, some new data is here:
		 * compute length and offset */
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
	    } else {
		/* no longer fetching, verify data version
		 * (avoid new GetDCache call) */
		if (versionOk && len > 0) {
		    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		} else {
		    /* don't have current data, so get it below */
		    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
			       ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
			       ICL_TYPE_HYPER, &avc->f.m.DataVersion,
			       ICL_TYPE_HYPER, &tdc->f.versionNo);
#if 0
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		    if (afs_protocols & VICEP_ACCESS) {
			printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n",
			       tdc->f.versionNo.low, avc->f.m.DataVersion.low,
			       avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
			       avc->f.fid.Fid.Unique);
			printf("afs_read: validPos %llu filePos %llu totalLength %lld m.Length %llu noLock %d\n",
			       tdc->validPos, filePos, totalLength,
			       avc->f.m.Length, noLock);
			printf("afs_read: or len too low? %lld for %u.%u.%u\n",
			       len, avc->f.fid.Fid.Volume,
			       avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
		    }
#endif
#endif
		    ReleaseReadLock(&tdc->lock);
		    afs_PutDCache(tdc);
		    tdc = NULL;
		}
	    }

	    if (!tdc) {
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		if (afs_protocols & VICEP_ACCESS) {	/* avoid foreground fetch */
		    if (!versionOk) {
			printf("afs_read: avoid forground %u.%u.%u\n",
			       avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
			       avc->f.fid.Fid.Unique);
			goto try_background;
		    }
#if 0
		    printf("afs_read: forground %u.%u.%u\n",
			   avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
			   avc->f.fid.Fid.Unique);
#endif
		}
#endif
		/* If we get here, it was not possible to start the
		 * background daemon. With flag == 1 afs_GetDCache
		 * does the FetchData rpc synchronously.
		 */
		ReleaseReadLock(&avc->lock);
		tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1);
		ObtainReadLock(&avc->lock);
		if (tdc)
		    ObtainReadLock(&tdc->lock);
	    }
	}

	afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
		   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
		   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
	if (!tdc) {
	    error = EIO;
	    break;
	}

	/*
	 * Locks held:
	 * avc->lock(R)
	 * tdc->lock(R)
	 */

	if (len > totalLength)
	    len = totalLength;	/* will read len bytes */
	if (len <= 0) {		/* shouldn't get here if DFFetching is on */
	    /* read past the end of a chunk, may not be at next chunk yet, and yet
	     * also not at eof, so may have to supply fake zeros */
	    len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;	/* bytes left in chunk addr space */
	    if (len > totalLength)
		len = totalLength;	/* and still within xfr request */
	    tlen = avc->f.m.Length - offset;	/* and still within file */
	    if (len > tlen)
		len = tlen;
	    if (len > AFS_ZEROS)
		len = sizeof(afs_zeros);	/* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
	    trimlen = len;
	    tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
	    afsio_copy(auio, &tuio, tvec);
	    trimlen = len;
	    afsio_trim(&tuio, trimlen);
#endif
	    AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
	    if (code) {
		error = code;
		break;
	    }
	} else {
	    /* get the data from the mem cache */

	    /* mung uio structure to be right for this transfer */
#ifdef AFS_DARWIN80_ENV
	    trimlen = len;
	    tuiop = afsio_darwin_partialcopy(auio, trimlen);
	    uio_setoffset(tuiop, offset);
#else
	    afsio_copy(auio, &tuio, tvec);
	    trimlen = len;
	    afsio_trim(&tuio, trimlen);
	    tuio.afsio_offset = offset;
#endif

	    code = afs_MemReadUIO(&tdc->f.inode, tuiop);

	    if (code) {
		error = code;
		break;
	    }
	}
	/* otherwise we've read some, fixup length, etc and continue with next seg */
	len = len - AFS_UIO_RESID(tuiop);	/* compute amount really transferred */
	trimlen = len;
	afsio_skip(auio, trimlen);	/* update input uio structure */
	totalLength -= len;
	transferLength += len;
	filePos += len;

	if (len <= 0)
	    break;		/* surprise eof */
#ifdef AFS_DARWIN80_ENV
	if (tuiop) {
	    uio_free(tuiop);
	    tuiop = 0;
	}
#endif
    }				/* the whole while loop */

    /*
     * Locks held:
     * avc->lock(R)
     * tdc->lock(R) if tdc
     */

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch.
     */
    if (tdc) {
	ReleaseReadLock(&tdc->lock);
	/*
	 * try to queue prefetch, if needed. If DataVersion is zero there
	 * should not be any more: files with DV 0 never have been stored
	 * on the fileserver, symbolic links and directories never require
	 * more than a single chunk.
	 */
	if (!noLock && !(hiszero(avc->f.m.DataVersion)) &&
#ifndef AFS_VM_RDWR_ENV
	    afs_preCache
#else
	    1
#endif
	    ) {
	    afs_PrefetchChunk(avc, tdc, acred, treq);
	}
	afs_PutDCache(tdc);
    }
    if (!noLock)
	ReleaseReadLock(&avc->lock);
#ifdef AFS_DARWIN80_ENV
    if (tuiop)
	uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    error = afs_CheckCode(error, treq, 10);
    afs_DestroyReq(treq);
    return error;
}

/* called with the dcache entry triggering the fetch, the vcache entry involved,
 * and a vrequest for the read call. Marks the dcache entry as having already
 * triggered a prefetch, starts the prefetch going and sets the DFFetchReq
 * flag in the prefetched block, so that the next call to read knows to wait
 * for the daemon to start doing things.
 *
 * This function must be called with the vnode at least read-locked, and
 * no locks on the dcache, because it plays around with dcache entries.
*/
void
afs_PrefetchChunk(struct vcache *avc, struct dcache *adc, afs_ucred_t *acred,
		  struct vrequest *areq)
{
    struct dcache *tdc;
    afs_size_t offset;
    afs_size_t j1, j2;		/* junk vbls for GetDCache to trash */

    offset = adc->f.chunk + 1;		/* next chunk we'll need */
    offset = AFS_CHUNKTOBASE(offset);	/* base of next chunk */
    ObtainReadLock(&adc->lock);
    ObtainSharedLock(&adc->mflock, 662);
    if (offset < avc->f.m.Length && !(adc->mflags & DFNextStarted)
	&& !afs_BBusy()) {
	struct brequest *bp;

	UpgradeSToWLock(&adc->mflock, 663);
	adc->mflags |= DFNextStarted;	/* we've tried to prefetch for this guy */
	ReleaseWriteLock(&adc->mflock);
	ReleaseReadLock(&adc->lock);

	tdc = afs_GetDCache(avc, offset, areq, &j1, &j2, 2);	/* type 2 never returns 0 */
	/*
	 * In disconnected mode, type 2 can return 0 because it doesn't
	 * make any sense to allocate a dcache we can never fill
	 */
	 if (tdc == NULL)
	     return;

	ObtainSharedLock(&tdc->mflock, 651);
	if (!(tdc->mflags & DFFetchReq)) {
	    /* ask the daemon to do the work */
	    UpgradeSToWLock(&tdc->mflock, 652);
	    tdc->mflags |= DFFetchReq;	/* guaranteed to be cleared by BKG or GetDCache */
	    /* last parm (1) tells bkg daemon to do an afs_PutDCache when it is done,
	     * since we don't want to wait for it to finish before doing so ourselves.
	     */
	    bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred,
			    (afs_size_t) offset, (afs_size_t) 1, tdc,
			    (void *)0, (void *)0);
	    if (!bp) {
		/* Bkg table full; just abort non-important prefetching to avoid deadlocks */
		tdc->mflags &= ~DFFetchReq;
		ReleaseWriteLock(&tdc->mflock);
		afs_PutDCache(tdc);

		/*
		 * DCLOCKXXX: This is a little sketchy, since someone else
		 * could have already started a prefetch..  In practice,
		 * this probably doesn't matter; at most it would cause an
		 * extra slot in the BKG table to be used up when someone
		 * prefetches this for the second time.
		 */
		ObtainReadLock(&adc->lock);
		ObtainWriteLock(&adc->mflock, 664);
		adc->mflags &= ~DFNextStarted;
		ReleaseWriteLock(&adc->mflock);
		ReleaseReadLock(&adc->lock);
	    } else {
		ReleaseWriteLock(&tdc->mflock);
	    }
	} else {
	    ReleaseSharedLock(&tdc->mflock);
	    afs_PutDCache(tdc);
	}
    } else {
	ReleaseSharedLock(&adc->mflock);
	ReleaseReadLock(&adc->lock);
    }
}

/*
 * Read data for a file out of the on-disk (UFS) cache, chunk by chunk,
 * fetching missing or stale chunks from the fileserver (preferably via a
 * background daemon request, synchronously as a last resort).
 *
 * \param avc    vcache entry for the file being read.
 * \param auio   uio describing the user's buffers; updated as data is copied.
 * \param acred  caller's credentials.
 * \param albn   unused block number (interface compatibility).
 * \param abpp   unused buffer pointer (interface compatibility).
 * \param noLock if non-zero, caller already holds the needed vcache lock and
 *               only already-cached data (afs_FindDCache) is used.
 *
 * \return 0 on success, or an error code filtered through afs_CheckCode().
 */
int
afs_UFSRead(struct vcache *avc, struct uio *auio, afs_ucred_t *acred,
	    daddr_t albn, struct buf **abpp, int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, tlen;
    afs_size_t len = 0;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error;
    struct osi_file *tfile;
    afs_int32 code;
    int trybusy = 1;
    struct vrequest *treq = NULL;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop = NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
    memset(&tuio, 0, sizeof(tuio));
#endif

    AFS_STATCNT(afs_UFSRead);
    if (avc && avc->vc_error)
	return EIO;

    AFS_DISCON_LOCK();

    /* check that we have the latest status info in the vnode cache */
    if ((code = afs_CreateReq(&treq, acred))) {
	/* FIX: previously returned with the discon lock still held,
	 * leaking AFS_DISCON_LOCK on this (rare) failure path.  All
	 * other exits from this function release it. */
	AFS_DISCON_UNLOCK();
	return code;
    }
    if (!noLock) {
	if (!avc)
	    osi_Panic("null avc in afs_UFSRead");
	else {
	    code = afs_VerifyVCache(avc, treq);
	    if (code) {
		code = afs_CheckCode(code, treq, 11);	/* failed to get it */
		afs_DestroyReq(treq);
		AFS_DISCON_UNLOCK();
		return code;
	    }
	}
    }
#ifndef	AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
	if (!afs_AccessOK
	    (avc, PRSFS_READ, treq,
	     CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
	    AFS_DISCON_UNLOCK();
	    code = afs_CheckCode(EACCES, treq, 12);
	    afs_DestroyReq(treq);
	    return code;
	}
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
    memset(tvec, 0, sizeof(struct iovec));
#endif
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
	       totalLength, ICL_TYPE_OFFSET,
	       ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
	ObtainReadLock(&avc->lock);
#if	defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
	hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    /* This bit is bogus. We're checking to see if the read goes past the
     * end of the file. If so, we should be zeroing out all of the buffers
     * that the client has passed into us (there is a danger that we may leak
     * kernel memory if we do not). However, this behaviour is disabled by
     * not setting len before this segment runs, and by setting len to 0
     * immediately we enter it. In addition, we also need to check for a read
     * which partially goes off the end of the file in the while loop below.
     */
    if (filePos >= avc->f.m.Length) {
	if (len > AFS_ZEROS)
	    len = sizeof(afs_zeros);	/* and in 0 buffer */
	len = 0;
#ifdef AFS_DARWIN80_ENV
	trimlen = len;
	tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
	afsio_copy(auio, &tuio, tvec);
	trimlen = len;
	afsio_trim(&tuio, trimlen);
#endif
	AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }

    while (avc->f.m.Length > 0 && totalLength > 0) {
	/* read all of the cached info */
	if (filePos >= avc->f.m.Length)
	    break;		/* all done */
	if (noLock) {
	    if (tdc) {
		ReleaseReadLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }
	    tdc = afs_FindDCache(avc, filePos);
	    if (tdc) {
		ObtainReadLock(&tdc->lock);
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		len = tdc->validPos - filePos;
	    }
	} else {
	    int versionOk;
	    /* a tricky question: does the presence of the DFFetching flag
	     * mean that we're fetching the latest version of the file?  No.
	     * The server could update the file as soon as the fetch responsible
	     * for the setting of the DFFetching flag completes.
	     *
	     * However, the presence of the DFFetching flag (visible under
	     * a dcache read lock since it is set and cleared only under a
	     * dcache write lock) means that we're fetching as good a version
	     * as was known to this client at the time of the last call to
	     * afs_VerifyVCache, since the latter updates the stat cache's
	     * m.DataVersion field under a vcache write lock, and from the
	     * time that the DFFetching flag goes on in afs_GetDCache (before
	     * the fetch starts), to the time it goes off (after the fetch
	     * completes), afs_GetDCache keeps at least a read lock on the
	     * vcache entry.
	     *
	     * This means that if the DFFetching flag is set, we can use that
	     * data for any reads that must come from the current version of
	     * the file (current == m.DataVersion).
	     *
	     * Another way of looking at this same point is this: if we're
	     * fetching some data and then try do an afs_VerifyVCache, the
	     * VerifyVCache operation will not complete until after the
	     * DFFetching flag is turned off and the dcache entry's f.versionNo
	     * field is updated.
	     *
	     * Note, by the way, that if DFFetching is set,
	     * m.DataVersion > f.versionNo (the latter is not updated until
	     * after the fetch completes).
	     */
	    if (tdc) {
		ReleaseReadLock(&tdc->lock);
		afs_PutDCache(tdc);	/* before reusing tdc */
	    }
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
	try_background:
#endif
	    tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2);
	    if (!tdc) {
		error = ENETDOWN;
		break;
	    }

	    ObtainReadLock(&tdc->lock);
	    /* now, first try to start transfer, if we'll need the data.  If
	     * data already coming, we don't need to do this, obviously.  Type
	     * 2 requests never return a null dcache entry, btw.
	     */
	    if (!(tdc->dflags & DFFetching)
		&& !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
		/* have cache entry, it is not coming in now, and we'll need new data */
	      tagain:
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) {
#else
		if (trybusy && !afs_BBusy()) {
#endif
		    struct brequest *bp;
		    /* daemon is not busy */
		    ObtainSharedLock(&tdc->mflock, 667);
		    if (!(tdc->mflags & DFFetchReq)) {
			int dontwait = B_DONTWAIT;
			UpgradeSToWLock(&tdc->mflock, 668);
			tdc->mflags |= DFFetchReq;
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
			if (afs_protocols & VICEP_ACCESS)
			    dontwait = 0;
#endif
			bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred,
					(afs_size_t) filePos, (afs_size_t) 0,
					tdc, (void *)0, (void *)0);
			if (!bp) {
			    /* Bkg table full; retry deadlocks */
			    tdc->mflags &= ~DFFetchReq;
			    trybusy = 0;	/* Avoid bkg daemon since they're too busy */
			    ReleaseWriteLock(&tdc->mflock);
			    goto tagain;
			}
			ConvertWToSLock(&tdc->mflock);
		    }
		    code = 0;
		    ConvertSToRLock(&tdc->mflock);
		    while (!code && tdc->mflags & DFFetchReq) {
			afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
				   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
				   __LINE__, ICL_TYPE_POINTER, tdc,
				   ICL_TYPE_INT32, tdc->dflags);
			/* don't need waiting flag on this one */
			ReleaseReadLock(&tdc->mflock);
			ReleaseReadLock(&tdc->lock);
			ReleaseReadLock(&avc->lock);
			code = afs_osi_SleepSig(&tdc->validPos);
			ObtainReadLock(&avc->lock);
			ObtainReadLock(&tdc->lock);
			ObtainReadLock(&tdc->mflock);
		    }
		    ReleaseReadLock(&tdc->mflock);
		    if (code) {
			error = code;
			break;
		    }
		}
	    }
	    /* now data may have started flowing in (if DFFetching is on).  If
	     * data is now streaming in, then wait for some interesting stuff.
	     */
	    code = 0;
	    while (!code && (tdc->dflags & DFFetching)
		   && tdc->validPos <= filePos) {
		/* too early: wait for DFFetching flag to vanish,
		 * or data to appear */
		afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
			   __FILE__, ICL_TYPE_INT32, __LINE__,
			   ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
			   tdc->dflags);
		ReleaseReadLock(&tdc->lock);
		ReleaseReadLock(&avc->lock);
		code = afs_osi_SleepSig(&tdc->validPos);
		ObtainReadLock(&avc->lock);
		ObtainReadLock(&tdc->lock);
	    }
	    if (code) {
		error = code;
		break;
	    }
	    /* fetching flag gone, data is here, or we never tried
	     * (BBusy for instance) */
	    len = tdc->validPos - filePos;
	    versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0;
	    if (tdc->dflags & DFFetching) {
		/* still fetching, some new data is here:
		 * compute length and offset */
		offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
	    } else {
		/* no longer fetching, verify data version (avoid new
		 * GetDCache call) */
		if (versionOk && len > 0) {
		    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
		} else {
		    /* don't have current data, so get it below */
		    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
			       ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
			       ICL_TYPE_HYPER, &avc->f.m.DataVersion,
			       ICL_TYPE_HYPER, &tdc->f.versionNo);
#if 0
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		    if (afs_protocols & VICEP_ACCESS) {
			printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n",
			       tdc->f.versionNo.low, avc->f.m.DataVersion.low,
			       avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
			       avc->f.fid.Fid.Unique);
			printf("afs_read: validPos %llu filePos %llu totalLength %d m.Length %llu noLock %d\n",
			       tdc->validPos, filePos, totalLength,
			       avc->f.m.Length, noLock);
			printf("afs_read: or len too low? %lld for %u.%u.%u\n",
			       len, avc->f.fid.Fid.Volume,
			       avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
		    }
#endif
#endif
		    ReleaseReadLock(&tdc->lock);
		    afs_PutDCache(tdc);
		    tdc = NULL;
		}
	    }

	    if (!tdc) {
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
		if (afs_protocols & VICEP_ACCESS) {	/* avoid foreground fetch */
		    if (!versionOk) {
			printf("afs_read: avoid forground %u.%u.%u\n",
			       avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
			       avc->f.fid.Fid.Unique);
			goto try_background;
		    }
		}
#endif
		/* If we get here, it was not possible to start the
		 * background daemon. With flag == 1 afs_GetDCache
		 * does the FetchData rpc synchronously.
		 */
		ReleaseReadLock(&avc->lock);
		tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1);
		ObtainReadLock(&avc->lock);
		if (tdc)
		    ObtainReadLock(&tdc->lock);
	    }
	}

	if (!tdc) {
	    error = EIO;
	    break;
	}
	len = tdc->validPos - filePos;
	afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
		   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
		   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
	if (len > totalLength)
	    len = totalLength;	/* will read len bytes */
	if (len <= 0) {		/* shouldn't get here if DFFetching is on */
	    afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER,
		       tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(tdc->validPos),
		       ICL_TYPE_INT32, tdc->f.chunkBytes, ICL_TYPE_INT32,
		       tdc->dflags);
	    /* read past the end of a chunk, may not be at next chunk yet, and yet
	     * also not at eof, so may have to supply fake zeros */
	    len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;	/* bytes left in chunk addr space */
	    if (len > totalLength)
		len = totalLength;	/* and still within xfr request */
	    tlen = avc->f.m.Length - offset;	/* and still within file */
	    if (len > tlen)
		len = tlen;
	    if (len > AFS_ZEROS)
		len = sizeof(afs_zeros);	/* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
	    trimlen = len;
	    tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
	    afsio_copy(auio, &tuio, tvec);
	    trimlen = len;
	    afsio_trim(&tuio, trimlen);
#endif
	    AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
	    if (code) {
		error = code;
		break;
	    }
	} else {
	    /* get the data from the file */
	    tfile = (struct osi_file *)osi_UFSOpen(&tdc->f.inode);
#ifdef AFS_DARWIN80_ENV
	    trimlen = len;
	    tuiop = afsio_darwin_partialcopy(auio, trimlen);
	    uio_setoffset(tuiop, offset);
#else
	    /* mung uio structure to be right for this transfer */
	    afsio_copy(auio, &tuio, tvec);
	    trimlen = len;
	    afsio_trim(&tuio, trimlen);
	    tuio.afsio_offset = offset;
#endif

#if defined(AFS_AIX41_ENV)
	    AFS_GUNLOCK();
	    code =
		VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL,
			  NULL, afs_osi_credp);
	    AFS_GLOCK();
#elif defined(AFS_AIX32_ENV)
	    code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL);
	    /* Flush all JFS pages now for big performance gain in big file cases
	     * If we do something like this, must check to be sure that AFS file
	     * isn't mmapped... see afs_gn_map() for why.
	     */
/*
	  if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) {
 many different ways to do similar things:
   so far, the best performing one is #2, but #1 might match it if we
   straighten out the confusion regarding which pages to flush.  It
   really does matter.
   1.	vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1);
   2.	vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
			(len + PAGESIZE-1)/PAGESIZE);
   3.	vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly
   4.  	vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails
	tfile->vnode->v_gnode->gn_seg = NULL;
   5.   deletep
   6.   ipgrlse
   7.   ifreeseg
          Unfortunately, this seems to cause frequent "cache corruption" episodes.
   	vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
			(len + PAGESIZE-1)/PAGESIZE);
	  }
*/
#elif defined(AFS_AIX_ENV)
	    code =
		VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset,
			  &tuio, NULL, NULL, -1);
#elif defined(AFS_SUN5_ENV)
	    AFS_GUNLOCK();
#ifdef AFS_SUN510_ENV
	    VOP_RWLOCK(tfile->vnode, 0, NULL);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, NULL);
	    VOP_RWUNLOCK(tfile->vnode, 0, NULL);
#else
	    VOP_RWLOCK(tfile->vnode, 0);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_RWUNLOCK(tfile->vnode, 0);
#endif
	    AFS_GLOCK();
#elif defined(AFS_SGI_ENV)
	    AFS_GUNLOCK();
	    AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ);
	    AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp,
			 code);
	    AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ);
	    AFS_GLOCK();
#elif defined(AFS_HPUX100_ENV)
	    AFS_GUNLOCK();
	    code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
	    AFS_GLOCK();
#elif defined(AFS_LINUX20_ENV)
	    AFS_GUNLOCK();
	    code = osi_rdwr(tfile, &tuio, UIO_READ);
	    AFS_GLOCK();
#elif defined(AFS_DARWIN80_ENV)
	    AFS_GUNLOCK();
	    code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
	    AFS_GLOCK();
#elif defined(AFS_DARWIN_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0, current_proc());
	    AFS_GLOCK();
#elif defined(AFS_FBSD80_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0);
	    AFS_GLOCK();
#elif defined(AFS_FBSD_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0, curthread);
	    AFS_GLOCK();
#elif defined(AFS_NBSD_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0);
	    AFS_GLOCK();
#elif defined(AFS_XBSD_ENV)
	    AFS_GUNLOCK();
	    VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
	    code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
	    VOP_UNLOCK(tfile->vnode, 0, curproc);
	    AFS_GLOCK();
#else
	    code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
#endif
	    osi_UFSClose(tfile);

	    if (code) {
		error = code;
		break;
	    }
	}
	/* otherwise we've read some, fixup length, etc and continue with next seg */
	len = len - AFS_UIO_RESID(tuiop);	/* compute amount really transferred */
	trimlen = len;
	afsio_skip(auio, trimlen);	/* update input uio structure */
	totalLength -= len;
	transferLength += len;
	filePos += len;
	if (len <= 0)
	    break;		/* surprise eof */
#ifdef AFS_DARWIN80_ENV
	if (tuiop) {
	    uio_free(tuiop);
	    tuiop = 0;
	}
#endif
    }

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch, obviously.
     */
    if (tdc) {
	ReleaseReadLock(&tdc->lock);
#if !defined(AFS_VM_RDWR_ENV)
	/*
	 * try to queue prefetch, if needed. If DataVersion is zero there
	 * should not be any more: files with DV 0 never have been stored
	 * on the fileserver, symbolic links and directories never require
	 * more than a single chunk.
	 */
	if (!noLock && !(hiszero(avc->f.m.DataVersion))) {
	    if (!(tdc->mflags & DFNextStarted))
		afs_PrefetchChunk(avc, tdc, acred, treq);
	}
#endif
	afs_PutDCache(tdc);
    }
    if (!noLock)
	ReleaseReadLock(&avc->lock);

#ifdef AFS_DARWIN80_ENV
    if (tuiop)
	uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    AFS_DISCON_UNLOCK();
    error = afs_CheckCode(error, treq, 13);
    afs_DestroyReq(treq);
    return error;
}
/* question: does afs_create need to set CDirty in the adp or the avc?
 * I think we can get away without it, but I'm not sure.  Note that
 * afs_setattr is called in here for truncation.
 */
#ifdef AFS_SGI64_ENV
int
afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, int flags,
	   int amode, struct vcache **avcp, afs_ucred_t *acred)
#else /* AFS_SGI64_ENV */
int
afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs,
	   enum vcexcl aexcl, int amode, struct vcache **avcp,
	   afs_ucred_t *acred)
#endif /* AFS_SGI64_ENV */
{
    afs_int32 origCBs, origZaps, finalZaps;
    struct vrequest *treq = NULL;
    afs_int32 code;
    struct afs_conn *tc;
    struct VenusFid newFid;
    struct AFSStoreStatus InStatus;
    struct AFSFetchStatus *OutFidStatus, *OutDirStatus;
    struct AFSVolSync tsync;
    struct AFSCallBack CallBack;
    afs_int32 now;
    struct dcache *tdc;
    afs_size_t offset, len;
    struct server *hostp = 0;
    struct vcache *tvc;
    struct volume *volp = 0;
    struct afs_fakestat_state fakestate;
    struct rx_connection *rxconn;
    XSTATS_DECLS;
    OSI_VC_CONVERT(adp);

    AFS_STATCNT(afs_create);

    OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus));
    memset(&InStatus, 0, sizeof(InStatus));

    if ((code = afs_CreateReq(&treq, acred)))
	goto done2;

    afs_Trace3(afs_iclSetp, CM_TRACE_CREATE, ICL_TYPE_POINTER, adp,
	       ICL_TYPE_STRING, aname, ICL_TYPE_INT32, amode);

    afs_InitFakeStat(&fakestate);

#ifdef AFS_SGI65_ENV
    /* If avcp is passed not null, it's the old reference to this file.
     * We can use this to avoid create races. For now, just decrement
     * the reference count on it.
     */
    if (*avcp) {
	AFS_RELE(AFSTOV(*avcp));
	*avcp = NULL;
    }
#endif

    /* reject over-long or otherwise illegal names before taking any locks */
    if (strlen(aname) > AFSNAMEMAX) {
	code = ENAMETOOLONG;
	goto done3;
    }

    if (!afs_ENameOK(aname)) {
	code = EINVAL;
	goto done3;
    }
    switch (attrs->va_type) {
    case VBLK:
    case VCHR:
#if	!defined(AFS_SUN5_ENV)
    case VSOCK:
#endif
    case VFIFO:
	/* We don't support special devices or FIFOs */
	code = EINVAL;
	goto done3;
    default:
	;
    }
    AFS_DISCON_LOCK();

    code = afs_EvalFakeStat(&adp, &fakestate, treq);
    if (code)
	goto done;
  tagain:
    code = afs_VerifyVCache(adp, treq);
    if (code)
	goto done;

    /** If the volume is read-only, return error without making an RPC to the
      * fileserver
      */
    if (adp->f.states & CRO) {
	code = EROFS;
	goto done;
    }

    if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
	code = ENETDOWN;
	goto done;
    }

    tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1);

    /** Prevent multiple fetchStatus calls to fileserver when afs_GetDCache()
      * returns NULL for an error condition
      */
    if (!tdc) {
	code = EIO;
	goto done;
    }
    ObtainWriteLock(&adp->lock, 135);
    if (tdc)
	ObtainSharedLock(&tdc->lock, 630);

    /*
     * Make sure that the data in the cache is current. We may have
     * received a callback while we were waiting for the write lock.
     */
    if (!(adp->f.states & CStatd)
	|| (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) {
	ReleaseWriteLock(&adp->lock);
	if (tdc) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	}
	goto tagain;
    }
    if (tdc) {
	/* see if file already exists.  If it does, we only set
	 * the size attributes (to handle O_TRUNC) */
	code = afs_dir_Lookup(tdc, aname, &newFid.Fid);	/* use dnlc first xxx */
	if (code == 0) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	    ReleaseWriteLock(&adp->lock);
#ifdef AFS_SGI64_ENV
	    if (flags & VEXCL) {
#else
	    if (aexcl != NONEXCL) {
#endif
		code = EEXIST;	/* file exists in excl mode open */
		goto done;
	    }
	    /* found the file, so use it */
	    newFid.Cell = adp->f.fid.Cell;
	    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	    tvc = NULL;
	    if (newFid.Fid.Unique == 0) {
		tvc = afs_LookupVCache(&newFid, treq, NULL, adp, aname);
	    }
	    if (!tvc)		/* lookup failed or wasn't called */
		tvc = afs_GetVCache(&newFid, treq, NULL, NULL);

	    if (tvc) {
		/* if the thing exists, we need the right access to open it.
		 * we must check that here, since no other checks are
		 * made by the open system call */
		len = attrs->va_size;	/* only do the truncate */
		/*
		 * We used to check always for READ access before; the
		 * problem is that we will fail if the existing file
		 * has mode -w-w-w, which is wrong.
		 */
		if ((amode & VREAD)
		    && !afs_AccessOK(tvc, PRSFS_READ, treq, CHECK_MODE_BITS)) {
		    afs_PutVCache(tvc);
		    code = EACCES;
		    goto done;
		}
#if defined(AFS_DARWIN80_ENV)
		if ((amode & VWRITE) || VATTR_IS_ACTIVE(attrs, va_data_size))
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		if ((amode & VWRITE) || (attrs->va_mask & AT_SIZE))
#else
		if ((amode & VWRITE) || len != 0xffffffff)
#endif
		{
		    /* needed for write access check */
		    tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
		    tvc->f.parent.unique = adp->f.fid.Fid.Unique;
		    /* need write mode for these guys */
		    if (!afs_AccessOK
			(tvc, PRSFS_WRITE, treq, CHECK_MODE_BITS)) {
			afs_PutVCache(tvc);
			code = EACCES;
			goto done;
		    }
		}
#if defined(AFS_DARWIN80_ENV)
		if (VATTR_IS_ACTIVE(attrs, va_data_size))
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		if (attrs->va_mask & AT_SIZE)
#else
		if (len != 0xffffffff)
#endif
		{
		    if (vType(tvc) != VREG) {
			afs_PutVCache(tvc);
			code = EISDIR;
			goto done;
		    }
		    /* do a truncate */
#if defined(AFS_DARWIN80_ENV)
		    VATTR_INIT(attrs);
		    VATTR_SET_SUPPORTED(attrs, va_data_size);
		    VATTR_SET_ACTIVE(attrs, va_data_size);
#elif defined(UKERNEL)
		    attrs->va_mask = ATTR_SIZE;
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
		    attrs->va_mask = AT_SIZE;
#else
		    VATTR_NULL(attrs);
#endif
		    attrs->va_size = len;
		    /* mark the vnode as mid-create so afs_setattr knows */
		    ObtainWriteLock(&tvc->lock, 136);
		    tvc->f.states |= CCreating;
		    ReleaseWriteLock(&tvc->lock);
#if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
#if defined(AFS_SGI64_ENV)
		    code =
			afs_setattr(VNODE_TO_FIRST_BHV((vnode_t *) tvc),
				    attrs, 0, acred);
#else
		    code = afs_setattr(tvc, attrs, 0, acred);
#endif /* AFS_SGI64_ENV */
#else /* SUN5 || SGI */
		    code = afs_setattr(tvc, attrs, acred);
#endif /* SUN5 || SGI */
		    ObtainWriteLock(&tvc->lock, 137);
		    tvc->f.states &= ~CCreating;
		    ReleaseWriteLock(&tvc->lock);
		    if (code) {
			afs_PutVCache(tvc);
			goto done;
		    }
		}
		*avcp = tvc;
	    } else {
		/* Directory entry already exists, but we cannot fetch the
		 * fid it points to.
		 */
		code = EIO;
	    }
	    /* make sure vrefCount bumped only if code == 0 */
	    goto done;
	}
    }

    /* if we create the file, we don't do any access checks, since
     * that's how O_CREAT is supposed to work */
    if (adp->f.states & CForeign) {
	origCBs = afs_allCBs;
	origZaps = afs_allZaps;
    } else {
	origCBs = afs_evenCBs;	/* if changes, we don't really have a callback */
	origZaps = afs_evenZaps;	/* number of even numbered vnodes discarded */
    }
    InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE | AFS_SETGROUP;
    InStatus.ClientModTime = osi_Time();
    InStatus.Group = (afs_int32) afs_cr_gid(acred);
    if (AFS_NFSXLATORREQ(acred)) {
	/*
	 * XXX The following is mainly used to fix a bug in the HP-UX
	 * nfs client where they create files with mode of 0 without
	 * doing any setattr later on to fix it.  * XXX
	 */
#if	defined(AFS_AIX_ENV)
	if (attrs->va_mode != -1) {
#else
#if	defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
	if (attrs->va_mask & AT_MODE) {
#else
	if (attrs->va_mode != ((unsigned short)-1)) {
#endif
#endif
	    if (!attrs->va_mode)
		attrs->va_mode = 0x1b6;	/* XXX default mode: rw-rw-rw XXX */
	}
    }

    if (!AFS_IS_DISCONNECTED) {
	/* If not disconnected, connect to the server. */
	InStatus.UnixModeBits = attrs->va_mode & 0xffff;	/* only care about protection bits */
	do {
	    tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn);
	    if (tc) {
		hostp = tc->parent->srvr->server;	/* remember for callback processing */
		now = osi_Time();
		XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_CREATEFILE);
		RX_AFS_GUNLOCK();
		code =
		    RXAFS_CreateFile(rxconn, (struct AFSFid *)&adp->f.fid.Fid,
				     aname, &InStatus,
				     (struct AFSFid *)&newFid.Fid,
				     OutFidStatus, OutDirStatus, &CallBack,
				     &tsync);
		RX_AFS_GLOCK();
		XSTATS_END_TIME;
		CallBack.ExpirationTime += now;
	    } else
		code = -1;
	} while (afs_Analyze
		 (tc, rxconn, code, &adp->f.fid, treq,
		  AFS_STATS_FS_RPCIDX_CREATEFILE, SHARED_LOCK, NULL));

	if ((code == EEXIST || code == UAEEXIST) &&
#ifdef AFS_SGI64_ENV
	    !(flags & VEXCL)
#else /* AFS_SGI64_ENV */
	    aexcl == NONEXCL
#endif
	    ) {
	    /* if we get an EEXIST in nonexcl mode, just do a lookup */
	    if (tdc) {
		ReleaseSharedLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }
	    ReleaseWriteLock(&adp->lock);
#if defined(AFS_SGI64_ENV)
	    code =
		afs_lookup(VNODE_TO_FIRST_BHV((vnode_t *) adp), aname, avcp,
			   NULL, 0, NULL, acred);
#elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV)
	    code = afs_lookup(adp, aname, avcp, NULL, 0, NULL, acred);
#elif defined(UKERNEL)
	    code = afs_lookup(adp, aname, avcp, acred, 0);
#elif !defined(AFS_DARWIN_ENV)
	    code = afs_lookup(adp, aname, avcp, acred);
#endif
	    goto done;
	}

	if (code) {
	    if (code < 0) {
		/* negative code: the connection/volume is suspect */
		afs_StaleVCache(adp);
	    }
	    ReleaseWriteLock(&adp->lock);
	    if (tdc) {
		ReleaseSharedLock(&tdc->lock);
		afs_PutDCache(tdc);
	    }
	    goto done;
	}
    } else {
	/* Generate a fake FID for disconnected mode. */
	newFid.Cell = adp->f.fid.Cell;
	newFid.Fid.Volume = adp->f.fid.Fid.Volume;
	afs_GenFakeFid(&newFid, VREG, 1);
    }				/* if (!AFS_IS_DISCON_RW) */

    /* otherwise, we should see if we can make the change to the dir locally */
    if (tdc)
	UpgradeSToWLock(&tdc->lock, 631);
    if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) {
	/* we can do it locally */
	ObtainWriteLock(&afs_xdcache, 291);
	code = afs_dir_Create(tdc, aname, &newFid.Fid);
	ReleaseWriteLock(&afs_xdcache);
	if (code) {
	    /* local update failed: invalidate the cached directory */
	    ZapDCE(tdc);
	    DZap(tdc);
	}
    }
    if (tdc) {
	ReleaseWriteLock(&tdc->lock);
	afs_PutDCache(tdc);
    }
    if (AFS_IS_DISCON_RW)
	adp->f.m.LinkCount++;
    newFid.Cell = adp->f.fid.Cell;
    newFid.Fid.Volume = adp->f.fid.Fid.Volume;
    ReleaseWriteLock(&adp->lock);
    volp = afs_FindVolume(&newFid, READ_LOCK);

    /* New tricky optimistic callback handling algorithm for file creation works
     * as follows.  We create the file essentially with no locks set at all.  File
     * server may thus handle operations from others cache managers as well as from
     * this very own cache manager that reference the file in question before
     * we managed to create the cache entry.  However, if anyone else changes
     * any of the status information for a file, we'll see afs_evenCBs increase
     * (files always have even fids).  If someone on this workstation manages
     * to do something to the file, they'll end up having to create a cache
     * entry for the new file.  Either we'll find it once we've got the afs_xvcache
     * lock set, or it was also *deleted* (the vnode, before we got there), in which
     * case we will find evenZaps has changed, too.  Thus, we only assume we have the
     * right status information if no callbacks or vnode removals have occurred to
     * even numbered files from the time the call started until the time that we got
     * the xvcache lock set.  Of course, this also assumes that any call that modifies
     * a file first gets a write lock on the file's vnode, but if that weren't true,
     * the whole cache manager would fail, since no call would be able to update the
     * local vnode status after modifying a file on a file server.
     */
    ObtainWriteLock(&afs_xvcache, 138);
    if (adp->f.states & CForeign)
	finalZaps = afs_allZaps;	/* do this before calling newvcache */
    else
	finalZaps = afs_evenZaps;	/* do this before calling newvcache */
    /* don't need to call RemoveVCB, since only path leaving a callback is the
     * one where we pass through afs_NewVCache.  Can't have queued a VCB unless
     * we created and freed an entry between file creation time and here, and the
     * freeing of the vnode will change evenZaps.  Don't need to update the VLRU
     * queue, since the find will only succeed in the event of a create race, and
     * then the vcache will be at the front of the VLRU queue anyway...
     */
    if (!(tvc = afs_FindVCache(&newFid, 0, DO_STATS))) {
	tvc = afs_NewVCache(&newFid, hostp);
	if (tvc) {
	    int finalCBs;
	    ObtainWriteLock(&tvc->lock, 139);

	    ObtainWriteLock(&afs_xcbhash, 489);
	    finalCBs = afs_evenCBs;	/* add the callback in */
	    if (adp->f.states & CForeign) {
		tvc->f.states |= CForeign;
		finalCBs = afs_allCBs;
	    }
	    if (origCBs == finalCBs && origZaps == finalZaps) {
		/* we faked the entire thing, so don't stat */
		tvc->f.states |= CStatd;
		tvc->f.states &= ~CBulkFetching;
		if (!AFS_IS_DISCON_RW) {
		    tvc->cbExpires = CallBack.ExpirationTime;
		    afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime),
				      volp);
		}
	    } else {
		afs_StaleVCacheFlags(tvc,
				     AFS_STALEVC_CBLOCKED |
				     AFS_STALEVC_CLEARCB, CUnique);
	    }
	    ReleaseWriteLock(&afs_xcbhash);
	    if (AFS_IS_DISCON_RW) {
		afs_DisconAddDirty(tvc, VDisconCreate, 0);
		afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VREG);
	    } else {
		afs_ProcessFS(tvc, OutFidStatus, treq);
	    }

	    tvc->f.parent.vnode = adp->f.fid.Fid.Vnode;
	    tvc->f.parent.unique = adp->f.fid.Fid.Unique;
	    ReleaseWriteLock(&tvc->lock);
	    *avcp = tvc;
	    code = 0;
	} else {
	    /* Cannot create a new vcache. */
	    code = EIO;
	}
    } else {
	/* otherwise cache entry already exists, someone else must
	 * have created it.  Comments used to say:  "don't need write
	 * lock to *clear* these flags" but we should do it anyway.
	 * Code used to clear stat bit and callback, but I don't see
	 * the point -- we didn't have a create race, somebody else just
	 * snuck into NewVCache before we got here, probably a racing
	 * lookup.
	 */
	*avcp = tvc;
	code = 0;
    }
    ReleaseWriteLock(&afs_xvcache);

  done:
    AFS_DISCON_UNLOCK();

  done3:
    if (volp)
	afs_PutVolume(volp, READ_LOCK);

    if (code == 0) {
	if (afs_mariner)
	    afs_AddMarinerName(aname, *avcp);
	/* return the new status in vattr */
	afs_CopyOutAttrs(*avcp, attrs);
	if (afs_mariner)
	    afs_MarinerLog("store$Creating", *avcp);
    }

    afs_PutFakeStat(&fakestate);
    code = afs_CheckCode(code, treq, 20);
    afs_DestroyReq(treq);

  done2:
    osi_FreeSmallSpace(OutFidStatus);
    osi_FreeSmallSpace(OutDirStatus);
    return code;
}

/*
 * Check to see if we can track the change locally: requires that
 * we have sufficiently recent info in data cache.  If so, we
 * know the new DataVersion number, and place it correctly in both the
 * data and stat cache entries.  This routine returns 1 if we should
 * do the operation locally, and 0 otherwise.
 *
 * This routine must be called with the stat cache entry write-locked,
 * and dcache entry write-locked.
 */
int
afs_LocalHero(struct vcache *avc, struct dcache *adc,
	      AFSFetchStatus * astat, int aincr)
{
    afs_int32 ok;
    afs_hyper_t avers;

    AFS_STATCNT(afs_LocalHero);
    hset64(avers, astat->dataVersionHigh, astat->DataVersion);
    /* avers *is* the version number now, no matter what */

    if (adc) {
	/* does what's in the dcache *now* match what's in the vcache *now*,
	 * and do we have a valid callback? if not, our local copy is not "ok" */
	ok = (hsame(avc->f.m.DataVersion, adc->f.versionNo) && avc->callback
	      && (avc->f.states & CStatd) && avc->cbExpires >= osi_Time());
    } else {
	ok = 0;
    }
    if (ok) {
	/* check that the DV on the server is what we expect it to be */
	afs_hyper_t newDV;
	hset(newDV, adc->f.versionNo);
	hadd32(newDV, aincr);
	if (!hsame(avers, newDV)) {
	    ok = 0;
	}
    }
#if defined(AFS_SGI_ENV)
    osi_Assert(avc->v.v_type == VDIR);
#endif
    /* The bulk status code used the length as a sequence number.  */
    /* Don't update the vcache entry unless the stats are current. */
    if (avc->f.states & CStatd) {
	afs_SetDataVersion(avc, &avers);
#ifdef AFS_64BIT_CLIENT
	FillInt64(avc->f.m.Length, astat->Length_hi, astat->Length);
#else /* AFS_64BIT_CLIENT */
	avc->f.m.Length = astat->Length;
#endif /* AFS_64BIT_CLIENT */
	avc->f.m.Date = astat->ClientModTime;
    }
    if (ok) {
	/* we've been tracking things correctly */
	adc->dflags |= DFEntryMod;
	adc->f.versionNo = avers;
	return 1;
    } else {
	/* out of sync: invalidate the cached directory data and, if the
	 * stats are current, purge the name cache entries under this dir */
	if (adc) {
	    ZapDCE(adc);
	    DZap(adc);
	}
	if (avc->f.states & CStatd) {
	    osi_dnlc_purgedp(avc);
	}
	return 0;
    }
}
int afs_StoreAllSegments(struct vcache *avc, struct vrequest *areq, int sync) { struct dcache *tdc; afs_int32 code = 0; afs_int32 index; afs_int32 origCBs, foreign = 0; int hash; afs_hyper_t newDV, oldDV; /* DV when we start, and finish, respectively */ struct dcache **dcList; unsigned int i, j, minj, moredata, high, off; afs_size_t maxStoredLength; /* highest offset we've written to server. */ int safety, marineronce = 0; AFS_STATCNT(afs_StoreAllSegments); hset(oldDV, avc->f.m.DataVersion); hset(newDV, avc->f.m.DataVersion); hash = DVHash(&avc->f.fid); foreign = (avc->f.states & CForeign); dcList = osi_AllocLargeSpace(AFS_LRALLOCSIZ); afs_Trace2(afs_iclSetp, CM_TRACE_STOREALL, ICL_TYPE_POINTER, avc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); #if !defined(AFS_AIX32_ENV) && !defined(AFS_SGI65_ENV) /* In the aix vm implementation we need to do the vm_writep even * on the memcache case since that's we adjust the file's size * and finish flushing partial vm pages. */ if ((cacheDiskType != AFS_FCACHE_TYPE_MEM) || (sync & AFS_VMSYNC_INVAL) || (sync & AFS_VMSYNC) || (sync & AFS_LASTSTORE)) #endif /* !AFS_AIX32_ENV && !AFS_SGI65_ENV */ { /* If we're not diskless, reading a file may stress the VM * system enough to cause a pageout, and this vnode would be * locked when the pageout occurs. We can prevent this problem * by making sure all dirty pages are already flushed. We don't * do this when diskless because reading a diskless (i.e. * memory-resident) chunk doesn't require using new VM, and we * also don't want to dump more dirty data into a diskless cache, * since they're smaller, and we might exceed its available * space. */ #if defined(AFS_SUN5_ENV) if (sync & AFS_VMSYNC_INVAL) /* invalidate VM pages */ osi_VM_TryToSmush(avc, CRED(), 1); else #endif osi_VM_StoreAllSegments(avc); } if (AFS_IS_DISCONNECTED && !AFS_IN_SYNC) { /* This will probably make someone sad ... 
*/ /*printf("Net down in afs_StoreSegments\n");*/ return ENETDOWN; } ConvertWToSLock(&avc->lock); /* * Subsequent code expects a sorted list, and it expects all the * chunks in the list to be contiguous, so we need a sort and a * while loop in here, too - but this will work for a first pass... * 92.10.05 - OK, there's a sort in here now. It's kind of a modified * bin sort, I guess. Chunk numbers start with 0 * * - Have to get a write lock on xdcache because GetDSlot might need it (if * the chunk doesn't have a dcache struct). * This seems like overkill in most cases. * - I'm not sure that it's safe to do "index = .hvNextp", then unlock * xdcache, then relock xdcache and try to use index. It is done * a lot elsewhere in the CM, but I'm not buying that argument. * - should be able to check IFDataMod without doing the GetDSlot (just * hold afs_xdcache). That way, it's easy to do this without the * writelock on afs_xdcache, and we save unneccessary disk * operations. I don't think that works, 'cuz the next pointers * are still on disk. */ origCBs = afs_allCBs; maxStoredLength = 0; minj = 0; do { memset(dcList, 0, NCHUNKSATONCE * sizeof(struct dcache *)); high = 0; moredata = FALSE; /* lock and start over from beginning of hash chain * in order to avoid a race condition. */ ObtainWriteLock(&afs_xdcache, 284); index = afs_dvhashTbl[hash]; for (j = 0; index != NULLIDX;) { if ((afs_indexFlags[index] & IFDataMod) && (afs_indexUnique[index] == avc->f.fid.Fid.Unique)) { tdc = afs_GetValidDSlot(index); /* refcount+1. 
*/ if (!tdc) { ReleaseWriteLock(&afs_xdcache); code = EIO; goto done; } ReleaseReadLock(&tdc->tlock); if (!FidCmp(&tdc->f.fid, &avc->f.fid) && tdc->f.chunk >= minj) { off = tdc->f.chunk - minj; if (off < NCHUNKSATONCE) { if (dcList[off]) osi_Panic("dclist slot already in use!"); if (afs_mariner && !marineronce) { /* first chunk only */ afs_MarinerLog("store$Storing", avc); marineronce++; } dcList[off] = tdc; if (off > high) high = off; j++; /* DCLOCKXXX: chunkBytes is protected by tdc->lock which we * can't grab here, due to lock ordering with afs_xdcache. * So, disable this shortcut for now. -- kolya 2001-10-13 */ /* shortcut: big win for little files */ /* tlen -= tdc->f.chunkBytes; * if (tlen <= 0) * break; */ } else { moredata = TRUE; afs_PutDCache(tdc); if (j == NCHUNKSATONCE) break; } } else { afs_PutDCache(tdc); } } index = afs_dvnextTbl[index]; } ReleaseWriteLock(&afs_xdcache); /* this guy writes chunks, puts back dcache structs, and bumps newDV */ /* "moredata" just says "there are more dirty chunks yet to come". */ if (j) { code = afs_CacheStoreVCache(dcList, avc, areq, sync, minj, high, moredata, &newDV, &maxStoredLength); /* Release any zero-length dcache entries in our interval * that we locked but didn't store back above. */ for (j = 0; j <= high; j++) { tdc = dcList[j]; if (tdc) { osi_Assert(tdc->f.chunkBytes == 0); ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } } } /* if (j) */ minj += NCHUNKSATONCE; } while (!code && moredata); done: UpgradeSToWLock(&avc->lock, 29); /* send a trivial truncation store if did nothing else */ if (code == 0) { /* * Call StoreMini if we haven't written enough data to extend the * file at the fileserver to the client's notion of the file length. 
*/ if ((avc->f.truncPos != AFS_NOTRUNC) || ((avc->f.states & CExtendedFile) && (maxStoredLength < avc->f.m.Length))) { code = afs_StoreMini(avc, areq); if (code == 0) hadd32(newDV, 1); /* just bumped here, too */ } avc->f.states &= ~CExtendedFile; } /* * Finally, turn off DWriting, turn on DFEntryMod, * update f.versionNo. * A lot of this could be integrated into the loop above */ if (!code) { afs_hyper_t h_unset; hones(h_unset); minj = 0; do { moredata = FALSE; memset(dcList, 0, NCHUNKSATONCE * sizeof(struct dcache *)); /* overkill, but it gets the lock in case GetDSlot needs it */ ObtainWriteLock(&afs_xdcache, 285); for (j = 0, safety = 0, index = afs_dvhashTbl[hash]; index != NULLIDX && safety < afs_cacheFiles + 2; index = afs_dvnextTbl[index]) { if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) { tdc = afs_GetValidDSlot(index); if (!tdc) { /* This is okay; since manipulating the dcaches at this * point is best-effort. We only get a dcache here to * increment the dv and turn off DWriting. If we were * supposed to do that for a dcache, but could not * due to an I/O error, it just means the dv won't * be updated so we don't be able to use that cached * chunk in the future. That's inefficient, but not * an error. */ continue; } ReleaseReadLock(&tdc->tlock); if (!FidCmp(&tdc->f.fid, &avc->f.fid) && tdc->f.chunk >= minj) { off = tdc->f.chunk - minj; if (off < NCHUNKSATONCE) { /* this is the file, and the correct chunk range */ if (j >= NCHUNKSATONCE) osi_Panic ("Too many dcache entries in range\n"); dcList[j++] = tdc; } else { moredata = TRUE; afs_PutDCache(tdc); if (j == NCHUNKSATONCE) break; } } else { afs_PutDCache(tdc); } } } ReleaseWriteLock(&afs_xdcache); for (i = 0; i < j; i++) { /* Iterate over the dcache entries we collected above */ tdc = dcList[i]; ObtainSharedLock(&tdc->lock, 677); /* was code here to clear IFDataMod, but it should only be done * in storedcache and storealldcache. */ /* Only increase DV if we had up-to-date data to start with. 
* Otherwise, we could be falsely upgrading an old chunk * (that we never read) into one labelled with the current * DV #. Also note that we check that no intervening stores * occurred, otherwise we might mislabel cache information * for a chunk that we didn't store this time */ /* Don't update the version number if it's not yet set. */ if (!hsame(tdc->f.versionNo, h_unset) && hcmp(tdc->f.versionNo, oldDV) >= 0) { if ((!(afs_dvhack || foreign) && hsame(avc->f.m.DataVersion, newDV)) || ((afs_dvhack || foreign) && (origCBs == afs_allCBs))) { /* no error, this is the DV */ UpgradeSToWLock(&tdc->lock, 678); hset(tdc->f.versionNo, avc->f.m.DataVersion); tdc->dflags |= DFEntryMod; /* DWriting may not have gotten cleared above, if all * we did was a StoreMini */ tdc->f.states &= ~DWriting; ConvertWToSLock(&tdc->lock); } } ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } minj += NCHUNKSATONCE; } while (moredata); } if (code) { /* * Invalidate chunks after an error for ccores files since * afs_inactive won't be called for these and they won't be * invalidated. Also discard data if it's a permanent error from the * fileserver. */ if (areq->permWriteError || (avc->f.states & CCore)) { afs_InvalidateAllSegments(avc); } } afs_Trace3(afs_iclSetp, CM_TRACE_STOREALLDONE, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, avc->f.m.Length, ICL_TYPE_INT32, code); /* would like a Trace5, but it doesn't exist... */ afs_Trace3(afs_iclSetp, CM_TRACE_AVCLOCKER, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, avc->lock.wait_states, ICL_TYPE_INT32, avc->lock.excl_locked); afs_Trace4(afs_iclSetp, CM_TRACE_AVCLOCKEE, ICL_TYPE_POINTER, avc, ICL_TYPE_INT32, avc->lock.wait_states, ICL_TYPE_INT32, avc->lock.readers_reading, ICL_TYPE_INT32, avc->lock.num_waiting); /* * Finally, if updated DataVersion matches newDV, we did all of the * stores. If mapDV indicates that the page cache was flushed up * to when we started the store, then we can relabel them as flushed * as recently as newDV. 
* Turn off CDirty bit because the stored data is now in sync with server. */ if (code == 0 && hcmp(avc->mapDV, oldDV) >= 0) { if ((!(afs_dvhack || foreign) && hsame(avc->f.m.DataVersion, newDV)) || ((afs_dvhack || foreign) && (origCBs == afs_allCBs))) { hset(avc->mapDV, newDV); avc->f.states &= ~CDirty; } } osi_FreeLargeSpace(dcList); /* If not the final write a temporary error is ok. */ if (code && !areq->permWriteError && !(sync & AFS_LASTSTORE)) code = 0; return code; } /*afs_StoreAllSegments (new 03/02/94) */
/* set the real time */
/*
 * afs_osi_SetTime -- set the system clock to *atv.
 *
 * Each platform gets its own branch: AIX uses ksettimer() with a
 * timestruc_t (nanoseconds), Solaris uses stime(), IRIX marshals the
 * seconds through a stimea syscall-argument struct, and pre-10.5 Darwin
 * uses setthetime().  On Darwin 8.0+ the body is intentionally empty.
 * The generic BSD-ish fallback adjusts boottime, swaps in the new
 * `time` value at raised IPL, and calls resettodr() to push the change
 * to the hardware clock.
 *
 * NOTE(review): on HP-UX 11.22 the clock is deliberately not set at all
 * (spl7 is unavailable there) -- confirm that is still the intent.
 */
void
afs_osi_SetTime(osi_timeval_t * atv)
{
#if defined(AFS_AIX32_ENV)
    struct timestruc_t t;

    t.tv_sec = atv->tv_sec;
    t.tv_nsec = atv->tv_usec * 1000;	/* microseconds -> nanoseconds */
    ksettimer(&t);		/* Was -> settimer(TIMEOFDAY, &t); */
#elif defined(AFS_SUN5_ENV)
    stime(atv->tv_sec);
#elif defined(AFS_SGI_ENV)
    struct stimea {
	sysarg_t time;
    } sta;

    /* drop the global AFS lock around the syscall-style stime() */
    AFS_GUNLOCK();
    sta.time = atv->tv_sec;
    stime(&sta);
    AFS_GLOCK();
#elif defined(AFS_DARWIN_ENV)
#ifndef AFS_DARWIN80_ENV
    AFS_GUNLOCK();
    setthetime(atv);
    AFS_GLOCK();
#endif
#else
    /* stolen from kern_time.c */
#ifndef AFS_AUX_ENV
    /* keep boottime consistent so uptime calculations stay correct */
    boottime.tv_sec += atv->tv_sec - time.tv_sec;
#endif
#ifdef AFS_HPUX_ENV
    {
#if !defined(AFS_HPUX1122_ENV)
	/* drop the setting of the clock for now. spl7 is not
	 * known on hpux11.22
	 */
	ulong_t s;
	struct timeval t;
	t.tv_sec = atv->tv_sec;
	t.tv_usec = atv->tv_usec;
	s = spl7();
	time = t;
	(void)splx(s);
	resettodr(atv);
#endif
    }
#else
    {
	int s;
	/* block clock interrupts while swapping in the new time */
	s = splclock();
	time = *atv;
	(void)splx(s);
    }
    resettodr();
#endif
#ifdef AFS_AUX_ENV
    logtchg(atv->tv_sec);
#endif
#endif /* AFS_DARWIN_ENV */
    AFS_STATCNT(osi_SetTime);
}
/*
 * Discard every cached data chunk for a file and mark its status stale.
 *
 * avc - the vcache entry whose cached segments are to be invalidated.
 *
 * Strategy: under the afs_xdcache write lock the DV hash chain is walked
 * twice -- first to count matching chunks (sizing dcList), then to clear
 * the dirty/page flags and collect the dcache refs.  The collected
 * entries are then zapped outside the xdcache lock.  Always returns 0;
 * an unreadable dcache slot is a panic here, because failing to
 * invalidate would risk serving never-stored data indefinitely.
 */
int
afs_InvalidateAllSegments(struct vcache *avc)
{
    struct dcache *tdc;
    afs_int32 hash;
    afs_int32 index;
    struct dcache **dcList;
    int i, dcListMax, dcListCount;

    AFS_STATCNT(afs_InvalidateAllSegments);
    afs_Trace2(afs_iclSetp, CM_TRACE_INVALL, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length));
    hash = DVHash(&avc->f.fid);
    avc->f.truncPos = AFS_NOTRUNC;	/* don't truncate later */
    avc->f.states &= ~CExtendedFile;	/* not any more */
    ObtainWriteLock(&afs_xcbhash, 459);
    afs_DequeueCallback(avc);
    avc->f.states &= ~(CStatd | CDirty);	/* mark status information as bad, too */
    ReleaseWriteLock(&afs_xcbhash);
    if (avc->f.fid.Fid.Vnode & 1 || (vType(avc) == VDIR))
	osi_dnlc_purgedp(avc);
    /* Blow away pages; for now, only for Solaris */
#if	(defined(AFS_SUN5_ENV))
    if (WriteLocked(&avc->lock))
	osi_ReleaseVM(avc, (afs_ucred_t *)0);
#endif
    /*
     * Block out others from screwing with this table; is a read lock
     * sufficient?
     */
    ObtainWriteLock(&afs_xdcache, 286);
    dcListMax = 0;

    /* Pass 1: count the chunks belonging to this file so we know how
     * large a pointer array to allocate. */
    for (index = afs_dvhashTbl[hash]; index != NULLIDX;) {
	if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
	    tdc = afs_GetValidDSlot(index);
	    if (!tdc) {
		/* In the case of fatal errors during stores, we MUST
		 * invalidate all of the relevant chunks. Otherwise, the chunks
		 * will be left with the 'new' data that was never successfully
		 * written to the server, but the DV in the dcache is still the
		 * old DV. So, we may indefinitely serve data to applications
		 * that is not actually in the file on the fileserver. If we
		 * cannot afs_GetValidDSlot the appropriate entries, currently
		 * there is no way to ensure the dcache is invalidated. So for
		 * now, to avoid risking serving bad data from the cache, panic
		 * instead.
		 */
		osi_Panic("afs_InvalidateAllSegments tdc count");
	    }
	    ReleaseReadLock(&tdc->tlock);
	    if (!FidCmp(&tdc->f.fid, &avc->f.fid))
		dcListMax++;
	    afs_PutDCache(tdc);
	}
	index = afs_dvnextTbl[index];
    }

    dcList = osi_Alloc(dcListMax * sizeof(struct dcache *));
    dcListCount = 0;

    /* Pass 2: clear dirty/page flags and collect a ref to each chunk;
     * the actual zapping happens below, outside afs_xdcache. */
    for (index = afs_dvhashTbl[hash]; index != NULLIDX;) {
	if (afs_indexUnique[index] == avc->f.fid.Fid.Unique) {
	    tdc = afs_GetValidDSlot(index);
	    if (!tdc) {
		/* We cannot proceed after getting this error; we risk serving
		 * incorrect data to applications. So panic instead. See the
		 * above comment next to the previous afs_GetValidDSlot call
		 * for details.
		 */
		osi_Panic("afs_InvalidateAllSegments tdc store");
	    }
	    ReleaseReadLock(&tdc->tlock);
	    if (!FidCmp(&tdc->f.fid, &avc->f.fid)) {
		/* same file? we'll zap it */
		if (afs_indexFlags[index] & IFDataMod) {
		    afs_stats_cmperf.cacheCurrDirtyChunks--;
		    /* don't write it back */
		    afs_indexFlags[index] &= ~IFDataMod;
		}
		afs_indexFlags[index] &= ~IFAnyPages;
		/* dcListCount can exceed dcListMax if chunks appeared
		 * between the two passes; drop the extras. */
		if (dcListCount < dcListMax)
		    dcList[dcListCount++] = tdc;
		else
		    afs_PutDCache(tdc);
	    } else {
		afs_PutDCache(tdc);
	    }
	}
	index = afs_dvnextTbl[index];
    }

    ReleaseWriteLock(&afs_xdcache);

    /* Zap the collected entries now that afs_xdcache is released. */
    for (i = 0; i < dcListCount; i++) {
	tdc = dcList[i];

	ObtainWriteLock(&tdc->lock, 679);
	ZapDCE(tdc);
	if (vType(avc) == VDIR)
	    DZap(tdc);
	ReleaseWriteLock(&tdc->lock);
	afs_PutDCache(tdc);
    }

    osi_Free(dcList, dcListMax * sizeof(struct dcache *));

    return 0;
}
/* Note that we don't set CDirty here, this is OK because the unlink
 * RPC is called synchronously */
/*
 * afs_remove -- VFS remove (unlink) entry point.
 *
 * adp   - the parent directory vcache
 * aname - the name to remove
 * acred - the caller's credentials
 *
 * Returns 0 or an errno-style code.  If the target is still open with
 * extra references, it is silently renamed to a ".__afsXXXX" name
 * (afs_newname) instead of removed, and deleted later by afs_remunlink.
 * Retries from `tagain` if a callback invalidated the cached directory
 * while waiting for locks.
 */
int
afs_remove(OSI_VC_DECL(adp), char *aname, afs_ucred_t *acred)
{
    struct vrequest treq;
    register struct dcache *tdc;
    struct VenusFid unlinkFid;
    register afs_int32 code;
    register struct vcache *tvc;
    afs_size_t offset, len;
    struct afs_fakestat_state fakestate;
    OSI_VC_CONVERT(adp);

    AFS_STATCNT(afs_remove);
    afs_Trace2(afs_iclSetp, CM_TRACE_REMOVE, ICL_TYPE_POINTER, adp,
	       ICL_TYPE_STRING, aname);

    if ((code = afs_InitReq(&treq, acred))) {
	return code;
    }

    afs_InitFakeStat(&fakestate);
    AFS_DISCON_LOCK();
    code = afs_EvalFakeStat(&adp, &fakestate, &treq);
    if (code)
	goto done;

    /* Check if this is dynroot */
    if (afs_IsDynroot(adp)) {
	code = afs_DynrootVOPRemove(adp, acred, aname);
	goto done;
    }
    if (afs_IsDynrootMount(adp)) {
	code = ENOENT;
	goto done;
    }

    if (strlen(aname) > AFSNAMEMAX) {
	code = ENAMETOOLONG;
	goto done;
    }
  tagain:
    code = afs_VerifyVCache(adp, &treq);
    tvc = NULL;
    if (code) {
	code = afs_CheckCode(code, &treq, 23);
	goto done;
    }

    /** If the volume is read-only, return error without making an RPC to the
      * fileserver
      */
    if (adp->f.states & CRO) {
	code = EROFS;
	goto done;
    }

    /* If we're running disconnected without logging, go no further... */
    if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
	code = ENETDOWN;
	goto done;
    }

    tdc = afs_GetDCache(adp, (afs_size_t) 0, &treq, &offset, &len, 1);	/* test for error below */
    ObtainWriteLock(&adp->lock, 142);
    if (tdc)
	ObtainSharedLock(&tdc->lock, 638);

    /*
     * Make sure that the data in the cache is current. We may have
     * received a callback while we were waiting for the write lock.
     */
    if (!(adp->f.states & CStatd)
	|| (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) {
	ReleaseWriteLock(&adp->lock);
	if (tdc) {
	    ReleaseSharedLock(&tdc->lock);
	    afs_PutDCache(tdc);
	}
	goto tagain;
    }

    unlinkFid.Fid.Vnode = 0;
    if (!tvc) {
	tvc = osi_dnlc_lookup(adp, aname, WRITE_LOCK);
    }

    /* This should not be necessary since afs_lookup() has already
     * done the work.
     */
    if (!tvc)
	if (tdc) {
	    code = afs_dir_Lookup(tdc, aname, &unlinkFid.Fid);
	    if (code == 0) {
		afs_int32 cached = 0;

		unlinkFid.Cell = adp->f.fid.Cell;
		unlinkFid.Fid.Volume = adp->f.fid.Fid.Volume;
		if (unlinkFid.Fid.Unique == 0) {
		    tvc =
			afs_LookupVCache(&unlinkFid, &treq, &cached, adp,
					 aname);
		} else {
		    ObtainReadLock(&afs_xvcache);
		    tvc = afs_FindVCache(&unlinkFid, 0, DO_STATS);
		    ReleaseReadLock(&afs_xvcache);
		}
	    }
	}

    if (AFS_IS_DISCON_RW) {
	if (!adp->f.shadow.vnode && !(adp->f.ddirty_flags & VDisconCreate)) {
	    /* Make shadow copy of parent dir. */
	    afs_MakeShadowDir(adp, tdc);
	}

	/* Can't hold a dcache lock whilst we're getting a vcache one */
	if (tdc)
	    ReleaseSharedLock(&tdc->lock);

	/* XXX - We're holding adp->lock still, and we've got no
	 * guarantee about whether the ordering matches the lock hierarchy */
	/* NOTE(review): tvc can be NULL here if both the dnlc and directory
	 * lookups above failed; this would dereference NULL -- confirm the
	 * lookups cannot fail in disconnected-RW mode. */
	ObtainWriteLock(&tvc->lock, 713);

	/* If we were locally created, then we don't need to do very
	 * much beyond ensuring that we don't exist anymore */
	if (tvc->f.ddirty_flags & VDisconCreate) {
	    afs_DisconRemoveDirty(tvc);
	} else {
	    /* Add removed file vcache to dirty list. */
	    afs_DisconAddDirty(tvc, VDisconRemove, 1);
	}

	adp->f.m.LinkCount--;
	ReleaseWriteLock(&tvc->lock);
	if (tdc)
	    ObtainSharedLock(&tdc->lock, 714);
    }

    if (tvc && osi_Active(tvc)) {
	/* about to delete whole file, prefetch it first */
	ReleaseWriteLock(&adp->lock);
	if (tdc)
	    ReleaseSharedLock(&tdc->lock);
	ObtainWriteLock(&tvc->lock, 143);
	FetchWholeEnchilada(tvc, &treq);
	ReleaseWriteLock(&tvc->lock);
	ObtainWriteLock(&adp->lock, 144);
	/* Technically I don't think we need this back, but let's hold it
	 * anyway; The "got" reference should actually be sufficient. */
	if (tdc)
	    ObtainSharedLock(&tdc->lock, 640);
    }

    osi_dnlc_remove(adp, aname, tvc);

    /* Stash debugging breadcrumbs in the T* globals. */
    Tadp1 = adp;
#ifndef AFS_DARWIN80_ENV
    Tadpr = VREFCOUNT(adp);
#endif
    Ttvc = tvc;
    Tnam = aname;
    Tnam1 = 0;
#ifndef AFS_DARWIN80_ENV
    if (tvc)
	Ttvcr = VREFCOUNT(tvc);
#endif
#ifdef	AFS_AIX_ENV
    if (tvc && VREFCOUNT_GT(tvc, 2) && tvc->opens > 0
	&& !(tvc->f.states & CUnlinked)) {
#else
    if (tvc && VREFCOUNT_GT(tvc, 1) && tvc->opens > 0
	&& !(tvc->f.states & CUnlinked)) {
#endif
	/* Target is still open elsewhere: rename it to a hidden name
	 * and mark it CUnlinked; afs_remunlink deletes it later. */
	char *unlname = afs_newname();

	ReleaseWriteLock(&adp->lock);
	if (tdc)
	    ReleaseSharedLock(&tdc->lock);
	code = afsrename(adp, aname, adp, unlname, acred, &treq);
	Tnam1 = unlname;
	if (!code) {
	    struct VenusFid *oldmvid = NULL;
	    if (tvc->mvid)
		oldmvid = tvc->mvid;
	    tvc->mvid = (struct VenusFid *)unlname;
	    if (oldmvid)
		osi_FreeSmallSpace(oldmvid);
	    crhold(acred);
	    if (tvc->uncred) {
		crfree(tvc->uncred);
	    }
	    tvc->uncred = acred;
	    tvc->f.states |= CUnlinked;
	    /* if rename succeeded, remove should not */
	    ObtainWriteLock(&tvc->lock, 715);
	    if (tvc->f.ddirty_flags & VDisconRemove) {
		tvc->f.ddirty_flags &= ~VDisconRemove;
	    }
	    ReleaseWriteLock(&tvc->lock);
	} else {
	    osi_FreeSmallSpace(unlname);
	}
	if (tdc)
	    afs_PutDCache(tdc);
	afs_PutVCache(tvc);
    } else {
	/* Plain removal; afsremove releases the adp & tdc locks. */
	code = afsremove(adp, tdc, tvc, aname, acred, &treq);
    }
  done:
    afs_PutFakeStat(&fakestate);
#ifndef AFS_DARWIN80_ENV
    /* we can't track by thread, it's not exported in the KPI; only do
     * this on !macos */
    osi_Assert(!WriteLocked(&adp->lock) || (adp->lock.pid_writer != MyPidxx));
#endif
    AFS_DISCON_UNLOCK();
    return code;
}

/* afs_remunlink -- This tries to delete the file at the server after it has
 *     been renamed when unlinked locally but now has been finally released.
 *
 * avc  - the vcache whose hidden (renamed) name should be deleted
 * doit - nonzero to force the deletion even without CUnlinkedDel
 *
 * CAUTION -- may be called with avc unheld. */
int
afs_remunlink(register struct vcache *avc, register int doit)
{
    afs_ucred_t *cred;
    char *unlname;
    struct vcache *adp;
    struct vrequest treq;
    struct VenusFid dirFid;
    register struct dcache *tdc;
    afs_int32 code = 0;

    /* Non-blocking: if someone else holds the lock, just bail. */
    if (NBObtainWriteLock(&avc->lock, 423))
	return 0;
#if defined(AFS_DARWIN80_ENV)
    if (vnode_get(AFSTOV(avc))) {
	ReleaseWriteLock(&avc->lock);
	return 0;
    }
#endif

    if (avc->mvid && (doit || (avc->f.states & CUnlinkedDel))) {
	if ((code = afs_InitReq(&treq, avc->uncred))) {
	    ReleaseWriteLock(&avc->lock);
	} else {
	    /* Must bump the refCount because GetVCache may block.
	     * Also clear mvid so no other thread comes here if we block.
	     */
	    unlname = (char *)avc->mvid;
	    avc->mvid = NULL;
	    cred = avc->uncred;
	    avc->uncred = NULL;

#if defined(AFS_DARWIN_ENV) && !defined(AFS_DARWIN80_ENV)
	    VREF(AFSTOV(avc));
#else
	    AFS_FAST_HOLD(avc);
#endif

	    /* We'll only try this once. If it fails, just release the vnode.
	     * Clear after doing hold so that NewVCache doesn't find us yet.
	     */
	    avc->f.states &= ~(CUnlinked | CUnlinkedDel);

	    ReleaseWriteLock(&avc->lock);

	    /* Reconstruct the parent directory's fid from our saved
	     * parent pointers. */
	    dirFid.Cell = avc->f.fid.Cell;
	    dirFid.Fid.Volume = avc->f.fid.Fid.Volume;
	    dirFid.Fid.Vnode = avc->f.parent.vnode;
	    dirFid.Fid.Unique = avc->f.parent.unique;
	    adp = afs_GetVCache(&dirFid, &treq, NULL, NULL);

	    if (adp) {
		tdc = afs_FindDCache(adp, (afs_size_t) 0);
		ObtainWriteLock(&adp->lock, 159);
		if (tdc)
		    ObtainSharedLock(&tdc->lock, 639);

		/* afsremove releases the adp & tdc locks, and does vn_rele(avc) */
		code = afsremove(adp, tdc, avc, unlname, cred, &treq);
		afs_PutVCache(adp);
	    } else {
		/* we failed - and won't be back to try again. */
		afs_PutVCache(avc);
	    }
	    osi_FreeSmallSpace(unlname);
	    crfree(cred);
	}
    } else {
#if defined(AFS_DARWIN80_ENV)
	vnode_put(AFSTOV(avc));
#endif
	ReleaseWriteLock(&avc->lock);
    }
    return code;
}
/*
 * afs_osi_InitWaitHandle -- reset a wait handle to its idle state.
 *
 * achandle - the handle to initialize; its proc field is cleared so
 *            no process is recorded as waiting on it.
 */
void
afs_osi_InitWaitHandle(struct afs_osi_WaitHandle *achandle)
{
    AFS_STATCNT(osi_InitWaitHandle);

    /* A null proc pointer marks the handle as having no waiter. */
    achandle->proc = (caddr_t) 0;
}
/*
 * afsrename -- rename aname1 in directory aodp to aname2 in directory andp.
 *
 * aodp/aname1 - source directory vcache and name
 * andp/aname2 - target directory vcache and name
 * acred       - caller's credentials
 * areq        - request tracking state
 *
 * Returns 0 or an errno-style code.  Directories are locked smaller-vnode
 * first to respect the lock hierarchy; same-directory renames take only
 * one lock (oneDir).  After the server RPC (or disconnected logging), the
 * cached directory pages are patched locally when afs_LocalHero confirms
 * the data versions advanced exactly as expected; otherwise they are
 * zapped and refetched later.
 */
int
afsrename(struct vcache *aodp, char *aname1, struct vcache *andp,
	  char *aname2, struct AFS_UCRED *acred, struct vrequest *areq)
{
    register struct afs_conn *tc;
    register afs_int32 code = 0;
    afs_int32 returnCode;
    int oneDir, doLocally;
    afs_size_t offset, len;
    struct VenusFid unlinkFid, fileFid;
    struct vcache *tvc;
    struct dcache *tdc1, *tdc2;
    struct AFSFetchStatus OutOldDirStatus, OutNewDirStatus;
    struct AFSVolSync tsync;
    XSTATS_DECLS;
    AFS_STATCNT(afs_rename);
    afs_Trace4(afs_iclSetp, CM_TRACE_RENAME, ICL_TYPE_POINTER, aodp,
	       ICL_TYPE_STRING, aname1, ICL_TYPE_POINTER, andp,
	       ICL_TYPE_STRING, aname2);

    if (strlen(aname1) > AFSNAMEMAX || strlen(aname2) > AFSNAMEMAX) {
	code = ENAMETOOLONG;
	goto done;
    }

    /* verify the latest versions of the stat cache entries */
  tagain:
    code = afs_VerifyVCache(aodp, areq);
    if (code)
	goto done;
    code = afs_VerifyVCache(andp, areq);
    if (code)
	goto done;

    /* lock in appropriate order, after some checks */
    if (aodp->f.fid.Cell != andp->f.fid.Cell
	|| aodp->f.fid.Fid.Volume != andp->f.fid.Fid.Volume) {
	code = EXDEV;
	goto done;
    }
    oneDir = 0;
    code = 0;
    if (andp->f.fid.Fid.Vnode == aodp->f.fid.Fid.Vnode) {
	if (!strcmp(aname1, aname2)) {
	    /* Same directory and same name; this is a noop and just return success
	     * to save cycles and follow posix standards */
	    code = 0;
	    goto done;
	}

	if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) {
	    code = ENETDOWN;
	    goto done;
	}

	ObtainWriteLock(&andp->lock, 147);
	tdc1 = afs_GetDCache(aodp, (afs_size_t) 0, areq, &offset, &len, 0);
	if (!tdc1) {
	    code = ENOENT;
	} else {
	    ObtainWriteLock(&tdc1->lock, 643);
	}
	tdc2 = tdc1;
	oneDir = 1;		/* only one dude locked */
    } else if ((andp->f.states & CRO) || (aodp->f.states & CRO)) {
	code = EROFS;
	goto done;
    } else if (andp->f.fid.Fid.Vnode < aodp->f.fid.Fid.Vnode) {
	ObtainWriteLock(&andp->lock, 148);	/* lock smaller one first */
	ObtainWriteLock(&aodp->lock, 149);
	tdc2 = afs_FindDCache(andp, (afs_size_t) 0);
	if (tdc2)
	    ObtainWriteLock(&tdc2->lock, 644);
	tdc1 = afs_GetDCache(aodp, (afs_size_t) 0, areq, &offset, &len, 0);
	if (tdc1)
	    ObtainWriteLock(&tdc1->lock, 645);
	else
	    code = ENOENT;
    } else {
	ObtainWriteLock(&aodp->lock, 150);	/* lock smaller one first */
	ObtainWriteLock(&andp->lock, 557);
	tdc1 = afs_GetDCache(aodp, (afs_size_t) 0, areq, &offset, &len, 0);
	if (tdc1)
	    ObtainWriteLock(&tdc1->lock, 646);
	else
	    code = ENOENT;
	tdc2 = afs_FindDCache(andp, (afs_size_t) 0);
	if (tdc2)
	    ObtainWriteLock(&tdc2->lock, 647);
    }

    osi_dnlc_remove(aodp, aname1, 0);
    osi_dnlc_remove(andp, aname2, 0);

    /*
     * Make sure that the data in the cache is current. We may have
     * received a callback while we were waiting for the write lock.
     */
    if (tdc1) {
	if (!(aodp->f.states & CStatd)
	    || !hsame(aodp->f.m.DataVersion, tdc1->f.versionNo)) {

	    ReleaseWriteLock(&aodp->lock);
	    if (!oneDir) {
		if (tdc2) {
		    ReleaseWriteLock(&tdc2->lock);
		    afs_PutDCache(tdc2);
		}
		ReleaseWriteLock(&andp->lock);
	    }
	    ReleaseWriteLock(&tdc1->lock);
	    afs_PutDCache(tdc1);
	    goto tagain;
	}
    }

    if (code == 0)
	code = afs_dir_Lookup(tdc1, aname1, &fileFid.Fid);
    if (code) {
	/* source name not found (or no dcache); unwind all locks */
	if (tdc1) {
	    ReleaseWriteLock(&tdc1->lock);
	    afs_PutDCache(tdc1);
	}
	ReleaseWriteLock(&aodp->lock);
	if (!oneDir) {
	    if (tdc2) {
		ReleaseWriteLock(&tdc2->lock);
		afs_PutDCache(tdc2);
	    }
	    ReleaseWriteLock(&andp->lock);
	}
	goto done;
    }

    if (!AFS_IS_DISCON_RW) {
	/* Connected. */
	do {
	    tc = afs_Conn(&aodp->f.fid, areq, SHARED_LOCK);
	    if (tc) {
		XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_RENAME);
		RX_AFS_GUNLOCK();
		code =
		    RXAFS_Rename(tc->id, (struct AFSFid *)&aodp->f.fid.Fid,
				 aname1, (struct AFSFid *)&andp->f.fid.Fid,
				 aname2, &OutOldDirStatus, &OutNewDirStatus,
				 &tsync);
		RX_AFS_GLOCK();
		XSTATS_END_TIME;
	    } else
		code = -1;
	} while (afs_Analyze
		 (tc, code, &andp->f.fid, areq, AFS_STATS_FS_RPCIDX_RENAME,
		  SHARED_LOCK, NULL));
    } else {
#if defined(AFS_DISCON_ENV)
	/* Disconnected. */

	/* Seek moved file vcache. */
	fileFid.Cell = aodp->f.fid.Cell;
	fileFid.Fid.Volume = aodp->f.fid.Fid.Volume;
	ObtainSharedLock(&afs_xvcache, 754);
	tvc = afs_FindVCache(&fileFid, 0, 1);
	ReleaseSharedLock(&afs_xvcache);

	if (tvc) {
	    /* XXX - We're locking this vcache whilst holding dcaches. Ooops */
	    ObtainWriteLock(&tvc->lock, 750);
	    if (!(tvc->f.ddirty_flags & (VDisconRename|VDisconCreate))) {
		/* If the vnode was created locally, then we don't care
		 * about recording the rename - we'll do it automatically
		 * on replay. If we've already renamed, we've already stored
		 * the required information about where we came from.
		 */

		if (!aodp->f.shadow.vnode) {
		    /* Make shadow copy of parent dir only. */
		    afs_MakeShadowDir(aodp, tdc1);
		}

		/* Save old parent dir fid so it will be searchable
		 * in the shadow dir.
		 */
		tvc->f.oldParent.vnode = aodp->f.fid.Fid.Vnode;
		tvc->f.oldParent.unique = aodp->f.fid.Fid.Unique;

		afs_DisconAddDirty(tvc,
				   VDisconRename
				   | (oneDir ? VDisconRenameSameDir:0),
				   1);
	    }

	    ReleaseWriteLock(&tvc->lock);
	    afs_PutVCache(tvc);
	} else {
	    code = ENOENT;
	}			/* if (tvc) */
#endif
    }				/* if !(AFS_IS_DISCON_RW) */
    returnCode = code;		/* remember for later */

    /* Now we try to do things locally.  This is really loathsome code. */
    unlinkFid.Fid.Vnode = 0;
    if (code == 0) {
	/* In any event, we don't really care if the data (tdc2) is not
	 * in the cache; if it isn't, we won't do the update locally.  */
	/* see if version numbers increased properly */
	doLocally = 1;
	if (!AFS_IS_DISCON_RW) {
	    if (oneDir) {
		/* number increases by 1 for whole rename operation */
		if (!afs_LocalHero(aodp, tdc1, &OutOldDirStatus, 1)) {
		    doLocally = 0;
		}
	    } else {
		/* two separate dirs, each increasing by 1 */
		if (!afs_LocalHero(aodp, tdc1, &OutOldDirStatus, 1))
		    doLocally = 0;
		if (!afs_LocalHero(andp, tdc2, &OutNewDirStatus, 1))
		    doLocally = 0;
		if (!doLocally) {
		    /* one side went stale; zap both cached dirs so neither
		     * is left half-updated */
		    if (tdc1) {
			ZapDCE(tdc1);
			DZap(tdc1);
		    }
		    if (tdc2) {
			ZapDCE(tdc2);
			DZap(tdc2);
		    }
		}
	    }
	}			/* if (!AFS_IS_DISCON_RW) */

	/* now really do the work */
	if (doLocally) {
	    /* first lookup the fid of the dude we're moving */
	    code = afs_dir_Lookup(tdc1, aname1, &fileFid.Fid);
	    if (code == 0) {
		/* delete the source */
		code = afs_dir_Delete(tdc1, aname1);
	    }
	    /* first see if target is there */
	    if (code == 0
		&& afs_dir_Lookup(tdc2, aname2, &unlinkFid.Fid) == 0) {
		/* target already exists, and will be unlinked by server */
		code = afs_dir_Delete(tdc2, aname2);
	    }
	    if (code == 0) {
		ObtainWriteLock(&afs_xdcache, 292);
		code = afs_dir_Create(tdc2, aname2, &fileFid.Fid);
		ReleaseWriteLock(&afs_xdcache);
	    }
	    if (code != 0) {
		/* local patching failed partway; discard the cached dirs */
		ZapDCE(tdc1);
		DZap(tdc1);
		if (!oneDir) {
		    ZapDCE(tdc2);
		    DZap(tdc2);
		}
	    }
	}

	/* update dir link counts */
	if (AFS_IS_DISCON_RW) {
	    if (!oneDir) {
		aodp->f.m.LinkCount--;
		andp->f.m.LinkCount++;
	    }
	    /* If we're in the same directory, link count doesn't change */
	} else {
	    aodp->f.m.LinkCount = OutOldDirStatus.LinkCount;
	    if (!oneDir)
		andp->f.m.LinkCount = OutNewDirStatus.LinkCount;
	}
    } else {			/* operation failed (code != 0) */
	if (code < 0) {
	    /* if failed, server might have done something anyway, and
	     * assume that we know about it */
	    ObtainWriteLock(&afs_xcbhash, 498);
	    afs_DequeueCallback(aodp);
	    afs_DequeueCallback(andp);
	    andp->f.states &= ~CStatd;
	    aodp->f.states &= ~CStatd;
	    ReleaseWriteLock(&afs_xcbhash);
	    osi_dnlc_purgedp(andp);
	    osi_dnlc_purgedp(aodp);
	}
    }

    /* release locks */
    if (tdc1) {
	ReleaseWriteLock(&tdc1->lock);
	afs_PutDCache(tdc1);
    }
    if ((!oneDir) && tdc2) {
	ReleaseWriteLock(&tdc2->lock);
	afs_PutDCache(tdc2);
    }
    ReleaseWriteLock(&aodp->lock);
    if (!oneDir) {
	ReleaseWriteLock(&andp->lock);
    }

    if (returnCode) {
	code = returnCode;
	goto done;
    }

    /* now, some more details.  if unlinkFid.Fid.Vnode then we should decrement
     * the link count on this file.  Note that if fileFid is a dir, then we don't
     * have to invalidate its ".." entry, since its DataVersion # should have
     * changed. However, interface is not good enough to tell us the
     * *file*'s new DataVersion, so we're stuck.  Our hack: delete mark
     * the data as having an "unknown" version (effectively discarding the ".."
     * entry */
    if (unlinkFid.Fid.Vnode) {
	unlinkFid.Fid.Volume = aodp->f.fid.Fid.Volume;
	unlinkFid.Cell = aodp->f.fid.Cell;
	tvc = NULL;
	if (!unlinkFid.Fid.Unique) {
	    tvc = afs_LookupVCache(&unlinkFid, areq, NULL, aodp, aname1);
	}
	if (!tvc)		/* lookup failed or wasn't called */
	    tvc = afs_GetVCache(&unlinkFid, areq, NULL, NULL);

	if (tvc) {
#ifdef AFS_BOZONLOCK_ENV
	    afs_BozonLock(&tvc->pvnLock, tvc);	/* Since afs_TryToSmush will do a pvn_vptrunc */
#endif
	    ObtainWriteLock(&tvc->lock, 151);
	    tvc->f.m.LinkCount--;
	    tvc->f.states &= ~CUnique;	/* For the dfs xlator */
	    if (tvc->f.m.LinkCount == 0 && !osi_Active(tvc)) {
		/* if this was last guy (probably) discard from cache.
		 * We have to be careful to not get rid of the stat
		 * information, since otherwise operations will start
		 * failing even if the file was still open (or
		 * otherwise active), and the server no longer has the
		 * info.  If the file still has valid links, we'll get
		 * a break-callback msg from the server, so it doesn't
		 * matter that we don't discard the status info */
		if (!AFS_NFSXLATORREQ(acred))
		    afs_TryToSmush(tvc, acred, 0);
	    }
	    ReleaseWriteLock(&tvc->lock);
#ifdef AFS_BOZONLOCK_ENV
	    afs_BozonUnlock(&tvc->pvnLock, tvc);
#endif
	    afs_PutVCache(tvc);
	}
    }

    /* now handle ".." invalidation */
    if (!oneDir) {
	fileFid.Fid.Volume = aodp->f.fid.Fid.Volume;
	fileFid.Cell = aodp->f.fid.Cell;
	if (!fileFid.Fid.Unique)
	    tvc = afs_LookupVCache(&fileFid, areq, NULL, andp, aname2);
	else
	    tvc = afs_GetVCache(&fileFid, areq, NULL, (struct vcache *)0);
	if (tvc && (vType(tvc) == VDIR)) {
	    ObtainWriteLock(&tvc->lock, 152);
	    tdc1 = afs_FindDCache(tvc, (afs_size_t) 0);
	    if (tdc1) {
		if (AFS_IS_DISCON_RW) {
#if defined(AFS_DISCON_ENV)
		    /* If disconnected, we need to fix (not discard) the "..".*/
		    afs_dir_ChangeFid(tdc1, "..",
				      &aodp->f.fid.Fid.Vnode,
				      &andp->f.fid.Fid.Vnode);
#endif
		} else {
		    ObtainWriteLock(&tdc1->lock, 648);
		    ZapDCE(tdc1);	/* mark as unknown */
		    DZap(tdc1);
		    ReleaseWriteLock(&tdc1->lock);
		    afs_PutDCache(tdc1);	/* put it back */
		}
	    }
	    osi_dnlc_remove(tvc, "..", 0);
	    ReleaseWriteLock(&tvc->lock);
	    afs_PutVCache(tvc);
	} else if (AFS_IS_DISCON_RW && tvc && (vType(tvc) == VREG)) {
	    /* XXX - Should tvc not get locked here? */
	    tvc->f.parent.vnode = andp->f.fid.Fid.Vnode;
	    tvc->f.parent.unique = andp->f.fid.Fid.Unique;
	} else if (tvc) {
	    /* True we shouldn't come here since tvc SHOULD be a dir, but we
	     * 'syntactically' need to unless we change the 'if' above...
	     */
	    afs_PutVCache(tvc);
	}
    }
    code = returnCode;
  done:
    return code;
}
/*!
 * Populate a struct volume from a UUID-capable ("U") VLDB entry.
 *
 * Builds the list of fileservers hosting the volume type that av->volume
 * refers to (RW, RO, or backup), creating struct server entries as needed,
 * then copies the result and the volume-type/id fields into *av.
 *
 * On the normal path this returns with av->lock WRITE-held (taken near the
 * end, never released here) — the caller is expected to release it.
 * NOTE(review): the RPC-failure path below ("if (code) ... return;") returns
 * early WITHOUT taking av->lock, only setting areq->volumeError; confirm all
 * callers tolerate returning unlocked in that case.
 *
 * \param av     volume to fill in; av->volume selects which id we match
 * \param ve     uvldbentry describing the volume's sites
 * \param acell  cell number the volume lives in
 * \param tcell  cell structure, used for VLDB server addresses/ports
 * \param areq   request descriptor; volumeError is set on VLDB failure
 */
void
LockAndInstallUVolumeEntry(struct volume *av, struct uvldbentry *ve, int acell,
			   struct cell *tcell, struct vrequest *areq)
{
    struct server *ts;
    struct afs_conn *tconn;
    struct cell *cellp;
    int i, j;
    afs_uint32 serverid;
    afs_int32 mask;
    int k;
    char type = 0;
    struct server *serverHost[AFS_MAXHOSTS];

    AFS_STATCNT(InstallVolumeEntry);

    memset(serverHost, 0, sizeof(serverHost));

    /* Determine type of volume we want: match av->volume against the RW,
     * RO, and backup ids in the entry, and build the serverFlags mask that
     * selects the matching sites below. */
    if ((ve->flags & VLF_RWEXISTS) && (av->volume == ve->volumeId[RWVOL])) {
	mask = VLSF_RWVOL;
    } else if ((ve->flags & VLF_ROEXISTS)
	       && av->volume == ve->volumeId[ROVOL]) {
	mask = VLSF_ROVOL;
	type |= VRO;
    } else if ((ve->flags & VLF_BACKEXISTS)
	       && (av->volume == ve->volumeId[BACKVOL])) {
	/* backup always is on the same volume as parent */
	mask = VLSF_RWVOL;
	type |= (VRO | VBackup);
    } else {
	mask = 0;		/* Can't find volume in vldb entry */
    }

    cellp = afs_GetCell(acell, 0);

    /* Gather the list of servers the VLDB says the volume is on
     * and initialize the ve->serverHost[] array. If a server struct
     * is not found, then get the list of addresses for the
     * server, VL_GetAddrsU(), and create a server struct, afs_GetServer().
     */
    for (i = 0, j = 0; i < ve->nServers; i++) {
	if (((ve->serverFlags[i] & mask) == 0)
	    || (ve->serverFlags[i] & VLSF_DONTUSE)) {
	    continue;		/* wrong volume don't use this volume */
	}

	if (!(ve->serverFlags[i] & VLSERVER_FLAG_UUID)) {
	    /* The server has no uuid: the serverNumber union holds a plain
	     * IPv4 address in time_low instead; look it up by address. */
	    serverid = htonl(ve->serverNumber[i].time_low);
	    ts = afs_GetServer(&serverid, 1, acell, cellp->fsport,
			       WRITE_LOCK, (afsUUID *) 0, 0, av);
	} else {
	    ts = afs_FindServer(0, cellp->fsport, &ve->serverNumber[i], 0);
	    if (ts && (ts->sr_addr_uniquifier == ve->serverUnique[i])
		&& ts->addr) {
		/* uuid, uniquifier, and portal are the same; cached server
		 * entry is current, nothing to refresh */
	    } else {
		/* Unknown or stale server: ask the VLDB for its current
		 * address list by uuid, then (re)create the server entry. */
		afs_uint32 *addrp, code;
		afs_int32 nentries, unique;
		bulkaddrs addrs;
		ListAddrByAttributes attrs;
		afsUUID uuid;
		struct rx_connection *rxconn;

		memset(&attrs, 0, sizeof(attrs));
		attrs.Mask = VLADDR_UUID;
		attrs.uuid = ve->serverNumber[i];
		memset(&uuid, 0, sizeof(uuid));
		memset(&addrs, 0, sizeof(addrs));
		/* Standard AFS RPC retry loop: afs_Analyze decides whether
		 * the error is retryable and picks the next VLDB host. */
		do {
		    tconn =
			afs_ConnByMHosts(tcell->cellHosts, tcell->vlport,
					 tcell->cellNum, areq, SHARED_LOCK,
					 0, &rxconn);
		    if (tconn) {
			RX_AFS_GUNLOCK();
			code =
			    VL_GetAddrsU(rxconn, &attrs, &uuid, &unique,
					 &nentries, &addrs);
			RX_AFS_GLOCK();
		    } else {
			code = -1;
		    }

		    /* Handle corrupt VLDB (defect 7393) */
		    if (code == 0 && nentries == 0)
			code = VL_NOENT;

		} while (afs_Analyze
			 (tconn, rxconn, code, NULL, areq, -1, SHARED_LOCK,
			  tcell));
		if (code) {
		    /* Better handling of such failures; for now we'll simply retry this call */
		    areq->volumeError = 1;
		    return;
		}

		/* Addresses come back host-order from the XDR layer; convert
		 * to the network order afs_GetServer expects. */
		addrp = addrs.bulkaddrs_val;
		for (k = 0; k < nentries; k++) {
		    addrp[k] = htonl(addrp[k]);
		}

		ts = afs_GetServer(addrp, nentries, acell, cellp->fsport,
				   WRITE_LOCK, &ve->serverNumber[i],
				   ve->serverUnique[i], av);
		/* afs_GetServer copies what it needs; free the XDR-allocated
		 * address list now. */
		xdr_free((xdrproc_t) xdr_bulkaddrs, &addrs);
	    }
#if defined(AFS_LINUX26_ENV) && !defined(UKERNEL)
	    if (afs_compare_serveruuid(&ve->serverNumber[i]))
		av->states |= VPartVisible;
#endif
	}
	serverHost[j] = ts;

	/* The cell field could be 0 if the server entry was created
	 * first with the 'fs setserverprefs' call which doesn't set
	 * the cell field. Thus if the afs_GetServer call above
	 * follows later on it will find the server entry thus it will
	 * simply return without setting any fields, so we set the
	 * field ourselves below.
	 */
	if (!ts->cell)
	    ts->cell = cellp;
	afs_PutServer(ts, WRITE_LOCK);
	j++;
    }

    /* Lock is intentionally left held for the caller ("LockAnd..."). */
    ObtainWriteLock(&av->lock, 111);

    memcpy(av->serverHost, serverHost, sizeof(serverHost));	/* from above */
    av->states |= type;		/* fill in volume types */
    av->rwVol = ((ve->flags & VLF_RWEXISTS) ? ve->volumeId[RWVOL] : 0);
    av->roVol = ((ve->flags & VLF_ROEXISTS) ? ve->volumeId[ROVOL] : 0);
    av->backVol = ((ve->flags & VLF_BACKEXISTS) ? ve->volumeId[BACKVOL] : 0);

    if (ve->flags & VLF_DFSFILESET)
	av->states |= VForeign;

    afs_SortServers(av->serverHost, AFS_MAXHOSTS);
}				/*InstallVolumeEntry */
/*!
 * Release one reference to a connection previously obtained from the
 * connection cache.
 *
 * Only the reference count is dropped here; the struct itself is never
 * freed by this routine.
 * NOTE(review): the decrement is not atomic and locktype is not consulted
 * here — presumably the caller's locking serializes access; confirm.
 *
 * \param ac        connection whose reference is being released
 * \param locktype  lock level held by the caller (unused in this body)
 */
void
afs_PutConn(register struct afs_conn *ac, afs_int32 locktype)
{
    AFS_STATCNT(afs_PutConn);
    --ac->refCount;
}				/*afs_PutConn */