void * osi_UFSOpen(afs_dcache_id_t *ainode) { struct inode *ip; struct osi_file *afile = NULL; extern int cacheDiskType; afs_int32 code = 0; int dummy; AFS_STATCNT(osi_UFSOpen); if (cacheDiskType != AFS_FCACHE_TYPE_UFS) { osi_Panic("UFSOpen called for non-UFS cache\n"); } if (!afs_osicred_initialized) { /* valid for alpha_osf, SunOS, Ultrix */ memset(&afs_osi_cred, 0, sizeof(afs_ucred_t)); crhold(&afs_osi_cred); /* don't let it evaporate, since it is static */ afs_osicred_initialized = 1; } afile = (struct osi_file *)osi_AllocSmallSpace(sizeof(struct osi_file)); setuerror(0); AFS_GUNLOCK(); ip = (struct inode *)igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, (ino_t) ainode->ufs, &dummy); AFS_GLOCK(); if (getuerror()) { osi_FreeSmallSpace(afile); osi_Panic("UFSOpen: igetinode failed"); } iunlock(ip); afile->vnode = ITOV(ip); afile->size = VTOI(afile->vnode)->i_size; afile->offset = 0; afile->proc = (int (*)())0; return (void *)afile; }
void afs_MarinerLog(char *astring, struct vcache *avc) { struct sockaddr_in taddr; char *tp, *tp1, *buf; struct iovec dvec; AFS_STATCNT(afs_MarinerLog); taddr.sin_family = AF_INET; taddr.sin_addr.s_addr = afs_marinerHost; taddr.sin_port = htons(2106); #ifdef STRUCT_SOCKADDR_HAS_SA_LEN taddr.sin_len = sizeof(taddr); #endif tp = buf = osi_AllocSmallSpace(AFS_SMALLOCSIZ); strcpy(tp, astring); tp += strlen(astring); if (avc) { *tp++ = ' '; tp1 = afs_GetMariner(avc); strcpy(tp, tp1); tp += strlen(tp1); } *tp++ = '\n'; /* note, console doesn't want a terminating null */ /* I don't care if mariner packets fail to be sent */ dvec.iov_base = buf; dvec.iov_len = tp - buf; AFS_GUNLOCK(); (void)osi_NetSend(afs_server->socket, &taddr, &dvec, 1, tp - buf, 0); AFS_GLOCK(); osi_FreeSmallSpace(buf); } /*afs_MarinerLog */
void * osi_UFSOpen(afs_dcache_id_t *ainode) { struct osi_file *afile; struct vnode *vp; extern int cacheDiskType; afs_int32 code; AFS_STATCNT(osi_UFSOpen); if (cacheDiskType != AFS_FCACHE_TYPE_UFS) osi_Panic("UFSOpen called for non-UFS cache\n"); afile = (struct osi_file *)osi_AllocSmallSpace(sizeof(struct osi_file)); AFS_GUNLOCK(); code = VFS_VGET(cacheDev.mp, ainode->ufs, &vp); AFS_GLOCK(); if (code == 0 && vp->v_type == VNON) code = ENOENT; if (code) { osi_FreeSmallSpace(afile); osi_Panic("UFSOpen: igetinode failed"); } VOP_UNLOCK(vp, 0, curproc); afile->vnode = vp; #ifdef AFS_OBSD39_ENV afile->size = VTOI(vp)->i_ffs1_size; #else afile->size = VTOI(vp)->i_ffs_size; #endif afile->offset = 0; afile->proc = NULL; return (void *)afile; }
void * osi_UFSOpen(afs_dcache_id_t *ainode) { struct osi_file *afile; struct vnode *vp; extern int cacheDiskType; afs_int32 code; AFS_STATCNT(osi_UFSOpen); if (cacheDiskType != AFS_FCACHE_TYPE_UFS) osi_Panic("UFSOpen called for non-UFS cache\n"); afile = (struct osi_file *)osi_AllocSmallSpace(sizeof(struct osi_file)); AFS_GUNLOCK(); code = VFS_VGET(afs_cacheVfsp, (ino_t) ainode->ufs, LK_EXCLUSIVE, &vp); AFS_GLOCK(); if (code == 0 && vp->v_type == VNON) code = ENOENT; if (code) { osi_FreeSmallSpace(afile); osi_Panic("UFSOpen: igetinode failed"); } #if defined(AFS_FBSD80_ENV) VOP_UNLOCK(vp, 0); #else VOP_UNLOCK(vp, 0, curthread); #endif afile->vnode = vp; afile->size = VTOI(vp)->i_size; afile->offset = 0; afile->proc = NULL; return (void *)afile; }
/* DARWIN uses locking, and so must provide its own */ void shutdown_osisleep(void) { afs_event_t *tmp; int i; for (i=0;i<AFS_EVHASHSIZE;i++) { while ((tmp = afs_evhasht[i]) != NULL) { afs_evhasht[i] = tmp->next; if (tmp->refcount > 0) { afs_warn("nonzero refcount in shutdown_osisleep()\n"); } else { #if defined(AFS_AIX_ENV) xmfree(tmp); #elif defined(AFS_FBSD_ENV) afs_osi_Free(tmp, sizeof(*tmp)); #elif defined(AFS_SGI_ENV) || defined(AFS_XBSD_ENV) || defined(AFS_SUN5_ENV) osi_FreeSmallSpace(tmp); #elif defined(AFS_LINUX26_ENV) kfree(tmp); #elif defined(AFS_LINUX20_ENV) osi_linux_free(tmp); #endif } } } }
int osi_UFSClose(struct osi_file *afile) { AFS_STATCNT(osi_Close); if (afile->vnode) { /* AIX writes entire data regions at a time when dumping core. We've * seen a 26M write go through the system. When this happens, we run * out of available pages. So, we'll flush the vnode's vm if we're short * on space. */ if (vmPageHog) { int code; if (afile->vnode->v_gnode->gn_seg) { /* 524287 is the max number of pages for a file. See test in * vm_writep. */ code = vm_writep(afile->vnode->v_gnode->gn_seg, 0, 524287); } } AFS_RELE(afile->vnode); } osi_FreeSmallSpace(afile); return 0; }
int osi_UFSClose(struct osi_file *afile) { AFS_STATCNT(osi_Close); if (afile->vnode) { VN_RELE(afile->vnode); } osi_FreeSmallSpace(afile); return 0; }
int osi_UFSClose(struct osi_file *afile) { AFS_STATCNT(osi_Close); if (afile->vnode) { #ifdef AFS_DARWIN80_ENV vnode_close(afile->vnode, O_RDWR, afs_osi_ctxtp); #else AFS_RELE(afile->vnode); #endif } osi_FreeSmallSpace(afile); return 0; }
void * osi_VxfsOpen(afs_dcache_id_t *ainode) { struct vnode *vp; struct osi_file *afile = NULL; afs_int32 code = 0; int dummy; afile = osi_AllocSmallSpace(sizeof(struct osi_file)); AFS_GUNLOCK(); code = (*vxfs_vx_vp_byino) (afs_cacheVfsp, &vp, (unsigned int)ainode->ufs); AFS_GLOCK(); if (code) { osi_FreeSmallSpace(afile); osi_Panic("VxfsOpen: vx_vp_byino failed"); } afile->vnode = vp; afile->size = VnodeToSize(afile->vnode); afile->offset = 0; afile->proc = (int (*)())0; return (void *)afile; }
void shutdown_osisleep(void) { struct afs_event *evp, *nevp, **pevpp; int i; for (i=0; i < AFS_EVHASHSIZE; i++) { evp = afs_evhasht[i]; pevpp = &afs_evhasht[i]; while (evp) { EVTLOCK_LOCK(evp); nevp = evp->next; if (evp->refcount == 0) { EVTLOCK_DESTROY(evp); *pevpp = evp->next; osi_FreeSmallSpace(evp); afs_evhashcnt--; } else { afs_warn("nonzero refcount in shutdown_osisleep()\n"); EVTLOCK_UNLOCK(evp); pevpp = &evp->next; } evp = nevp; } } }
void * osi_UFSOpen(afs_dcache_id_t *ainode) { struct vnode *vp; struct vattr va; struct osi_file *afile = NULL; extern int cacheDiskType; afs_int32 code = 0; int dummy; char fname[1024]; struct osi_stat tstat; AFS_STATCNT(osi_UFSOpen); if (cacheDiskType != AFS_FCACHE_TYPE_UFS) { osi_Panic("UFSOpen called for non-UFS cache\n"); } if (!afs_osicred_initialized) { /* valid for alpha_osf, SunOS, Ultrix */ memset(&afs_osi_cred, 0, sizeof(afs_ucred_t)); afs_osi_cred.cr_ref++; #ifndef AFS_DARWIN110_ENV afs_osi_cred.cr_ngroups = 1; #endif afs_osicred_initialized = 1; } afile = (struct osi_file *)osi_AllocSmallSpace(sizeof(struct osi_file)); AFS_GUNLOCK(); #ifdef AFS_CACHE_VNODE_PATH if (!ainode->ufs) { osi_Panic("No cache inode\n"); } code = vnode_open(ainode->ufs, O_RDWR, 0, 0, &vp, afs_osi_ctxtp); #else #ifndef AFS_DARWIN80_ENV if (afs_CacheFSType == AFS_APPL_HFS_CACHE) code = igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, &ainode->ufs, &vp, &va, &dummy); /* XXX hfs is broken */ else if (afs_CacheFSType == AFS_APPL_UFS_CACHE) #endif code = igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, (ino_t) ainode->ufs, &vp, &va, &dummy); #ifndef AFS_DARWIN80_ENV else panic("osi_UFSOpen called before cacheops initialized\n"); #endif #endif AFS_GLOCK(); if (code) { osi_FreeSmallSpace(afile); osi_Panic("UFSOpen: igetinode failed"); } afile->vnode = vp; afile->offset = 0; afile->proc = (int (*)())0; #ifndef AFS_CACHE_VNODE_PATH afile->size = va.va_size; #else code = afs_osi_Stat(afile, &tstat); afile->size = tstat.size; #endif return (void *)afile; }
/* no-cache prefetch routine */ static afs_int32 afs_NoCacheFetchProc(struct rx_call *acall, struct vcache *avc, struct uio *auio, afs_int32 release_pages, afs_int32 size) { afs_int32 length; afs_int32 code; int moredata, iovno, iovoff, iovmax, result, locked; struct iovec *ciov; struct iovec *rxiov; int nio = 0; bypass_page_t pp; int curpage, bytes; int pageoff; rxiov = osi_AllocSmallSpace(sizeof(struct iovec) * RX_MAXIOVECS); ciov = auio->uio_iov; pp = (bypass_page_t) ciov->iov_base; iovmax = auio->uio_iovcnt - 1; iovno = iovoff = result = 0; do { COND_GUNLOCK(locked); code = rx_Read(acall, (char *)&length, sizeof(afs_int32)); COND_RE_GLOCK(locked); if (code != sizeof(afs_int32)) { result = EIO; afs_warn("Preread error. code: %d instead of %d\n", code, (int)sizeof(afs_int32)); unlock_and_release_pages(auio); goto done; } else length = ntohl(length); if (length > size) { result = EIO; afs_warn("Preread error. Got length %d, which is greater than size %d\n", length, size); unlock_and_release_pages(auio); goto done; } /* If we get a 0 length reply, time to cleanup and return */ if (length == 0) { unlock_and_release_pages(auio); result = 0; goto done; } /* * The fetch protocol is extended for the AFS/DFS translator * to allow multiple blocks of data, each with its own length, * to be returned. As long as the top bit is set, there are more * blocks expected. * * We do not do this for AFS file servers because they sometimes * return large negative numbers as the transfer size. */ if (avc->f.states & CForeign) { moredata = length & 0x80000000; length &= ~0x80000000; } else { moredata = 0; } for (curpage = 0; curpage <= iovmax; curpage++) { pageoff = 0; /* properly, this should track uio_resid, not a fixed page size! */ while (pageoff < auio->uio_iov[curpage].iov_len) { /* If no more iovs, issue new read. */ if (iovno >= nio) { COND_GUNLOCK(locked); bytes = rx_Readv(acall, rxiov, &nio, RX_MAXIOVECS, length); COND_RE_GLOCK(locked); if (bytes < 0) { afs_warn("afs_NoCacheFetchProc: rx_Read error. Return code was %d\n", bytes); result = bytes; unlock_and_release_pages(auio); goto done; } else if (bytes == 0) { /* we failed to read the full length */ result = EIO; afs_warn("afs_NoCacheFetchProc: rx_Read returned zero. Aborting.\n"); unlock_and_release_pages(auio); goto done; } size -= bytes; auio->uio_resid -= bytes; iovno = 0; } pp = (bypass_page_t)auio->uio_iov[curpage].iov_base; if (pageoff + (rxiov[iovno].iov_len - iovoff) <= auio->uio_iov[curpage].iov_len) { /* Copy entire (or rest of) current iovec into current page */ if (pp) afs_bypass_copy_page(pp, pageoff, rxiov, iovno, iovoff, auio, curpage, 0); length -= (rxiov[iovno].iov_len - iovoff); pageoff += rxiov[iovno].iov_len - iovoff; iovno++; iovoff = 0; } else { /* Copy only what's needed to fill current page */ if (pp) afs_bypass_copy_page(pp, pageoff, rxiov, iovno, iovoff, auio, curpage, 1); length -= (auio->uio_iov[curpage].iov_len - pageoff); iovoff += auio->uio_iov[curpage].iov_len - pageoff; pageoff = auio->uio_iov[curpage].iov_len; } /* we filled a page, or this is the last page. conditionally release it */ if (pp && ((pageoff == auio->uio_iov[curpage].iov_len && release_pages) || (length == 0 && iovno >= nio))) release_full_page(pp, pageoff); if (length == 0 && iovno >= nio) goto done; } } } while (moredata); done: osi_FreeSmallSpace(rxiov); return result; }
int afs_MemRead(struct vcache *avc, struct uio *auio, afs_ucred_t *acred, daddr_t albn, struct buf **abpp, int noLock) { afs_size_t totalLength; afs_size_t transferLength; afs_size_t filePos; afs_size_t offset, tlen; afs_size_t len = 0; afs_int32 trimlen; struct dcache *tdc = 0; afs_int32 error, trybusy = 1; afs_int32 code; struct vrequest *treq = NULL; #ifdef AFS_DARWIN80_ENV uio_t tuiop = NULL; #else struct uio tuio; struct uio *tuiop = &tuio; struct iovec *tvec; memset(&tuio, 0, sizeof(tuio)); #endif AFS_STATCNT(afs_MemRead); if (avc->vc_error) return EIO; /* check that we have the latest status info in the vnode cache */ if ((code = afs_CreateReq(&treq, acred))) return code; if (!noLock) { code = afs_VerifyVCache(avc, treq); if (code) { code = afs_CheckCode(code, treq, 8); /* failed to get it */ afs_DestroyReq(treq); return code; } } #ifndef AFS_VM_RDWR_ENV if (AFS_NFSXLATORREQ(acred)) { if (!afs_AccessOK (avc, PRSFS_READ, treq, CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) { code = afs_CheckCode(EACCES, treq, 9); afs_DestroyReq(treq); return code; } } #endif #ifndef AFS_DARWIN80_ENV tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec)); memset(tvec, 0, sizeof(struct iovec)); #endif totalLength = AFS_UIO_RESID(auio); filePos = AFS_UIO_OFFSET(auio); afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32, totalLength, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); error = 0; transferLength = 0; if (!noLock) ObtainReadLock(&avc->lock); #if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV) if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) { hset(avc->flushDV, avc->f.m.DataVersion); } #endif /* * Locks held: * avc->lock(R) */ /* This bit is bogus. We're checking to see if the read goes past the * end of the file. If so, we should be zeroing out all of the buffers * that the client has passed into us (there is a danger that we may leak * kernel memory if we do not). However, this behaviour is disabled by * not setting len before this segment runs, and by setting len to 0 * immediately we enter it. In addition, we also need to check for a read * which partially goes off the end of the file in the while loop below. */ if (filePos >= avc->f.m.Length) { if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ len = 0; #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); } while (avc->f.m.Length > 0 && totalLength > 0) { /* read all of the cached info */ if (filePos >= avc->f.m.Length) break; /* all done */ if (noLock) { if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); } tdc = afs_FindDCache(avc, filePos); if (tdc) { ObtainReadLock(&tdc->lock); offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->f.chunkBytes - offset; } } else { int versionOk; /* a tricky question: does the presence of the DFFetching flag * mean that we're fetching the latest version of the file? No. * The server could update the file as soon as the fetch responsible * for the setting of the DFFetching flag completes. * * However, the presence of the DFFetching flag (visible under * a dcache read lock since it is set and cleared only under a * dcache write lock) means that we're fetching as good a version * as was known to this client at the time of the last call to * afs_VerifyVCache, since the latter updates the stat cache's * m.DataVersion field under a vcache write lock, and from the * time that the DFFetching flag goes on in afs_GetDCache (before * the fetch starts), to the time it goes off (after the fetch * completes), afs_GetDCache keeps at least a read lock on the * vcache entry. * * This means that if the DFFetching flag is set, we can use that * data for any reads that must come from the current version of * the file (current == m.DataVersion). * * Another way of looking at this same point is this: if we're * fetching some data and then try do an afs_VerifyVCache, the * VerifyVCache operation will not complete until after the * DFFetching flag is turned off and the dcache entry's f.versionNo * field is updated. * * Note, by the way, that if DFFetching is set, * m.DataVersion > f.versionNo (the latter is not updated until * after the fetch completes). */ if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); /* before reusing tdc */ } #ifdef STRUCT_TASK_STRUCT_HAS_CRED try_background: #endif tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2); ObtainReadLock(&tdc->lock); /* now, first try to start transfer, if we'll need the data. If * data already coming, we don't need to do this, obviously. Type * 2 requests never return a null dcache entry, btw. */ if (!(tdc->dflags & DFFetching) && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) { /* have cache entry, it is not coming in now, * and we'll need new data */ tagain: #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) { #else if (trybusy && !afs_BBusy()) { #endif struct brequest *bp; /* daemon is not busy */ ObtainSharedLock(&tdc->mflock, 665); if (!(tdc->mflags & DFFetchReq)) { int dontwait = B_DONTWAIT; /* start the daemon (may already be running, however) */ UpgradeSToWLock(&tdc->mflock, 666); tdc->mflags |= DFFetchReq; #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) dontwait = 0; #endif bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred, (afs_size_t) filePos, (afs_size_t) 0, tdc, (void *)0, (void *)0); if (!bp) { tdc->mflags &= ~DFFetchReq; trybusy = 0; /* Avoid bkg daemon since they're too busy */ ReleaseWriteLock(&tdc->mflock); goto tagain; } ConvertWToSLock(&tdc->mflock); /* don't use bp pointer! */ } code = 0; ConvertSToRLock(&tdc->mflock); while (!code && tdc->mflags & DFFetchReq) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); /* don't need waiting flag on this one */ ReleaseReadLock(&tdc->mflock); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); ObtainReadLock(&tdc->mflock); } ReleaseReadLock(&tdc->mflock); if (code) { error = code; break; } } } /* now data may have started flowing in (if DFFetching is on). If * data is now streaming in, then wait for some interesting stuff. */ code = 0; while (!code && (tdc->dflags & DFFetching) && tdc->validPos <= filePos) { /* too early: wait for DFFetching flag to vanish, * or data to appear */ afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); } if (code) { error = code; break; } /* fetching flag gone, data is here, or we never tried * (BBusy for instance) */ len = tdc->validPos - filePos; versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0; if (tdc->dflags & DFFetching) { /* still fetching, some new data is here: * compute length and offset */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* no longer fetching, verify data version * (avoid new GetDCache call) */ if (versionOk && len > 0) { offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* don't have current data, so get it below */ afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO, ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_HYPER, &avc->f.m.DataVersion, ICL_TYPE_HYPER, &tdc->f.versionNo); #if 0 #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) { printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n", tdc->f.versionNo.low, avc->f.m.DataVersion.low, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); printf("afs_read: validPos %llu filePos %llu totalLength %lld m.Length %llu noLock %d\n", tdc->validPos, filePos, totalLength, avc->f.m.Length, noLock); printf("afs_read: or len too low? %lld for %u.%u.%u\n", len, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); } #endif #endif ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); tdc = NULL; } } if (!tdc) { #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) { /* avoid foreground fetch */ if (!versionOk) { printf("afs_read: avoid forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); goto try_background; } #if 0 printf("afs_read: forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); #endif } #endif /* If we get here, it was not possible to start the * background daemon. With flag == 1 afs_GetDCache * does the FetchData rpc synchronously. */ ReleaseReadLock(&avc->lock); tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1); ObtainReadLock(&avc->lock); if (tdc) ObtainReadLock(&tdc->lock); } } afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset), ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len)); if (!tdc) { error = EIO; break; } /* * Locks held: * avc->lock(R) * tdc->lock(R) */ if (len > totalLength) len = totalLength; /* will read len bytes */ if (len <= 0) { /* shouldn't get here if DFFetching is on */ /* read past the end of a chunk, may not be at next chunk yet, and yet * also not at eof, so may have to supply fake zeros */ len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset; /* bytes left in chunk addr space */ if (len > totalLength) len = totalLength; /* and still within xfr request */ tlen = avc->f.m.Length - offset; /* and still within file */ if (len > tlen) len = tlen; if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); if (code) { error = code; break; } } else { /* get the data from the mem cache */ /* mung uio structure to be right for this transfer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); uio_setoffset(tuiop, offset); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); tuio.afsio_offset = offset; #endif code = afs_MemReadUIO(&tdc->f.inode, tuiop); if (code) { error = code; break; } } /* otherwise we've read some, fixup length, etc and continue with next seg */ len = len - AFS_UIO_RESID(tuiop); /* compute amount really transferred */ trimlen = len; afsio_skip(auio, trimlen); /* update input uio structure */ totalLength -= len; transferLength += len; filePos += len; if (len <= 0) break; /* surprise eof */ #ifdef AFS_DARWIN80_ENV if (tuiop) { uio_free(tuiop); tuiop = 0; } #endif } /* the whole while loop */ /* * Locks held: * avc->lock(R) * tdc->lock(R) if tdc */ /* if we make it here with tdc non-zero, then it is the last chunk we * dealt with, and we have to release it when we're done. We hold on * to it in case we need to do a prefetch. */ if (tdc) { ReleaseReadLock(&tdc->lock); /* * try to queue prefetch, if needed. If DataVersion is zero there * should not be any more: files with DV 0 never have been stored * on the fileserver, symbolic links and directories never require * more than a single chunk. */ if (!noLock && !(hiszero(avc->f.m.DataVersion)) && #ifndef AFS_VM_RDWR_ENV afs_preCache #else 1 #endif ) { afs_PrefetchChunk(avc, tdc, acred, treq); } afs_PutDCache(tdc); } if (!noLock) ReleaseReadLock(&avc->lock); #ifdef AFS_DARWIN80_ENV if (tuiop) uio_free(tuiop); #else osi_FreeSmallSpace(tvec); #endif error = afs_CheckCode(error, treq, 10); afs_DestroyReq(treq); return error; } /* called with the dcache entry triggering the fetch, the vcache entry involved, * and a vrequest for the read call. Marks the dcache entry as having already * triggered a prefetch, starts the prefetch going and sets the DFFetchReq * flag in the prefetched block, so that the next call to read knows to wait * for the daemon to start doing things. * * This function must be called with the vnode at least read-locked, and * no locks on the dcache, because it plays around with dcache entries. */ void afs_PrefetchChunk(struct vcache *avc, struct dcache *adc, afs_ucred_t *acred, struct vrequest *areq) { struct dcache *tdc; afs_size_t offset; afs_size_t j1, j2; /* junk vbls for GetDCache to trash */ offset = adc->f.chunk + 1; /* next chunk we'll need */ offset = AFS_CHUNKTOBASE(offset); /* base of next chunk */ ObtainReadLock(&adc->lock); ObtainSharedLock(&adc->mflock, 662); if (offset < avc->f.m.Length && !(adc->mflags & DFNextStarted) && !afs_BBusy()) { struct brequest *bp; UpgradeSToWLock(&adc->mflock, 663); adc->mflags |= DFNextStarted; /* we've tried to prefetch for this guy */ ReleaseWriteLock(&adc->mflock); ReleaseReadLock(&adc->lock); tdc = afs_GetDCache(avc, offset, areq, &j1, &j2, 2); /* type 2 never returns 0 */ /* * In disconnected mode, type 2 can return 0 because it doesn't * make any sense to allocate a dcache we can never fill */ if (tdc == NULL) return; ObtainSharedLock(&tdc->mflock, 651); if (!(tdc->mflags & DFFetchReq)) { /* ask the daemon to do the work */ UpgradeSToWLock(&tdc->mflock, 652); tdc->mflags |= DFFetchReq; /* guaranteed to be cleared by BKG or GetDCache */ /* last parm (1) tells bkg daemon to do an afs_PutDCache when it is done, * since we don't want to wait for it to finish before doing so ourselves. */ bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, (afs_size_t) offset, (afs_size_t) 1, tdc, (void *)0, (void *)0); if (!bp) { /* Bkg table full; just abort non-important prefetching to avoid deadlocks */ tdc->mflags &= ~DFFetchReq; ReleaseWriteLock(&tdc->mflock); afs_PutDCache(tdc); /* * DCLOCKXXX: This is a little sketchy, since someone else * could have already started a prefetch.. In practice, * this probably doesn't matter; at most it would cause an * extra slot in the BKG table to be used up when someone * prefetches this for the second time. */ ObtainReadLock(&adc->lock); ObtainWriteLock(&adc->mflock, 664); adc->mflags &= ~DFNextStarted; ReleaseWriteLock(&adc->mflock); ReleaseReadLock(&adc->lock); } else { ReleaseWriteLock(&tdc->mflock); } } else { ReleaseSharedLock(&tdc->mflock); afs_PutDCache(tdc); } } else { ReleaseSharedLock(&adc->mflock); ReleaseReadLock(&adc->lock); } } int afs_UFSRead(struct vcache *avc, struct uio *auio, afs_ucred_t *acred, daddr_t albn, struct buf **abpp, int noLock) { afs_size_t totalLength; afs_size_t transferLength; afs_size_t filePos; afs_size_t offset, tlen; afs_size_t len = 0; afs_int32 trimlen; struct dcache *tdc = 0; afs_int32 error; struct osi_file *tfile; afs_int32 code; int trybusy = 1; struct vrequest *treq = NULL; #ifdef AFS_DARWIN80_ENV uio_t tuiop=NULL; #else struct uio tuio; struct uio *tuiop = &tuio; struct iovec *tvec; memset(&tuio, 0, sizeof(tuio)); #endif AFS_STATCNT(afs_UFSRead); if (avc && avc->vc_error) return EIO; AFS_DISCON_LOCK(); /* check that we have the latest status info in the vnode cache */ if ((code = afs_CreateReq(&treq, acred))) return code; if (!noLock) { if (!avc) osi_Panic("null avc in afs_UFSRead"); else { code = afs_VerifyVCache(avc, treq); if (code) { code = afs_CheckCode(code, treq, 11); /* failed to get it */ afs_DestroyReq(treq); AFS_DISCON_UNLOCK(); return code; } } } #ifndef AFS_VM_RDWR_ENV if (AFS_NFSXLATORREQ(acred)) { if (!afs_AccessOK (avc, PRSFS_READ, treq, CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) { AFS_DISCON_UNLOCK(); code = afs_CheckCode(EACCES, treq, 12); afs_DestroyReq(treq); return code; } } #endif #ifndef AFS_DARWIN80_ENV tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec)); memset(tvec, 0, sizeof(struct iovec)); #endif totalLength = AFS_UIO_RESID(auio); filePos = AFS_UIO_OFFSET(auio); afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32, totalLength, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); error = 0; transferLength = 0; if (!noLock) ObtainReadLock(&avc->lock); #if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV) if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) { hset(avc->flushDV, avc->f.m.DataVersion); } #endif /* This bit is bogus. We're checking to see if the read goes past the * end of the file. If so, we should be zeroing out all of the buffers * that the client has passed into us (there is a danger that we may leak * kernel memory if we do not). However, this behaviour is disabled by * not setting len before this segment runs, and by setting len to 0 * immediately we enter it. In addition, we also need to check for a read * which partially goes off the end of the file in the while loop below. */ if (filePos >= avc->f.m.Length) { if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ len = 0; #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); } while (avc->f.m.Length > 0 && totalLength > 0) { /* read all of the cached info */ if (filePos >= avc->f.m.Length) break; /* all done */ if (noLock) { if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); } tdc = afs_FindDCache(avc, filePos); if (tdc) { ObtainReadLock(&tdc->lock); offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->validPos - filePos; } } else { int versionOk; /* a tricky question: does the presence of the DFFetching flag * mean that we're fetching the latest version of the file? No. * The server could update the file as soon as the fetch responsible * for the setting of the DFFetching flag completes. * * However, the presence of the DFFetching flag (visible under * a dcache read lock since it is set and cleared only under a * dcache write lock) means that we're fetching as good a version * as was known to this client at the time of the last call to * afs_VerifyVCache, since the latter updates the stat cache's * m.DataVersion field under a vcache write lock, and from the * time that the DFFetching flag goes on in afs_GetDCache (before * the fetch starts), to the time it goes off (after the fetch * completes), afs_GetDCache keeps at least a read lock on the * vcache entry. * * This means that if the DFFetching flag is set, we can use that * data for any reads that must come from the current version of * the file (current == m.DataVersion). * * Another way of looking at this same point is this: if we're * fetching some data and then try do an afs_VerifyVCache, the * VerifyVCache operation will not complete until after the * DFFetching flag is turned off and the dcache entry's f.versionNo * field is updated. * * Note, by the way, that if DFFetching is set, * m.DataVersion > f.versionNo (the latter is not updated until * after the fetch completes). */ if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); /* before reusing tdc */ } #ifdef STRUCT_TASK_STRUCT_HAS_CRED try_background: #endif tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2); if (!tdc) { error = ENETDOWN; break; } ObtainReadLock(&tdc->lock); /* now, first try to start transfer, if we'll need the data. If * data already coming, we don't need to do this, obviously. Type * 2 requests never return a null dcache entry, btw. */ if (!(tdc->dflags & DFFetching) && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) { /* have cache entry, it is not coming in now, and we'll need new data */ tagain: #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) { #else if (trybusy && !afs_BBusy()) { #endif struct brequest *bp; /* daemon is not busy */ ObtainSharedLock(&tdc->mflock, 667); if (!(tdc->mflags & DFFetchReq)) { int dontwait = B_DONTWAIT; UpgradeSToWLock(&tdc->mflock, 668); tdc->mflags |= DFFetchReq; #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) dontwait = 0; #endif bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred, (afs_size_t) filePos, (afs_size_t) 0, tdc, (void *)0, (void *)0); if (!bp) { /* Bkg table full; retry deadlocks */ tdc->mflags &= ~DFFetchReq; trybusy = 0; /* Avoid bkg daemon since they're too busy */ ReleaseWriteLock(&tdc->mflock); goto tagain; } ConvertWToSLock(&tdc->mflock); } code = 0; ConvertSToRLock(&tdc->mflock); while (!code && tdc->mflags & DFFetchReq) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); /* don't need waiting flag on this one */ ReleaseReadLock(&tdc->mflock); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); ObtainReadLock(&tdc->mflock); } ReleaseReadLock(&tdc->mflock); if (code) { error = code; break; } } } /* now data may have started flowing in (if DFFetching is on). If * data is now streaming in, then wait for some interesting stuff. */ code = 0; while (!code && (tdc->dflags & DFFetching) && tdc->validPos <= filePos) { /* too early: wait for DFFetching flag to vanish, * or data to appear */ afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); } if (code) { error = code; break; } /* fetching flag gone, data is here, or we never tried * (BBusy for instance) */ len = tdc->validPos - filePos; versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0; if (tdc->dflags & DFFetching) { /* still fetching, some new data is here: * compute length and offset */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* no longer fetching, verify data version (avoid new * GetDCache call) */ if (versionOk && len > 0) { offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* don't have current data, so get it below */ afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO, ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_HYPER, &avc->f.m.DataVersion, ICL_TYPE_HYPER, &tdc->f.versionNo); #if 0 #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) { printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n", tdc->f.versionNo.low, avc->f.m.DataVersion.low, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); printf("afs_read: validPos %llu filePos %llu totalLength %d m.Length %llu noLock %d\n", tdc->validPos, filePos, totalLength, avc->f.m.Length, noLock); printf("afs_read: or len too low? %lld for %u.%u.%u\n", len, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); } #endif #endif ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); tdc = NULL; } } if (!tdc) { #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) { /* avoid foreground fetch */ if (!versionOk) { printf("afs_read: avoid forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); goto try_background; } } #endif /* If we get here, it was not possible to start the * background daemon. With flag == 1 afs_GetDCache * does the FetchData rpc synchronously. */ ReleaseReadLock(&avc->lock); tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1); ObtainReadLock(&avc->lock); if (tdc) ObtainReadLock(&tdc->lock); } } if (!tdc) { error = EIO; break; } len = tdc->validPos - filePos; afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset), ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len)); if (len > totalLength) len = totalLength; /* will read len bytes */ if (len <= 0) { /* shouldn't get here if DFFetching is on */ afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(tdc->validPos), ICL_TYPE_INT32, tdc->f.chunkBytes, ICL_TYPE_INT32, tdc->dflags); /* read past the end of a chunk, may not be at next chunk yet, and yet * also not at eof, so may have to supply fake zeros */ len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset; /* bytes left in chunk addr space */ if (len > totalLength) len = totalLength; /* and still within xfr request */ tlen = avc->f.m.Length - offset; /* and still within file */ if (len > tlen) len = tlen; if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); if (code) { error = code; break; } } else { /* get the data from the file */ tfile = (struct osi_file *)osi_UFSOpen(&tdc->f.inode); #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); uio_setoffset(tuiop, offset); #else /* mung uio structure to be right for this transfer */ afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); tuio.afsio_offset = offset; #endif #if defined(AFS_AIX41_ENV) AFS_GUNLOCK(); code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL, NULL, afs_osi_credp); AFS_GLOCK(); #elif defined(AFS_AIX32_ENV) code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL); /* Flush all JFS pages now for big performance gain in big file cases * If we do something like this, must check to be sure that AFS file * isn't mmapped... see afs_gn_map() for why. */ /* if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) { many different ways to do similar things: so far, the best performing one is #2, but #1 might match it if we straighten out the confusion regarding which pages to flush. It really does matter. 1. vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1); 2. vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE, (len + PAGESIZE-1)/PAGESIZE); 3. vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly 4. vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails tfile->vnode->v_gnode->gn_seg = NULL; 5. deletep 6. ipgrlse 7. ifreeseg Unfortunately, this seems to cause frequent "cache corruption" episodes. vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE, (len + PAGESIZE-1)/PAGESIZE); } */ #elif defined(AFS_AIX_ENV) code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset, &tuio, NULL, NULL, -1); #elif defined(AFS_SUN5_ENV) AFS_GUNLOCK(); #ifdef AFS_SUN510_ENV VOP_RWLOCK(tfile->vnode, 0, NULL); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, NULL); VOP_RWUNLOCK(tfile->vnode, 0, NULL); #else VOP_RWLOCK(tfile->vnode, 0); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_RWUNLOCK(tfile->vnode, 0); #endif AFS_GLOCK(); #elif defined(AFS_SGI_ENV) AFS_GUNLOCK(); AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ); AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp, code); AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ); AFS_GLOCK(); #elif defined(AFS_HPUX100_ENV) AFS_GUNLOCK(); code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp); AFS_GLOCK(); #elif defined(AFS_LINUX20_ENV) AFS_GUNLOCK(); code = osi_rdwr(tfile, &tuio, UIO_READ); AFS_GLOCK(); #elif defined(AFS_DARWIN80_ENV) AFS_GUNLOCK(); code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp); AFS_GLOCK(); #elif defined(AFS_DARWIN_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc()); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, current_proc()); AFS_GLOCK(); #elif defined(AFS_FBSD80_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0); AFS_GLOCK(); #elif defined(AFS_FBSD_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, curthread); AFS_GLOCK(); #elif defined(AFS_NBSD_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0); AFS_GLOCK(); #elif defined(AFS_XBSD_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, curproc); AFS_GLOCK(); #else code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp); #endif osi_UFSClose(tfile); if (code) { error = code; break; } } /* otherwise we've read some, fixup length, etc and continue with next seg */ len = len - AFS_UIO_RESID(tuiop); /* compute amount really transferred */ trimlen = len; afsio_skip(auio, trimlen); /* update input uio structure */ totalLength -= len; transferLength += len; filePos += len; if (len <= 0) break; /* surprise eof */ #ifdef AFS_DARWIN80_ENV if (tuiop) { uio_free(tuiop); tuiop = 0; } #endif } /* if we make it here with tdc non-zero, then it is the last chunk we * dealt with, and we have to release it when we're done. We hold on * to it in case we need to do a prefetch, obviously. */ if (tdc) { ReleaseReadLock(&tdc->lock); #if !defined(AFS_VM_RDWR_ENV) /* * try to queue prefetch, if needed. If DataVersion is zero there * should not be any more: files with DV 0 never have been stored * on the fileserver, symbolic links and directories never require * more than a single chunk. */ if (!noLock && !(hiszero(avc->f.m.DataVersion))) { if (!(tdc->mflags & DFNextStarted)) afs_PrefetchChunk(avc, tdc, acred, treq); } #endif afs_PutDCache(tdc); } if (!noLock) ReleaseReadLock(&avc->lock); #ifdef AFS_DARWIN80_ENV if (tuiop) uio_free(tuiop); #else osi_FreeSmallSpace(tvec); #endif AFS_DISCON_UNLOCK(); error = afs_CheckCode(error, treq, 13); afs_DestroyReq(treq); return error; }
/* Note that we don't set CDirty here, this is OK because the unlink * RPC is called synchronously */ int afs_remove(OSI_VC_DECL(adp), char *aname, afs_ucred_t *acred) { struct vrequest treq; register struct dcache *tdc; struct VenusFid unlinkFid; register afs_int32 code; register struct vcache *tvc; afs_size_t offset, len; struct afs_fakestat_state fakestate; OSI_VC_CONVERT(adp); AFS_STATCNT(afs_remove); afs_Trace2(afs_iclSetp, CM_TRACE_REMOVE, ICL_TYPE_POINTER, adp, ICL_TYPE_STRING, aname); if ((code = afs_InitReq(&treq, acred))) { return code; } afs_InitFakeStat(&fakestate); AFS_DISCON_LOCK(); code = afs_EvalFakeStat(&adp, &fakestate, &treq); if (code) goto done; /* Check if this is dynroot */ if (afs_IsDynroot(adp)) { code = afs_DynrootVOPRemove(adp, acred, aname); goto done; } if (afs_IsDynrootMount(adp)) { code = ENOENT; goto done; } if (strlen(aname) > AFSNAMEMAX) { code = ENAMETOOLONG; goto done; } tagain: code = afs_VerifyVCache(adp, &treq); tvc = NULL; if (code) { code = afs_CheckCode(code, &treq, 23); goto done; } /** If the volume is read-only, return error without making an RPC to the * fileserver */ if (adp->f.states & CRO) { code = EROFS; goto done; } /* If we're running disconnected without logging, go no further... */ if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) { code = ENETDOWN; goto done; } tdc = afs_GetDCache(adp, (afs_size_t) 0, &treq, &offset, &len, 1); /* test for error below */ ObtainWriteLock(&adp->lock, 142); if (tdc) ObtainSharedLock(&tdc->lock, 638); /* * Make sure that the data in the cache is current. We may have * received a callback while we were waiting for the write lock. */ if (!(adp->f.states & CStatd) || (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) { ReleaseWriteLock(&adp->lock); if (tdc) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } goto tagain; } unlinkFid.Fid.Vnode = 0; if (!tvc) { tvc = osi_dnlc_lookup(adp, aname, WRITE_LOCK); } /* This should not be necessary since afs_lookup() has already * done the work. */ if (!tvc) if (tdc) { code = afs_dir_Lookup(tdc, aname, &unlinkFid.Fid); if (code == 0) { afs_int32 cached = 0; unlinkFid.Cell = adp->f.fid.Cell; unlinkFid.Fid.Volume = adp->f.fid.Fid.Volume; if (unlinkFid.Fid.Unique == 0) { tvc = afs_LookupVCache(&unlinkFid, &treq, &cached, adp, aname); } else { ObtainReadLock(&afs_xvcache); tvc = afs_FindVCache(&unlinkFid, 0, DO_STATS); ReleaseReadLock(&afs_xvcache); } } } if (AFS_IS_DISCON_RW) { if (!adp->f.shadow.vnode && !(adp->f.ddirty_flags & VDisconCreate)) { /* Make shadow copy of parent dir. */ afs_MakeShadowDir(adp, tdc); } /* Can't hold a dcache lock whilst we're getting a vcache one */ if (tdc) ReleaseSharedLock(&tdc->lock); /* XXX - We're holding adp->lock still, and we've got no * guarantee about whether the ordering matches the lock hierarchy */ ObtainWriteLock(&tvc->lock, 713); /* If we were locally created, then we don't need to do very * much beyond ensuring that we don't exist anymore */ if (tvc->f.ddirty_flags & VDisconCreate) { afs_DisconRemoveDirty(tvc); } else { /* Add removed file vcache to dirty list. */ afs_DisconAddDirty(tvc, VDisconRemove, 1); } adp->f.m.LinkCount--; ReleaseWriteLock(&tvc->lock); if (tdc) ObtainSharedLock(&tdc->lock, 714); } if (tvc && osi_Active(tvc)) { /* about to delete whole file, prefetch it first */ ReleaseWriteLock(&adp->lock); if (tdc) ReleaseSharedLock(&tdc->lock); ObtainWriteLock(&tvc->lock, 143); FetchWholeEnchilada(tvc, &treq); ReleaseWriteLock(&tvc->lock); ObtainWriteLock(&adp->lock, 144); /* Technically I don't think we need this back, but let's hold it anyway; The "got" reference should actually be sufficient. */ if (tdc) ObtainSharedLock(&tdc->lock, 640); } osi_dnlc_remove(adp, aname, tvc); Tadp1 = adp; #ifndef AFS_DARWIN80_ENV Tadpr = VREFCOUNT(adp); #endif Ttvc = tvc; Tnam = aname; Tnam1 = 0; #ifndef AFS_DARWIN80_ENV if (tvc) Ttvcr = VREFCOUNT(tvc); #endif #ifdef AFS_AIX_ENV if (tvc && VREFCOUNT_GT(tvc, 2) && tvc->opens > 0 && !(tvc->f.states & CUnlinked)) { #else if (tvc && VREFCOUNT_GT(tvc, 1) && tvc->opens > 0 && !(tvc->f.states & CUnlinked)) { #endif char *unlname = afs_newname(); ReleaseWriteLock(&adp->lock); if (tdc) ReleaseSharedLock(&tdc->lock); code = afsrename(adp, aname, adp, unlname, acred, &treq); Tnam1 = unlname; if (!code) { struct VenusFid *oldmvid = NULL; if (tvc->mvid) oldmvid = tvc->mvid; tvc->mvid = (struct VenusFid *)unlname; if (oldmvid) osi_FreeSmallSpace(oldmvid); crhold(acred); if (tvc->uncred) { crfree(tvc->uncred); } tvc->uncred = acred; tvc->f.states |= CUnlinked; /* if rename succeeded, remove should not */ ObtainWriteLock(&tvc->lock, 715); if (tvc->f.ddirty_flags & VDisconRemove) { tvc->f.ddirty_flags &= ~VDisconRemove; } ReleaseWriteLock(&tvc->lock); } else { osi_FreeSmallSpace(unlname); } if (tdc) afs_PutDCache(tdc); afs_PutVCache(tvc); } else { code = afsremove(adp, tdc, tvc, aname, acred, &treq); } done: afs_PutFakeStat(&fakestate); #ifndef AFS_DARWIN80_ENV /* we can't track by thread, it's not exported in the KPI; only do this on !macos */ osi_Assert(!WriteLocked(&adp->lock) || (adp->lock.pid_writer != MyPidxx)); #endif AFS_DISCON_UNLOCK(); return code; } /* afs_remunlink -- This tries to delete the file at the server after it has * been renamed when unlinked locally but now has been finally released. * * CAUTION -- may be called with avc unheld. */ int afs_remunlink(register struct vcache *avc, register int doit) { afs_ucred_t *cred; char *unlname; struct vcache *adp; struct vrequest treq; struct VenusFid dirFid; register struct dcache *tdc; afs_int32 code = 0; if (NBObtainWriteLock(&avc->lock, 423)) return 0; #if defined(AFS_DARWIN80_ENV) if (vnode_get(AFSTOV(avc))) { ReleaseWriteLock(&avc->lock); return 0; } #endif if (avc->mvid && (doit || (avc->f.states & CUnlinkedDel))) { if ((code = afs_InitReq(&treq, avc->uncred))) { ReleaseWriteLock(&avc->lock); } else { /* Must bump the refCount because GetVCache may block. * Also clear mvid so no other thread comes here if we block. */ unlname = (char *)avc->mvid; avc->mvid = NULL; cred = avc->uncred; avc->uncred = NULL; #if defined(AFS_DARWIN_ENV) && !defined(AFS_DARWIN80_ENV) VREF(AFSTOV(avc)); #else AFS_FAST_HOLD(avc); #endif /* We'll only try this once. If it fails, just release the vnode. * Clear after doing hold so that NewVCache doesn't find us yet. */ avc->f.states &= ~(CUnlinked | CUnlinkedDel); ReleaseWriteLock(&avc->lock); dirFid.Cell = avc->f.fid.Cell; dirFid.Fid.Volume = avc->f.fid.Fid.Volume; dirFid.Fid.Vnode = avc->f.parent.vnode; dirFid.Fid.Unique = avc->f.parent.unique; adp = afs_GetVCache(&dirFid, &treq, NULL, NULL); if (adp) { tdc = afs_FindDCache(adp, (afs_size_t) 0); ObtainWriteLock(&adp->lock, 159); if (tdc) ObtainSharedLock(&tdc->lock, 639); /* afsremove releases the adp & tdc locks, and does vn_rele(avc) */ code = afsremove(adp, tdc, avc, unlname, cred, &treq); afs_PutVCache(adp); } else { /* we failed - and won't be back to try again. */ afs_PutVCache(avc); } osi_FreeSmallSpace(unlname); crfree(cred); } } else { #if defined(AFS_DARWIN80_ENV) vnode_put(AFSTOV(avc)); #endif ReleaseWriteLock(&avc->lock); } return code; }
/* don't set CDirty in here because RPC is called synchronously */ int afs_symlink(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, char *atargetName, struct vcache **tvcp, afs_ucred_t *acred) { afs_uint32 now = 0; struct vrequest *treq = NULL; afs_int32 code = 0; struct afs_conn *tc; struct VenusFid newFid; struct dcache *tdc; afs_size_t offset, len; afs_int32 alen; struct server *hostp = 0; struct vcache *tvc; struct AFSStoreStatus InStatus; struct AFSFetchStatus *OutFidStatus, *OutDirStatus; struct AFSCallBack CallBack; struct AFSVolSync tsync; struct volume *volp = 0; struct afs_fakestat_state fakestate; struct rx_connection *rxconn; XSTATS_DECLS; OSI_VC_CONVERT(adp); AFS_STATCNT(afs_symlink); afs_Trace2(afs_iclSetp, CM_TRACE_SYMLINK, ICL_TYPE_POINTER, adp, ICL_TYPE_STRING, aname); OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus)); OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus)); memset(&InStatus, 0, sizeof(InStatus)); if ((code = afs_CreateReq(&treq, acred))) goto done2; afs_InitFakeStat(&fakestate); AFS_DISCON_LOCK(); code = afs_EvalFakeStat(&adp, &fakestate, treq); if (code) goto done; if (strlen(aname) > AFSNAMEMAX || strlen(atargetName) > AFSPATHMAX) { code = ENAMETOOLONG; goto done; } if (afs_IsDynroot(adp)) { code = afs_DynrootVOPSymlink(adp, acred, aname, atargetName); goto done; } if (afs_IsDynrootMount(adp)) { code = EROFS; goto done; } code = afs_VerifyVCache(adp, treq); if (code) { code = afs_CheckCode(code, treq, 30); goto done; } /** If the volume is read-only, return error without making an RPC to the * fileserver */ if (adp->f.states & CRO) { code = EROFS; goto done; } if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) { code = ENETDOWN; goto done; } InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE; InStatus.ClientModTime = osi_Time(); alen = strlen(atargetName); /* we want it to include the null */ if ( (*atargetName == '#' || *atargetName == '%') && alen > 1 && atargetName[alen-1] == '.') { InStatus.UnixModeBits = 0644; /* mt pt: null from "." at end */ if (alen == 1) alen++; /* Empty string */ } else { InStatus.UnixModeBits = 0755; alen++; /* add in the null */ } tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1); volp = afs_FindVolume(&adp->f.fid, READ_LOCK); /*parent is also in same vol */ ObtainWriteLock(&adp->lock, 156); if (tdc) ObtainWriteLock(&tdc->lock, 636); /* No further locks: if the SymLink succeeds, it does not matter what happens * to our local copy of the directory. If somebody tampers with it in the meantime, * the copy will be invalidated */ if (!AFS_IS_DISCON_RW) { do { tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn); if (tc) { hostp = tc->parent->srvr->server; XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_SYMLINK); if (adp->f.states & CForeign) { now = osi_Time(); RX_AFS_GUNLOCK(); code = RXAFS_DFSSymlink(rxconn, (struct AFSFid *)&adp->f.fid.Fid, aname, atargetName, &InStatus, (struct AFSFid *)&newFid.Fid, OutFidStatus, OutDirStatus, &CallBack, &tsync); RX_AFS_GLOCK(); } else { RX_AFS_GUNLOCK(); code = RXAFS_Symlink(rxconn, (struct AFSFid *)&adp->f.fid.Fid, aname, atargetName, &InStatus, (struct AFSFid *)&newFid.Fid, OutFidStatus, OutDirStatus, &tsync); RX_AFS_GLOCK(); } XSTATS_END_TIME; } else code = -1; } while (afs_Analyze (tc, rxconn, code, &adp->f.fid, treq, AFS_STATS_FS_RPCIDX_SYMLINK, SHARED_LOCK, NULL)); } else { newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; afs_GenFakeFid(&newFid, VREG, 0); } ObtainWriteLock(&afs_xvcache, 40); if (code) { if (code < 0) { afs_StaleVCache(adp); } ReleaseWriteLock(&adp->lock); ReleaseWriteLock(&afs_xvcache); if (tdc) { ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } goto done; } /* otherwise, we should see if we can make the change to the dir locally */ if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) { /* we can do it locally */ ObtainWriteLock(&afs_xdcache, 293); /* If the following fails because the name has been created in the meantime, the * directory is out-of-date - the file server knows best! */ code = afs_dir_Create(tdc, aname, &newFid.Fid); ReleaseWriteLock(&afs_xdcache); if (code && !AFS_IS_DISCON_RW) { ZapDCE(tdc); /* surprise error -- use invalid value */ DZap(tdc); } } if (tdc) { ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; ReleaseWriteLock(&adp->lock); /* now we're done with parent dir, create the link's entry. Note that * no one can get a pointer to the new cache entry until we release * the xvcache lock. */ tvc = afs_NewVCache(&newFid, hostp); if (!tvc) { code = -2; ReleaseWriteLock(&afs_xvcache); goto done; } ObtainWriteLock(&tvc->lock, 157); ObtainWriteLock(&afs_xcbhash, 500); tvc->f.states |= CStatd; /* have valid info */ tvc->f.states &= ~CBulkFetching; if (adp->f.states & CForeign) { tvc->f.states |= CForeign; /* We don't have to worry about losing the callback since we're doing it * under the afs_xvcache lock actually, afs_NewVCache may drop the * afs_xvcache lock, if it calls afs_FlushVCache */ tvc->cbExpires = CallBack.ExpirationTime + now; afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime), volp); } else { tvc->cbExpires = 0x7fffffff; /* never expires, they can't change */ /* since it never expires, we don't have to queue the callback */ } ReleaseWriteLock(&afs_xcbhash); if (AFS_IS_DISCON_RW) { attrs->va_mode = InStatus.UnixModeBits; afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VLNK); code = afs_DisconCreateSymlink(tvc, atargetName, treq); if (code) { /* XXX - When this goes wrong, we need to tidy up the changes we made to * the parent, and get rid of the vcache we just created */ ReleaseWriteLock(&tvc->lock); ReleaseWriteLock(&afs_xvcache); afs_PutVCache(tvc); goto done; } afs_DisconAddDirty(tvc, VDisconCreate, 0); } else { afs_ProcessFS(tvc, OutFidStatus, treq); } if (!tvc->linkData) { tvc->linkData = afs_osi_Alloc(alen); osi_Assert(tvc->linkData != NULL); strncpy(tvc->linkData, atargetName, alen - 1); tvc->linkData[alen - 1] = 0; } ReleaseWriteLock(&tvc->lock); ReleaseWriteLock(&afs_xvcache); if (tvcp) *tvcp = tvc; else afs_PutVCache(tvc); code = 0; done: afs_PutFakeStat(&fakestate); if (volp) afs_PutVolume(volp, READ_LOCK); AFS_DISCON_UNLOCK(); code = afs_CheckCode(code, treq, 31); afs_DestroyReq(treq); done2: osi_FreeSmallSpace(OutFidStatus); osi_FreeSmallSpace(OutDirStatus); return code; }
int afs_UFSRead(register struct vcache *avc, struct uio *auio, struct AFS_UCRED *acred, daddr_t albn, struct buf **abpp, int noLock) { afs_size_t totalLength; afs_size_t transferLength; afs_size_t filePos; afs_size_t offset, len, tlen; afs_int32 trimlen; struct dcache *tdc = 0; afs_int32 error; #ifdef AFS_DARWIN80_ENV uio_t tuiop=NULL; #else struct uio tuio; struct uio *tuiop = &tuio; struct iovec *tvec; #endif struct osi_file *tfile; afs_int32 code; int trybusy = 1; struct vrequest treq; AFS_STATCNT(afs_UFSRead); if (avc && avc->vc_error) return EIO; AFS_DISCON_LOCK(); /* check that we have the latest status info in the vnode cache */ if ((code = afs_InitReq(&treq, acred))) return code; if (!noLock) { if (!avc) osi_Panic("null avc in afs_UFSRead"); else { code = afs_VerifyVCache(avc, &treq); if (code) { code = afs_CheckCode(code, &treq, 11); /* failed to get it */ AFS_DISCON_UNLOCK(); return code; } } } #ifndef AFS_VM_RDWR_ENV if (AFS_NFSXLATORREQ(acred)) { if (!afs_AccessOK (avc, PRSFS_READ, &treq, CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) { AFS_DISCON_UNLOCK(); return afs_CheckCode(EACCES, &treq, 12); } } #endif #ifndef AFS_DARWIN80_ENV tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec)); #endif totalLength = AFS_UIO_RESID(auio); filePos = AFS_UIO_OFFSET(auio); afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32, totalLength, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); error = 0; transferLength = 0; if (!noLock) ObtainReadLock(&avc->lock); #if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV) if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) { hset(avc->flushDV, avc->f.m.DataVersion); } #endif if (filePos >= avc->f.m.Length) { if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ len = 0; #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); } while (avc->f.m.Length > 0 && totalLength > 0) { /* read all of the cached info */ if (filePos >= avc->f.m.Length) break; /* all done */ if (noLock) { if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); } tdc = afs_FindDCache(avc, filePos); if (tdc) { ObtainReadLock(&tdc->lock); offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->validPos - filePos; } } else { /* a tricky question: does the presence of the DFFetching flag * mean that we're fetching the latest version of the file? No. * The server could update the file as soon as the fetch responsible * for the setting of the DFFetching flag completes. * * However, the presence of the DFFetching flag (visible under * a dcache read lock since it is set and cleared only under a * dcache write lock) means that we're fetching as good a version * as was known to this client at the time of the last call to * afs_VerifyVCache, since the latter updates the stat cache's * m.DataVersion field under a vcache write lock, and from the * time that the DFFetching flag goes on in afs_GetDCache (before * the fetch starts), to the time it goes off (after the fetch * completes), afs_GetDCache keeps at least a read lock on the * vcache entry. * * This means that if the DFFetching flag is set, we can use that * data for any reads that must come from the current version of * the file (current == m.DataVersion). * * Another way of looking at this same point is this: if we're * fetching some data and then try do an afs_VerifyVCache, the * VerifyVCache operation will not complete until after the * DFFetching flag is turned off and the dcache entry's f.versionNo * field is updated. * * Note, by the way, that if DFFetching is set, * m.DataVersion > f.versionNo (the latter is not updated until * after the fetch completes). */ if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); /* before reusing tdc */ } tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2); #ifdef AFS_DISCON_ENV if (!tdc) { printf("Network down in afs_read"); error = ENETDOWN; break; } #endif /* AFS_DISCON_ENV */ ObtainReadLock(&tdc->lock); /* now, first try to start transfer, if we'll need the data. If * data already coming, we don't need to do this, obviously. Type * 2 requests never return a null dcache entry, btw. */ if (!(tdc->dflags & DFFetching) && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) { /* have cache entry, it is not coming in now, and we'll need new data */ tagain: if (trybusy && !afs_BBusy()) { struct brequest *bp; /* daemon is not busy */ ObtainSharedLock(&tdc->mflock, 667); if (!(tdc->mflags & DFFetchReq)) { UpgradeSToWLock(&tdc->mflock, 668); tdc->mflags |= DFFetchReq; bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, (afs_size_t) filePos, (afs_size_t) 0, tdc); if (!bp) { /* Bkg table full; retry deadlocks */ tdc->mflags &= ~DFFetchReq; trybusy = 0; /* Avoid bkg daemon since they're too busy */ ReleaseWriteLock(&tdc->mflock); goto tagain; } ConvertWToSLock(&tdc->mflock); } code = 0; ConvertSToRLock(&tdc->mflock); while (!code && tdc->mflags & DFFetchReq) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); /* don't need waiting flag on this one */ ReleaseReadLock(&tdc->mflock); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); ObtainReadLock(&tdc->mflock); } ReleaseReadLock(&tdc->mflock); if (code) { error = code; break; } } } /* now data may have started flowing in (if DFFetching is on). If * data is now streaming in, then wait for some interesting stuff. */ code = 0; while (!code && (tdc->dflags & DFFetching) && tdc->validPos <= filePos) { /* too early: wait for DFFetching flag to vanish, * or data to appear */ afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); } if (code) { error = code; break; } /* fetching flag gone, data is here, or we never tried * (BBusy for instance) */ if (tdc->dflags & DFFetching) { /* still fetching, some new data is here: * compute length and offset */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->validPos - filePos; } else { /* no longer fetching, verify data version (avoid new * GetDCache call) */ if (hsame(avc->f.m.DataVersion, tdc->f.versionNo) && ((len = tdc->validPos - filePos) > 0)) { offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* don't have current data, so get it below */ afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO, ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_HYPER, &avc->f.m.DataVersion, ICL_TYPE_HYPER, &tdc->f.versionNo); ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); tdc = NULL; } } if (!tdc) { /* If we get, it was not possible to start the * background daemon. With flag == 1 afs_GetDCache * does the FetchData rpc synchronously. */ ReleaseReadLock(&avc->lock); tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1); ObtainReadLock(&avc->lock); if (tdc) ObtainReadLock(&tdc->lock); } } if (!tdc) { error = EIO; break; } len = tdc->validPos - filePos; afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset), ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len)); if (len > totalLength) len = totalLength; /* will read len bytes */ if (len <= 0) { /* shouldn't get here if DFFetching is on */ afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(tdc->validPos), ICL_TYPE_INT32, tdc->f.chunkBytes, ICL_TYPE_INT32, tdc->dflags); /* read past the end of a chunk, may not be at next chunk yet, and yet * also not at eof, so may have to supply fake zeros */ len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset; /* bytes left in chunk addr space */ if (len > totalLength) len = totalLength; /* and still within xfr request */ tlen = avc->f.m.Length - offset; /* and still within file */ if (len > tlen) len = tlen; if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); if (code) { error = code; break; } } else { /* get the data from the file */ #ifdef IHINT if (tfile = tdc->ihint) { if (tdc->f.inode != tfile->inum) { afs_warn("afs_UFSRead: %x hint mismatch tdc %d inum %d\n", tdc, tdc->f.inode, tfile->inum); osi_UFSClose(tfile); tdc->ihint = tfile = 0; nihints--; } } if (tfile != 0) { usedihint++; } else #endif /* IHINT */ #if defined(LINUX_USE_FH) tfile = (struct osi_file *)osi_UFSOpen_fh(&tdc->f.fh, tdc->f.fh_type); #else tfile = (struct osi_file *)osi_UFSOpen(tdc->f.inode); #endif #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); uio_setoffset(tuiop, offset); #else /* mung uio structure to be right for this transfer */ afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); tuio.afsio_offset = offset; #endif #if defined(AFS_AIX41_ENV) AFS_GUNLOCK(); code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL, NULL, afs_osi_credp); AFS_GLOCK(); #elif defined(AFS_AIX32_ENV) code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL); /* Flush all JFS pages now for big performance gain in big file cases * If we do something like this, must check to be sure that AFS file * isn't mmapped... see afs_gn_map() for why. */ /* if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) { many different ways to do similar things: so far, the best performing one is #2, but #1 might match it if we straighten out the confusion regarding which pages to flush. It really does matter. 1. vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1); 2. vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE, (len + PAGESIZE-1)/PAGESIZE); 3. vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly 4. vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails tfile->vnode->v_gnode->gn_seg = NULL; 5. deletep 6. ipgrlse 7. ifreeseg Unfortunately, this seems to cause frequent "cache corruption" episodes. vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE, (len + PAGESIZE-1)/PAGESIZE); } */ #elif defined(AFS_AIX_ENV) code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset, &tuio, NULL, NULL, -1); #elif defined(AFS_SUN5_ENV) AFS_GUNLOCK(); #ifdef AFS_SUN510_ENV { caller_context_t ct; VOP_RWLOCK(tfile->vnode, 0, &ct); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, &ct); VOP_RWUNLOCK(tfile->vnode, 0, &ct); } #else VOP_RWLOCK(tfile->vnode, 0); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_RWUNLOCK(tfile->vnode, 0); #endif AFS_GLOCK(); #elif defined(AFS_SGI_ENV) AFS_GUNLOCK(); AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ); AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp, code); AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ); AFS_GLOCK(); #elif defined(AFS_OSF_ENV) tuio.uio_rw = UIO_READ; AFS_GUNLOCK(); VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, code); AFS_GLOCK(); #elif defined(AFS_HPUX100_ENV) AFS_GUNLOCK(); code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp); AFS_GLOCK(); #elif defined(AFS_LINUX20_ENV) AFS_GUNLOCK(); code = osi_rdwr(tfile, &tuio, UIO_READ); AFS_GLOCK(); #elif defined(AFS_DARWIN80_ENV) AFS_GUNLOCK(); code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp); AFS_GLOCK(); #elif defined(AFS_DARWIN_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc()); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, current_proc()); AFS_GLOCK(); #elif defined(AFS_FBSD80_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0); AFS_GLOCK(); #elif defined(AFS_FBSD50_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, curthread); AFS_GLOCK(); #elif defined(AFS_XBSD_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, curproc); AFS_GLOCK(); #else code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp); #endif #ifdef IHINT if (!tdc->ihint && nihints < maxIHint) { tdc->ihint = tfile; nihints++; } else #endif /* IHINT */ osi_UFSClose(tfile); if (code) { error = code; break; } } /* otherwise we've read some, fixup length, etc and continue with next seg */ len = len - AFS_UIO_RESID(tuiop); /* compute amount really transferred */ trimlen = len; afsio_skip(auio, trimlen); /* update input uio structure */ totalLength -= len; transferLength += len; filePos += len; if (len <= 0) break; /* surprise eof */ #ifdef AFS_DARWIN80_ENV if (tuiop) { uio_free(tuiop); tuiop = 0; } #endif } /* if we make it here with tdc non-zero, then it is the last chunk we * dealt with, and we have to release it when we're done. We hold on * to it in case we need to do a prefetch, obviously. */ if (tdc) { ReleaseReadLock(&tdc->lock); #if !defined(AFS_VM_RDWR_ENV) /* try to queue prefetch, if needed */ if (!noLock) { if (!(tdc->mflags & DFNextStarted)) afs_PrefetchChunk(avc, tdc, acred, &treq); } #endif afs_PutDCache(tdc); } if (!noLock) ReleaseReadLock(&avc->lock); #ifdef AFS_DARWIN80_ENV if (tuiop) uio_free(tuiop); #else osi_FreeSmallSpace(tvec); #endif AFS_DISCON_UNLOCK(); error = afs_CheckCode(error, &treq, 13); return error; }
/* clid - nonzero on sgi sunos osf1 only */ int HandleFlock(struct vcache *avc, int acom, struct vrequest *areq, pid_t clid, int onlymine) { struct afs_conn *tc; struct SimpleLocks *slp, *tlp, **slpp; afs_int32 code; struct AFSVolSync tsync; afs_int32 lockType; struct AFS_FLOCK flock; XSTATS_DECLS; AFS_STATCNT(HandleFlock); code = 0; /* default when we don't make any network calls */ lockIdSet(&flock, NULL, clid); #if defined(AFS_SGI_ENV) osi_Assert(valusema(&avc->vc_rwlock) <= 0); osi_Assert(OSI_GET_LOCKID() == avc->vc_rwlockid); #endif ObtainWriteLock(&avc->lock, 118); if (acom & LOCK_UN) { int stored_segments = 0; retry_unlock: /* defect 3083 */ #ifdef AFS_AIX_ENV /* If the lock is held exclusive, then only the owning process * or a child can unlock it. Use pid and ppid because they are * unique identifiers. */ if ((avc->flockCount < 0) && (getpid() != avc->ownslock)) { #ifdef AFS_AIX41_ENV if (onlymine || (getppid() != avc->ownslock)) { #else if (onlymine || (u.u_procp->p_ppid != avc->ownslock)) { #endif ReleaseWriteLock(&avc->lock); return 0; } } #endif if (lockIdcmp2(&flock, avc, NULL, onlymine, clid)) { ReleaseWriteLock(&avc->lock); return 0; } #ifdef AFS_AIX_ENV avc->ownslock = 0; #endif if (avc->flockCount == 0) { ReleaseWriteLock(&avc->lock); return 0 /*ENOTTY*/; /* no lock held */ } /* unlock the lock */ if (avc->flockCount > 0) { slpp = &avc->slocks; for (slp = *slpp; slp;) { if (!lockIdcmp2(&flock, avc, slp, onlymine, clid)) { avc->flockCount--; tlp = *slpp = slp->next; osi_FreeSmallSpace(slp); slp = tlp; } else { slpp = &slp->next; slp = *slpp; } } } else if (avc->flockCount == -1) { if (!stored_segments) { afs_StoreAllSegments(avc, areq, AFS_SYNC | AFS_VMSYNC); /* fsync file early */ /* afs_StoreAllSegments can drop and reacquire the write lock * on avc and GLOCK, so the flocks may be completely different * now. Go back and perform all checks again. */ stored_segments = 1; goto retry_unlock; } avc->flockCount = 0; /* And remove the (only) exclusive lock entry from the list... */ osi_FreeSmallSpace(avc->slocks); avc->slocks = 0; } if (avc->flockCount == 0) { if (!AFS_IS_DISCONNECTED) { struct rx_connection *rxconn; do { tc = afs_Conn(&avc->f.fid, areq, SHARED_LOCK, &rxconn); if (tc) { XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_RELEASELOCK); RX_AFS_GUNLOCK(); code = RXAFS_ReleaseLock(rxconn, (struct AFSFid *) &avc->f.fid.Fid, &tsync); RX_AFS_GLOCK(); XSTATS_END_TIME; } else code = -1; } while (afs_Analyze (tc, rxconn, code, &avc->f.fid, areq, AFS_STATS_FS_RPCIDX_RELEASELOCK, SHARED_LOCK, NULL)); } else { /*printf("Network is dooooooowwwwwwwnnnnnnn\n");*/ code = ENETDOWN; } } } else { while (1) { /* set a new lock */ /* * Upgrading from shared locks to Exclusive and vice versa * is a bit tricky and we don't really support it yet. But * we try to support the common used one which is upgrade * a shared lock to an exclusive for the same process... */ if ((avc->flockCount > 0 && (acom & LOCK_EX)) || (avc->flockCount == -1 && (acom & LOCK_SH))) { /* * Upgrading from shared locks to an exclusive one: * For now if all the shared locks belong to the * same process then we unlock them on the server * and proceed with the upgrade. Unless we change the * server's locking interface impl we prohibit from * unlocking other processes's shared locks... * Upgrading from an exclusive lock to a shared one: * Again only allowed to be done by the same process. */ slpp = &avc->slocks; for (slp = *slpp; slp;) { if (!lockIdcmp2 (&flock, avc, slp, 1 /*!onlymine */ , clid)) { if (acom & LOCK_EX) avc->flockCount--; else avc->flockCount = 0; tlp = *slpp = slp->next; osi_FreeSmallSpace(slp); slp = tlp; } else { code = EWOULDBLOCK; slpp = &slp->next; slp = *slpp; } } if (!code && avc->flockCount == 0) { if (!AFS_IS_DISCONNECTED) { struct rx_connection *rxconn; do { tc = afs_Conn(&avc->f.fid, areq, SHARED_LOCK, &rxconn); if (tc) { XSTATS_START_TIME (AFS_STATS_FS_RPCIDX_RELEASELOCK); RX_AFS_GUNLOCK(); code = RXAFS_ReleaseLock(rxconn, (struct AFSFid *)&avc-> f.fid.Fid, &tsync); RX_AFS_GLOCK(); XSTATS_END_TIME; } else code = -1; } while (afs_Analyze (tc, rxconn, code, &avc->f.fid, areq, AFS_STATS_FS_RPCIDX_RELEASELOCK, SHARED_LOCK, NULL)); } } } else if (avc->flockCount == -1 && (acom & LOCK_EX)) { if (lockIdcmp2(&flock, avc, NULL, 1, clid)) { code = EWOULDBLOCK; } else { code = 0; /* We've just re-grabbed an exclusive lock, so we don't * need to contact the fileserver, and we don't need to * add the lock to avc->slocks (since we already have a * lock there). So, we are done. */ break; } } if (code == 0) { /* compatible here, decide if needs to go to file server. If * we've already got the file locked (and thus read-locked, since * we've already checked for compatibility), we shouldn't send * the call through to the server again */ if (avc->flockCount == 0) { struct rx_connection *rxconn; /* we're the first on our block, send the call through */ lockType = ((acom & LOCK_EX) ? LockWrite : LockRead); if (!AFS_IS_DISCONNECTED) { do { tc = afs_Conn(&avc->f.fid, areq, SHARED_LOCK, &rxconn); if (tc) { XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_SETLOCK); RX_AFS_GUNLOCK(); code = RXAFS_SetLock(rxconn, (struct AFSFid *) &avc->f.fid.Fid, lockType, &tsync); RX_AFS_GLOCK(); XSTATS_END_TIME; } else code = -1; } while (afs_Analyze (tc, rxconn, code, &avc->f.fid, areq, AFS_STATS_FS_RPCIDX_SETLOCK, SHARED_LOCK, NULL)); if ((lockType == LockWrite) && (code == VREADONLY)) code = EBADF; /* per POSIX; VREADONLY == EROFS */ } else /* XXX - Should probably try and log this when we're * XXX - running with logging enabled. But it's horrid */ code = 0; /* pretend we worked - ick!!! */ } else code = 0; /* otherwise, pretend things worked */ } if (code == 0) { slp = (struct SimpleLocks *) osi_AllocSmallSpace(sizeof(struct SimpleLocks)); if (acom & LOCK_EX) { /* defect 3083 */ #ifdef AFS_AIX_ENV /* Record unique id of process owning exclusive lock. */ avc->ownslock = getpid(); #endif slp->type = LockWrite; slp->next = NULL; avc->slocks = slp; avc->flockCount = -1; } else { slp->type = LockRead; slp->next = avc->slocks; avc->slocks = slp; avc->flockCount++; } lockIdSet(&flock, slp, clid); break; } /* now, if we got EWOULDBLOCK, and we're supposed to wait, we do */ if (((code == EWOULDBLOCK) || (code == EAGAIN) || (code == UAEWOULDBLOCK) || (code == UAEAGAIN)) && !(acom & LOCK_NB)) { /* sleep for a second, allowing interrupts */ ReleaseWriteLock(&avc->lock); #if defined(AFS_SGI_ENV) AFS_RWUNLOCK((vnode_t *) avc, VRWLOCK_WRITE); #endif code = afs_osi_Wait(1000, NULL, 1); #if defined(AFS_SGI_ENV) AFS_RWLOCK((vnode_t *) avc, VRWLOCK_WRITE); #endif ObtainWriteLock(&avc->lock, 120); if (code) { code = EINTR; /* return this if ^C typed */ break; } } else break; } /* while loop */ } ReleaseWriteLock(&avc->lock); code = afs_CheckCode(code, areq, 1); /* defeat a buggy AIX optimization */ return code; } /* warn a user that a lock has been ignored */ afs_int32 lastWarnTime = 0; /* this is used elsewhere */ static afs_int32 lastWarnPid = 0; static void DoLockWarning(afs_ucred_t * acred) { afs_int32 now; pid_t pid = MyPidxx2Pid(MyPidxx); char *procname; now = osi_Time(); AFS_STATCNT(DoLockWarning); /* check if we've already warned this user recently */ if (!((now < lastWarnTime + 120) && (lastWarnPid == pid))) { procname = afs_osi_Alloc(256); if (!procname) return; /* Copies process name to allocated procname, see osi_machdeps for details of macro */ osi_procname(procname, 256); procname[255] = '\0'; /* otherwise, it is time to nag the user */ lastWarnTime = now; lastWarnPid = pid; #ifdef AFS_LINUX26_ENV afs_warnuser ("afs: byte-range locks only enforced for processes on this machine (pid %d (%s), user %ld).\n", pid, procname, (long)afs_cr_uid(acred)); #else afs_warnuser ("afs: byte-range lock/unlock ignored; make sure no one else is running this program (pid %d (%s), user %ld).\n", pid, procname, (long)afs_cr_uid(acred)); #endif afs_osi_Free(procname, 256); } return; }
void * osi_UfsOpen(afs_dcache_id_t *ainode) { #ifdef AFS_CACHE_VNODE_PATH struct vnode *vp; #else struct inode *ip; #endif struct osi_file *afile = NULL; afs_int32 code = 0; int dummy; #ifdef AFS_CACHE_VNODE_PATH char namebuf[1024]; struct pathname lookpn; #endif struct osi_stat tstat; afile = osi_AllocSmallSpace(sizeof(struct osi_file)); AFS_GUNLOCK(); /* * AFS_CACHE_VNODE_PATH can be used with any file system, including ZFS or tmpfs. * The ainode is not an inode number but a path. */ #ifdef AFS_CACHE_VNODE_PATH /* Can not use vn_open or lookupname, they use user's CRED() * We need to run as root So must use low level lookuppnvp * assume fname starts with / */ code = pn_get_buf(ainode->ufs, AFS_UIOSYS, &lookpn, namebuf, sizeof(namebuf)); if (code != 0) osi_Panic("UfsOpen: pn_get_buf failed %ld %s", code, ainode->ufs); VN_HOLD(rootdir); /* released in loopuppnvp */ code = lookuppnvp(&lookpn, NULL, FOLLOW, NULL, &vp, rootdir, rootdir, afs_osi_credp); if (code != 0) osi_Panic("UfsOpen: lookuppnvp failed %ld %s", code, ainode->ufs); #ifdef AFS_SUN511_ENV code = VOP_OPEN(&vp, FREAD|FWRITE, afs_osi_credp, NULL); #else code = VOP_OPEN(&vp, FREAD|FWRITE, afs_osi_credp); #endif if (code != 0) osi_Panic("UfsOpen: VOP_OPEN failed %ld %s", code, ainode->ufs); #else code = igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, ainode->ufs, &ip, CRED(), &dummy); #endif AFS_GLOCK(); if (code) { osi_FreeSmallSpace(afile); osi_Panic("UfsOpen: igetinode failed %ld %s", code, ainode->ufs); } #ifdef AFS_CACHE_VNODE_PATH afile->vnode = vp; code = afs_osi_Stat(afile, &tstat); afile->size = tstat.size; #else afile->vnode = ITOV(ip); afile->size = VTOI(afile->vnode)->i_size; #endif afile->offset = 0; afile->proc = (int (*)())0; return (void *)afile; }
int afs_MemRead(register struct vcache *avc, struct uio *auio, struct AFS_UCRED *acred, daddr_t albn, struct buf **abpp, int noLock) { afs_size_t totalLength; afs_size_t transferLength; afs_size_t filePos; afs_size_t offset, len, tlen; afs_int32 trimlen; struct dcache *tdc = 0; afs_int32 error, trybusy = 1; #ifdef AFS_DARWIN80_ENV uio_t tuiop = NULL; #else struct uio tuio; struct uio *tuiop = &tuio; struct iovec *tvec; #endif afs_int32 code; struct vrequest treq; AFS_STATCNT(afs_MemRead); if (avc->vc_error) return EIO; /* check that we have the latest status info in the vnode cache */ if ((code = afs_InitReq(&treq, acred))) return code; if (!noLock) { code = afs_VerifyVCache(avc, &treq); if (code) { code = afs_CheckCode(code, &treq, 8); /* failed to get it */ return code; } } #ifndef AFS_VM_RDWR_ENV if (AFS_NFSXLATORREQ(acred)) { if (!afs_AccessOK (avc, PRSFS_READ, &treq, CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) { return afs_CheckCode(EACCES, &treq, 9); } } #endif #ifndef AFS_DARWIN80_ENV tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec)); #endif totalLength = AFS_UIO_RESID(auio); filePos = AFS_UIO_OFFSET(auio); afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32, totalLength, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); error = 0; transferLength = 0; if (!noLock) ObtainReadLock(&avc->lock); #if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV) if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) { hset(avc->flushDV, avc->f.m.DataVersion); } #endif /* * Locks held: * avc->lock(R) */ if (filePos >= avc->f.m.Length) { if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ len = 0; #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); } while (avc->f.m.Length > 0 && totalLength > 0) { /* read all of the cached info */ if (filePos >= avc->f.m.Length) break; /* all done */ if (noLock) { if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); } tdc = afs_FindDCache(avc, filePos); if (tdc) { ObtainReadLock(&tdc->lock); offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->f.chunkBytes - offset; } } else { /* a tricky question: does the presence of the DFFetching flag * mean that we're fetching the latest version of the file? No. * The server could update the file as soon as the fetch responsible * for the setting of the DFFetching flag completes. * * However, the presence of the DFFetching flag (visible under * a dcache read lock since it is set and cleared only under a * dcache write lock) means that we're fetching as good a version * as was known to this client at the time of the last call to * afs_VerifyVCache, since the latter updates the stat cache's * m.DataVersion field under a vcache write lock, and from the * time that the DFFetching flag goes on in afs_GetDCache (before * the fetch starts), to the time it goes off (after the fetch * completes), afs_GetDCache keeps at least a read lock on the * vcache entry. * * This means that if the DFFetching flag is set, we can use that * data for any reads that must come from the current version of * the file (current == m.DataVersion). * * Another way of looking at this same point is this: if we're * fetching some data and then try do an afs_VerifyVCache, the * VerifyVCache operation will not complete until after the * DFFetching flag is turned off and the dcache entry's f.versionNo * field is updated. * * Note, by the way, that if DFFetching is set, * m.DataVersion > f.versionNo (the latter is not updated until * after the fetch completes). */ if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); /* before reusing tdc */ } tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2); ObtainReadLock(&tdc->lock); /* now, first try to start transfer, if we'll need the data. If * data already coming, we don't need to do this, obviously. Type * 2 requests never return a null dcache entry, btw. */ if (!(tdc->dflags & DFFetching) && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) { /* have cache entry, it is not coming in now, * and we'll need new data */ tagain: if (trybusy && !afs_BBusy()) { struct brequest *bp; /* daemon is not busy */ ObtainSharedLock(&tdc->mflock, 665); if (!(tdc->mflags & DFFetchReq)) { /* start the daemon (may already be running, however) */ UpgradeSToWLock(&tdc->mflock, 666); tdc->mflags |= DFFetchReq; bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, (afs_size_t) filePos, (afs_size_t) 0, tdc); if (!bp) { tdc->mflags &= ~DFFetchReq; trybusy = 0; /* Avoid bkg daemon since they're too busy */ ReleaseWriteLock(&tdc->mflock); goto tagain; } ConvertWToSLock(&tdc->mflock); /* don't use bp pointer! */ } code = 0; ConvertSToRLock(&tdc->mflock); while (!code && tdc->mflags & DFFetchReq) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); /* don't need waiting flag on this one */ ReleaseReadLock(&tdc->mflock); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); ObtainReadLock(&tdc->mflock); } ReleaseReadLock(&tdc->mflock); if (code) { error = code; break; } } } /* now data may have started flowing in (if DFFetching is on). If * data is now streaming in, then wait for some interesting stuff. */ code = 0; while (!code && (tdc->dflags & DFFetching) && tdc->validPos <= filePos) { /* too early: wait for DFFetching flag to vanish, * or data to appear */ afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); } if (code) { error = code; break; } /* fetching flag gone, data is here, or we never tried * (BBusy for instance) */ if (tdc->dflags & DFFetching) { /* still fetching, some new data is here: * compute length and offset */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->validPos - filePos; } else { /* no longer fetching, verify data version * (avoid new GetDCache call) */ if (hsame(avc->f.m.DataVersion, tdc->f.versionNo) && ((len = tdc->validPos - filePos) > 0)) { offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* don't have current data, so get it below */ afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO, ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_HYPER, &avc->f.m.DataVersion, ICL_TYPE_HYPER, &tdc->f.versionNo); ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); tdc = NULL; } } if (!tdc) { /* If we get, it was not possible to start the * background daemon. With flag == 1 afs_GetDCache * does the FetchData rpc synchronously. */ ReleaseReadLock(&avc->lock); tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1); ObtainReadLock(&avc->lock); if (tdc) ObtainReadLock(&tdc->lock); } } afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset), ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len)); if (!tdc) { error = EIO; break; } /* * Locks held: * avc->lock(R) * tdc->lock(R) */ if (len > totalLength) len = totalLength; /* will read len bytes */ if (len <= 0) { /* shouldn't get here if DFFetching is on */ /* read past the end of a chunk, may not be at next chunk yet, and yet * also not at eof, so may have to supply fake zeros */ len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset; /* bytes left in chunk addr space */ if (len > totalLength) len = totalLength; /* and still within xfr request */ tlen = avc->f.m.Length - offset; /* and still within file */ if (len > tlen) len = tlen; if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); if (code) { error = code; break; } } else { /* get the data from the mem cache */ /* mung uio structure to be right for this transfer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); uio_setoffset(tuiop, offset); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); tuio.afsio_offset = offset; #endif code = afs_MemReadUIO(tdc->f.inode, tuiop); if (code) { error = code; break; } } /* otherwise we've read some, fixup length, etc and continue with next seg */ len = len - AFS_UIO_RESID(tuiop); /* compute amount really transferred */ trimlen = len; afsio_skip(auio, trimlen); /* update input uio structure */ totalLength -= len; transferLength += len; filePos += len; if (len <= 0) break; /* surprise eof */ #ifdef AFS_DARWIN80_ENV if (tuiop) { uio_free(tuiop); tuiop = 0; } #endif } /* the whole while loop */ /* * Locks held: * avc->lock(R) * tdc->lock(R) if tdc */ /* if we make it here with tdc non-zero, then it is the last chunk we * dealt with, and we have to release it when we're done. We hold on * to it in case we need to do a prefetch. */ if (tdc) { ReleaseReadLock(&tdc->lock); /* try to queue prefetch, if needed */ if (!noLock && #ifndef AFS_VM_RDWR_ENV afs_preCache #else 1 #endif ) { afs_PrefetchChunk(avc, tdc, acred, &treq); } afs_PutDCache(tdc); } if (!noLock) ReleaseReadLock(&avc->lock); #ifdef AFS_DARWIN80_ENV if (tuiop) uio_free(tuiop); #else osi_FreeSmallSpace(tvec); #endif error = afs_CheckCode(error, &treq, 10); return error; }
/* question: does afs_create need to set CDirty in the adp or the avc? * I think we can get away without it, but I'm not sure. Note that * afs_setattr is called in here for truncation. */ #ifdef AFS_SGI64_ENV int afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, int flags, int amode, struct vcache **avcp, afs_ucred_t *acred) #else /* AFS_SGI64_ENV */ int afs_create(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, enum vcexcl aexcl, int amode, struct vcache **avcp, afs_ucred_t *acred) #endif /* AFS_SGI64_ENV */ { afs_int32 origCBs, origZaps, finalZaps; struct vrequest *treq = NULL; afs_int32 code; struct afs_conn *tc; struct VenusFid newFid; struct AFSStoreStatus InStatus; struct AFSFetchStatus *OutFidStatus, *OutDirStatus; struct AFSVolSync tsync; struct AFSCallBack CallBack; afs_int32 now; struct dcache *tdc; afs_size_t offset, len; struct server *hostp = 0; struct vcache *tvc; struct volume *volp = 0; struct afs_fakestat_state fakestate; struct rx_connection *rxconn; XSTATS_DECLS; OSI_VC_CONVERT(adp); AFS_STATCNT(afs_create); OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus)); OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus)); memset(&InStatus, 0, sizeof(InStatus)); if ((code = afs_CreateReq(&treq, acred))) goto done2; afs_Trace3(afs_iclSetp, CM_TRACE_CREATE, ICL_TYPE_POINTER, adp, ICL_TYPE_STRING, aname, ICL_TYPE_INT32, amode); afs_InitFakeStat(&fakestate); #ifdef AFS_SGI65_ENV /* If avcp is passed not null, it's the old reference to this file. * We can use this to avoid create races. For now, just decrement * the reference count on it. */ if (*avcp) { AFS_RELE(AFSTOV(*avcp)); *avcp = NULL; } #endif if (strlen(aname) > AFSNAMEMAX) { code = ENAMETOOLONG; goto done3; } if (!afs_ENameOK(aname)) { code = EINVAL; goto done3; } switch (attrs->va_type) { case VBLK: case VCHR: #if !defined(AFS_SUN5_ENV) case VSOCK: #endif case VFIFO: /* We don't support special devices or FIFOs */ code = EINVAL; goto done3; default: ; } AFS_DISCON_LOCK(); code = afs_EvalFakeStat(&adp, &fakestate, treq); if (code) goto done; tagain: code = afs_VerifyVCache(adp, treq); if (code) goto done; /** If the volume is read-only, return error without making an RPC to the * fileserver */ if (adp->f.states & CRO) { code = EROFS; goto done; } if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) { code = ENETDOWN; goto done; } tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1); ObtainWriteLock(&adp->lock, 135); if (tdc) ObtainSharedLock(&tdc->lock, 630); /* * Make sure that the data in the cache is current. We may have * received a callback while we were waiting for the write lock. */ if (!(adp->f.states & CStatd) || (tdc && !hsame(adp->f.m.DataVersion, tdc->f.versionNo))) { ReleaseWriteLock(&adp->lock); if (tdc) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } goto tagain; } if (tdc) { /* see if file already exists. If it does, we only set * the size attributes (to handle O_TRUNC) */ code = afs_dir_Lookup(tdc, aname, &newFid.Fid); /* use dnlc first xxx */ if (code == 0) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); ReleaseWriteLock(&adp->lock); #ifdef AFS_SGI64_ENV if (flags & VEXCL) { #else if (aexcl != NONEXCL) { #endif code = EEXIST; /* file exists in excl mode open */ goto done; } /* found the file, so use it */ newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; tvc = NULL; if (newFid.Fid.Unique == 0) { tvc = afs_LookupVCache(&newFid, treq, NULL, adp, aname); } if (!tvc) /* lookup failed or wasn't called */ tvc = afs_GetVCache(&newFid, treq, NULL, NULL); if (tvc) { /* if the thing exists, we need the right access to open it. * we must check that here, since no other checks are * made by the open system call */ len = attrs->va_size; /* only do the truncate */ /* * We used to check always for READ access before; the * problem is that we will fail if the existing file * has mode -w-w-w, which is wrong. */ if ((amode & VREAD) && !afs_AccessOK(tvc, PRSFS_READ, treq, CHECK_MODE_BITS)) { afs_PutVCache(tvc); code = EACCES; goto done; } #if defined(AFS_DARWIN80_ENV) if ((amode & VWRITE) || VATTR_IS_ACTIVE(attrs, va_data_size)) #elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) if ((amode & VWRITE) || (attrs->va_mask & AT_SIZE)) #else if ((amode & VWRITE) || len != 0xffffffff) #endif { /* needed for write access check */ tvc->f.parent.vnode = adp->f.fid.Fid.Vnode; tvc->f.parent.unique = adp->f.fid.Fid.Unique; /* need write mode for these guys */ if (!afs_AccessOK (tvc, PRSFS_WRITE, treq, CHECK_MODE_BITS)) { afs_PutVCache(tvc); code = EACCES; goto done; } } #if defined(AFS_DARWIN80_ENV) if (VATTR_IS_ACTIVE(attrs, va_data_size)) #elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) if (attrs->va_mask & AT_SIZE) #else if (len != 0xffffffff) #endif { if (vType(tvc) != VREG) { afs_PutVCache(tvc); code = EISDIR; goto done; } /* do a truncate */ #if defined(AFS_DARWIN80_ENV) VATTR_INIT(attrs); VATTR_SET_SUPPORTED(attrs, va_data_size); VATTR_SET_ACTIVE(attrs, va_data_size); #elif defined(UKERNEL) attrs->va_mask = ATTR_SIZE; #elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) attrs->va_mask = AT_SIZE; #else VATTR_NULL(attrs); #endif attrs->va_size = len; ObtainWriteLock(&tvc->lock, 136); tvc->f.states |= CCreating; ReleaseWriteLock(&tvc->lock); #if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) #if defined(AFS_SGI64_ENV) code = afs_setattr(VNODE_TO_FIRST_BHV((vnode_t *) tvc), attrs, 0, acred); #else code = afs_setattr(tvc, attrs, 0, acred); #endif /* AFS_SGI64_ENV */ #else /* SUN5 || SGI */ code = afs_setattr(tvc, attrs, acred); #endif /* SUN5 || SGI */ ObtainWriteLock(&tvc->lock, 137); tvc->f.states &= ~CCreating; ReleaseWriteLock(&tvc->lock); if (code) { afs_PutVCache(tvc); goto done; } } *avcp = tvc; } else code = ENOENT; /* shouldn't get here */ /* make sure vrefCount bumped only if code == 0 */ goto done; } } /* if we create the file, we don't do any access checks, since * that's how O_CREAT is supposed to work */ if (adp->f.states & CForeign) { origCBs = afs_allCBs; origZaps = afs_allZaps; } else { origCBs = afs_evenCBs; /* if changes, we don't really have a callback */ origZaps = afs_evenZaps; /* number of even numbered vnodes discarded */ } InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE | AFS_SETGROUP; InStatus.ClientModTime = osi_Time(); InStatus.Group = (afs_int32) afs_cr_gid(acred); if (AFS_NFSXLATORREQ(acred)) { /* * XXX The following is mainly used to fix a bug in the HP-UX * nfs client where they create files with mode of 0 without * doing any setattr later on to fix it. * XXX */ #if defined(AFS_AIX_ENV) if (attrs->va_mode != -1) { #else #if defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) if (attrs->va_mask & AT_MODE) { #else if (attrs->va_mode != ((unsigned short)-1)) { #endif #endif if (!attrs->va_mode) attrs->va_mode = 0x1b6; /* XXX default mode: rw-rw-rw XXX */ } } if (!AFS_IS_DISCONNECTED) { /* If not disconnected, connect to the server.*/ InStatus.UnixModeBits = attrs->va_mode & 0xffff; /* only care about protection bits */ do { tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn); if (tc) { hostp = tc->srvr->server; /* remember for callback processing */ now = osi_Time(); XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_CREATEFILE); RX_AFS_GUNLOCK(); code = RXAFS_CreateFile(rxconn, (struct AFSFid *)&adp->f.fid.Fid, aname, &InStatus, (struct AFSFid *) &newFid.Fid, OutFidStatus, OutDirStatus, &CallBack, &tsync); RX_AFS_GLOCK(); XSTATS_END_TIME; CallBack.ExpirationTime += now; } else code = -1; } while (afs_Analyze (tc, rxconn, code, &adp->f.fid, treq, AFS_STATS_FS_RPCIDX_CREATEFILE, SHARED_LOCK, NULL)); if ((code == EEXIST || code == UAEEXIST) && #ifdef AFS_SGI64_ENV !(flags & VEXCL) #else /* AFS_SGI64_ENV */ aexcl == NONEXCL #endif ) { /* if we get an EEXIST in nonexcl mode, just do a lookup */ if (tdc) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } ReleaseWriteLock(&adp->lock); #if defined(AFS_SGI64_ENV) code = afs_lookup(VNODE_TO_FIRST_BHV((vnode_t *) adp), aname, avcp, NULL, 0, NULL, acred); #elif defined(AFS_SUN5_ENV) || defined(AFS_SGI_ENV) code = afs_lookup(adp, aname, avcp, NULL, 0, NULL, acred); #elif defined(UKERNEL) code = afs_lookup(adp, aname, avcp, acred, 0); #elif !defined(AFS_DARWIN_ENV) code = afs_lookup(adp, aname, avcp, acred); #endif goto done; } if (code) { if (code < 0) { ObtainWriteLock(&afs_xcbhash, 488); afs_DequeueCallback(adp); adp->f.states &= ~CStatd; ReleaseWriteLock(&afs_xcbhash); osi_dnlc_purgedp(adp); } ReleaseWriteLock(&adp->lock); if (tdc) { ReleaseSharedLock(&tdc->lock); afs_PutDCache(tdc); } goto done; } } else { /* Generate a fake FID for disconnected mode. */ newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; afs_GenFakeFid(&newFid, VREG, 1); } /* if (!AFS_IS_DISCON_RW) */ /* otherwise, we should see if we can make the change to the dir locally */ if (tdc) UpgradeSToWLock(&tdc->lock, 631); if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) { /* we can do it locally */ ObtainWriteLock(&afs_xdcache, 291); code = afs_dir_Create(tdc, aname, &newFid.Fid); ReleaseWriteLock(&afs_xdcache); if (code) { ZapDCE(tdc); DZap(tdc); } } if (tdc) { ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } if (AFS_IS_DISCON_RW) adp->f.m.LinkCount++; newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; ReleaseWriteLock(&adp->lock); volp = afs_FindVolume(&newFid, READ_LOCK); /* New tricky optimistic callback handling algorithm for file creation works * as follows. We create the file essentially with no locks set at all. File * server may thus handle operations from others cache managers as well as from * this very own cache manager that reference the file in question before * we managed to create the cache entry. However, if anyone else changes * any of the status information for a file, we'll see afs_evenCBs increase * (files always have even fids). If someone on this workstation manages * to do something to the file, they'll end up having to create a cache * entry for the new file. Either we'll find it once we've got the afs_xvcache * lock set, or it was also *deleted* the vnode before we got there, in which case * we will find evenZaps has changed, too. Thus, we only assume we have the right * status information if no callbacks or vnode removals have occurred to even * numbered files from the time the call started until the time that we got the xvcache * lock set. Of course, this also assumes that any call that modifies a file first * gets a write lock on the file's vnode, but if that weren't true, the whole cache manager * would fail, since no call would be able to update the local vnode status after modifying * a file on a file server. */ ObtainWriteLock(&afs_xvcache, 138); if (adp->f.states & CForeign) finalZaps = afs_allZaps; /* do this before calling newvcache */ else finalZaps = afs_evenZaps; /* do this before calling newvcache */ /* don't need to call RemoveVCB, since only path leaving a callback is the * one where we pass through afs_NewVCache. Can't have queued a VCB unless * we created and freed an entry between file creation time and here, and the * freeing of the vnode will change evenZaps. Don't need to update the VLRU * queue, since the find will only succeed in the event of a create race, and * then the vcache will be at the front of the VLRU queue anyway... */ if (!(tvc = afs_FindVCache(&newFid, 0, DO_STATS))) { tvc = afs_NewVCache(&newFid, hostp); if (tvc) { int finalCBs; ObtainWriteLock(&tvc->lock, 139); ObtainWriteLock(&afs_xcbhash, 489); finalCBs = afs_evenCBs; /* add the callback in */ if (adp->f.states & CForeign) { tvc->f.states |= CForeign; finalCBs = afs_allCBs; } if (origCBs == finalCBs && origZaps == finalZaps) { tvc->f.states |= CStatd; /* we've fake entire thing, so don't stat */ tvc->f.states &= ~CBulkFetching; if (!AFS_IS_DISCON_RW) { tvc->cbExpires = CallBack.ExpirationTime; afs_QueueCallback(tvc, CBHash(CallBack.ExpirationTime), volp); } } else { afs_DequeueCallback(tvc); tvc->f.states &= ~(CStatd | CUnique); tvc->callback = 0; if (tvc->f.fid.Fid.Vnode & 1 || (vType(tvc) == VDIR)) osi_dnlc_purgedp(tvc); } ReleaseWriteLock(&afs_xcbhash); if (AFS_IS_DISCON_RW) { afs_DisconAddDirty(tvc, VDisconCreate, 0); afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VREG); } else { afs_ProcessFS(tvc, OutFidStatus, treq); } tvc->f.parent.vnode = adp->f.fid.Fid.Vnode; tvc->f.parent.unique = adp->f.fid.Fid.Unique; #if !defined(UKERNEL) if (volp && (volp->states & VPartVisible)) tvc->f.states |= CPartVisible; #endif ReleaseWriteLock(&tvc->lock); *avcp = tvc; code = 0; } else code = ENOENT; } else { /* otherwise cache entry already exists, someone else must * have created it. Comments used to say: "don't need write * lock to *clear* these flags" but we should do it anyway. * Code used to clear stat bit and callback, but I don't see * the point -- we didn't have a create race, somebody else just * snuck into NewVCache before we got here, probably a racing * lookup. */ *avcp = tvc; code = 0; } ReleaseWriteLock(&afs_xvcache); done: AFS_DISCON_UNLOCK(); done3: if (volp) afs_PutVolume(volp, READ_LOCK); if (code == 0) { if (afs_mariner) afs_AddMarinerName(aname, *avcp); /* return the new status in vattr */ afs_CopyOutAttrs(*avcp, attrs); if (afs_mariner) afs_MarinerLog("store$Creating", *avcp); } afs_PutFakeStat(&fakestate); code = afs_CheckCode(code, treq, 20); afs_DestroyReq(treq); done2: osi_FreeSmallSpace(OutFidStatus); osi_FreeSmallSpace(OutDirStatus); return code; } /* * Check to see if we can track the change locally: requires that * we have sufficiently recent info in data cache. If so, we * know the new DataVersion number, and place it correctly in both the * data and stat cache entries. This routine returns 1 if we should * do the operation locally, and 0 otherwise. * * This routine must be called with the stat cache entry write-locked, * and dcache entry write-locked. */ int afs_LocalHero(struct vcache *avc, struct dcache *adc, AFSFetchStatus * astat, int aincr) { afs_int32 ok; afs_hyper_t avers; AFS_STATCNT(afs_LocalHero); hset64(avers, astat->dataVersionHigh, astat->DataVersion); /* avers *is* the version number now, no matter what */ if (adc) { /* does what's in the dcache *now* match what's in the vcache *now*, * and do we have a valid callback? if not, our local copy is not "ok" */ ok = (hsame(avc->f.m.DataVersion, adc->f.versionNo) && avc->callback && (avc->f.states & CStatd) && avc->cbExpires >= osi_Time()); } else { ok = 0; } if (ok) { /* check that the DV on the server is what we expect it to be */ afs_hyper_t newDV; hset(newDV, adc->f.versionNo); hadd32(newDV, aincr); if (!hsame(avers, newDV)) { ok = 0; } } #if defined(AFS_SGI_ENV) osi_Assert(avc->v.v_type == VDIR); #endif /* The bulk status code used the length as a sequence number. */ /* Don't update the vcache entry unless the stats are current. */ if (avc->f.states & CStatd) { hset(avc->f.m.DataVersion, avers); #ifdef AFS_64BIT_CLIENT FillInt64(avc->f.m.Length, astat->Length_hi, astat->Length); #else /* AFS_64BIT_CLIENT */ avc->f.m.Length = astat->Length; #endif /* AFS_64BIT_CLIENT */ avc->f.m.Date = astat->ClientModTime; } if (ok) { /* we've been tracking things correctly */ adc->dflags |= DFEntryMod; adc->f.versionNo = avers; return 1; } else { if (adc) { ZapDCE(adc); DZap(adc); } if (avc->f.states & CStatd) { osi_dnlc_purgedp(avc); } return 0; } }
int afs_mkdir(OSI_VC_DECL(adp), char *aname, struct vattr *attrs, struct vcache **avcp, afs_ucred_t *acred) { struct vrequest *treq = NULL; afs_int32 code; struct afs_conn *tc; struct rx_connection *rxconn; struct VenusFid newFid; struct dcache *tdc; struct dcache *new_dc; afs_size_t offset, len; struct vcache *tvc; struct AFSStoreStatus InStatus; struct AFSFetchStatus *OutFidStatus, *OutDirStatus; struct AFSCallBack CallBack; struct AFSVolSync tsync; afs_int32 now; struct afs_fakestat_state fakestate; XSTATS_DECLS; OSI_VC_CONVERT(adp); AFS_STATCNT(afs_mkdir); afs_Trace2(afs_iclSetp, CM_TRACE_MKDIR, ICL_TYPE_POINTER, adp, ICL_TYPE_STRING, aname); OutFidStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus)); OutDirStatus = osi_AllocSmallSpace(sizeof(struct AFSFetchStatus)); memset(&InStatus, 0, sizeof(InStatus)); if ((code = afs_CreateReq(&treq, acred))) goto done2; afs_InitFakeStat(&fakestate); if (strlen(aname) > AFSNAMEMAX) { code = ENAMETOOLONG; goto done3; } if (!afs_ENameOK(aname)) { code = EINVAL; goto done3; } AFS_DISCON_LOCK(); code = afs_EvalFakeStat(&adp, &fakestate, treq); if (code) goto done; code = afs_VerifyVCache(adp, treq); if (code) goto done; /** If the volume is read-only, return error without making an RPC to the * fileserver */ if (adp->f.states & CRO) { code = EROFS; goto done; } if (AFS_IS_DISCONNECTED && !AFS_IS_DISCON_RW) { /*printf("Network is down in afs_mkdir\n");*/ code = ENETDOWN; goto done; } InStatus.Mask = AFS_SETMODTIME | AFS_SETMODE | AFS_SETGROUP; InStatus.ClientModTime = osi_Time(); InStatus.UnixModeBits = attrs->va_mode & 0xffff; /* only care about protection bits */ InStatus.Group = (afs_int32) afs_cr_gid(acred); tdc = afs_GetDCache(adp, (afs_size_t) 0, treq, &offset, &len, 1); ObtainWriteLock(&adp->lock, 153); if (!AFS_IS_DISCON_RW) { do { tc = afs_Conn(&adp->f.fid, treq, SHARED_LOCK, &rxconn); if (tc) { XSTATS_START_TIME(AFS_STATS_FS_RPCIDX_MAKEDIR); now = osi_Time(); RX_AFS_GUNLOCK(); code = RXAFS_MakeDir(rxconn, (struct AFSFid *)&adp->f.fid.Fid, aname, &InStatus, (struct AFSFid *)&newFid.Fid, OutFidStatus, OutDirStatus, &CallBack, &tsync); RX_AFS_GLOCK(); XSTATS_END_TIME; CallBack.ExpirationTime += now; /* DON'T forget to Set the callback value... */ } else code = -1; } while (afs_Analyze (tc, rxconn, code, &adp->f.fid, treq, AFS_STATS_FS_RPCIDX_MAKEDIR, SHARED_LOCK, NULL)); if (code) { if (code < 0) { afs_StaleVCache(adp); } ReleaseWriteLock(&adp->lock); if (tdc) afs_PutDCache(tdc); goto done; } } else { /* Disconnected. */ /* We have the dir entry now, we can use it while disconnected. */ if (adp->mvid.target_root == NULL) { /* If not mount point, generate a new fid. */ newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; afs_GenFakeFid(&newFid, VDIR, 1); } /* XXX: If mount point???*/ /* Operations with the actual dir's cache entry are further * down, where the dir entry gets created. */ } /* if (!AFS_IS_DISCON_RW) */ /* otherwise, we should see if we can make the change to the dir locally */ if (tdc) ObtainWriteLock(&tdc->lock, 632); if (AFS_IS_DISCON_RW || afs_LocalHero(adp, tdc, OutDirStatus, 1)) { /* we can do it locally */ ObtainWriteLock(&afs_xdcache, 294); code = afs_dir_Create(tdc, aname, &newFid.Fid); ReleaseWriteLock(&afs_xdcache); if (code) { ZapDCE(tdc); /* surprise error -- use invalid value */ DZap(tdc); } } if (tdc) { ReleaseWriteLock(&tdc->lock); afs_PutDCache(tdc); } if (AFS_IS_DISCON_RW) /* We will have to settle with the local link count. */ adp->f.m.LinkCount++; else adp->f.m.LinkCount = OutDirStatus->LinkCount; newFid.Cell = adp->f.fid.Cell; newFid.Fid.Volume = adp->f.fid.Fid.Volume; ReleaseWriteLock(&adp->lock); if (AFS_IS_DISCON_RW) { /* When disconnected, we have to create the full dir here. */ /* Generate a new vcache and fill it. */ tvc = afs_NewVCache(&newFid, NULL); if (tvc) { *avcp = tvc; } else { code = EIO; goto done; } ObtainWriteLock(&tvc->lock, 738); afs_GenDisconStatus(adp, tvc, &newFid, attrs, treq, VDIR); ReleaseWriteLock(&tvc->lock); /* And now make an empty dir, containing . and .. : */ /* Get a new dcache for it first. */ new_dc = afs_GetDCache(tvc, (afs_size_t) 0, treq, &offset, &len, 1); if (!new_dc) { /* printf("afs_mkdir: can't get new dcache for dir.\n"); */ code = EIO; goto done; } ObtainWriteLock(&afs_xdcache, 739); code = afs_dir_MakeDir(new_dc, (afs_int32 *) &newFid.Fid, (afs_int32 *) &adp->f.fid.Fid); ReleaseWriteLock(&afs_xdcache); /* if (code) printf("afs_mkdir: afs_dirMakeDir code = %u\n", code); */ afs_PutDCache(new_dc); ObtainWriteLock(&tvc->lock, 731); /* Update length in the vcache. */ tvc->f.m.Length = new_dc->f.chunkBytes; afs_DisconAddDirty(tvc, VDisconCreate, 1); ReleaseWriteLock(&tvc->lock); } else { /* now we're done with parent dir, create the real dir's cache entry */ tvc = afs_GetVCache(&newFid, treq, NULL, NULL); if (tvc) { code = 0; *avcp = tvc; } else { /* For some reason, we cannot fetch the vcache for our * newly-created dir. */ code = EIO; } } /* if (AFS_DISCON_RW) */ done: AFS_DISCON_UNLOCK(); done3: afs_PutFakeStat(&fakestate); code = afs_CheckCode(code, treq, 26); afs_DestroyReq(treq); done2: osi_FreeSmallSpace(OutFidStatus); osi_FreeSmallSpace(OutDirStatus); return code; }