/**
 * low-level thread entry point.
 *
 * @param[in] rock  opaque pointer to thread worker object
 *
 * @return opaque return pointer from pool entry function
 *
 * @internal
 */
static void *
_afs_tp_worker_run(void * rock)
{
    struct afs_thread_pool_worker * worker = rock;
    struct afs_thread_pool * pool = worker->pool;

    /* register worker with pool */
    MUTEX_ENTER(&pool->lock);
    queue_Append(&pool->thread_list, worker);
    pool->nthreads++;
    MUTEX_EXIT(&pool->lock);

    /* call high-level entry point */
    worker->ret = (*pool->entry)(pool, worker, pool->work_queue, pool->rock);

    /* adjust pool live thread count */
    MUTEX_ENTER(&pool->lock);
    osi_Assert(pool->nthreads);
    queue_Remove(worker);
    pool->nthreads--;
    if (!pool->nthreads) {
        CV_BROADCAST(&pool->shutdown_cv);
        pool->state = AFS_TP_STATE_STOPPED;
    }
    MUTEX_EXIT(&pool->lock);

    _afs_tp_worker_free(worker);

    return NULL;
}
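/*
 * Illustrative sketch (not part of the AFS thread-pool API): the same
 * register/run/deregister pattern expressed with plain pthreads.  The
 * demo_pool type, its entry callback, and the counters below are
 * hypothetical and only show why the worker bumps the thread count before
 * calling the high-level entry point and broadcasts on the last exit.
 */
#include <pthread.h>

struct demo_pool {
    pthread_mutex_t lock;
    pthread_cond_t shutdown_cv;
    int nthreads;
    void *(*entry)(void *rock);     /* hypothetical high-level entry point */
    void *rock;
};

static void *
demo_worker_run(void *rock)
{
    struct demo_pool *pool = rock;
    void *ret;

    pthread_mutex_lock(&pool->lock);    /* register with the pool */
    pool->nthreads++;
    pthread_mutex_unlock(&pool->lock);

    ret = (*pool->entry)(pool->rock);   /* run the high-level entry point */

    pthread_mutex_lock(&pool->lock);    /* deregister; wake shutdown waiter */
    if (--pool->nthreads == 0)
        pthread_cond_broadcast(&pool->shutdown_cv);
    pthread_mutex_unlock(&pool->lock);

    return ret;
}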
/**
 * link a dependency node to a parent and child work node.
 *
 * This links a dependency node such that when the 'parent' work node is
 * done, the 'child' work node can proceed.
 *
 * @param[in] dep     dependency node
 * @param[in] parent  parent node in this dependency
 * @param[in] child   child node in this dependency
 *
 * @return operation status
 *    @retval 0 success
 *
 * @pre
 *   - parent->lock held
 *   - child->lock held
 *   - parent and child in quiescent state
 *
 * @internal
 */
static int
_afs_wq_dep_link_r(struct afs_work_queue_dep_node *dep,
                   struct afs_work_queue_node *parent,
                   struct afs_work_queue_node *child)
{
    int ret = 0;

    /* Each dep node adds a ref to the child node of that dep. We do not
     * do the same for the parent node, since if the only refs remaining
     * for a node are deps in node->dep_children, then the node should be
     * destroyed, and we will destroy the dep nodes when we free the
     * work node. */
    ret = _afs_wq_node_get_r(child);
    if (ret) {
        goto error;
    }

    /* add this dep node to the parent node's list of deps */
    queue_Append(&parent->dep_children, &dep->parent_list);

    dep->child = child;
    dep->parent = parent;

 error:
    return ret;
}
/*
 * Process all events that have expired relative to the current clock time
 * (which is not re-evaluated unless clock_NewTime has been called).
 * The relative time to the next event is returned in the output parameter
 * next and the function returns 1.  If there is no next event,
 * the function returns 0.
 */
int
rxevent_RaiseEvents(struct clock * next)
{
    struct rxevent *qe;
    struct clock now;

#ifdef RXDEBUG
    if (Log) {
        /* sample the clock before printing; otherwise 'now' is uninitialized here */
        clock_GetTime(&now);
        fprintf(Log, "rxevent_RaiseEvents(%ld.%ld)\n", now.sec, now.usec);
    }
#endif

    /*
     * Events are sorted by time, so only scan until an event is found that
     * has not yet timed out
     */
    while (queue_IsNotEmpty(&rxevent_queue)) {
        clock_GetTime(&now);
        qe = queue_First(&rxevent_queue, rxevent);
        if (clock_Lt(&now, &qe->eventTime)) {
            *next = qe->eventTime;
            clock_Sub(next, &now);
            return 1;
        }
        queue_Remove(qe);
        rxevent_nPosted--;
        qe->func(qe, qe->arg, qe->arg1);
        queue_Append(&rxevent_free, qe);
        rxevent_nFree++;
    }
    return 0;
}
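/*
 * Illustrative sketch (hypothetical caller, not the rx listener itself):
 * a dispatch loop would typically run expired events and then use the
 * relative time returned in 'next' as its select()/sleep timeout.
 */
#include <sys/select.h>
#include <unistd.h>

static void
demo_event_loop(void)
{
    struct clock next;

    for (;;) {
        if (rxevent_RaiseEvents(&next)) {
            /* a future event remains; wait no longer than 'next' */
            struct timeval tv;
            tv.tv_sec = next.sec;
            tv.tv_usec = next.usec;
            select(0, NULL, NULL, NULL, &tv);   /* or poll network sockets */
        } else {
            /* no pending events; back off until something is posted */
            sleep(1);
        }
    }
}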
static void *
SalvageChildReaperThread(void * args)
{
    int slot, pid, status;
    struct log_cleanup_node * cleanup;

    assert(pthread_mutex_lock(&worker_lock) == 0);

    /* loop reaping our children */
    while (1) {
        /* wait() won't block unless we have children, so
         * block on the cond var if we're childless */
        while (current_workers == 0) {
            assert(pthread_cond_wait(&worker_cv, &worker_lock) == 0);
        }

        assert(pthread_mutex_unlock(&worker_lock) == 0);

        cleanup = (struct log_cleanup_node *)
            malloc(sizeof(struct log_cleanup_node));

        while (Reap_Child("salvageserver", &pid, &status) < 0) {
            /* try to prevent livelock if something goes wrong */
            sleep(1);
        }

        VOL_LOCK;
        for (slot = 0; slot < Parallel; slot++) {
            if (child_slot[slot] == pid)
                break;
        }
        assert(slot < Parallel);
        child_slot[slot] = 0;
        VOL_UNLOCK;

        SALVSYNC_doneWorkByPid(pid, status);

        assert(pthread_mutex_lock(&worker_lock) == 0);

        if (cleanup) {
            cleanup->pid = pid;
            queue_Append(&log_cleanup_queue, cleanup);
            assert(pthread_cond_signal(&log_cleanup_queue.queue_change_cv) == 0);
        }

        /* ok, we've reaped a child */
        current_workers--;
        assert(pthread_cond_broadcast(&worker_cv) == 0);
    }

    return NULL;
}
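/*
 * Illustrative sketch only: Reap_Child is a salvageserver helper whose body
 * is not shown in this excerpt, so this hypothetical demo_reap_child shows
 * the usual waitpid() shape such a helper would have (returning the reaped
 * pid and raw status, or -1 when there is nothing to reap).
 */
#include <sys/wait.h>

static int
demo_reap_child(const char *who, int *pid, int *status)
{
    int p = waitpid(-1, status, 0);     /* block until any child exits */
    if (p < 0)
        return -1;                      /* no children, or interrupted */
    *pid = p;
    return 0;
}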
/**
 * add an entry to the hash table.
 *
 * @param[in]  dp        disk partition object
 * @param[in]  volid     volume id
 * @param[in]  ent       volume group object
 * @param[out] hash_out  address in which to store pointer to hash entry
 *
 * @pre VOL_LOCK held
 *
 * @return operation status
 *    @retval 0 success
 *    @retval EEXIST hash entry for volid already exists, and it points to
 *                   a different VG entry
 *
 * @internal
 */
static int
_VVGC_hash_entry_add(struct DiskPartition64 * dp,
                     VolumeId volid,
                     VVGCache_entry_t * ent,
                     VVGCache_hash_entry_t ** hash_out)
{
    int code = 0;
    /* initialize to NULL so a failed alloc does not hand back garbage below */
    VVGCache_hash_entry_t * hent = NULL;
    int hash = VVGC_HASH(volid);
    VVGCache_entry_t *nent;

    code = _VVGC_lookup(dp, volid, &nent, hash_out);
    if (!code) {
        if (ent != nent) {
            ViceLog(0, ("_VVGC_hash_entry_add: tried to add a duplicate "
                        " nonmatching entry for vol %lu: original "
                        "(%"AFS_PTR_FMT",%lu) new (%"AFS_PTR_FMT",%lu)\n",
                        afs_printable_uint32_lu(volid),
                        nent, afs_printable_uint32_lu(nent->rw),
                        ent, afs_printable_uint32_lu(ent->rw)));
            return EEXIST;
        }
        ViceLog(1, ("_VVGC_hash_entry_add: tried to add duplicate "
                    "hash entry for vol %lu, VG %lu",
                    afs_printable_uint32_lu(volid),
                    afs_printable_uint32_lu(ent->rw)));
        /* accept attempts to add matching duplicate entries; just
         * pretend we added it */
        return 0;
    }

    code = _VVGC_hash_entry_alloc(&hent);
    if (code) {
        goto done;
    }

    hent->entry = ent;
    hent->dp    = dp;
    hent->volid = volid;
    queue_Append(&VVGCache_hash_table.hash_buckets[hash], hent);

 done:
    if (hash_out) {
        *hash_out = hent;
    }
    return code;
}
/**
 * append to a node list object.
 *
 * @param[in] list   list object
 * @param[in] node   node object
 * @param[in] state  new node state
 *
 * @return operation status
 *    @retval 0 success
 *    @retval AFS_WQ_ERROR raced to enqueue node
 *
 * @pre
 *   - node lock held
 *   - node is not on a list
 *   - node is either not busy, or it is marked as busy by the calling thread
 *
 * @post
 *   - enqueued on list
 *   - node lock dropped
 *
 * @internal
 */
static int
_afs_wq_node_list_enqueue(struct afs_work_queue_node_list * list,
                          struct afs_work_queue_node * node,
                          afs_wq_work_state_t state)
{
    int code, ret = 0;

    if (node->qidx != AFS_WQ_NODE_LIST_NONE) {
        /* raced */
        ret = AFS_WQ_ERROR;
        goto error;
    }

    /* deal with lock inversion */
    code = MUTEX_TRYENTER(&list->lock);
    if (!code) {
        /* contended */
        _afs_wq_node_state_change(node, AFS_WQ_NODE_STATE_BUSY);
        MUTEX_EXIT(&node->lock);
        MUTEX_ENTER(&list->lock);
        MUTEX_ENTER(&node->lock);

        /* assert state of the world (we set busy, so this should never happen) */
        osi_Assert(queue_IsNotOnQueue(node));
    }

    if (list->shutdown) {
        ret = AFS_WQ_ERROR;
        goto error_unlock;
    }

    osi_Assert(node->qidx == AFS_WQ_NODE_LIST_NONE);
    if (queue_IsEmpty(&list->list)) {
        /* wakeup a dequeue thread */
        CV_SIGNAL(&list->cv);
    }
    queue_Append(&list->list, node);
    node->qidx = list->qidx;
    _afs_wq_node_state_change(node, state);

 error_unlock:
    MUTEX_EXIT(&node->lock);
    MUTEX_EXIT(&list->lock);

 error:
    return ret;
}
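/*
 * Illustrative sketch (hypothetical types, plain pthreads) of the same
 * lock-inversion dance used above.  The canonical order is list lock
 * before node lock, but the caller arrives holding only the node lock,
 * so we trylock the list and, on contention, drop the node lock and
 * reacquire both in the canonical order.
 */
#include <pthread.h>

struct demo_list { pthread_mutex_t lock; };
struct demo_node { pthread_mutex_t lock; };

static void
demo_enqueue_locked(struct demo_list *list, struct demo_node *node)
{
    /* caller holds node->lock */
    if (pthread_mutex_trylock(&list->lock) != 0) {
        /* contended: release the node lock, then take both in order */
        pthread_mutex_unlock(&node->lock);
        pthread_mutex_lock(&list->lock);
        pthread_mutex_lock(&node->lock);
        /* node state must be revalidated here, as the real code asserts */
    }
    /* ... append node to list ... */
    pthread_mutex_unlock(&node->lock);
    pthread_mutex_unlock(&list->lock);
}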
/**
 * look through log_watch_queue, and if any processes are not still
 * running, hand them off to the SalvageLogCleanupThread
 *
 * @param log_watch_queue a queue of PIDs that we should clean up if
 * that PID has died
 */
static void
ScanLogs(struct rx_queue *log_watch_queue)
{
    struct log_cleanup_node *cleanup, *next;

    assert(pthread_mutex_lock(&worker_lock) == 0);

    for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) {
        /* if a process is still running, assume it's the salvage process
         * still going, and keep waiting for it */
        if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) {
            queue_Remove(cleanup);
            queue_Append(&log_cleanup_queue, cleanup);
            assert(pthread_cond_signal(&log_cleanup_queue.queue_change_cv) == 0);
        }
    }

    assert(pthread_mutex_unlock(&worker_lock) == 0);
}
/**
 * look through log_watch_queue, and if any processes are not still
 * running, hand them off to the SalvageLogCleanupThread
 *
 * @param log_watch_queue a queue of PIDs that we should clean up if
 * that PID has died
 */
static void
ScanLogs(struct rx_queue *log_watch_queue)
{
    struct log_cleanup_node *cleanup, *next;

    MUTEX_ENTER(&worker_lock);

    for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) {
        /* if a process is still running, assume it's the salvage process
         * still going, and keep waiting for it */
        if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) {
            queue_Remove(cleanup);
            queue_Append(&log_cleanup_queue, cleanup);
            CV_SIGNAL(&log_cleanup_queue.queue_change_cv);
        }
    }

    MUTEX_EXIT(&worker_lock);
}
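/*
 * Illustrative sketch of the kill(pid, 0) probe used above.  Signal 0
 * performs the permission and existence checks without delivering a
 * signal, so ESRCH reliably distinguishes "process has exited" from
 * "process is alive" (EPERM means alive but not ours).  demo_pid_is_dead
 * is a hypothetical helper, not part of the salvageserver.
 */
#include <signal.h>
#include <errno.h>

static int
demo_pid_is_dead(pid_t pid)
{
    return (kill(pid, 0) < 0 && errno == ESRCH);
}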
/**
 * add a VGC entry to the partition's to-delete list.
 *
 * This adds a VGC entry (a parent/child pair) to a list of VGC entries to
 * be deleted from the VGC at the end of a VGC scan.  This is necessary
 * because, while a VGC scan is occurring, volumes may be deleted.  Since a
 * VGC scan scans a partition in VVGC_SCAN_TBL_LEN chunks, a VGC delete
 * operation may delete a volume, only for it to be added again when the
 * VGC scan's table adds it to the VGC.  So when a VGC entry is deleted
 * while a VGC scan is running, this function must be called to ensure the
 * entry does not come back onto the VGC.
 *
 * @param[in] dp      the partition to whose dlist we are adding
 * @param[in] parent  the parent volumeID of the VGC entry
 * @param[in] child   the child volumeID of the VGC entry
 *
 * @return operation status
 *    @retval 0 success
 *    @retval ENOMEM memory allocation error
 *
 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
 *
 * @internal VGC use only
 */
int
_VVGC_dlist_add_r(struct DiskPartition64 *dp, VolumeId parent,
                  VolumeId child)
{
    int bucket = VVGC_HASH(child);
    VVGCache_dlist_entry_t *entry;

    entry = malloc(sizeof(*entry));
    if (!entry) {
        return ENOMEM;
    }

    entry->child = child;
    entry->parent = parent;

    queue_Append(&VVGCache.part[dp->index].dlist_hash_buckets[bucket], entry);
    return 0;
}
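/*
 * Illustrative sketch: how a scan might consult the to-delete list before
 * re-adding an entry.  demo_dlist_contains_r is hypothetical; the real VGC
 * has its own lookup routine, whose exact shape is not shown in this
 * excerpt.  It walks the same hash bucket the add routine appends to,
 * using the queue_Scan idiom seen elsewhere in this code.
 */
static int
demo_dlist_contains_r(struct DiskPartition64 *dp, VolumeId parent,
                      VolumeId child)
{
    int bucket = VVGC_HASH(child);
    VVGCache_dlist_entry_t *ent, *nent;

    for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
                    ent, nent, VVGCache_dlist_entry)) {
        if (ent->child == child && ent->parent == parent) {
            return 1;   /* deleted during this scan; do not re-add */
        }
    }
    return 0;
}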
/**
 * Thread to look for SalvageLog.$pid files that are not from our child
 * worker salvagers, and notify SalvageLogCleanupThread to clean them
 * up. This can happen if we restart during salvages, or the
 * salvageserver crashes or something.
 *
 * @param arg unused
 *
 * @return always NULL
 */
static void *
SalvageLogScanningThread(void * arg)
{
    struct rx_queue log_watch_queue;

    queue_Init(&log_watch_queue);

    {
        DIR *dp;
        struct dirent *dirp;
        char prefix[AFSDIR_PATH_MAX];
        size_t prefix_len;

        afs_snprintf(prefix, sizeof(prefix), "%s.", AFSDIR_SLVGLOG_FILE);
        prefix_len = strlen(prefix);

        dp = opendir(AFSDIR_LOGS_DIR);
        assert(dp);

        while ((dirp = readdir(dp)) != NULL) {
            pid_t pid;
            struct log_cleanup_node *cleanup;
            int i;

            if (strncmp(dirp->d_name, prefix, prefix_len) != 0) {
                /* not a salvage logfile; skip */
                continue;
            }

            errno = 0;
            pid = strtol(dirp->d_name + prefix_len, NULL, 10);

            if (errno != 0) {
                /* file is SalvageLog.<something> but <something> isn't
                 * a pid, so skip */
                continue;
            }

            VOL_LOCK;
            for (i = 0; i < Parallel; ++i) {
                if (pid == child_slot[i]) {
                    break;
                }
            }
            VOL_UNLOCK;
            if (i < Parallel) {
                /* this pid is one of our children, so the reaper thread
                 * will take care of it; skip */
                continue;
            }

            cleanup = (struct log_cleanup_node *)
                malloc(sizeof(struct log_cleanup_node));
            if (!cleanup) {
                /* out of memory; skip this log and keep scanning */
                continue;
            }
            cleanup->pid = pid;

            queue_Append(&log_watch_queue, cleanup);
        }

        closedir(dp);
    }

    ScanLogs(&log_watch_queue);

    while (queue_IsNotEmpty(&log_watch_queue)) {
        sleep(SALVAGE_SCAN_POLL_INTERVAL);
        ScanLogs(&log_watch_queue);
    }

    return NULL;
}
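/*
 * Illustrative sketch: strtol() only sets errno on overflow, so the check
 * above still accepts names like "SalvageLog." or "SalvageLog.12abc" as
 * pids.  A hypothetical stricter parse (demo_parse_pid, not part of the
 * salvageserver) would use the end pointer as well.
 */
#include <stdlib.h>
#include <errno.h>

static int
demo_parse_pid(const char *s, pid_t *pid_out)
{
    char *end;
    long val;

    errno = 0;
    val = strtol(s, &end, 10);
    if (errno != 0 || end == s || *end != '\0' || val <= 0)
        return -1;      /* empty, trailing junk, overflow, or bogus value */
    *pid_out = (pid_t)val;
    return 0;
}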
/* Add the indicated event (function, arg) at the specified clock time */
struct rxevent *
rxevent_Post(struct clock * when, void (*func)(), void *arg, void *arg1)
     /* when - When event should happen, in clock (clock.h) units */
{
    struct rxevent *ev, *qe, *qpr;

#ifdef RXDEBUG
    if (Log) {
        struct clock now;
        clock_GetTime(&now);
        fprintf(Log, "%ld.%ld: rxevent_Post(%ld.%ld, %p, %p)\n",
                now.sec, now.usec, when->sec, when->usec, func, arg);
    }
#endif
#if defined(AFS_SGIMP_ENV)
    ASSERT(osi_rxislocked());
#endif

    /*
     * If we're short on free event entries, create a block of new ones and
     * add them to the free queue
     */
    if (queue_IsEmpty(&rxevent_free)) {
        int i;
#if defined(AFS_AIX32_ENV) && defined(KERNEL)
        ev = (struct rxevent *) rxi_Alloc(sizeof(struct rxevent));
        queue_Append(&rxevent_free, &ev[0]), rxevent_nFree++;
#else
        ev = (struct rxevent *) osi_Alloc(sizeof(struct rxevent)
                                          * rxevent_allocUnit);
        xsp = xfreemallocs;
        xfreemallocs = (struct xfreelist *) ev;
        xfreemallocs->next = xsp;
        for (i = 0; i < rxevent_allocUnit; i++)
            queue_Append(&rxevent_free, &ev[i]), rxevent_nFree++;
#endif
    }

    /* Grab and initialize a new rxevent structure */
    ev = queue_First(&rxevent_free, rxevent);
    queue_Remove(ev);
    rxevent_nFree--;

    /* Record user defined event state */
    ev->eventTime = *when;
    ev->func = func;
    ev->arg = arg;
    ev->arg1 = arg1;
    rxevent_nPosted += 1;       /* Rather than ++, to shut high-C up
                                 * regarding never-set variables */

    /*
     * Locate a slot for the new entry.  The queue is ordered by time, and we
     * assume that a new entry is likely to be greater than a majority of the
     * entries already on the queue (unless there's very few entries on the
     * queue), so we scan it backwards
     */
    for (queue_ScanBackwards(&rxevent_queue, qe, qpr, rxevent)) {
        if (clock_Ge(when, &qe->eventTime)) {
            queue_InsertAfter(qe, ev);
            return ev;
        }
    }

    /* The event is to expire earlier than any existing events */
    queue_Prepend(&rxevent_queue, ev);
    if (rxevent_ScheduledEarlierEvent)
        (*rxevent_ScheduledEarlierEvent) ();    /* Notify our external
                                                 * scheduler */
    return ev;
}
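/*
 * Illustrative sketch: posting a one-shot event with this interface.
 * demo_retry and demo_schedule_retry are hypothetical; the "current time
 * plus an offset, then rxevent_Post" pattern is how callers typically use
 * it, and the handler signature matches the qe->func(qe, arg, arg1) call
 * made by rxevent_RaiseEvents above.
 */
static void
demo_retry(struct rxevent *ev, void *arg, void *arg1)
{
    /* runs from rxevent_RaiseEvents once the deadline has passed */
}

static struct rxevent *
demo_schedule_retry(void *arg, int seconds)
{
    struct clock when;

    clock_GetTime(&when);       /* start from the current time */
    when.sec += seconds;        /* fire 'seconds' from now */
    return rxevent_Post(&when, demo_retry, arg, NULL);
}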
int
rxi_WritevAlloc(struct rx_call *call, struct iovec *iov, int *nio, int maxio,
                int nbytes)
{
    struct rx_connection *conn = call->conn;
    struct rx_packet *cp = call->currentPacket;
    int requestCount;
    int nextio;
    /* Temporary values, real work is done in rxi_WritevProc */
    int tnFree;
    unsigned int tcurvec;
    char *tcurpos;
    int tcurlen;

    requestCount = nbytes;
    nextio = 0;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode != RX_MODE_SENDING) {
        if ((conn->type == RX_SERVER_CONNECTION)
            && (call->mode == RX_MODE_RECEIVING)) {
            call->mode = RX_MODE_SENDING;
            if (cp) {
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_CP;
#endif
                rxi_FreePacket(cp);
                cp = call->currentPacket = (struct rx_packet *)0;
                call->nLeft = 0;
                call->nFree = 0;
            }
        } else {
            return 0;
        }
    }

    /* Set up the iovec to point to data in packet buffers. */
    tnFree = call->nFree;
    tcurvec = call->curvec;
    tcurpos = call->curpos;
    tcurlen = call->curlen;

    do {
        int t;

        if (tnFree == 0) {
            /* current packet is full, allocate a new one */
            MUTEX_ENTER(&call->lock);
            cp = rxi_AllocSendPacket(call, nbytes);
            MUTEX_EXIT(&call->lock);
            if (cp == NULL) {
                /* out of space, return what we have */
                *nio = nextio;
                return requestCount - nbytes;
            }
#ifdef RX_TRACK_PACKETS
            cp->flags |= RX_PKTFLAG_IOVQ;
#endif
            queue_Append(&call->iovq, cp);
#ifdef RXDEBUG_PACKET
            call->iovqc++;
#endif /* RXDEBUG_PACKET */
            tnFree = cp->length;
            tcurvec = 1;
            tcurpos =
                (char *)cp->wirevec[1].iov_base +
                call->conn->securityHeaderSize;
            tcurlen = cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
        }

        if (tnFree < nbytes) {
            /* try to extend the current packet */
            int len, mud;
            len = cp->length;
            mud = rx_MaxUserDataSize(call);
            if (mud > len) {
                int want;
                want = MIN(nbytes - tnFree, mud - len);
                rxi_AllocDataBuf(cp, want, RX_PACKET_CLASS_SEND_CBUF);
                if (cp->length > (unsigned)mud)
                    cp->length = mud;
                tnFree += (cp->length - len);
                if (cp == call->currentPacket) {
                    call->nFree += (cp->length - len);
                }
            }
        }

        /* fill in the next entry in the iovec */
        t = MIN(tcurlen, nbytes);
        t = MIN(tnFree, t);
        iov[nextio].iov_base = tcurpos;
        iov[nextio].iov_len = t;
        nbytes -= t;
        tcurpos += t;
        tcurlen -= t;
        tnFree -= t;
        nextio++;

        if (!tcurlen) {
            /* need to get another struct iov */
            if (++tcurvec >= cp->niovecs) {
                /* current packet is full, extend it or move on to next packet */
                tnFree = 0;
            } else {
                tcurpos = (char *)cp->wirevec[tcurvec].iov_base;
                tcurlen = cp->wirevec[tcurvec].iov_len;
            }
        }
    } while (nbytes && nextio < maxio);
    *nio = nextio;
    return requestCount - nbytes;
}
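/*
 * Illustrative sketch of the intended pairing (hypothetical caller): the
 * iovec entries returned by rxi_WritevAlloc point directly into packet
 * buffers already queued on call->iovq, so the caller marshals data into
 * them and then hands the same iovec back to rxi_WritevProc to send.
 * RX_MAXIOVECS is assumed to be the usual rx iovec bound; demo_writev is
 * not part of the rx API.
 */
#include <string.h>

static int
demo_writev(struct rx_call *call, const char *data, int nbytes)
{
    struct iovec iov[RX_MAXIOVECS];
    int nio = 0, off = 0, i;
    int avail = rxi_WritevAlloc(call, iov, &nio, RX_MAXIOVECS, nbytes);

    if (avail <= 0)
        return 0;               /* no buffer space was available */

    for (i = 0; i < nio && off < avail; i++) {
        int chunk = iov[i].iov_len;
        if (chunk > avail - off)
            chunk = avail - off;
        memcpy(iov[i].iov_base, data + off, chunk);     /* marshal in place */
        off += chunk;
    }

    /* hand the filled buffers back; returns the number of bytes queued */
    return rxi_WritevProc(call, iov, nio, off);
}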
int
rxi_WriteProc(struct rx_call *call, char *buf, int nbytes)
{
    struct rx_connection *conn = call->conn;
    struct rx_packet *cp = call->currentPacket;
    unsigned int t;
    int requestCount = nbytes;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode != RX_MODE_SENDING) {
        if ((conn->type == RX_SERVER_CONNECTION)
            && (call->mode == RX_MODE_RECEIVING)) {
            call->mode = RX_MODE_SENDING;
            if (cp) {
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_CP;
#endif
                rxi_FreePacket(cp);
                cp = call->currentPacket = (struct rx_packet *)0;
                call->nLeft = 0;
                call->nFree = 0;
            }
        } else {
            return 0;
        }
    }

    /* Loop condition is checked at end, so that a write of 0 bytes
     * will force a packet to be created--specially for the case where
     * there are 0 bytes on the stream, but we must send a packet
     * anyway. */
    do {
        if (call->nFree == 0) {
            MUTEX_ENTER(&call->lock);
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
            rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
            cp = call->currentPacket;
            if (call->error)
                call->mode = RX_MODE_ERROR;
            if (!call->error && cp) {
                /* Clear the current packet now so that if
                 * we are forced to wait and drop the lock
                 * the packet we are planning on using
                 * cannot be freed. */
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_CP;
#endif
                call->currentPacket = (struct rx_packet *)0;
                clock_NewTime();        /* Bogus:  need new time package */
                /* The 0, below, specifies that it is not the last packet:
                 * there will be others. PrepareSendPacket may
                 * alter the packet length by up to
                 * conn->securityMaxTrailerSize */
                hadd32(call->bytesSent, cp->length);
                rxi_PrepareSendPacket(call, cp, 0);
#ifdef RX_TRACK_PACKETS
                cp->flags |= RX_PKTFLAG_TQ;
#endif
                queue_Append(&call->tq, cp);
#ifdef RXDEBUG_PACKET
                call->tqc++;
#endif /* RXDEBUG_PACKET */
                cp = (struct rx_packet *)0;
                if (!(call->flags & (RX_CALL_FAST_RECOVER
                                     | RX_CALL_FAST_RECOVER_WAIT))) {
                    rxi_Start(0, call, 0, 0);
                }
            } else if (cp) {
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_CP;
#endif
                rxi_FreePacket(cp);
                cp = call->currentPacket = (struct rx_packet *)0;
            }

            /* Wait for transmit window to open up */
            while (!call->error
                   && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
                clock_NewTime();
                call->startWait = clock_Sec();

#ifdef RX_ENABLE_LOCKS
                CV_WAIT(&call->cv_twind, &call->lock);
#else
                call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
                osi_rxSleep(&call->twind);
#endif

                call->startWait = 0;
#ifdef RX_ENABLE_LOCKS
                if (call->error) {
                    call->mode = RX_MODE_ERROR;
                    MUTEX_EXIT(&call->lock);
                    return 0;
                }
#endif /* RX_ENABLE_LOCKS */
            }

            if ((cp = rxi_AllocSendPacket(call, nbytes))) {
#ifdef RX_TRACK_PACKETS
                cp->flags |= RX_PKTFLAG_CP;
#endif
                call->currentPacket = cp;
                call->nFree = cp->length;
                call->curvec = 1;       /* 0th vec is always header */
                /* begin at the beginning [ more or less ], continue
                 * on until the end, then stop. */
                call->curpos =
                    (char *)cp->wirevec[1].iov_base +
                    call->conn->securityHeaderSize;
                call->curlen =
                    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
            }

            if (call->error) {
                call->mode = RX_MODE_ERROR;
                if (cp) {
#ifdef RX_TRACK_PACKETS
                    cp->flags &= ~RX_PKTFLAG_CP;
#endif
                    rxi_FreePacket(cp);
                    call->currentPacket = NULL;
                }
                MUTEX_EXIT(&call->lock);
                return 0;
            }
            MUTEX_EXIT(&call->lock);
        }

        if (cp && (int)call->nFree < nbytes) {
            /* Try to extend the current buffer */
            int len, mud;
            len = cp->length;
            mud = rx_MaxUserDataSize(call);
            if (mud > len) {
                int want;
                want = MIN(nbytes - (int)call->nFree, mud - len);
                rxi_AllocDataBuf(cp, want, RX_PACKET_CLASS_SEND_CBUF);
                if (cp->length > (unsigned)mud)
                    cp->length = mud;
                call->nFree += (cp->length - len);
            }
        }

        /* If the remaining bytes fit in the buffer, then store them
         * and return.  Don't ship a buffer that's full immediately to
         * the peer--we don't know if it's the last buffer yet */

        if (!cp) {
            call->nFree = 0;
        }

        while (nbytes && call->nFree) {

            t = MIN((int)call->curlen, nbytes);
            t = MIN((int)call->nFree, t);
            memcpy(call->curpos, buf, t);
            buf += t;
            nbytes -= t;
            call->curpos += t;
            call->curlen -= (u_short)t;
            call->nFree -= (u_short)t;

            if (!call->curlen) {
                /* need to get another struct iov */
                if (++call->curvec >= cp->niovecs) {
                    /* current packet is full, extend or send it */
                    call->nFree = 0;
                } else {
                    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
                    call->curlen = cp->wirevec[call->curvec].iov_len;
                }
            }
        }                       /* while bytes to send and room to send them */

        /* might be out of space now */
        if (!nbytes) {
            return requestCount;
        }
        /* more data to send, so get another packet and keep going */
    } while (nbytes);

    return requestCount - nbytes;
}
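/*
 * Illustrative sketch (hypothetical caller): rxi_WriteProc returns the
 * number of bytes it accepted, so a short return indicates the call has
 * gone into an error state and the write should be abandoned.
 */
static int
demo_send_blob(struct rx_call *call, char *blob, int len)
{
    int written = rxi_WriteProc(call, blob, len);
    if (written != len) {
        /* call->error was set under call->lock; the stream is dead */
        return -1;
    }
    return 0;
}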
/* rxi_FillReadVec
 *
 * Uses packets in the receive queue to fill in as much of the
 * current iovec as possible. Does not block if it runs out
 * of packets to complete the iovec. Return true if an ack packet
 * was sent, otherwise return false */
int
rxi_FillReadVec(struct rx_call *call, afs_uint32 serial)
{
    int didConsume = 0;
    int didHardAck = 0;
    unsigned int t;
    struct rx_packet *rp;
    struct rx_packet *curp;
    struct iovec *call_iov;
    struct iovec *cur_iov = NULL;

    curp = call->currentPacket;
    if (curp) {
        cur_iov = &curp->wirevec[call->curvec];
    }
    call_iov = &call->iov[call->iovNext];

    while (!call->error && call->iovNBytes && call->iovNext < call->iovMax) {
        if (call->nLeft == 0) {
            /* Get next packet */
            if (queue_IsNotEmpty(&call->rq)) {
                /* Check that next packet available is next in sequence */
                rp = queue_First(&call->rq, rx_packet);
                if (rp->header.seq == call->rnext) {
                    afs_int32 error;
                    struct rx_connection *conn = call->conn;
                    queue_Remove(rp);
#ifdef RX_TRACK_PACKETS
                    rp->flags &= ~RX_PKTFLAG_RQ;
#endif
#ifdef RXDEBUG_PACKET
                    call->rqc--;
#endif /* RXDEBUG_PACKET */

                    /* RXS_CheckPacket called to undo RXS_PreparePacket's
                     * work.  It may reduce the length of the packet by up
                     * to conn->maxTrailerSize, to reflect the length of the
                     * data + the header. */
                    if ((error =
                         RXS_CheckPacket(conn->securityObject, call, rp))) {
                        /* Used to merely shut down the call, but now we
                         * shut down the whole connection since this may
                         * indicate an attempt to hijack it */

                        MUTEX_EXIT(&call->lock);
                        rxi_ConnectionError(conn, error);
                        MUTEX_ENTER(&conn->conn_data_lock);
                        rp = rxi_SendConnectionAbort(conn, rp, 0, 0);
                        MUTEX_EXIT(&conn->conn_data_lock);
                        rxi_FreePacket(rp);
                        MUTEX_ENTER(&call->lock);

                        return 1;
                    }
                    call->rnext++;
                    curp = call->currentPacket = rp;
#ifdef RX_TRACK_PACKETS
                    call->currentPacket->flags |= RX_PKTFLAG_CP;
#endif
                    call->curvec = 1;   /* 0th vec is always header */
                    cur_iov = &curp->wirevec[1];
                    /* begin at the beginning [ more or less ], continue
                     * on until the end, then stop. */
                    call->curpos =
                        (char *)curp->wirevec[1].iov_base +
                        call->conn->securityHeaderSize;
                    call->curlen =
                        curp->wirevec[1].iov_len -
                        call->conn->securityHeaderSize;

                    /* Notice that this code works correctly if the data
                     * size is 0 (which it may be--no reply arguments from
                     * server, for example).  This relies heavily on the
                     * fact that the code below immediately frees the packet
                     * (no yields, etc.).  If it didn't, this would be a
                     * problem because a value of zero for call->nLeft
                     * normally means that there is no read packet */
                    call->nLeft = curp->length;
                    hadd32(call->bytesRcvd, curp->length);

                    /* Send a hard ack for every rxi_HardAckRate+1 packets
                     * consumed. Otherwise schedule an event to send
                     * the hard ack later on. */
                    call->nHardAcks++;
                    didConsume = 1;
                    continue;
                }
            }
            break;
        }

        /* It's possible for call->nLeft to be smaller than any particular
         * iov_len.  Usually, recvmsg doesn't change the iov_len, since it
         * reflects the size of the buffer.  We have to keep track of the
         * number of bytes read in the length field of the packet struct.  On
         * the final portion of a received packet, it's almost certain that
         * call->nLeft will be smaller than the final buffer. */
        while (call->iovNBytes && call->iovNext < call->iovMax && curp) {

            t = MIN((int)call->curlen, call->iovNBytes);
            t = MIN(t, (int)call->nLeft);
            call_iov->iov_base = call->curpos;
            call_iov->iov_len = t;
            call_iov++;
            call->iovNext++;
            call->iovNBytes -= t;
            call->curpos += t;
            call->curlen -= t;
            call->nLeft -= t;

            if (!call->nLeft) {
                /* out of packet.  Get another one. */
#ifdef RX_TRACK_PACKETS
                curp->flags &= ~RX_PKTFLAG_CP;
                curp->flags |= RX_PKTFLAG_IOVQ;
#endif
                queue_Append(&call->iovq, curp);
#ifdef RXDEBUG_PACKET
                call->iovqc++;
#endif /* RXDEBUG_PACKET */
                curp = call->currentPacket = (struct rx_packet *)0;
            } else if (!call->curlen) {
                /* need to get another struct iov */
                if (++call->curvec >= curp->niovecs) {
                    /* current packet is exhausted, get ready for another */
                    /* don't worry about curvec and stuff, they get set somewhere else */
#ifdef RX_TRACK_PACKETS
                    curp->flags &= ~RX_PKTFLAG_CP;
                    curp->flags |= RX_PKTFLAG_IOVQ;
#endif
                    queue_Append(&call->iovq, curp);
#ifdef RXDEBUG_PACKET
                    call->iovqc++;
#endif /* RXDEBUG_PACKET */
                    curp = call->currentPacket = (struct rx_packet *)0;
                    call->nLeft = 0;
                } else {
                    cur_iov++;
                    call->curpos = (char *)cur_iov->iov_base;
                    call->curlen = cur_iov->iov_len;
                }
            }
        }
    }

    /* If we consumed any packets then check whether we need to
     * send a hard ack. */
    if (didConsume && (!(call->flags & RX_CALL_RECEIVE_DONE))) {
        if (call->nHardAcks > (u_short) rxi_HardAckRate) {
            rxevent_Cancel(call->delayedAckEvent, call,
                           RX_CALL_REFCOUNT_DELAY);
            rxi_SendAck(call, 0, serial, RX_ACK_DELAY, 0);
            didHardAck = 1;
        } else {
            struct clock when, now;
            clock_GetTime(&now);
            when = now;
            /* Delay to consolidate ack packets */
            clock_Add(&when, &rx_hardAckDelay);
            if (!call->delayedAckEvent
                || clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
                rxevent_Cancel(call->delayedAckEvent, call,
                               RX_CALL_REFCOUNT_DELAY);
                MUTEX_ENTER(&rx_refcnt_mutex);
                CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
                MUTEX_EXIT(&rx_refcnt_mutex);
                call->delayedAckEvent =
                    rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
            }
        }
    }
    return didHardAck;
}
/* Flush any buffered data to the stream, switch to read mode
 * (clients) or to EOF mode (servers)
 *
 * LOCKS HELD: called at netpri.
 */
void
rxi_FlushWrite(struct rx_call *call)
{
    struct rx_packet *cp = NULL;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode == RX_MODE_SENDING) {

        call->mode =
            (call->conn->type ==
             RX_CLIENT_CONNECTION ? RX_MODE_RECEIVING : RX_MODE_EOF);

#ifdef RX_KERNEL_TRACE
        {
            int glockOwner = ISAFS_GLOCK();
            if (!glockOwner)
                AFS_GLOCK();
            afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
                       __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
                       call);
            if (!glockOwner)
                AFS_GUNLOCK();
        }
#endif

        MUTEX_ENTER(&call->lock);
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
        rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
        if (call->error)
            call->mode = RX_MODE_ERROR;

        cp = call->currentPacket;

        if (cp) {
            /* cp->length is only supposed to be the user's data */
            /* cp->length was already set to (then-current)
             * MaxUserDataSize or less. */
#ifdef RX_TRACK_PACKETS
            cp->flags &= ~RX_PKTFLAG_CP;
#endif
            cp->length -= call->nFree;
            call->currentPacket = (struct rx_packet *)0;
            call->nFree = 0;
        } else {
            cp = rxi_AllocSendPacket(call, 0);
            if (!cp) {
                /* Mode can no longer be MODE_SENDING */
                /* drop the call lock taken above before bailing out */
                MUTEX_EXIT(&call->lock);
                return;
            }
            cp->length = 0;
            cp->niovecs = 2;    /* header + space for rxkad stuff */
            call->nFree = 0;
        }

        /* The 1 specifies that this is the last packet */
        hadd32(call->bytesSent, cp->length);
        rxi_PrepareSendPacket(call, cp, 1);
#ifdef RX_TRACK_PACKETS
        cp->flags |= RX_PKTFLAG_TQ;
#endif
        queue_Append(&call->tq, cp);
#ifdef RXDEBUG_PACKET
        call->tqc++;
#endif /* RXDEBUG_PACKET */
        if (!(call->flags & (RX_CALL_FAST_RECOVER
                             | RX_CALL_FAST_RECOVER_WAIT))) {
            rxi_Start(0, call, 0, 0);
        }
        MUTEX_EXIT(&call->lock);
    }
}
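/*
 * Illustrative sketch (hypothetical client-side helper): a client writes
 * its request, and the switch to RX_MODE_RECEIVING happens when the write
 * side is flushed; only then can the reply be read.  rxi_ReadProc is
 * assumed here to be the read-side peer of rxi_WriteProc; the exact entry
 * points a real caller uses are not shown in this excerpt, so the names
 * below are only indicative.
 */
static int
demo_client_round_trip(struct rx_call *call, char *req, int reqlen,
                       char *reply, int replylen)
{
    if (rxi_WriteProc(call, req, reqlen) != reqlen)
        return -1;              /* write side failed */

    rxi_FlushWrite(call);       /* last packet; client switches to receiving */

    return rxi_ReadProc(call, reply, replylen);     /* assumed read-side
                                                     * counterpart */
}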
/* rxi_WritevProc -- internal version.
 *
 * Send buffers allocated in rxi_WritevAlloc.
 *
 * LOCKS USED -- called at netpri.
 */
int
rxi_WritevProc(struct rx_call *call, struct iovec *iov, int nio, int nbytes)
{
    struct rx_packet *cp = NULL;
#ifdef RX_TRACK_PACKETS
    struct rx_packet *p, *np;
#endif
    int nextio;
    int requestCount;
    struct rx_queue tmpq;
#ifdef RXDEBUG_PACKET
    u_short tmpqc;
#endif

    requestCount = nbytes;
    nextio = 0;

    MUTEX_ENTER(&call->lock);
    if (call->error) {
        call->mode = RX_MODE_ERROR;
    } else if (call->mode != RX_MODE_SENDING) {
        call->error = RX_PROTOCOL_ERROR;
    }
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
    rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
    cp = call->currentPacket;

    if (call->error) {
        call->mode = RX_MODE_ERROR;
        MUTEX_EXIT(&call->lock);
        if (cp) {
#ifdef RX_TRACK_PACKETS
            cp->flags &= ~RX_PKTFLAG_CP;
            cp->flags |= RX_PKTFLAG_IOVQ;
#endif
            queue_Prepend(&call->iovq, cp);
#ifdef RXDEBUG_PACKET
            call->iovqc++;
#endif /* RXDEBUG_PACKET */
            call->currentPacket = (struct rx_packet *)0;
        }
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
        return 0;
    }

    /* Loop through the I/O vector adjusting packet pointers.
     * Place full packets back onto the iovq once they are ready
     * to send. Set RX_PROTOCOL_ERROR if any problems are found in
     * the iovec. We put the loop condition at the end to ensure that
     * a zero length write will push a short packet. */
    nextio = 0;
    queue_Init(&tmpq);
#ifdef RXDEBUG_PACKET
    tmpqc = 0;
#endif /* RXDEBUG_PACKET */
    do {
        if (call->nFree == 0 && cp) {
            clock_NewTime();    /* Bogus:  need new time package */
            /* The 0, below, specifies that it is not the last packet:
             * there will be others. PrepareSendPacket may
             * alter the packet length by up to
             * conn->securityMaxTrailerSize */
            hadd32(call->bytesSent, cp->length);
            rxi_PrepareSendPacket(call, cp, 0);
            queue_Append(&tmpq, cp);
#ifdef RXDEBUG_PACKET
            tmpqc++;
#endif /* RXDEBUG_PACKET */
            cp = call->currentPacket = (struct rx_packet *)0;

            /* The head of the iovq is now the current packet */
            if (nbytes) {
                if (queue_IsEmpty(&call->iovq)) {
                    MUTEX_EXIT(&call->lock);
                    call->error = RX_PROTOCOL_ERROR;
#ifdef RXDEBUG_PACKET
                    tmpqc -=
#endif /* RXDEBUG_PACKET */
                        rxi_FreePackets(0, &tmpq);
                    return 0;
                }
                cp = queue_First(&call->iovq, rx_packet);
                queue_Remove(cp);
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_IOVQ;
#endif
#ifdef RXDEBUG_PACKET
                call->iovqc--;
#endif /* RXDEBUG_PACKET */
#ifdef RX_TRACK_PACKETS
                cp->flags |= RX_PKTFLAG_CP;
#endif
                call->currentPacket = cp;
                call->nFree = cp->length;
                call->curvec = 1;
                call->curpos =
                    (char *)cp->wirevec[1].iov_base +
                    call->conn->securityHeaderSize;
                call->curlen =
                    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
            }
        }

        if (nbytes) {
            /* The next iovec should point to the current position */
            if (iov[nextio].iov_base != call->curpos
                || iov[nextio].iov_len > (int)call->curlen) {
                call->error = RX_PROTOCOL_ERROR;
                MUTEX_EXIT(&call->lock);
                if (cp) {
#ifdef RX_TRACK_PACKETS
                    cp->flags &= ~RX_PKTFLAG_CP;
#endif
                    queue_Prepend(&tmpq, cp);
#ifdef RXDEBUG_PACKET
                    tmpqc++;
#endif /* RXDEBUG_PACKET */
                    cp = call->currentPacket = (struct rx_packet *)0;
                }
#ifdef RXDEBUG_PACKET
                tmpqc -=
#endif /* RXDEBUG_PACKET */
                    rxi_FreePackets(0, &tmpq);
                return 0;
            }
            nbytes -= iov[nextio].iov_len;
            call->curpos += iov[nextio].iov_len;
            call->curlen -= iov[nextio].iov_len;
            call->nFree -= iov[nextio].iov_len;
            nextio++;
            if (call->curlen == 0) {
                if (++call->curvec > cp->niovecs) {
                    call->nFree = 0;
                } else {
                    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
                    call->curlen = cp->wirevec[call->curvec].iov_len;
                }
            }
        }
    } while (nbytes && nextio < nio);

    /* Move the packets from the temporary queue onto the transmit queue.
     * We may end up with more than call->twind packets on the queue. */
#ifdef RX_TRACK_PACKETS
    for (queue_Scan(&tmpq, p, np, rx_packet)) {
        p->flags |= RX_PKTFLAG_TQ;
    }
#endif

    if (call->error)
        call->mode = RX_MODE_ERROR;

    queue_SpliceAppend(&call->tq, &tmpq);

    if (!(call->flags & (RX_CALL_FAST_RECOVER | RX_CALL_FAST_RECOVER_WAIT))) {
        rxi_Start(0, call, 0, 0);
    }

    /* Wait for the length of the transmit queue to fall below call->twind */
    while (!call->error && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
        clock_NewTime();
        call->startWait = clock_Sec();
#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&call->cv_twind, &call->lock);
#else
        call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
        osi_rxSleep(&call->twind);
#endif
        call->startWait = 0;
    }

    /* cp is no longer valid since we may have given up the lock */
    cp = call->currentPacket;

    if (call->error) {
        call->mode = RX_MODE_ERROR;
        call->currentPacket = NULL;
        MUTEX_EXIT(&call->lock);
        if (cp) {
#ifdef RX_TRACK_PACKETS
            cp->flags &= ~RX_PKTFLAG_CP;
#endif
            rxi_FreePacket(cp);
        }
        return 0;
    }
    MUTEX_EXIT(&call->lock);

    return requestCount - nbytes;
}