/**
 * lookup a vg cache entry given any member volume id.
 *
 * @param[in]  dp         disk partition object
 * @param[in]  volid      vg member volume id
 * @param[out] entry_out  address in which to store volume group entry structure pointer
 * @param[out] hash_out   address in which to store hash entry pointer
 *
 * @pre VOL_LOCK held
 *
 * @warning - it is up to the caller to get a ref to entry_out, if needed
 *          - hash_out must not be referenced after dropping VOL_LOCK
 *
 * @return operation status
 *    @retval 0 success
 *    @retval ENOENT volume id not found
 *    @retval EINVAL partition's VGC is invalid
 *
 * @internal
 */
static int
_VVGC_lookup(struct DiskPartition64 * dp,
             VolumeId volid,
             VVGCache_entry_t ** entry_out,
             VVGCache_hash_entry_t ** hash_out)
{
    int code = ENOENT;
    int bucket = VVGC_HASH(volid);
    struct VVGCache_hash_entry * ent, * nent;

    if (VVGCache.part[dp->index].state == VVGC_PART_STATE_INVALID) {
        return EINVAL;
    }

    *entry_out = NULL;

    for (queue_Scan(&VVGCache_hash_table.hash_buckets[bucket],
                    ent, nent,
                    VVGCache_hash_entry)) {
        if (ent->volid == volid && ent->dp == dp) {
            code = 0;
            *entry_out = ent->entry;
            if (hash_out) {
                *hash_out = ent;
            }
            break;
        }
    }

    return code;
}
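/*
 * Illustrative sketch (not part of the original source): how a caller might
 * use _VVGC_lookup while honoring its contract.  Per the @pre and @warning
 * above, the caller must hold VOL_LOCK across the lookup, must take its own
 * reference on the returned entry before dropping the lock, and must not
 * touch hash_out after the lock is released.  VVGCache_entry_ref() is an
 * assumed name for the ref-taking primitive; the real helper may differ.
 */
static int
example_get_vg_entry(struct DiskPartition64 *dp, VolumeId volid,
                     VVGCache_entry_t **entry_out)
{
    int code;

    VOL_LOCK;
    code = _VVGC_lookup(dp, volid, entry_out, NULL);
    if (code == 0) {
        /* take our own ref while still under VOL_LOCK (assumed helper) */
        VVGCache_entry_ref(*entry_out);
    }
    VOL_UNLOCK;
    return code;
}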
/**
 * wake up all threads waiting in dequeue.
 *
 * @param[in] list  list object
 *
 * @return operation status
 *    @retval 0 success
 *
 * @internal
 */
static int
_afs_wq_node_list_shutdown(struct afs_work_queue_node_list * list)
{
    int ret = 0;
    struct afs_work_queue_node *node, *nnode;

    MUTEX_ENTER(&list->lock);
    list->shutdown = 1;

    for (queue_Scan(&list->list, node, nnode, afs_work_queue_node)) {
        _afs_wq_node_state_change(node, AFS_WQ_NODE_STATE_ERROR);
        queue_Remove(node);
        node->qidx = AFS_WQ_NODE_LIST_NONE;
        node->queue = NULL;

        if (node->detached) {
            /* if we are detached, we hold the reference on the node;
             * otherwise, it is some other caller that holds the reference.
             * So don't put the node if we are not detached; the node will
             * get freed when someone else calls afs_wq_node_put */
            afs_wq_node_put(node);
        }
    }

    CV_BROADCAST(&list->cv);
    MUTEX_EXIT(&list->lock);

    return ret;
}
/**
 * flush all cache entries for a given disk partition.
 *
 * @param[in] part  disk partition object
 *
 * @pre VOL_LOCK held
 *
 * @return operation status
 *    @retval 0 success
 *
 * @internal
 */
int
_VVGC_flush_part_r(struct DiskPartition64 * part)
{
    int code = 0, res;
    int i;
    VVGCache_hash_entry_t * ent, * nent;

    for (i = 0; i < VolumeHashTable.Size; i++) {
        for (queue_Scan(&VVGCache_hash_table.hash_buckets[i],
                        ent, nent,
                        VVGCache_hash_entry)) {
            if (ent->dp == part) {
                VolumeId volid = ent->volid;

                res = _VVGC_hash_entry_del(ent);
                if (res) {
                    ViceLog(0, ("_VVGC_flush_part_r: error %d deleting hash entry for %lu\n",
                                res, afs_printable_uint32_lu(volid)));
                    code = res;
                }
            }
        }
    }

    return code;
}
static void
cleanup_pthread_cache(void)
{
    thread_p cur = NULL, next = NULL;

    if (pthread_cache_done) {
        for (queue_Scan(&active_Q, cur, next, thread)) {
            queue_Remove(cur);
        }
        for (queue_Scan(&cache_Q, cur, next, thread)) {
            queue_Remove(cur);
        }

        pthread_mutex_destroy(&active_Q_mutex);
        pthread_mutex_destroy(&cache_Q_mutex);

        pthread_cache_done = 0;
    }
}
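/*
 * Why every queue_Scan() in these functions names two cursors: the second
 * cursor caches the next element before the loop body runs, so the body may
 * queue_Remove() and even free the current element without breaking the
 * traversal.  Below is a self-contained sketch of that idiom using a minimal
 * circular doubly-linked list; it mimics the shape of the rx_queue macros
 * but is not the real implementation.
 */
#include <stdio.h>
#include <stdlib.h>

struct q { struct q *prev, *next; };

#define Q_INIT(h)      ((h)->prev = (h)->next = (h))
#define Q_APPEND(h, e) \
    do { (e)->prev = (h)->prev; (e)->next = (h); \
         (h)->prev->next = (e); (h)->prev = (e); } while (0)
#define Q_REMOVE(e) \
    do { (e)->prev->next = (e)->next; (e)->next->prev = (e)->prev; } while (0)
/* two-cursor scan: 'n' is fetched before the body runs, so 'c' may be
 * removed and freed inside the loop */
#define Q_SCAN(h, c, n, type) \
    (c) = (type *)(h)->next, (n) = (type *)((struct q *)(c))->next; \
    (struct q *)(c) != (h); \
    (c) = (n), (n) = (type *)((struct q *)(n))->next

struct item { struct q link; int id; };  /* link must be the first member */

int
main(void)
{
    struct q head;
    struct item *it, *nit;
    int i;

    Q_INIT(&head);
    for (i = 0; i < 4; i++) {
        it = malloc(sizeof(*it));
        it->id = i;
        Q_APPEND(&head, &it->link);
    }
    /* remove and free every element mid-scan, as the flush routines do */
    for (Q_SCAN(&head, it, nit, struct item)) {
        Q_REMOVE(&it->link);
        printf("freed item %d\n", it->id);
        free(it);
    }
    return 0;
}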
/**
 * remove a dependency from a work node.
 *
 * @param[in] child   node which was dependent upon completion of parent
 * @param[in] parent  node whose completion gated child's execution
 *
 * @return operation status
 *    @retval 0 success
 */
int
afs_wq_node_dep_del(struct afs_work_queue_node * child,
                    struct afs_work_queue_node * parent)
{
    int code, ret = 0;
    struct afs_work_queue_dep_node * dep, * ndep;
    struct afs_work_queue_node_multilock ml;
    int held = 0;

    memset(&ml, 0, sizeof(ml));
    ml.nodes[0].node = parent;
    ml.nodes[1].node = child;
    code = _afs_wq_node_multilock(&ml);
    if (code) {
        goto error;
    }
    held = 1;

    /* only permit changes while child is in init state
     * or running state (e.g. do a dep del when in callback func) */
    if ((child->state != AFS_WQ_NODE_STATE_INIT) &&
        (child->state != AFS_WQ_NODE_STATE_RUNNING)) {
        ret = AFS_WQ_ERROR;
        goto error;
    }

    /* locate node linking parent and child */
    for (queue_Scan(&parent->dep_children, dep, ndep,
                    afs_work_queue_dep_node)) {
        if ((dep->child == child) && (dep->parent == parent)) {

            /* no need to grab an extra ref on dep->child here; the caller
             * should already have a ref on dep->child */
            code = _afs_wq_dep_unlink_r(dep);
            if (code) {
                ret = code;
                goto error;
            }

            code = _afs_wq_dep_free(dep);
            if (code) {
                ret = code;
                goto error;
            }
            break;
        }
    }

 error:
    if (held) {
        MUTEX_EXIT(&child->lock);
        MUTEX_EXIT(&parent->lock);
    }
    return ret;
}
/**
 * unlink and free all of the dependency nodes from a node.
 *
 * @param[in] parent  work node that is the parent node of all deps to be freed
 *
 * @return operation status
 *    @retval 0 success
 *
 * @pre parent->refcount == 0
 */
static int
_afs_wq_node_free_deps(struct afs_work_queue_node *parent)
{
    int ret = 0, code;
    struct afs_work_queue_node *node_unlock = NULL, *node_put = NULL;
    struct afs_work_queue_dep_node * dep, * nd;

    /* unlink and free all of the dep structs attached to 'parent' */
    for (queue_Scan(&parent->dep_children,
                    dep, nd,
                    afs_work_queue_dep_node)) {

        MUTEX_ENTER(&dep->child->lock);
        node_unlock = dep->child;

        /* We need to get a ref on child here, since _afs_wq_dep_unlink_r may
         * put the last ref on the child, and we need the child to still exist
         * so we can unlock it */
        code = _afs_wq_node_get_r(dep->child);
        if (code) {
            goto loop_error;
        }
        node_put = dep->child;

        /* remember, no need to lock dep->parent, since its refcount is 0 */
        code = _afs_wq_dep_unlink_r(dep);

     loop_error:
        if (node_put) {
            _afs_wq_node_put_r(node_put, 1);
        } else if (node_unlock) {
            MUTEX_EXIT(&node_unlock->lock);
        }
        node_put = node_unlock = NULL;

        if (code == 0) {
            /* Only do this if everything is okay; if code is nonzero,
             * something will still be pointing at dep, so don't free it.
             * We will leak memory, but that's better than memory corruption;
             * we've done all we can do to try and free the dep memory */
            code = _afs_wq_dep_free(dep);
        }

        if (!ret) {
            ret = code;
        }
    }

    return ret;
}
/**
 * delete all of the entries in the dlist from the VGC.
 *
 * Traverses the to-delete list for the specified partition, and deletes
 * the specified entries from the global VGC.  Also deletes the entries from
 * the dlist itself as it goes along.
 *
 * @param[in] dp  the partition whose dlist we are flushing
 */
static void
_VVGC_flush_dlist(struct DiskPartition64 *dp)
{
    int i;
    VVGCache_dlist_entry_t *ent, *nent;

    for (i = 0; i < VolumeHashTable.Size; i++) {
        for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[i],
                        ent, nent,
                        VVGCache_dlist_entry)) {

            _VVGC_entry_purge_r(dp, ent->parent, ent->child);
            queue_Remove(ent);
            free(ent);
        }
    }
}
/**
 * shut down all threads in pool.
 *
 * @param[in] pool   thread pool object
 * @param[in] block  wait for all threads to terminate, if asserted
 *
 * @return operation status
 *    @retval 0 success
 */
int
afs_tp_shutdown(struct afs_thread_pool * pool,
                int block)
{
    int ret = 0;
    struct afs_thread_pool_worker * worker, * nn;

    MUTEX_ENTER(&pool->lock);
    if (pool->state == AFS_TP_STATE_STOPPED
        || pool->state == AFS_TP_STATE_STOPPING) {
        goto done_stopped;
    }
    if (pool->state != AFS_TP_STATE_RUNNING) {
        ret = AFS_TP_ERROR;
        goto done_sync;
    }
    pool->state = AFS_TP_STATE_STOPPING;

    for (queue_Scan(&pool->thread_list, worker, nn, afs_thread_pool_worker)) {
        worker->req_shutdown = 1;
    }
    if (!pool->nthreads) {
        pool->state = AFS_TP_STATE_STOPPED;
    }
    /* need to drop lock to get a membar here */
    MUTEX_EXIT(&pool->lock);

    ret = afs_wq_shutdown(pool->work_queue);
    if (ret) {
        goto error;
    }

    MUTEX_ENTER(&pool->lock);

 done_stopped:
    if (block) {
        while (pool->nthreads) {
            CV_WAIT(&pool->shutdown_cv, &pool->lock);
        }
    }
 done_sync:
    MUTEX_EXIT(&pool->lock);

 error:
    return ret;
}
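/*
 * Illustrative sketch (not from the original source): a typical teardown
 * path.  Passing block=1 makes afs_tp_shutdown() sleep on shutdown_cv until
 * every worker has exited; passing block=0 only initiates the shutdown, and
 * the caller must synchronize with thread exit by other means.
 */
static void
example_pool_teardown(struct afs_thread_pool *pool)
{
    int code = afs_tp_shutdown(pool, 1 /* block until nthreads == 0 */);
    if (code != 0) {
        return;  /* pool was not running, or work queue shutdown failed */
    }
    /* all worker threads have terminated at this point */
}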
/**
 * looks up an entry on the to-delete list, if it exists.
 *
 * @param[in] dp      the partition whose dlist we are looking at
 * @param[in] parent  the parent volume ID we're looking for
 * @param[in] child   the child volume ID we're looking for
 *
 * @return a pointer to the matching entry in the dlist
 *    @retval NULL the requested entry does not exist in the dlist
 */
static VVGCache_dlist_entry_t *
_VVGC_dlist_lookup_r(struct DiskPartition64 *dp, VolumeId parent,
                     VolumeId child)
{
    int bucket = VVGC_HASH(child);
    VVGCache_dlist_entry_t *ent, *nent;

    for (queue_Scan(&VVGCache.part[dp->index].dlist_hash_buckets[bucket],
                    ent, nent,
                    VVGCache_dlist_entry)) {

        if (ent->child == child && ent->parent == parent) {
            return ent;
        }
    }

    return NULL;
}
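/*
 * Illustrative sketch (hypothetical helper, not from the original source):
 * how a VGC populate/scan path might consult the to-delete list before
 * adding an entry, so that deletions which raced with the scan are not
 * resurrected.  _VVGC_add_entry_r() is an assumed name for the real add
 * primitive.
 */
static int
example_scan_add_r(struct DiskPartition64 *dp, VolumeId parent, VolumeId child)
{
    if (_VVGC_dlist_lookup_r(dp, parent, child) != NULL) {
        /* entry was deleted while the scan was running; skip it */
        return 0;
    }
    return _VVGC_add_entry_r(dp, parent, child);
}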
/**
 * look through log_watch_queue, and if any processes are not still
 * running, hand them off to the SalvageLogCleanupThread
 *
 * @param log_watch_queue  a queue of PIDs; each entry is handed off for
 * cleanup once its PID has died
 */
static void
ScanLogs(struct rx_queue *log_watch_queue)
{
    struct log_cleanup_node *cleanup, *next;

    assert(pthread_mutex_lock(&worker_lock) == 0);

    for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) {
        /* if a process is still running, assume it's the salvage process
         * still going, and keep waiting for it */
        if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) {
            queue_Remove(cleanup);
            queue_Append(&log_cleanup_queue, cleanup);
            assert(pthread_cond_signal(&log_cleanup_queue.queue_change_cv) == 0);
        }
    }

    assert(pthread_mutex_unlock(&worker_lock) == 0);
}
/**
 * look through log_watch_queue, and if any processes are not still
 * running, hand them off to the SalvageLogCleanupThread
 *
 * @param log_watch_queue  a queue of PIDs; each entry is handed off for
 * cleanup once its PID has died
 */
static void
ScanLogs(struct rx_queue *log_watch_queue)
{
    struct log_cleanup_node *cleanup, *next;

    MUTEX_ENTER(&worker_lock);

    for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) {
        /* if a process is still running, assume it's the salvage process
         * still going, and keep waiting for it */
        if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) {
            queue_Remove(cleanup);
            queue_Append(&log_cleanup_queue, cleanup);
            CV_SIGNAL(&log_cleanup_queue.queue_change_cv);
        }
    }

    MUTEX_EXIT(&worker_lock);
}
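/*
 * The liveness test above relies on kill(pid, 0): signal number 0 performs
 * the existence and permission checks but delivers nothing, and an ESRCH
 * failure means no such process remains.  A self-contained demonstration
 * (illustrative, not from the original source):
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int
main(void)
{
    pid_t pid = fork();

    if (pid < 0) {
        return 1;               /* fork failed */
    }
    if (pid == 0) {
        _exit(0);               /* child exits immediately */
    }
    waitpid(pid, NULL, 0);      /* reap it so the PID is really gone */

    if (kill(pid, 0) < 0 && errno == ESRCH) {
        printf("pid %d has died; safe to clean up its logs\n", (int)pid);
    }
    return 0;
}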
/**
 * propagate state down through dep nodes.
 *
 * @param[in] parent      parent node object
 * @param[in] next_state  next state parent will assume
 *
 * @return operation status
 *    @retval 0 success
 *
 * @pre
 *   - parent->lock held
 *
 * @internal
 */
static int
_afs_wq_dep_propagate(struct afs_work_queue_node * parent,
                      afs_wq_work_state_t next_state)
{
    int ret = 0;
    struct afs_work_queue_dep_node * dep, * nd;
    struct afs_work_queue_node_multilock ml;
    afs_wq_work_state_t old_state;
    afs_wq_node_list_id_t qidx;
    struct afs_work_queue_node_list * ql;
    afs_wq_work_state_t cns;

    old_state = _afs_wq_node_state_change(parent,
                                          AFS_WQ_NODE_STATE_BUSY);
    ml.nodes[0].node = parent;
    ml.nodes[0].lock_held = 1;
    ml.nodes[0].busy_held = 1;

    /* scan through our children updating scheduling state */
    for (queue_Scan(&parent->dep_children,
                    dep, nd,
                    afs_work_queue_dep_node)) {
        /* skip half-registered nodes */
        if (dep->child == NULL) {
            continue;
        }

        ml.nodes[1].node = dep->child;
        ml.nodes[1].lock_held = 0;
        ml.nodes[1].busy_held = 0;

        ret = _afs_wq_node_multilock(&ml);
        if (ret) {
            goto error;
        }

        switch (next_state) {
        case AFS_WQ_NODE_STATE_DONE:
            dep->child->block_count--;
            break;

        case AFS_WQ_NODE_STATE_ERROR:
            dep->child->error_count++;
            break;

        default:
            (void)0; /* nop */
        }

        /* skip unscheduled nodes */
        if (dep->child->queue == NULL) {
            MUTEX_EXIT(&dep->child->lock);
            continue;
        }

        /*
         * when blocked dep and error'd dep counts reach zero, the
         * node can be scheduled for execution
         */
        if (dep->child->error_count) {
            ql = &dep->child->queue->done_list;
            qidx = AFS_WQ_NODE_LIST_DONE;
            cns = AFS_WQ_NODE_STATE_ERROR;
        } else if (dep->child->block_count) {
            ql = &dep->child->queue->blocked_list;
            qidx = AFS_WQ_NODE_LIST_BLOCKED;
            cns = AFS_WQ_NODE_STATE_BLOCKED;
        } else {
            ql = &dep->child->queue->ready_list;
            qidx = AFS_WQ_NODE_LIST_READY;
            cns = AFS_WQ_NODE_STATE_SCHEDULED;
        }

        if (qidx != dep->child->qidx) {
            /* we're transitioning to a different queue */
            ret = _afs_wq_node_list_remove(dep->child,
                                           AFS_WQ_NODE_STATE_BUSY);
            if (ret) {
                MUTEX_EXIT(&dep->child->lock);
                goto error;
            }

            ret = _afs_wq_node_list_enqueue(ql,
                                            dep->child,
                                            cns);
            if (ret) {
                MUTEX_EXIT(&dep->child->lock);
                goto error;
            }
        }
        MUTEX_EXIT(&dep->child->lock);
    }

 error:
    _afs_wq_node_state_change(parent, old_state);
    return ret;
}
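/*
 * _afs_wq_node_multilock must acquire two node locks without deadlocking
 * against a thread that locks the same pair in the opposite order.  One
 * standard technique is to impose a global order (here, by address) and
 * always acquire in that order; the sketch below shows the idea with plain
 * pthreads.  This is illustrative only: the real multilock also honors
 * locks the caller already holds (the lock_held/busy_held flags above) and
 * may use a trylock/back-off strategy instead.
 */
#include <pthread.h>
#include <stdint.h>

void
lock_pair_ordered(pthread_mutex_t *a, pthread_mutex_t *b)
{
    if (a == b) {
        pthread_mutex_lock(a);          /* same lock: take it once */
    } else if ((uintptr_t)a < (uintptr_t)b) {
        pthread_mutex_lock(a);          /* lower address first */
        pthread_mutex_lock(b);
    } else {
        pthread_mutex_lock(b);
        pthread_mutex_lock(a);
    }
}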
/* rxi_WritevProc -- internal version.
 *
 * Send buffers allocated in rxi_WritevAlloc.
 *
 * LOCKS USED -- called at netpri.
 */
int
rxi_WritevProc(struct rx_call *call, struct iovec *iov, int nio, int nbytes)
{
    struct rx_packet *cp = NULL;
#ifdef RX_TRACK_PACKETS
    struct rx_packet *p, *np;
#endif
    int nextio;
    int requestCount;
    struct rx_queue tmpq;
#ifdef RXDEBUG_PACKET
    u_short tmpqc;
#endif

    requestCount = nbytes;
    nextio = 0;

    MUTEX_ENTER(&call->lock);
    if (call->error) {
        call->mode = RX_MODE_ERROR;
    } else if (call->mode != RX_MODE_SENDING) {
        call->error = RX_PROTOCOL_ERROR;
    }
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
    rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
    cp = call->currentPacket;

    if (call->error) {
        call->mode = RX_MODE_ERROR;
        MUTEX_EXIT(&call->lock);
        if (cp) {
#ifdef RX_TRACK_PACKETS
            cp->flags &= ~RX_PKTFLAG_CP;
            cp->flags |= RX_PKTFLAG_IOVQ;
#endif
            queue_Prepend(&call->iovq, cp);
#ifdef RXDEBUG_PACKET
            call->iovqc++;
#endif /* RXDEBUG_PACKET */
            call->currentPacket = (struct rx_packet *)0;
        }
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
        return 0;
    }

    /* Loop through the I/O vector adjusting packet pointers.
     * Place full packets back onto the iovq once they are ready
     * to send. Set RX_PROTOCOL_ERROR if any problems are found in
     * the iovec. We put the loop condition at the end to ensure that
     * a zero length write will push a short packet. */
    nextio = 0;
    queue_Init(&tmpq);
#ifdef RXDEBUG_PACKET
    tmpqc = 0;
#endif /* RXDEBUG_PACKET */
    do {
        if (call->nFree == 0 && cp) {
            clock_NewTime();    /* Bogus:  need new time package */
            /* The 0, below, specifies that it is not the last packet:
             * there will be others. PrepareSendPacket may
             * alter the packet length by up to
             * conn->securityMaxTrailerSize */
            hadd32(call->bytesSent, cp->length);
            rxi_PrepareSendPacket(call, cp, 0);
            queue_Append(&tmpq, cp);
#ifdef RXDEBUG_PACKET
            tmpqc++;
#endif /* RXDEBUG_PACKET */
            cp = call->currentPacket = (struct rx_packet *)0;

            /* The head of the iovq is now the current packet */
            if (nbytes) {
                if (queue_IsEmpty(&call->iovq)) {
                    /* set the call error while still holding the call lock */
                    call->error = RX_PROTOCOL_ERROR;
                    MUTEX_EXIT(&call->lock);
#ifdef RXDEBUG_PACKET
                    tmpqc -=
#endif /* RXDEBUG_PACKET */
                        rxi_FreePackets(0, &tmpq);
                    return 0;
                }
                cp = queue_First(&call->iovq, rx_packet);
                queue_Remove(cp);
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_IOVQ;
#endif
#ifdef RXDEBUG_PACKET
                call->iovqc--;
#endif /* RXDEBUG_PACKET */
#ifdef RX_TRACK_PACKETS
                cp->flags |= RX_PKTFLAG_CP;
#endif
                call->currentPacket = cp;
                call->nFree = cp->length;
                call->curvec = 1;
                call->curpos =
                    (char *)cp->wirevec[1].iov_base +
                    call->conn->securityHeaderSize;
                call->curlen =
                    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
            }
        }

        if (nbytes) {
            /* The next iovec should point to the current position */
            if (iov[nextio].iov_base != call->curpos
                || iov[nextio].iov_len > (int)call->curlen) {
                call->error = RX_PROTOCOL_ERROR;
                MUTEX_EXIT(&call->lock);
                if (cp) {
#ifdef RX_TRACK_PACKETS
                    cp->flags &= ~RX_PKTFLAG_CP;
#endif
                    queue_Prepend(&tmpq, cp);
#ifdef RXDEBUG_PACKET
                    tmpqc++;
#endif /* RXDEBUG_PACKET */
                    cp = call->currentPacket = (struct rx_packet *)0;
                }
#ifdef RXDEBUG_PACKET
                tmpqc -=
#endif /* RXDEBUG_PACKET */
                    rxi_FreePackets(0, &tmpq);
                return 0;
            }
            nbytes -= iov[nextio].iov_len;
            call->curpos += iov[nextio].iov_len;
            call->curlen -= iov[nextio].iov_len;
            call->nFree -= iov[nextio].iov_len;
            nextio++;
            if (call->curlen == 0) {
                if (++call->curvec > cp->niovecs) {
                    call->nFree = 0;
                } else {
                    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
                    call->curlen = cp->wirevec[call->curvec].iov_len;
                }
            }
        }
    } while (nbytes && nextio < nio);

    /* Move the packets from the temporary queue onto the transmit queue.
     * We may end up with more than call->twind packets on the queue. */
#ifdef RX_TRACK_PACKETS
    for (queue_Scan(&tmpq, p, np, rx_packet)) {
        p->flags |= RX_PKTFLAG_TQ;
    }
#endif

    if (call->error)
        call->mode = RX_MODE_ERROR;

    queue_SpliceAppend(&call->tq, &tmpq);

    if (!(call->flags & (RX_CALL_FAST_RECOVER | RX_CALL_FAST_RECOVER_WAIT))) {
        rxi_Start(0, call, 0, 0);
    }

    /* Wait for the length of the transmit queue to fall below call->twind */
    while (!call->error && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
        clock_NewTime();
        call->startWait = clock_Sec();
#ifdef RX_ENABLE_LOCKS
        CV_WAIT(&call->cv_twind, &call->lock);
#else
        call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
        osi_rxSleep(&call->twind);
#endif
        call->startWait = 0;
    }

    /* cp is no longer valid since we may have given up the lock */
    cp = call->currentPacket;

    if (call->error) {
        call->mode = RX_MODE_ERROR;
        call->currentPacket = NULL;
        MUTEX_EXIT(&call->lock);
        if (cp) {
#ifdef RX_TRACK_PACKETS
            cp->flags &= ~RX_PKTFLAG_CP;
#endif
            rxi_FreePacket(cp);
        }
        return 0;
    }
    MUTEX_EXIT(&call->lock);

    return requestCount - nbytes;
}
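/*
 * Illustrative sketch (not from the original source): the usual calling
 * pattern for the writev send path.  A caller first asks rx_WritevAlloc()
 * to fill an iovec with packet-backed buffers, copies its data into those
 * buffers, and then hands the same iovec back via rx_Writev(), which runs
 * rxi_WritevProc() under NETPRI.  Error handling is abbreviated, and
 * RX_MAXIOVECS is assumed to be the iovec bound from rx_packet.h.
 */
static int
example_writev_send(struct rx_call *call, const char *data, int len)
{
    struct iovec iov[RX_MAXIOVECS];
    int nio, nbytes, copied = 0, i;

    nbytes = rx_WritevAlloc(call, iov, &nio, RX_MAXIOVECS, len);
    if (nbytes <= 0) {
        return -1;              /* allocation failed or call errored */
    }
    /* copy our data into the packet-backed buffers we were handed */
    for (i = 0; i < nio && copied < nbytes; i++) {
        int chunk = (int)iov[i].iov_len;
        if (chunk > nbytes - copied)
            chunk = nbytes - copied;
        memcpy(iov[i].iov_base, data + copied, chunk);
        copied += chunk;
    }
    return rx_Writev(call, iov, nio, copied);
}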