Example No. 1
static void *
SalvageChildReaperThread(void * args)
{
    int slot, pid, status;
    struct log_cleanup_node * cleanup;

    MUTEX_ENTER(&worker_lock);

    /* loop reaping our children */
    while (1) {
	/* wait() won't block unless we have children, so
	 * block on the cond var if we're childless */
	while (current_workers == 0) {
	    CV_WAIT(&worker_cv, &worker_lock);
	}

	MUTEX_EXIT(&worker_lock);

	/* allocate outside the lock; if this fails, the NULL check below
	 * simply skips log cleanup for this child */
	cleanup = (struct log_cleanup_node *) malloc(sizeof(struct log_cleanup_node));

	while (Reap_Child("salvageserver", &pid, &status) < 0) {
	    /* try to prevent livelock if something goes wrong */
	    sleep(1);
	}

	VOL_LOCK;
	for (slot = 0; slot < Parallel; slot++) {
	    if (child_slot[slot] == pid)
		break;
	}
	osi_Assert(slot < Parallel);
	child_slot[slot] = 0;
	VOL_UNLOCK;

	SALVSYNC_doneWorkByPid(pid, status);

	MUTEX_ENTER(&worker_lock);

	if (cleanup) {
	    cleanup->pid = pid;
	    queue_Append(&log_cleanup_queue, cleanup);
	    CV_SIGNAL(&log_cleanup_queue.queue_change_cv);
	}

	/* ok, we've reaped a child */
	current_workers--;
	CV_BROADCAST(&worker_cv);
    }

    return NULL;
}
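
Reap_Child is not shown in this example; the loop only relies on it returning the reaped child's PID and exit status, or a negative value on failure. A minimal sketch of what such a helper might look like, assuming it is a thin wrapper around waitpid(2) (the error reporting below is illustrative, not the OpenAFS original):

#include <sys/wait.h>
#include <errno.h>
#include <stdio.h>

/* Hedged sketch: reap any exited child and report its pid and status.
 * Returns the reaped pid, or -1 on error (e.g. interrupted by a signal). */
static int
Reap_Child(char *prog, int *pid, int *status)
{
    int ret = waitpid(-1, status, 0);	/* block until some child exits */
    if (ret < 0) {
	fprintf(stderr, "%s: waitpid failed (errno %d)\n", prog, errno);
    } else {
	*pid = ret;
    }
    return ret;
}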
Example No. 2
/**
 * append to a node list object.
 *
 * @param[in] list  list object
 * @param[in] node  node object
 * @param[in] state new node state
 *
 * @return operation status
 *    @retval 0 success
 *    @retval AFS_WQ_ERROR raced to enqueue node
 *
 * @pre
 *   - node lock held
 *   - node is not on a list
 *   - node is either not busy, or it is marked as busy by the calling thread
 *
 * @post
 *   - enqueued on list
 *   - node lock dropped
 *
 * @internal
 */
static int
_afs_wq_node_list_enqueue(struct afs_work_queue_node_list * list,
			  struct afs_work_queue_node * node,
			  afs_wq_work_state_t state)
{
    int code, ret = 0;

    if (node->qidx != AFS_WQ_NODE_LIST_NONE) {
	/* raced */
	ret = AFS_WQ_ERROR;
	goto error;
    }

    /* deal with lock inversion: the lock order is list before node, but we
     * arrived holding only the node lock, so try-lock and back off */
    code = MUTEX_TRYENTER(&list->lock);
    if (!code) {
	/* contended */
	_afs_wq_node_state_change(node, AFS_WQ_NODE_STATE_BUSY);
	MUTEX_EXIT(&node->lock);
	MUTEX_ENTER(&list->lock);
	MUTEX_ENTER(&node->lock);

	/* assert state of the world (we set busy, so this should never happen) */
	osi_Assert(queue_IsNotOnQueue(node));
    }

    if (list->shutdown) {
	ret = AFS_WQ_ERROR;
	goto error_unlock;
    }

    osi_Assert(node->qidx == AFS_WQ_NODE_LIST_NONE);
    if (queue_IsEmpty(&list->list)) {
	/* wakeup a dequeue thread */
	CV_SIGNAL(&list->cv);
    }
    queue_Append(&list->list, node);
    node->qidx = list->qidx;
    _afs_wq_node_state_change(node, state);

 error_unlock:
    MUTEX_EXIT(&node->lock);
    MUTEX_EXIT(&list->lock);

 error:
    return ret;
}
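
The MUTEX_TRYENTER dance above is the standard workaround for lock-order inversion: the canonical order is list lock before node lock, but the caller arrives holding only the node lock. A generic sketch of the same pattern in plain pthreads (the names here are illustrative):

#include <pthread.h>

/* Canonical order: 'outer' before 'inner'. We already hold 'inner'. */
static void
lock_outer_while_holding_inner(pthread_mutex_t *outer, pthread_mutex_t *inner)
{
    if (pthread_mutex_trylock(outer) != 0) {
	/* contended: back off, then take both locks in canonical order.
	 * Any state guarded by 'inner' must be revalidated afterwards;
	 * the enqueue code above marks the node BUSY so peers leave it
	 * alone across the window where 'inner' is dropped. */
	pthread_mutex_unlock(inner);
	pthread_mutex_lock(outer);
	pthread_mutex_lock(inner);
    }
}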
Example No. 3
/* Called by Rx when the first reply packet of a call is received, or the call is aborted. */
void
multi_Ready(struct rx_call *call, void *amh,
	    int index)
{
    struct multi_handle *mh = (struct multi_handle *)amh;
#ifdef RX_ENABLE_LOCKS
    MUTEX_ENTER(&mh->lock);
#endif /* RX_ENABLE_LOCKS */
    *mh->firstNotReady++ = index;	/* record ready call's index, advance cursor */
    mh->nReady++;
#ifdef RX_ENABLE_LOCKS
    CV_SIGNAL(&mh->cv);
    MUTEX_EXIT(&mh->lock);
#else /* RX_ENABLE_LOCKS */
    osi_rxWakeup(mh);
#endif /* RX_ENABLE_LOCKS */
}
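
multi_Ready is the producer side: it appends the finished call's index and signals. A simplified sketch of the matching consumer, assuming a nextReady cursor that trails firstNotReady (a hedged reconstruction, not the exact rx source):

static int
multi_WaitForReady(struct multi_handle *mh)
{
    int index;
    MUTEX_ENTER(&mh->lock);
    while (mh->nextReady == mh->firstNotReady) {
	/* nothing ready yet; multi_Ready() will signal us */
	CV_WAIT(&mh->cv, &mh->lock);
    }
    index = *mh->nextReady++;	/* consume the next ready call's index */
    MUTEX_EXIT(&mh->lock);
    return index;
}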
Example No. 4
/**
 * look through log_watch_queue and hand off any processes that are no
 * longer running to the SalvageLogCleanupThread
 *
 * @param log_watch_queue  a queue of PIDs whose logs we should clean up
 * once the corresponding process has died
 */
static void
ScanLogs(struct rx_queue *log_watch_queue)
{
    struct log_cleanup_node *cleanup, *next;

    MUTEX_ENTER(&worker_lock);

    for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) {
	/* if a process is still running, assume it's the salvage process
	 * still going, and keep waiting for it */
	if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) {
	    queue_Remove(cleanup);
	    queue_Append(&log_cleanup_queue, cleanup);
	    CV_SIGNAL(&log_cleanup_queue.queue_change_cv);
	}
    }

    MUTEX_EXIT(&worker_lock);
}
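
kill(pid, 0) sends no signal at all; it only asks the kernel whether the PID can be signalled. ESRCH means the process is gone, while EPERM would mean it exists but is owned by someone else, which is why the code checks errno explicitly. A small standalone illustration (hypothetical helper, not part of the salvager):

#include <signal.h>
#include <errno.h>
#include <sys/types.h>

/* Returns 1 if 'pid' still names a live process, 0 otherwise. */
static int
pid_is_alive(pid_t pid)
{
    if (kill(pid, 0) == 0)
	return 1;
    return errno == EPERM;	/* process exists but we may not signal it */
}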
Example No. 5
/**
 * execute a node on the queue.
 *
 * @param[in] queue  work queue
 * @param[in] rock   opaque pointer (passed as fifth arg to callback func)
 * @param[in] block  allow blocking in dequeue
 *
 * @return operation status
 *    @retval 0 completed a work unit
 *    @retval EINTR queue was shut down before a work unit could be dequeued
 *
 * @internal
 */
static int
_afs_wq_do(struct afs_work_queue * queue,
	   void * rock,
	   int block)
{
    int code, ret = 0;
    struct afs_work_queue_node * node;
    afs_wq_callback_func_t * cbf;
    afs_wq_work_state_t next_state;
    struct afs_work_queue_node_list * ql;
    void * node_rock;
    int detached = 0;

    /* We can inc queue->running_count before actually pulling the node off
     * of the ready_list, since running_count only really matters when we are
     * shut down. If we get shut down before we pull the node off of
     * ready_list, but after we inc'd running_count,
     * _afs_wq_node_list_dequeue should return immediately with EINTR,
     * in which case we'll dec running_count, so it's as if we never inc'd it
     * in the first place. */
    MUTEX_ENTER(&queue->lock);
    if (queue->shutdown) {
	MUTEX_EXIT(&queue->lock);
	return EINTR;
    }
    queue->running_count++;
    MUTEX_EXIT(&queue->lock);

    ret = _afs_wq_node_list_dequeue(&queue->ready_list,
					 &node,
					 AFS_WQ_NODE_STATE_RUNNING,
					 block);
    if (ret) {
	_afs_wq_dec_running_count(queue);
	goto error;
    }

    cbf = node->cbf;
    node_rock = node->rock;
    detached = node->detached;

    if (cbf != NULL) {
	MUTEX_EXIT(&node->lock);
	code = (*cbf)(queue, node, queue->rock, node_rock, rock);
	MUTEX_ENTER(&node->lock);
	if (code == 0) {
	    next_state = AFS_WQ_NODE_STATE_DONE;
	    ql = &queue->done_list;
	} else if (code == AFS_WQ_ERROR_RESCHEDULE) {
	    if (node->error_count) {
		next_state = AFS_WQ_NODE_STATE_ERROR;
		ql = &queue->done_list;
	    } else if (node->block_count) {
		next_state = AFS_WQ_NODE_STATE_BLOCKED;
		ql = &queue->blocked_list;
	    } else {
		next_state = AFS_WQ_NODE_STATE_SCHEDULED;
		ql = &queue->ready_list;
	    }
	} else {
	    next_state = AFS_WQ_NODE_STATE_ERROR;
	    ql = &queue->done_list;
	}
    } else {
	next_state = AFS_WQ_NODE_STATE_DONE;
	code = 0;
	ql = &queue->done_list;
    }

    _afs_wq_dec_running_count(queue);

    node->retcode = code;

    if ((next_state == AFS_WQ_NODE_STATE_DONE) ||
        (next_state == AFS_WQ_NODE_STATE_ERROR)) {

	MUTEX_ENTER(&queue->lock);

	if (queue->drain && queue->pend_count == queue->opts.pend_lothresh) {
	    /* signal other threads if we're about to drop below the low
	     * pending-tasks threshold */
	    queue->drain = 0;
	    CV_SIGNAL(&queue->pend_cv);
	}

	if (queue->pend_count == 1) {
	    /* signal other threads if we're about to become 'empty' */
	    CV_BROADCAST(&queue->empty_cv);
	}

	queue->pend_count--;

	MUTEX_EXIT(&queue->lock);
    }

    ret = _afs_wq_node_state_wait_busy(node);
    if (ret) {
	goto error;
    }

    /* propagate scheduling changes down through dependencies */
    ret = _afs_wq_dep_propagate(node, next_state);
    if (ret) {
	goto error;
    }

    ret = _afs_wq_node_state_wait_busy(node);
    if (ret) {
	goto error;
    }

    if (detached &&
	((next_state == AFS_WQ_NODE_STATE_DONE) ||
	 (next_state == AFS_WQ_NODE_STATE_ERROR))) {
	_afs_wq_node_state_change(node, next_state);
	_afs_wq_node_put_r(node, 1);
    } else {
	ret = _afs_wq_node_list_enqueue(ql,
					     node,
					     next_state);
    }

 error:
    return ret;
}
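
_afs_wq_dec_running_count is not shown; per the comment at the top of the function, all it needs to do is undo the earlier increment under the queue lock and let any shutdown waiter know when the last worker leaves. A plausible sketch under those assumptions (running_cv is a guess at the waiter's condition variable, not confirmed by this listing):

static void
_afs_wq_dec_running_count(struct afs_work_queue *queue)
{
    MUTEX_ENTER(&queue->lock);
    queue->running_count--;
    if (queue->shutdown && queue->running_count == 0) {
	/* last worker out; wake anyone waiting on shutdown */
	CV_BROADCAST(&queue->running_cv);
    }
    MUTEX_EXIT(&queue->lock);
}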
Example No. 6
/**
 * schedule a work node for execution.
 *
 * @param[in] queue  work queue
 * @param[in] node   work node
 * @param[in] opts   options for adding, or NULL for defaults
 *
 * @return operation status
 *    @retval 0 success
 *    @retval EWOULDBLOCK queue is full and opts specified not to block
 *    @retval EINTR queue was full, we blocked to add, and the queue was
 *                  shut down while we were blocking
 */
int
afs_wq_add(struct afs_work_queue *queue,
           struct afs_work_queue_node *node,
           struct afs_work_queue_add_opts *opts)
{
    int ret = 0;
    int donate, block, force, hithresh;
    struct afs_work_queue_node_list * list;
    struct afs_work_queue_add_opts l_opts;
    int waited_for_drain = 0;
    afs_wq_work_state_t state;

    if (!opts) {
	afs_wq_add_opts_init(&l_opts);
	opts = &l_opts;
    }

    donate = opts->donate;
    block = opts->block;
    force = opts->force;

 retry:
    MUTEX_ENTER(&node->lock);

    ret = _afs_wq_node_state_wait_busy(node);
    if (ret) {
	goto error;
    }

    if (!node->block_count && !node->error_count) {
	list = &queue->ready_list;
	state = AFS_WQ_NODE_STATE_SCHEDULED;
    } else if (node->error_count) {
	list = &queue->done_list;
	state = AFS_WQ_NODE_STATE_ERROR;
    } else {
	list = &queue->blocked_list;
	state = AFS_WQ_NODE_STATE_BLOCKED;
    }

    ret = 0;

    MUTEX_ENTER(&queue->lock);

    if (queue->shutdown) {
	ret = EINTR;
	MUTEX_EXIT(&queue->lock);
	MUTEX_EXIT(&node->lock);
	goto error;
    }

    hithresh = queue->opts.pend_hithresh;
    if (hithresh > 0 && queue->pend_count >= hithresh) {
	queue->drain = 1;
    }

    if (!force && (state == AFS_WQ_NODE_STATE_SCHEDULED
                   || state == AFS_WQ_NODE_STATE_BLOCKED)) {

	if (queue->drain) {
	    if (block) {
		MUTEX_EXIT(&node->lock);
		CV_WAIT(&queue->pend_cv, &queue->lock);

		if (queue->shutdown) {
		    ret = EINTR;
		} else {
		    MUTEX_EXIT(&queue->lock);

		    waited_for_drain = 1;

		    goto retry;
		}
	    } else {
		ret = EWOULDBLOCK;
	    }
	}
    }

    if (ret == 0) {
	queue->pend_count++;
    }
    if (waited_for_drain) {
	/* signal another thread that may have been waiting for drain */
	CV_SIGNAL(&queue->pend_cv);
    }

    MUTEX_EXIT(&queue->lock);

    if (ret) {
	goto error;
    }

    if (!donate)
	node->refcount++;	/* queue takes its own ref unless caller donated one */
    node->queue = queue;

    ret = _afs_wq_node_list_enqueue(list,
					 node,
					 state);
 error:
    return ret;
}
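
A typical call site, using only what afs_wq_add itself requires: initialize the option block, pick the blocking behavior, and handle the two documented failure codes (a minimal sketch; the queue and node are assumed to be set up already):

struct afs_work_queue_add_opts opts;
int code;

afs_wq_add_opts_init(&opts);
opts.block = 0;			/* fail fast rather than wait out a drain */

code = afs_wq_add(queue, node, &opts);
if (code == EWOULDBLOCK) {
    /* queue is draining; retry later, or set opts.block = 1 to wait */
} else if (code == EINTR) {
    /* queue was shut down */
}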