Пример #1
0
/**
 * low-level thread entry point.
 *
 * Registers the worker with its pool, runs the pool's high-level entry
 * function, then unregisters the worker and hands it to
 * _afs_tp_worker_free().  The value returned by the entry function is
 * stored in worker->ret; it is not passed back to the caller.
 *
 * @param[in] rock opaque pointer to thread worker object
 *
 * @return always NULL
 *
 * @internal
 */
static void *
_afs_tp_worker_run(void * rock)
{
    struct afs_thread_pool_worker * self = rock;
    struct afs_thread_pool * tp = self->pool;

    /* make this worker visible on the pool's thread list */
    MUTEX_ENTER(&tp->lock);
    queue_Append(&tp->thread_list, self);
    tp->nthreads++;
    MUTEX_EXIT(&tp->lock);

    /* hand control to the pool's high-level entry point */
    self->ret = (*tp->entry)(tp, self, tp->work_queue, tp->rock);

    /* leave the pool; the last thread out broadcasts shutdown_cv and
     * marks the pool stopped */
    MUTEX_ENTER(&tp->lock);
    osi_Assert(tp->nthreads);
    queue_Remove(self);
    tp->nthreads--;
    if (tp->nthreads == 0) {
        CV_BROADCAST(&tp->shutdown_cv);
        tp->state = AFS_TP_STATE_STOPPED;
    }
    MUTEX_EXIT(&tp->lock);

    _afs_tp_worker_free(self);

    return NULL;
}
Пример #2
0
/**
 * link a dependency node to a parent and child work node.
 *
 * After linking, 'dep' sits on the parent's dep_children list and records
 * both endpoints, so that the 'child' work node can proceed once the
 * 'parent' work node is done.
 *
 * @param[in]  dep      dependency node
 * @param[in]  parent   parent node in this dependency
 * @param[in]  child    child node in this dependency
 *
 * @return operation status
 *    @retval 0 success
 *    @retval nonzero  error code passed through from _afs_wq_node_get_r();
 *                     nothing has been linked in that case
 *
 * @pre
 *   - parent->lock held
 *   - child->lock held
 *   - parent and child in quiescent state
 *
 * @internal
 */
static int
_afs_wq_dep_link_r(struct afs_work_queue_dep_node *dep,
                   struct afs_work_queue_node *parent,
                   struct afs_work_queue_node *child)
{
    int code;

    /* Only the child gets a reference from the dep node.  The parent is
     * deliberately not referenced: a node whose only remaining refs come
     * from deps on its dep_children list should be destroyed, and the dep
     * nodes are destroyed along with the work node. */
    code = _afs_wq_node_get_r(child);
    if (code != 0) {
	return code;
    }

    /* hang the dep node off the parent's list of dependents */
    queue_Append(&parent->dep_children, &dep->parent_list);

    dep->parent = parent;
    dep->child = child;

    return 0;
}
Пример #3
0
/*
 * Process all events that have expired relative to the current clock time
 * (which is not re-evaluated unless clock_NewTime has been called).
 * The relative time to the next event is returned in the output parameter
 * next and the function returns 1.  If there is no next event,
 * the function returns 0 and *next is left untouched.
 *
 * Each expired event is removed from rxevent_queue, its handler is invoked
 * as func(event, arg, arg1), and the event structure is then placed on the
 * rxevent_free list.
 */
int
rxevent_RaiseEvents(struct clock * next)
{
    struct rxevent *qe;
    struct clock now;

#ifdef RXDEBUG
    if (Log) {
	/* Sample the clock before logging it; 'now' has not been assigned
	 * anywhere else at this point.  The casts make the arguments match
	 * the %ld conversions whatever the width of the clock fields. */
	clock_GetTime(&now);
	fprintf(Log, "rxevent_RaiseEvents(%ld.%ld)\n", (long)now.sec,
		(long)now.usec);
    }
#endif

    /*
     * Events are sorted by time, so only scan until an event is found that
     * has not yet timed out
     */
    while (queue_IsNotEmpty(&rxevent_queue)) {
	clock_GetTime(&now);
	qe = queue_First(&rxevent_queue, rxevent);
	if (clock_Lt(&now, &qe->eventTime)) {
	    /* head of the queue is still in the future: report how long
	     * until it fires */
	    *next = qe->eventTime;
	    clock_Sub(next, &now);
	    return 1;
	}
	/* take the event off the pending queue before running its handler */
	queue_Remove(qe);
	rxevent_nPosted--;
	qe->func(qe, qe->arg, qe->arg1);
	queue_Append(&rxevent_free, qe);
	rxevent_nFree++;
    }
    return 0;
}
Пример #4
0
/**
 * Thread that reaps exited salvage worker children.
 *
 * Loops forever: waits on worker_cv until there is at least one worker,
 * reaps one child via Reap_Child(), clears that child's entry in
 * child_slot[], reports the result through SALVSYNC_doneWorkByPid(), and
 * queues a log_cleanup_node for the pid on log_cleanup_queue.
 *
 * All pthread calls are made outside of assert() so that the locking still
 * happens if assertions are compiled out; only the result check lives in
 * the assertion.
 *
 * @param args  unused
 *
 * @return never returns in practice (the loop has no exit); NULL nominally
 */
static void *
SalvageChildReaperThread(void * args)
{
    int slot, pid, status;
    int rc;
    struct log_cleanup_node * cleanup;

    rc = pthread_mutex_lock(&worker_lock);
    assert(rc == 0);

    /* loop reaping our children */
    while (1) {
	/* wait() won't block unless we have children, so
	 * block on the cond var if we're childless */
	while (current_workers == 0) {
	    rc = pthread_cond_wait(&worker_cv, &worker_lock);
	    assert(rc == 0);
	}

	rc = pthread_mutex_unlock(&worker_lock);
	assert(rc == 0);

	/* allocated before reaping; a failed allocation is tolerated and
	 * simply means no cleanup node is queued for this child */
	cleanup = malloc(sizeof(*cleanup));

	while (Reap_Child("salvageserver", &pid, &status) < 0) {
	    /* try to prevent livelock if something goes wrong */
	    sleep(1);
	}

	/* free up the slot that was tracking this child */
	VOL_LOCK;
	for (slot = 0; slot < Parallel; slot++) {
	    if (child_slot[slot] == pid)
		break;
	}
	assert(slot < Parallel);
	child_slot[slot] = 0;
	VOL_UNLOCK;

	SALVSYNC_doneWorkByPid(pid, status);

	rc = pthread_mutex_lock(&worker_lock);
	assert(rc == 0);

	if (cleanup) {
	    cleanup->pid = pid;
	    queue_Append(&log_cleanup_queue, cleanup);
	    rc = pthread_cond_signal(&log_cleanup_queue.queue_change_cv);
	    assert(rc == 0);
	}

	/* ok, we've reaped a child */
	current_workers--;
	rc = pthread_cond_broadcast(&worker_cv);
	assert(rc == 0);
    }

    return NULL;
}
Пример #5
0
/**
 * add an entry to the hash table.
 *
 * If an entry for (dp, volid) already exists and points at the same VG
 * entry, the call is treated as a successful no-op.  Otherwise a new hash
 * entry is allocated and appended to the bucket selected by
 * VVGC_HASH(volid).
 *
 * @param[in]  dp        disk partition object
 * @param[in]  volid     volume id
 * @param[in]  ent       volume group object
 * @param[out] hash_out  address in which to store pointer to hash entry;
 *                       also handed to _VVGC_lookup() for the duplicate
 *                       check.  When a new entry is created it receives
 *                       that entry; when allocation fails it receives NULL.
 *
 * @pre VOL_LOCK held
 *
 * @return operation status
 *    @retval 0 success
 *    @retval EEXIST hash entry for volid already exists, and it points to
 *                   a different VG entry
 *    @retval other  error code passed through from _VVGC_hash_entry_alloc()
 *
 * @internal
 */
static int
_VVGC_hash_entry_add(struct DiskPartition64 * dp,
		     VolumeId volid,
		     VVGCache_entry_t * ent,
		     VVGCache_hash_entry_t ** hash_out)
{
    int code = 0;
    /* start out NULL so the 'done' path never publishes an indeterminate
     * pointer when allocation fails */
    VVGCache_hash_entry_t * hent = NULL;
    int hash = VVGC_HASH(volid);
    VVGCache_entry_t *nent;

    code = _VVGC_lookup(dp, volid, &nent, hash_out);
    if (!code) {
	if (ent != nent) {
	    ViceLog(0, ("_VVGC_hash_entry_add: tried to add a duplicate "
	                " nonmatching entry for vol %lu: original "
	                "(%"AFS_PTR_FMT",%lu) new (%"AFS_PTR_FMT",%lu)\n",
	                afs_printable_uint32_lu(volid),
	                nent, afs_printable_uint32_lu(nent->rw),
	                ent, afs_printable_uint32_lu(ent->rw)));
	    return EEXIST;
	}
	/* newline added so this message does not run into the next log line,
	 * matching the message above */
	ViceLog(1, ("_VVGC_hash_entry_add: tried to add duplicate "
	              "hash entry for vol %lu, VG %lu\n",
	              afs_printable_uint32_lu(volid),
	              afs_printable_uint32_lu(ent->rw)));
	/* accept attempts to add matching duplicate entries; just
	 * pretend we added it */
	return 0;
    }

    code = _VVGC_hash_entry_alloc(&hent);
    if (code) {
	hent = NULL;
	goto done;
    }

    hent->entry = ent;
    hent->dp    = dp;
    hent->volid = volid;
    queue_Append(&VVGCache_hash_table.hash_buckets[hash],
		 hent);

 done:
    if (hash_out) {
	*hash_out = hent;
    }
    return code;
}
Пример #6
0
/**
 * append to a node list object.
 *
 * Appends the node to the tail of list->list, records the list's index in
 * node->qidx and moves the node to the requested state.  If the list was
 * empty beforehand, list->cv is signalled before the node is appended.
 *
 * @param[in] list  list object
 * @param[in] node  node object
 * @param[in] state new node state
 *
 * @return operation status
 *    @retval 0 success
 *    @retval AFS_WQ_ERROR raced to enqueue node, or the list is shut down
 *
 * @pre
 *   - node lock held
 *   - node is not on a list
 *   - node is either not busy, or it is marked as busy by the calling thread
 *
 * @post
 *   - enqueued on list
 *   - node lock dropped
 *
 * @note On the "raced" early exit (node->qidx is already set) the function
 *       jumps straight to the final return, so neither lock is released on
 *       that path; node->lock is still held when AFS_WQ_ERROR is returned
 *       for that reason.
 *
 * @internal
 */
static int
_afs_wq_node_list_enqueue(struct afs_work_queue_node_list * list,
			  struct afs_work_queue_node * node,
			  afs_wq_work_state_t state)
{
    int code, ret = 0;

    if (node->qidx != AFS_WQ_NODE_LIST_NONE) {
	/* raced */
	ret = AFS_WQ_ERROR;
	goto error;
    }

    /* deal with lock inversion */
    /* we already hold node->lock, so only try for list->lock; a zero
     * result is treated as "could not get it" */
    code = MUTEX_TRYENTER(&list->lock);
    if (!code) {
	/* contended */
	/* mark the node busy, then drop node->lock and retake both locks
	 * with list->lock first */
	_afs_wq_node_state_change(node, AFS_WQ_NODE_STATE_BUSY);
	MUTEX_EXIT(&node->lock);
	MUTEX_ENTER(&list->lock);
	MUTEX_ENTER(&node->lock);

	/* assert state of the world (we set busy, so this should never happen) */
	osi_Assert(queue_IsNotOnQueue(node));
    }

    /* both locks are held from here on */
    if (list->shutdown) {
	ret = AFS_WQ_ERROR;
	goto error_unlock;
    }

    osi_Assert(node->qidx == AFS_WQ_NODE_LIST_NONE);
    if (queue_IsEmpty(&list->list)) {
	/* wakeup a dequeue thread */
	CV_SIGNAL(&list->cv);
    }
    queue_Append(&list->list, node);
    node->qidx = list->qidx;
    _afs_wq_node_state_change(node, state);

 error_unlock:
    /* shared exit for success and the shutdown failure: release both locks */
    MUTEX_EXIT(&node->lock);
    MUTEX_EXIT(&list->lock);

 error:
    return ret;
}
Пример #7
0
/**
 * look through log_watch_queue, and if any processes are not still
 * running, hand them off to the SalvageLogCleanupThread
 *
 * A pid is considered gone when kill(pid, 0) fails with ESRCH.  Such nodes
 * are moved from log_watch_queue to log_cleanup_queue and the cleanup
 * queue's condition variable is signalled.  The whole scan runs with
 * worker_lock held.
 *
 * The pthread calls are made outside of assert() so that the locking and
 * signalling still happen if assertions are compiled out.
 *
 * @param log_watch_queue  a queue of PIDs that we should clean up if
 * that PID has died
 */
static void
ScanLogs(struct rx_queue *log_watch_queue)
{
    struct log_cleanup_node *cleanup, *next;
    int rc;

    rc = pthread_mutex_lock(&worker_lock);
    assert(rc == 0);

    for (queue_Scan(log_watch_queue, cleanup, next, log_cleanup_node)) {
	/* if a process is still running, assume it's the salvage process
	 * still going, and keep waiting for it */
	if (kill(cleanup->pid, 0) < 0 && errno == ESRCH) {
	    queue_Remove(cleanup);
	    queue_Append(&log_cleanup_queue, cleanup);
	    rc = pthread_cond_signal(&log_cleanup_queue.queue_change_cv);
	    assert(rc == 0);
	}
    }

    rc = pthread_mutex_unlock(&worker_lock);
    assert(rc == 0);
}
Пример #8
0
/**
 * Sweep log_watch_queue for pids whose process no longer exists and pass
 * those nodes on to the SalvageLogCleanupThread.
 *
 * A pid counts as gone when kill(pid, 0) fails with ESRCH.  Each such node
 * is moved onto log_cleanup_queue and the cleanup queue's condition
 * variable is signalled.  The sweep runs with worker_lock held.
 *
 * @param log_watch_queue  a queue of PIDs that we should clean up if
 * that PID has died
 */
static void
ScanLogs(struct rx_queue *log_watch_queue)
{
    struct log_cleanup_node *node, *nnode;

    MUTEX_ENTER(&worker_lock);

    for (queue_Scan(log_watch_queue, node, nnode, log_cleanup_node)) {
	/* anything other than "no such process" means something with this
	 * pid may still be running; assume it is the salvager and keep
	 * watching */
	if (kill(node->pid, 0) >= 0 || errno != ESRCH) {
	    continue;
	}

	/* process is gone: hand the node over for log cleanup */
	queue_Remove(node);
	queue_Append(&log_cleanup_queue, node);
	CV_SIGNAL(&log_cleanup_queue.queue_change_cv);
    }

    MUTEX_EXIT(&worker_lock);
}
Пример #9
0
/**
 * add a VGC entry to the partition's to-delete list.
 *
 * This records a VGC entry (a parent/child pair) on a list of VGC entries
 * to be deleted from the VGC at the end of a VGC scan. This is necessary
 * because volumes may be deleted while a VGC scan is occurring. Since a VGC
 * scan walks a partition in VVGC_SCAN_TBL_LEN chunks, a VGC delete
 * operation may delete a volume, only for it to be added again when the
 * VGC scan's table adds it to the VGC. So when a VGC entry is deleted and a
 * VGC scan is running, this function must be called to ensure it does not
 * come back onto the VGC.
 *
 * The new entry is appended to the partition's dlist hash bucket selected
 * by VVGC_HASH(child).
 *
 * @param[in] dp      the partition to whose dlist we are adding
 * @param[in] parent  the parent volumeID of the VGC entry
 * @param[in] child   the child volumeID of the VGC entry
 *
 * @return operation status
 *  @retval 0 success
 *  @retval ENOMEM memory allocation error
 *
 * @pre VVGCache.part[dp->index].state == VVGC_PART_STATE_UPDATING
 *
 * @internal VGC use only
 */
int
_VVGC_dlist_add_r(struct DiskPartition64 *dp, VolumeId parent,
                  VolumeId child)
{
    int idx = VVGC_HASH(child);
    VVGCache_dlist_entry_t *dent = malloc(sizeof(*dent));

    if (dent == NULL) {
	return ENOMEM;
    }

    dent->parent = parent;
    dent->child = child;

    queue_Append(&VVGCache.part[dp->index].dlist_hash_buckets[idx],
                 dent);
    return 0;
}
Пример #10
0
/**
 * Thread to look for SalvageLog.$pid files that are not from our child
 * worker salvagers, and notify SalvageLogCleanupThread to clean them
 * up. This can happen if we restart during salvages, or the
 * salvageserver crashes or something.
 *
 * The logs directory is read once at startup.  Every file named
 * "<AFSDIR_SLVGLOG_FILE>.<pid>" whose pid is not in child_slot[] gets a
 * node on a private watch queue; ScanLogs() is then called every
 * SALVAGE_SCAN_POLL_INTERVAL seconds until that queue is empty.
 *
 * The suffix after the prefix must be a plain positive decimal number that
 * fits in pid_t.  Anything else is skipped; in particular a non-numeric
 * suffix is no longer turned into pid 0, for which kill(0, 0) never
 * reports ESRCH and so would keep this thread polling forever.
 *
 * @param arg  unused
 *
 * @return always NULL
 */
static void *
SalvageLogScanningThread(void * arg)
{
    struct rx_queue log_watch_queue;

    queue_Init(&log_watch_queue);

    {
	DIR *dp;
	struct dirent *dirp;
	char prefix[AFSDIR_PATH_MAX];
	size_t prefix_len;

	afs_snprintf(prefix, sizeof(prefix), "%s.", AFSDIR_SLVGLOG_FILE);
	prefix_len = strlen(prefix);

	dp = opendir(AFSDIR_LOGS_DIR);
	assert(dp);

	while ((dirp = readdir(dp)) != NULL) {
	    const char *suffix;
	    char *end;
	    long val;
	    pid_t pid;
	    struct log_cleanup_node *cleanup;
	    int i;

	    if (strncmp(dirp->d_name, prefix, prefix_len) != 0) {
		/* not a salvage logfile; skip */
		continue;
	    }

	    suffix = dirp->d_name + prefix_len;
	    if (*suffix < '0' || *suffix > '9') {
		/* strtol would accept leading blanks or a sign, and would
		 * return 0 for no digits at all; none of those is a pid */
		continue;
	    }

	    errno = 0;
	    val = strtol(suffix, &end, 10);

	    if (errno != 0 || *end != '\0' || val <= 0) {
		/* file is SalvageLog.<something> but <something> isn't
		 * a pid, so skip */
		continue;
	    }

	    pid = (pid_t) val;
	    if ((long) pid != val) {
		/* value does not fit in pid_t */
		continue;
	    }

	    VOL_LOCK;
	    for (i = 0; i < Parallel; ++i) {
		if (pid == child_slot[i]) {
		    break;
		}
	    }
	    VOL_UNLOCK;
	    if (i < Parallel) {
		/* this pid is one of our children, so the reaper thread
		 * will take care of it; skip */
		continue;
	    }

	    cleanup = malloc(sizeof(*cleanup));
	    if (cleanup == NULL) {
		/* best effort: without memory we cannot track this log,
		 * so leave it alone rather than dereference NULL */
		continue;
	    }
	    cleanup->pid = pid;

	    queue_Append(&log_watch_queue, cleanup);
	}

	closedir(dp);
    }

    ScanLogs(&log_watch_queue);

    while (queue_IsNotEmpty(&log_watch_queue)) {
	sleep(SALVAGE_SCAN_POLL_INTERVAL);
	ScanLogs(&log_watch_queue);
    }

    return NULL;
}
Пример #11
0
/*
 * Schedule an event: func will be invoked with (event, arg, arg1) by
 * rxevent_RaiseEvents once the clock reaches 'when'.
 *
 * The event structure is taken from the rxevent_free list, which is
 * replenished with a freshly allocated block when it is empty.  The event
 * is inserted into rxevent_queue in time order, after any existing events
 * with the same or an earlier time.  If it ends up at the head of the
 * queue, the rxevent_ScheduledEarlierEvent hook (when set) is called.
 *
 * Returns the posted event.
 */
struct rxevent *
rxevent_Post(struct clock * when, void (*func)(), void *arg, void *arg1)
/* when - When event should happen, in clock (clock.h) units */
{
    struct rxevent *evp, *cur, *prev;

#ifdef RXDEBUG
    if (Log) {
	struct clock now;

	clock_GetTime(&now);
	fprintf(Log, "%ld.%ld: rxevent_Post(%ld.%ld, %p, %p)\n",
		now.sec, now.usec, when->sec, when->usec, func, arg);
    }
#endif
#if defined(AFS_SGIMP_ENV)
    ASSERT(osi_rxislocked());
#endif

    /*
     * Refill the free list with a new block of event entries when it has
     * run dry
     */
    if (queue_IsEmpty(&rxevent_free)) {
	int i;

#if	defined(AFS_AIX32_ENV) && defined(KERNEL)
	/* this configuration allocates one entry at a time */
	evp = (struct rxevent *) rxi_Alloc(sizeof(struct rxevent));
	queue_Append(&rxevent_free, &evp[0]);
	rxevent_nFree++;
#else
	evp = (struct rxevent *) osi_Alloc(sizeof(struct rxevent) *
					   rxevent_allocUnit);
	/* the start of the block doubles as a link on the xfreemallocs
	 * chain of allocated blocks */
	xsp = xfreemallocs;
	xfreemallocs = (struct xfreelist *) evp;
	xfreemallocs->next = xsp;
	for (i = 0; i < rxevent_allocUnit; i++) {
	    queue_Append(&rxevent_free, &evp[i]);
	    rxevent_nFree++;
	}
#endif
    }

    /* Pull an entry off the free list for this event */
    evp = queue_First(&rxevent_free, rxevent);
    queue_Remove(evp);
    rxevent_nFree--;

    /* Fill in the caller's event state */
    evp->eventTime = *when;
    evp->func = func;
    evp->arg = arg;
    evp->arg1 = arg1;
    rxevent_nPosted += 1;	       /* Rather than ++, to shut high-C up
				        * regarding never-set variables */

    /*
     * Find the insertion point.  The queue is kept in time order and a new
     * event is expected to be later than most of what is already queued
     * (unless the queue is very short), so walk it from the tail.
     */
    for (queue_ScanBackwards(&rxevent_queue, cur, prev, rxevent)) {
	if (clock_Ge(when, &cur->eventTime)) {
	    queue_InsertAfter(cur, evp);
	    return evp;
	}
    }

    /* Earlier than everything already queued: goes to the front */
    queue_Prepend(&rxevent_queue, evp);
    if (rxevent_ScheduledEarlierEvent) {
	/* Notify our external scheduler */
	(*rxevent_ScheduledEarlierEvent) ();
    }
    return evp;
}
Пример #12
0
/* rxi_WritevAlloc
 *
 * Fill in up to maxio entries of iov so that they point at free space in
 * send packet buffers, covering up to nbytes bytes in total.  Space left in
 * the call's current packet is used first; further packets are allocated
 * with rxi_AllocSendPacket and appended to call->iovq.  The call's own
 * write cursor (nFree/curvec/curpos/curlen) is only read here; progress is
 * tracked in local copies.
 *
 * On return *nio holds the number of iov entries filled in and the return
 * value is the number of bytes they cover.  If the call is not in sending
 * mode and cannot be switched to it, 0 is returned and *nio is not written.
 */
int
rxi_WritevAlloc(struct rx_call *call, struct iovec *iov, int *nio, int maxio,
		int nbytes)
{
    struct rx_connection *conn = call->conn;
    struct rx_packet *cp = call->currentPacket;
    int requestCount;
    int nextio;
    /* Temporary values, real work is done in rxi_WritevProc */
    int tnFree;
    unsigned int tcurvec;
    char *tcurpos;
    int tcurlen;

    requestCount = nbytes;
    nextio = 0;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode != RX_MODE_SENDING) {
	/* only a server call that is currently receiving may flip over to
	 * sending; any left-over current packet is discarded first */
	if ((conn->type == RX_SERVER_CONNECTION)
	    && (call->mode == RX_MODE_RECEIVING)) {
	    call->mode = RX_MODE_SENDING;
	    if (cp) {
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_CP;
#endif
		rxi_FreePacket(cp);
		cp = call->currentPacket = (struct rx_packet *)0;
		call->nLeft = 0;
		call->nFree = 0;
	    }
	} else {
	    return 0;
	}
    }

    /* Set up the iovec to point to data in packet buffers. */
    tnFree = call->nFree;
    tcurvec = call->curvec;
    tcurpos = call->curpos;
    tcurlen = call->curlen;
    do {
	int t;

	if (tnFree == 0) {
	    /* current packet is full, allocate a new one */
	    MUTEX_ENTER(&call->lock);
	    cp = rxi_AllocSendPacket(call, nbytes);
	    MUTEX_EXIT(&call->lock);
	    if (cp == NULL) {
		/* out of space, return what we have */
		*nio = nextio;
		return requestCount - nbytes;
	    }
#ifdef RX_TRACK_PACKETS
	    cp->flags |= RX_PKTFLAG_IOVQ;
#endif
	    queue_Append(&call->iovq, cp);
#ifdef RXDEBUG_PACKET
            call->iovqc++;
#endif /* RXDEBUG_PACKET */
	    /* start the local cursor at the first data vector (index 1),
	     * just past the security header */
	    tnFree = cp->length;
	    tcurvec = 1;
	    tcurpos =
		(char *)cp->wirevec[1].iov_base +
		call->conn->securityHeaderSize;
	    tcurlen = cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
	}

	if (tnFree < nbytes) {
	    /* try to extend the current packet */
	    int len, mud;
	    len = cp->length;
	    mud = rx_MaxUserDataSize(call);
	    if (mud > len) {
		int want;
		want = MIN(nbytes - tnFree, mud - len);
		rxi_AllocDataBuf(cp, want, RX_PACKET_CLASS_SEND_CBUF);
		/* never let the packet grow past the max user data size */
		if (cp->length > (unsigned)mud)
		    cp->length = mud;
		tnFree += (cp->length - len);
		/* growing the call's current packet also grows the space
		 * the call itself believes is free */
		if (cp == call->currentPacket) {
		    call->nFree += (cp->length - len);
		}
	    }
	}

	/* fill in the next entry in the iovec */
	/* t = smallest of: room in this vector, bytes still wanted, room in
	 * this packet */
	t = MIN(tcurlen, nbytes);
	t = MIN(tnFree, t);
	iov[nextio].iov_base = tcurpos;
	iov[nextio].iov_len = t;
	nbytes -= t;
	tcurpos += t;
	tcurlen -= t;
	tnFree -= t;
	nextio++;

	if (!tcurlen) {
	    /* need to get another struct iov */
	    if (++tcurvec >= cp->niovecs) {
		/* current packet is full, extend it or move on to next packet */
		tnFree = 0;
	    } else {
		tcurpos = (char *)cp->wirevec[tcurvec].iov_base;
		tcurlen = cp->wirevec[tcurvec].iov_len;
	    }
	}
    } while (nbytes && nextio < maxio);
    *nio = nextio;
    return requestCount - nbytes;
}
Пример #13
0
/* rxi_WriteProc
 *
 * Copy nbytes bytes from buf into the call's send packets.  Whenever the
 * current packet has no free space left it is prepared, appended to the
 * call's transmit queue (call->tq) and a new packet is allocated, waiting
 * for the transmit window to open if necessary.  A request of 0 bytes
 * still goes through the loop body once (see the comment at the loop).
 *
 * Returns the number of bytes requested once everything has been copied,
 * or 0 if the call is (or goes) into an error state or is not in a mode
 * that allows sending.
 */
int
rxi_WriteProc(struct rx_call *call, char *buf,
	      int nbytes)
{
    struct rx_connection *conn = call->conn;
    struct rx_packet *cp = call->currentPacket;
    unsigned int t;
    int requestCount = nbytes;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode != RX_MODE_SENDING) {
	/* only a server call that is currently receiving may flip over to
	 * sending; any left-over current packet is discarded first */
	if ((conn->type == RX_SERVER_CONNECTION)
	    && (call->mode == RX_MODE_RECEIVING)) {
	    call->mode = RX_MODE_SENDING;
	    if (cp) {
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_CP;
#endif
		rxi_FreePacket(cp);
		cp = call->currentPacket = (struct rx_packet *)0;
		call->nLeft = 0;
		call->nFree = 0;
	    }
	} else {
	    return 0;
	}
    }

    /* Loop condition is checked at end, so that a write of 0 bytes
     * will force a packet to be created--specially for the case where
     * there are 0 bytes on the stream, but we must send a packet
     * anyway. */
    do {
	if (call->nFree == 0) {
	    /* no room in the current packet (or no current packet): queue
	     * what we have and get a fresh one, all under call->lock */
	    MUTEX_ENTER(&call->lock);
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
            rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
            /* re-read now that the lock is held */
            cp = call->currentPacket;
            if (call->error)
                call->mode = RX_MODE_ERROR;
	    if (!call->error && cp) {
                /* Clear the current packet now so that if
                 * we are forced to wait and drop the lock
                 * the packet we are planning on using
                 * cannot be freed.
                 */
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_CP;
#endif
		call->currentPacket = (struct rx_packet *)0;
		clock_NewTime();	/* Bogus:  need new time package */
		/* The 0, below, specifies that it is not the last packet:
		 * there will be others. PrepareSendPacket may
		 * alter the packet length by up to
		 * conn->securityMaxTrailerSize */
		hadd32(call->bytesSent, cp->length);
		rxi_PrepareSendPacket(call, cp, 0);
#ifdef RX_TRACK_PACKETS
		cp->flags |= RX_PKTFLAG_TQ;
#endif
		queue_Append(&call->tq, cp);
#ifdef RXDEBUG_PACKET
                call->tqc++;
#endif /* RXDEBUG_PACKET */
                cp = (struct rx_packet *)0;
		/* rxi_Start is skipped while either fast-recover flag is
		 * set on the call */
		if (!
		    (call->
		     flags & (RX_CALL_FAST_RECOVER |
			      RX_CALL_FAST_RECOVER_WAIT))) {
		    rxi_Start(0, call, 0, 0);
		}
	    } else if (cp) {
		/* call is in error: the pending packet is dropped rather
		 * than queued */
#ifdef RX_TRACK_PACKETS
		cp->flags &= ~RX_PKTFLAG_CP;
#endif
		rxi_FreePacket(cp);
		cp = call->currentPacket = (struct rx_packet *)0;
	    }
	    /* Wait for transmit window to open up */
	    while (!call->error
		   && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
		clock_NewTime();
		call->startWait = clock_Sec();

#ifdef	RX_ENABLE_LOCKS
		CV_WAIT(&call->cv_twind, &call->lock);
#else
		call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
		osi_rxSleep(&call->twind);
#endif

		call->startWait = 0;
#ifdef RX_ENABLE_LOCKS
		if (call->error) {
                    call->mode = RX_MODE_ERROR;
		    MUTEX_EXIT(&call->lock);
		    return 0;
		}
#endif /* RX_ENABLE_LOCKS */
	    }
	    if ((cp = rxi_AllocSendPacket(call, nbytes))) {
#ifdef RX_TRACK_PACKETS
		cp->flags |= RX_PKTFLAG_CP;
#endif
		call->currentPacket = cp;
		call->nFree = cp->length;
		call->curvec = 1;	/* 0th vec is always header */
		/* begin at the beginning [ more or less ], continue
		 * on until the end, then stop. */
		call->curpos =
		    (char *)cp->wirevec[1].iov_base +
		    call->conn->securityHeaderSize;
		call->curlen =
		    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
	    }
	    /* the call may have gone into error while we waited or
	     * allocated; give back any packet just obtained and bail out */
	    if (call->error) {
                call->mode = RX_MODE_ERROR;
		if (cp) {
#ifdef RX_TRACK_PACKETS
		    cp->flags &= ~RX_PKTFLAG_CP;
#endif
		    rxi_FreePacket(cp);
		    call->currentPacket = NULL;
		}
		MUTEX_EXIT(&call->lock);
		return 0;
	    }
	    MUTEX_EXIT(&call->lock);
	}

	if (cp && (int)call->nFree < nbytes) {
	    /* Try to extend the current buffer */
	    int len, mud;
	    len = cp->length;
	    mud = rx_MaxUserDataSize(call);
	    if (mud > len) {
		int want;
		want = MIN(nbytes - (int)call->nFree, mud - len);
		rxi_AllocDataBuf(cp, want, RX_PACKET_CLASS_SEND_CBUF);
		/* never let the packet grow past the max user data size */
		if (cp->length > (unsigned)mud)
		    cp->length = mud;
		call->nFree += (cp->length - len);
	    }
	}

	/* If the remaining bytes fit in the buffer, then store them
	 * and return.  Don't ship a buffer that's full immediately to
	 * the peer--we don't know if it's the last buffer yet */

	/* without a packet there is nowhere to copy to; zeroing nFree makes
	 * the copy loop below a no-op and forces another allocation attempt
	 * on the next pass */
	if (!cp) {
	    call->nFree = 0;
	}

	while (nbytes && call->nFree) {

	    /* t = smallest of: room in this vector, bytes still to write,
	     * room in this packet */
	    t = MIN((int)call->curlen, nbytes);
	    t = MIN((int)call->nFree, t);
	    memcpy(call->curpos, buf, t);
	    buf += t;
	    nbytes -= t;
	    call->curpos += t;
	    call->curlen -= (u_short)t;
	    call->nFree -= (u_short)t;

	    if (!call->curlen) {
		/* need to get another struct iov */
		if (++call->curvec >= cp->niovecs) {
		    /* current packet is full, extend or send it */
		    call->nFree = 0;
		} else {
		    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
		    call->curlen = cp->wirevec[call->curvec].iov_len;
		}
	    }
	}			/* while bytes to send and room to send them */

	/* might be out of space now */
	if (!nbytes) {
	    return requestCount;
	} else;			/* more data to send, so get another packet and keep going */
    } while (nbytes);

    /* the loop only ends with nbytes == 0, which has already returned
     * above, so this line is not expected to be reached */
    return requestCount - nbytes;
}
Пример #14
0
/* rxi_FillReadVec
 *
 * Uses packets in the receive queue to fill in as much of the
 * current iovec as possible. Does not block if it runs out
 * of packets to complete the iovec. Return true if an ack packet
 * was sent, otherwise return false
 *
 * The iovec being filled is call->iov; call->iovNext is the next free
 * entry, call->iovMax the limit, and call->iovNBytes the number of bytes
 * still wanted.  Packets whose data has been handed out are moved to
 * call->iovq.  1 is also returned when a packet fails RXS_CheckPacket and
 * the connection is aborted.
 *
 * NOTE(review): call->lock is released and re-taken around the connection
 * abort below, so the caller is presumably expected to hold call->lock on
 * entry -- confirm against callers. */
int
rxi_FillReadVec(struct rx_call *call, afs_uint32 serial)
{
    int didConsume = 0;
    int didHardAck = 0;
    unsigned int t;
    struct rx_packet *rp;
    struct rx_packet *curp;
    struct iovec *call_iov;
    struct iovec *cur_iov = NULL;

    /* pick up where a previous call left off, if a packet is in progress */
    curp = call->currentPacket;
    if (curp) {
	cur_iov = &curp->wirevec[call->curvec];
    }
    call_iov = &call->iov[call->iovNext];

    while (!call->error && call->iovNBytes && call->iovNext < call->iovMax) {
	if (call->nLeft == 0) {
	    /* Get next packet */
	    if (queue_IsNotEmpty(&call->rq)) {
		/* Check that next packet available is next in sequence */
		rp = queue_First(&call->rq, rx_packet);
		if (rp->header.seq == call->rnext) {
		    afs_int32 error;
		    struct rx_connection *conn = call->conn;
		    queue_Remove(rp);
#ifdef RX_TRACK_PACKETS
		    rp->flags &= ~RX_PKTFLAG_RQ;
#endif
#ifdef RXDEBUG_PACKET
                    call->rqc--;
#endif /* RXDEBUG_PACKET */

		    /* RXS_CheckPacket called to undo RXS_PreparePacket's
		     * work.  It may reduce the length of the packet by up
		     * to conn->maxTrailerSize, to reflect the length of the
		     * data + the header. */
		    if ((error =
			 RXS_CheckPacket(conn->securityObject, call, rp))) {
			/* Used to merely shut down the call, but now we
			 * shut down the whole connection since this may
			 * indicate an attempt to hijack it */

			/* call->lock is dropped for the duration of the
			 * connection-level abort and re-taken before
			 * returning */
			MUTEX_EXIT(&call->lock);
			rxi_ConnectionError(conn, error);
			MUTEX_ENTER(&conn->conn_data_lock);
			rp = rxi_SendConnectionAbort(conn, rp, 0, 0);
			MUTEX_EXIT(&conn->conn_data_lock);
			rxi_FreePacket(rp);
			MUTEX_ENTER(&call->lock);

			return 1;
		    }
		    call->rnext++;
		    curp = call->currentPacket = rp;
#ifdef RX_TRACK_PACKETS
		    call->currentPacket->flags |= RX_PKTFLAG_CP;
#endif
		    call->curvec = 1;	/* 0th vec is always header */
		    cur_iov = &curp->wirevec[1];
		    /* begin at the beginning [ more or less ], continue
		     * on until the end, then stop. */
		    call->curpos =
			(char *)curp->wirevec[1].iov_base +
			call->conn->securityHeaderSize;
		    call->curlen =
			curp->wirevec[1].iov_len -
			call->conn->securityHeaderSize;

		    /* Notice that this code works correctly if the data
		     * size is 0 (which it may be--no reply arguments from
		     * server, for example).  This relies heavily on the
		     * fact that the code below immediately frees the packet
		     * (no yields, etc.).  If it didn't, this would be a
		     * problem because a value of zero for call->nLeft
		     * normally means that there is no read packet */
		    call->nLeft = curp->length;
		    hadd32(call->bytesRcvd, curp->length);

		    /* Send a hard ack for every rxi_HardAckRate+1 packets
		     * consumed. Otherwise schedule an event to send
		     * the hard ack later on.
		     */
		    call->nHardAcks++;
		    didConsume = 1;
		    continue;
		}
	    }
	    /* receive queue is empty, or its head is not the next packet in
	     * sequence: stop without blocking */
	    break;
	}

	/* It's possible for call->nLeft to be smaller than any particular
	 * iov_len.  Usually, recvmsg doesn't change the iov_len, since it
	 * reflects the size of the buffer.  We have to keep track of the
	 * number of bytes read in the length field of the packet struct.  On
	 * the final portion of a received packet, it's almost certain that
	 * call->nLeft will be smaller than the final buffer. */
	while (call->iovNBytes && call->iovNext < call->iovMax && curp) {

	    /* t = smallest of: data left in this vector, bytes still wanted,
	     * data left in this packet */
	    t = MIN((int)call->curlen, call->iovNBytes);
	    t = MIN(t, (int)call->nLeft);
	    call_iov->iov_base = call->curpos;
	    call_iov->iov_len = t;
	    call_iov++;
	    call->iovNext++;
	    call->iovNBytes -= t;
	    call->curpos += t;
	    call->curlen -= t;
	    call->nLeft -= t;

	    if (!call->nLeft) {
		/* out of packet.  Get another one. */
		/* the caller's iovec now points into this packet, so it is
		 * parked on iovq rather than freed here */
#ifdef RX_TRACK_PACKETS
                curp->flags &= ~RX_PKTFLAG_CP;
                curp->flags |= RX_PKTFLAG_IOVQ;
#endif
		queue_Append(&call->iovq, curp);
#ifdef RXDEBUG_PACKET
                call->iovqc++;
#endif /* RXDEBUG_PACKET */
		curp = call->currentPacket = (struct rx_packet *)0;
	    } else if (!call->curlen) {
		/* need to get another struct iov */
		if (++call->curvec >= curp->niovecs) {
		    /* current packet is exhausted, get ready for another */
		    /* don't worry about curvec and stuff, they get set somewhere else */
#ifdef RX_TRACK_PACKETS
		    curp->flags &= ~RX_PKTFLAG_CP;
		    curp->flags |= RX_PKTFLAG_IOVQ;
#endif
		    queue_Append(&call->iovq, curp);
#ifdef RXDEBUG_PACKET
                    call->iovqc++;
#endif /* RXDEBUG_PACKET */
		    curp = call->currentPacket = (struct rx_packet *)0;
		    call->nLeft = 0;
		} else {
		    cur_iov++;
		    call->curpos = (char *)cur_iov->iov_base;
		    call->curlen = cur_iov->iov_len;
		}
	    }
	}
    }

    /* If we consumed any packets then check whether we need to
     * send a hard ack. */
    if (didConsume && (!(call->flags & RX_CALL_RECEIVE_DONE))) {
	if (call->nHardAcks > (u_short) rxi_HardAckRate) {
	    /* enough packets consumed: ack right away and cancel any
	     * pending delayed ack */
	    rxevent_Cancel(call->delayedAckEvent, call,
			   RX_CALL_REFCOUNT_DELAY);
	    rxi_SendAck(call, 0, serial, RX_ACK_DELAY, 0);
	    didHardAck = 1;
	} else {
	    struct clock when, now;
	    clock_GetTime(&now);
	    when = now;
	    /* Delay to consolidate ack packets */
	    clock_Add(&when, &rx_hardAckDelay);
	    /* (re)schedule only if no delayed ack is pending, or the pending
	     * one would fire later than the time computed here */
	    if (!call->delayedAckEvent
		|| clock_Gt(&call->delayedAckEvent->eventTime, &when)) {
		rxevent_Cancel(call->delayedAckEvent, call,
			       RX_CALL_REFCOUNT_DELAY);
                MUTEX_ENTER(&rx_refcnt_mutex);
		CALL_HOLD(call, RX_CALL_REFCOUNT_DELAY);
                MUTEX_EXIT(&rx_refcnt_mutex);
		call->delayedAckEvent =
		    rxevent_PostNow(&when, &now, rxi_SendDelayedAck, call, 0);
	    }
	}
    }
    return didHardAck;
}
Пример #15
0
/* Flush any buffered data to the stream, switch to read mode
 * (clients) or to EOF mode (servers)
 *
 * If the call is in sending mode, the current packet (or a freshly
 * allocated empty one when there is none) is prepared as the last packet
 * and appended to the call's transmit queue.  If the call is not in
 * sending mode only the left-over iovq packets are freed.
 *
 * call->lock is taken internally and is released on every return path,
 * including the one where no packet can be allocated.
 *
 * LOCKS HELD: called at netpri.
 */
void
rxi_FlushWrite(struct rx_call *call)
{
    struct rx_packet *cp = NULL;

    /* Free any packets from the last call to ReadvProc/WritevProc */
    if (queue_IsNotEmpty(&call->iovq)) {
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
    }

    if (call->mode == RX_MODE_SENDING) {

	/* clients go on to read the reply; servers are done with the call */
	call->mode =
	    (call->conn->type ==
	     RX_CLIENT_CONNECTION ? RX_MODE_RECEIVING : RX_MODE_EOF);

#ifdef RX_KERNEL_TRACE
	{
	    int glockOwner = ISAFS_GLOCK();
	    if (!glockOwner)
		AFS_GLOCK();
	    afs_Trace3(afs_iclSetp, CM_TRACE_WASHERE, ICL_TYPE_STRING,
		       __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER,
		       call);
	    if (!glockOwner)
		AFS_GUNLOCK();
	}
#endif

        MUTEX_ENTER(&call->lock);
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
        rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
        if (call->error)
            call->mode = RX_MODE_ERROR;

        cp = call->currentPacket;

	if (cp) {
	    /* cp->length is only supposed to be the user's data */
	    /* cp->length was already set to (then-current)
	     * MaxUserDataSize or less. */
#ifdef RX_TRACK_PACKETS
	    cp->flags &= ~RX_PKTFLAG_CP;
#endif
	    /* trim off the part of the packet that was never written */
	    cp->length -= call->nFree;
	    call->currentPacket = (struct rx_packet *)0;
	    call->nFree = 0;
	} else {
	    /* nothing buffered: an empty packet is still sent so that the
	     * "last packet" marker goes out */
	    cp = rxi_AllocSendPacket(call, 0);
	    if (!cp) {
		/* Mode can no longer be MODE_SENDING */
		/* drop the lock taken above before bailing out */
		MUTEX_EXIT(&call->lock);
		return;
	    }
	    cp->length = 0;
	    cp->niovecs = 2;	/* header + space for rxkad stuff */
	    call->nFree = 0;
	}

	/* The 1 specifies that this is the last packet */
	hadd32(call->bytesSent, cp->length);
	rxi_PrepareSendPacket(call, cp, 1);
#ifdef RX_TRACK_PACKETS
	cp->flags |= RX_PKTFLAG_TQ;
#endif
	queue_Append(&call->tq, cp);
#ifdef RXDEBUG_PACKET
        call->tqc++;
#endif /* RXDEBUG_PACKET */
	/* rxi_Start is skipped while either fast-recover flag is set */
	if (!
	    (call->
	     flags & (RX_CALL_FAST_RECOVER | RX_CALL_FAST_RECOVER_WAIT))) {
	    rxi_Start(0, call, 0, 0);
	}
        MUTEX_EXIT(&call->lock);
    }
}
Пример #16
0
/* rxi_WritevProc -- internal version.
 *
 * Send buffers allocated in rxi_WritevAlloc.
 *
 * Walks the caller's I/O vector and checks that every entry starts exactly
 * at the call's current write position (call->curpos) and does not exceed
 * the space left in the current buffer (call->curlen). Each packet that
 * fills up is prepared for sending and collected on a private queue; after
 * the vector has been processed that queue is spliced onto call->tq and
 * rxi_Start is invoked unless the call is in fast recovery. Finally the
 * function waits while call->tnext + 1 exceeds
 * call->tfirst + 2 * call->twind.
 *
 * call    call to write on; call->mode must be RX_MODE_SENDING
 * iov     I/O vector describing the data to send
 * nio     number of entries in iov
 * nbytes  number of bytes described by iov
 *
 * Returns the number of bytes consumed from the vector, or 0 when the call
 * is in error on entry or goes into error while processing. An iovec that
 * does not line up with the call's write position sets call->error to
 * RX_PROTOCOL_ERROR.
 *
 * LOCKS USED -- called at netpri. call->lock is taken on entry and released
 * before every return; all updates to fields of the call are made while it
 * is held.
 */
int
rxi_WritevProc(struct rx_call *call, struct iovec *iov, int nio, int nbytes)
{
    struct rx_packet *cp = NULL;
#ifdef RX_TRACK_PACKETS
    struct rx_packet *p, *np;
#endif
    int nextio = 0;
    int requestCount = nbytes;
    struct rx_queue tmpq;
#ifdef RXDEBUG_PACKET
    u_short tmpqc;
#endif

    MUTEX_ENTER(&call->lock);
    if (call->error) {
        call->mode = RX_MODE_ERROR;
    } else if (call->mode != RX_MODE_SENDING) {
        call->error = RX_PROTOCOL_ERROR;
    }
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
    rxi_WaitforTQBusy(call);
#endif /* AFS_GLOBAL_RXLOCK_KERNEL */
    cp = call->currentPacket;

    if (call->error) {
        call->mode = RX_MODE_ERROR;
        /* Put the current packet back on the iovq and release the whole
         * iovq. This is done before dropping call->lock so that
         * currentPacket, iovq and iovqc are never modified unlocked. */
        if (cp) {
#ifdef RX_TRACK_PACKETS
            cp->flags &= ~RX_PKTFLAG_CP;
            cp->flags |= RX_PKTFLAG_IOVQ;
#endif
            queue_Prepend(&call->iovq, cp);
#ifdef RXDEBUG_PACKET
            call->iovqc++;
#endif /* RXDEBUG_PACKET */
            call->currentPacket = (struct rx_packet *)0;
        }
#ifdef RXDEBUG_PACKET
        call->iovqc -=
#endif /* RXDEBUG_PACKET */
            rxi_FreePackets(0, &call->iovq);
        MUTEX_EXIT(&call->lock);
        return 0;
    }

    /* Loop through the I/O vector adjusting packet pointers.
     * Place full packets back onto the iovq once they are ready
     * to send. Set RX_PROTOCOL_ERROR if any problems are found in
     * the iovec. We put the loop condition at the end to ensure that
     * a zero length write will push a short packet. */
    queue_Init(&tmpq);
#ifdef RXDEBUG_PACKET
    tmpqc = 0;
#endif /* RXDEBUG_PACKET */
    do {
        if (call->nFree == 0 && cp) {
            clock_NewTime();    /* Bogus:  need new time package */
            /* The 0, below, specifies that it is not the last packet:
             * there will be others. PrepareSendPacket may
             * alter the packet length by up to
             * conn->securityMaxTrailerSize */
            hadd32(call->bytesSent, cp->length);
            rxi_PrepareSendPacket(call, cp, 0);
            queue_Append(&tmpq, cp);
#ifdef RXDEBUG_PACKET
            tmpqc++;
#endif /* RXDEBUG_PACKET */
            cp = call->currentPacket = (struct rx_packet *)0;

            /* The head of the iovq is now the current packet */
            if (nbytes) {
                if (queue_IsEmpty(&call->iovq)) {
                    /* Record the error while the lock is still held;
                     * tmpq is private, so it can be freed unlocked. */
                    call->error = RX_PROTOCOL_ERROR;
                    MUTEX_EXIT(&call->lock);
#ifdef RXDEBUG_PACKET
                    tmpqc -=
#endif /* RXDEBUG_PACKET */
                        rxi_FreePackets(0, &tmpq);
                    return 0;
                }
                cp = queue_First(&call->iovq, rx_packet);
                queue_Remove(cp);
#ifdef RX_TRACK_PACKETS
                cp->flags &= ~RX_PKTFLAG_IOVQ;
#endif
#ifdef RXDEBUG_PACKET
                call->iovqc--;
#endif /* RXDEBUG_PACKET */
#ifdef RX_TRACK_PACKETS
                cp->flags |= RX_PKTFLAG_CP;
#endif
                call->currentPacket = cp;
                call->nFree = cp->length;
                call->curvec = 1;
                call->curpos =
                    (char *)cp->wirevec[1].iov_base +
                    call->conn->securityHeaderSize;
                call->curlen =
                    cp->wirevec[1].iov_len - call->conn->securityHeaderSize;
            }
        }

        if (nbytes) {
            /* The next iovec should point to the current position. With
             * data left but no current packet there is nothing the iovec
             * could legitimately refer to (and cp is dereferenced below),
             * so that case is rejected the same way. */
            if (!cp
                || iov[nextio].iov_base != call->curpos
                || iov[nextio].iov_len > (int)call->curlen) {
                call->error = RX_PROTOCOL_ERROR;
                if (cp) {
#ifdef RX_TRACK_PACKETS
                    cp->flags &= ~RX_PKTFLAG_CP;
#endif
                    queue_Prepend(&tmpq, cp);
#ifdef RXDEBUG_PACKET
                    tmpqc++;
#endif /* RXDEBUG_PACKET */
                    cp = call->currentPacket = (struct rx_packet *)0;
                }
                /* All call fields are updated; only the private queue is
                 * touched after the lock is released. */
                MUTEX_EXIT(&call->lock);
#ifdef RXDEBUG_PACKET
                tmpqc -=
#endif /* RXDEBUG_PACKET */
                    rxi_FreePackets(0, &tmpq);
                return 0;
            }
            nbytes -= iov[nextio].iov_len;
            call->curpos += iov[nextio].iov_len;
            call->curlen -= iov[nextio].iov_len;
            call->nFree -= iov[nextio].iov_len;
            nextio++;
            if (call->curlen == 0) {
                /* Current buffer exhausted: step to the packet's next
                 * wirevec entry, or mark the packet full. */
                if (++call->curvec > cp->niovecs) {
                    call->nFree = 0;
                } else {
                    call->curpos = (char *)cp->wirevec[call->curvec].iov_base;
                    call->curlen = cp->wirevec[call->curvec].iov_len;
                }
            }
        }
    } while (nbytes && nextio < nio);

    /* Move the packets from the temporary queue onto the transmit queue.
     * We may end up with more than call->twind packets on the queue. */

#ifdef RX_TRACK_PACKETS
    for (queue_Scan(&tmpq, p, np, rx_packet))
    {
        p->flags |= RX_PKTFLAG_TQ;
    }
#endif

    if (call->error)
        call->mode = RX_MODE_ERROR;

    queue_SpliceAppend(&call->tq, &tmpq);
#ifdef RXDEBUG_PACKET
    /* Keep the debug length of the transmit queue in step with the
     * packets that were just spliced onto it. */
    call->tqc += tmpqc;
#endif /* RXDEBUG_PACKET */

    if (!(call->flags & (RX_CALL_FAST_RECOVER | RX_CALL_FAST_RECOVER_WAIT))) {
        rxi_Start(0, call, 0, 0);
    }

    /* Wait for the length of the transmit queue to fall below call->twind */
    while (!call->error && call->tnext + 1 > call->tfirst + (2 * call->twind)) {
        clock_NewTime();
        call->startWait = clock_Sec();
#ifdef  RX_ENABLE_LOCKS
        CV_WAIT(&call->cv_twind, &call->lock);
#else
        call->flags |= RX_CALL_WAIT_WINDOW_ALLOC;
        osi_rxSleep(&call->twind);
#endif
        call->startWait = 0;
    }

    /* cp is no longer valid since we may have given up the lock */
    cp = call->currentPacket;

    if (call->error) {
        call->mode = RX_MODE_ERROR;
        /* Detach the packet from the call under the lock, free it after. */
        call->currentPacket = NULL;
        MUTEX_EXIT(&call->lock);
        if (cp) {
#ifdef RX_TRACK_PACKETS
            cp->flags &= ~RX_PKTFLAG_CP;
#endif
            rxi_FreePacket(cp);
        }
        return 0;
    }
    MUTEX_EXIT(&call->lock);

    return requestCount - nbytes;
}