/*
 * The life of a pregen thread:
 *    Find empty keystream queues and fill them using their counter.
 *    When done, update counter for the next fill.
 */
static void *
thread_loop(void *x)
{
	AES_KEY key;
	STATS_STRUCT(stats);
	struct ssh_aes_ctr_ctx *c = x;
	struct kq *q;
	int i;
	int qidx;

	/* Per-thread stats, reported on cancellation */
	STATS_INIT(stats);
#ifdef CIPHER_THREAD_STATS
	pthread_cleanup_push(thread_loop_stats, &stats);
#endif

	/* Thread local copy of AES key */
	memcpy(&key, &c->aes_ctx, sizeof(key));

	/*
	 * Handle the special case of startup: one thread must fill the
	 * first KQ, then mark it as draining. The queue lock is held
	 * throughout.
	 */
	if (pthread_equal(pthread_self(), c->tid[0])) {
		q = &c->q[0];
		pthread_mutex_lock(&q->lock);
		if (q->qstate == KQINIT) {
			for (i = 0; i < KQLEN; i++) {
				AES_encrypt(q->ctr, q->keys[i], &key);
				ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE);
			}
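			/*
			 * Skip the counter ahead past the blocks the other
			 * NUMKQ-1 queues will generate, so this queue's next
			 * fill continues its own slice of the keystream.
			 */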
			ssh_ctr_add(q->ctr, KQLEN * (NUMKQ - 1), AES_BLOCK_SIZE);
			q->qstate = KQDRAINING;
			STATS_FILL(stats);
			pthread_cond_broadcast(&q->cond);
		}
		pthread_mutex_unlock(&q->lock);
	} else
		STATS_SKIP(stats);

	/*
	 * Normal case is to find empty queues and fill them, skipping over
	 * queues already filled by other threads and stopping to wait for
	 * a draining queue to become empty.
	 *
	 * Multiple threads may be waiting on a draining queue and are
	 * awoken when it empties.  The first thread to wake marks it as
	 * filling; the others move on to fill, skip, or wait on the next
	 * queue.
	 */
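	/*
	 * Queue states cycle KQEMPTY -> KQFILLING -> KQFULL -> KQDRAINING
	 * -> KQEMPTY; the first queue starts life as KQINIT and moves
	 * straight to KQDRAINING after the startup fill above.
	 */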
	for (qidx = 1;; qidx = (qidx + 1) % NUMKQ) {
		/* Check if I was cancelled, also checked in cond_wait */
		pthread_testcancel();

		/* Lock queue and block if it's draining */
		q = &c->q[qidx];
		pthread_mutex_lock(&q->lock);
		pthread_cleanup_push(thread_loop_cleanup, &q->lock);
		while (q->qstate == KQDRAINING || q->qstate == KQINIT) {
			STATS_WAIT(stats);
			pthread_cond_wait(&q->cond, &q->lock);
		}
		pthread_cleanup_pop(0);

		/* If filling or full, somebody else got it, skip */
		if (q->qstate != KQEMPTY) {
			pthread_mutex_unlock(&q->lock);
			STATS_SKIP(stats);
			continue;
		}

		/*
		 * Empty, let's fill it.
		 * Queue lock is relinquished while we do this so others
		 * can see that it's being filled.
		 */
		q->qstate = KQFILLING;
		pthread_mutex_unlock(&q->lock);
		for (i = 0; i < KQLEN; i++) {
			AES_encrypt(q->ctr, q->keys[i], &key);
			ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE);
		}

		/* Re-lock, mark full and signal consumer */
		pthread_mutex_lock(&q->lock);
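		/* Jump the counter past the other queues' share of the keystream */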
		ssh_ctr_add(q->ctr, KQLEN * (NUMKQ - 1), AES_BLOCK_SIZE);
		q->qstate = KQFULL;
		STATS_FILL(stats);
		pthread_cond_signal(&q->cond);
		pthread_mutex_unlock(&q->lock);
	}

#ifdef CIPHER_THREAD_STATS
	/* Stats */
	pthread_cleanup_pop(1);
#endif

	return NULL;
}
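
/*
 * Rough sketch of the per-queue and per-cipher structures the code
 * above relies on (assumed field layout; the real definitions, found
 * elsewhere in this file, add cache-line padding and a few more
 * bookkeeping fields):
 *
 *	struct kq {
 *		u_char          keys[KQLEN][AES_BLOCK_SIZE];  -- pregenerated keystream
 *		u_char          ctr[AES_BLOCK_SIZE];          -- counter for the next fill
 *		pthread_mutex_t lock;
 *		pthread_cond_t  cond;
 *		int             qstate;   -- KQINIT/KQEMPTY/KQFILLING/KQFULL/KQDRAINING
 *	};
 *
 *	struct ssh_aes_ctr_ctx {
 *		AES_KEY         aes_ctx;
 *		STATS_STRUCT(stats);
 *		struct kq       q[NUMKQ];
 *		pthread_t       tid[...];  -- pregen thread ids
 *		int             qidx;      -- queue currently being drained
 *		int             ridx;      -- next keystream block within that queue
 *	};
 */
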
static int
ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
    LIBCRYPTO_EVP_INL_TYPE len)
{
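	/*
	 * Pointer union used to XOR the pregenerated keystream into the
	 * data 16, 8 or 4 bytes at a time, depending on how well the
	 * destination, source and keystream buffers are aligned.
	 */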
	typedef union {
#ifdef CIPHER_INT128_OK
		__uint128_t *u128;
#endif
		uint64_t *u64;
		uint32_t *u32;
		uint8_t *u8;
		const uint8_t *cu8;
		uintptr_t u;
	} ptrs_t;
	ptrs_t destp, srcp, bufp;
	uintptr_t align;
	struct ssh_aes_ctr_ctx *c;
	struct kq *q, *oldq;
	int ridx;
	u_char *buf;

	if (len == 0)
		return 1;
	if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL)
		return 0;

	q = &c->q[c->qidx];
	ridx = c->ridx;

	/* src already padded to block multiple */
	srcp.cu8 = src;
	destp.u8 = dest;
	while (len > 0) {
		buf = q->keys[ridx];
		bufp.u8 = buf;

		/*
		 * Figure out the alignment on the fly: ORing the three
		 * addresses together leaves low bits set for the worst-aligned
		 * of destination, source and keystream.  When unaligned
		 * accesses are known to be safe, pretend everything is aligned.
		 */
#ifdef CIPHER_UNALIGNED_OK
		align = 0;
#else
		align = destp.u | srcp.u | bufp.u;
#endif

#ifdef CIPHER_INT128_OK
		if ((align & 0xf) == 0) {
			destp.u128[0] = srcp.u128[0] ^ bufp.u128[0];
		} else
#endif
		if ((align & 0x7) == 0) {
			destp.u64[0] = srcp.u64[0] ^ bufp.u64[0];
			destp.u64[1] = srcp.u64[1] ^ bufp.u64[1];
		} else if ((align & 0x3) == 0) {
			destp.u32[0] = srcp.u32[0] ^ bufp.u32[0];
			destp.u32[1] = srcp.u32[1] ^ bufp.u32[1];
			destp.u32[2] = srcp.u32[2] ^ bufp.u32[2];
			destp.u32[3] = srcp.u32[3] ^ bufp.u32[3];
		} else {
			size_t i;
			for (i = 0; i < AES_BLOCK_SIZE; ++i)
				dest[i] = src[i] ^ buf[i];
		}

		destp.u += AES_BLOCK_SIZE;
		srcp.u += AES_BLOCK_SIZE;
		len -= AES_BLOCK_SIZE;
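		/* Keep the context IV in step with the consumed keystream */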
		ssh_ctr_inc(ctx->iv, AES_BLOCK_SIZE);

		/* Increment read index, switch queues on rollover */
		if ((ridx = (ridx + 1) % KQLEN) == 0) {
			oldq = q;

			/* Mark next queue draining, may need to wait */
			c->qidx = (c->qidx + 1) % NUMKQ;
			q = &c->q[c->qidx];
			pthread_mutex_lock(&q->lock);
			while (q->qstate != KQFULL) {
				STATS_WAIT(c->stats);
				pthread_cond_wait(&q->cond, &q->lock);
			}
			q->qstate = KQDRAINING;
			pthread_mutex_unlock(&q->lock);

			/* Mark consumed queue empty and signal producers */
			pthread_mutex_lock(&oldq->lock);
			oldq->qstate = KQEMPTY;
			STATS_DRAIN(c->stats);
			pthread_cond_broadcast(&oldq->cond);
			pthread_mutex_unlock(&oldq->lock);
		}
	}
	c->ridx = ridx;
	return 1;
}
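
/*
 * ssh_ctr_inc() and ssh_ctr_add() are not part of this excerpt.  A
 * minimal sketch of how they might look, assuming the counter is kept
 * as a big-endian byte string (the actual implementations live
 * elsewhere in the file):
 */
static void
ssh_ctr_inc(u_char *ctr, size_t len)
{
	int i;

	/* Increment the big-endian counter by one, propagating carries */
	for (i = len - 1; i >= 0; i--)
		if (++ctr[i])	/* stop unless this byte wrapped to zero */
			return;
}

static void
ssh_ctr_add(u_char *ctr, uint32_t n, u_int len)
{
	int i;
	uint64_t carry = n;

	/* Add n to the big-endian counter, byte by byte with carry */
	for (i = len - 1; i >= 0 && carry != 0; i--) {
		carry += ctr[i];
		ctr[i] = carry & 0xff;
		carry >>= 8;
	}
}
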
Example #3
static int
ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
            LIBCRYPTO_EVP_INL_TYPE len)
{
    struct ssh_aes_ctr_ctx *c;
    struct kq *q, *oldq;
    int ridx;
    u_char *buf;

    if (len == 0)
        return (1);
    if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL)
        return (0);

    q = &c->q[c->qidx];
    ridx = c->ridx;

    /* src already padded to block multiple */
    while (len > 0) {
        buf = q->keys[ridx];

#ifdef CIPHER_BYTE_XOR
        dest[0] = src[0] ^ buf[0];
        dest[1] = src[1] ^ buf[1];
        dest[2] = src[2] ^ buf[2];
        dest[3] = src[3] ^ buf[3];
        dest[4] = src[4] ^ buf[4];
        dest[5] = src[5] ^ buf[5];
        dest[6] = src[6] ^ buf[6];
        dest[7] = src[7] ^ buf[7];
        dest[8] = src[8] ^ buf[8];
        dest[9] = src[9] ^ buf[9];
        dest[10] = src[10] ^ buf[10];
        dest[11] = src[11] ^ buf[11];
        dest[12] = src[12] ^ buf[12];
        dest[13] = src[13] ^ buf[13];
        dest[14] = src[14] ^ buf[14];
        dest[15] = src[15] ^ buf[15];
#else
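        /*
         * XOR the 16-byte block as two 64-bit words; this assumes dest,
         * src and the keystream buffer are suitably aligned for
         * uint64_t access.
         */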
        *(uint64_t *)dest = *(uint64_t *)src ^ *(uint64_t *)buf;
        *(uint64_t *)(dest + 8) = *(uint64_t *)(src + 8) ^
                                  *(uint64_t *)(buf + 8);
#endif

        dest += 16;
        src += 16;
        len -= 16;
        ssh_ctr_inc(ctx->iv, AES_BLOCK_SIZE);

        /* Increment read index, switch queues on rollover */
        if ((ridx = (ridx + 1) % KQLEN) == 0) {
            oldq = q;

            /* Mark next queue draining, may need to wait */
            c->qidx = (c->qidx + 1) % NUMKQ;
            q = &c->q[c->qidx];
            pthread_mutex_lock(&q->lock);
            while (q->qstate != KQFULL) {
                STATS_WAIT(c->stats);
                pthread_cond_wait(&q->cond, &q->lock);
            }
            q->qstate = KQDRAINING;
            pthread_mutex_unlock(&q->lock);

            /* Mark consumed queue empty and signal producers */
            pthread_mutex_lock(&oldq->lock);
            oldq->qstate = KQEMPTY;
            STATS_DRAIN(c->stats);
            pthread_cond_broadcast(&oldq->cond);
            pthread_mutex_unlock(&oldq->lock);
        }
    }
    c->ridx = ridx;
    return (1);
}