unsigned int calculate_timespace(long load, struct config *config)
{
	int i;
	long long now, then;
	unsigned int estimated = GAUGECOUNT;
	unsigned int rounds = 0;
	unsigned int timed = 0;

	if (config->verbose)
		printf("calibrating load of %lius, please wait...\n", load);

	
	now = get_time();
	ROUNDS(estimated);
	then = get_time();

	timed = (unsigned int)(then - now);

	for (i = 0; i < 4; i++) {
		rounds = (unsigned int)(load * estimated / timed);
		dprintf("calibrating with %u rounds\n", rounds);
		now = get_time();
		ROUNDS(rounds);
		then = get_time();

		timed = (unsigned int)(then - now);
		estimated = rounds;
	}
	if (config->verbose)
		printf("calibration done\n");

	return estimated;
}
static void camellia_do_decrypt(const u32 *subkey, u32 *io, unsigned i)
{
	u32 il, ir, t0, t1;            /* temporary variables */

	/* pre whitening but absorb kw2 */
	io[0] ^= SUBKEY_L(i);
	io[1] ^= SUBKEY_R(i);

	/* main iteration */
#define ROUNDS(i) ({ \
	CAMELLIA_ROUNDSM(io[0], io[1], \
			 SUBKEY_L(i + 7), SUBKEY_R(i + 7), \
			 io[2], io[3], il, ir); \
	CAMELLIA_ROUNDSM(io[2], io[3], \
			 SUBKEY_L(i + 6), SUBKEY_R(i + 6), \
			 io[0], io[1], il, ir); \
	CAMELLIA_ROUNDSM(io[0], io[1], \
			 SUBKEY_L(i + 5), SUBKEY_R(i + 5), \
			 io[2], io[3], il, ir); \
	CAMELLIA_ROUNDSM(io[2], io[3], \
			 SUBKEY_L(i + 4), SUBKEY_R(i + 4), \
			 io[0], io[1], il, ir); \
	CAMELLIA_ROUNDSM(io[0], io[1], \
			 SUBKEY_L(i + 3), SUBKEY_R(i + 3), \
			 io[2], io[3], il, ir); \
	CAMELLIA_ROUNDSM(io[2], io[3], \
			 SUBKEY_L(i + 2), SUBKEY_R(i + 2), \
			 io[0], io[1], il, ir); \
})
#define FLS(i) ({ \
	CAMELLIA_FLS(io[0], io[1], io[2], io[3], \
		     SUBKEY_L(i + 1), SUBKEY_R(i + 1), \
		     SUBKEY_L(i + 0), SUBKEY_R(i + 0), \
		     t0, t1, il, ir); \
})

	if (i == 32) {
		ROUNDS(24);
		FLS(24);
	}
	ROUNDS(16);
	FLS(16);
	ROUNDS(8);
	FLS(8);
	ROUNDS(0);

#undef ROUNDS
#undef FLS

	/* post whitening but kw4 */
	io[2] ^= SUBKEY_L(0);
	io[3] ^= SUBKEY_R(0);
	/* NB: 0,1 should be swapped with 2,3 by caller! */
}
void AES_ecb_encrypt(block *blk,  AES_KEY *aesKey) {
	unsigned j, rnds = ROUNDS(aesKey);
	const block *sched = ((block *)(aesKey->rd_key));

	*blk = _mm_xor_si128(*blk, sched[0]);
	for (j = 1; j<rnds; ++j)
		*blk = _mm_aesenc_si128(*blk, sched[j]);
	*blk = _mm_aesenclast_si128(*blk, sched[j]);
}
void AES_encryptC(block *in, block *out,  AES_KEY *aesKey)
{
	int j, rnds = ROUNDS(aesKey);
	const __m128i *sched = ((__m128i *)(aesKey->rd_key));
	__m128i tmp = _mm_load_si128((__m128i*)in);
	tmp = _mm_xor_si128(tmp, sched[0]);
	for (j = 1; j<rnds; j++)  tmp = _mm_aesenc_si128(tmp, sched[j]);
	tmp = _mm_aesenclast_si128(tmp, sched[j]);
	_mm_store_si128((__m128i*)out, tmp);
}
Beispiel #5
0
void sms4_knc_encrypt_16blocks(sms4_key_t *key, const unsigned char *in, unsigned char *out)
{
	int *rk = (int *)key->rk;
	__m512i x0, x1, x2, x3, x4;
	__m512i t0, t1, t2, t3, t4;

	GET_BLKS(x0, x1, x2, x3, in);
	ROUNDS(x0, x1, x2, x3, x4, ROUND);
	PUT_BLKS(out, x2, x3, x4, x0);
}
void AES_ecb_encrypt_blks(block *blks, unsigned nblks,  AES_KEY *aesKey) {
    unsigned i,j,rnds=ROUNDS(aesKey);
	const block *sched = ((block *)(aesKey->rd_key));
	for (i=0; i<nblks; ++i)
	    blks[i] =_mm_xor_si128(blks[i], sched[0]);
	for(j=1; j<rnds; ++j)
	    for (i=0; i<nblks; ++i)
		    blks[i] = _mm_aesenc_si128(blks[i], sched[j]);
	for (i=0; i<nblks; ++i)
	    blks[i] =_mm_aesenclast_si128(blks[i], sched[j]);
}
Beispiel #7
0
inline void AES_ecb_decrypt_blks(block *blks, unsigned nblks, AES_KEY *key) {
	unsigned i, j, rnds = ROUNDS(key);
	const __m128i *sched = ((__m128i *) (key->rd_key));
	for (i = 0; i < nblks; ++i)
		blks[i] = _mm_xor_si128(blks[i], sched[0]);
	for (j = 1; j < rnds; ++j)
		for (i = 0; i < nblks; ++i)
			blks[i] = _mm_aesdec_si128(blks[i], sched[j]);
	for (i = 0; i < nblks; ++i)
		blks[i] = _mm_aesdeclast_si128(blks[i], sched[j]);
}
Beispiel #8
0
inline void AES_decrypt(const unsigned char *in, unsigned char *out,
		const AES_KEY *key) {
	int j, rnds = ROUNDS(key);
	const __m128i *sched = ((__m128i *) (key->rd_key));
	__m128i tmp = _mm_load_si128((__m128i *) in);
	tmp = _mm_xor_si128(tmp, sched[0]);
	for (j = 1; j < rnds; j++)
		tmp = _mm_aesdec_si128(tmp, sched[j]);
	tmp = _mm_aesdeclast_si128(tmp, sched[j]);
	_mm_store_si128((__m128i *) out, tmp);
}
Beispiel #9
0
inline void AES_set_decrypt_key_fast(AES_KEY *dkey, const AES_KEY *ekey) {
	int j = 0;
	int i = ROUNDS(ekey);
#if (OCB_KEY_LEN == 0)
	dkey->rounds = i;
#endif
	dkey->rd_key[i--] = ekey->rd_key[j++];
	while (i)
		dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]);
	dkey->rd_key[i] = ekey->rd_key[j];
}
void AES_ecb_encrypt_chunk_in_out(block *in, block *out, unsigned nblks, AES_KEY *aesKey) {

	int numberOfLoops = nblks / 8;
	int blocksPipeLined = numberOfLoops * 8;
	int remainingEncrypts = nblks - blocksPipeLined;

	unsigned j, rnds = ROUNDS(aesKey);
	const block *sched = ((block *)(aesKey->rd_key));

	for (int i = 0; i < numberOfLoops; i++){

		out[0 + i * 8] = _mm_xor_si128(in[0 + i * 8], sched[0]);
		out[1 + i * 8] = _mm_xor_si128(in[1 + i * 8], sched[0]);
		out[2 + i * 8] = _mm_xor_si128(in[2 + i * 8], sched[0]);
		out[3 + i * 8] = _mm_xor_si128(in[3 + i * 8], sched[0]);
		out[4 + i * 8] = _mm_xor_si128(in[4 + i * 8], sched[0]);
		out[5 + i * 8] = _mm_xor_si128(in[5 + i * 8], sched[0]);
		out[6 + i * 8] = _mm_xor_si128(in[6 + i * 8], sched[0]);
		out[7 + i * 8] = _mm_xor_si128(in[7 + i * 8], sched[0]);

		for (j = 1; j < rnds; ++j){
			out[0 + i * 8] = _mm_aesenc_si128(out[0 + i * 8], sched[j]);
			out[1 + i * 8] = _mm_aesenc_si128(out[1 + i * 8], sched[j]);
			out[2 + i * 8] = _mm_aesenc_si128(out[2 + i * 8], sched[j]);
			out[3 + i * 8] = _mm_aesenc_si128(out[3 + i * 8], sched[j]);
			out[4 + i * 8] = _mm_aesenc_si128(out[4 + i * 8], sched[j]);
			out[5 + i * 8] = _mm_aesenc_si128(out[5 + i * 8], sched[j]);
			out[6 + i * 8] = _mm_aesenc_si128(out[6 + i * 8], sched[j]);
			out[7 + i * 8] = _mm_aesenc_si128(out[7 + i * 8], sched[j]);
		}
		out[0 + i * 8] = _mm_aesenclast_si128(out[0 + i * 8], sched[j]);
		out[1 + i * 8] = _mm_aesenclast_si128(out[1 + i * 8], sched[j]);
		out[2 + i * 8] = _mm_aesenclast_si128(out[2 + i * 8], sched[j]);
		out[3 + i * 8] = _mm_aesenclast_si128(out[3 + i * 8], sched[j]);
		out[4 + i * 8] = _mm_aesenclast_si128(out[4 + i * 8], sched[j]);
		out[5 + i * 8] = _mm_aesenclast_si128(out[5 + i * 8], sched[j]);
		out[6 + i * 8] = _mm_aesenclast_si128(out[6 + i * 8], sched[j]);
		out[7 + i * 8] = _mm_aesenclast_si128(out[7 + i * 8], sched[j]);
	}

	for (int i = blocksPipeLined; i<blocksPipeLined + remainingEncrypts; ++i)
		out[i] = _mm_xor_si128(in[i], sched[0]);
	for (j = 1; j<rnds; ++j)
		for (int i = blocksPipeLined; i<blocksPipeLined + remainingEncrypts; ++i)
			out[i] = _mm_aesenc_si128(out[i], sched[j]);
	for (int i = blocksPipeLined; i<blocksPipeLined + remainingEncrypts; ++i)
		out[i] = _mm_aesenclast_si128(out[i], sched[j]);
}
void AES_ecb_encrypt_blks_4(block *blks,  AES_KEY *aesKey) {
	unsigned j, rnds = ROUNDS(aesKey);
	const block *sched = ((block *)(aesKey->rd_key));
	blks[0] = _mm_xor_si128(blks[0], sched[0]);
	blks[1] = _mm_xor_si128(blks[1], sched[0]);
	blks[2] = _mm_xor_si128(blks[2], sched[0]);
	blks[3] = _mm_xor_si128(blks[3], sched[0]);

	for (j = 1; j < rnds; ++j){
		blks[0] = _mm_aesenc_si128(blks[0], sched[j]);
		blks[1] = _mm_aesenc_si128(blks[1], sched[j]);
		blks[2] = _mm_aesenc_si128(blks[2], sched[j]);
		blks[3] = _mm_aesenc_si128(blks[3], sched[j]);
	}
	blks[0] = _mm_aesenclast_si128(blks[0], sched[j]);
	blks[1] = _mm_aesenclast_si128(blks[1], sched[j]);
	blks[2] = _mm_aesenclast_si128(blks[2], sched[j]);
	blks[3] = _mm_aesenclast_si128(blks[3], sched[j]);
}
Beispiel #12
0
void sms4_encrypt(const unsigned char *in, unsigned char *out, const sms4_key_t *key)
{
	uint32_t *rk = key->rk;
	uint32_t x0, x1, x2, x3, x4;

	x0 = GET32(in     );
	x1 = GET32(in +  4);
	x2 = GET32(in +  8);
	x3 = GET32(in + 12);

	ROUNDS(x0, x1, x2, x3, x4);

	PUT32(x0, out     );
	PUT32(x4, out +  4);
	PUT32(x3, out +  8);
	PUT32(x2, out + 12);

	x0 = x1 = x2 = x3 = x4 = 0;
}
void AES_ecb_encrypt_blks_4_in_out(block *in, block *out,  AES_KEY *aesKey) {
	unsigned j, rnds = ROUNDS(aesKey);
	const block *sched = ((block *)(aesKey->rd_key));
	//block temp[4];

	out[0] = _mm_xor_si128(in[0], sched[0]);
	out[1] = _mm_xor_si128(in[1], sched[0]);
	out[2] = _mm_xor_si128(in[2], sched[0]);
	out[3] = _mm_xor_si128(in[3], sched[0]);

	for (j = 1; j < rnds; ++j){
		out[0] = _mm_aesenc_si128(out[0], sched[j]);
		out[1] = _mm_aesenc_si128(out[1], sched[j]);
		out[2] = _mm_aesenc_si128(out[2], sched[j]);
		out[3] = _mm_aesenc_si128(out[3], sched[j]);
	}
	out[0] = _mm_aesenclast_si128(out[0], sched[j]);
	out[1] = _mm_aesenclast_si128(out[1], sched[j]);
	out[2] = _mm_aesenclast_si128(out[2], sched[j]);
	out[3] = _mm_aesenclast_si128(out[3], sched[j]);
}
void start_benchmark(struct config *config)
{
	unsigned int _round, cycle;
	long long now, then;
	long sleep_time = 0, load_time = 0;
	long performance_time = 0, powersave_time = 0;
	unsigned int calculations;
	unsigned long total_time = 0, progress_time = 0;

	sleep_time = config->sleep;
	load_time = config->load;

	
	for (_round = 1; _round <= config->rounds; _round++)
		total_time += _round * (config->sleep + config->load);
	total_time *= 2; 

	for (_round = 0; _round < config->rounds; _round++) {
		performance_time = 0LL;
		powersave_time = 0LL;

		show_progress(total_time, progress_time);

		if (set_cpufreq_governor("performance", config->cpu) != 0)
			return;

		calculations = calculate_timespace(load_time, config);

		if (config->verbose)
			printf("_round %i: doing %u cycles with %u calculations"
			       " for %lius\n", _round + 1, config->cycles,
			       calculations, load_time);

		fprintf(config->output, "%u %li %li ",
			_round, load_time, sleep_time);

		if (config->verbose)
			printf("avarage: %lius, rps:%li\n",
				load_time / calculations,
				1000000 * calculations / load_time);

		
		for (cycle = 0; cycle < config->cycles; cycle++) {
			now = get_time();
			usleep(sleep_time);
			ROUNDS(calculations);
			then = get_time();
			performance_time += then - now - sleep_time;
			if (config->verbose)
				printf("performance cycle took %lius, "
					"sleep: %lius, "
					"load: %lius, rounds: %u\n",
					(long)(then - now), sleep_time,
					load_time, calculations);
		}
		fprintf(config->output, "%li ",
			performance_time / config->cycles);

		progress_time += sleep_time + load_time;
		show_progress(total_time, progress_time);

		if (set_cpufreq_governor(config->governor, config->cpu) != 0)
			return;

		for (cycle = 0; cycle < config->cycles; cycle++) {
			now = get_time();
			usleep(sleep_time);
			ROUNDS(calculations);
			then = get_time();
			powersave_time += then - now - sleep_time;
			if (config->verbose)
				printf("powersave cycle took %lius, "
					"sleep: %lius, "
					"load: %lius, rounds: %u\n",
					(long)(then - now), sleep_time,
					load_time, calculations);
		}

		progress_time += sleep_time + load_time;

		
		fprintf(config->output, "%li ",
			powersave_time / config->cycles);
		fprintf(config->output, "%.3f\n",
			performance_time * 100.0 / powersave_time);
		fflush(config->output);

		if (config->verbose)
			printf("performance is at %.2f%%\n",
				performance_time * 100.0 / powersave_time);

		sleep_time += config->sleep_step;
		load_time += config->load_step;
	}
}