Exemplo n.º 1
0
// MixColumns step, applied in place to the bytes exposed by s_->bytes().
//
// The buffer is processed as Nb groups of four bytes (group `col` starts at
// offset col * Nb). Each group (b0, b1, b2, b3) is replaced by
//   b0' = 2*b0 ^ 3*b1 ^   b2 ^   b3
//   b1' =   b0 ^ 2*b1 ^ 3*b2 ^   b3
//   b2' =   b0 ^   b1 ^ 2*b2 ^ 3*b3
//   b3' = 3*b0 ^   b1 ^   b2 ^ 2*b3
// where 2* and 3* are mul2()/mul3() (presumably GF(2^8) multiplications;
// they are defined outside this excerpt).
inline void mixColumns(ByteArray *s_)
{
    uint8_t *const state = s_->bytes();

    for (int col = 0; col < Nb; col++) {
        uint8_t *const out = state + col * Nb;

        // Snapshot the four inputs: every output byte depends on all of them,
        // so they must not be overwritten while the group is being computed.
        uint8_t in[4] = { out[0], out[1], out[2], out[3] };

        out[0] = in[2] ^ in[3] ^ mul2(in[0]) ^ mul3(in[1]);
        out[1] = in[0] ^ in[3] ^ mul2(in[1]) ^ mul3(in[2]);
        out[2] = in[0] ^ in[1] ^ mul2(in[2]) ^ mul3(in[3]);
        out[3] = in[1] ^ in[2] ^ mul2(in[3]) ^ mul3(in[0]);
    }
}
Exemplo n.º 2
0
/*
 * MixBytes: mixes the bytes within each of the first `columns` columns of
 * the state `x`, in place.
 *
 * For every column, output row r is the XOR of the eight column bytes taken
 * cyclically from row r onwards, scaled by the coefficients
 * (2, 2, 3, 4, 5, 3, 5, 7) through the mulN() helpers, which are defined
 * outside this excerpt.
 */
void MixBytes(u8 x[ROWS][COLS1024], int columns) {
  u8 mixed[ROWS];
  int col, row;

  for (col = 0; col < columns; col++) {
    /* Build the whole new column in scratch space first: each output byte
       reads every input byte of the column, so it cannot be done in place. */
    for (row = 0; row < ROWS; row++) {
      mixed[row] = mul2(x[(row+0)%ROWS][col])
                 ^ mul2(x[(row+1)%ROWS][col])
                 ^ mul3(x[(row+2)%ROWS][col])
                 ^ mul4(x[(row+3)%ROWS][col])
                 ^ mul5(x[(row+4)%ROWS][col])
                 ^ mul3(x[(row+5)%ROWS][col])
                 ^ mul5(x[(row+6)%ROWS][col])
                 ^ mul7(x[(row+7)%ROWS][col]);
    }

    /* Write the finished column back into the state. */
    for (row = 0; row < ROWS; row++) {
      x[row][col] = mixed[row];
    }
  }
}
Exemplo n.º 3
0
/*
AFunc : OTR Core Authentication Function (ADP=p)

Folds `h_len` bytes of `header` into a single 128-bit value and returns it
(the original author labels the result "TA").

  header : input bytes; read through a __m128i pointer, so the buffer is
           presumably expected to be 16-byte aligned -- TODO confirm.
  h_len  : number of header bytes.

Uses the file-level objects `Q` (initial mask) and `encrypt_key`, both
defined outside this excerpt.

NOTE(review): for h_len == 0 the tail computation below sets last = BLOCK and
then evaluates (rest_len - last) in uint32, which wraps to a huge block count.
Confirm that callers never pass an empty header.
*/
__m128i AFunc(
	const uint8 *header,
	uint32 h_len)
{
	uint32 i;
	uint32 m, last;
	block tmp[PIPE], mask[PIPE + 1], ASum = _mm_setzero_si128();
	uint32 rest_len = h_len;
	const __m128i *hdp = (__m128i*)header;

	mask[0] = _mm_load_si128(&Q);
	/* Bulk phase: PIPE blocks per iteration. The comparison is strict, so at
	   least one byte is always left over for the tail handling below. */
	while (rest_len > (BLOCK*PIPE)){
		/* presumably fills mask[1..PIPE] from mask[0]; mask[PIPE] is carried
		   into the next iteration at the bottom of the loop -- TODO confirm */
		mul2_PIPE(mask);
		for (i = 0; i < PIPE; i++){
			tmp[i] = _mm_xor_si128(mask[i], hdp[i]);
		}
		AES_ecb_encrypt_PIPE(tmp, encrypt_key);
		/* accumulate the encrypted, masked blocks */
		for (i = 0; i < PIPE; i++){
			ASum = _mm_xor_si128(ASum, tmp[i]);
		}
		rest_len -= (BLOCK*PIPE);
		hdp += PIPE;
		mask[0] = _mm_load_si128(&mask[PIPE]);
	}

	/* Split the remainder into m full blocks plus a final block of `last`
	   bytes, where 1 <= last <= BLOCK whenever rest_len > 0. */
	last = rest_len % BLOCK;
	if (last == 0) last = BLOCK;
	m = (rest_len - last) / BLOCK; //header = m blocks + last bytes

	/* Remaining full blocks, one at a time, updating the mask after each. */
	for (i = 0; i < m; i++){
		tmp[0] = _mm_xor_si128(mask[0], hdp[i]);
		AES_encrypt(tmp[0], &tmp[0], encrypt_key);
		ASum = _mm_xor_si128(ASum, tmp[0]);
		mul2(mask[0], &mask[0]);
	}
	hdp += m;
	/* last block */
	/* ozp presumably copies `last` bytes into tmp[0] with padding (helper is
	   defined elsewhere). The padded block is XORed into ASum directly; it is
	   not encrypted on its own. */
	ozp(last, (uint8*)&hdp[0], &tmp[0]);
	ASum = _mm_xor_si128(ASum, tmp[0]);
	/* A different final mask update is used for a partial last block than
	   for a full one. */
	if (last != BLOCK){
		mul3(mask[0], &mask[0]);
	}
	else{
		mul3twice(mask[0], &mask[0]);
	}
	/* Final step: mask the accumulator and encrypt it once more. */
	ASum = _mm_xor_si128(ASum, mask[0]);
	AES_encrypt(ASum, &ASum, encrypt_key);
	return ASum; //TA
}
Exemplo n.º 4
0
void test_mixer_with_sines() {
	Sine vox1(431);				// create 4 scaled sine waves
	MulOp mul1(vox1, 0.3);
	Sine vox2(540);
	MulOp mul2(vox2, 0.1);
	Sine vox3(890);
	MulOp mul3(vox3, 0.3);
	Sine vox4(1280);
	MulOp mul4(vox4, 0.01);
	Mixer mix(2);				// create a stereo mixer
	mix.add_input(mul1);			// add them to the mixer
	mix.add_input(mul2);
	mix.add_input(mul3);
	mix.add_input(mul4);
	logMsg("playing mix of 4 sines...");
	run_test(mix);
	logMsg("mix done.");
}
Exemplo n.º 5
0
// Builds a per-pixel mask for one plane by comparing every source row with
// the two rows above and the two rows below it.
//
//   dstp / dpitch : destination mask plane and its row stride in bytes
//   srcp / spitch : source plane and its row stride in bytes
//   cthresh       : threshold (narrowed to int16_t before use)
//   width, height : plane dimensions in pixels / rows
//
// With a..e being the pixels at rows y-2..y+2 of one column, the value stored
// is the AND of
//   (c - b > cthresh && c - d > cthresh) || (c - b < -cthresh && c - d < -cthresh)
// and
//   absdiff(a + 4*c + e, mul3(b + d)) > 6 * cthresh
//
// NOTE(review): the vector type V is not declared in this excerpt; presumably
// it is a template parameter whose header line is missing. The SIMD helpers
// (load_half, sub_i16, mul3, ...) are also defined elsewhere; the formula above
// assumes they do what their names suggest -- confirm.
static void __stdcall
comb_mask_0_simd(uint8_t* dstp, const uint8_t* srcp, const int dpitch,
                 const int spitch, const int cthresh, const int width,
                 const int height) noexcept
{
    // Row pointers for the 5-row window centred on the current row `sc`.
    // For row 0 the rows "above" point at rows 1 and 2, i.e. the window is
    // mirrored at the top edge (sb == sd, sa == se).
    const uint8_t* sc = srcp;
    const uint8_t* sb = sc + spitch;
    const uint8_t* sa = sb + spitch;
    const uint8_t* sd = sc + spitch;
    const uint8_t* se = sd + spitch;

    // Broadcast the threshold, its negation and six times its value.
    int16_t cth16 = static_cast<int16_t>(cthresh);
    const V cthp = set1_i16<V>(cth16);
    const V cthn = set1_i16<V>(-cth16);
    const V cth6 = set1_i16<V>(cth16 * 6);

    // x advances by half the vector size in bytes per iteration (presumably
    // because each 8-bit pixel is widened to 16 bits by load_half).
    constexpr int step = sizeof(V) / 2;

    for (int y = 0; y < height; ++y) {
        // NOTE(review): the last iteration may touch up to step - 1 bytes past
        // `width`; presumably rows are padded accordingly -- confirm.
        for (int x = 0; x < width; x += step) {
            V xc = load_half<V>(sc + x);
            V xb = load_half<V>(sb + x);
            V xd = load_half<V>(sd + x);
            // Signed differences between the centre row and its neighbours.
            V d1 = sub_i16(xc, xb);
            V d2 = sub_i16(xc, xd);
            // First test: both differences exceed +cthresh, or both are
            // below -cthresh.
            V mask0 = or_reg(
                and_reg(cmpgt_i16(d1, cthp), cmpgt_i16(d2, cthp)),
                and_reg(cmpgt_i16(cthn, d1), cmpgt_i16(cthn, d2)));
            // Second test over the full window: d2 = mul3(b + d),
            // d1 = a + e + (c << 2); both variables are reused here.
            d2 = mul3(add_i16(xb, xd));
            d1 = add_i16(load_half<V>(sa + x), load_half<V>(se + x));
            d1 = add_i16(d1, lshift_i16(xc, 2));
            mask0 = and_reg(mask0, cmpgt_i16(absdiff_i16(d1, d2), cth6));
            store_half(dstp + x, mask0);
        }
        // Slide the window down one row. The order of these assignments
        // matters: each pointer takes the value of the one below it.
        sa = sb;
        sb = sc;
        sc = sd;
        sd = se;
        // Near the bottom edge `se` steps backwards instead of forwards, so
        // the window is mirrored rather than reading past the last row.
        se += (y < height - 3) ? spitch : -spitch;
        dstp += dpitch;
    }
}
Exemplo n.º 6
0
Arquivo: 3d.c Projeto: 3ki5tj/scinotes
/* brute-force calculation of the 3D discrete Fourier transform
 * http://www.fftw.org/doc/The-1d-Discrete-Fourier-Transform-_0028DFT_0029.html
 *
 * n0, n1, n2: extents of the three dimensions
 * in:         n0*n1*n2 input values, row-major (index j0*n1*n2 + j1*n2 + j2)
 * out:        n0*n1*n2 output values, same layout; overwritten
 *
 * Assuming mul3() stores in `cs` the complex product of its three inputs,
 * each output element is
 *   out[k] = sum_j in[j] * exp(-2*pi*i * (j0*k0/n0 + j1*k1/n1 + j2*k2/n2)).
 *
 * The cost is O((n0*n1*n2)^2); this is meant as a reference for checking
 * faster transforms, not for production use.
 *
 * If any of the twiddle-factor tables cannot be allocated the function
 * returns without touching `out`. */
void ft3d(int n0, int n1, int n2, fftw_complex *in, fftw_complex *out)
{
  int j0, j1, j2, j, k0, k1, k2, k;
  double (*cs0)[2], (*cs1)[2], (*cs2)[2], cs[2];

  /* per-dimension tables of (cos, sin) of 2*pi*j/n */
  cs0 = calloc(n0, sizeof(*cs0));
  cs1 = calloc(n1, sizeof(*cs1));
  cs2 = calloc(n2, sizeof(*cs2));
  /* Never dereference a failed allocation. A NULL returned for a zero-sized
   * request also lands here, which is harmless: with any extent <= 0 there
   * is no output element to compute. */
  if (!cs0 || !cs1 || !cs2)
    goto done;

  for (j = 0; j < n0; j++) {
    cs0[j][0] = cos(2*M_PI*j/n0);
    cs0[j][1] = sin(2*M_PI*j/n0);
  }
  for (j = 0; j < n1; j++) {
    cs1[j][0] = cos(2*M_PI*j/n1);
    cs1[j][1] = sin(2*M_PI*j/n1);
  }
  for (j = 0; j < n2; j++) {
    cs2[j][0] = cos(2*M_PI*j/n2);
    cs2[j][1] = sin(2*M_PI*j/n2);
  }

  for (k0 = 0; k0 < n0; k0++)
  for (k1 = 0; k1 < n1; k1++)
  for (k2 = 0; k2 < n2; k2++) {
    k = k0 * n1 * n2 + k1 * n2 + k2;
    out[k][0] = out[k][1] = 0;
    for (j0 = 0; j0 < n0; j0++)
    for (j1 = 0; j1 < n1; j1++)
    for (j2 = 0; j2 < n2; j2++) {
      j = j0 * n1 * n2 + j1 * n2 + j2;
      /* the phase is periodic, so the table index is reduced modulo n */
      mul3(cs, cs0[j0 * k0 % n0], cs1[j1 * k1 % n1], cs2[j2 * k2 % n2]);
      /* out[k] += in[j] * conj(cs) */
      out[k][0] += in[j][0]*cs[0] + in[j][1]*cs[1];
      out[k][1] += in[j][1]*cs[0] - in[j][0]*cs[1];
    }
  }

done:
  /* free(NULL) is a no-op, so this is safe on the failure path too */
  free(cs0);
  free(cs1);
  free(cs2);
}
Exemplo n.º 7
0
/*
DFunc : OTR Core Decryption Function, with nonce encryption

Decrypts `ci_len` bytes of `ciphertext` into `plaintext` and returns a
128-bit value computed from the recovered data (the original author labels
it "TE").

  nonce, nonce_len : nonce bytes, copied right-aligned into one block.
                     NOTE(review): the indexing below needs
                     nonce_len <= BLOCK - 1; this is not checked here.
  TA               : only present when built with ADP == Seri; XORed into the
                     encrypted nonce block.
  ciphertext       : input; read through a __m128i pointer.
  ci_len           : ciphertext length in bytes (may be 0).
  t_len            : tag length; (t_len % BLOCK) << 4 is stored in the first
                     byte of the nonce block.
  plaintext        : output; written through a __m128i pointer.

NOTE(review): both buffers are accessed by dereferencing __m128i pointers, so
they are presumably expected to be 16-byte aligned -- confirm.
NOTE(review): ctp[2k] is read again after ptp[2k] has been written, so
decrypting in place (plaintext == ciphertext) looks unsafe -- confirm that
callers pass distinct buffers.

Uses the file-level `encrypt_key`, defined outside this excerpt.
*/
__m128i DFunc(
	const uint8 *nonce,
	uint32 nonce_len,
#if(ADP==Seri)
	const __m128i TA,
#endif
	const uint8 *ciphertext,
	uint32 ci_len,
	uint32 t_len,
	uint8 *plaintext)
{
	uint32 i;
	uint32 ell = 0; //number of 2BLOCK-byte chunks, excl. last one
	uint32 last = 0; //number of bytes in the last chunks

	block Sum = _mm_setzero_si128();
	block txt[PIPE], Ln[PIPE + 1];
	uint32 rest_len = ci_len;
	__m128i *ptp = (__m128i*)plaintext;
	const __m128i *ctp = (__m128i*)ciphertext;
	ALIGN(16)uint8 tmp[BLOCK] = { 0 };
	block *La;

	/* Encryption of nonce */
	/* Block layout: tmp[0] holds the tag-length field, the nonce is
	   right-aligned, and a single 1 bit is OR-ed into the byte just before
	   the nonce. The result is encrypted into Ln[0]. */
	memcpy(&tmp[BLOCK - nonce_len], nonce, nonce_len);
	tmp[0] = (uint8)((t_len%BLOCK) << 4);
	tmp[BLOCK - nonce_len - 1] |= 0x01;
	Ln[0] = _mm_load_si128((__m128i*)tmp);
	AES_encrypt(Ln[0], &Ln[0], encrypt_key);

#if (ADP==Seri)
	/* Serial ADP build: mix the caller-supplied TA into the mask. */
	Ln[0] = _mm_xor_si128(Ln[0], TA);
	mul2(Ln[0], &Ln[0]);
#endif
	/* Bulk phase: PIPE two-block chunks per iteration, hand-unrolled. Lanes
	   0..3 are unconditional; lanes 4..7 are enabled by the PIPE checks. The
	   comparison is strict, so a non-empty tail always remains afterwards. */
	while (rest_len > (DBLOCK*PIPE)){
		/* first round*/
		/* presumably fills Ln[1..PIPE] from Ln[0] -- TODO confirm.
		   Lane k: txt[k] = Ln[k] ^ Ln[k+1] ^ ctp[2k] */
		mul2_PIPE(Ln);
		txt[0] = _mm_xor_si128(Ln[0], ctp[0]);
		txt[0] = _mm_xor_si128(Ln[1], txt[0]); 
		txt[1] = _mm_xor_si128(Ln[1], ctp[2]);
		txt[1] = _mm_xor_si128(Ln[2], txt[1]); 
		txt[2] = _mm_xor_si128(Ln[2], ctp[4]);
		txt[2] = _mm_xor_si128(Ln[3], txt[2]); 
		txt[3] = _mm_xor_si128(Ln[3], ctp[6]);
		txt[3] = _mm_xor_si128(Ln[4], txt[3]); 
#if (PIPE>=5)
		txt[4] = _mm_xor_si128(Ln[4], ctp[8]);
		txt[4] = _mm_xor_si128(Ln[5], txt[4]); 
#endif
#if (PIPE>=6)
		txt[5] = _mm_xor_si128(Ln[5], ctp[10]);
		txt[5] = _mm_xor_si128(Ln[6], txt[5]); 
#endif
#if (PIPE>=7)
		txt[6] = _mm_xor_si128(Ln[6], ctp[12]);
		txt[6] = _mm_xor_si128(Ln[7], txt[6]); 
#endif
#if (PIPE==8)
		txt[7] = _mm_xor_si128(Ln[7], ctp[14]);
		txt[7] = _mm_xor_si128(Ln[8], txt[7]); 
#endif
		AES_ecb_encrypt_PIPE(txt, encrypt_key);
		/* second round*/
		/* Lane k: ptp[2k] = E(first-round input) ^ ctp[2k+1], then the next
		   cipher input is prepared as Ln[k] ^ ptp[2k]. */
		ptp[0] = _mm_xor_si128(txt[0], ctp[1]);
		txt[0] = _mm_xor_si128(Ln[0], ptp[0]);
		ptp[2] = _mm_xor_si128(txt[1], ctp[3]);
		txt[1] = _mm_xor_si128(Ln[1], ptp[2]);
		ptp[4] = _mm_xor_si128(txt[2], ctp[5]);
		txt[2] = _mm_xor_si128(Ln[2], ptp[4]);
		ptp[6] = _mm_xor_si128(txt[3], ctp[7]);
		txt[3] = _mm_xor_si128(Ln[3], ptp[6]);
#if (PIPE>=5)
		ptp[8] = _mm_xor_si128(txt[4], ctp[9]);
		txt[4] = _mm_xor_si128(Ln[4], ptp[8]);
#endif
#if (PIPE>=6)
		ptp[10] = _mm_xor_si128(txt[5], ctp[11]);
		txt[5] = _mm_xor_si128(Ln[5], ptp[10]);
#endif
#if (PIPE>=7)
		ptp[12] = _mm_xor_si128(txt[6], ctp[13]);
		txt[6] = _mm_xor_si128(Ln[6], ptp[12]);
#endif
#if (PIPE==8)
		ptp[14] = _mm_xor_si128(txt[7], ctp[15]);
		txt[7] = _mm_xor_si128(Ln[7], ptp[14]);
#endif
		AES_ecb_encrypt_PIPE(txt, encrypt_key);
		/* Lane k: ptp[2k+1] = E(second-round input) ^ ctp[2k]; every
		   odd-indexed plaintext block is accumulated into Sum. */
		ptp[1] = _mm_xor_si128(txt[0], ctp[0]);
		Sum = _mm_xor_si128(Sum, ptp[1]);
		ptp[3] = _mm_xor_si128(txt[1], ctp[2]);
		Sum = _mm_xor_si128(Sum, ptp[3]);
		ptp[5] = _mm_xor_si128(txt[2], ctp[4]);
		Sum = _mm_xor_si128(Sum, ptp[5]);
		ptp[7] = _mm_xor_si128(txt[3], ctp[6]);
		Sum = _mm_xor_si128(Sum, ptp[7]);
#if (PIPE>=5)
		ptp[9] = _mm_xor_si128(txt[4], ctp[8]);
		Sum = _mm_xor_si128(Sum, ptp[9]);
#endif
#if (PIPE>=6)
		ptp[11] = _mm_xor_si128(txt[5], ctp[10]);
		Sum = _mm_xor_si128(Sum, ptp[11]);
#endif
#if (PIPE>=7)
		ptp[13] = _mm_xor_si128(txt[6], ctp[12]);
		Sum = _mm_xor_si128(Sum, ptp[13]);
#endif
#if (PIPE==8)
		ptp[15] = _mm_xor_si128(txt[7], ctp[14]);
		Sum = _mm_xor_si128(Sum, ptp[15]);
#endif
		/* Carry the last mask into the next iteration and advance by PIPE
		   two-block chunks. */
		Ln[0] = _mm_load_si128(&Ln[PIPE]);
		ptp += (2 * PIPE);
		ctp += (2 * PIPE);
		rest_len -= (DBLOCK*PIPE);
	}

	/* Split the remainder into `ell` full two-block chunks plus a final
	   chunk of `last` bytes (1..DBLOCK). With no data left, both stay 0. */
	if (rest_len != 0){
		last = rest_len % DBLOCK;
		if (last == 0) last = DBLOCK;
		ell = (rest_len - last) / DBLOCK; // plaintext length = 2BLOCK*ell + last (non-zero)
	}

	/* 2-round Feistel for the full chunks */
	/* Ln[1] is derived from Ln[0] via mul3 and plays the role that
	   Ln[k] ^ Ln[k+1] played in the pipelined loop. Both masks are updated
	   at the end of every iteration. */
	mul3(Ln[0], &Ln[1]);
	for (i = 0; i < (2 * ell); i += 2){
		txt[0] = _mm_xor_si128(Ln[1], ctp[i]);
		AES_encrypt(txt[0], &txt[0], encrypt_key);
		ptp[i] = _mm_xor_si128(txt[0], ctp[i + 1]);
		txt[0] = _mm_xor_si128(Ln[0], ptp[i]);
		AES_encrypt(txt[0], &txt[0], encrypt_key);
		ptp[i + 1] = _mm_xor_si128(txt[0], ctp[i]);
		Sum = _mm_xor_si128(Sum, ptp[i + 1]);
		Ln[0] = _mm_xor_si128(Ln[0], Ln[1]);
		mul2(Ln[1], &Ln[1]);
	}
	ptp += (2 * ell);
	ctp += (2 * ell);
	/* Last chunk */
	/* xorp and ozp are defined elsewhere; presumably xorp XORs `n` bytes of
	   a block with a byte buffer into a destination, and ozp copies `n`
	   bytes into a block with padding -- TODO confirm. */
	if (last <= BLOCK){ 	//odd block, including the case pl_len = 0 (no plaintext)
		AES_encrypt(Ln[0], &txt[0], encrypt_key); //txt[0] is Z
		xorp(last, &txt[0], (uint8*)&ctp[0], (uint8*)&ptp[0]);
		/* re-read the plaintext just written, padded, and add it to Sum */
		ozp(last, (uint8*)&ptp[0], &txt[0]);
		Sum = _mm_xor_si128(txt[0], Sum);
		La = &Ln[0];
	}
	else{//even blocks, last > BLOCK always holds. 2-round Feistel with last swap
		/* the padded partial second ciphertext block feeds both Sum and the
		   first cipher call */
		ozp(last - BLOCK, (uint8*)&ctp[1], &txt[0]);
		Sum = _mm_xor_si128(Sum, txt[0]);
		txt[0] = _mm_xor_si128(Ln[1], txt[0]);
		AES_encrypt(txt[0], &txt[0], encrypt_key);
		ptp[0] = _mm_xor_si128(txt[0], ctp[0]);
		txt[0] = _mm_xor_si128(Ln[0], ptp[0]);
		AES_encrypt(txt[0], &txt[1], encrypt_key); //txt[1] is Z
		xorp(last - BLOCK, &txt[1], (uint8*)&ctp[1], (uint8*)&ptp[1]);
		Sum = _mm_xor_si128(Sum, txt[1]);
		La = &Ln[1];
	}
	/* TE generation */
	/* La points at whichever mask the last-chunk branch selected; a
	   different final multiplier is used when the last chunk ended on a
	   block boundary. */
	if (last == BLOCK || last == DBLOCK){//last = 16 or 32
		mul7(*La, La);
	}
	else{
		mul3twice(*La, La);
	}
	Sum = _mm_xor_si128(Sum, *La);	//Sum = (3^2 or 7)L* xor Sum
	AES_encrypt(Sum, &Sum, encrypt_key);
	return Sum;//TE
}//end of DFunc