/*
 * serpent_encrypt - encrypt one 16-byte block.
 *
 * @tfm: transform whose context (obtained with crypto_tfm_ctx()) is a
 *       struct serpent_ctx; its expkey array is exposed to the round
 *       macros through the local pointer k.
 * @dst: output block; four little-endian 32-bit words are stored to it.
 * @src: input block; four little-endian 32-bit words are loaded from it.
 *
 * The body is a fixed, fully unrolled sequence: K() with index 0, then 32
 * S-box macro invocations (S0..S7, repeated four times), separated by
 * LK() with indices 1..31, and a final K() with index 32.  The register
 * arguments are permuted from step to step, so the order of the macro
 * arguments is significant and must not be "tidied".
 *
 * NOTE(review): k is never referenced directly in this function; presumably
 * the K()/LK() macros, which are defined elsewhere, read it by name -
 * confirm before renaming it.
 */
static void serpent_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	struct serpent_ctx *ctx = crypto_tfm_ctx(tfm);
	const u32
		*k = ctx->expkey;
	const __le32 *s = (const __le32 *)src;
	__le32	*d = (__le32 *)dst;
	u32	r0, r1, r2, r3, r4;

/*
 * Note: The conversions between u8* and u32* might cause trouble
 * on architectures with stricter alignment rules than x86
 */

	/* Load the block as four little-endian words. */
	r0 = le32_to_cpu(s[0]);
	r1 = le32_to_cpu(s[1]);
	r2 = le32_to_cpu(s[2]);
	r3 = le32_to_cpu(s[3]);

	/* Initial key step (index 0), then S-box / LK steps 1..31. */
				 K(r0,r1,r2,r3,0);
	S0(r0,r1,r2,r3,r4);	LK(r2,r1,r3,r0,r4,1);
	S1(r2,r1,r3,r0,r4);	LK(r4,r3,r0,r2,r1,2);
	S2(r4,r3,r0,r2,r1);	LK(r1,r3,r4,r2,r0,3);
	S3(r1,r3,r4,r2,r0);	LK(r2,r0,r3,r1,r4,4);
	S4(r2,r0,r3,r1,r4);	LK(r0,r3,r1,r4,r2,5);
	S5(r0,r3,r1,r4,r2);	LK(r2,r0,r3,r4,r1,6);
	S6(r2,r0,r3,r4,r1);	LK(r3,r1,r0,r4,r2,7);
	S7(r3,r1,r0,r4,r2);	LK(r2,r0,r4,r3,r1,8);
	S0(r2,r0,r4,r3,r1);	LK(r4,r0,r3,r2,r1,9);
	S1(r4,r0,r3,r2,r1);	LK(r1,r3,r2,r4,r0,10);
	S2(r1,r3,r2,r4,r0);	LK(r0,r3,r1,r4,r2,11);
	S3(r0,r3,r1,r4,r2);	LK(r4,r2,r3,r0,r1,12);
	S4(r4,r2,r3,r0,r1);	LK(r2,r3,r0,r1,r4,13);
	S5(r2,r3,r0,r1,r4);	LK(r4,r2,r3,r1,r0,14);
	S6(r4,r2,r3,r1,r0);	LK(r3,r0,r2,r1,r4,15);
	S7(r3,r0,r2,r1,r4);	LK(r4,r2,r1,r3,r0,16);
	S0(r4,r2,r1,r3,r0);	LK(r1,r2,r3,r4,r0,17);
	S1(r1,r2,r3,r4,r0);	LK(r0,r3,r4,r1,r2,18);
	S2(r0,r3,r4,r1,r2);	LK(r2,r3,r0,r1,r4,19);
	S3(r2,r3,r0,r1,r4);	LK(r1,r4,r3,r2,r0,20);
	S4(r1,r4,r3,r2,r0);	LK(r4,r3,r2,r0,r1,21);
	S5(r4,r3,r2,r0,r1);	LK(r1,r4,r3,r0,r2,22);
	S6(r1,r4,r3,r0,r2);	LK(r3,r2,r4,r0,r1,23);
	S7(r3,r2,r4,r0,r1);	LK(r1,r4,r0,r3,r2,24);
	S0(r1,r4,r0,r3,r2);	LK(r0,r4,r3,r1,r2,25);
	S1(r0,r4,r3,r1,r2);	LK(r2,r3,r1,r0,r4,26);
	S2(r2,r3,r1,r0,r4);	LK(r4,r3,r2,r0,r1,27);
	S3(r4,r3,r2,r0,r1);	LK(r0,r1,r3,r4,r2,28);
	S4(r0,r1,r3,r4,r2);	LK(r1,r3,r4,r2,r0,29);
	S5(r1,r3,r4,r2,r0);	LK(r0,r1,r3,r2,r4,30);
	S6(r0,r1,r3,r2,r4);	LK(r3,r4,r1,r2,r0,31);
	/* Last S-box is followed by a plain K() step (index 32), not LK(). */
	S7(r3,r4,r1,r2,r0);	 K(r0,r1,r2,r3,32);

	/* Store the result as four little-endian words. */
	d[0] = cpu_to_le32(r0);
	d[1] = cpu_to_le32(r1);
	d[2] = cpu_to_le32(r2);
	d[3] = cpu_to_le32(r3);
}
/**
 * Runs a signature-style check of the 64-byte value d against the public
 * key encoded in X.
 *
 * Steps, as implemented:
 *   1. decode X into the point q (REV_PI);
 *   2. build three integers from 32-byte buffers whose 4-byte groups are
 *      passed through change_endian: d[0..31] (shifted left by 128 bits
 *      and reduced mod Q), H[0..31] (reduced mod Q) and d[32..63];
 *   3. reject if the third integer is >= Q;
 *   4. compute R = shamir(G, (h + s1) mod Q, q, s0);
 *   5. reject if R has x == 0 and y == 0;
 *   6. hash a 108-byte buffer with belt_hash and compare the 32-byte digest
 *      with the first 32 bytes of the encoding of R.
 *
 * @param X        encoded public key point
 * @param d        at least 64 readable bytes
 * @param untoken  not used by this function
 * @return non-zero (true) when all checks pass, zero (false) otherwise
 *
 * NOTE(review): H is not a parameter here; it must resolve to a name
 * declared elsewhere in the file - confirm that is intended.
 * NOTE(review): the 108-byte buffer passed to belt_hash is never written
 * before being hashed, so the digest depends on indeterminate stack
 * contents. This looks like an unfinished copy of bign_verify - confirm
 * the intended hash input.
 */
static BOOL bign_keyunwrap(byte *X, byte *d, byte *untoken)
{
    Point q;
    REV_PI(X, q);

    BigInteger Q = bign_curve256v1::getQ();

    // First half of d -> integer, shifted by 128 bits, reduced mod Q.
    byte lowPart[32];
    memcpy(lowPart, d, 32);
    for (byte *word = lowPart; word != lowPart + 32; word += 4)
        change_endian(word);
    BigInteger s0Scaled(lowPart, 32);
    s0Scaled <<= 128;
    s0Scaled %= Q;

    // H -> integer reduced mod Q. h_belt is reused later as digest output.
    byte h_belt[32];
    memcpy(h_belt, H, 32);
    for (byte *word = h_belt; word != h_belt + 32; word += 4)
        change_endian(word);
    BigInteger hashInt(h_belt, 32);
    hashInt %= Q;

    // Second half of d -> integer; must be strictly below Q.
    byte highPart[32];
    memcpy(highPart, d + 32, 32);
    for (byte *word = highPart; word != highPart + 32; word += 4)
        change_endian(word);
    BigInteger s1(highPart, 32);
    if (S1_OUT_OF_RANGE_GUARD: s1 >= Q)
        return false;
}
/**
 * Verifies the signature S over the 32-byte hash H with the public key _q.
 *
 * Steps, as implemented:
 *   1. decode _q into the point q (REV_PI);
 *   2. S[0..31], with each 4-byte group passed through change_endian,
 *      becomes an integer that is shifted left by 128 bits and reduced
 *      mod Q;
 *   3. S[32..63], converted the same way, must be < Q, otherwise the
 *      signature is rejected;
 *   4. H, converted the same way, is reduced mod Q;
 *   5. R = shamir(G, (h + s1) mod Q, q, s0) with G = (0, getY());
 *      R with x == 0 and y == 0 is rejected;
 *   6. belt_hash over OID || PI(R) || H (108 bytes in total) must equal
 *      S[0..31] byte for byte.
 *
 * @param H     32-byte message hash
 * @param _q    encoded public key point
 * @param S     signature, at least 64 readable bytes
 * @param size  not used by this function
 * @return non-zero (true) if the signature is accepted, zero (false)
 *         otherwise
 */
static BOOL bign_verify(byte *H, byte *_q, byte *S, uint32 size)
{
    Point q;
    REV_PI(_q, q);

    BigInteger Q = bign_curve256v1::getQ();

    // First signature half -> integer, shifted by 128 bits, reduced mod Q.
    byte sigLow[32];
    memcpy(sigLow, S, 32);
    for (byte *word = sigLow; word != sigLow + 32; word += 4)
        change_endian(word);
    BigInteger s0Scaled(sigLow, 32);
    s0Scaled <<= 128;
    s0Scaled %= Q;

    // Second signature half -> integer; must be strictly below Q.
    byte sigHigh[32];
    memcpy(sigHigh, S + 32, 32);
    for (byte *word = sigHigh; word != sigHigh + 32; word += 4)
        change_endian(word);
    BigInteger s1(sigHigh, 32);
    if (s1 >= Q)
        return false;

    // Message hash -> integer reduced mod Q. The buffer is reused below
    // to receive the recomputed digest.
    byte h_belt[32];
    memcpy(h_belt, H, 32);
    for (byte *word = h_belt; word != h_belt + 32; word += 4)
        change_endian(word);
    BigInteger hashInt(h_belt, 32);
    hashInt %= Q;

    BigInteger scalar = (hashInt + s1) % Q;

    BigInteger zero(0);
    Point G(zero, bign_curve256v1::getY());
    Point R = shamir(G, scalar, q, s0Scaled);
    if (R.x == zero && R.y == zero)
        return false;

    // Hash input layout: OID, then the 64-byte encoding of R, then H.
    byte toHash[108];
    byte bR[64];
    PI(bR, R);
    byte *cursor = toHash;
    memcpy(cursor, OID, sizeof OID);
    cursor += sizeof OID;
    memcpy(cursor, bR, sizeof bR);
    cursor += sizeof bR;
    memcpy(cursor, H, 32);
    belt_hash(toHash, sizeof toHash, h_belt);

    // Accept only if the digest reproduces the first 32 signature bytes.
    size_t pos = 0;
    while (pos < 32 && h_belt[pos] == S[pos])
        ++pos;
    return pos == 32;
}
int updateReg(unsigned int A[], unsigned int E[], unsigned int m[], int n) { // Compute the register value E[n], having already computed the others. // Pass parameters as: updateReg (a1, e1, m1, n) or updateReg(a2, e2, m2, n) E[n] = E[n-4] + S1(E[n-1]) + ch(E[n-1], E[n-2], E[n-3]) + A[n-4] + k[n] + m[n]; A[n] = -A[n-4] + S0(A[n-1]) + mj(A[n-1]), A[n-2], A[n-3]) + E[n]; }
/*
 * serpent256_encrypt - encrypt one 16-byte block.
 *
 * in:  input block, read as four 32-bit words through p32().
 * out: output block, written as four 32-bit words through p32().
 * key: key schedule; key->expkey is exposed to the round macros through
 *      the local pointer k.
 *
 * The body is a fixed, fully unrolled sequence: K() with index 0, then 32
 * S-box macro invocations (S0..S7, repeated four times) separated by LK()
 * with indices 1..31, and a final K() with index 32.  The register
 * arguments are permuted from step to step; their order is significant.
 *
 * NOTE(review): no byte-order conversion is visible here, so the words are
 * presumably taken in host order - confirm against the definition of p32.
 * NOTE(review): k is not referenced directly; presumably K()/LK() read it
 * by name - confirm before renaming it.
 */
void _stdcall serpent256_encrypt(const unsigned char *in, unsigned char *out, serpent256_key *key)
{
	u32 *k = key->expkey;
	u32  r0, r1, r2, r3, r4;

	/* Load the block as four 32-bit words. */
	r0 = p32(in)[0];
	r1 = p32(in)[1];
	r2 = p32(in)[2];
	r3 = p32(in)[3];

	/* Initial key step (index 0), then S-box / LK steps 1..31. */
				 K(r0,r1,r2,r3,0);
	S0(r0,r1,r2,r3,r4);	LK(r2,r1,r3,r0,r4,1);
	S1(r2,r1,r3,r0,r4);	LK(r4,r3,r0,r2,r1,2);
	S2(r4,r3,r0,r2,r1);	LK(r1,r3,r4,r2,r0,3);
	S3(r1,r3,r4,r2,r0);	LK(r2,r0,r3,r1,r4,4);
	S4(r2,r0,r3,r1,r4);	LK(r0,r3,r1,r4,r2,5);
	S5(r0,r3,r1,r4,r2);	LK(r2,r0,r3,r4,r1,6);
	S6(r2,r0,r3,r4,r1);	LK(r3,r1,r0,r4,r2,7);
	S7(r3,r1,r0,r4,r2);	LK(r2,r0,r4,r3,r1,8);
	S0(r2,r0,r4,r3,r1);	LK(r4,r0,r3,r2,r1,9);
	S1(r4,r0,r3,r2,r1);	LK(r1,r3,r2,r4,r0,10);
	S2(r1,r3,r2,r4,r0);	LK(r0,r3,r1,r4,r2,11);
	S3(r0,r3,r1,r4,r2);	LK(r4,r2,r3,r0,r1,12);
	S4(r4,r2,r3,r0,r1);	LK(r2,r3,r0,r1,r4,13);
	S5(r2,r3,r0,r1,r4);	LK(r4,r2,r3,r1,r0,14);
	S6(r4,r2,r3,r1,r0);	LK(r3,r0,r2,r1,r4,15);
	S7(r3,r0,r2,r1,r4);	LK(r4,r2,r1,r3,r0,16);
	S0(r4,r2,r1,r3,r0);	LK(r1,r2,r3,r4,r0,17);
	S1(r1,r2,r3,r4,r0);	LK(r0,r3,r4,r1,r2,18);
	S2(r0,r3,r4,r1,r2);	LK(r2,r3,r0,r1,r4,19);
	S3(r2,r3,r0,r1,r4);	LK(r1,r4,r3,r2,r0,20);
	S4(r1,r4,r3,r2,r0);	LK(r4,r3,r2,r0,r1,21);
	S5(r4,r3,r2,r0,r1);	LK(r1,r4,r3,r0,r2,22);
	S6(r1,r4,r3,r0,r2);	LK(r3,r2,r4,r0,r1,23);
	S7(r3,r2,r4,r0,r1);	LK(r1,r4,r0,r3,r2,24);
	S0(r1,r4,r0,r3,r2);	LK(r0,r4,r3,r1,r2,25);
	S1(r0,r4,r3,r1,r2);	LK(r2,r3,r1,r0,r4,26);
	S2(r2,r3,r1,r0,r4);	LK(r4,r3,r2,r0,r1,27);
	S3(r4,r3,r2,r0,r1);	LK(r0,r1,r3,r4,r2,28);
	S4(r0,r1,r3,r4,r2);	LK(r1,r3,r4,r2,r0,29);
	S5(r1,r3,r4,r2,r0);	LK(r0,r1,r3,r2,r4,30);
	S6(r0,r1,r3,r2,r4);	LK(r3,r4,r1,r2,r0,31);
	/* Last S-box is followed by a plain K() step (index 32), not LK(). */
	S7(r3,r4,r1,r2,r0);	 K(r0,r1,r2,r3,32);

	/* Store the result as four 32-bit words. */
	p32(out)[0] = r0;
	p32(out)[1] = r1;
	p32(out)[2] = r2;
	p32(out)[3] = r3;
}
/*
* Finish a CCM decryption.
*
* The bytes buffered so far (msg_buf()) are inserted into `buffer` at
* `offset`; everything from `offset` to the end is then treated as
* ciphertext followed by a tag of tag_size() bytes.
*
* The ciphertext is decrypted in place, one block of at most BS bytes at a
* time, by XORing it with the encryption of a running counter block. The
* authentication value is built by chaining: the encrypted B0 block, then
* each block of associated data, then each recovered plaintext block is
* XORed in and the result encrypted again. Finally the value is XORed with
* the encryption of the initial counter block and compared with the
* received tag.
*
* On success the tag is cut off the end of `buffer`.
*
* Throws Integrity_Failure when the computed and received tags differ; at
* that point the ciphertext region of `buffer` has already been
* overwritten with the decrypted bytes.
*/
void CCM_Decryption::finish(secure_vector<byte>& buffer, size_t offset)
   {
   BOTAN_ASSERT(buffer.size() >= offset, "Offset is sane");

   buffer.insert(buffer.begin() + offset, msg_buf().begin(), msg_buf().end());

   const size_t sz = buffer.size() - offset;
   byte* const payload = buffer.data() + offset;

   BOTAN_ASSERT(sz >= tag_size(), "We have the tag");

   const secure_vector<byte>& ad = ad_buf();
   BOTAN_ASSERT(ad.size() % BS == 0, "AD is block size multiple");

   const BlockCipher& block_cipher = cipher();

   // Authentication state starts as E(B0) and absorbs the associated data.
   secure_vector<byte> mac(BS);
   block_cipher.encrypt(format_b0(sz - tag_size()), mac);

   for(size_t ad_pos = 0; ad_pos != ad.size(); ad_pos += BS)
      {
      xor_buf(mac.data(), ad.data() + ad_pos, BS);
      block_cipher.encrypt(mac);
      }

   // The first counter block masks the tag; later ones make the keystream.
   secure_vector<byte> counter = format_c0();
   secure_vector<byte> tag_mask(BS);
   block_cipher.encrypt(counter, tag_mask);
   inc(counter);

   secure_vector<byte> keystream(BS);

   const size_t text_len = sz - tag_size();
   const byte* const received_tag = payload + text_len;

   size_t done = 0;
   while(done != text_len)
      {
      const size_t chunk = std::min<size_t>(BS, text_len - done);
      byte* const block = payload + done;

      // Decrypt first: the authentication value covers the plaintext.
      block_cipher.encrypt(counter, keystream);
      xor_buf(block, keystream.data(), chunk);
      inc(counter);

      xor_buf(mac.data(), block, chunk);
      block_cipher.encrypt(mac);

      done += chunk;
      }

   mac ^= tag_mask;

   if(!same_mem(mac.data(), received_tag, tag_size()))
      throw Integrity_Failure("CCM tag check failed");

   buffer.resize(buffer.size() - tag_size());
   }
/*
 * __serpent_setkey_sbox - S-box pass of the key setup.
 *
 * @r0..@r4: working registers carried in from the caller.
 * @k:       base of the key array being processed.
 *
 * The function applies 33 S-box macro invocations (S3, S4, ... cycling
 * through S0..S7).  After each one except the last, store_and_load_keys()
 * is invoked with four registers and two offsets; the final step uses
 * storekeys() with four registers and a single offset.
 *
 * k is first advanced by 100 and later moved back by 50 twice, so it ends
 * at its original value.  All numeric offsets passed to the macros are
 * therefore relative to the current position of k, which is why the same
 * offsets (28, 24, ...) reappear in the third group.
 *
 * NOTE(review): k is not referenced directly in the macro arguments;
 * presumably store_and_load_keys()/storekeys(), defined elsewhere, index
 * it by name - confirm before renaming it.  The register order in every
 * invocation is significant.
 */
static void __serpent_setkey_sbox(u32 r0, u32 r1, u32 r2, u32 r3, u32 r4, u32 *k)
{
	/* Group 1: k at base + 100. */
	k += 100;
	S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
	S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
	S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
	S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
	S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
	S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
	S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
	S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
	S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
	S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
	S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
	S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
	S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
	S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);
	/* Group 2: k at base + 50. */
	k -= 50;
	S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
	S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
	S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
	S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
	S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
	S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
	S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);
	S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10);
	S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14);
	S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18);
	S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22);
	/* Group 3: k back at its original base. */
	k -= 50;
	S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24);
	S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20);
	S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16);
	S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12);
	S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8);
	S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4);
	S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0);
	/* Final step only stores; there is nothing further to load. */
	S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0);
}
/*
* Finish a CCM encryption.
*
* The bytes buffered so far (msg_buf()) are inserted into `buffer` at
* `offset`; everything from `offset` to the end is treated as plaintext.
*
* The authentication value is built by chaining: the encrypted B0 block,
* then each block of associated data, then each plaintext block is XORed
* in and the result encrypted again. Each plaintext block (at most BS
* bytes) is absorbed into that value first and only then encrypted in
* place by XORing it with the encryption of a running counter block.
*
* Finally the authentication value is XORed with the encryption of the
* initial counter block and its first tag_size() bytes are appended to
* `buffer`.
*/
void CCM_Encryption::finish(secure_vector<byte>& buffer, size_t offset)
   {
   BOTAN_ASSERT(buffer.size() >= offset, "Offset is sane");

   buffer.insert(buffer.begin() + offset, msg_buf().begin(), msg_buf().end());

   const size_t sz = buffer.size() - offset;
   byte* const payload = buffer.data() + offset;

   const secure_vector<byte>& ad = ad_buf();
   BOTAN_ASSERT(ad.size() % BS == 0, "AD is block size multiple");

   const BlockCipher& block_cipher = cipher();

   // Authentication state starts as E(B0) and absorbs the associated data.
   secure_vector<byte> mac(BS);
   block_cipher.encrypt(format_b0(sz), mac);

   for(size_t ad_pos = 0; ad_pos != ad.size(); ad_pos += BS)
      {
      xor_buf(mac.data(), ad.data() + ad_pos, BS);
      block_cipher.encrypt(mac);
      }

   // The first counter block masks the tag; later ones make the keystream.
   secure_vector<byte> counter = format_c0();
   secure_vector<byte> tag_mask(BS);
   block_cipher.encrypt(counter, tag_mask);
   inc(counter);

   secure_vector<byte> keystream(BS);

   size_t done = 0;
   while(done != sz)
      {
      const size_t chunk = std::min<size_t>(BS, sz - done);
      byte* const block = payload + done;

      // Authenticate the plaintext before it is overwritten.
      xor_buf(mac.data(), block, chunk);
      block_cipher.encrypt(mac);

      block_cipher.encrypt(counter, keystream);
      xor_buf(block, keystream.data(), chunk);
      inc(counter);

      done += chunk;
      }

   mac ^= tag_mask;

   // Appending may reallocate; `payload` is not used past this point.
   buffer += std::make_pair(mac.data(), tag_size());
   }
static void processblock(struct sha512 *s, const uint8_t *buf) { uint64_t W[80], t1, t2, a, b, c, d, e, f, g, h; int i; for (i = 0; i < 16; i++) { W[i] = (uint64_t)buf[8*i]<<56; W[i] |= (uint64_t)buf[8*i+1]<<48; W[i] |= (uint64_t)buf[8*i+2]<<40; W[i] |= (uint64_t)buf[8*i+3]<<32; W[i] |= (uint64_t)buf[8*i+4]<<24; W[i] |= (uint64_t)buf[8*i+5]<<16; W[i] |= (uint64_t)buf[8*i+6]<<8; W[i] |= buf[8*i+7]; } for (; i < 80; i++) W[i] = R1(W[i-2]) + W[i-7] + R0(W[i-15]) + W[i-16]; a = s->h[0]; b = s->h[1]; c = s->h[2]; d = s->h[3]; e = s->h[4]; f = s->h[5]; g = s->h[6]; h = s->h[7]; for (i = 0; i < 80; i++) { t1 = h + S1(e) + Ch(e,f,g) + K[i] + W[i]; t2 = S0(a) + Maj(a,b,c); h = g; g = f; f = e; e = d + t1; d = c; c = b; b = a; a = t1 + t2; } s->h[0] += a; s->h[1] += b; s->h[2] += c; s->h[3] += d; s->h[4] += e; s->h[5] += f; s->h[6] += g; s->h[7] += h; }
/*
 * inspector_cpack_align - build a reordering relation from two interaction
 * index arrays and apply it to `data`.
 *
 * Steps, as implemented:
 *   1. wrap inter1 and inter2 in ExplicitRelations;
 *   2. build cpack_input_ER over the domain [0, n_inter-1] by inserting,
 *      for every ii_out0, first the value looked up in inter1_ER and then
 *      the value looked up in inter2_ER (only when n_inter >= 1 and
 *      n_tstep >= 1);
 *   3. create *sigma over the domain [0, n_moles-1];
 *   4. call ERG_cpack(cpack_input_ER, sigma_ER) to fill sigma;
 *   5. call reorderArray() on `data` using sigma.
 *
 * n_inter, n_moles, n_tstep: problem sizes used for the domain bounds.
 * data:   array that is reordered in place by reorderArray().
 * inter1, inter2: index arrays, wrapped by ER_ctor().
 * sigma:  out-parameter; receives the newly constructed relation.
 *
 * NOTE(review): i, ii and tstep are declared but never used.
 * NOTE(review): nothing constructed here (inter1_ER, inter2_ER,
 * cpack_input_ER, the domains) is released in this function; whether that
 * is a leak depends on ownership rules of the ER/RD API - confirm.
 * NOTE(review): reorderArray() is passed sizeof(double) as element size
 * although data is declared double[][9], and n_moles+-1-0 as its count
 * argument - confirm both against reorderArray()'s contract.
 */
void inspector_cpack_align(int n_inter,int n_moles,int n_tstep,double data[][9],int * inter1,int * inter2,ExplicitRelation **sigma)
{
  ExplicitRelation * cpack_input_ER;
  ExplicitRelation * sigma_ER;
  ExplicitRelation * inter1_ER;
  ExplicitRelation * inter2_ER;
  int i,ii,ii_out0,tstep;

  /* Wrap the raw index arrays so they can be queried by position. */
  inter2_ER=ER_ctor(inter2,n_inter+-1-0);
  inter1_ER=ER_ctor(inter1,n_inter+-1-0);

  /* RectUnionDomain for set {[ii_out0]: -1ii_out0+n_inter+-1>=0 and n_tstep+-1>=0 and ii_out0>=0 | n_inter,n_tstep} */
  /* RectDomain for set {[ii_out0]: -1ii_out0+n_inter+-1>=0 and n_tstep+-1>=0 and ii_out0>=0 | n_inter,n_tstep} */
  RectDomain *in_domain_cpack_input_conj0=RD_ctor(1);
  RD_set_lb(in_domain_cpack_input_conj0,0,0);
  RD_set_ub(in_domain_cpack_input_conj0,0,n_inter+-1);
  RectUnionDomain *in_domain_cpack_input=RUD_ctor(in_domain_cpack_input_conj0);

  /* Creation of ExplicitRelation of the ARTT */
  /* {[ii_out0]->[accessRelation1]: accessRelation1+-1inter1(ii_out0)=0} union {[ii_out0]->[accessRelation1]: accessRelation1+-1inter2(ii_out0)=0} */
  cpack_input_ER = ER_ctor(1,1,in_domain_cpack_input,false,false);

  /* Define loop body statements */
  /* Both macros already end in ';', so "S0(x);" expands to the call
     followed by an empty statement. */
  #define S0(ii_out0) ER_in_ordered_insert(cpack_input_ER,Tuple_make(ii_out0),Tuple_make(ER_out_given_in(inter1_ER,ii_out0)));
  #define S1(ii_out0) ER_in_ordered_insert(cpack_input_ER,Tuple_make(ii_out0),Tuple_make(ER_out_given_in(inter2_ER,ii_out0)));

  /* For each interaction, record the inter1 target and then the inter2
     target, in that order. */
  if ((n_inter >= 1) && (n_tstep >= 1)) {
    for (ii_out0=0;ii_out0<=n_inter-1;ii_out0++) {
      S0(ii_out0);
      S1(ii_out0);
    }
  }

  /* Undefine loop body statements */
  #undef S0
  #undef S1

  /* RectUnionDomain for set {[k]: -1k+n_moles+-1>=0 and k>=0 | n_moles} */
  /* RectDomain for set {[k]: -1k+n_moles+-1>=0 and k>=0 | n_moles} */
  RectDomain *in_domain_sigma_conj0=RD_ctor(1);
  RD_set_lb(in_domain_sigma_conj0,0,0);
  RD_set_ub(in_domain_sigma_conj0,0,n_moles+-1);
  RectUnionDomain *in_domain_sigma=RUD_ctor(in_domain_sigma_conj0);

  /* The caller receives the new relation through *sigma. */
  *sigma=ER_ctor(1,1,in_domain_sigma,true,true);
  sigma_ER=*sigma;

  /* Fill sigma from the access relation, then reorder the data with it. */
  ERG_cpack(cpack_input_ER,sigma_ER);
  reorderArray((unsigned char*)data,sizeof(double),n_moles+-1-0,sigma_ER);
}
/**
 * Simplifies a path with a small state machine.
 *
 * States (held in the member m_eStatue):
 *   S0    : start
 *   S1    : a "/" has been read
 *   S2    : "/." has been read
 *   S3    : "/abc" (an ordinary name) has been read
 *   S4    : "/.." has been read
 *   END   : finished
 *   ERROR : finished with an error
 *
 *   enum { E_STATE_S0, E_STATE_S1, E_STATE_S2, E_STATE_S3, E_STATE_S4,
 *          E_STATE_END, E_STATE_ERROR } m_eStatue;
 *
 * Each state has a handler of the same name (S0 .. S4) that is given the
 * path, the two cursors and the stack, and is expected to update
 * m_eStatue. The loop keeps dispatching until the state becomes END or
 * ERROR, and then returns `path` as the handlers left it.
 *
 * An empty input yields an empty string without running the machine.
 */
string Solution::simplifyPath(string path)
{
    if (path.empty())
        return string();

    stack<int> stkSlashes;
    int i = 0;
    int j = -1;

    m_eStatue = E_STATE_S0;

    for (;;)
    {
        switch (m_eStatue)
        {
        case E_STATE_S0:
            S0(path, i, j, stkSlashes);
            break;
        case E_STATE_S1:
            S1(path, i, j, stkSlashes);
            break;
        case E_STATE_S2:
            S2(path, i, j, stkSlashes);
            break;
        case E_STATE_S3:
            S3(path, i, j, stkSlashes);
            break;
        case E_STATE_S4:
            S4(path, i, j, stkSlashes);
            break;
        case E_STATE_END:
        case E_STATE_ERROR:
            // Both terminal states end the loop the same way.
            return path;
        default:
            // Unknown state: nothing to do, dispatch again.
            break;
        }
    }
}
/** * return superblock containing 0 th 1 bit, * i.e. largest j such that S0(j) < ii **/ uint64_t selectSuper0(uint64_t const ii) const { // search largest superblock index s such that ii < S[s] uint64_t left = 0, right = numsuper; while ( right-left > 1 ) { uint64_t const d = right-left; uint64_t const d2 = d>>1; uint64_t const mid = left + d2; // number of 1s is too large if ( S0(mid) < ii ) left = mid; else right = mid; } return left; }
void GflSHA256::Generate(void) { int i; DWORD W[SHA256_WORK]; DWORD Hash[SHA256_WORK + SHA256_HASH]; for(i = 0; i < SHA256_BLOCK; i++) W[i] = ReverseEndian(m_aBlock[i]); for(i = SHA256_BLOCK; i < SHA256_WORK; i++) W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; for(i = 0; i < SHA256_HASH; i++) Hash[SHA256_WORK + i] = m_dwH[i]; DWORD *pHash = &Hash[SHA256_WORK]; DWORD dwT1, dwT2; for(i = 0; i < SHA256_WORK; i++){ pHash--; dwT1 = pHash[8] + S1(pHash[5]) + Ch(pHash[5], pHash[6], pHash[7]) + c_dwK[i] + W[i]; dwT2 = S0(pHash[1]) + Maj(pHash[1], pHash[2], pHash[3]); pHash[0] = dwT1 + dwT2; pHash[4] += dwT1; } for(i = 0; i < SHA256_HASH; i++) m_dwH[i] += pHash[i]; }
// Checks that a StateArray constructed from another StateArray plus a
// start index (and optionally a length) is a view onto the same elements:
// sizes shrink by the start index and data pointers coincide with those of
// the corresponding elements of the parent array.
TEST_F(statearray, slicing) {
    // Parent array with three elements; dl and dataptr come from the fixture.
    StateArray A(3, dl, dataptr);

    // Slices starting at index 0, 1, 2 and 3.
    StateArray S0(A,0);
    StateArray S1(A,1);
    StateArray S2(A,2);
    StateArray S3(A,3);

    // Each slice drops exactly as many elements as its start index.
    ASSERT_EQ(S0.size(), A.size());
    ASSERT_EQ(S1.size(), A.size()-1);
    ASSERT_EQ(S2.size(), A.size()-2);
    ASSERT_EQ(S3.size(), A.size()-3);

    // Sanity check: distinct parent elements compare unequal, so the
    // pointer comparisons below are meaningful.
    ASSERT_TRUE(A[0] != A[1]);

    // Slice element i must alias parent element (start + i).
    ASSERT_EQ(A[0].data_ptr(), S0[0].data_ptr());
    ASSERT_EQ(A[1].data_ptr(), S0[1].data_ptr());
    ASSERT_EQ(A[2].data_ptr(), S0[2].data_ptr());
    ASSERT_EQ(A[1].data_ptr(), S1[0].data_ptr());
    ASSERT_EQ(A[2].data_ptr(), S1[1].data_ptr());
    ASSERT_EQ(A[2].data_ptr(), S2[0].data_ptr());

    // Three-argument form: start at index 1, length 1.
    StateArray S11(A,1,1);
    ASSERT_EQ(S11.size(), 1u);
    ASSERT_EQ(A[1].data_ptr(), S11[0].data_ptr());
}
/** * return miniblock containing i th 0 bit, * i.e. largest j such that M0[j] < ii **/ uint64_t selectMini0(uint64_t const s, uint64_t const iii) const { uint64_t const ii = iii - S0(s); uint64_t left = (s << sbbitwidth) >> mbbitwidth; uint64_t const rleft = left; uint64_t right = ::std::min( nummini, ((s+1) << sbbitwidth) >> mbbitwidth); while ( right-left > 1 ) { uint64_t const d = right-left; uint64_t const d2 = d>>1; uint64_t const mid = left + d2; // number of 1s is too large if ( ((((mid-rleft) << mbbitwidth)) - M[mid]) < ii ) left = mid; else right = mid; } return left; }
// Folds one block of sixteen 64-bit words into the eight-word state.
//
//   r: state, eight words; read at the start and incremented by the result
//      of the 80 rounds at the end.
//   x: sixteen input words; each is passed through be64() before use.
//
// The message schedule is extended from 16 to 80 words with S2/S3; every
// round mixes one schedule word and one round constant into the working
// values with S0/S1/ch/maj. All working values and the schedule are wiped
// before returning.
static void _BRSHA512Compress(uint64_t *r, uint64_t *x)
{
    static const uint64_t round_k[] = {
        0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
        0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
        0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
        0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
        0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
        0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
        0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
        0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
        0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
        0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
        0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
        0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
        0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
        0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
        0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
        0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
        0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
        0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
        0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
        0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
    };

    uint64_t w[80];
    uint64_t t1, t2;
    uint64_t a = r[0];
    uint64_t b = r[1];
    uint64_t c = r[2];
    uint64_t d = r[3];
    uint64_t e = r[4];
    uint64_t f = r[5];
    uint64_t g = r[6];
    uint64_t h = r[7];
    int t;

    // Message schedule: 16 converted input words, then 64 derived words.
    for (t = 0; t < 16; t++) {
        w[t] = be64(x[t]);
    }
    for (t = 16; t < 80; t++) {
        w[t] = S3(w[t - 2]) + w[t - 7] + S2(w[t - 15]) + w[t - 16];
    }

    for (t = 0; t < 80; t++) {
        t1 = h + S1(e) + ch(e, f, g) + round_k[t] + w[t];
        t2 = S0(a) + maj(a, b, c);

        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
    }

    r[0] += a;
    r[1] += b;
    r[2] += c;
    r[3] += d;
    r[4] += e;
    r[5] += f;
    r[6] += g;
    r[7] += h;

    // Do not leave intermediate values behind on the stack.
    var_clean(&a, &b, &c, &d, &e, &f, &g, &h, &t1, &t2);
    mem_clean(w, sizeof(w));
}
/* Process LEN bytes of BUFFER, accumulating context into CTX. It is assumed that LEN % 128 == 0. */ void sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx) { const uint64_t *words = buffer; size_t nwords = len / sizeof (uint64_t); uint64_t a = ctx->H[0]; uint64_t b = ctx->H[1]; uint64_t c = ctx->H[2]; uint64_t d = ctx->H[3]; uint64_t e = ctx->H[4]; uint64_t f = ctx->H[5]; uint64_t g = ctx->H[6]; uint64_t h = ctx->H[7]; /* First increment the byte count. FIPS 180-2 specifies the possible length of the file up to 2^128 bits. Here we only compute the number of bytes. Do a double word increment. */ #ifdef USE_TOTAL128 ctx->total128 += len; #else uint64_t lolen = len; ctx->total[TOTAL128_low] += lolen; ctx->total[TOTAL128_high] += ((len >> 31 >> 31 >> 2) + (ctx->total[TOTAL128_low] < lolen)); #endif /* Process all bytes in the buffer with 128 bytes in each round of the loop. */ while (nwords > 0) { uint64_t W[80]; uint64_t a_save = a; uint64_t b_save = b; uint64_t c_save = c; uint64_t d_save = d; uint64_t e_save = e; uint64_t f_save = f; uint64_t g_save = g; uint64_t h_save = h; /* Operators defined in FIPS 180-2:4.1.2. */ #define Ch(x, y, z) ((x & y) ^ (~x & z)) #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) #define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39)) #define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41)) #define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7)) #define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6)) /* It is unfortunate that C does not provide an operator for cyclic rotation. Hope the C compiler is smart enough. */ #define CYCLIC(w, s) ((w >> s) | (w << (64 - s))) /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */ for (unsigned int t = 0; t < 16; ++t) { W[t] = SWAP (*words); ++words; } for (unsigned int t = 16; t < 80; ++t) W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16]; /* The actual computation according to FIPS 180-2:6.3.2 step 3. 
*/ for (unsigned int t = 0; t < 80; ++t) { uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t]; uint64_t T2 = S0 (a) + Maj (a, b, c); h = g; g = f; f = e; e = d + T1; d = c; c = b; b = a; a = T1 + T2; } /* Add the starting values of the context according to FIPS 180-2:6.3.2 step 4. */ a += a_save; b += b_save; c += c_save; d += d_save; e += e_save; f += f_save; g += g_save; h += h_save; /* Prepare for the next round. */ nwords -= 16; } /* Put checksum in context given as argument. */ ctx->H[0] = a; ctx->H[1] = b; ctx->H[2] = c; ctx->H[3] = d; ctx->H[4] = e; ctx->H[5] = f; ctx->H[6] = g; ctx->H[7] = h; }
/*
 * mlib_ImagePolynomialWarp_0_BC - fill dst with one value per channel that
 * is computed from src around the position (wx, wy).
 *
 * dst: destination image; cleared to `color` by __mlib_ImageClear().
 * src: source image; its type, stride and channel count are queried.
 * wx, wy: source position.  The integer parts select the pixel, the
 *         fractional parts (dx, dy) are fed to S0..S3 to obtain four
 *         horizontal and four vertical weights.
 *
 * For each supported source type the data pointer is moved to row
 * (int)wy - 1 and column (int)wx - 1, the clamping limits for that type
 * are set, and CALC_BICUBIC is expanded.
 *
 * Returns MLIB_FAILURE for an unsupported source type, otherwise the
 * status returned by __mlib_ImageClear().
 *
 * NOTE(review): CALC_BICUBIC is defined elsewhere.  It presumably uses the
 * locals declared here by name (a0..a3, r0..r3, res, fx*, fy*, i,
 * srcData0, srcData, srcYStride, srcChannels, limit_lo, limit_hi) and
 * writes `color` - confirm before renaming or removing any of them.
 * NOTE(review): no check is visible that rows (int)wy - 1 .. and columns
 * (int)wx - 1 .. lie inside the source image - confirm the caller
 * guarantees it.
 */
mlib_status
mlib_ImagePolynomialWarp_0_BC(
    mlib_image *dst,
    const mlib_image *src,
    mlib_d64 wx,
    mlib_d64 wy)
{
	mlib_d64 a0, a1, a2, a3;
	mlib_d64 r0, r1, r2, r3, res;
	mlib_d64 fx0, fx1, fx2, fx3;
	mlib_d64 fy0, fy1, fy2, fy3;
	mlib_d64 dx, dy;
	mlib_s32 i, srcYStride, srcChannels, color[4];
	mlib_type srcType;
	mlib_d64 limit_lo, limit_hi;

	srcType = mlib_ImageGetType(src);
	srcYStride = mlib_ImageGetStride(src);
	srcChannels = mlib_ImageGetChannels(src);

	/* Fractional parts of the position (truncation towards zero). */
	dx = wx - (mlib_s32)wx;
	dy = wy - (mlib_s32)wy;

	/* Four weights per axis, derived from the fractional offsets. */
	fx0 = S0(dx);
	fx1 = S1(dx);
	fx2 = S2(dx);
	fx3 = S3(dx);

	fy0 = S0(dy);
	fy1 = S1(dy);
	fy2 = S2(dy);
	fy3 = S3(dy);

	switch (srcType) {
	case MLIB_BYTE: {
		    mlib_u8 *srcData0, *srcData = mlib_ImageGetData(src);

		    limit_lo = MLIB_U8_MIN;
		    limit_hi = MLIB_U8_MAX;

		    /* Elements are one byte wide, so the stride can be
		       applied to the typed pointer directly. */
		    srcData = srcData + ((mlib_s32)wy - 1) * srcYStride;
		    srcData += ((mlib_s32)wx - 1) * srcChannels;
		    CALC_BICUBIC;
	    }

		break;
	case MLIB_SHORT: {
		    mlib_s16 *srcData0, *srcData = mlib_ImageGetData(src);

		    limit_lo = MLIB_S16_MIN;
		    limit_hi = MLIB_S16_MAX;

		    /* The stride is applied through a byte pointer; the
		       column offset is applied in elements. */
		    srcData =
			(void *)((mlib_u8 *)srcData + ((mlib_s32)wy -
			1) * srcYStride);
		    srcData += ((mlib_s32)wx - 1) * srcChannels;
		    CALC_BICUBIC;
	    }

		break;
	case MLIB_USHORT: {
		    mlib_u16 *srcData0, *srcData = mlib_ImageGetData(src);

		    limit_lo = MLIB_U16_MIN;
		    limit_hi = MLIB_U16_MAX;

		    srcData =
			(void *)((mlib_u8 *)srcData + ((mlib_s32)wy -
			1) * srcYStride);
		    srcData += ((mlib_s32)wx - 1) * srcChannels;
		    CALC_BICUBIC;
	    }

		break;
	case MLIB_INT: {
		    mlib_s32 *srcData0, *srcData = mlib_ImageGetData(src);

		    limit_lo = MLIB_S32_MIN;
		    limit_hi = MLIB_S32_MAX;

		    srcData =
			(void *)((mlib_u8 *)srcData + ((mlib_s32)wy -
			1) * srcYStride);
		    srcData += ((mlib_s32)wx - 1) * srcChannels;
		    CALC_BICUBIC;
	    }

		break;
	default:
		return (MLIB_FAILURE);
	}

	/* Every destination pixel receives the same per-channel value. */
	return (__mlib_ImageClear(dst, color));
}
void sw_verify(long OM, long ON, long ObM, long ObN, short** Oseq_A, short** Oseq_B, int** OAout, int** OBout) { // long M, long N, short* _local_seq_A, short* _local_seq_B, int* _local_A, int* _local_B, int* _local_c, int* _local_Aout, int* _local_Bout){ long M,N; M = OM*ObM; N = ON*ObN; short* _local_seq_A = (short *)malloc(sizeof(short)*M); short* _local_seq_B = (short *)malloc(sizeof(short)*N); int* _local_A = (int *) malloc(sizeof(int)*M); int* _local_B = (int *) malloc(sizeof(int)*N); int _local_c = 0; int* _local_Aout = (int *) malloc(sizeof(int)*M); int* _local_Bout = (int *) malloc(sizeof(int)*N); memset(_local_A,0,sizeof(int)*M); memset(_local_B,0,sizeof(int)*N); int ii=0; int jj=0; for (ii=0;ii<OM; ii++) { memcpy(&_local_seq_A[ii*ObM],Oseq_A[ii],sizeof(short)*ObM); } for (ii=0;ii<ON; ii++) { memcpy(&_local_seq_B[ii*ObN],Oseq_B[ii],sizeof(short)*ObN); } ///Parameter checking if (!((M >= 2 && N >= 2))) { printf("The value of parameters are not valid.\n"); exit(-1); } //Copy to global seq_A = _local_seq_A; seq_B = _local_seq_B; A = _local_A; B = _local_B; c = _local_c; Aout = _local_Aout; Bout = _local_Bout; //Memory Allocation int mz1, mz2; int* _lin_Table = (int*)malloc(sizeof(int)*((M) * (N))); mallocCheck(_lin_Table, ((M) * (N)), int); Table = (int**)malloc(sizeof(int*)*(M)); mallocCheck(Table, (M), int*); for (mz1=0;mz1 < M; mz1++) { Table[mz1] = &_lin_Table[(mz1*(N))]; } _flag_Aout = (char*)malloc(sizeof(char)*(M)); mallocCheck(_flag_Aout, (M), char); memset(_flag_Aout, 'N', (M)); _flag_Bout = (char*)malloc(sizeof(char)*(N)); mallocCheck(_flag_Bout, (N), char); memset(_flag_Bout, 'N', (N)); _flag_gap_penalty = 'N'; char* _lin__flag_Table = (char*)malloc(sizeof(char)*((M) * (N))); mallocCheck(_lin__flag_Table, ((M) * (N)), char); _flag_Table = (char**)malloc(sizeof(char*)*(M)); mallocCheck(_flag_Table, (M), char*); for (mz1=0;mz1 < M; mz1++) { _flag_Table[mz1] = &_lin__flag_Table[(mz1*(N))]; } memset(_lin__flag_Table, 'N', ((M) * (N))); #define S0(i) 
eval_Aout(M,N,i) { //Domain //{i|i>=0 && M>=i+1 && M>=2 && N>=2} int c1; for(c1=0;c1 <= M-1;c1+=1) { S0((c1)); } } #undef S0 #define S0(i) eval_Bout(M,N,i) { //Domain //{i|i>=0 && N>=i+1 && M>=2 && N>=2} int c1; for(c1=0;c1 <= N-1;c1+=1) { S0((c1)); } } #undef S0 for (ii=0;ii<OM; ii++) { memcpy(OAout[ii],&_local_Aout[ii*ObM],sizeof(int)*ObM); } for (ii=0;ii<ON; ii++) { memcpy(OBout[ii],&_local_Bout[ii*ObN],sizeof(int)*ObN); } //Memory Free free(_lin_Table); free(Table); free(_flag_Aout); free(_flag_Bout); free(_lin__flag_Table); free(_flag_Table); }
/* The SHA-256 core: fold one 64-byte message block into the state.

   HD holds the eight state words h0..h7; they are read at the start and
   incremented by the result of the 64 rounds at the end.  DATA_ARG points
   to the 64 bytes of the block, which are interpreted as sixteen
   big-endian 32-bit words.  See FIPS 180-2 for details. */
static void
transform (hmac256_context_t hd, const void *data_arg)
{
  const unsigned char *data = data_arg;

#define Cho(x,y,z) (z ^ (x & (y ^ z)))      /* (4.2) same as SHA-1's F1 */
#define Maj(x,y,z) ((x & y) | (z & (x|y)))  /* (4.3) same as SHA-1's F3 */
#define Sum0(x) (ror ((x), 2) ^ ror ((x), 13) ^ ror ((x), 22))  /* (4.4) */
#define Sum1(x) (ror ((x), 6) ^ ror ((x), 11) ^ ror ((x), 25))  /* (4.5) */
#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3))       /* (4.6) */
#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10))     /* (4.7) */
#define R(a,b,c,d,e,f,g,h,k,w) do                                 \
          {                                                       \
            t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + (k) + (w);  \
            t2 = Sum0((a)) + Maj((a),(b),(c));                    \
            h = g;                                                \
            g = f;                                                \
            f = e;                                                \
            e = d + t1;                                           \
            d = c;                                                \
            c = b;                                                \
            b = a;                                                \
            a = t1 + t2;                                          \
          } while (0)

  static const u32 K[64] =
    {
      0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
      0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
      0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
      0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
      0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
      0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
      0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
      0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
      0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };

  u32 a, b, c, d, e, f, g, h;
  u32 t1, t2;           /* Used by the R macro.  */
  u32 x[16];
  u32 w[64];
  int i;

  a = hd->h0;
  b = hd->h1;
  c = hd->h2;
  d = hd->h3;
  e = hd->h4;
  f = hd->h5;
  g = hd->h6;
  h = hd->h7;

  /* Bring the sixteen message words into host byte order.  */
#ifdef WORDS_BIGENDIAN
  memcpy (x, data, 64);
#else /*!WORDS_BIGENDIAN*/
  {
    unsigned char *out = (unsigned char*)x;

    /* Reverse the four bytes of every word.  */
    for (i=0; i < 16; i++)
      {
        out[4*i + 3] = data[4*i + 0];
        out[4*i + 2] = data[4*i + 1];
        out[4*i + 1] = data[4*i + 2];
        out[4*i + 0] = data[4*i + 3];
      }
  }
#endif /*!WORDS_BIGENDIAN*/

  /* Message schedule: the sixteen input words, then 48 derived words.  */
  for (i=0; i < 16; i++)
    w[i] = x[i];
  for (i=16; i < 64; i++)
    w[i] = S1(w[i-2]) + w[i-7] + S0(w[i-15]) + w[i-16];

  /* The 64 rounds.  */
  for (i=0; i < 64; i++)
    R(a,b,c,d,e,f,g,h,K[i],w[i]);

  hd->h0 += a;
  hd->h1 += b;
  hd->h2 += c;
  hd->h3 += d;
  hd->h4 += e;
  hd->h5 += f;
  hd->h6 += g;
  hd->h7 += h;
}
//main int main(int argc, char** argv) { //Check number of args if (argc <= 6) { printf("Number of argument is smaller than expected.\n"); printf("Expecting P,Q,R,ts1_l1,ts2_l1,ts3_l1\n"); exit(0); } char *end = 0; char *val = 0; //Read Parameters //Initialisation of P errno = 0; end = 0; val = argv[1]; long P = strtol(val,&end,10); if ((errno == ERANGE && (P == LONG_MAX || P == LONG_MIN)) || (errno != 0 && P == 0)) { perror("strtol"); exit(EXIT_FAILURE); } if (end == val) { fprintf(stderr, "No digits were found for P\n"); exit(EXIT_FAILURE); } if (*end != '\0'){ printf("For parameter P: Converted part: %ld, non-convertible part: %s\n", P, end); exit(EXIT_FAILURE); } //Initialisation of Q errno = 0; end = 0; val = argv[2]; long Q = strtol(val,&end,10); if ((errno == ERANGE && (Q == LONG_MAX || Q == LONG_MIN)) || (errno != 0 && Q == 0)) { perror("strtol"); exit(EXIT_FAILURE); } if (end == val) { fprintf(stderr, "No digits were found for Q\n"); exit(EXIT_FAILURE); } if (*end != '\0'){ printf("For parameter Q: Converted part: %ld, non-convertible part: %s\n", Q, end); exit(EXIT_FAILURE); } //Initialisation of R errno = 0; end = 0; val = argv[3]; long R = strtol(val,&end,10); if ((errno == ERANGE && (R == LONG_MAX || R == LONG_MIN)) || (errno != 0 && R == 0)) { perror("strtol"); exit(EXIT_FAILURE); } if (end == val) { fprintf(stderr, "No digits were found for R\n"); exit(EXIT_FAILURE); } if (*end != '\0'){ printf("For parameter R: Converted part: %ld, non-convertible part: %s\n", R, end); exit(EXIT_FAILURE); } //Initialisation of ts1_l1 errno = 0; end = 0; val = argv[4]; long ts1_l1 = strtol(val,&end,10); if ((errno == ERANGE && (ts1_l1 == LONG_MAX || ts1_l1 == LONG_MIN)) || (errno != 0 && ts1_l1 == 0)) { perror("strtol"); exit(EXIT_FAILURE); } if (end == val) { fprintf(stderr, "No digits were found for ts1_l1\n"); exit(EXIT_FAILURE); } if (*end != '\0'){ printf("For parameter ts1_l1: Converted part: %ld, non-convertible part: %s\n", ts1_l1, end); exit(EXIT_FAILURE); } 
//Initialisation of ts2_l1 errno = 0; end = 0; val = argv[5]; long ts2_l1 = strtol(val,&end,10); if ((errno == ERANGE && (ts2_l1 == LONG_MAX || ts2_l1 == LONG_MIN)) || (errno != 0 && ts2_l1 == 0)) { perror("strtol"); exit(EXIT_FAILURE); } if (end == val) { fprintf(stderr, "No digits were found for ts2_l1\n"); exit(EXIT_FAILURE); } if (*end != '\0'){ printf("For parameter ts2_l1: Converted part: %ld, non-convertible part: %s\n", ts2_l1, end); exit(EXIT_FAILURE); } //Initialisation of ts3_l1 errno = 0; end = 0; val = argv[6]; long ts3_l1 = strtol(val,&end,10); if ((errno == ERANGE && (ts3_l1 == LONG_MAX || ts3_l1 == LONG_MIN)) || (errno != 0 && ts3_l1 == 0)) { perror("strtol"); exit(EXIT_FAILURE); } if (end == val) { fprintf(stderr, "No digits were found for ts3_l1\n"); exit(EXIT_FAILURE); } if (*end != '\0'){ printf("For parameter ts3_l1: Converted part: %ld, non-convertible part: %s\n", ts3_l1, end); exit(EXIT_FAILURE); } ///Parameter checking if (!((P >= 2 && Q >= 2 && R >= 2 && ts1_l1 > 0 && ts2_l1 > 0 && ts3_l1 > 0))) { printf("The value of parameters are not valid.\n"); exit(-1); } //Memory Allocation int mz1, mz2; float alpha; float beta; float* A = (float*)malloc(sizeof(float)*((P) * (Q))); mallocCheck(A, ((P) * (Q)), float); float* B = (float*)malloc(sizeof(float)*((Q) * (R))); mallocCheck(B, ((Q) * (R)), float); float* Cout = (float*)malloc(sizeof(float)*((P) * (R))); mallocCheck(Cout, ((P) * (R)), float); #ifdef VERIFY float* Cout_verify = (float*)malloc(sizeof(float)*((P) * (R))); mallocCheck(Cout_verify, ((P) * (R)), float); #endif //Initialization of rand srand((unsigned)time(NULL)); //Input Initialization { #if defined (RANDOM) #define S0() (alpha = rand()%50) #elif defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0() scanf("%f", &alpha) #else #define S0() printf("alpha="); scanf("%f", &alpha) #endif #else #define S0() (alpha = 1) //Default value #endif S0(); #undef S0 } { #if defined (RANDOM) #define S0() (beta = rand()%50) #elif 
defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0() scanf("%f", &beta) #else #define S0() printf("beta="); scanf("%f", &beta) #endif #else #define S0() (beta = 1) //Default value #endif S0(); #undef S0 } { #if defined (RANDOM) #define S0(i,j) (A(i,j) = rand()%50) #elif defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0(i,j) scanf("%f", &A(i,j)) #else #define S0(i,j) printf("A(%ld,%ld)=",(long) i,(long) j); scanf("%f", &A(i,j)) #endif #else #define S0(i,j) (A(i,j) = 1) //Default value #endif int c1,c2; for(c1=0;c1 <= P-1;c1+=1) { for(c2=0;c2 <= Q-1;c2+=1) { S0((c1),(c2)); } } #undef S0 } { #if defined (RANDOM) #define S0(i,j) (B(i,j) = rand()%50) #elif defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0(i,j) scanf("%f", &B(i,j)) #else #define S0(i,j) printf("B(%ld,%ld)=",(long) i,(long) j); scanf("%f", &B(i,j)) #endif #else #define S0(i,j) (B(i,j) = 1) //Default value #endif int c1,c2; for(c1=0;c1 <= Q-1;c1+=1) { for(c2=0;c2 <= R-1;c2+=1) { S0((c1),(c2)); } } #undef S0 } { #if defined (RANDOM) #if defined (VERIFY) #define S0(i,j) Cout(i,j) = rand()%50; Cout_verify(i,j)=Cout(i,j); #else #define S0(i,j) (Cout(i,j) = rand()%50) #endif #elif defined (CHECKING) || defined (VERIFY) #if defined (VERIFY) #ifdef NO_PROMPT #define S0(i,j) scanf("%f", &Cout(i,j)); Cout_verify(i,j)=Cout(i,j); #else #define S0(i,j) printf("Cout(%ld,%ld)=",(long) i,(long) j); scanf("%f", &Cout(i,j)); Cout_verify(i,j)=Cout(i,j); #endif #else #ifdef NO_PROMPT #define S0(i,j) scanf("%f", &Cout(i,j)) #else #define S0(i,j) printf("Cout(%ld,%ld)=",(long) i,(long) j); scanf("%f", &Cout(i,j)) #endif #endif #else #define S0(i,j) (Cout(i,j) = 1) //Default value #endif int c1,c2; for(c1=0;c1 <= P-1;c1+=1) { for(c2=0;c2 <= R-1;c2+=1) { S0((c1),(c2)); } } #undef S0 } //Timing struct timeval time; double elapsed_time; //Call the main computation gettimeofday(&time, NULL); elapsed_time = (((double) time.tv_sec) + ((double) time.tv_usec)/1000000); gemm(P, Q, R, 
ts1_l1, ts2_l1, ts3_l1, &alpha, &beta, A, B, Cout); gettimeofday(&time, NULL); elapsed_time = (((double) time.tv_sec) + ((double) time.tv_usec)/1000000) - elapsed_time; // timing information printf("Execution time : %lf sec.\n", elapsed_time); #ifdef TIMING FILE * fp = fopen( "trace.dat","a+"); if (fp == NULL) { printf("I couldn't open trace.dat for writing.\n"); exit(EXIT_FAILURE); } fprintf(fp, "%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%lf\n",P,Q,R,ts1_l1,ts2_l1,ts3_l1,elapsed_time); fclose(fp); #endif //Verification Run #ifdef VERIFY #ifdef TIMING gettimeofday(&time, NULL); elapsed_time = (((double) time.tv_sec) + ((double) time.tv_usec)/1000000); #endif gemm_verify(P, Q, R, ts1_l1, ts2_l1, ts3_l1, &alpha, &beta, A, B, Cout_verify, Cout_verify); #ifdef TIMING gettimeofday(&time, NULL); elapsed_time = (((double) time.tv_sec) + ((double) time.tv_usec)/1000000) - elapsed_time; FILE * fp_verify = fopen( "trace_verify.dat","a+"); if (fp == NULL) { printf("I couldn't open trace_verify.dat for writing.\n"); exit(EXIT_FAILURE); } fprintf(fp, "%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%lf\n",P,Q,R,ts1_l1,ts2_l1,ts3_l1,elapsed_time); fclose(fp_verify); #endif #endif #ifdef CHECKING //Print Outputs { #ifdef NO_PROMPT #define S0(i,j) printf("%0.2lf\n",var_Cout(i,j)) #else #define S0(i,j) printf("Cout(%ld,%ld)=",(long) i,(long) j);printf("%0.2lf\n",var_Cout(i,j)) #endif int c1,c2; for(c1=0;c1 <= P-1;c1+=1) { for(c2=0;c2 <= R-1;c2+=1) { S0((c1),(c2)); } } #undef S0 } #elif VERIFY //Compare outputs for verification { //Error Counter int _errors_ = 0; #define S0(i,j) if (fabs(1.0 - var_Cout_verify(i,j)/var_Cout(i,j)) > EPSILON) _errors_++; int c1,c2; for(c1=0;c1 <= P-1;c1+=1) { for(c2=0;c2 <= R-1;c2+=1) { S0((c1),(c2)); } } #undef S0 if(_errors_ == 0){ printf("TEST for Cout PASSED\n"); }else{ printf("TEST for Cout FAILED. #Errors: %d\n", _errors_); } } #endif //Memory Free free(A); free(B); free(Cout); #ifdef VERIFY free(Cout_verify); #endif return EXIT_SUCCESS; }
/*
 * Build the expanded key in skey->expkey from the raw key bytes at `key`.
 *
 * key:  source of SERPENT_KEY_SIZE key bytes (copied verbatim with memcpy,
 *       no byte-order conversion is done here).
 * skey: receives the expanded key in its expkey array.
 *
 * The work is done by the keyiter / storekeys / loadkeys / S0..S7 macros,
 * which are defined outside this function.  NOTE(review): they presumably
 * read and write `k` implicitly, so the statement order and the temporary
 * shifts of `k` below must not be changed - confirm against the macro
 * definitions.
 */
void _stdcall serpent256_set_key(const unsigned char *key, serpent256_key *skey)
{
	u32 *k = skey->expkey;
	u32 r0,r1,r2,r3,r4;

	/* Copy SERPENT_KEY_SIZE bytes of key material to the start of the expanded-key buffer */
	memcpy(k, key, SERPENT_KEY_SIZE);

	/* Expand key using polynomial: 132 keyiter steps, numbered 0..131 by the fifth argument */
	r0 = k[3];
	r1 = k[4];
	r2 = k[5];
	r3 = k[6];
	r4 = k[7];

	keyiter(k[0],r0,r4,r2,0,0);
	keyiter(k[1],r1,r0,r3,1,1);
	keyiter(k[2],r2,r1,r4,2,2);
	keyiter(k[3],r3,r2,r0,3,3);
	keyiter(k[4],r4,r3,r1,4,4);
	keyiter(k[5],r0,r4,r2,5,5);
	keyiter(k[6],r1,r0,r3,6,6);
	keyiter(k[7],r2,r1,r4,7,7);

	keyiter(k[ 0],r3,r2,r0, 8, 8);
	keyiter(k[ 1],r4,r3,r1, 9, 9);
	keyiter(k[ 2],r0,r4,r2, 10, 10);
	keyiter(k[ 3],r1,r0,r3, 11, 11);
	keyiter(k[ 4],r2,r1,r4, 12, 12);
	keyiter(k[ 5],r3,r2,r0, 13, 13);
	keyiter(k[ 6],r4,r3,r1, 14, 14);
	keyiter(k[ 7],r0,r4,r2, 15, 15);
	keyiter(k[ 8],r1,r0,r3, 16, 16);
	keyiter(k[ 9],r2,r1,r4, 17, 17);
	keyiter(k[ 10],r3,r2,r0, 18, 18);
	keyiter(k[ 11],r4,r3,r1, 19, 19);
	keyiter(k[ 12],r0,r4,r2, 20, 20);
	keyiter(k[ 13],r1,r0,r3, 21, 21);
	keyiter(k[ 14],r2,r1,r4, 22, 22);
	keyiter(k[ 15],r3,r2,r0, 23, 23);
	keyiter(k[ 16],r4,r3,r1, 24, 24);
	keyiter(k[ 17],r0,r4,r2, 25, 25);
	keyiter(k[ 18],r1,r0,r3, 26, 26);
	keyiter(k[ 19],r2,r1,r4, 27, 27);
	keyiter(k[ 20],r3,r2,r0, 28, 28);
	keyiter(k[ 21],r4,r3,r1, 29, 29);
	keyiter(k[ 22],r0,r4,r2, 30, 30);
	keyiter(k[ 23],r1,r0,r3, 31, 31);

	/* Shift the base pointer by 50 words; the last keyiter argument is now the step number minus 50 */
	k += 50;

	keyiter(k[-26],r2,r1,r4, 32,-18);
	keyiter(k[-25],r3,r2,r0, 33,-17);
	keyiter(k[-24],r4,r3,r1, 34,-16);
	keyiter(k[-23],r0,r4,r2, 35,-15);
	keyiter(k[-22],r1,r0,r3, 36,-14);
	keyiter(k[-21],r2,r1,r4, 37,-13);
	keyiter(k[-20],r3,r2,r0, 38,-12);
	keyiter(k[-19],r4,r3,r1, 39,-11);
	keyiter(k[-18],r0,r4,r2, 40,-10);
	keyiter(k[-17],r1,r0,r3, 41, -9);
	keyiter(k[-16],r2,r1,r4, 42, -8);
	keyiter(k[-15],r3,r2,r0, 43, -7);
	keyiter(k[-14],r4,r3,r1, 44, -6);
	keyiter(k[-13],r0,r4,r2, 45, -5);
	keyiter(k[-12],r1,r0,r3, 46, -4);
	keyiter(k[-11],r2,r1,r4, 47, -3);
	keyiter(k[-10],r3,r2,r0, 48, -2);
	keyiter(k[ -9],r4,r3,r1, 49, -1);
	keyiter(k[ -8],r0,r4,r2, 50, 0);
	keyiter(k[ -7],r1,r0,r3, 51, 1);
	keyiter(k[ -6],r2,r1,r4, 52, 2);
	keyiter(k[ -5],r3,r2,r0, 53, 3);
	keyiter(k[ -4],r4,r3,r1, 54, 4);
	keyiter(k[ -3],r0,r4,r2, 55, 5);
	keyiter(k[ -2],r1,r0,r3, 56, 6);
	keyiter(k[ -1],r2,r1,r4, 57, 7);
	keyiter(k[ 0],r3,r2,r0, 58, 8);
	keyiter(k[ 1],r4,r3,r1, 59, 9);
	keyiter(k[ 2],r0,r4,r2, 60, 10);
	keyiter(k[ 3],r1,r0,r3, 61, 11);
	keyiter(k[ 4],r2,r1,r4, 62, 12);
	keyiter(k[ 5],r3,r2,r0, 63, 13);
	keyiter(k[ 6],r4,r3,r1, 64, 14);
	keyiter(k[ 7],r0,r4,r2, 65, 15);
	keyiter(k[ 8],r1,r0,r3, 66, 16);
	keyiter(k[ 9],r2,r1,r4, 67, 17);
	keyiter(k[ 10],r3,r2,r0, 68, 18);
	keyiter(k[ 11],r4,r3,r1, 69, 19);
	keyiter(k[ 12],r0,r4,r2, 70, 20);
	keyiter(k[ 13],r1,r0,r3, 71, 21);
	keyiter(k[ 14],r2,r1,r4, 72, 22);
	keyiter(k[ 15],r3,r2,r0, 73, 23);
	keyiter(k[ 16],r4,r3,r1, 74, 24);
	keyiter(k[ 17],r0,r4,r2, 75, 25);
	keyiter(k[ 18],r1,r0,r3, 76, 26);
	keyiter(k[ 19],r2,r1,r4, 77, 27);
	keyiter(k[ 20],r3,r2,r0, 78, 28);
	keyiter(k[ 21],r4,r3,r1, 79, 29);
	keyiter(k[ 22],r0,r4,r2, 80, 30);
	keyiter(k[ 23],r1,r0,r3, 81, 31);

	/* Second shift: k is now 100 words past the start of expkey */
	k += 50;

	keyiter(k[-26],r2,r1,r4, 82,-18);
	keyiter(k[-25],r3,r2,r0, 83,-17);
	keyiter(k[-24],r4,r3,r1, 84,-16);
	keyiter(k[-23],r0,r4,r2, 85,-15);
	keyiter(k[-22],r1,r0,r3, 86,-14);
	keyiter(k[-21],r2,r1,r4, 87,-13);
	keyiter(k[-20],r3,r2,r0, 88,-12);
	keyiter(k[-19],r4,r3,r1, 89,-11);
	keyiter(k[-18],r0,r4,r2, 90,-10);
	keyiter(k[-17],r1,r0,r3, 91, -9);
	keyiter(k[-16],r2,r1,r4, 92, -8);
	keyiter(k[-15],r3,r2,r0, 93, -7);
	keyiter(k[-14],r4,r3,r1, 94, -6);
	keyiter(k[-13],r0,r4,r2, 95, -5);
	keyiter(k[-12],r1,r0,r3, 96, -4);
	keyiter(k[-11],r2,r1,r4, 97, -3);
	keyiter(k[-10],r3,r2,r0, 98, -2);
	keyiter(k[ -9],r4,r3,r1, 99, -1);
	keyiter(k[ -8],r0,r4,r2,100, 0);
	keyiter(k[ -7],r1,r0,r3,101, 1);
	keyiter(k[ -6],r2,r1,r4,102, 2);
	keyiter(k[ -5],r3,r2,r0,103, 3);
	keyiter(k[ -4],r4,r3,r1,104, 4);
	keyiter(k[ -3],r0,r4,r2,105, 5);
	keyiter(k[ -2],r1,r0,r3,106, 6);
	keyiter(k[ -1],r2,r1,r4,107, 7);
	keyiter(k[ 0],r3,r2,r0,108, 8);
	keyiter(k[ 1],r4,r3,r1,109, 9);
	keyiter(k[ 2],r0,r4,r2,110, 10);
	keyiter(k[ 3],r1,r0,r3,111, 11);
	keyiter(k[ 4],r2,r1,r4,112, 12);
	keyiter(k[ 5],r3,r2,r0,113, 13);
	keyiter(k[ 6],r4,r3,r1,114, 14);
	keyiter(k[ 7],r0,r4,r2,115, 15);
	keyiter(k[ 8],r1,r0,r3,116, 16);
	keyiter(k[ 9],r2,r1,r4,117, 17);
	keyiter(k[ 10],r3,r2,r0,118, 18);
	keyiter(k[ 11],r4,r3,r1,119, 19);
	keyiter(k[ 12],r0,r4,r2,120, 20);
	keyiter(k[ 13],r1,r0,r3,121, 21);
	keyiter(k[ 14],r2,r1,r4,122, 22);
	keyiter(k[ 15],r3,r2,r0,123, 23);
	keyiter(k[ 16],r4,r3,r1,124, 24);
	keyiter(k[ 17],r0,r4,r2,125, 25);
	keyiter(k[ 18],r1,r0,r3,126, 26);
	keyiter(k[ 19],r2,r1,r4,127, 27);
	keyiter(k[ 20],r3,r2,r0,128, 28);
	keyiter(k[ 21],r4,r3,r1,129, 29);
	keyiter(k[ 22],r0,r4,r2,130, 30);
	keyiter(k[ 23],r1,r0,r3,131, 31);

	/*
	 * Apply S-boxes: 33 S-box invocations, each followed by storekeys at
	 * the given offset and (except for the last) loadkeys at the offset
	 * four lower.  k is stepped back by 50 twice, so it ends at the
	 * start of expkey again.
	 */
	S3(r3,r4,r0,r1,r2); storekeys(r1,r2,r4,r3, 28); loadkeys(r1,r2,r4,r3, 24);
	S4(r1,r2,r4,r3,r0); storekeys(r2,r4,r3,r0, 24); loadkeys(r2,r4,r3,r0, 20);
	S5(r2,r4,r3,r0,r1); storekeys(r1,r2,r4,r0, 20); loadkeys(r1,r2,r4,r0, 16);
	S6(r1,r2,r4,r0,r3); storekeys(r4,r3,r2,r0, 16); loadkeys(r4,r3,r2,r0, 12);
	S7(r4,r3,r2,r0,r1); storekeys(r1,r2,r0,r4, 12); loadkeys(r1,r2,r0,r4, 8);
	S0(r1,r2,r0,r4,r3); storekeys(r0,r2,r4,r1, 8); loadkeys(r0,r2,r4,r1, 4);
	S1(r0,r2,r4,r1,r3); storekeys(r3,r4,r1,r0, 4); loadkeys(r3,r4,r1,r0, 0);
	S2(r3,r4,r1,r0,r2); storekeys(r2,r4,r3,r0, 0); loadkeys(r2,r4,r3,r0, -4);
	S3(r2,r4,r3,r0,r1); storekeys(r0,r1,r4,r2, -4); loadkeys(r0,r1,r4,r2, -8);
	S4(r0,r1,r4,r2,r3); storekeys(r1,r4,r2,r3, -8); loadkeys(r1,r4,r2,r3,-12);
	S5(r1,r4,r2,r3,r0); storekeys(r0,r1,r4,r3,-12); loadkeys(r0,r1,r4,r3,-16);
	S6(r0,r1,r4,r3,r2); storekeys(r4,r2,r1,r3,-16); loadkeys(r4,r2,r1,r3,-20);
	S7(r4,r2,r1,r3,r0); storekeys(r0,r1,r3,r4,-20); loadkeys(r0,r1,r3,r4,-24);
	S0(r0,r1,r3,r4,r2); storekeys(r3,r1,r4,r0,-24); loadkeys(r3,r1,r4,r0,-28);

	k -= 50;

	S1(r3,r1,r4,r0,r2); storekeys(r2,r4,r0,r3, 22); loadkeys(r2,r4,r0,r3, 18);
	S2(r2,r4,r0,r3,r1); storekeys(r1,r4,r2,r3, 18); loadkeys(r1,r4,r2,r3, 14);
	S3(r1,r4,r2,r3,r0); storekeys(r3,r0,r4,r1, 14); loadkeys(r3,r0,r4,r1, 10);
	S4(r3,r0,r4,r1,r2); storekeys(r0,r4,r1,r2, 10); loadkeys(r0,r4,r1,r2, 6);
	S5(r0,r4,r1,r2,r3); storekeys(r3,r0,r4,r2, 6); loadkeys(r3,r0,r4,r2, 2);
	S6(r3,r0,r4,r2,r1); storekeys(r4,r1,r0,r2, 2); loadkeys(r4,r1,r0,r2, -2);
	S7(r4,r1,r0,r2,r3); storekeys(r3,r0,r2,r4, -2); loadkeys(r3,r0,r2,r4, -6);
	S0(r3,r0,r2,r4,r1); storekeys(r2,r0,r4,r3, -6); loadkeys(r2,r0,r4,r3,-10);
	S1(r2,r0,r4,r3,r1); storekeys(r1,r4,r3,r2,-10); loadkeys(r1,r4,r3,r2,-14);
	S2(r1,r4,r3,r2,r0); storekeys(r0,r4,r1,r2,-14); loadkeys(r0,r4,r1,r2,-18);
	S3(r0,r4,r1,r2,r3); storekeys(r2,r3,r4,r0,-18); loadkeys(r2,r3,r4,r0,-22);

	k -= 50;

	S4(r2,r3,r4,r0,r1); storekeys(r3,r4,r0,r1, 28); loadkeys(r3,r4,r0,r1, 24);
	S5(r3,r4,r0,r1,r2); storekeys(r2,r3,r4,r1, 24); loadkeys(r2,r3,r4,r1, 20);
	S6(r2,r3,r4,r1,r0); storekeys(r4,r0,r3,r1, 20); loadkeys(r4,r0,r3,r1, 16);
	S7(r4,r0,r3,r1,r2); storekeys(r2,r3,r1,r4, 16); loadkeys(r2,r3,r1,r4, 12);
	S0(r2,r3,r1,r4,r0); storekeys(r1,r3,r4,r2, 12); loadkeys(r1,r3,r4,r2, 8);
	S1(r1,r3,r4,r2,r0); storekeys(r0,r4,r2,r1, 8); loadkeys(r0,r4,r2,r1, 4);
	S2(r0,r4,r2,r1,r3); storekeys(r3,r4,r0,r1, 4); loadkeys(r3,r4,r0,r1, 0);
	S3(r3,r4,r0,r1,r2); storekeys(r1,r2,r4,r3, 0);
}
/*
 * Process one 64-byte message block at DATA and fold the result into the
 * eight chaining words h0..h7 of HD.
 */
static void
transform (SHA256_CONTEXT *hd, byte *data)
{
  static const u32 K[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  };
  u32 a, b, c, d, e, f, g, h;
  /* Not referenced by name below; presumably scratch for the R macro,
     which is defined outside this function - confirm before removing. */
  u32 t1, t2;
  u32 w[64];
  int i;

  /* Working copy of the chaining values. */
  a = hd->h0;
  b = hd->h1;
  c = hd->h2;
  d = hd->h3;
  e = hd->h4;
  f = hd->h5;
  g = hd->h6;
  h = hd->h7;

  /* Read the 16 input words most-significant byte first, which gives the
     same values on either host byte order. */
  for (i = 0; i < 16; i++)
    {
      const byte *p = data + 4 * i;

      w[i] = ((u32) (p[0] & 0xff) << 24)
           | ((u32) (p[1] & 0xff) << 16)
           | ((u32) (p[2] & 0xff) << 8)
           |  (u32) (p[3] & 0xff);
    }

  /* Extend the 16 input words to the full 64-word schedule. */
  for (i = 16; i < 64; i++)
    {
      w[i] = S1(w[i-2]) + w[i-7] + S0(w[i-15]) + w[i-16];
    }

  /* 64 rounds, one constant and one schedule word each. */
  for (i = 0; i < 64; i++)
    {
      R(a,b,c,d,e,f,g,h,K[i],w[i]);
    }

  /* Add the working values back into the context. */
  hd->h0 += a;
  hd->h1 += b;
  hd->h2 += c;
  hd->h3 += d;
  hd->h4 += e;
  hd->h5 += f;
  hd->h6 += g;
  hd->h7 += h;
}
/*
 * Process one 128-byte message block (sixteen 64-bit words) at DATA and
 * fold the result into the eight chaining words h0..h7 of HD.
 */
static void
transform (SHA512_CONTEXT *hd, const unsigned char *data)
{
  static const u64 k[] =
    {
      U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
      U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
      U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
      U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
      U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
      U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
      U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
      U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
      U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
      U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
      U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
      U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
      U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
      U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
      U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
      U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
      U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
      U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
      U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
      U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
      U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
      U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
      U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
      U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
      U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
      U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
      U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
      U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
      U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
      U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
      U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
      U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
      U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
      U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
      U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
      U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
      U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
      U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
      U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
      U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
    };
  u64 a, b, c, d, e, f, g, h;
  u64 w[80];
  int t;

  /* Working copy of the chaining values. */
  a = hd->h0;
  b = hd->h1;
  c = hd->h2;
  d = hd->h3;
  e = hd->h4;
  f = hd->h5;
  g = hd->h6;
  h = hd->h7;

  /* Read the 16 input words most-significant byte first, which gives the
     same values on either host byte order. */
  for (t = 0; t < 16; t++)
    {
      const unsigned char *p = data + 8 * t;

      w[t] = ((u64) p[0] << 56) | ((u64) p[1] << 48)
           | ((u64) p[2] << 40) | ((u64) p[3] << 32)
           | ((u64) p[4] << 24) | ((u64) p[5] << 16)
           | ((u64) p[6] << 8)  |  (u64) p[7];
    }

  /* Message-schedule helpers; they stay defined after this function. */
#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

  /* Extend the 16 input words to the full 80-word schedule. */
  for (t = 16; t < 80; t++)
    {
      w[t] = S1 (w[t - 2]) + w[t - 7] + S0 (w[t - 15]) + w[t - 16];
    }

  /* 80 rounds.  Each variant below advances t itself. */
  t = 0;
  while (t < 80)
    {
      u64 t1, t2;

      /* Timings recorded by the original author (AMD Athlon Dual Core
         4050e, gcc 4.3.3, gcry_md_hash_buffer over 10000 bytes set to
         0,1,2,3...255,0,..., 1000 iterations):
           Not unrolled with macros:  440ms
           Unrolled with macros:      350ms
           Unrolled with inline:      330ms  */
#if 1 /* Not unrolled. */
      t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
      t2 = Sum0 (a) + Maj (a, b, c);
      h = g;
      g = f;
      f = e;
      e = d + t1;
      d = c;
      c = b;
      b = a;
      a = t1 + t2;
      t++;
#else /* Unrolled to interweave the chain variables. */
      t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
      t2 = Sum0 (a) + Maj (a, b, c);
      d += t1;
      h = t1 + t2;

      t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[t+1];
      t2 = Sum0 (h) + Maj (h, a, b);
      c += t1;
      g = t1 + t2;

      t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[t+2];
      t2 = Sum0 (g) + Maj (g, h, a);
      b += t1;
      f = t1 + t2;

      t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[t+3];
      t2 = Sum0 (f) + Maj (f, g, h);
      a += t1;
      e = t1 + t2;

      t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[t+4];
      t2 = Sum0 (e) + Maj (e, f, g);
      h += t1;
      d = t1 + t2;

      t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[t+5];
      t2 = Sum0 (d) + Maj (d, e, f);
      g += t1;
      c = t1 + t2;

      t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[t+6];
      t2 = Sum0 (c) + Maj (c, d, e);
      f += t1;
      b = t1 + t2;

      t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[t+7];
      t2 = Sum0 (b) + Maj (b, c, d);
      e += t1;
      a = t1 + t2;

      t += 8;
#endif
    }

  /* Add the working values back into the context. */
  hd->h0 += a;
  hd->h1 += b;
  hd->h2 += c;
  hd->h3 += d;
  hd->h4 += e;
  hd->h5 += f;
  hd->h6 += g;
  hd->h7 += h;
}
/*
 * Process one 128-byte message block (sixteen 64-bit words) at DATA and
 * fold the result into the eight chaining words h0..h7 of HD.
 */
static void
transform (SHA512_CONTEXT *hd, byte *data)
{
  static const u64 k[] =
    {
      U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
      U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
      U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
      U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
      U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
      U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
      U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
      U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
      U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
      U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
      U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
      U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
      U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
      U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
      U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
      U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
      U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
      U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
      U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
      U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
      U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
      U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
      U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
      U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
      U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
      U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
      U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
      U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
      U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
      U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
      U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
      U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
      U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
      U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
      U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
      U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
      U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
      U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
      U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
      U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
    };
  u64 a, b, c, d, e, f, g, h;
  u64 w[80];
  int t;

  /* Working copy of the chaining values. */
  a = hd->h0;
  b = hd->h1;
  c = hd->h2;
  d = hd->h3;
  e = hd->h4;
  f = hd->h5;
  g = hd->h6;
  h = hd->h7;

  /* Read the 16 input words most-significant byte first, which gives the
     same values on either host byte order. */
  for (t = 0; t < 16; t++)
    {
      const byte *p = data + 8 * t;

      w[t] = ((u64) (p[0] & 0xff) << 56) | ((u64) (p[1] & 0xff) << 48)
           | ((u64) (p[2] & 0xff) << 40) | ((u64) (p[3] & 0xff) << 32)
           | ((u64) (p[4] & 0xff) << 24) | ((u64) (p[5] & 0xff) << 16)
           | ((u64) (p[6] & 0xff) << 8)  |  (u64) (p[7] & 0xff);
    }

  /* Round and message-schedule helpers; they stay defined after this
     function. */
#define ROTR(x,n) (((x)>>(n)) | ((x)<<(64-(n))))
#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define Sum0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sum1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

  /* Extend the 16 input words to the full 80-word schedule. */
  for (t = 16; t < 80; t++)
    {
      w[t] = S1 (w[t - 2]) + w[t - 7] + S0 (w[t - 15]) + w[t - 16];
    }

  /* 80 rounds: compute the two temporaries, then rotate the eight
     working variables by one position. */
  for (t = 0; t < 80; t++)
    {
      u64 t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
      u64 t2 = Sum0 (a) + Maj (a, b, c);

      h = g;
      g = f;
      f = e;
      e = d + t1;
      d = c;
      c = b;
      b = a;
      a = t1 + t2;
    }

  /* Add the working values back into the context. */
  hd->h0 += a;
  hd->h1 += b;
  hd->h2 += c;
  hd->h3 += d;
  hd->h4 += e;
  hd->h5 += f;
  hd->h6 += g;
  hd->h7 += h;
}
/* Execute hook of SpawnTask: forwards directly to S0(), which is defined
   outside this view.  NOTE(review): S0 may be a macro - confirm what it
   expands to before restructuring this statement. */
inline void SpawnTask::OnExecute() { return S0(); }
/*
 * Build the expanded key in ctx->expkey from a user key of keylen bytes.
 *
 * ctx:    context whose expkey array receives the expanded key.
 * key:    user key bytes.
 * keylen: number of bytes at key.  No upper bound is checked here;
 *         presumably the caller guarantees keylen <= SERPENT_MAX_KEY_SIZE
 *         - confirm against callers.
 *
 * Always returns 0.
 *
 * The work is done by the keyiter / store_and_load_keys / storekeys /
 * S0..S7 macros, which are defined outside this function.  NOTE(review):
 * they presumably read and write `k` implicitly, so the statement order
 * and the temporary shifts of `k` below must not be changed - confirm
 * against the macro definitions.
 */
int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
		     unsigned int keylen)
{
	u32 *k = ctx->expkey;
	u8 *k8 = (u8 *)k;
	u32 r0, r1, r2, r3, r4;
	int i;

	/*
	 * Copy key, add padding: a key shorter than SERPENT_MAX_KEY_SIZE is
	 * followed by a single byte 1 and then zero bytes up to
	 * SERPENT_MAX_KEY_SIZE.
	 */
	for (i = 0; i < keylen; ++i)
		k8[i] = key[i];
	if (i < SERPENT_MAX_KEY_SIZE)
		k8[i++] = 1;
	while (i < SERPENT_MAX_KEY_SIZE)
		k8[i++] = 0;

	/*
	 * Expand key using polynomial: 132 keyiter steps, numbered 0..131 by
	 * the fifth argument.  Only the words read from the copied key bytes
	 * (k[0]..k[7]) go through le32_to_cpu.
	 */
	r0 = le32_to_cpu(k[3]);
	r1 = le32_to_cpu(k[4]);
	r2 = le32_to_cpu(k[5]);
	r3 = le32_to_cpu(k[6]);
	r4 = le32_to_cpu(k[7]);

	keyiter(le32_to_cpu(k[0]), r0, r4, r2, 0, 0);
	keyiter(le32_to_cpu(k[1]), r1, r0, r3, 1, 1);
	keyiter(le32_to_cpu(k[2]), r2, r1, r4, 2, 2);
	keyiter(le32_to_cpu(k[3]), r3, r2, r0, 3, 3);
	keyiter(le32_to_cpu(k[4]), r4, r3, r1, 4, 4);
	keyiter(le32_to_cpu(k[5]), r0, r4, r2, 5, 5);
	keyiter(le32_to_cpu(k[6]), r1, r0, r3, 6, 6);
	keyiter(le32_to_cpu(k[7]), r2, r1, r4, 7, 7);

	keyiter(k[0], r3, r2, r0, 8, 8);
	keyiter(k[1], r4, r3, r1, 9, 9);
	keyiter(k[2], r0, r4, r2, 10, 10);
	keyiter(k[3], r1, r0, r3, 11, 11);
	keyiter(k[4], r2, r1, r4, 12, 12);
	keyiter(k[5], r3, r2, r0, 13, 13);
	keyiter(k[6], r4, r3, r1, 14, 14);
	keyiter(k[7], r0, r4, r2, 15, 15);
	keyiter(k[8], r1, r0, r3, 16, 16);
	keyiter(k[9], r2, r1, r4, 17, 17);
	keyiter(k[10], r3, r2, r0, 18, 18);
	keyiter(k[11], r4, r3, r1, 19, 19);
	keyiter(k[12], r0, r4, r2, 20, 20);
	keyiter(k[13], r1, r0, r3, 21, 21);
	keyiter(k[14], r2, r1, r4, 22, 22);
	keyiter(k[15], r3, r2, r0, 23, 23);
	keyiter(k[16], r4, r3, r1, 24, 24);
	keyiter(k[17], r0, r4, r2, 25, 25);
	keyiter(k[18], r1, r0, r3, 26, 26);
	keyiter(k[19], r2, r1, r4, 27, 27);
	keyiter(k[20], r3, r2, r0, 28, 28);
	keyiter(k[21], r4, r3, r1, 29, 29);
	keyiter(k[22], r0, r4, r2, 30, 30);
	keyiter(k[23], r1, r0, r3, 31, 31);

	/* Shift the base pointer by 50 words; the last keyiter argument is now the step number minus 50 */
	k += 50;

	keyiter(k[-26], r2, r1, r4, 32, -18);
	keyiter(k[-25], r3, r2, r0, 33, -17);
	keyiter(k[-24], r4, r3, r1, 34, -16);
	keyiter(k[-23], r0, r4, r2, 35, -15);
	keyiter(k[-22], r1, r0, r3, 36, -14);
	keyiter(k[-21], r2, r1, r4, 37, -13);
	keyiter(k[-20], r3, r2, r0, 38, -12);
	keyiter(k[-19], r4, r3, r1, 39, -11);
	keyiter(k[-18], r0, r4, r2, 40, -10);
	keyiter(k[-17], r1, r0, r3, 41, -9);
	keyiter(k[-16], r2, r1, r4, 42, -8);
	keyiter(k[-15], r3, r2, r0, 43, -7);
	keyiter(k[-14], r4, r3, r1, 44, -6);
	keyiter(k[-13], r0, r4, r2, 45, -5);
	keyiter(k[-12], r1, r0, r3, 46, -4);
	keyiter(k[-11], r2, r1, r4, 47, -3);
	keyiter(k[-10], r3, r2, r0, 48, -2);
	keyiter(k[-9], r4, r3, r1, 49, -1);
	keyiter(k[-8], r0, r4, r2, 50, 0);
	keyiter(k[-7], r1, r0, r3, 51, 1);
	keyiter(k[-6], r2, r1, r4, 52, 2);
	keyiter(k[-5], r3, r2, r0, 53, 3);
	keyiter(k[-4], r4, r3, r1, 54, 4);
	keyiter(k[-3], r0, r4, r2, 55, 5);
	keyiter(k[-2], r1, r0, r3, 56, 6);
	keyiter(k[-1], r2, r1, r4, 57, 7);
	keyiter(k[0], r3, r2, r0, 58, 8);
	keyiter(k[1], r4, r3, r1, 59, 9);
	keyiter(k[2], r0, r4, r2, 60, 10);
	keyiter(k[3], r1, r0, r3, 61, 11);
	keyiter(k[4], r2, r1, r4, 62, 12);
	keyiter(k[5], r3, r2, r0, 63, 13);
	keyiter(k[6], r4, r3, r1, 64, 14);
	keyiter(k[7], r0, r4, r2, 65, 15);
	keyiter(k[8], r1, r0, r3, 66, 16);
	keyiter(k[9], r2, r1, r4, 67, 17);
	keyiter(k[10], r3, r2, r0, 68, 18);
	keyiter(k[11], r4, r3, r1, 69, 19);
	keyiter(k[12], r0, r4, r2, 70, 20);
	keyiter(k[13], r1, r0, r3, 71, 21);
	keyiter(k[14], r2, r1, r4, 72, 22);
	keyiter(k[15], r3, r2, r0, 73, 23);
	keyiter(k[16], r4, r3, r1, 74, 24);
	keyiter(k[17], r0, r4, r2, 75, 25);
	keyiter(k[18], r1, r0, r3, 76, 26);
	keyiter(k[19], r2, r1, r4, 77, 27);
	keyiter(k[20], r3, r2, r0, 78, 28);
	keyiter(k[21], r4, r3, r1, 79, 29);
	keyiter(k[22], r0, r4, r2, 80, 30);
	keyiter(k[23], r1, r0, r3, 81, 31);

	/* Second shift: k is now 100 words past the start of expkey */
	k += 50;

	keyiter(k[-26], r2, r1, r4, 82, -18);
	keyiter(k[-25], r3, r2, r0, 83, -17);
	keyiter(k[-24], r4, r3, r1, 84, -16);
	keyiter(k[-23], r0, r4, r2, 85, -15);
	keyiter(k[-22], r1, r0, r3, 86, -14);
	keyiter(k[-21], r2, r1, r4, 87, -13);
	keyiter(k[-20], r3, r2, r0, 88, -12);
	keyiter(k[-19], r4, r3, r1, 89, -11);
	keyiter(k[-18], r0, r4, r2, 90, -10);
	keyiter(k[-17], r1, r0, r3, 91, -9);
	keyiter(k[-16], r2, r1, r4, 92, -8);
	keyiter(k[-15], r3, r2, r0, 93, -7);
	keyiter(k[-14], r4, r3, r1, 94, -6);
	keyiter(k[-13], r0, r4, r2, 95, -5);
	keyiter(k[-12], r1, r0, r3, 96, -4);
	keyiter(k[-11], r2, r1, r4, 97, -3);
	keyiter(k[-10], r3, r2, r0, 98, -2);
	keyiter(k[-9], r4, r3, r1, 99, -1);
	keyiter(k[-8], r0, r4, r2, 100, 0);
	keyiter(k[-7], r1, r0, r3, 101, 1);
	keyiter(k[-6], r2, r1, r4, 102, 2);
	keyiter(k[-5], r3, r2, r0, 103, 3);
	keyiter(k[-4], r4, r3, r1, 104, 4);
	keyiter(k[-3], r0, r4, r2, 105, 5);
	keyiter(k[-2], r1, r0, r3, 106, 6);
	keyiter(k[-1], r2, r1, r4, 107, 7);
	keyiter(k[0], r3, r2, r0, 108, 8);
	keyiter(k[1], r4, r3, r1, 109, 9);
	keyiter(k[2], r0, r4, r2, 110, 10);
	keyiter(k[3], r1, r0, r3, 111, 11);
	keyiter(k[4], r2, r1, r4, 112, 12);
	keyiter(k[5], r3, r2, r0, 113, 13);
	keyiter(k[6], r4, r3, r1, 114, 14);
	keyiter(k[7], r0, r4, r2, 115, 15);
	keyiter(k[8], r1, r0, r3, 116, 16);
	keyiter(k[9], r2, r1, r4, 117, 17);
	keyiter(k[10], r3, r2, r0, 118, 18);
	keyiter(k[11], r4, r3, r1, 119, 19);
	keyiter(k[12], r0, r4, r2, 120, 20);
	keyiter(k[13], r1, r0, r3, 121, 21);
	keyiter(k[14], r2, r1, r4, 122, 22);
	keyiter(k[15], r3, r2, r0, 123, 23);
	keyiter(k[16], r4, r3, r1, 124, 24);
	keyiter(k[17], r0, r4, r2, 125, 25);
	keyiter(k[18], r1, r0, r3, 126, 26);
	keyiter(k[19], r2, r1, r4, 127, 27);
	keyiter(k[20], r3, r2, r0, 128, 28);
	keyiter(k[21], r4, r3, r1, 129, 29);
	keyiter(k[22], r0, r4, r2, 130, 30);
	keyiter(k[23], r1, r0, r3, 131, 31);

	/*
	 * Apply S-boxes: 33 S-box invocations, each followed by a
	 * store_and_load_keys whose two offsets step down by four (the last
	 * one is a plain storekeys).  k is stepped back by 50 twice, so it
	 * ends at the start of expkey again.
	 */
	S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
	S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
	S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
	S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
	S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
	S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
	S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
	S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
	S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
	S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
	S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
	S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
	S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
	S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);

	k -= 50;

	S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
	S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
	S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
	S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
	S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
	S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
	S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);
	S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10);
	S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14);
	S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18);
	S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22);

	k -= 50;

	S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24);
	S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20);
	S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16);
	S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12);
	S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8);
	S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4);
	S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0);
	S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0);

	return 0;
}
/*
 * executor_cpack_align - run n_tstep time steps of the generated executor.
 *
 * Per time step, in this order:
 *   S0(i)  for i  in [0, n_moles): adds columns 0..2 and 6..8 of data[i] into
 *          columns 3..5, wraps columns 3..5 once by +/- side, adds columns
 *          6..8 into columns 0..2, then zeroes columns 6..8.
 *   S1     recomputes cutoffSquare, reloads n_inter from ninter and resets
 *          vir and epot to 0.0.
 *   S2(ii) for ii in [0, n_inter): pair interaction between the rows
 *          sigma(inter1(ii)) and sigma(inter2(ii)); accumulates into columns
 *          6..8 of both rows and into vir / epot when rd < cutoffSquare.
 *   S3(i)  for i  in [0, n_moles): scales columns 6..8 by timeStepSqHalf and
 *          adds them into columns 0..2.
 *
 * Parameters:
 *   n_inter  interaction count on entry; sizes the inter1/inter2 relations
 *            (ER_ctor is given n_inter - 1) and decides whether the S2 loop
 *            runs at all. NOTE: S1 overwrites the local copy with ninter.
 *   n_moles  number of rows of data that S0 and S3 visit.
 *   n_tstep  number of time steps; nothing is executed when it is < 1.
 *   data     n x 9 array updated in place.
 *   inter1, inter2  index arrays wrapped with ER_ctor and read through
 *            ER_out_given_in.
 *   sigma    only *sigma is read; it is used as the row remapping in S2.
 *
 * side, sideHalf, cutoffRadius, cutoffSquare, ninter, vir, epot,
 * timeStepSqHalf and the scratch variables xx..forcez are not declared here
 * and must be visible at file scope.
 *
 * NOTE(review): inter1_ER and inter2_ER are created with ER_ctor and no
 * matching release is visible in this function - confirm whether the
 * ExplicitRelation API expects the caller to destroy them.
 */
void executor_cpack_align(int n_inter, int n_moles, int n_tstep,
                          double data[][9], int *inter1, int *inter2,
                          ExplicitRelation **sigma)
{
    ExplicitRelation *sigma_ER;
    ExplicitRelation *inter1_ER;
    ExplicitRelation *inter2_ER;
    int i, tstep;

    /*
     * Decide once, from the argument, whether the interaction loop takes
     * part. The generated code re-tested n_inter in four consecutive `if`
     * guards, but S1 overwrites n_inter with ninter, so a later guard could
     * become true after an earlier nest had already run and all time steps
     * were executed a second time.
     */
    const int run_interactions = (n_inter >= 1);

    inter1_ER = ER_ctor(inter1, n_inter - 1);
    inter2_ER = ER_ctor(inter2, n_inter - 1);
    sigma_ER = *sigma;

    /* Define the executor main loop body statements */

    /* a1: {[tstep,i]->[sigma_out1]: -1sigma_out1+i=0} */
#define S0(tstep,i) do {\
        data[ (i) ][ 3 ] += data[ (i) ][ 0 ] + data[ (i) ][ 6 ];\
        data[ (i) ][ 4 ] += data[ (i) ][ 1 ] + data[ (i) ][ 7 ];\
        data[ (i) ][ 5 ] += data[ (i) ][ 2 ] + data[ (i) ][ 8 ];\
        if (data[ (i) ][ 3 ] < 0.0) data[ (i) ][ 3 ] += side;\
        if (data[ (i) ][ 3 ] > side) data[ (i) ][ 3 ] -= side;\
        if (data[ (i) ][ 4 ] < 0.0) data[ (i) ][ 4 ] += side;\
        if (data[ (i) ][ 4 ] > side) data[ (i) ][ 4 ] -= side;\
        if (data[ (i) ][ 5 ] < 0.0) data[ (i) ][ 5 ] += side;\
        if (data[ (i) ][ 5 ] > side) data[ (i) ][ 5 ] -= side;\
        data[ (i) ][ 0 ] += data[ (i) ][ 6 ];\
        data[ (i) ][ 1 ] += data[ (i) ][ 7 ];\
        data[ (i) ][ 2 ] += data[ (i) ][ 8 ];\
        data[ (i) ][ 6 ] = 0.0;\
        data[ (i) ][ 7 ] = 0.0;\
        data[ (i) ][ 8 ] = 0.0;\
    } while (0)

#define S1(tstep) do {\
        cutoffSquare = cutoffRadius * cutoffRadius;\
        n_inter = ninter;\
        vir = 0.0;\
        epot = 0.0;\
    } while (0)

    /* a32: {[tstep,ii]->[sigma_out1]: sigma_out1+-1sigma(inter2(ii))=0} */
    /* a31: {[tstep,ii]->[sigma_out1]: sigma_out1+-1sigma(inter1(ii))=0} */
#define S2(tstep,ii) do {\
        xx = data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter1_ER,ii)) ][ 3 ] - data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter2_ER,ii)) ][ 3 ];\
        yy = data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter1_ER,ii)) ][ 4 ] - data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter2_ER,ii)) ][ 4 ];\
        zz = data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter1_ER,ii)) ][ 5 ] - data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter2_ER,ii)) ][ 5 ];\
        if (xx < -sideHalf) xx += side;\
        if (yy < -sideHalf) yy += side;\
        if (zz < -sideHalf) zz += side;\
        if (xx > sideHalf) xx -= side;\
        if (yy > sideHalf) yy -= side;\
        if (zz > sideHalf) zz -= side;\
        rd = (xx * xx + yy * yy + zz * zz);\
        if (rd < cutoffSquare) {\
            rrd = 1.0 / rd;\
            rrd2 = rrd * rrd;\
            rrd3 = rrd2 * rrd;\
            rrd4 = rrd2 * rrd2;\
            rrd6 = rrd2 * rrd4;\
            rrd7 = rrd6 * rrd;\
            r148 = rrd7 - 0.5 * rrd4;\
            forcex = xx * r148;\
            forcey = yy * r148;\
            forcez = zz * r148;\
            data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter1_ER,ii)) ][ 6 ] += forcex;\
            data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter1_ER,ii)) ][ 7 ] += forcey;\
            data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter1_ER,ii)) ][ 8 ] += forcez;\
            data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter2_ER,ii)) ][ 6 ] -= forcex;\
            data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter2_ER,ii)) ][ 7 ] -= forcey;\
            data[ ER_out_given_in(sigma_ER,ER_out_given_in(inter2_ER,ii)) ][ 8 ] -= forcez;\
            vir -= rd * r148;\
            epot += (rrd6 - rrd3);\
        }\
    } while (0)

    /* a43: {[tstep,i]->[sigma_out1]: -1sigma_out1+i=0} */
#define S3(tstep,i) do {\
        data[ (i) ][ 6 ] *= timeStepSqHalf;\
        data[ (i) ][ 7 ] *= timeStepSqHalf;\
        data[ (i) ][ 8 ] *= timeStepSqHalf;\
        data[ (i) ][ 0 ] += data[ (i) ][ 6 ];\
        data[ (i) ][ 1 ] += data[ (i) ][ 7 ];\
        data[ (i) ][ 2 ] += data[ (i) ][ 8 ];\
    } while (0)

    /*
     * The executor main loop. The S0/S3 loops are empty by themselves when
     * n_moles < 1, so only the interaction loop needs the explicit guard.
     */
    for (tstep = 0; tstep < n_tstep; tstep++) {
        for (i = 0; i < n_moles; i++) {
            S0(tstep, i);
        }

        S1(tstep);

        if (run_interactions) {
            /* n_inter was just reloaded from ninter by S1. */
            for (i = 0; i < n_inter; i++) {
                S2(tstep, i);
            }
        }

        for (i = 0; i < n_moles; i++) {
            S3(tstep, i);
        }
    }

    /* Undefine the executor main loop body statements */
#undef S0
#undef S1
#undef S2
#undef S3
}
//main int main(int argc, char** argv) { //Check number of args if (argc <= 2) { printf("Number of argument is smaller than expected.\n"); printf("Expecting M,N\n"); exit(0); } char *end = 0; char *val = 0; //Read Parameters //Initialisation of M errno = 0; end = 0; val = argv[1]; long M = strtol(val,&end,10); if ((errno == ERANGE && (M == LONG_MAX || M == LONG_MIN)) || (errno != 0 && M == 0)) { perror("strtol"); exit(EXIT_FAILURE); } if (end == val) { fprintf(stderr, "No digits were found for M\n"); exit(EXIT_FAILURE); } if (*end != '\0'){ printf("For parameter M: Converted part: %ld, non-convertible part: %s\n", M, end); exit(EXIT_FAILURE); } //Initialisation of N errno = 0; end = 0; val = argv[2]; long N = strtol(val,&end,10); if ((errno == ERANGE && (N == LONG_MAX || N == LONG_MIN)) || (errno != 0 && N == 0)) { perror("strtol"); exit(EXIT_FAILURE); } if (end == val) { fprintf(stderr, "No digits were found for N\n"); exit(EXIT_FAILURE); } if (*end != '\0'){ printf("For parameter N: Converted part: %ld, non-convertible part: %s\n", N, end); exit(EXIT_FAILURE); } ///Parameter checking if (!((M >= 2 && N >= 2))) { printf("The value of parameters are not valid.\n"); exit(-1); } //Memory Allocation int mz1, mz2; short* seq_A = (short*)malloc(sizeof(short)*(M)); mallocCheck(seq_A, (M), short); short* seq_B = (short*)malloc(sizeof(short)*(N)); mallocCheck(seq_B, (N), short); int* A = (int*)malloc(sizeof(int)*(M)); mallocCheck(A, (M), int); int* B = (int*)malloc(sizeof(int)*(N)); mallocCheck(B, (N), int); int c; int* Aout = (int*)malloc(sizeof(int)*(M)); mallocCheck(Aout, (M), int); int* Bout = (int*)malloc(sizeof(int)*(N)); mallocCheck(Bout, (N), int); #ifdef VERIFY int* Aout_verify = (int*)malloc(sizeof(int)*(M)); mallocCheck(Aout_verify, (M), int); int* Bout_verify = (int*)malloc(sizeof(int)*(N)); mallocCheck(Bout_verify, (N), int); #endif //Initialization of rand srand((unsigned)time(NULL)); //Input Initialization { #if defined (RANDOM) #define S0(i) (seq_A(i) = 
rand()) #elif defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0(i) scanf("%hd", &seq_A(i)) #else #define S0(i) printf("seq_A(%ld)=",(long) i); scanf("%hd", &seq_A(i)) #endif #else #define S0(i) (seq_A(i) = 1) //Default value #endif int c1; for(c1=0;c1 <= M-1;c1+=1) { S0((c1)); } #undef S0 } { #if defined (RANDOM) #define S0(i) (seq_B(i) = rand()) #elif defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0(i) scanf("%hd", &seq_B(i)) #else #define S0(i) printf("seq_B(%ld)=",(long) i); scanf("%hd", &seq_B(i)) #endif #else #define S0(i) (seq_B(i) = 1) //Default value #endif int c1; for(c1=0;c1 <= N-1;c1+=1) { S0((c1)); } #undef S0 } { #if defined (RANDOM) #define S0(i) (A(i) = rand()) #elif defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0(i) scanf("%d", &A(i)) #else #define S0(i) printf("A(%ld)=",(long) i); scanf("%d", &A(i)) #endif #else #define S0(i) (A(i) = 1) //Default value #endif int c1; for(c1=0;c1 <= M-1;c1+=1) { S0((c1)); } #undef S0 } { #if defined (RANDOM) #define S0(i) (B(i) = rand()) #elif defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0(i) scanf("%d", &B(i)) #else #define S0(i) printf("B(%ld)=",(long) i); scanf("%d", &B(i)) #endif #else #define S0(i) (B(i) = 1) //Default value #endif int c1; for(c1=0;c1 <= N-1;c1+=1) { S0((c1)); } #undef S0 } { #if defined (RANDOM) #define S0() (c = rand()) #elif defined (CHECKING) || defined (VERIFY) #ifdef NO_PROMPT #define S0() scanf("%d", &c) #else #define S0() printf("c="); scanf("%d", &c) #endif #else #define S0() (c = 1) //Default value #endif S0(); #undef S0 } //Timing struct timeval time; double elapsed_time; //Call the main computation gettimeofday(&time, NULL); elapsed_time = (((double) time.tv_sec) + ((double) time.tv_usec)/1000000); sw_base(M, N, seq_A, seq_B, A, B, &c, Aout, Bout); gettimeofday(&time, NULL); elapsed_time = (((double) time.tv_sec) + ((double) time.tv_usec)/1000000) - elapsed_time; // timing information printf("Execution time : 
%lf sec.\n", elapsed_time); #ifdef TIMING FILE * fp = fopen( "trace.dat","a+"); if (fp == NULL) { printf("I couldn't open trace.dat for writing.\n"); exit(EXIT_FAILURE); } fprintf(fp, "%ld\t%ld\t%lf\n",M,N,elapsed_time); fclose(fp); #endif //Verification Run #ifdef VERIFY #ifdef TIMING gettimeofday(&time, NULL); elapsed_time = (((double) time.tv_sec) + ((double) time.tv_usec)/1000000); #endif sw_base_verify(M, N, seq_A, seq_B, A, B, &c, Aout_verify, Bout_verify); #ifdef TIMING gettimeofday(&time, NULL); elapsed_time = (((double) time.tv_sec) + ((double) time.tv_usec)/1000000) - elapsed_time; FILE * fp_verify = fopen( "trace_verify.dat","a+"); if (fp == NULL) { printf("I couldn't open trace_verify.dat for writing.\n"); exit(EXIT_FAILURE); } fprintf(fp, "%ld\t%ld\t%lf\n",M,N,elapsed_time); fclose(fp_verify); #endif #endif #ifdef CHECKING //Print Outputs { #ifdef NO_PROMPT #define S0(i) printf("%d\n",var_Aout(i)) #else #define S0(i) printf("Aout(%ld)=",(long) i);printf("%d\n",var_Aout(i)) #endif int c1; for(c1=0;c1 <= M-1;c1+=1) { S0((c1)); } #undef S0 } { #ifdef NO_PROMPT #define S0(i) printf("%d\n",var_Bout(i)) #else #define S0(i) printf("Bout(%ld)=",(long) i);printf("%d\n",var_Bout(i)) #endif int c1; for(c1=0;c1 <= N-1;c1+=1) { S0((c1)); } #undef S0 } #elif VERIFY //Compare outputs for verification { //Error Counter int _errors_ = 0; #define S0(i) if (abs(1 - var_Aout_verify(i)/var_Aout(i)) > EPSILON) _errors_++; int c1; for(c1=0;c1 <= M-1;c1+=1) { S0((c1)); } #undef S0 if(_errors_ == 0){ printf("TEST PASSED\n"); }else{ printf("TEST FAILED\n"); } } { //Error Counter int _errors_ = 0; #define S0(i) if (abs(1 - var_Bout_verify(i)/var_Bout(i)) > EPSILON) _errors_++; int c1; for(c1=0;c1 <= N-1;c1+=1) { S0((c1)); } #undef S0 if(_errors_ == 0){ printf("TEST PASSED\n"); }else{ printf("TEST FAILED\n"); } } #endif //Memory Free free(seq_A); free(seq_B); free(A); free(B); free(Aout); free(Bout); #ifdef VERIFY free(Aout_verify); free(Bout_verify); #endif return EXIT_SUCCESS; }
// Executing the task simply runs S0(); nothing is returned to the caller.
inline void UntilTask::OnExecute()
{
    S0();
}