/* SHA-1 block compression: folds one message block into the running digest
 * state sha->digest[0..4].  The per-round macros R0..R4 are defined elsewhere
 * in this file; each performs one SHA-1 round, updating the working variable
 * passed in its fifth slot in place and using W[] (and the context's block
 * buffer) for the message schedule. */
static void Transform(Sha* sha)
{
    /* Message-schedule scratch buffer consumed by the round macros. */
    word32 W[SHA_BLOCK_SIZE / sizeof(word32)];

    /* Copy context->state[] to working vars */
    word32 a = sha->digest[0];
    word32 b = sha->digest[1];
    word32 c = sha->digest[2];
    word32 d = sha->digest[3];
    word32 e = sha->digest[4];

#ifdef USE_SLOW_SHA
    /* Compact variant: run the 80 rounds in loops, rotating the five working
       variables by hand after each round (t is the rotation temporary). */
    word32 t, i;

    for (i = 0; i < 16; i++) {              /* rounds  0-15: raw block words */
        R0(a, b, c, d, e, i);
        t = e; e = d; d = c; c = b; b = a; a = t;
    }

    for (; i < 20; i++) {                   /* rounds 16-19: expanded words  */
        R1(a, b, c, d, e, i);
        t = e; e = d; d = c; c = b; b = a; a = t;
    }

    for (; i < 40; i++) {                   /* rounds 20-39 */
        R2(a, b, c, d, e, i);
        t = e; e = d; d = c; c = b; b = a; a = t;
    }

    for (; i < 60; i++) {                   /* rounds 40-59 */
        R3(a, b, c, d, e, i);
        t = e; e = d; d = c; c = b; b = a; a = t;
    }

    for (; i < 80; i++) {                   /* rounds 60-79 */
        R4(a, b, c, d, e, i);
        t = e; e = d; d = c; c = b; b = a; a = t;
    }
#else
    /* nearly 1 K bigger in code size but 25% faster */
    /* 4 rounds of 20 operations each. Loop unrolled.  The manual variable
       rotation above is folded into the argument order of successive calls. */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
#endif

    /* Add the working vars back into digest state[] */
    sha->digest[0] += a;
    sha->digest[1] += b;
    sha->digest[2] += c;
    sha->digest[3] += d;
    sha->digest[4] += e;
}
void EulerAngleUpdater::initialize() { const auto grain_num = _grain_tracker.getTotalFeatureCount(); if (_first_time) { _angles.resize(grain_num); _angles_old.resize(grain_num); for (unsigned int i = 0; i < grain_num; ++i) _angles[i] = _euler.getEulerAngles(i); // Read initial euler angles } unsigned int angle_size = _angles.size(); for (unsigned int i = angle_size; i < grain_num; ++i) // if new grains are created _angles.push_back(_euler.getEulerAngles(i)); // Assign initial euler angles for (unsigned int i = 0; i < grain_num; ++i) { if (!_first_time && !_fe_problem.converged()) _angles[i] = _angles_old[i]; RealGradient torque = _grain_torque.getTorqueValues()[i]; if (i <= angle_size) // if new grains are created _angles_old[i] = _angles[i]; else _angles_old.push_back(_angles[i]); RotationTensor R0(_angles_old[i]); // RotationTensor as per old euler angles RealVectorValue torque_rotated = R0 * torque; // Applied torque is rotated to allign with old grain axes RealVectorValue omega = _mr / _grain_volumes[i] * torque_rotated; // Angular velocity as per old grain axes /** * Change in euler angles are obtained from the torque & angular velocities about the material axes. * Change in phi1, Phi and phi2 are caused by rotation about z axis, x' axis & z'' axis, respectively. * Components of the angular velocities across z, x' and z'' axes are obtained from the torque values. * This yields change in euler angles due to grain rotation. 
*/ RealVectorValue angle_change; angle_change(0) = omega(2) * _dt; angle_change(1) = (omega(0) * std::cos(angle_change(0)) + omega(1) * std::sin(angle_change(0))) * _dt; angle_change(2) = (omega(0) * std::sin(angle_change(0)) * std::sin(angle_change(1)) - omega(1) * std::cos(angle_change(0)) * std::sin(angle_change(1)) + omega(2) * std::cos(angle_change(1))) * _dt; angle_change *= (180.0 / libMesh::pi); RotationTensor R1(angle_change); // Rotation matrix due to torque /** * Final RotationMatrix = RotationMatrix due to applied torque X old RotationMatrix * Updated Euler angles are obtained by back-tracking the angles from the rotation matrix * For details about the componenets of the rotation matrix please refer to RotationTensor.C * Phi = acos(R33); phi1 = atan2(R31,-R32); phi2 = atan2(R13,R23) for phi != 0.0 por 180.0 */ RealTensorValue R = R1 * R0; if (R(2,2) != 1.0 && R(2,2) != -1.0) // checks if cos(Phi) = 1 or -1 { _angles[i].phi1 = std::atan2(R(2,0), -R(2,1)) * (180.0 / libMesh::pi) ; _angles[i].Phi = std::acos(R(2,2)) * (180.0 / libMesh::pi); _angles[i].phi2 = std::atan2(R(0,2), R(1,2)) * (180.0 / libMesh::pi); } else if (R(2,2) == 1.0) // special case for Phi = 0.0 { if (R0(2,2) == 1.0) // when Phi_old = 0.0; all the rotations are about z axis and angles accumulates after each rotation _angles[i].phi1 = _angles_old[i].phi1 + _angles_old[i].phi2 + angle_change(0); else _angles[i].phi1 = angle_change(0); // Comply with bunge euler angle definitions, 0.0 <= phi1 <= 360.0 if (std::abs(_angles[i].phi1) > 360.0) { int laps = _angles[i].phi1 / 360.0; _angles[i].phi1 -= laps * 360.0; } _angles[i].Phi = 0.0; _angles[i].phi2 = -_angles[i].phi1 + std::atan2(R(0,1), R(1,1)) * (180.0 / libMesh::pi); } else { if (R0(2,2) == 1.0) _angles[i].phi1 = _angles_old[i].phi1 + _angles_old[i].phi2 + angle_change(0); else _angles[i].phi1 = angle_change(0); // Comply with bunge euler angle definitions, 0.0 <= phi1 <= 360.0 if (std::abs(_angles[i].phi1) > 360.0) { int laps = 
_angles[i].phi1 / 360.0; _angles[i].phi1 -= laps * 360.0; } _angles[i].Phi = 180.0; _angles[i].phi2 = _angles[i].phi1 - std::atan2(-R(0,1), -R(1,1)) * (180.0 / libMesh::pi); } // Following checks and updates are done only to comply with bunge euler angle definitions, 0.0 <= phi1/phi2 <= 360.0 if (_angles[i].phi1 < 0.0) _angles[i].phi1 += 360.0; if (_angles[i].phi2 < 0.0) _angles[i].phi2 += 360.0; if (_angles[i].Phi < 0.0) mooseError("Euler angle out of range."); } _first_time = false; }
/* MD5 compression over 'num' 64-byte blocks of byte-stream data.
 * HOST_c2l (defined elsewhere) reads 4 little-endian bytes and advances
 * 'data'; the loads of X(2)..X(15) are deliberately interleaved with the
 * first round's R0 steps to hide memory latency.  R0..R3 are the per-round
 * step macros for MD5 rounds 1-4 (shift count and sine-table constant are
 * the last two arguments). */
void md5_block_data_order (MD5_CTX * c, const void *data_, size_t num)
{
    const unsigned char *data = data_;
    register unsigned MD32_REG_T A, B, C, D, l;
#ifndef MD32_XARRAY
    /* See comment in crypto/sha/sha_locl.h for details. */
    /* Sixteen scalars instead of an array so X(i) can live in registers. */
    unsigned MD32_REG_T XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
        XX8, XX9, XX10, XX11, XX12, XX13, XX14, XX15;
# define X(i) XX##i
#else
    MD5_LONG XX[MD5_LBLOCK];
# define X(i) XX[i]
#endif

    /* Load the chaining state into working variables. */
    A = c->A;
    B = c->B;
    C = c->C;
    D = c->D;

    for (; num--;) {
        HOST_c2l (data, l);
        X (0) = l;
        HOST_c2l (data, l);
        X (1) = l;
        /* Round 0 */
        R0 (A, B, C, D, X (0), 7, 0xd76aa478L);
        HOST_c2l (data, l);
        X (2) = l;
        R0 (D, A, B, C, X (1), 12, 0xe8c7b756L);
        HOST_c2l (data, l);
        X (3) = l;
        R0 (C, D, A, B, X (2), 17, 0x242070dbL);
        HOST_c2l (data, l);
        X (4) = l;
        R0 (B, C, D, A, X (3), 22, 0xc1bdceeeL);
        HOST_c2l (data, l);
        X (5) = l;
        R0 (A, B, C, D, X (4), 7, 0xf57c0fafL);
        HOST_c2l (data, l);
        X (6) = l;
        R0 (D, A, B, C, X (5), 12, 0x4787c62aL);
        HOST_c2l (data, l);
        X (7) = l;
        R0 (C, D, A, B, X (6), 17, 0xa8304613L);
        HOST_c2l (data, l);
        X (8) = l;
        R0 (B, C, D, A, X (7), 22, 0xfd469501L);
        HOST_c2l (data, l);
        X (9) = l;
        R0 (A, B, C, D, X (8), 7, 0x698098d8L);
        HOST_c2l (data, l);
        X (10) = l;
        R0 (D, A, B, C, X (9), 12, 0x8b44f7afL);
        HOST_c2l (data, l);
        X (11) = l;
        R0 (C, D, A, B, X (10), 17, 0xffff5bb1L);
        HOST_c2l (data, l);
        X (12) = l;
        R0 (B, C, D, A, X (11), 22, 0x895cd7beL);
        HOST_c2l (data, l);
        X (13) = l;
        R0 (A, B, C, D, X (12), 7, 0x6b901122L);
        HOST_c2l (data, l);
        X (14) = l;
        R0 (D, A, B, C, X (13), 12, 0xfd987193L);
        HOST_c2l (data, l);
        X (15) = l;
        R0 (C, D, A, B, X (14), 17, 0xa679438eL);
        R0 (B, C, D, A, X (15), 22, 0x49b40821L);
        /* Round 1 */
        R1 (A, B, C, D, X (1), 5, 0xf61e2562L);
        R1 (D, A, B, C, X (6), 9, 0xc040b340L);
        R1 (C, D, A, B, X (11), 14, 0x265e5a51L);
        R1 (B, C, D, A, X (0), 20, 0xe9b6c7aaL);
        R1 (A, B, C, D, X (5), 5, 0xd62f105dL);
        R1 (D, A, B, C, X (10), 9, 0x02441453L);
        R1 (C, D, A, B, X (15), 14, 0xd8a1e681L);
        R1 (B, C, D, A, X (4), 20, 0xe7d3fbc8L);
        R1 (A, B, C, D, X (9), 5, 0x21e1cde6L);
        R1 (D, A, B, C, X (14), 9, 0xc33707d6L);
        R1 (C, D, A, B, X (3), 14, 0xf4d50d87L);
        R1 (B, C, D, A, X (8), 20, 0x455a14edL);
        R1 (A, B, C, D, X (13), 5, 0xa9e3e905L);
        R1 (D, A, B, C, X (2), 9, 0xfcefa3f8L);
        R1 (C, D, A, B, X (7), 14, 0x676f02d9L);
        R1 (B, C, D, A, X (12), 20, 0x8d2a4c8aL);
        /* Round 2 */
        R2 (A, B, C, D, X (5), 4, 0xfffa3942L);
        R2 (D, A, B, C, X (8), 11, 0x8771f681L);
        R2 (C, D, A, B, X (11), 16, 0x6d9d6122L);
        R2 (B, C, D, A, X (14), 23, 0xfde5380cL);
        R2 (A, B, C, D, X (1), 4, 0xa4beea44L);
        R2 (D, A, B, C, X (4), 11, 0x4bdecfa9L);
        R2 (C, D, A, B, X (7), 16, 0xf6bb4b60L);
        R2 (B, C, D, A, X (10), 23, 0xbebfbc70L);
        R2 (A, B, C, D, X (13), 4, 0x289b7ec6L);
        R2 (D, A, B, C, X (0), 11, 0xeaa127faL);
        R2 (C, D, A, B, X (3), 16, 0xd4ef3085L);
        R2 (B, C, D, A, X (6), 23, 0x04881d05L);
        R2 (A, B, C, D, X (9), 4, 0xd9d4d039L);
        R2 (D, A, B, C, X (12), 11, 0xe6db99e5L);
        R2 (C, D, A, B, X (15), 16, 0x1fa27cf8L);
        R2 (B, C, D, A, X (2), 23, 0xc4ac5665L);
        /* Round 3 */
        R3 (A, B, C, D, X (0), 6, 0xf4292244L);
        R3 (D, A, B, C, X (7), 10, 0x432aff97L);
        R3 (C, D, A, B, X (14), 15, 0xab9423a7L);
        R3 (B, C, D, A, X (5), 21, 0xfc93a039L);
        R3 (A, B, C, D, X (12), 6, 0x655b59c3L);
        R3 (D, A, B, C, X (3), 10, 0x8f0ccc92L);
        R3 (C, D, A, B, X (10), 15, 0xffeff47dL);
        R3 (B, C, D, A, X (1), 21, 0x85845dd1L);
        R3 (A, B, C, D, X (8), 6, 0x6fa87e4fL);
        R3 (D, A, B, C, X (15), 10, 0xfe2ce6e0L);
        R3 (C, D, A, B, X (6), 15, 0xa3014314L);
        R3 (B, C, D, A, X (13), 21, 0x4e0811a1L);
        R3 (A, B, C, D, X (4), 6, 0xf7537e82L);
        R3 (D, A, B, C, X (11), 10, 0xbd3af235L);
        R3 (C, D, A, B, X (2), 15, 0x2ad7d2bbL);
        R3 (B, C, D, A, X (9), 21, 0xeb86d391L);

        /* Davies-Meyer feed-forward: fold working vars back into the state. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
    }
}
/* MD5 compression over 'num' 64-byte blocks that are already in host word
 * order.  NOTE(review): 'data' is reinterpreted as an MD5_LONG array, so the
 * caller must guarantee suitable alignment and native byte order — confirm
 * at the call sites. */
void md5_block_host_order (MD5_CTX *c, const void *data, int num)
{
    const MD5_LONG *X=data;
    register unsigned long A,B,C,D;
    /*
     * In case you wonder why A-D are declared as long and not
     * as MD5_LONG. Doing so results in slight performance
     * boost on LP64 architectures. The catch is we don't
     * really care if 32 MSBs of a 64-bit register get polluted
     * with eventual overflows as we *save* only 32 LSBs in
     * *either* case. Now declaring 'em long excuses the compiler
     * from keeping 32 MSBs zeroed resulting in 13% performance
     * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
     * Well, to be honest it should say that this *prevents*
     * performance degradation.
     *
     * <*****@*****.**>
     */
    A=c->A;
    B=c->B;
    C=c->C;
    D=c->D;

    for (;num--;X+=HASH_LBLOCK)
    {
        /* Round 0 */
        R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
        R0(D,A,B,C,X[ 1],12,0xe8c7b756L);
        R0(C,D,A,B,X[ 2],17,0x242070dbL);
        R0(B,C,D,A,X[ 3],22,0xc1bdceeeL);
        R0(A,B,C,D,X[ 4], 7,0xf57c0fafL);
        R0(D,A,B,C,X[ 5],12,0x4787c62aL);
        R0(C,D,A,B,X[ 6],17,0xa8304613L);
        R0(B,C,D,A,X[ 7],22,0xfd469501L);
        R0(A,B,C,D,X[ 8], 7,0x698098d8L);
        R0(D,A,B,C,X[ 9],12,0x8b44f7afL);
        R0(C,D,A,B,X[10],17,0xffff5bb1L);
        R0(B,C,D,A,X[11],22,0x895cd7beL);
        R0(A,B,C,D,X[12], 7,0x6b901122L);
        R0(D,A,B,C,X[13],12,0xfd987193L);
        R0(C,D,A,B,X[14],17,0xa679438eL);
        R0(B,C,D,A,X[15],22,0x49b40821L);
        /* Round 1 */
        R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
        R1(D,A,B,C,X[ 6], 9,0xc040b340L);
        R1(C,D,A,B,X[11],14,0x265e5a51L);
        R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
        R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
        R1(D,A,B,C,X[10], 9,0x02441453L);
        R1(C,D,A,B,X[15],14,0xd8a1e681L);
        R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
        R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
        R1(D,A,B,C,X[14], 9,0xc33707d6L);
        R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
        R1(B,C,D,A,X[ 8],20,0x455a14edL);
        R1(A,B,C,D,X[13], 5,0xa9e3e905L);
        R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
        R1(C,D,A,B,X[ 7],14,0x676f02d9L);
        R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
        /* Round 2 */
        R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
        R2(D,A,B,C,X[ 8],11,0x8771f681L);
        R2(C,D,A,B,X[11],16,0x6d9d6122L);
        R2(B,C,D,A,X[14],23,0xfde5380cL);
        R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
        R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
        R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
        R2(B,C,D,A,X[10],23,0xbebfbc70L);
        R2(A,B,C,D,X[13], 4,0x289b7ec6L);
        R2(D,A,B,C,X[ 0],11,0xeaa127faL);
        R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
        R2(B,C,D,A,X[ 6],23,0x04881d05L);
        R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
        R2(D,A,B,C,X[12],11,0xe6db99e5L);
        R2(C,D,A,B,X[15],16,0x1fa27cf8L);
        R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
        /* Round 3 */
        R3(A,B,C,D,X[ 0], 6,0xf4292244L);
        R3(D,A,B,C,X[ 7],10,0x432aff97L);
        R3(C,D,A,B,X[14],15,0xab9423a7L);
        R3(B,C,D,A,X[ 5],21,0xfc93a039L);
        R3(A,B,C,D,X[12], 6,0x655b59c3L);
        R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
        R3(C,D,A,B,X[10],15,0xffeff47dL);
        R3(B,C,D,A,X[ 1],21,0x85845dd1L);
        R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
        R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
        R3(C,D,A,B,X[ 6],15,0xa3014314L);
        R3(B,C,D,A,X[13],21,0x4e0811a1L);
        R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
        R3(D,A,B,C,X[11],10,0xbd3af235L);
        R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
        R3(B,C,D,A,X[ 9],21,0xeb86d391L);

        /* Davies-Meyer feed-forward into the chaining state. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
    }
}
/* Process LEN bytes of BUFFER, accumulating context into CTX.
   It is assumed that LEN % 128 == 0.
   SHA-512 compression per FIPS 180-2 section 6.3.2; SWAP and the round
   constant table K[] are defined elsewhere in this file.  */
static void
sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx)
{
  const uint64_t *words = buffer;
  size_t nwords = len / sizeof (uint64_t);

  /* Working variables a..h, seeded from the current hash state.  */
  uint64_t a = ctx->H[0];
  uint64_t b = ctx->H[1];
  uint64_t c = ctx->H[2];
  uint64_t d = ctx->H[3];
  uint64_t e = ctx->H[4];
  uint64_t f = ctx->H[5];
  uint64_t g = ctx->H[6];
  uint64_t h = ctx->H[7];

  /* First increment the byte count.  FIPS 180-2 specifies the possible
     length of the file up to 2^128 bits.  Here we only compute the
     number of bytes.  Do a double word increment.  */
#ifdef USE_TOTAL128
  ctx->total128 += len;
#else
  ctx->total[TOTAL128_low] += len;
  if (ctx->total[TOTAL128_low] < len)
    /* Low word wrapped: carry into the high word.  */
    ++ctx->total[TOTAL128_high];
#endif

  /* Process all bytes in the buffer with 128 bytes in each round of
     the loop.  */
  while (nwords > 0)
    {
      uint64_t W[80];
      /* Save the state for the feed-forward after the 80 rounds.  */
      uint64_t a_save = a;
      uint64_t b_save = b;
      uint64_t c_save = c;
      uint64_t d_save = d;
      uint64_t e_save = e;
      uint64_t f_save = f;
      uint64_t g_save = g;
      uint64_t h_save = h;

      /* Operators defined in FIPS 180-2:4.1.2.  */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39))
#define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41))
#define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7))
#define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6))

      /* It is unfortunate that C does not provide an operator for
         cyclic rotation.  Hope the C compiler is smart enough.  */
#define CYCLIC(w, s) ((w >> s) | (w << (64 - s)))

      /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
      for (unsigned int t = 0; t < 16; ++t)
        {
          W[t] = SWAP (*words);
          ++words;
        }
      for (unsigned int t = 16; t < 80; ++t)
        W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];

      /* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
      for (unsigned int t = 0; t < 80; ++t)
        {
          uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
          uint64_t T2 = S0 (a) + Maj (a, b, c);
          h = g;
          g = f;
          f = e;
          e = d + T1;
          d = c;
          c = b;
          b = a;
          a = T1 + T2;
        }

      /* Add the starting values of the context according to FIPS 180-2:6.3.2
         step 4.  */
      a += a_save;
      b += b_save;
      c += c_save;
      d += d_save;
      e += e_save;
      f += f_save;
      g += g_save;
      h += h_save;

      /* Prepare for the next round (16 64-bit words = 128 bytes consumed).  */
      nwords -= 16;
    }

  /* Put checksum in context given as argument.  */
  ctx->H[0] = a;
  ctx->H[1] = b;
  ctx->H[2] = c;
  ctx->H[3] = d;
  ctx->H[4] = e;
  ctx->H[5] = f;
  ctx->H[6] = g;
  ctx->H[7] = h;
}
/**
 * Classify the relative configuration of the two ellipses: separated,
 * intersecting, or one containing the other.
 *
 * Ellipse0 is mapped to the unit circle; ellipse1 is carried along into a
 * frame in which its quadratic form is diagonal (M2 = R*D*R^T).  The
 * classification then depends on the min/max squared distance from the
 * origin to the transformed ellipse1.
 */
typename IntrEllipse2Ellipse2<Real>::Classification
IntrEllipse2Ellipse2<Real>::GetClassification () const
{
    // Get the parameters of ellipse0.
    Vector2<Real> K0 = mEllipse0->Center;
    Matrix2<Real> R0(mEllipse0->Axis, true);
    Matrix2<Real> D0(
        ((Real)1)/(mEllipse0->Extent[0]*mEllipse0->Extent[0]),
        ((Real)1)/(mEllipse0->Extent[1]*mEllipse0->Extent[1]));

    // Get the parameters of ellipse1.
    Vector2<Real> K1 = mEllipse1->Center;
    Matrix2<Real> R1(mEllipse1->Axis, true);
    Matrix2<Real> D1(
        ((Real)1)/(mEllipse1->Extent[0]*mEllipse1->Extent[0]),
        ((Real)1)/(mEllipse1->Extent[1]*mEllipse1->Extent[1]));

    // Compute K2, the center of ellipse1 in the frame where ellipse0 is the
    // unit circle.
    Matrix2<Real> D0NegHalf(
        mEllipse0->Extent[0],
        mEllipse0->Extent[1]);
    Matrix2<Real> D0Half(
        ((Real)1)/mEllipse0->Extent[0],
        ((Real)1)/mEllipse0->Extent[1]);
    Vector2<Real> K2 = D0Half*((K1 - K0)*R0);

    // Compute M2, the quadratic form of ellipse1 in that frame.
    Matrix2<Real> R1TR0D0NegHalf = R1.TransposeTimes(R0*D0NegHalf);
    Matrix2<Real> M2 = R1TR0D0NegHalf.TransposeTimes(D1)*R1TR0D0NegHalf;

    // Factor M2 = R*D*R^T.
    Matrix2<Real> R, D;
    M2.EigenDecomposition(R, D);

    // Compute K = R^T*K2, the center of ellipse1 in the eigen-frame.
    Vector2<Real> K = K2*R;

    // Transformed ellipsoid0 is Z^T*Z = 1 and transformed ellipsoid1 is
    // (Z-K)^T*D*(Z-K) = 0.  The minimum and maximum squared distances from
    // the origin of points on transformed ellipse1 are used to determine
    // whether the ellipses intersect, are separated, or one contains the
    // other.
    Real minSqrDistance = Math<Real>::MAX_REAL;
    Real maxSqrDistance = (Real)0;
    int i;

    if (K == Vector2<Real>::ZERO)
    {
        // The special case of common centers must be handled separately.  It
        // is not possible for the ellipsoids to be separated.
        for (i = 0; i < 2; ++i)
        {
            Real invD = ((Real)1)/D[i][i];
            if (invD < minSqrDistance)
            {
                minSqrDistance = invD;
            }
            if (invD > maxSqrDistance)
            {
                maxSqrDistance = invD;
            }
        }

        if (maxSqrDistance < (Real)1)
        {
            return EC_ELLIPSE0_CONTAINS_ELLIPSE1;
        }
        else if (minSqrDistance > (Real)1)
        {
            return EC_ELLIPSE1_CONTAINS_ELLIPSE0;
        }
        else
        {
            return EC_ELLIPSES_INTERSECTING;
        }
    }

    // The closest point P0 and farthest point P1 are solutions to
    // s0*D*(P0 - K) = P0 and s1*D1*(P1 - K) = P1 for some scalars s0 and s1
    // that are roots to the function
    //   f(s) = d0*k0^2/(d0*s-1)^2 + d1*k1^2/(d1*s-1)^2 - 1
    // where D = diagonal(d0,d1) and K = (k0,k1).
    Real d0 = D[0][0], d1 = D[1][1];
    // BUG FIX: c0/c1 were computed from the pre-rotation vector K2, but f(s)
    // and the closest/farthest-point formulas below operate in the
    // eigen-frame where the center is K = R^T*K2.  Use K.
    Real c0 = K[0]*K[0], c1 = K[1]*K[1];

    // Sort the values so that d0 >= d1.  This allows us to bound the roots of
    // f(s), of which there are at most 4.
    std::vector<std::pair<Real,Real> > param(2);
    if (d0 >= d1)
    {
        param[0] = std::make_pair(d0, c0);
        param[1] = std::make_pair(d1, c1);
    }
    else
    {
        param[0] = std::make_pair(d1, c1);
        param[1] = std::make_pair(d0, c0);
    }

    // Keep only terms with a nonzero numerator; merge them when d0 == d1.
    std::vector<std::pair<Real,Real> > valid;
    valid.reserve(2);
    if (param[0].first > param[1].first)
    {
        // d0 > d1
        for (i = 0; i < 2; ++i)
        {
            if (param[i].second > (Real)0)
            {
                valid.push_back(param[i]);
            }
        }
    }
    else
    {
        // d0 = d1
        param[0].second += param[1].second;
        if (param[0].second > (Real)0)
        {
            valid.push_back(param[0]);
        }
    }

    size_t numValid = valid.size();
    int numRoots;
    Real roots[4];
    if (numValid == 2)
    {
        GetRoots(
            valid[0].first, valid[1].first,
            valid[0].second, valid[1].second,
            numRoots, roots);
    }
    else if (numValid == 1)
    {
        GetRoots(valid[0].first, valid[0].second, numRoots, roots);
    }
    else
    {
        // numValid cannot be zero because we already handled case K = 0
        assertion(false, "Unexpected condition.\n");
        return EC_ELLIPSES_INTERSECTING;
    }

    // Evaluate the candidate extremal points and track min/max distance.
    for (int i = 0; i < numRoots; ++i)
    {
        Real s = roots[i];
        Real p0 = d0*K[0]*s/(d0*s - (Real)1);
        Real p1 = d1*K[1]*s/(d1*s - (Real)1);
        Real sqrDistance = p0*p0 + p1*p1;
        if (sqrDistance < minSqrDistance)
        {
            minSqrDistance = sqrDistance;
        }
        if (sqrDistance > maxSqrDistance)
        {
            maxSqrDistance = sqrDistance;
        }
    }

    if (maxSqrDistance < (Real)1)
    {
        return EC_ELLIPSE0_CONTAINS_ELLIPSE1;
    }

    if (minSqrDistance > (Real)1)
    {
        // Origin inside ellipse1 iff K^T*D*K = d0*c0 + d1*c1 <= 1.
        if (d0*c0 + d1*c1 > (Real)1)
        {
            return EC_ELLIPSES_SEPARATED;
        }
        else
        {
            return EC_ELLIPSE1_CONTAINS_ELLIPSE0;
        }
    }

    return EC_ELLIPSES_INTERSECTING;
}
/* Revised simplex method with QR refactorization of the basis at every
 * iteration (numerically robust but O(m^3) per step).  The upper-case
 * accessor macros (R0, Q0, AB0, AN0, CB0, CN0, W0, Y0, D0, XB0, XN0,
 * basis0, nonbasis0) are defined elsewhere in this file and index into the
 * caller-supplied work arrays.  Returns LP_OPT on optimality, LP_UNBD if
 * the objective is unbounded, LP_FAIL on a singular basis or iteration
 * limit.  t1/t2 are scratch vectors used through the macros. */
int Rsimp(int m, int n, double **A, double *b, double *c,
          double *x, int *basis, int *nonbasis,
          double **R, double **Q, double *t1, double *t2){
  int i,j,k,l,q,qv;
  int max_steps=20;
  double r,a,at;
  void GQR(int,int,double**,double**); /* QR factorization routine (extern) */
  max_steps=4*n; /* iteration cap scales with problem size */

  for(k=0; k<=max_steps;k++){
    /*
    ++ Step 0) load new basis matrix and factor it
    */
    for(i=0;i<m;i++)for(j=0;j<m;j++)R0(i,j)=AB0(i,j);
    GQR(m,m,Q,R);
    /*
    ++ Step 1) solving system B'*w=c(basis)
    ++         a) forward solve R'*y=c(basis)
    */
    for(i=0;i<m;i++){
      Y0(i)=0.0;
      for(j=0;j<i;j++)Y0(i)+=R0(j,i)*Y0(j);
      if (R0(i,i)!=0.0) Y0(i)=(CB0(i)-Y0(i))/R0(i,i);
      else {
        printf("Warning Singular Matrix Found\n");
        return LP_FAIL;
      }
    }
    /*
    ++         b) find w=Q*y
    ++            note: B'*w=(Q*R)'*Q*y= R'*(Q'*Q)*y=R'*y=c(basis)
    */
    for(i=0;i<m;i++){
      W0(i)=0.0;
      for(j=0;j<m;j++)W0(i)+=Q0(i,j)*Y0(j);
    }
    /*
    ++ Step 2)find entering variable,
    ++ (use lexicographically first variable with negative reduced cost)
    */
    q=n;
    for(i=0;i<n-m;i++){
      /* calculate reduced cost */
      r=CN0(i);
      for(j=0;j<m;j++) r-=W0(j)*AN0(j,i);
      if (r<-zero_tol && (q==n || nonbasis0(i)<nonbasis0(q))) q=i;
    }
    /*
    ++ if ratios were all nonnegative current solution is optimal
    */
    if (q==n){
      if (verbose>0) printf("optimal solution found in %d iterations\n",k);
      return LP_OPT;
    }
    /*
    ++ Step 3)Calculate translation direction for q entering
    ++        by solving system B*d=-A(:,nonbasis(q));
    ++        a) let y=-Q'*A(:,nonbasis(q));
    */
    for(i=0;i<m;i++){
      Y0(i)=0.0;
      for(j=0;j<m;j++) Y0(i)-=Q0(j,i)*AN0(j,q);
    }
    /*
    ++        b) back solve Rd=y  (d=R\y)
    ++           note B*d= Q*R*d=Q*y=Q*-Q'*A(:nonbasis(q))=-A(:,nonbasis(q))
    */
    for(i=m-1;i>=0;i--){
      D0(i)=0.0;
      for(j=m-1;j>=i+1;j--)D0(i)+=R0(i,j)*D0(j);
      if (R0(i,i)!=0.0) D0(i)=(Y0(i)-D0(i))/R0(i,i);
      else {
        printf("Warning Singular Matrix Found\n");
        return LP_FAIL;
      }
    }
    /*
    ++ Step 4 Choose leaving variable
    ++        (first variable to become negative, by moving in direction D)
    ++        (if none become negative, then objective function unbounded)
    */
    a=0;
    l=-1;
    for(i=0;i<m;i++){
      if (D0(i)<-zero_tol){
        /* ratio test: step length at which basic variable i hits zero */
        at=-1*XB0(i)/D0(i);
        if (l==-1 || at<a){ a=at; l=i;}
      }
    }
    if (l==-1){
      if (verbose>0){
        printf("Objective function Unbounded (%d iterations)\n",k);
      }
      return LP_UNBD;
    }
    /*
    ++ Step 5) Update solution and basis data
    */
    XN0(q)=a;
    for(j=0;j<m;j++) XB0(j)+=a*D0(j);
    XB0(l)=0.0; /* enforce strict zeroness of nonbasis variables */
    qv=nonbasis0(q);
    nonbasis0(q)=basis0(l);
    basis0(l)=qv;
  }
  if (verbose>=0){
    printf("Simplex Algorithm did not Terminate in %d iterations\n",k);
  }
  return LP_FAIL;
}
/* SHA-1 compression of one 64-byte block into state h[0..4].  The round
 * macros R0..R4 (defined elsewhere in this file) each perform one round,
 * presumably reading/expanding the message schedule through buf — confirm
 * against the macro definitions. */
static void sha1_transform(uint32_t h[5], const uint8_t data[64])
{
    uint32_t a, b, c, d, e;
    uint32_t buf[16];

    /* copy state and data*/
    a = h[0];
    b = h[1];
    c = h[2];
    d = h[3];
    e = h[4];
    memcpy(buf, data, 64);

    /* unrolled sha-1 rounds */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);

    /* update state */
    h[0] += a;
    h[1] += b;
    h[2] += c;
    h[3] += d;
    h[4] += e;

    /* overwrite all used variables */
    /* NOTE(review): a plain memset/assignment of dead locals may be elided
       by the optimizer; consider a secure-zero primitive if this wipe is a
       hard requirement. */
    a = b = c = d = e = 0;
    memset(buf, 0, 64);
}
/* Driver for a 2nd-order Runge-Kutta finite-volume Euler-equation solver on
 * the GPU.  Sets up host/device buffers, runs the RK2 time loop (flux ->
 * dt -> RK step -> periodic BCs, twice per step), then downloads results,
 * writes them to file and reports error norms against the exact solution.
 * Relies on file/globals defined elsewhere: nx, ny, border, theta, gasGam,
 * cfl_number, currentTime, step, maxStep, dt_host, BCArgs, fluxArgs, dtArgs,
 * RKArgs and the cpu_ptr_2D/gpu_ptr_* helper classes. */
int main(int argc,char **argv){

	// Print GPU properties
	//print_properties();

	// Files to print the result after the last time step
	FILE *rho_file;
	FILE *E_file;
	rho_file = fopen("rho_final.txt", "w");
	E_file = fopen("E_final.txt", "w");

	// Construct initial condition for problem
	ICsinus Config(-1.0, 1.0, -1.0, 1.0);
	//ICsquare Config(0.5,0.5,gasGam);

	// Set initial values for Configuration 1
	/*
	Config.set_rho(rhoConfig19);
	Config.set_pressure(pressureConfig19);
	Config.set_u(uConfig19);
	Config.set_v(vConfig19);
	*/

	// Determining global border based on left over tiles (a little hack)
	int globalPadding;
	globalPadding = (nx+2*border+16)/16;
	globalPadding = 16*globalPadding - (nx+2*border);
	//printf("Globalpad: %i\n", globalPadding);

	// Change border to add padding
	//border = border + globalPadding/2;

	// Initiate the matrices for the unknowns in the Euler equations
	cpu_ptr_2D rho(nx, ny, border,1);
	cpu_ptr_2D E(nx, ny, border,1);
	cpu_ptr_2D rho_u(nx, ny, border,1);
	cpu_ptr_2D rho_v(nx, ny, border,1);
	cpu_ptr_2D zeros(nx, ny, border,1);

	// Set initial condition
	Config.setIC(rho, rho_u, rho_v, E);

	double timeStart = get_wall_time();

	// Test — host-side buffers used for result comparison/output below
	cpu_ptr_2D rho_dummy(nx, ny, border);
	cpu_ptr_2D E_dummy(nx, ny, border);

	/*
	rho_dummy.xmin = -1.0;
	rho_dummy.ymin = -1.0;
	E_dummy.xmin = -1.0;
	E_dummy.ymin = -1.0;
	*/

	// Set block and grid sizes
	dim3 gridBC = dim3(1, 1, 1);
	dim3 blockBC = dim3(BLOCKDIM_BC,1,1);

	dim3 gridBlockFlux;
	dim3 threadBlockFlux;

	dim3 gridBlockRK;
	dim3 threadBlockRK;

	computeGridBlock(gridBlockFlux, threadBlockFlux, nx + 2*border, ny + 2*border, INNERTILEDIM_X, INNERTILEDIM_Y, BLOCKDIM_X, BLOCKDIM_Y);

	computeGridBlock(gridBlockRK, threadBlockRK, nx + 2*border, ny + 2*border, BLOCKDIM_X_RK, BLOCKDIM_Y_RK, BLOCKDIM_X_RK, BLOCKDIM_Y_RK);

	int nElements = gridBlockFlux.x*gridBlockFlux.y;

	// Allocate memory for the GPU pointers
	gpu_ptr_1D L_device(nElements);
	gpu_ptr_1D dt_device(1);

	gpu_ptr_2D rho_device(nx, ny, border);
	gpu_ptr_2D E_device(nx, ny, border);
	gpu_ptr_2D rho_u_device(nx, ny, border);
	gpu_ptr_2D rho_v_device(nx, ny, border);

	// R0..R3: flux/residual buffers; Q0..Q3: intermediate RK-stage unknowns
	gpu_ptr_2D R0(nx, ny, border);
	gpu_ptr_2D R1(nx, ny, border);
	gpu_ptr_2D R2(nx, ny, border);
	gpu_ptr_2D R3(nx, ny, border);

	gpu_ptr_2D Q0(nx, ny, border);
	gpu_ptr_2D Q1(nx, ny, border);
	gpu_ptr_2D Q2(nx, ny, border);
	gpu_ptr_2D Q3(nx, ny, border);

	// Allocate pinned memory on host
	init_allocate();

	// Set BC arguments: [0] for the primary unknowns, [1] for the RK-stage
	// buffers, [2] again for the primary unknowns after the second stage.
	set_bc_args(BCArgs[0], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border);
	set_bc_args(BCArgs[1], Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx+2*border, ny+2*border, border);
	set_bc_args(BCArgs[2], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border);

	// Set FLUX arguments: stage 1 reads the unknowns, stage 2 reads Q0..Q3.
	set_flux_args(fluxArgs[0], L_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y);
	set_flux_args(fluxArgs[1], L_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y);

	// Set TIME argument (CFL-based timestep reduction over L_device)
	set_dt_args(dtArgs, L_device.getRawPtr(), dt_device.getRawPtr(), nElements, rho.get_dx(), rho.get_dy(), cfl_number);

	// Set Rk arguments: stage 1 writes Q0..Q3, stage 2 writes the unknowns.
	set_rk_args(RKArgs[0], dt_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx, ny, border);
	set_rk_args(RKArgs[1], dt_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx, ny, border);

	L_device.set(FLT_MAX);

	/*
	R0.upload(zeros.get_ptr());
	R1.upload(zeros.get_ptr());
	R2.upload(zeros.get_ptr());
	R3.upload(zeros.get_ptr());

	Q0.upload(zeros.get_ptr());
	Q1.upload(zeros.get_ptr());
	Q2.upload(zeros.get_ptr());
	Q3.upload(zeros.get_ptr());
	*/

	// Zero-fill the device work buffers
	R0.set(0,0,0,nx,ny,border);
	R1.set(0,0,0,nx,ny,border);
	R2.set(0,0,0,nx,ny,border);
	R3.set(0,0,0,nx,ny,border);

	Q0.set(0,0,0,nx,ny,border);
	Q1.set(0,0,0,nx,ny,border);
	Q2.set(0,0,0,nx,ny,border);
	Q3.set(0,0,0,nx,ny,border);

	// Upload initial condition to the device
	rho_device.upload(rho.get_ptr());
	rho_u_device.upload(rho_u.get_ptr());
	rho_v_device.upload(rho_v.get_ptr());
	E_device.upload(E.get_ptr());

	// Update boudries
	callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[0]);

	//Create cuda stream
	cudaStream_t stream1;
	cudaStreamCreate(&stream1);
	cudaEvent_t dt_complete;
	cudaEventCreate(&dt_complete);

	while (currentTime < timeLength && step < maxStep){

		//RK1
		//Compute flux
		callFluxKernel(gridBlockFlux, threadBlockFlux, 0, fluxArgs[0]);

		// Compute timestep (based on CFL condition)
		callDtKernel(TIMETHREADS, dtArgs);

		// Async copy so the dt transfer overlaps the RK1 kernel; the event
		// below is synchronized before currentTime is advanced.
		cudaMemcpyAsync(dt_host, dt_device.getRawPtr(), sizeof(float), cudaMemcpyDeviceToHost, stream1);
		cudaEventRecord(dt_complete, stream1);

		// Perform RK1 step
		callRKKernel(gridBlockRK, threadBlockRK, 0, RKArgs[0]);

		//Update boudries
		callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[1]);

		//RK2
		// Compute flux
		callFluxKernel(gridBlockFlux, threadBlockFlux, 1, fluxArgs[1]);

		//Perform RK2 step
		callRKKernel(gridBlockRK, threadBlockRK, 1, RKArgs[1]);

		//cudaEventRecord(srteam_sync, srteam1);

		callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[2]);

		cudaEventSynchronize(dt_complete);

		step++;
		currentTime += *dt_host;
	//	printf("Step: %i, current time: %.6f dt:%.6f\n" , step,currentTime, dt_host[0]);

	}

	//cuProfilerStop();
	//cudaProfilerStop();

	printf("Elapsed time %.5f", get_wall_time() - timeStart);

	// Download final state; rho goes into rho_dummy for the error norms
	E_device.download(E.get_ptr());
	rho_u_device.download(rho_u.get_ptr());
	rho_v_device.download(rho_v.get_ptr());
	rho_device.download(rho_dummy.get_ptr());

	rho_dummy.printToFile(rho_file, true, false);

	// Compare against the analytic solution at the final time
	Config.exactSolution(E_dummy, currentTime);
	E_dummy.printToFile(E_file, true, false);

	float LinfError = Linf(E_dummy, rho_dummy);
	float L1Error = L1(E_dummy, rho_dummy);
	float L1Error2 = L1test(E_dummy, rho_dummy);

	printf("nx: %i\t Linf error %.9f\t L1 error %.7f L1test erro %.7f", nx, LinfError, L1Error, L1Error2);
	printf("nx: %i step: %i, current time: %.6f dt:%.6f\n" , nx, step,currentTime, dt_host[0]);

	/*
	cudaMemcpy(L_host, L_device, sizeof(float)*(nElements), cudaMemcpyDeviceToHost);
	for (int i =0; i < nElements; i++)
		printf(" %.7f ", L_host[i]);
	*/

	// NOTE(review): rho_file/E_file are never fclose()d and the CUDA stream/
	// event are not destroyed — harmless at process exit, but worth fixing.
	printf("%s\n", cudaGetErrorString(cudaGetLastError()));

	return(0);
}
/*
 * md4_block_data_order() -- MD4 compression function.
 *
 * Processes `num` 64-byte message blocks from `data_`, folding each block
 * into the running digest state in `c` (c->A..c->D).
 *
 * Relies on macros defined elsewhere in this file:
 *   HOST_c2l(p,l) -- load a little-endian 32-bit word and advance p;
 *   R0/R1/R2      -- the three MD4 round operations (last argument is the
 *                    round's additive constant; 0 for round 0).
 */
void md4_block_data_order(MD4_CTX *c, const void *data_, size_t num)
{
    const uint8_t *data = data_;
    uint32_t A, B, C, D, l;
    uint32_t X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13,
        X14, X15;
    A = c->A;
    B = c->B;
    C = c->C;
    D = c->D;
    for (; num--;) {
        /* Word loads are interleaved with Round 0 so each X word is
         * fetched just before its first use. */
        HOST_c2l(data, l);
        X0 = l;
        HOST_c2l(data, l);
        X1 = l;
        /* Round 0 */
        R0(A, B, C, D, X0, 3, 0);
        HOST_c2l(data, l);
        X2 = l;
        R0(D, A, B, C, X1, 7, 0);
        HOST_c2l(data, l);
        X3 = l;
        R0(C, D, A, B, X2, 11, 0);
        HOST_c2l(data, l);
        X4 = l;
        R0(B, C, D, A, X3, 19, 0);
        HOST_c2l(data, l);
        X5 = l;
        R0(A, B, C, D, X4, 3, 0);
        HOST_c2l(data, l);
        X6 = l;
        R0(D, A, B, C, X5, 7, 0);
        HOST_c2l(data, l);
        X7 = l;
        R0(C, D, A, B, X6, 11, 0);
        HOST_c2l(data, l);
        X8 = l;
        R0(B, C, D, A, X7, 19, 0);
        HOST_c2l(data, l);
        X9 = l;
        R0(A, B, C, D, X8, 3, 0);
        HOST_c2l(data, l);
        X10 = l;
        R0(D, A, B, C, X9, 7, 0);
        HOST_c2l(data, l);
        X11 = l;
        R0(C, D, A, B, X10, 11, 0);
        HOST_c2l(data, l);
        X12 = l;
        R0(B, C, D, A, X11, 19, 0);
        HOST_c2l(data, l);
        X13 = l;
        R0(A, B, C, D, X12, 3, 0);
        HOST_c2l(data, l);
        X14 = l;
        R0(D, A, B, C, X13, 7, 0);
        HOST_c2l(data, l);
        X15 = l;
        R0(C, D, A, B, X14, 11, 0);
        R0(B, C, D, A, X15, 19, 0);
        /* Round 1 -- word order 0,4,8,12 / 1,5,9,13 / ..., shifts 3,5,9,13 */
        R1(A, B, C, D, X0, 3, 0x5A827999L);
        R1(D, A, B, C, X4, 5, 0x5A827999L);
        R1(C, D, A, B, X8, 9, 0x5A827999L);
        R1(B, C, D, A, X12, 13, 0x5A827999L);
        R1(A, B, C, D, X1, 3, 0x5A827999L);
        R1(D, A, B, C, X5, 5, 0x5A827999L);
        R1(C, D, A, B, X9, 9, 0x5A827999L);
        R1(B, C, D, A, X13, 13, 0x5A827999L);
        R1(A, B, C, D, X2, 3, 0x5A827999L);
        R1(D, A, B, C, X6, 5, 0x5A827999L);
        R1(C, D, A, B, X10, 9, 0x5A827999L);
        R1(B, C, D, A, X14, 13, 0x5A827999L);
        R1(A, B, C, D, X3, 3, 0x5A827999L);
        R1(D, A, B, C, X7, 5, 0x5A827999L);
        R1(C, D, A, B, X11, 9, 0x5A827999L);
        R1(B, C, D, A, X15, 13, 0x5A827999L);
        /* Round 2 -- bit-reversed word order, shifts 3,9,11,15 */
        R2(A, B, C, D, X0, 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X8, 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X4, 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X12, 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X2, 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X10, 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X6, 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X14, 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X1, 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X9, 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X5, 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X13, 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X3, 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X11, 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X7, 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X15, 15, 0x6ED9EBA1L);
        /* Davies-Meyer feed-forward: fold block output into the context. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
    }
}
/*
 * MD5MAC::Transform -- one application of the keyed MD5 compression
 * function used by MD5-MAC.
 *
 * digest : 4-word chaining state, updated in place.
 * X      : 16-word message block (already in host word order).
 * key    : 4 key words; key[i] is mixed into every operation of round i,
 *          which is the only difference from a plain MD5 transform.
 *
 * BUG FIX: the original R0 macro definition ended with "};\" -- the stray
 * line-continuation backslash spliced the following "#define R1" line into
 * R0's replacement list (line splicing happens before directives are
 * recognized), which is ill-formed and left R1 undefined.  The trailing
 * backslash has been removed.  Also dropped the deprecated `register`
 * keyword (removed in C++17; it was always only a hint).
 */
void MD5MAC::Transform (word32 *digest, const word32 *X, const word32 *key)
{
// #define F(x,y,z) ((x & y) | (~x & z))
#define F(x,y,z) (z ^ (x & (y^z)))
// #define G(x,y,z) ((x & z) | (y & ~z))
#define G(x,y,z) (y ^ (z & (x^y)))
#define H(x,y,z) (x ^ y ^ z)
#define I(x,y,z) (y ^ (x | ~z))

/* One MD5 operation: a += f(b,c,d) + msg + const + key; rotate; add b. */
#define R0(a,b,c,d,k,s,t) { \
    a+=(k+t+ F((b),(c),(d)) + key[0]); \
    a = rotlFixed(word32(a), (unsigned int)(s)); \
    a+=b; };
#define R1(a,b,c,d,k,s,t) { \
    a+=(k+t+ G((b),(c),(d)) + key[1]); \
    a = rotlFixed(word32(a), (unsigned int)(s)); \
    a+=b; };
#define R2(a,b,c,d,k,s,t) { \
    a+=(k+t+ H((b),(c),(d)) + key[2]); \
    a = rotlFixed(word32(a), (unsigned int)(s)); \
    a+=b; };
#define R3(a,b,c,d,k,s,t) { \
    a+=(k+t+ I((b),(c),(d)) + key[3]); \
    a = rotlFixed(word32(a), (unsigned int)(s)); \
    a+=b; };

    /* Wider-than-32-bit locals are fine: every use truncates via
     * word32(...) before rotation, so the low 32 bits stay exact. */
    unsigned long A,B,C,D;

    A=digest[0];
    B=digest[1];
    C=digest[2];
    D=digest[3];

    /* Round 0 */
    R0(A,B,C,D,X[ 0], 7,0xd76aa478);
    R0(D,A,B,C,X[ 1],12,0xe8c7b756);
    R0(C,D,A,B,X[ 2],17,0x242070db);
    R0(B,C,D,A,X[ 3],22,0xc1bdceee);
    R0(A,B,C,D,X[ 4], 7,0xf57c0faf);
    R0(D,A,B,C,X[ 5],12,0x4787c62a);
    R0(C,D,A,B,X[ 6],17,0xa8304613);
    R0(B,C,D,A,X[ 7],22,0xfd469501);
    R0(A,B,C,D,X[ 8], 7,0x698098d8);
    R0(D,A,B,C,X[ 9],12,0x8b44f7af);
    R0(C,D,A,B,X[10],17,0xffff5bb1);
    R0(B,C,D,A,X[11],22,0x895cd7be);
    R0(A,B,C,D,X[12], 7,0x6b901122);
    R0(D,A,B,C,X[13],12,0xfd987193);
    R0(C,D,A,B,X[14],17,0xa679438e);
    R0(B,C,D,A,X[15],22,0x49b40821);

    /* Round 1 */
    R1(A,B,C,D,X[ 1], 5,0xf61e2562);
    R1(D,A,B,C,X[ 6], 9,0xc040b340);
    R1(C,D,A,B,X[11],14,0x265e5a51);
    R1(B,C,D,A,X[ 0],20,0xe9b6c7aa);
    R1(A,B,C,D,X[ 5], 5,0xd62f105d);
    R1(D,A,B,C,X[10], 9,0x02441453);
    R1(C,D,A,B,X[15],14,0xd8a1e681);
    R1(B,C,D,A,X[ 4],20,0xe7d3fbc8);
    R1(A,B,C,D,X[ 9], 5,0x21e1cde6);
    R1(D,A,B,C,X[14], 9,0xc33707d6);
    R1(C,D,A,B,X[ 3],14,0xf4d50d87);
    R1(B,C,D,A,X[ 8],20,0x455a14ed);
    R1(A,B,C,D,X[13], 5,0xa9e3e905);
    R1(D,A,B,C,X[ 2], 9,0xfcefa3f8);
    R1(C,D,A,B,X[ 7],14,0x676f02d9);
    R1(B,C,D,A,X[12],20,0x8d2a4c8a);

    /* Round 2 */
    R2(A,B,C,D,X[ 5], 4,0xfffa3942);
    R2(D,A,B,C,X[ 8],11,0x8771f681);
    R2(C,D,A,B,X[11],16,0x6d9d6122);
    R2(B,C,D,A,X[14],23,0xfde5380c);
    R2(A,B,C,D,X[ 1], 4,0xa4beea44);
    R2(D,A,B,C,X[ 4],11,0x4bdecfa9);
    R2(C,D,A,B,X[ 7],16,0xf6bb4b60);
    R2(B,C,D,A,X[10],23,0xbebfbc70);
    R2(A,B,C,D,X[13], 4,0x289b7ec6);
    R2(D,A,B,C,X[ 0],11,0xeaa127fa);
    R2(C,D,A,B,X[ 3],16,0xd4ef3085);
    R2(B,C,D,A,X[ 6],23,0x04881d05);
    R2(A,B,C,D,X[ 9], 4,0xd9d4d039);
    R2(D,A,B,C,X[12],11,0xe6db99e5);
    R2(C,D,A,B,X[15],16,0x1fa27cf8);
    R2(B,C,D,A,X[ 2],23,0xc4ac5665);

    /* Round 3 */
    R3(A,B,C,D,X[ 0], 6,0xf4292244);
    R3(D,A,B,C,X[ 7],10,0x432aff97);
    R3(C,D,A,B,X[14],15,0xab9423a7);
    R3(B,C,D,A,X[ 5],21,0xfc93a039);
    R3(A,B,C,D,X[12], 6,0x655b59c3);
    R3(D,A,B,C,X[ 3],10,0x8f0ccc92);
    R3(C,D,A,B,X[10],15,0xffeff47d);
    R3(B,C,D,A,X[ 1],21,0x85845dd1);
    R3(A,B,C,D,X[ 8], 6,0x6fa87e4f);
    R3(D,A,B,C,X[15],10,0xfe2ce6e0);
    R3(C,D,A,B,X[ 6],15,0xa3014314);
    R3(B,C,D,A,X[13],21,0x4e0811a1);
    R3(A,B,C,D,X[ 4], 6,0xf7537e82);
    R3(D,A,B,C,X[11],10,0xbd3af235);
    R3(C,D,A,B,X[ 2],15,0x2ad7d2bb);
    R3(B,C,D,A,X[ 9],21,0xeb86d391);

    /* Feed-forward into the chaining state (truncates to 32 bits). */
    digest[0]+=A;
    digest[1]+=B;
    digest[2]+=C;
    digest[3]+=D;
}
void GarbleBlock(unsigned char *block,uint32_t a,uint32_t b,uint32_t c,uint32_t d,uint32_t e) { uint32_t W[16]; for(int i=0;i<16;i++) W[i]=(block[4*i+0]<<24)|(block[4*i+1]<<16)|(block[4*i+2]<<8)|block[4*i+3]; R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); for(int i=0;i<64;i++) block[i]=W[i/4]>>(i%4)*8; }
/*
 * md5_block_host_order() -- MD5 compression function over `num` 16-word
 * blocks that are already in host byte order (no endian conversion here).
 *
 * c    : running MD5 state (c->A..c->D), updated in place.
 * data : `num` * HASH_LBLOCK words of message data.
 *
 * R0..R3 are the four MD5 round macros (defined elsewhere in this file);
 * each call's arguments are (state perm, message word, rotate amount,
 * round constant T[i] from the MD5 specification).
 */
void md5_block_host_order (MD5_CTX *c, const void *data, size_t num)
{
    const MD5_LONG *X=data;
    register unsigned MD32_REG_T A,B,C,D;

    A=c->A;
    B=c->B;
    C=c->C;
    D=c->D;

    for (;num--;X+=HASH_LBLOCK)
        {
        /* Round 0 -- F function, words in order, shifts 7,12,17,22 */
        R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
        R0(D,A,B,C,X[ 1],12,0xe8c7b756L);
        R0(C,D,A,B,X[ 2],17,0x242070dbL);
        R0(B,C,D,A,X[ 3],22,0xc1bdceeeL);
        R0(A,B,C,D,X[ 4], 7,0xf57c0fafL);
        R0(D,A,B,C,X[ 5],12,0x4787c62aL);
        R0(C,D,A,B,X[ 6],17,0xa8304613L);
        R0(B,C,D,A,X[ 7],22,0xfd469501L);
        R0(A,B,C,D,X[ 8], 7,0x698098d8L);
        R0(D,A,B,C,X[ 9],12,0x8b44f7afL);
        R0(C,D,A,B,X[10],17,0xffff5bb1L);
        R0(B,C,D,A,X[11],22,0x895cd7beL);
        R0(A,B,C,D,X[12], 7,0x6b901122L);
        R0(D,A,B,C,X[13],12,0xfd987193L);
        R0(C,D,A,B,X[14],17,0xa679438eL);
        R0(B,C,D,A,X[15],22,0x49b40821L);
        /* Round 1 -- G function, word stride 5, shifts 5,9,14,20 */
        R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
        R1(D,A,B,C,X[ 6], 9,0xc040b340L);
        R1(C,D,A,B,X[11],14,0x265e5a51L);
        R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
        R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
        R1(D,A,B,C,X[10], 9,0x02441453L);
        R1(C,D,A,B,X[15],14,0xd8a1e681L);
        R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
        R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
        R1(D,A,B,C,X[14], 9,0xc33707d6L);
        R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
        R1(B,C,D,A,X[ 8],20,0x455a14edL);
        R1(A,B,C,D,X[13], 5,0xa9e3e905L);
        R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
        R1(C,D,A,B,X[ 7],14,0x676f02d9L);
        R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
        /* Round 2 -- H function, word stride 3, shifts 4,11,16,23 */
        R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
        R2(D,A,B,C,X[ 8],11,0x8771f681L);
        R2(C,D,A,B,X[11],16,0x6d9d6122L);
        R2(B,C,D,A,X[14],23,0xfde5380cL);
        R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
        R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
        R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
        R2(B,C,D,A,X[10],23,0xbebfbc70L);
        R2(A,B,C,D,X[13], 4,0x289b7ec6L);
        R2(D,A,B,C,X[ 0],11,0xeaa127faL);
        R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
        R2(B,C,D,A,X[ 6],23,0x04881d05L);
        R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
        R2(D,A,B,C,X[12],11,0xe6db99e5L);
        R2(C,D,A,B,X[15],16,0x1fa27cf8L);
        R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
        /* Round 3 -- I function, word stride 7, shifts 6,10,15,21 */
        R3(A,B,C,D,X[ 0], 6,0xf4292244L);
        R3(D,A,B,C,X[ 7],10,0x432aff97L);
        R3(C,D,A,B,X[14],15,0xab9423a7L);
        R3(B,C,D,A,X[ 5],21,0xfc93a039L);
        R3(A,B,C,D,X[12], 6,0x655b59c3L);
        R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
        R3(C,D,A,B,X[10],15,0xffeff47dL);
        R3(B,C,D,A,X[ 1],21,0x85845dd1L);
        R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
        R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
        R3(C,D,A,B,X[ 6],15,0xa3014314L);
        R3(B,C,D,A,X[13],21,0x4e0811a1L);
        R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
        R3(D,A,B,C,X[11],10,0xbd3af235L);
        R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
        R3(B,C,D,A,X[ 9],21,0xeb86d391L);
        /* Feed-forward: fold the block output back into the context. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
        }
}
/* transform one 512bit block. this is the core of the algorithm. */
/*
 * sha1_transform() -- SHA-1 compression function.
 *
 * state : 5-word chaining state, updated in place.
 * buf   : one 64-byte message block, aliased below as 32-bit words (the
 *         blk0/blk macros, defined elsewhere, handle byte order and the
 *         in-place message schedule over `block`).
 *
 * Two implementations are provided: a compact loop (SHA1_SHORTCODE) and a
 * fully unrolled version that is larger but faster.
 */
static void sha1_transform(uint32_t state[5], char buf[SHA1_BLOCKSIZE])
{
    uint32_t a, b, c, d, e;
    uint32_t *block = (uint32_t *)buf;
#ifdef SHA1_SHORTCODE
    uint8_t i;
#endif

    /* copy state to working vars */
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];
    e = state[4];

#ifdef SHA1_SHORTCODE
    /* Compact form: select round function and constant by round index. */
    for (i = 0; i < 80; i++) {
        uint32_t t;
        if (i < 20)
            t = ((b & (c ^ d)) ^ d) + 0x5A827999;
        else if (i < 40)
            t = (b ^ c ^ d) + 0x6ED9EBA1;
        else if (i < 60)
            t = (((b | c) & d) | (b & c)) + 0x8F1BBCDC;
        else
            t = (b ^ c ^ d) + 0xCA62C1D6;
        t += (i < 16) ? blk0(i) : blk(i);
        t += e + rol(a, 5);
        /* rotate the five working variables */
        e = d; d = c; c = rol(b, 30); b = a; a = t;
    }
#else
/* R0 and R1, R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) \
    z += ((w&(x^y))^y) + blk0(i) + 0x5A827999 + rol(v, 5); w = rol(w, 30)
#define R1(v,w,x,y,z,i) \
    z += ((w&(x^y))^y) + blk(i) + 0x5A827999 + rol(v, 5); w = rol(w, 30)
#define R2(v,w,x,y,z,i) \
    z += (w^x^y) + blk(i) + 0x6ED9EBA1 + rol(v, 5); w = rol(w, 30)
#define R3(v,w,x,y,z,i) \
    z += (((w|x)&y)|(w&x)) + blk(i) + 0x8F1BBCDC + rol(v, 5); w = rol(w, 30)
#define R4(v,w,x,y,z,i) \
    z += (w^x^y) + blk(i) + 0xCA62C1D6 + rol(v, 5); w = rol(w, 30)
    /* 4 rounds of 20 operations each. loop unrolled.
       The 5-variable rotation is expressed by permuting macro args. */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
#endif
    /* add the working vars back into state */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
}
void GQR(int r, int c, double **Q, double **R){ int i,j,k; double s,s1,s2; double t1,t2; for(i=0;i<r;i++){ for(k=0;k<r;k++)Q0(i,k)=0.0; Q0(i,i)=1.0; } for (i=0;i<c;i++) for (k=i+1;k<r;k++) /* performing givens rotations to zero A[k][i] */ if (R0(k,i)!=0){ s=sqrt(R0(i,i)*R0(i,i)+R0(k,i)*R0(k,i)); s1=R0(i,i)/s; s2=R0(k,i)/s; for(j=0;j<c;j++) { t1=R0(i,j); t2=R0(k,j); R0(i,j)=s1*t1+s2*t2; R0(k,j)=-s2*t1+s1*t2; } /* actually doing givens row rotations on Q */ for(j=0;j<r;j++){ t1=Q0(j,i); t2=Q0(j,k); Q0(j,i)=s1*t1+s2*t2; Q0(j,k)=-s2*t1+s1*t2; } } }
void SHA1::Transform(word32 *state, const word32 *data) { word32 W[16]; /* Copy context->state[] to working vars */ word32 a = state[0]; word32 b = state[1]; word32 c = state[2]; word32 d = state[3]; word32 e = state[4]; /* 4 rounds of 20 operations each. Loop unrolled. */ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); /* Add the working vars back into context.state[] */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; }
/*
 * transform() -- SHA-1 compression function.
 *
 * digest : 5-word chaining state, updated in place.
 * block  : 16-word message block, passed explicitly to each round macro.
 *
 * Unlike the classic unrolled form, the schedule index here is the round
 * number taken mod 16: the R1..R4 macros (defined elsewhere) are assumed
 * to recompute the schedule word in block[] in place, so only 16 words of
 * storage are needed for all 80 rounds.
 */
void transform(
    std::uint32_t digest[],
    std::uint32_t block[BLOCK_INTS])
{
    /* Copy digest[] to working vars. */
    std::uint32_t a = digest[0];
    std::uint32_t b = digest[1];
    std::uint32_t c = digest[2];
    std::uint32_t d = digest[3];
    std::uint32_t e = digest[4];

    /* 4 rounds of 20 operations each; 5-variable rotation expressed by
       permuting macro arguments; schedule index is (round mod 16). */
    R0(block, a, b, c, d, e, 0);
    R0(block, e, a, b, c, d, 1);
    R0(block, d, e, a, b, c, 2);
    R0(block, c, d, e, a, b, 3);
    R0(block, b, c, d, e, a, 4);
    R0(block, a, b, c, d, e, 5);
    R0(block, e, a, b, c, d, 6);
    R0(block, d, e, a, b, c, 7);
    R0(block, c, d, e, a, b, 8);
    R0(block, b, c, d, e, a, 9);
    R0(block, a, b, c, d, e, 10);
    R0(block, e, a, b, c, d, 11);
    R0(block, d, e, a, b, c, 12);
    R0(block, c, d, e, a, b, 13);
    R0(block, b, c, d, e, a, 14);
    R0(block, a, b, c, d, e, 15);
    R1(block, e, a, b, c, d, 0);
    R1(block, d, e, a, b, c, 1);
    R1(block, c, d, e, a, b, 2);
    R1(block, b, c, d, e, a, 3);
    R2(block, a, b, c, d, e, 4);
    R2(block, e, a, b, c, d, 5);
    R2(block, d, e, a, b, c, 6);
    R2(block, c, d, e, a, b, 7);
    R2(block, b, c, d, e, a, 8);
    R2(block, a, b, c, d, e, 9);
    R2(block, e, a, b, c, d, 10);
    R2(block, d, e, a, b, c, 11);
    R2(block, c, d, e, a, b, 12);
    R2(block, b, c, d, e, a, 13);
    R2(block, a, b, c, d, e, 14);
    R2(block, e, a, b, c, d, 15);
    R2(block, d, e, a, b, c, 0);
    R2(block, c, d, e, a, b, 1);
    R2(block, b, c, d, e, a, 2);
    R2(block, a, b, c, d, e, 3);
    R2(block, e, a, b, c, d, 4);
    R2(block, d, e, a, b, c, 5);
    R2(block, c, d, e, a, b, 6);
    R2(block, b, c, d, e, a, 7);
    R3(block, a, b, c, d, e, 8);
    R3(block, e, a, b, c, d, 9);
    R3(block, d, e, a, b, c, 10);
    R3(block, c, d, e, a, b, 11);
    R3(block, b, c, d, e, a, 12);
    R3(block, a, b, c, d, e, 13);
    R3(block, e, a, b, c, d, 14);
    R3(block, d, e, a, b, c, 15);
    R3(block, c, d, e, a, b, 0);
    R3(block, b, c, d, e, a, 1);
    R3(block, a, b, c, d, e, 2);
    R3(block, e, a, b, c, d, 3);
    R3(block, d, e, a, b, c, 4);
    R3(block, c, d, e, a, b, 5);
    R3(block, b, c, d, e, a, 6);
    R3(block, a, b, c, d, e, 7);
    R3(block, e, a, b, c, d, 8);
    R3(block, d, e, a, b, c, 9);
    R3(block, c, d, e, a, b, 10);
    R3(block, b, c, d, e, a, 11);
    R4(block, a, b, c, d, e, 12);
    R4(block, e, a, b, c, d, 13);
    R4(block, d, e, a, b, c, 14);
    R4(block, c, d, e, a, b, 15);
    R4(block, b, c, d, e, a, 0);
    R4(block, a, b, c, d, e, 1);
    R4(block, e, a, b, c, d, 2);
    R4(block, d, e, a, b, c, 3);
    R4(block, c, d, e, a, b, 4);
    R4(block, b, c, d, e, a, 5);
    R4(block, a, b, c, d, e, 6);
    R4(block, e, a, b, c, d, 7);
    R4(block, d, e, a, b, c, 8);
    R4(block, c, d, e, a, b, 9);
    R4(block, b, c, d, e, a, 10);
    R4(block, a, b, c, d, e, 11);
    R4(block, e, a, b, c, d, 12);
    R4(block, d, e, a, b, c, 13);
    R4(block, c, d, e, a, b, 14);
    R4(block, b, c, d, e, a, 15);

    /* Add the working vars back into digest[]. */
    digest[0] += a;
    digest[1] += b;
    digest[2] += c;
    digest[3] += d;
    digest[4] += e;
}
static void sha1_transform(uint32_t state[5], const uint8_t buffer[64]) { uint32_t block[80]; unsigned int i, a, b, c, d, e; a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; #if CONFIG_SMALL for (i = 0; i < 80; i++) { int t; if (i < 16) t = AV_RB32(buffer + 4 * i); else t = rol(block[i-3] ^ block[i-8] ^ block[i-14] ^ block[i-16], 1); block[i] = t; t += e + rol(a, 5); if (i < 40) { if (i < 20) t += ((b&(c^d))^d) + 0x5A827999; else t += ( b^c ^d) + 0x6ED9EBA1; } else { if (i < 60) t += (((b|c)&d)|(b&c)) + 0x8F1BBCDC; else t += ( b^c ^d) + 0xCA62C1D6; } e = d; d = c; c = rol(b, 30); b = a; a = t; } #else #define R1_0 \ R0(a, b, c, d, e, 0 + i); \ R0(e, a, b, c, d, 1 + i); \ R0(d, e, a, b, c, 2 + i); \ R0(c, d, e, a, b, 3 + i); \ R0(b, c, d, e, a, 4 + i); \ i += 5 i = 0; R1_0; R1_0; R1_0; R0(a, b, c, d, e, 15); R1(e, a, b, c, d, 16); R1(d, e, a, b, c, 17); R1(c, d, e, a, b, 18); R1(b, c, d, e, a, 19); #define R1_20 \ R2(a, b, c, d, e, 0 + i); \ R2(e, a, b, c, d, 1 + i); \ R2(d, e, a, b, c, 2 + i); \ R2(c, d, e, a, b, 3 + i); \ R2(b, c, d, e, a, 4 + i); \ i += 5 i = 20; R1_20; R1_20; R1_20; R1_20; #define R1_40 \ R3(a, b, c, d, e, 0 + i); \ R3(e, a, b, c, d, 1 + i); \ R3(d, e, a, b, c, 2 + i); \ R3(c, d, e, a, b, 3 + i); \ R3(b, c, d, e, a, 4 + i); \ i += 5 R1_40; R1_40; R1_40; R1_40; #define R1_60 \ R4(a, b, c, d, e, 0 + i); \ R4(e, a, b, c, d, 1 + i); \ R4(d, e, a, b, c, 2 + i); \ R4(c, d, e, a, b, 3 + i); \ R4(b, c, d, e, a, 4 + i); \ i += 5 R1_60; R1_60; R1_60; R1_60; #endif state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; }
/*
 * ripemd_256_transform() -- RIPEMD-256 compression function.
 *
 * ctx   : holds the 8-word chaining state (ctx->state) and the 16-word
 *         message buffer (ctx->x).
 * block : `nb` consecutive 64-byte message blocks.
 *
 * RIPEMD-256 runs two parallel 4-variable "lanes" (a..d and aa..dd) with
 * different boolean functions (F0..F3) and constants (K*/KK*, defined
 * elsewhere); after each of the four 16-operation rounds, one variable is
 * exchanged between the lanes (the t swaps at #15/#31/#47/#63).  The R0
 * macro performs one operation: (function, constant, rotate, word index).
 */
static void ripemd_256_transform(akmos_ripemd_t *ctx, const uint8_t *block, size_t nb)
{
    uint32_t a, b, c, d, aa, bb, cc, dd, t, *x, *state;
    size_t i;

    state = ctx->state;
    x = ctx->x;

    for(i = 0; i < nb; i++, block += AKMOS_RIPEMD_BLKLEN) {
        memcpy(x, block, AKMOS_RIPEMD_BLKLEN);

        /* Load chaining state: left lane a..d, right lane aa..dd. */
        a = state[0];
        b = state[1];
        c = state[2];
        d = state[3];
        aa = state[4];
        bb = state[5];
        cc = state[6];
        dd = state[7];

        /* Round 1 */
        R0(a, b, c, d, F0, K0, 11, 0);
        R0(d, a, b, c, F0, K0, 14, 1);
        R0(c, d, a, b, F0, K0, 15, 2);
        R0(b, c, d, a, F0, K0, 12, 3);
        R0(a, b, c, d, F0, K0, 5, 4);
        R0(d, a, b, c, F0, K0, 8, 5);
        R0(c, d, a, b, F0, K0, 7, 6);
        R0(b, c, d, a, F0, K0, 9, 7);
        R0(a, b, c, d, F0, K0, 11, 8);
        R0(d, a, b, c, F0, K0, 13, 9);
        R0(c, d, a, b, F0, K0, 14, 10);
        R0(b, c, d, a, F0, K0, 15, 11);
        R0(a, b, c, d, F0, K0, 6, 12);
        R0(d, a, b, c, F0, K0, 7, 13);
        R0(c, d, a, b, F0, K0, 9, 14);
        R0(b, c, d, a, F0, K0, 8, 15);
        /* right lane of round 1 */
        R0(aa, bb, cc, dd, F3, KK0, 8, 5);
        R0(dd, aa, bb, cc, F3, KK0, 9, 14);
        R0(cc, dd, aa, bb, F3, KK0, 9, 7);
        R0(bb, cc, dd, aa, F3, KK0, 11, 0);
        R0(aa, bb, cc, dd, F3, KK0, 13, 9);
        R0(dd, aa, bb, cc, F3, KK0, 15, 2);
        R0(cc, dd, aa, bb, F3, KK0, 15, 11);
        R0(bb, cc, dd, aa, F3, KK0, 5, 4);
        R0(aa, bb, cc, dd, F3, KK0, 7, 13);
        R0(dd, aa, bb, cc, F3, KK0, 7, 6);
        R0(cc, dd, aa, bb, F3, KK0, 8, 15);
        R0(bb, cc, dd, aa, F3, KK0, 11, 8);
        R0(aa, bb, cc, dd, F3, KK0, 14, 1);
        R0(dd, aa, bb, cc, F3, KK0, 14, 10);
        R0(cc, dd, aa, bb, F3, KK0, 12, 3);
        R0(bb, cc, dd, aa, F3, KK0, 6, 12);
        /* #15 -- exchange a <-> aa between the lanes */
        t = a; a = aa; aa = t;

        /* Round 2 */
        R0(a, b, c, d, F1, K1, 7, 7);
        R0(d, a, b, c, F1, K1, 6, 4);
        R0(c, d, a, b, F1, K1, 8, 13);
        R0(b, c, d, a, F1, K1, 13, 1);
        R0(a, b, c, d, F1, K1, 11, 10);
        R0(d, a, b, c, F1, K1, 9, 6);
        R0(c, d, a, b, F1, K1, 7, 15);
        R0(b, c, d, a, F1, K1, 15, 3);
        R0(a, b, c, d, F1, K1, 7, 12);
        R0(d, a, b, c, F1, K1, 12, 0);
        R0(c, d, a, b, F1, K1, 15, 9);
        R0(b, c, d, a, F1, K1, 9, 5);
        R0(a, b, c, d, F1, K1, 11, 2);
        R0(d, a, b, c, F1, K1, 7, 14);
        R0(c, d, a, b, F1, K1, 13, 11);
        R0(b, c, d, a, F1, K1, 12, 8);
        /* right lane of round 2 */
        R0(aa, bb, cc, dd, F2, KK1, 9, 6);
        R0(dd, aa, bb, cc, F2, KK1, 13, 11);
        R0(cc, dd, aa, bb, F2, KK1, 15, 3);
        R0(bb, cc, dd, aa, F2, KK1, 7, 7);
        R0(aa, bb, cc, dd, F2, KK1, 12, 0);
        R0(dd, aa, bb, cc, F2, KK1, 8, 13);
        R0(cc, dd, aa, bb, F2, KK1, 9, 5);
        R0(bb, cc, dd, aa, F2, KK1, 11, 10);
        R0(aa, bb, cc, dd, F2, KK1, 7, 14);
        R0(dd, aa, bb, cc, F2, KK1, 7, 15);
        R0(cc, dd, aa, bb, F2, KK1, 12, 8);
        R0(bb, cc, dd, aa, F2, KK1, 7, 12);
        R0(aa, bb, cc, dd, F2, KK1, 6, 4);
        R0(dd, aa, bb, cc, F2, KK1, 15, 9);
        R0(cc, dd, aa, bb, F2, KK1, 13, 1);
        R0(bb, cc, dd, aa, F2, KK1, 11, 2);
        /* #31 -- exchange b <-> bb */
        t = b; b = bb; bb = t;

        /* Round 3 */
        R0(a, b, c, d, F2, K2, 11, 3);
        R0(d, a, b, c, F2, K2, 13, 10);
        R0(c, d, a, b, F2, K2, 6, 14);
        R0(b, c, d, a, F2, K2, 7, 4);
        R0(a, b, c, d, F2, K2, 14, 9);
        R0(d, a, b, c, F2, K2, 9, 15);
        R0(c, d, a, b, F2, K2, 13, 8);
        R0(b, c, d, a, F2, K2, 15, 1);
        R0(a, b, c, d, F2, K2, 14, 2);
        R0(d, a, b, c, F2, K2, 8, 7);
        R0(c, d, a, b, F2, K2, 13, 0);
        R0(b, c, d, a, F2, K2, 6, 6);
        R0(a, b, c, d, F2, K2, 5, 13);
        R0(d, a, b, c, F2, K2, 12, 11);
        R0(c, d, a, b, F2, K2, 7, 5);
        R0(b, c, d, a, F2, K2, 5, 12);
        /* right lane of round 3 */
        R0(aa, bb, cc, dd, F1, KK2, 9, 15);
        R0(dd, aa, bb, cc, F1, KK2, 7, 5);
        R0(cc, dd, aa, bb, F1, KK2, 15, 1);
        R0(bb, cc, dd, aa, F1, KK2, 11, 3);
        R0(aa, bb, cc, dd, F1, KK2, 8, 7);
        R0(dd, aa, bb, cc, F1, KK2, 6, 14);
        R0(cc, dd, aa, bb, F1, KK2, 6, 6);
        R0(bb, cc, dd, aa, F1, KK2, 14, 9);
        R0(aa, bb, cc, dd, F1, KK2, 12, 11);
        R0(dd, aa, bb, cc, F1, KK2, 13, 8);
        R0(cc, dd, aa, bb, F1, KK2, 5, 12);
        R0(bb, cc, dd, aa, F1, KK2, 14, 2);
        R0(aa, bb, cc, dd, F1, KK2, 13, 10);
        R0(dd, aa, bb, cc, F1, KK2, 13, 0);
        R0(cc, dd, aa, bb, F1, KK2, 7, 4);
        R0(bb, cc, dd, aa, F1, KK2, 5, 13);
        /* #47 -- exchange c <-> cc */
        t = c; c = cc; cc = t;

        /* Round 4 */
        R0(a, b, c, d, F3, K3, 11, 1);
        R0(d, a, b, c, F3, K3, 12, 9);
        R0(c, d, a, b, F3, K3, 14, 11);
        R0(b, c, d, a, F3, K3, 15, 10);
        R0(a, b, c, d, F3, K3, 14, 0);
        R0(d, a, b, c, F3, K3, 15, 8);
        R0(c, d, a, b, F3, K3, 9, 12);
        R0(b, c, d, a, F3, K3, 8, 4);
        R0(a, b, c, d, F3, K3, 9, 13);
        R0(d, a, b, c, F3, K3, 14, 3);
        R0(c, d, a, b, F3, K3, 5, 7);
        R0(b, c, d, a, F3, K3, 6, 15);
        R0(a, b, c, d, F3, K3, 8, 14);
        R0(d, a, b, c, F3, K3, 6, 5);
        R0(c, d, a, b, F3, K3, 5, 6);
        R0(b, c, d, a, F3, K3, 12, 2);
        /* right lane of round 4 */
        R0(aa, bb, cc, dd, F0, KK4, 15, 8);
        R0(dd, aa, bb, cc, F0, KK4, 5, 6);
        R0(cc, dd, aa, bb, F0, KK4, 8, 4);
        R0(bb, cc, dd, aa, F0, KK4, 11, 1);
        R0(aa, bb, cc, dd, F0, KK4, 14, 3);
        R0(dd, aa, bb, cc, F0, KK4, 14, 11);
        R0(cc, dd, aa, bb, F0, KK4, 6, 15);
        R0(bb, cc, dd, aa, F0, KK4, 14, 0);
        R0(aa, bb, cc, dd, F0, KK4, 6, 5);
        R0(dd, aa, bb, cc, F0, KK4, 9, 12);
        R0(cc, dd, aa, bb, F0, KK4, 12, 2);
        R0(bb, cc, dd, aa, F0, KK4, 9, 13);
        R0(aa, bb, cc, dd, F0, KK4, 12, 9);
        R0(dd, aa, bb, cc, F0, KK4, 5, 7);
        R0(cc, dd, aa, bb, F0, KK4, 15, 10);
        R0(bb, cc, dd, aa, F0, KK4, 8, 14);
        /* #63 -- exchange d <-> dd */
        t = d; d = dd; dd = t;

        /* Feed-forward both lanes into the 8-word chaining state. */
        state[0] += a;
        state[1] += b;
        state[2] += c;
        state[3] += d;
        state[4] += aa;
        state[5] += bb;
        state[6] += cc;
        state[7] += dd;
    }
}
void md4_block_host_order (MD4_CTX *c, const void *data, size_t num) { const MD4_LONG *X=data; register unsigned MD32_REG_T A,B,C,D; A=c->A; B=c->B; C=c->C; D=c->D; for (;num--;X+=HASH_LBLOCK) { /* Round 0 */ R0(A,B,C,D,X[ 0], 3,0); R0(D,A,B,C,X[ 1], 7,0); R0(C,D,A,B,X[ 2],11,0); R0(B,C,D,A,X[ 3],19,0); R0(A,B,C,D,X[ 4], 3,0); R0(D,A,B,C,X[ 5], 7,0); R0(C,D,A,B,X[ 6],11,0); R0(B,C,D,A,X[ 7],19,0); R0(A,B,C,D,X[ 8], 3,0); R0(D,A,B,C,X[ 9], 7,0); R0(C,D,A,B,X[10],11,0); R0(B,C,D,A,X[11],19,0); R0(A,B,C,D,X[12], 3,0); R0(D,A,B,C,X[13], 7,0); R0(C,D,A,B,X[14],11,0); R0(B,C,D,A,X[15],19,0); /* Round 1 */ R1(A,B,C,D,X[ 0], 3,0x5A827999L); R1(D,A,B,C,X[ 4], 5,0x5A827999L); R1(C,D,A,B,X[ 8], 9,0x5A827999L); R1(B,C,D,A,X[12],13,0x5A827999L); R1(A,B,C,D,X[ 1], 3,0x5A827999L); R1(D,A,B,C,X[ 5], 5,0x5A827999L); R1(C,D,A,B,X[ 9], 9,0x5A827999L); R1(B,C,D,A,X[13],13,0x5A827999L); R1(A,B,C,D,X[ 2], 3,0x5A827999L); R1(D,A,B,C,X[ 6], 5,0x5A827999L); R1(C,D,A,B,X[10], 9,0x5A827999L); R1(B,C,D,A,X[14],13,0x5A827999L); R1(A,B,C,D,X[ 3], 3,0x5A827999L); R1(D,A,B,C,X[ 7], 5,0x5A827999L); R1(C,D,A,B,X[11], 9,0x5A827999L); R1(B,C,D,A,X[15],13,0x5A827999L); /* Round 2 */ R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1); R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 4],11,0x6ED9EBA1); R2(B,C,D,A,X[12],15,0x6ED9EBA1); R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1); R2(D,A,B,C,X[10], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 6],11,0x6ED9EBA1); R2(B,C,D,A,X[14],15,0x6ED9EBA1); R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1); R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 5],11,0x6ED9EBA1); R2(B,C,D,A,X[13],15,0x6ED9EBA1); R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1); R2(D,A,B,C,X[11], 9,0x6ED9EBA1); R2(C,D,A,B,X[ 7],11,0x6ED9EBA1); R2(B,C,D,A,X[15],15,0x6ED9EBA1); A = c->A += A; B = c->B += B; C = c->C += C; D = c->D += D; } }
/*
 * sha512_hash_block() -- SHA-512 compression function over one 128-byte
 * block.
 *
 * ctx                 : running state (ctx->h[0..7]), updated in place.
 * data                : the message block.
 * perform_endian_swap : on little-endian hosts, nonzero means the block
 *                       must be byte-swapped into big-endian word order
 *                       (JOHNSWAP64); otherwise the block is memcpy'd
 *                       as-is and assumed already in host word order.
 *
 * R0/R1 are the sigma0/sigma1 message-schedule functions and R() is one
 * full SHA-512 round (all defined elsewhere in this file); the 8-variable
 * rotation is expressed by permuting R()'s arguments.
 */
void sha512_hash_block(sha512_ctx *ctx, const unsigned char data[128], int perform_endian_swap)
{
    ARCH_WORD_64 A, B, C, D, E, F, G, H, tmp, W[80];
    int i;
#if ARCH_LITTLE_ENDIAN
    if (perform_endian_swap) {
        for(i = 0; i < 16; i++) {
            W[i] = JOHNSWAP64(*((ARCH_WORD_64 *)&(data[i<<3])));
        }
    } else
#endif
    {
        /* already in the right byte order: copy the 16 input words */
        i = 16;
        memcpy(W, data, 128);
    }
    /* Expand the 16 input words to the 80-word message schedule. */
    for(; i < 80; i++)
        W[i] = R1(W[i - 2]) + W[i - 7] + R0(W[i - 15]) + W[i - 16];

    A = ctx->h[0];
    B = ctx->h[1];
    C = ctx->h[2];
    D = ctx->h[3];
    E = ctx->h[4];
    F = ctx->h[5];
    G = ctx->h[6];
    H = ctx->h[7];

    /* 80 rounds, unrolled; arguments rotate one place each round. */
    R( 0, A, B, C, D, E, F, G, H); R( 1, H, A, B, C, D, E, F, G);
    R( 2, G, H, A, B, C, D, E, F); R( 3, F, G, H, A, B, C, D, E);
    R( 4, E, F, G, H, A, B, C, D); R( 5, D, E, F, G, H, A, B, C);
    R( 6, C, D, E, F, G, H, A, B); R( 7, B, C, D, E, F, G, H, A);
    R( 8, A, B, C, D, E, F, G, H); R( 9, H, A, B, C, D, E, F, G);
    R(10, G, H, A, B, C, D, E, F); R(11, F, G, H, A, B, C, D, E);
    R(12, E, F, G, H, A, B, C, D); R(13, D, E, F, G, H, A, B, C);
    R(14, C, D, E, F, G, H, A, B); R(15, B, C, D, E, F, G, H, A);
    R(16, A, B, C, D, E, F, G, H); R(17, H, A, B, C, D, E, F, G);
    R(18, G, H, A, B, C, D, E, F); R(19, F, G, H, A, B, C, D, E);
    R(20, E, F, G, H, A, B, C, D); R(21, D, E, F, G, H, A, B, C);
    R(22, C, D, E, F, G, H, A, B); R(23, B, C, D, E, F, G, H, A);
    R(24, A, B, C, D, E, F, G, H); R(25, H, A, B, C, D, E, F, G);
    R(26, G, H, A, B, C, D, E, F); R(27, F, G, H, A, B, C, D, E);
    R(28, E, F, G, H, A, B, C, D); R(29, D, E, F, G, H, A, B, C);
    R(30, C, D, E, F, G, H, A, B); R(31, B, C, D, E, F, G, H, A);
    R(32, A, B, C, D, E, F, G, H); R(33, H, A, B, C, D, E, F, G);
    R(34, G, H, A, B, C, D, E, F); R(35, F, G, H, A, B, C, D, E);
    R(36, E, F, G, H, A, B, C, D); R(37, D, E, F, G, H, A, B, C);
    R(38, C, D, E, F, G, H, A, B); R(39, B, C, D, E, F, G, H, A);
    R(40, A, B, C, D, E, F, G, H); R(41, H, A, B, C, D, E, F, G);
    R(42, G, H, A, B, C, D, E, F); R(43, F, G, H, A, B, C, D, E);
    R(44, E, F, G, H, A, B, C, D); R(45, D, E, F, G, H, A, B, C);
    R(46, C, D, E, F, G, H, A, B); R(47, B, C, D, E, F, G, H, A);
    R(48, A, B, C, D, E, F, G, H); R(49, H, A, B, C, D, E, F, G);
    R(50, G, H, A, B, C, D, E, F); R(51, F, G, H, A, B, C, D, E);
    R(52, E, F, G, H, A, B, C, D); R(53, D, E, F, G, H, A, B, C);
    R(54, C, D, E, F, G, H, A, B); R(55, B, C, D, E, F, G, H, A);
    R(56, A, B, C, D, E, F, G, H); R(57, H, A, B, C, D, E, F, G);
    R(58, G, H, A, B, C, D, E, F); R(59, F, G, H, A, B, C, D, E);
    R(60, E, F, G, H, A, B, C, D); R(61, D, E, F, G, H, A, B, C);
    R(62, C, D, E, F, G, H, A, B); R(63, B, C, D, E, F, G, H, A);
    R(64, A, B, C, D, E, F, G, H); R(65, H, A, B, C, D, E, F, G);
    R(66, G, H, A, B, C, D, E, F); R(67, F, G, H, A, B, C, D, E);
    R(68, E, F, G, H, A, B, C, D); R(69, D, E, F, G, H, A, B, C);
    R(70, C, D, E, F, G, H, A, B); R(71, B, C, D, E, F, G, H, A);
    R(72, A, B, C, D, E, F, G, H); R(73, H, A, B, C, D, E, F, G);
    R(74, G, H, A, B, C, D, E, F); R(75, F, G, H, A, B, C, D, E);
    R(76, E, F, G, H, A, B, C, D); R(77, D, E, F, G, H, A, B, C);
    R(78, C, D, E, F, G, H, A, B); R(79, B, C, D, E, F, G, H, A);

    /* Feed-forward into the chaining state. */
    ctx->h[0] += A;
    ctx->h[1] += B;
    ctx->h[2] += C;
    ctx->h[3] += D;
    ctx->h[4] += E;
    ctx->h[5] += F;
    ctx->h[6] += G;
    ctx->h[7] += H;
}
/*
 * md4_block_data_order() -- MD4 compression function (raw-state variant).
 *
 * Processes `num` 64-byte message blocks from `data`, folding each block
 * into the 4-word digest state in `state[0..3]`.
 *
 * Relies on macros defined elsewhere in this file:
 *   HOST_c2l(p,l) -- load a little-endian 32-bit word and advance p;
 *   R0/R1/R2      -- the three MD4 round operations (last argument is the
 *                    round's additive constant; 0 for round 0).
 */
void md4_block_data_order(uint32_t *state, const uint8_t *data, size_t num)
{
    uint32_t A, B, C, D, l;
    uint32_t X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13,
        X14, X15;
    A = state[0];
    B = state[1];
    C = state[2];
    D = state[3];
    for (; num--;) {
        /* Word loads are interleaved with Round 0 so each X word is
         * fetched just before its first use. */
        HOST_c2l(data, l);
        X0 = l;
        HOST_c2l(data, l);
        X1 = l;
        /* Round 0 */
        R0(A, B, C, D, X0, 3, 0);
        HOST_c2l(data, l);
        X2 = l;
        R0(D, A, B, C, X1, 7, 0);
        HOST_c2l(data, l);
        X3 = l;
        R0(C, D, A, B, X2, 11, 0);
        HOST_c2l(data, l);
        X4 = l;
        R0(B, C, D, A, X3, 19, 0);
        HOST_c2l(data, l);
        X5 = l;
        R0(A, B, C, D, X4, 3, 0);
        HOST_c2l(data, l);
        X6 = l;
        R0(D, A, B, C, X5, 7, 0);
        HOST_c2l(data, l);
        X7 = l;
        R0(C, D, A, B, X6, 11, 0);
        HOST_c2l(data, l);
        X8 = l;
        R0(B, C, D, A, X7, 19, 0);
        HOST_c2l(data, l);
        X9 = l;
        R0(A, B, C, D, X8, 3, 0);
        HOST_c2l(data, l);
        X10 = l;
        R0(D, A, B, C, X9, 7, 0);
        HOST_c2l(data, l);
        X11 = l;
        R0(C, D, A, B, X10, 11, 0);
        HOST_c2l(data, l);
        X12 = l;
        R0(B, C, D, A, X11, 19, 0);
        HOST_c2l(data, l);
        X13 = l;
        R0(A, B, C, D, X12, 3, 0);
        HOST_c2l(data, l);
        X14 = l;
        R0(D, A, B, C, X13, 7, 0);
        HOST_c2l(data, l);
        X15 = l;
        R0(C, D, A, B, X14, 11, 0);
        R0(B, C, D, A, X15, 19, 0);
        /* Round 1 -- word order 0,4,8,12 / 1,5,9,13 / ..., shifts 3,5,9,13 */
        R1(A, B, C, D, X0, 3, 0x5A827999L);
        R1(D, A, B, C, X4, 5, 0x5A827999L);
        R1(C, D, A, B, X8, 9, 0x5A827999L);
        R1(B, C, D, A, X12, 13, 0x5A827999L);
        R1(A, B, C, D, X1, 3, 0x5A827999L);
        R1(D, A, B, C, X5, 5, 0x5A827999L);
        R1(C, D, A, B, X9, 9, 0x5A827999L);
        R1(B, C, D, A, X13, 13, 0x5A827999L);
        R1(A, B, C, D, X2, 3, 0x5A827999L);
        R1(D, A, B, C, X6, 5, 0x5A827999L);
        R1(C, D, A, B, X10, 9, 0x5A827999L);
        R1(B, C, D, A, X14, 13, 0x5A827999L);
        R1(A, B, C, D, X3, 3, 0x5A827999L);
        R1(D, A, B, C, X7, 5, 0x5A827999L);
        R1(C, D, A, B, X11, 9, 0x5A827999L);
        R1(B, C, D, A, X15, 13, 0x5A827999L);
        /* Round 2 -- bit-reversed word order, shifts 3,9,11,15 */
        R2(A, B, C, D, X0, 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X8, 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X4, 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X12, 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X2, 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X10, 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X6, 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X14, 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X1, 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X9, 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X5, 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X13, 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X3, 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X11, 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X7, 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X15, 15, 0x6ED9EBA1L);
        /* Feed-forward: fold the block output back into the state. */
        A = state[0] += A;
        B = state[1] += B;
        C = state[2] += C;
        D = state[3] += D;
    }
}
/*
 * SHA1Transform - SHA-1 compression function: hash one 512-bit block.
 *
 * Mixes the 64 bytes at `buffer` into the five-word chaining state.
 * The round operations R0..R4 (and the message-schedule expansion they
 * presumably perform via `block`) are macros defined elsewhere in this
 * file; TODO confirm they read/update block->l[] in place.
 *
 * The input is first memcpy'd into a local union so the byte buffer can
 * be accessed as 32-bit words without alignment or aliasing problems,
 * and so the caller's buffer is not modified by the in-place schedule.
 */
void SHA1Transform(uint32 state[5], const unsigned char buffer[SHA1_BLOCK_LENGTH]) {
    uint32 a, b, c, d, e;
    typedef union {
        unsigned char c[64];
        uint32 l[16];
    } CHAR64LONG16;
    CHAR64LONG16 block[1]; /* use array to appear as a pointer */
    memcpy(block, buffer, SHA1_BLOCK_LENGTH);
    /* Copy context->state[] to working vars */
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];
    e = state[4];
    /* 4 rounds of 20 operations each. Loop unrolled.
       The argument rotation (a,b,c,d,e) -> (e,a,b,c,d) between steps
       replaces the usual end-of-step variable shuffle. */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    /* Steps 16-19 still use the round-1 function but expanded words. */
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
    /* Add the working vars back into context.state[] (Davies-Meyer). */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
    /* Wipe variables.
       NOTE(review): plain assignments/memset of dead locals may be
       elided by the optimizer; a guaranteed wipe would need something
       like explicit_bzero - confirm whether that matters here. */
    a = b = c = d = e = 0;
#ifdef SHA1HANDSOFF
    memset(block, '\0', sizeof(block));
#endif
}
/*
 * md5_block_data_order - MD5 compression function.
 *
 * Absorbs `num` 64-byte message blocks from `data_` into the context's
 * chaining variables c->A..c->D.  HOST_c2l and the four round macros
 * R0..R3 are defined elsewhere in this file; HOST_c2l presumably reads
 * the next 4 bytes little-endian into `l` and advances `data` -
 * TODO confirm against the macro definition.
 */
void md5_block_data_order (MD5_CTX *c, const void *data_, int num)
{
    const unsigned char *data=data_;
    register unsigned long A,B,C,D,l;
    /*
     * In case you wonder why A-D are declared as long and not
     * as MD5_LONG. Doing so results in slight performance
     * boost on LP64 architectures. The catch is we don't
     * really care if 32 MSBs of a 64-bit register get polluted
     * with eventual overflows as we *save* only 32 LSBs in
     * *either* case. Now declaring 'em long excuses the compiler
     * from keeping 32 MSBs zeroed resulting in 13% performance
     * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
     * Well, to be honest it should say that this *prevents*
     * performance degradation.
     *
     * <*****@*****.**>
     */
#ifndef MD32_XARRAY
    /* See comment in crypto/sha/sha_locl.h for details. */
    /* Scalars instead of an array let the compiler register-allocate
       the 16 message words. */
    unsigned long XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
                  XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
# define X(i) XX##i
#else
    MD5_LONG XX[MD5_LBLOCK];
# define X(i) XX[i]
#endif
    A=c->A;
    B=c->B;
    C=c->C;
    D=c->D;
    for (;num--;)
    {
        /* Word loads are interleaved with Round 0 so each word is
           fetched just before its first use. */
        HOST_c2l(data,l); X( 0)=l;
        HOST_c2l(data,l); X( 1)=l;
        /* Round 0 (F function): rotations 7, 12, 17, 22. */
        R0(A,B,C,D,X( 0), 7,0xd76aa478L); HOST_c2l(data,l); X( 2)=l;
        R0(D,A,B,C,X( 1),12,0xe8c7b756L); HOST_c2l(data,l); X( 3)=l;
        R0(C,D,A,B,X( 2),17,0x242070dbL); HOST_c2l(data,l); X( 4)=l;
        R0(B,C,D,A,X( 3),22,0xc1bdceeeL); HOST_c2l(data,l); X( 5)=l;
        R0(A,B,C,D,X( 4), 7,0xf57c0fafL); HOST_c2l(data,l); X( 6)=l;
        R0(D,A,B,C,X( 5),12,0x4787c62aL); HOST_c2l(data,l); X( 7)=l;
        R0(C,D,A,B,X( 6),17,0xa8304613L); HOST_c2l(data,l); X( 8)=l;
        R0(B,C,D,A,X( 7),22,0xfd469501L); HOST_c2l(data,l); X( 9)=l;
        R0(A,B,C,D,X( 8), 7,0x698098d8L); HOST_c2l(data,l); X(10)=l;
        R0(D,A,B,C,X( 9),12,0x8b44f7afL); HOST_c2l(data,l); X(11)=l;
        R0(C,D,A,B,X(10),17,0xffff5bb1L); HOST_c2l(data,l); X(12)=l;
        R0(B,C,D,A,X(11),22,0x895cd7beL); HOST_c2l(data,l); X(13)=l;
        R0(A,B,C,D,X(12), 7,0x6b901122L); HOST_c2l(data,l); X(14)=l;
        R0(D,A,B,C,X(13),12,0xfd987193L); HOST_c2l(data,l); X(15)=l;
        R0(C,D,A,B,X(14),17,0xa679438eL);
        R0(B,C,D,A,X(15),22,0x49b40821L);
        /* Round 1 (G function): word index advances by 5 mod 16;
           rotations 5, 9, 14, 20. */
        R1(A,B,C,D,X( 1), 5,0xf61e2562L);
        R1(D,A,B,C,X( 6), 9,0xc040b340L);
        R1(C,D,A,B,X(11),14,0x265e5a51L);
        R1(B,C,D,A,X( 0),20,0xe9b6c7aaL);
        R1(A,B,C,D,X( 5), 5,0xd62f105dL);
        R1(D,A,B,C,X(10), 9,0x02441453L);
        R1(C,D,A,B,X(15),14,0xd8a1e681L);
        R1(B,C,D,A,X( 4),20,0xe7d3fbc8L);
        R1(A,B,C,D,X( 9), 5,0x21e1cde6L);
        R1(D,A,B,C,X(14), 9,0xc33707d6L);
        R1(C,D,A,B,X( 3),14,0xf4d50d87L);
        R1(B,C,D,A,X( 8),20,0x455a14edL);
        R1(A,B,C,D,X(13), 5,0xa9e3e905L);
        R1(D,A,B,C,X( 2), 9,0xfcefa3f8L);
        R1(C,D,A,B,X( 7),14,0x676f02d9L);
        R1(B,C,D,A,X(12),20,0x8d2a4c8aL);
        /* Round 2 (H function): word index advances by 3 mod 16;
           rotations 4, 11, 16, 23. */
        R2(A,B,C,D,X( 5), 4,0xfffa3942L);
        R2(D,A,B,C,X( 8),11,0x8771f681L);
        R2(C,D,A,B,X(11),16,0x6d9d6122L);
        R2(B,C,D,A,X(14),23,0xfde5380cL);
        R2(A,B,C,D,X( 1), 4,0xa4beea44L);
        R2(D,A,B,C,X( 4),11,0x4bdecfa9L);
        R2(C,D,A,B,X( 7),16,0xf6bb4b60L);
        R2(B,C,D,A,X(10),23,0xbebfbc70L);
        R2(A,B,C,D,X(13), 4,0x289b7ec6L);
        R2(D,A,B,C,X( 0),11,0xeaa127faL);
        R2(C,D,A,B,X( 3),16,0xd4ef3085L);
        R2(B,C,D,A,X( 6),23,0x04881d05L);
        R2(A,B,C,D,X( 9), 4,0xd9d4d039L);
        R2(D,A,B,C,X(12),11,0xe6db99e5L);
        R2(C,D,A,B,X(15),16,0x1fa27cf8L);
        R2(B,C,D,A,X( 2),23,0xc4ac5665L);
        /* Round 3 (I function): word index advances by 7 mod 16;
           rotations 6, 10, 15, 21. */
        R3(A,B,C,D,X( 0), 6,0xf4292244L);
        R3(D,A,B,C,X( 7),10,0x432aff97L);
        R3(C,D,A,B,X(14),15,0xab9423a7L);
        R3(B,C,D,A,X( 5),21,0xfc93a039L);
        R3(A,B,C,D,X(12), 6,0x655b59c3L);
        R3(D,A,B,C,X( 3),10,0x8f0ccc92L);
        R3(C,D,A,B,X(10),15,0xffeff47dL);
        R3(B,C,D,A,X( 1),21,0x85845dd1L);
        R3(A,B,C,D,X( 8), 6,0x6fa87e4fL);
        R3(D,A,B,C,X(15),10,0xfe2ce6e0L);
        R3(C,D,A,B,X( 6),15,0xa3014314L);
        R3(B,C,D,A,X(13),21,0x4e0811a1L);
        R3(A,B,C,D,X( 4), 6,0xf7537e82L);
        R3(D,A,B,C,X(11),10,0xbd3af235L);
        R3(C,D,A,B,X( 2),15,0x2ad7d2bbL);
        R3(B,C,D,A,X( 9),21,0xeb86d391L);
        /* Davies-Meyer feed-forward into the context, reloading the
           working variables for the next block. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
    }
}
/*
 * rtsmb_md4_block_data_order - MD4 compression function (byte-order safe).
 *
 * Absorbs `num` 64-byte message blocks from `data_` into c->A..c->D.
 * HOST_c2l and the round macros R0..R2 are defined elsewhere; HOST_c2l
 * presumably reads 4 bytes little-endian into `l` and advances `data` -
 * TODO confirm.  Compare rtsmb_md4_block_host_order, which skips the
 * byte conversion for data already in host order.
 *
 * NOTE(review): MD4 is cryptographically broken; used here presumably
 * only for SMB legacy authentication - do not use for new designs.
 */
void rtsmb_md4_block_data_order (RTSMB_MD4_CTX *c, const void *data_, int num)
{
    const unsigned char *data;
    register unsigned long A,B,C,D,l;
    /*
     * In case you wonder why A-D are declared as long and not
     * as RTSMB_MD4_LONG. Doing so results in slight performance
     * boost on LP64 architectures. The catch is we don't
     * really care if 32 MSBs of a 64-bit register get polluted
     * with eventual overflows as we *save* only 32 LSBs in
     * *either* case. Now declaring 'em long excuses the compiler
     * from keeping 32 MSBs zeroed resulting in 13% performance
     * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
     * Well, to be honest it should say that this *prevents*
     * performance degradation.
     *
     * <*****@*****.**>
     */
#ifndef MD32_XARRAY
    /* See comment in crypto/sha/sha_locl.h for details. */
    /* Scalars rather than an array so the 16 message words can live in
       registers. */
    unsigned long XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
                  XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
# define X(i) XX##i
#else
    RTSMB_MD4_LONG XX[RTSMB_MD4_LBLOCK];
# define X(i) XX[i]
#endif
    data =data_;
    A=c->A;
    B=c->B;
    C=c->C;
    D=c->D;
    for (;num--;)
    {
        /* Word loads interleaved with Round 0 steps. */
        HOST_c2l(data,l); X( 0)=l;
        HOST_c2l(data,l); X( 1)=l;
        /* Round 0: rotations 3, 7, 11, 19; no additive constant. */
        R0(A,B,C,D,X( 0), 3,0); HOST_c2l(data,l); X( 2)=l;
        R0(D,A,B,C,X( 1), 7,0); HOST_c2l(data,l); X( 3)=l;
        R0(C,D,A,B,X( 2),11,0); HOST_c2l(data,l); X( 4)=l;
        R0(B,C,D,A,X( 3),19,0); HOST_c2l(data,l); X( 5)=l;
        R0(A,B,C,D,X( 4), 3,0); HOST_c2l(data,l); X( 6)=l;
        R0(D,A,B,C,X( 5), 7,0); HOST_c2l(data,l); X( 7)=l;
        R0(C,D,A,B,X( 6),11,0); HOST_c2l(data,l); X( 8)=l;
        R0(B,C,D,A,X( 7),19,0); HOST_c2l(data,l); X( 9)=l;
        R0(A,B,C,D,X( 8), 3,0); HOST_c2l(data,l); X(10)=l;
        R0(D,A,B,C,X( 9), 7,0); HOST_c2l(data,l); X(11)=l;
        R0(C,D,A,B,X(10),11,0); HOST_c2l(data,l); X(12)=l;
        R0(B,C,D,A,X(11),19,0); HOST_c2l(data,l); X(13)=l;
        R0(A,B,C,D,X(12), 3,0); HOST_c2l(data,l); X(14)=l;
        R0(D,A,B,C,X(13), 7,0); HOST_c2l(data,l); X(15)=l;
        R0(C,D,A,B,X(14),11,0);
        R0(B,C,D,A,X(15),19,0);
        /* Round 1: column word order; rotations 3, 5, 9, 13;
           constant 0x5A827999. */
        R1(A,B,C,D,X( 0), 3,0x5A827999L);
        R1(D,A,B,C,X( 4), 5,0x5A827999L);
        R1(C,D,A,B,X( 8), 9,0x5A827999L);
        R1(B,C,D,A,X(12),13,0x5A827999L);
        R1(A,B,C,D,X( 1), 3,0x5A827999L);
        R1(D,A,B,C,X( 5), 5,0x5A827999L);
        R1(C,D,A,B,X( 9), 9,0x5A827999L);
        R1(B,C,D,A,X(13),13,0x5A827999L);
        R1(A,B,C,D,X( 2), 3,0x5A827999L);
        R1(D,A,B,C,X( 6), 5,0x5A827999L);
        R1(C,D,A,B,X(10), 9,0x5A827999L);
        R1(B,C,D,A,X(14),13,0x5A827999L);
        R1(A,B,C,D,X( 3), 3,0x5A827999L);
        R1(D,A,B,C,X( 7), 5,0x5A827999L);
        R1(C,D,A,B,X(11), 9,0x5A827999L);
        R1(B,C,D,A,X(15),13,0x5A827999L);
        /* Round 2: bit-reversed word order; rotations 3, 9, 11, 15;
           constant 0x6ED9EBA1. */
        R2(A,B,C,D,X( 0), 3,0x6ED9EBA1L);
        R2(D,A,B,C,X( 8), 9,0x6ED9EBA1L);
        R2(C,D,A,B,X( 4),11,0x6ED9EBA1L);
        R2(B,C,D,A,X(12),15,0x6ED9EBA1L);
        R2(A,B,C,D,X( 2), 3,0x6ED9EBA1L);
        R2(D,A,B,C,X(10), 9,0x6ED9EBA1L);
        R2(C,D,A,B,X( 6),11,0x6ED9EBA1L);
        R2(B,C,D,A,X(14),15,0x6ED9EBA1L);
        R2(A,B,C,D,X( 1), 3,0x6ED9EBA1L);
        R2(D,A,B,C,X( 9), 9,0x6ED9EBA1L);
        R2(C,D,A,B,X( 5),11,0x6ED9EBA1L);
        R2(B,C,D,A,X(13),15,0x6ED9EBA1L);
        R2(A,B,C,D,X( 3), 3,0x6ED9EBA1L);
        R2(D,A,B,C,X(11), 9,0x6ED9EBA1L);
        R2(C,D,A,B,X( 7),11,0x6ED9EBA1L);
        R2(B,C,D,A,X(15),15,0x6ED9EBA1L);
        /* Feed-forward into the context; reload for next block. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
    }
}
/*
 * md4_block_data_order - MD4 compression function (MD4_CTX variant).
 *
 * Absorbs `num` 64-byte message blocks from `data_` into c->A..c->D.
 * HOST_c2l and the round macros R0..R2 are defined elsewhere in this
 * file; HOST_c2l presumably reads 4 bytes little-endian into `l` and
 * advances `data` - TODO confirm against the macro definition.
 * MD32_REG_T is presumably a fast register-width integer type chosen
 * per platform - confirm where it is defined.
 */
void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num)
{
    const unsigned char *data=data_;
    register unsigned MD32_REG_T A,B,C,D,l;
#ifndef MD32_XARRAY
    /* See comment in crypto/sha/sha_locl.h for details. */
    /* Scalars rather than an array so the 16 message words can be
       register-allocated. */
    unsigned MD32_REG_T XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
                        XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
# define X(i) XX##i
#else
    MD4_LONG XX[MD4_LBLOCK];
# define X(i) XX[i]
#endif
    A=c->A;
    B=c->B;
    C=c->C;
    D=c->D;
    for (;num--;)
    {
        /* Word loads interleaved with Round 0 steps. */
        HOST_c2l(data,l); X( 0)=l;
        HOST_c2l(data,l); X( 1)=l;
        /* Round 0: rotations 3, 7, 11, 19; no additive constant. */
        R0(A,B,C,D,X( 0), 3,0); HOST_c2l(data,l); X( 2)=l;
        R0(D,A,B,C,X( 1), 7,0); HOST_c2l(data,l); X( 3)=l;
        R0(C,D,A,B,X( 2),11,0); HOST_c2l(data,l); X( 4)=l;
        R0(B,C,D,A,X( 3),19,0); HOST_c2l(data,l); X( 5)=l;
        R0(A,B,C,D,X( 4), 3,0); HOST_c2l(data,l); X( 6)=l;
        R0(D,A,B,C,X( 5), 7,0); HOST_c2l(data,l); X( 7)=l;
        R0(C,D,A,B,X( 6),11,0); HOST_c2l(data,l); X( 8)=l;
        R0(B,C,D,A,X( 7),19,0); HOST_c2l(data,l); X( 9)=l;
        R0(A,B,C,D,X( 8), 3,0); HOST_c2l(data,l); X(10)=l;
        R0(D,A,B,C,X( 9), 7,0); HOST_c2l(data,l); X(11)=l;
        R0(C,D,A,B,X(10),11,0); HOST_c2l(data,l); X(12)=l;
        R0(B,C,D,A,X(11),19,0); HOST_c2l(data,l); X(13)=l;
        R0(A,B,C,D,X(12), 3,0); HOST_c2l(data,l); X(14)=l;
        R0(D,A,B,C,X(13), 7,0); HOST_c2l(data,l); X(15)=l;
        R0(C,D,A,B,X(14),11,0);
        R0(B,C,D,A,X(15),19,0);
        /* Round 1: column word order; rotations 3, 5, 9, 13;
           constant 0x5A827999. */
        R1(A,B,C,D,X( 0), 3,0x5A827999L);
        R1(D,A,B,C,X( 4), 5,0x5A827999L);
        R1(C,D,A,B,X( 8), 9,0x5A827999L);
        R1(B,C,D,A,X(12),13,0x5A827999L);
        R1(A,B,C,D,X( 1), 3,0x5A827999L);
        R1(D,A,B,C,X( 5), 5,0x5A827999L);
        R1(C,D,A,B,X( 9), 9,0x5A827999L);
        R1(B,C,D,A,X(13),13,0x5A827999L);
        R1(A,B,C,D,X( 2), 3,0x5A827999L);
        R1(D,A,B,C,X( 6), 5,0x5A827999L);
        R1(C,D,A,B,X(10), 9,0x5A827999L);
        R1(B,C,D,A,X(14),13,0x5A827999L);
        R1(A,B,C,D,X( 3), 3,0x5A827999L);
        R1(D,A,B,C,X( 7), 5,0x5A827999L);
        R1(C,D,A,B,X(11), 9,0x5A827999L);
        R1(B,C,D,A,X(15),13,0x5A827999L);
        /* Round 2: bit-reversed word order; rotations 3, 9, 11, 15;
           constant 0x6ED9EBA1. */
        R2(A,B,C,D,X( 0), 3,0x6ED9EBA1L);
        R2(D,A,B,C,X( 8), 9,0x6ED9EBA1L);
        R2(C,D,A,B,X( 4),11,0x6ED9EBA1L);
        R2(B,C,D,A,X(12),15,0x6ED9EBA1L);
        R2(A,B,C,D,X( 2), 3,0x6ED9EBA1L);
        R2(D,A,B,C,X(10), 9,0x6ED9EBA1L);
        R2(C,D,A,B,X( 6),11,0x6ED9EBA1L);
        R2(B,C,D,A,X(14),15,0x6ED9EBA1L);
        R2(A,B,C,D,X( 1), 3,0x6ED9EBA1L);
        R2(D,A,B,C,X( 9), 9,0x6ED9EBA1L);
        R2(C,D,A,B,X( 5),11,0x6ED9EBA1L);
        R2(B,C,D,A,X(13),15,0x6ED9EBA1L);
        R2(A,B,C,D,X( 3), 3,0x6ED9EBA1L);
        R2(D,A,B,C,X(11), 9,0x6ED9EBA1L);
        R2(C,D,A,B,X( 7),11,0x6ED9EBA1L);
        R2(B,C,D,A,X(15),15,0x6ED9EBA1L);
        /* Feed-forward into the context; reload for next block. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
    }
}
/*
 * rtsmb_md4_block_host_order - MD4 compression for host-order input.
 *
 * Absorbs `num` 64-byte blocks into c->A..c->D, reading the message
 * words directly as RTSMB_MD4_LONG (no byte-order conversion), so the
 * caller must supply data already in host word order and suitably
 * aligned - contrast rtsmb_md4_block_data_order, which uses HOST_c2l.
 * The round macros R0..R2 are defined elsewhere in this file.
 *
 * Fix: the Round 2 constants were written 0x6ED9EBA1 without the `L`
 * suffix, inconsistent with Rounds 0/1 here and with the sibling
 * rtsmb_md4_block_data_order; the suffix is added for consistency
 * (the value and arithmetic are unchanged).
 */
void rtsmb_md4_block_host_order (RTSMB_MD4_CTX *c, const void *data, int num)
{
    const RTSMB_MD4_LONG *X;
    register unsigned long A,B,C,D;
    /*
     * In case you wonder why A-D are declared as long and not
     * as RTSMB_MD4_LONG. Doing so results in slight performance
     * boost on LP64 architectures. The catch is we don't
     * really care if 32 MSBs of a 64-bit register get polluted
     * with eventual overflows as we *save* only 32 LSBs in
     * *either* case. Now declaring 'em long excuses the compiler
     * from keeping 32 MSBs zeroed resulting in 13% performance
     * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
     * Well, to be honest it should say that this *prevents*
     * performance degradation.
     *
     * <*****@*****.**>
     */
    X=data;
    A=c->A;
    B=c->B;
    C=c->C;
    D=c->D;
    for (;num--;X+=HASH_LBLOCK)
    {
        /* Round 0: rotations 3, 7, 11, 19; no additive constant. */
        R0(A,B,C,D,X[ 0], 3,0);
        R0(D,A,B,C,X[ 1], 7,0);
        R0(C,D,A,B,X[ 2],11,0);
        R0(B,C,D,A,X[ 3],19,0);
        R0(A,B,C,D,X[ 4], 3,0);
        R0(D,A,B,C,X[ 5], 7,0);
        R0(C,D,A,B,X[ 6],11,0);
        R0(B,C,D,A,X[ 7],19,0);
        R0(A,B,C,D,X[ 8], 3,0);
        R0(D,A,B,C,X[ 9], 7,0);
        R0(C,D,A,B,X[10],11,0);
        R0(B,C,D,A,X[11],19,0);
        R0(A,B,C,D,X[12], 3,0);
        R0(D,A,B,C,X[13], 7,0);
        R0(C,D,A,B,X[14],11,0);
        R0(B,C,D,A,X[15],19,0);
        /* Round 1: column word order; rotations 3, 5, 9, 13;
           constant 0x5A827999. */
        R1(A,B,C,D,X[ 0], 3,0x5A827999L);
        R1(D,A,B,C,X[ 4], 5,0x5A827999L);
        R1(C,D,A,B,X[ 8], 9,0x5A827999L);
        R1(B,C,D,A,X[12],13,0x5A827999L);
        R1(A,B,C,D,X[ 1], 3,0x5A827999L);
        R1(D,A,B,C,X[ 5], 5,0x5A827999L);
        R1(C,D,A,B,X[ 9], 9,0x5A827999L);
        R1(B,C,D,A,X[13],13,0x5A827999L);
        R1(A,B,C,D,X[ 2], 3,0x5A827999L);
        R1(D,A,B,C,X[ 6], 5,0x5A827999L);
        R1(C,D,A,B,X[10], 9,0x5A827999L);
        R1(B,C,D,A,X[14],13,0x5A827999L);
        R1(A,B,C,D,X[ 3], 3,0x5A827999L);
        R1(D,A,B,C,X[ 7], 5,0x5A827999L);
        R1(C,D,A,B,X[11], 9,0x5A827999L);
        R1(B,C,D,A,X[15],13,0x5A827999L);
        /* Round 2: bit-reversed word order; rotations 3, 9, 11, 15;
           constant 0x6ED9EBA1. */
        R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1L);
        R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1L);
        R2(C,D,A,B,X[ 4],11,0x6ED9EBA1L);
        R2(B,C,D,A,X[12],15,0x6ED9EBA1L);
        R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1L);
        R2(D,A,B,C,X[10], 9,0x6ED9EBA1L);
        R2(C,D,A,B,X[ 6],11,0x6ED9EBA1L);
        R2(B,C,D,A,X[14],15,0x6ED9EBA1L);
        R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1L);
        R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1L);
        R2(C,D,A,B,X[ 5],11,0x6ED9EBA1L);
        R2(B,C,D,A,X[13],15,0x6ED9EBA1L);
        R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1L);
        R2(D,A,B,C,X[11], 9,0x6ED9EBA1L);
        R2(C,D,A,B,X[ 7],11,0x6ED9EBA1L);
        R2(B,C,D,A,X[15],15,0x6ED9EBA1L);
        /* Feed-forward into the context; reload for next block. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
    }
}
//MD4 Block data order setting
/*
 * MD4_BlockDataOrder - MD4 compression function (__fastcall variant).
 *
 * Absorbs `num` 64-byte message blocks from `data_` into c->A..c->D.
 * HOST_c2l and the round macros R0..R2 come from elsewhere in this
 * file; HOST_c2l presumably reads 4 bytes little-endian into `l` and
 * advances `data` - TODO confirm.  The message words XX0..XX15 are
 * kept in scalars (accessed through the X(i) token-pasting macro) so
 * the compiler can register-allocate them.
 */
void __fastcall MD4_BlockDataOrder(
    MD4_CTX *c, const void *data_, size_t num)
{
    const unsigned char *data = (const unsigned char *)data_;
    register uint32_t A = 0, B = 0, C = 0, D = 0, l = 0;
    uint32_t XX0 = 0, XX1 = 0, XX2 = 0, XX3 = 0,
             XX4 = 0, XX5 = 0, XX6 = 0, XX7 = 0,
             XX8 = 0, XX9 = 0, XX10 = 0, XX11 = 0,
             XX12 = 0, XX13 = 0, XX14 = 0, XX15 = 0;
#define X(i) XX ## i
    A = c->A;
    B = c->B;
    C = c->C;
    D = c->D;
    for (;num--;)
    {
        /* Word loads interleaved with Round 0 steps. */
        (void)HOST_c2l(data, l); X(0) = l;
        (void)HOST_c2l(data, l); X(1) = l;
        //Round 0 - rotations 3, 7, 11, 19; no additive constant
        R0(A, B, C, D, X(0), 3, 0); (void)HOST_c2l(data, l); X(2) = l;
        R0(D, A, B, C, X(1), 7, 0); (void)HOST_c2l(data, l); X(3) = l;
        R0(C, D, A, B, X(2), 11, 0); (void)HOST_c2l(data, l); X(4) = l;
        R0(B, C, D, A, X(3), 19, 0); (void)HOST_c2l(data, l); X(5) = l;
        R0(A, B, C, D, X(4), 3, 0); (void)HOST_c2l(data, l); X(6) = l;
        R0(D, A, B, C, X(5), 7, 0); (void)HOST_c2l(data, l); X(7) = l;
        R0(C, D, A, B, X(6), 11, 0); (void)HOST_c2l(data, l); X(8) = l;
        R0(B, C, D, A, X(7), 19, 0); (void)HOST_c2l(data, l); X(9) = l;
        R0(A, B, C, D, X(8), 3, 0); (void)HOST_c2l(data, l); X(10) = l;
        R0(D, A, B, C, X(9), 7, 0); (void)HOST_c2l(data, l); X(11) = l;
        R0(C, D, A, B, X(10), 11, 0); (void)HOST_c2l(data, l); X(12) = l;
        R0(B, C, D, A, X(11), 19, 0); (void)HOST_c2l(data, l); X(13) = l;
        R0(A, B, C, D, X(12), 3, 0); (void)HOST_c2l(data, l); X(14) = l;
        R0(D, A, B, C, X(13), 7, 0); (void)HOST_c2l(data, l); X(15) = l;
        R0(C, D, A, B, X(14), 11, 0);
        R0(B, C, D, A, X(15), 19, 0);
        //Round 1 - column word order; rotations 3, 5, 9, 13; constant 0x5A827999
        R1(A, B, C, D, X(0), 3, 0x5A827999L);
        R1(D, A, B, C, X(4), 5, 0x5A827999L);
        R1(C, D, A, B, X(8), 9, 0x5A827999L);
        R1(B, C, D, A, X(12), 13, 0x5A827999L);
        R1(A, B, C, D, X(1), 3, 0x5A827999L);
        R1(D, A, B, C, X(5), 5, 0x5A827999L);
        R1(C, D, A, B, X(9), 9, 0x5A827999L);
        R1(B, C, D, A, X(13), 13, 0x5A827999L);
        R1(A, B, C, D, X(2), 3, 0x5A827999L);
        R1(D, A, B, C, X(6), 5, 0x5A827999L);
        R1(C, D, A, B, X(10), 9, 0x5A827999L);
        R1(B, C, D, A, X(14), 13, 0x5A827999L);
        R1(A, B, C, D, X(3), 3, 0x5A827999L);
        R1(D, A, B, C, X(7), 5, 0x5A827999L);
        R1(C, D, A, B, X(11), 9, 0x5A827999L);
        R1(B, C, D, A, X(15), 13, 0x5A827999L);
        //Round 2 - bit-reversed word order; rotations 3, 9, 11, 15; constant 0x6ED9EBA1
        R2(A, B, C, D, X(0), 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X(8), 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X(4), 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X(12), 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X(2), 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X(10), 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X(6), 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X(14), 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X(1), 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X(9), 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X(5), 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X(13), 15, 0x6ED9EBA1L);
        R2(A, B, C, D, X(3), 3, 0x6ED9EBA1L);
        R2(D, A, B, C, X(11), 9, 0x6ED9EBA1L);
        R2(C, D, A, B, X(7), 11, 0x6ED9EBA1L);
        R2(B, C, D, A, X(15), 15, 0x6ED9EBA1L);
        /* Feed-forward into the context; reload for next block. */
        A = c->A += A;
        B = c->B += B;
        C = c->C += C;
        D = c->D += D;
    }
    return;
}
/* Hash a single 512-bit block. This is the core of the algorithm. */
/*
 * SHATransform - SHA-1 compression function.
 *
 * Mixes the 64 bytes at `Buffer` into the five-word chaining state in
 * `State`.  The round macros R0..R4 are defined elsewhere in this
 * file and presumably read (and update in place, for the message
 * schedule) the 16 words reachable through `Block` - TODO confirm.
 *
 * Fix: the original aliased the caller's byte buffer directly as
 * ULONG* (`Block = (ULONG*)Buffer;`), which is a strict-aliasing
 * violation, risks an unaligned access on strict-alignment targets,
 * and - if the schedule updates Block[] in place - clobbers the
 * caller's buffer.  We now work on an aligned local copy, exactly as
 * the sibling SHA1Transform does, and wipe it afterwards.
 */
static void SHATransform(ULONG State[5], UCHAR Buffer[64])
{
    ULONG a, b, c, d, e;
    ULONG BlockData[16];  /* aligned local copy of the input block */
    ULONG *Block;

    Block = BlockData;
    memcpy(BlockData, Buffer, 64);

    /* Copy Context->State[] to working variables */
    a = State[0];
    b = State[1];
    c = State[2];
    d = State[3];
    e = State[4];

    /* 4 rounds of 20 operations each. Loop unrolled.  The argument
       rotation (a,b,c,d,e) -> (e,a,b,c,d) between steps replaces the
       usual end-of-step variable shuffle. */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);

    /* Add the working variables back into Context->State[]
       (Davies-Meyer feed-forward). */
    State[0] += a;
    State[1] += b;
    State[2] += c;
    State[3] += d;
    State[4] += e;

    /* Wipe variables.
       NOTE(review): dead-store wipes may be elided by the optimizer;
       a guaranteed wipe would need SecureZeroMemory/explicit_bzero. */
    a = b = c = d = e = 0;
    memset(BlockData, 0, sizeof(BlockData));
}