// Applies the mul_add step to |num| consecutive words of |ap| and |rp| using
// the multiplier |w|, threading one carry word through every step, and returns
// the final carry.
//
// mul_add is defined outside this block; presumably each step performs
// rp[i] += ap[i] * w + carry and leaves the overflow in the carry -- confirm
// against its definition.
//
// A negative |num| trips the assert where asserts are enabled; any |num| <= 0
// returns 0 without touching |rp|.
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) {
  assert(num >= 0);

  BN_ULONG carry = 0;
  if (num <= 0) {
    return carry;
  }

  // Bulk of the work: four words per pass.
  for (; num >= 4; num -= 4, ap += 4, rp += 4) {
    mul_add(rp[0], ap[0], w, carry);
    mul_add(rp[1], ap[1], w, carry);
    mul_add(rp[2], ap[2], w, carry);
    mul_add(rp[3], ap[3], w, carry);
  }

  // Remaining zero to three words, one at a time.
  for (; num != 0; --num, ++ap, ++rp) {
    mul_add(rp[0], ap[0], w, carry);
  }

  return carry;
}
static void gaussianBlur(const T * __srcp, float * temp, float * dstp, const float * weightsH, const float * weightsV, const int width, const int height, const int srcStride, const int dstStride, const int radiusH, const int radiusV, const float offset) noexcept { const int diameter = radiusV * 2 + 1; const T ** _srcp = new const T *[diameter]; _srcp[radiusV] = __srcp; for (int i = 1; i <= radiusV; i++) { _srcp[radiusV - i] = _srcp[radiusV - 1 + i]; _srcp[radiusV + i] = _srcp[radiusV] + srcStride * i; } weightsH += radiusH; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x += 4) { Vec4f sum = zero_4f(); for (int i = 0; i < diameter; i++) { if (std::is_same<T, uint8_t>::value) { const Vec4f srcp = to_float(Vec4i().load_4uc(_srcp[i] + x)); sum = mul_add(srcp, weightsV[i], sum); } else if (std::is_same<T, uint16_t>::value) { const Vec4f srcp = to_float(Vec4i().load_4us(_srcp[i] + x)); sum = mul_add(srcp, weightsV[i], sum); } else { const Vec4f srcp = Vec4f().load_a(_srcp[i] + x); sum = mul_add(srcp + offset, weightsV[i], sum); } } sum.store_a(temp + x); } for (int i = 1; i <= radiusH; i++) { temp[-i] = temp[-1 + i]; temp[width - 1 + i] = temp[width - i]; } for (int x = 0; x < width; x += 4) { Vec4f sum = zero_4f(); for (int i = -radiusH; i <= radiusH; i++) { const Vec4f srcp = Vec4f().load(temp + x + i); sum = mul_add(srcp, weightsH[i], sum); } sum.stream(dstp + x); } for (int i = 0; i < diameter - 1; i++) _srcp[i] = _srcp[i + 1]; if (y < height - 1 - radiusV) _srcp[diameter - 1] += srcStride; else if (y > height - 1 - radiusV) _srcp[diameter - 1] -= srcStride; dstp += dstStride; } delete[] _srcp; }
void discretizeGM(const float * _srcp, uint16_t * dstp, const int width, const int height, const int srcStride, const int dstStride, const float magnitude, const uint16_t peak, const float offset) noexcept { for (int y = 0; y < height; y++) { for (int x = 0; x < width; x += 8) { const Vec4f srcp_4f_0 = Vec4f().load_a(_srcp + x); const Vec4f srcp_4f_1 = Vec4f().load_a(_srcp + x + 4); const Vec4i srcp_4i_0 = truncate_to_int(mul_add(srcp_4f_0, magnitude, 0.5f)); const Vec4i srcp_4i_1 = truncate_to_int(mul_add(srcp_4f_1, magnitude, 0.5f)); const Vec8us srcp = compress_saturated_s2u(srcp_4i_0, srcp_4i_1); min(srcp, peak).stream(dstp + x); } _srcp += srcStride; dstp += dstStride; } }
static void gaussianBlurH(const T * _srcp, float * temp, float * dstp, const float * weights, const int width, const int height, const int srcStride, const int dstStride, const int radius, const float offset) noexcept { weights += radius; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x += 4) { if (std::is_same<T, uint8_t>::value) to_float(Vec4i().load_4uc(_srcp + x)).store_a(temp + x); else if (std::is_same<T, uint16_t>::value) to_float(Vec4i().load_4us(_srcp + x)).store_a(temp + x); else (Vec4f().load_a(_srcp + x) + offset).store_a(temp + x); } for (int i = 1; i <= radius; i++) { temp[-i] = temp[-1 + i]; temp[width - 1 + i] = temp[width - i]; } for (int x = 0; x < width; x += 4) { Vec4f sum = zero_4f(); for (int i = -radius; i <= radius; i++) { const Vec4f srcp = Vec4f().load(temp + x + i); sum = mul_add(srcp, weights[i], sum); } sum.stream(dstp + x); } _srcp += srcStride; dstp += dstStride; } }
// a1 < 0, a2 < 0, a3 < 0, a4 > 0 // x >= t2/a2 // x >= t3/a3 // determine glb among these // the resolve glb with others. // e.g. t2/a2 >= t3/a3 // then replace a3*x + t3 by t3/a3 - t2/a2 <= 0 // bound_type model_based_opt::maximize(rational& value) { SASSERT(invariant()); unsigned_vector other; while (!objective().m_vars.empty()) { TRACE("opt", tout << "tableau\n";); var v = objective().m_vars.back(); unsigned x = v.m_id; rational const& coeff = v.m_coeff; unsigned bound_row_index; rational bound_coeff; other.reset(); if (find_bound(x, bound_row_index, bound_coeff, other, coeff.is_pos())) { SASSERT(!bound_coeff.is_zero()); for (unsigned i = 0; i < other.size(); ++i) { resolve(bound_row_index, bound_coeff, other[i], x); } // coeff*x + objective <= ub // a2*x + t2 <= 0 // => coeff*x <= -t2*coeff/a2 // objective + t2*coeff/a2 <= ub mul_add(m_objective_id, - coeff/bound_coeff, bound_row_index); m_rows[bound_row_index].m_alive = false; } else { return unbounded; } }
// Produces a signature over msg as the concatenation r || s, each half
// right-aligned in a field of q.bytes() bytes.
//
// The message bytes are first fed to the RNG via add_entropy. A fresh k below
// q is then drawn, and r and s are recomputed until neither is zero.
// NOTE(review): mul_add(x, r, m) is defined elsewhere; presumably it yields
// x * r + m -- confirm.
SecureVector<byte> DSA_Signature_Operation::sign(const byte msg[], size_t msg_len, RandomNumberGenerator& rng)
   {
   rng.add_entropy(msg, msg_len);

   BigInt msg_value(msg, msg_len);
   BigInt r = 0, s = 0;

   // r and s both start at zero, so the body always runs at least once.
   do
      {
      BigInt k;
      do
         {
         k.randomize(rng, q.bits());
         }
      while(k >= q);

      r = mod_q.reduce(powermod_g_p(k));

      const BigInt k_inv = inverse_mod(k, q);
      s = mod_q.multiply(k_inv, mul_add(x, r, msg_value));
      }
   while(r == 0 || s == 0);

   SecureVector<byte> signature(2*q.bytes());
   const size_t total_len = signature.size();

   // Right-align each value inside its half of the buffer.
   r.binary_encode(&signature[total_len / 2 - r.bytes()]);
   s.binary_encode(&signature[total_len - s.bytes()]);
   return signature;
   }
// Produces a signature over msg as the concatenation r || s, each half
// right-aligned in a field of order.bytes() bytes.
//
// The message bytes are first fed to the RNG via add_entropy. A nonce k below
// the group order is then drawn, and r and s are recomputed until neither is
// zero.
// NOTE(review): mul_add(x, r, m) is defined elsewhere; presumably it yields
// x * r + m -- confirm.
SecureVector<byte> ECDSA_Signature_Operation::sign(const byte msg[], size_t msg_len, RandomNumberGenerator& rng)
   {
   rng.add_entropy(msg, msg_len);

   BigInt m(msg, msg_len);
   BigInt r = 0, s = 0;

   // r and s both start at zero, so the body always runs at least once.
   do
      {
      // This contortion is necessary for the tests
      BigInt k;
      k.randomize(rng, order.bits());
      while(k >= order)
         {
         k.randomize(rng, order.bits() - 1);
         }

      PointGFp nonce_point = base_point * k;
      r = mod_order.reduce(nonce_point.get_affine_x());

      const BigInt k_inv = inverse_mod(k, order);
      s = mod_order.multiply(k_inv, mul_add(x, r, m));
      }
   while(r == 0 || s == 0);

   SecureVector<byte> signature(2*order.bytes());
   const size_t total_len = signature.size();

   // Right-align each value inside its half of the buffer.
   r.binary_encode(&signature[total_len / 2 - r.bytes()]);
   s.binary_encode(&signature[total_len - s.bytes()]);
   return signature;
   }
// Applies the mul_add step to |num| consecutive words of |ap| and |rp| using
// the multiplier |w|, threading one carry word through every step, and returns
// the final carry (0 when |num| is 0).
//
// mul_add is defined outside this block; presumably each step performs
// rp[i] += ap[i] * w + carry and leaves the overflow in the carry -- confirm
// against its definition.
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, BN_ULONG w) {
  BN_ULONG carry = 0;

  // Bulk of the work: four words per pass.
  for (; num >= 4; num -= 4, ap += 4, rp += 4) {
    mul_add(rp[0], ap[0], w, carry);
    mul_add(rp[1], ap[1], w, carry);
    mul_add(rp[2], ap[2], w, carry);
    mul_add(rp[3], ap[3], w, carry);
  }

  // At most three words remain; process them in ascending order so the carry
  // chains exactly as in the unrolled part.
  for (; num != 0; --num, ++ap, ++rp) {
    mul_add(rp[0], ap[0], w, carry);
  }

  return carry;
}
void discretizeGM(const float * _srcp, uint8_t * dstp, const int width, const int height, const int srcStride, const int dstStride, const float magnitude, const uint16_t peak, const float offset) noexcept { for (int y = 0; y < height; y++) { for (int x = 0; x < width; x += 16) { const Vec8f srcp_8f_0 = Vec8f().load_a(_srcp + x); const Vec8f srcp_8f_1 = Vec8f().load_a(_srcp + x + 8); const Vec8i srcp_8i_0 = truncate_to_int(mul_add(srcp_8f_0, magnitude, 0.5f)); const Vec8i srcp_8i_1 = truncate_to_int(mul_add(srcp_8f_1, magnitude, 0.5f)); const Vec8s srcp_8s_0 = compress_saturated(srcp_8i_0.get_low(), srcp_8i_0.get_high()); const Vec8s srcp_8s_1 = compress_saturated(srcp_8i_1.get_low(), srcp_8i_1.get_high()); const Vec16uc srcp = compress_saturated_s2u(srcp_8s_0, srcp_8s_1); srcp.stream(dstp + x); } _srcp += srcStride; dstp += dstStride; } }
static void gaussianBlurV(const T * __srcp, float * dstp, const float * weights, const int width, const int height, const int srcStride, const int dstStride, const int radius, const float offset) noexcept { const int diameter = radius * 2 + 1; const T ** _srcp = new const T *[diameter]; _srcp[radius] = __srcp; for (int i = 1; i <= radius; i++) { _srcp[radius - i] = _srcp[radius - 1 + i]; _srcp[radius + i] = _srcp[radius] + srcStride * i; } for (int y = 0; y < height; y++) { for (int x = 0; x < width; x += 8) { Vec8f sum = zero_8f(); for (int i = 0; i < diameter; i++) { if (std::is_same<T, uint8_t>::value) { const Vec8f srcp = to_float(Vec8i().load_8uc(_srcp[i] + x)); sum = mul_add(srcp, weights[i], sum); } else if (std::is_same<T, uint16_t>::value) { const Vec8f srcp = to_float(Vec8i().load_8us(_srcp[i] + x)); sum = mul_add(srcp, weights[i], sum); } else { const Vec8f srcp = Vec8f().load_a(_srcp[i] + x); sum = mul_add(srcp + offset, weights[i], sum); } } sum.stream(dstp + x); } for (int i = 0; i < diameter - 1; i++) _srcp[i] = _srcp[i + 1]; if (y < height - 1 - radius) _srcp[diameter - 1] += srcStride; else if (y > height - 1 - radius) _srcp[diameter - 1] -= srcStride; dstp += dstStride; } delete[] _srcp; }
int main(int argc, char **argv) { jit_int arg1, arg2, arg3; void *args[3]; jit_int result; // Create a context to hold the JIT's primary state. jit_context context; // Create the function object. mul_add_function mul_add(context); // Execute the function and print the result. This will arrange // to call "mul_add_function::build" to build the function's body. arg1 = 3; arg2 = 5; arg3 = 2; args[0] = &arg1; args[1] = &arg2; args[2] = &arg3; mul_add.apply(args, &result); printf("mul_add(3, 5, 2) = %d\n", (int)result); // Execute the function again, to demonstrate that the // on-demand compiler is not invoked a second time. arg1 = 13; arg2 = 5; arg3 = 7; args[0] = &arg1; args[1] = &arg2; args[2] = &arg3; mul_add.apply(args, &result); printf("mul_add(13, 5, 7) = %d\n", (int)result); // Force the function to be recompiled. mul_add.build_start(); mul_add.build(); mul_add.compile(); mul_add.build_end(); // Execute the function a third time, after it is recompiled. arg1 = 2; arg2 = 18; arg3 = -3; args[0] = &arg1; args[1] = &arg2; args[2] = &arg3; mul_add.apply(args, &result); printf("mul_add(2, 18, -3) = %d\n", (int)result); /* Finished */ return 0; }
// Returns the number of ticks spent computing r and s for the nonce encoded
// in input.
//
// The nonce k is built from the raw input bytes and the message is a random
// value of m_order.bits() bits; both are prepared before the clock starts, so
// only the nonce-dependent arithmetic is measured.
ticks ECDSA_Timing_Test::measure_critical_function(std::vector<uint8_t> input)
   {
   const Botan::BigInt k(input.data(), input.size());
   const Botan::BigInt msg(Timing_Test::timing_test_rng(), m_order.bits());

   const ticks started_at = get_ticks();

   //The following ECDSA operations involve and should not leak any information about k.
   const Botan::PointGFp k_times_P = m_base_point.blinded_multiply(k, Timing_Test::timing_test_rng());
   const Botan::BigInt r = m_mod_order.reduce(k_times_P.get_affine_x());
   const Botan::BigInt s = m_mod_order.multiply(inverse_mod(k, m_order), mul_add(m_x, r, msg));

   const ticks finished_at = get_ticks();

   return (finished_at - started_at);
   }
// Returns the number of ticks spent computing k^-1, r and s for the nonce
// encoded in input.
//
// The nonce k is built from the raw input bytes; the message is a constant so
// that it adds no variation between measurements. Both are prepared before the
// clock starts.
ticks ECDSA_Timing_Test::measure_critical_function(std::vector<uint8_t> input)
   {
   const Botan::BigInt k(input.data(), input.size());
   const Botan::BigInt msg(5); // fixed message to minimize noise

   const ticks started_at = get_ticks();

   //The following ECDSA operations involve and should not leak any information about k.
   const Botan::BigInt k_inv = Botan::inverse_mod(k, m_group.get_order());
   const Botan::PointGFp k_times_P =
      m_group.blinded_base_point_multiply(k, Timing_Test::timing_test_rng(), m_ws);
   const Botan::BigInt r = m_group.mod_order(k_times_P.get_affine_x());
   const Botan::BigInt s = m_group.multiply_mod_order(k_inv, mul_add(m_x, r, msg));

   // The values are computed only for their cost; mark them as deliberately unused.
   BOTAN_UNUSED(r, s);

   const ticks finished_at = get_ticks();

   return (finished_at - started_at);
   }
static void detectEdge(float * blur, float * gradient, unsigned * direction, const int width, const int height, const int stride, const int bgStride, const int mode, const int op) noexcept { float * srcpp = blur; float * srcp = blur; float * srcpn = blur + bgStride; srcp[-1] = srcp[0]; srcp[width] = srcp[width - 1]; for (int y = 0; y < height; y++) { srcpn[-1] = srcpn[0]; srcpn[width] = srcpn[width - 1]; for (int x = 0; x < width; x += 4) { const Vec4f topLeft = Vec4f().load(srcpp + x - 1); const Vec4f top = Vec4f().load_a(srcpp + x); const Vec4f topRight = Vec4f().load(srcpp + x + 1); const Vec4f left = Vec4f().load(srcp + x - 1); const Vec4f right = Vec4f().load(srcp + x + 1); const Vec4f bottomLeft = Vec4f().load(srcpn + x - 1); const Vec4f bottom = Vec4f().load_a(srcpn + x); const Vec4f bottomRight = Vec4f().load(srcpn + x + 1); Vec4f gx, gy; if (op == 0) { gx = right - left; gy = top - bottom; } else if (op == 1) { gx = (topRight + right + bottomRight - topLeft - left - bottomLeft) * 0.5f; gy = (topLeft + top + topRight - bottomLeft - bottom - bottomRight) * 0.5f; } else if (op == 2) { gx = topRight + mul_add(2.f, right, bottomRight) - topLeft - mul_add(2.f, left, bottomLeft); gy = topLeft + mul_add(2.f, top, topRight) - bottomLeft - mul_add(2.f, bottom, bottomRight); } else { gx = mul_add(3.f, topRight, mul_add(10.f, right, 3.f * bottomRight)) - mul_add(3.f, topLeft, mul_add(10.f, left, 3.f * bottomLeft)); gy = mul_add(3.f, topLeft, mul_add(10.f, top, 3.f * topRight)) - mul_add(3.f, bottomLeft, mul_add(10.f, bottom, 3.f * bottomRight)); } sqrt(mul_add(gx, gx, gy * gy)).stream(gradient + x); if (mode == 0) { Vec4f dr = atan2(gy, gx); dr = if_add(dr < 0.f, dr, M_PIF); const Vec4ui bin = Vec4ui(truncate_to_int(mul_add(dr, 4.f * M_1_PIF, 0.5f))); select(bin >= 4, zero_128b(), bin).stream(direction + x); } } srcpp = srcp; srcp = srcpn; if (y < height - 2) srcpn += bgStride; gradient += bgStride; direction += stride; } }
/* Entry point: prints the value of mul_add(5, 7, 31) as an unsigned decimal
 * followed by a newline, then exits with status 0.
 * NOTE(review): mul_add is defined outside this view; presumably it returns an
 * unsigned int to match the "%u" conversion -- confirm. */
int main(void) { printf("%u\n", mul_add(5, 7, 31)); return 0; }