C++ (Cpp) mul_add 예제들

예제 #1

0

파일 보기

파일: generic.c 프로젝트: thejpster/ring

/*
 * Runs the mul_add() macro over |num| consecutive word pairs (rp[i], ap[i])
 * with the multiplier word |w|, threading a single carry word through every
 * step, and returns the carry left after the last step.
 *
 * mul_add() is defined outside this excerpt; presumably each step performs
 * rp[i] += ap[i] * w + carry and leaves the high word in the carry.
 *
 * A non-positive |num| returns 0 without touching |rp| (and trips the assert
 * in debug builds when negative).
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
                          BN_ULONG w) {
  BN_ULONG carry = 0;

  assert(num >= 0);
  if (num <= 0) {
    return carry;
  }

  /* Bulk of the work: four words per iteration. */
  for (; num >= 4; num -= 4, ap += 4, rp += 4) {
    mul_add(rp[0], ap[0], w, carry);
    mul_add(rp[1], ap[1], w, carry);
    mul_add(rp[2], ap[2], w, carry);
    mul_add(rp[3], ap[3], w, carry);
  }

  /* Whatever is left over (at most three words), one at a time. */
  for (; num > 0; --num, ++ap, ++rp) {
    mul_add(rp[0], ap[0], w, carry);
  }

  return carry;
}

예제 #2

0

파일 보기

파일: TCanny_SSE2.cpp 프로젝트: HomeOfVapourSynthEvolution/VapourSynth-TCanny

static void gaussianBlur(const T * __srcp, float * temp, float * dstp, const float * weightsH, const float * weightsV, const int width, const int height,
                         const int srcStride, const int dstStride, const int radiusH, const int radiusV, const float offset) noexcept {
    const int diameter = radiusV * 2 + 1;
    const T ** _srcp = new const T *[diameter];

    _srcp[radiusV] = __srcp;
    for (int i = 1; i <= radiusV; i++) {
        _srcp[radiusV - i] = _srcp[radiusV - 1 + i];
        _srcp[radiusV + i] = _srcp[radiusV] + srcStride * i;
    }

    weightsH += radiusH;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x += 4) {
            Vec4f sum = zero_4f();

            for (int i = 0; i < diameter; i++) {
                if (std::is_same<T, uint8_t>::value) {
                    const Vec4f srcp = to_float(Vec4i().load_4uc(_srcp[i] + x));
                    sum = mul_add(srcp, weightsV[i], sum);
                } else if (std::is_same<T, uint16_t>::value) {
                    const Vec4f srcp = to_float(Vec4i().load_4us(_srcp[i] + x));
                    sum = mul_add(srcp, weightsV[i], sum);
                } else {
                    const Vec4f srcp = Vec4f().load_a(_srcp[i] + x);
                    sum = mul_add(srcp + offset, weightsV[i], sum);
                }
            }

            sum.store_a(temp + x);
        }

        for (int i = 1; i <= radiusH; i++) {
            temp[-i] = temp[-1 + i];
            temp[width - 1 + i] = temp[width - i];
        }

        for (int x = 0; x < width; x += 4) {
            Vec4f sum = zero_4f();

            for (int i = -radiusH; i <= radiusH; i++) {
                const Vec4f srcp = Vec4f().load(temp + x + i);
                sum = mul_add(srcp, weightsH[i], sum);
            }

            sum.stream(dstp + x);
        }

        for (int i = 0; i < diameter - 1; i++)
            _srcp[i] = _srcp[i + 1];
        if (y < height - 1 - radiusV)
            _srcp[diameter - 1] += srcStride;
        else if (y > height - 1 - radiusV)
            _srcp[diameter - 1] -= srcStride;
        dstp += dstStride;
    }

    delete[] _srcp;
}

예제 #3

0

파일 보기

파일: TCanny_SSE2.cpp 프로젝트: HomeOfVapourSynthEvolution/VapourSynth-TCanny

void discretizeGM(const float * _srcp, uint16_t * dstp, const int width, const int height, const int srcStride, const int dstStride,
                  const float magnitude, const uint16_t peak, const float offset) noexcept {
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x += 8) {
            const Vec4f srcp_4f_0 = Vec4f().load_a(_srcp + x);
            const Vec4f srcp_4f_1 = Vec4f().load_a(_srcp + x + 4);
            const Vec4i srcp_4i_0 = truncate_to_int(mul_add(srcp_4f_0, magnitude, 0.5f));
            const Vec4i srcp_4i_1 = truncate_to_int(mul_add(srcp_4f_1, magnitude, 0.5f));
            const Vec8us srcp = compress_saturated_s2u(srcp_4i_0, srcp_4i_1);
            min(srcp, peak).stream(dstp + x);
        }

        _srcp += srcStride;
        dstp += dstStride;
    }
}

예제 #4

0

파일 보기

파일: TCanny_SSE2.cpp 프로젝트: HomeOfVapourSynthEvolution/VapourSynth-TCanny

static void gaussianBlurH(const T * _srcp, float * temp, float * dstp, const float * weights, const int width, const int height,
                          const int srcStride, const int dstStride, const int radius, const float offset) noexcept {
    weights += radius;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x += 4) {
            if (std::is_same<T, uint8_t>::value)
                to_float(Vec4i().load_4uc(_srcp + x)).store_a(temp + x);
            else if (std::is_same<T, uint16_t>::value)
                to_float(Vec4i().load_4us(_srcp + x)).store_a(temp + x);
            else
                (Vec4f().load_a(_srcp + x) + offset).store_a(temp + x);
        }

        for (int i = 1; i <= radius; i++) {
            temp[-i] = temp[-1 + i];
            temp[width - 1 + i] = temp[width - i];
        }

        for (int x = 0; x < width; x += 4) {
            Vec4f sum = zero_4f();

            for (int i = -radius; i <= radius; i++) {
                const Vec4f srcp = Vec4f().load(temp + x + i);
                sum = mul_add(srcp, weights[i], sum);
            }

            sum.stream(dstp + x);
        }

        _srcp += srcStride;
        dstp += dstStride;
    }
}

예제 #5

0

파일 보기

파일: model_based_opt.cpp 프로젝트: ttsvetanov/z3

    // Eliminates the objective's variables one at a time using a bounding row
    // for each; returns `unbounded` as soon as a variable has no such row.
    //
    // Derivation notes:
    // a1 < 0, a2 < 0, a3 < 0, a4 > 0
    // x >= t2/a2
    // x >= t3/a3
    // determine glb among these
    // then resolve glb with others.
    // e.g. t2/a2 >= t3/a3
    // then replace a3*x + t3 by t3/a3 - t2/a2 <= 0
    //
    // NOTE(review): this excerpt ends inside the function body; the code after
    // the while loop (including the final return) is not visible here.
    bound_type model_based_opt::maximize(rational& value) {
        SASSERT(invariant());
        unsigned_vector other;
        // Keep going while the objective row still mentions a variable.
        while (!objective().m_vars.empty()) {
            TRACE("opt", tout << "tableau\n";);
            // Work on the last variable of the objective row.
            var v = objective().m_vars.back();
            unsigned x = v.m_id;
            rational const& coeff = v.m_coeff;
            unsigned bound_row_index;
            rational bound_coeff;
            other.reset();
            // find_bound reports the bounding row and x's coefficient in it, and
            // fills `other` with further row indices; the last argument is the
            // sign of x's objective coefficient.
            if (find_bound(x, bound_row_index, bound_coeff, other, coeff.is_pos())) {
                SASSERT(!bound_coeff.is_zero());
                // Resolve each row listed in `other` against the bounding row on x.
                for (unsigned i = 0; i < other.size(); ++i) {
                    resolve(bound_row_index, bound_coeff, other[i], x);
                }
                // coeff*x + objective <= ub
                // a2*x + t2 <= 0
                // => coeff*x <= -t2*coeff/a2
                // objective + t2*coeff/a2 <= ub

                // Fold the bounding row, scaled by -coeff/bound_coeff, into the
                // objective row, then retire the bounding row.
                mul_add(m_objective_id, - coeff/bound_coeff, bound_row_index);
                m_rows[bound_row_index].m_alive = false;
            }
            else {
                // No bounding row was found for x.
                return unbounded;
            }
        }

예제 #6

0

파일 보기

파일: dsa.cpp 프로젝트: BenjaminSchiborr/safe

/*
* Produces a DSA signature over msg[0 .. msg_len).
*
* The message bytes are first fed to the RNG via add_entropy(). A candidate k
* below q is then drawn and (r, s) computed from it; the whole step is
* repeated until neither r nor s is zero.
*
* The returned buffer is 2 * q.bytes() long, with r right-aligned in the first
* half and s right-aligned in the second half.
*/
SecureVector<byte>
DSA_Signature_Operation::sign(const byte msg[], size_t msg_len,
                              RandomNumberGenerator& rng)
   {
   rng.add_entropy(msg, msg_len);

   BigInt m(msg, msg_len);
   BigInt r = 0;
   BigInt s = 0;

   do
      {
      // Redraw k until it falls below q
      BigInt k;
      k.randomize(rng, q.bits());
      while(k >= q)
         k.randomize(rng, q.bits());

      r = mod_q.reduce(powermod_g_p(k));
      s = mod_q.multiply(inverse_mod(k, q), mul_add(x, r, m));
      }
   while(r == 0 || s == 0);

   SecureVector<byte> sig(2*q.bytes());
   r.binary_encode(&sig[sig.size() / 2 - r.bytes()]);
   s.binary_encode(&sig[sig.size() - s.bytes()]);
   return sig;
   }

예제 #7

0

파일 보기

파일: ecdsa.cpp 프로젝트: BenjaminSchiborr/safe

/*
* Produces an ECDSA signature over msg[0 .. msg_len).
*
* The message bytes are first fed to the RNG via add_entropy(). A candidate k
* is drawn and (r, s) computed from it; the whole step is repeated until
* neither r nor s is zero.
*
* The returned buffer is 2 * order.bytes() long, with r right-aligned in the
* first half and s right-aligned in the second half.
*/
SecureVector<byte>
ECDSA_Signature_Operation::sign(const byte msg[], size_t msg_len,
                                RandomNumberGenerator& rng)
   {
   rng.add_entropy(msg, msg_len);

   BigInt m(msg, msg_len);
   BigInt r = 0;
   BigInt s = 0;

   do
      {
      // The two-stage draw (full width first, one bit narrower on retry) is
      // kept as is: the original notes it is necessary for the tests
      BigInt k;
      k.randomize(rng, order.bits());

      while(k >= order)
         k.randomize(rng, order.bits() - 1);

      PointGFp kP = base_point * k;
      r = mod_order.reduce(kP.get_affine_x());
      s = mod_order.multiply(inverse_mod(k, order), mul_add(x, r, m));
      }
   while(r == 0 || s == 0);

   SecureVector<byte> sig(2*order.bytes());
   r.binary_encode(&sig[sig.size() / 2 - r.bytes()]);
   s.binary_encode(&sig[sig.size() - s.bytes()]);
   return sig;
   }

예제 #8

0

파일 보기

파일: x86_64-gcc.c 프로젝트: AxiomaAbsurdo/time_web_app

/*
 * Runs the mul_add() macro over |num| consecutive word pairs (rp[i], ap[i])
 * with the multiplier word |w|, threading a single carry word through every
 * step, and returns the carry left after the last step.
 *
 * mul_add() is defined outside this excerpt; presumably each step performs
 * rp[i] += ap[i] * w + carry and leaves the high word in the carry.
 *
 * With |num| == 0 nothing is touched and 0 is returned.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
                          BN_ULONG w) {
  BN_ULONG carry = 0;

  if (num == 0) {
    return carry;
  }

  /* Bulk of the work: four words per iteration. */
  for (; num >= 4; num -= 4) {
    mul_add(rp[0], ap[0], w, carry);
    mul_add(rp[1], ap[1], w, carry);
    mul_add(rp[2], ap[2], w, carry);
    mul_add(rp[3], ap[3], w, carry);
    ap += 4;
    rp += 4;
  }

  /* At most three words remain; handle them one at a time. */
  for (; num != 0; --num) {
    mul_add(rp[0], ap[0], w, carry);
    ++ap;
    ++rp;
  }

  return carry;
}

예제 #9

0

파일 보기

파일: TCanny_AVX.cpp 프로젝트: HomeOfVapourSynthEvolution/VapourSynth-TCanny

void discretizeGM(const float * _srcp, uint8_t * dstp, const int width, const int height, const int srcStride, const int dstStride,
                  const float magnitude, const uint16_t peak, const float offset) noexcept {
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x += 16) {
            const Vec8f srcp_8f_0 = Vec8f().load_a(_srcp + x);
            const Vec8f srcp_8f_1 = Vec8f().load_a(_srcp + x + 8);
            const Vec8i srcp_8i_0 = truncate_to_int(mul_add(srcp_8f_0, magnitude, 0.5f));
            const Vec8i srcp_8i_1 = truncate_to_int(mul_add(srcp_8f_1, magnitude, 0.5f));
            const Vec8s srcp_8s_0 = compress_saturated(srcp_8i_0.get_low(), srcp_8i_0.get_high());
            const Vec8s srcp_8s_1 = compress_saturated(srcp_8i_1.get_low(), srcp_8i_1.get_high());
            const Vec16uc srcp = compress_saturated_s2u(srcp_8s_0, srcp_8s_1);
            srcp.stream(dstp + x);
        }

        _srcp += srcStride;
        dstp += dstStride;
    }
}

예제 #10

0

파일 보기

파일: TCanny_AVX.cpp 프로젝트: HomeOfVapourSynthEvolution/VapourSynth-TCanny

static void gaussianBlurV(const T * __srcp, float * dstp, const float * weights, const int width, const int height, const int srcStride, const int dstStride,
                          const int radius, const float offset) noexcept {
    const int diameter = radius * 2 + 1;
    const T ** _srcp = new const T *[diameter];

    _srcp[radius] = __srcp;
    for (int i = 1; i <= radius; i++) {
        _srcp[radius - i] = _srcp[radius - 1 + i];
        _srcp[radius + i] = _srcp[radius] + srcStride * i;
    }

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x += 8) {
            Vec8f sum = zero_8f();

            for (int i = 0; i < diameter; i++) {
                if (std::is_same<T, uint8_t>::value) {
                    const Vec8f srcp = to_float(Vec8i().load_8uc(_srcp[i] + x));
                    sum = mul_add(srcp, weights[i], sum);
                } else if (std::is_same<T, uint16_t>::value) {
                    const Vec8f srcp = to_float(Vec8i().load_8us(_srcp[i] + x));
                    sum = mul_add(srcp, weights[i], sum);
                } else {
                    const Vec8f srcp = Vec8f().load_a(_srcp[i] + x);
                    sum = mul_add(srcp + offset, weights[i], sum);
                }
            }

            sum.stream(dstp + x);
        }

        for (int i = 0; i < diameter - 1; i++)
            _srcp[i] = _srcp[i + 1];
        if (y < height - 1 - radius)
            _srcp[diameter - 1] += srcStride;
        else if (y > height - 1 - radius)
            _srcp[diameter - 1] -= srcStride;
        dstp += dstStride;
    }

    delete[] _srcp;
}

예제 #11

0

파일 보기

파일: t4.cpp 프로젝트: 8l/lllm

// Demo driver: applies a mul_add_function three times, forcing a rebuild
// before the third call, and prints each result.
int main(int argc, char **argv)
{
	// The three inputs live in fixed locals, so the pointer table handed to
	// apply() only has to be set up once.
	jit_int arg1, arg2, arg3;
	void *args[3] = { &arg1, &arg2, &arg3 };
	jit_int result;

	// Context that holds the JIT's primary state.
	jit_context context;

	// The function object under test.
	mul_add_function mul_add(context);

	// First call.  This arranges for "mul_add_function::build"
	// to be called to build the function's body.
	arg1 = 3;
	arg2 = 5;
	arg3 = 2;
	mul_add.apply(args, &result);
	printf("mul_add(3, 5, 2) = %d\n", (int)result);

	// Second call, to demonstrate that the on-demand compiler
	// is not invoked a second time.
	arg1 = 13;
	arg2 = 5;
	arg3 = 7;
	mul_add.apply(args, &result);
	printf("mul_add(13, 5, 7) = %d\n", (int)result);

	// Force the function to be recompiled.
	mul_add.build_start();
	mul_add.build();
	mul_add.compile();
	mul_add.build_end();

	// Third call, after the recompile.
	arg1 = 2;
	arg2 = 18;
	arg3 = -3;
	mul_add.apply(args, &result);
	printf("mul_add(2, 18, -3) = %d\n", (int)result);

	// Finished.
	return 0;
}

예제 #12

0

파일 보기

파일: timing_tests.cpp 프로젝트: lanurmi/botan

/*
* Times the part of an ECDSA signing computation that depends on the nonce.
* `input` supplies the nonce bytes; the message is generated from the test RNG
* with m_order.bits() bits. Returns the difference between the get_ticks()
* readings taken after and before the timed section.
*/
ticks ECDSA_Timing_Test::measure_critical_function(std::vector<uint8_t> input)
   {
   const Botan::BigInt nonce(input.data(), input.size());
   const Botan::BigInt msg(Timing_Test::timing_test_rng(), m_order.bits());

   ticks t_begin = get_ticks();

   // Timed section: these operations involve the nonce and should not leak
   // any information about it.
   const Botan::PointGFp nonce_times_P = m_base_point.blinded_multiply(nonce, Timing_Test::timing_test_rng());
   const Botan::BigInt r = m_mod_order.reduce(nonce_times_P.get_affine_x());
   const Botan::BigInt s = m_mod_order.multiply(inverse_mod(nonce, m_order), mul_add(m_x, r, msg));

   ticks t_end = get_ticks();

   return (t_end - t_begin);
   }

예제 #13

0

파일 보기

파일: timing_tests.cpp 프로젝트: noloader/botan

/*
* Times the part of an ECDSA signing computation that depends on the nonce.
* `input` supplies the nonce bytes; the message is the constant 5. Returns the
* difference between the get_ticks() readings taken after and before the
* timed section.
*/
ticks ECDSA_Timing_Test::measure_critical_function(std::vector<uint8_t> input)
   {
   const Botan::BigInt nonce(input.data(), input.size());
   const Botan::BigInt fixed_msg(5); // constant message keeps measurement noise down

   ticks t_begin = get_ticks();

   // Timed section: these operations involve the nonce and should not leak
   // any information about it.

   const Botan::BigInt nonce_inv = Botan::inverse_mod(nonce, m_group.get_order());
   const Botan::PointGFp nonce_times_P = m_group.blinded_base_point_multiply(nonce, Timing_Test::timing_test_rng(), m_ws);
   const Botan::BigInt r = m_group.mod_order(nonce_times_P.get_affine_x());
   const Botan::BigInt s = m_group.multiply_mod_order(nonce_inv, mul_add(m_x, r, fixed_msg));

   // The results are only computed for their timing.
   BOTAN_UNUSED(r, s);

   ticks t_end = get_ticks();

   return (t_end - t_begin);
   }

예제 #14

0

파일 보기

파일: TCanny_SSE2.cpp 프로젝트: HomeOfVapourSynthEvolution/VapourSynth-TCanny

static void detectEdge(float * blur, float * gradient, unsigned * direction, const int width, const int height, const int stride, const int bgStride,
                       const int mode, const int op) noexcept {
    float * srcpp = blur;
    float * srcp = blur;
    float * srcpn = blur + bgStride;

    srcp[-1] = srcp[0];
    srcp[width] = srcp[width - 1];

    for (int y = 0; y < height; y++) {
        srcpn[-1] = srcpn[0];
        srcpn[width] = srcpn[width - 1];

        for (int x = 0; x < width; x += 4) {
            const Vec4f topLeft = Vec4f().load(srcpp + x - 1);
            const Vec4f top = Vec4f().load_a(srcpp + x);
            const Vec4f topRight = Vec4f().load(srcpp + x + 1);
            const Vec4f left = Vec4f().load(srcp + x - 1);
            const Vec4f right = Vec4f().load(srcp + x + 1);
            const Vec4f bottomLeft = Vec4f().load(srcpn + x - 1);
            const Vec4f bottom = Vec4f().load_a(srcpn + x);
            const Vec4f bottomRight = Vec4f().load(srcpn + x + 1);

            Vec4f gx, gy;

            if (op == 0) {
                gx = right - left;
                gy = top - bottom;
            } else if (op == 1) {
                gx = (topRight + right + bottomRight - topLeft - left - bottomLeft) * 0.5f;
                gy = (topLeft + top + topRight - bottomLeft - bottom - bottomRight) * 0.5f;
            } else if (op == 2) {
                gx = topRight + mul_add(2.f, right, bottomRight) - topLeft - mul_add(2.f, left, bottomLeft);
                gy = topLeft + mul_add(2.f, top, topRight) - bottomLeft - mul_add(2.f, bottom, bottomRight);
            } else {
                gx = mul_add(3.f, topRight, mul_add(10.f, right, 3.f * bottomRight)) - mul_add(3.f, topLeft, mul_add(10.f, left, 3.f * bottomLeft));
                gy = mul_add(3.f, topLeft, mul_add(10.f, top, 3.f * topRight)) - mul_add(3.f, bottomLeft, mul_add(10.f, bottom, 3.f * bottomRight));
            }

            sqrt(mul_add(gx, gx, gy * gy)).stream(gradient + x);

            if (mode == 0) {
                Vec4f dr = atan2(gy, gx);
                dr = if_add(dr < 0.f, dr, M_PIF);

                const Vec4ui bin = Vec4ui(truncate_to_int(mul_add(dr, 4.f * M_1_PIF, 0.5f)));
                select(bin >= 4, zero_128b(), bin).stream(direction + x);
            }
        }

        srcpp = srcp;
        srcp = srcpn;
        if (y < height - 2)
            srcpn += bgStride;
        gradient += bgStride;
        direction += stride;
    }
}

예제 #15

0

파일 보기

파일: mla.c 프로젝트: MatzeB/firm-testsuite

/*
 * Prints the value of mul_add(5, 7, 31) as an unsigned decimal followed by a
 * newline, then exits with status 0.
 * NOTE(review): mul_add is defined outside this excerpt; presumably it
 * computes 5 * 7 + 31 -- confirm against its definition.
 */
int main(void)
{
	printf("%u\n", mul_add(5, 7, 31));
	return 0;
}