Exemplo n.º 1
0
// Filters one edge of the U and V planes in a single pass.
// Four rows are loaded starting at u/v (p3..p0) and four more starting
// 4 * stride further on (q0..q3). Only p1, p0, q0 and q1 are written back,
// to the two rows on either side of that second position.
// thresh and ithresh feed the filter mask; hev_thresh is handed to DoFilter4().
// NOTE(review): the load/store/mask macros are defined elsewhere; presumably
// each register holds the U bytes and the V bytes side by side -- confirm.
static void VFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
                          int thresh, int ithresh, int hev_thresh) {
  const int rows_to_edge = 4 * stride;  // step from the p3 row to the q0 row
  __m128i p3, p2, p1, p0;               // rows before the edge
  __m128i q0, q1, q2, q3;               // rows after the edge
  __m128i edge_mask;

  // First half: rows before the edge, and the partial difference mask.
  LOADUV_H_EDGES4(u, v, stride, p3, p2, p1, p0);
  MAX_DIFF1(p3, p2, p1, p0, edge_mask);

  u += rows_to_edge;
  v += rows_to_edge;

  // Second half: rows after the edge complete the difference mask.
  LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, q3);
  MAX_DIFF2(q3, q2, q1, q0, edge_mask);

  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, edge_mask);
  DoFilter4(&p1, &p0, &q0, &q1, &edge_mask, hev_thresh);

  // u/v now point at the q0 row, so p1 sits two rows back.
  STOREUV(p1, u, v, -2 * stride);
  STOREUV(p0, u, v, -1 * stride);
  STOREUV(q0, u, v, 0 * stride);
  STOREUV(q1, u, v, 1 * stride);
}
Exemplo n.º 2
0
// Filters three edges spaced 4 bytes apart along p.
// For each edge, the four vectors before it (p3..p0, starting at p) and the
// four after it (q0..q3, starting at p + 4) are fetched with Load16x4(); the
// filtered p1, p0, q0, q1 are written back starting at p + 2.
// thresh and ithresh feed the filter mask; hev_thresh is handed to DoFilter4().
// NOTE(review): Load16x4()/Store16x4() are defined elsewhere; the second
// pointer argument (8 * stride further on) presumably addresses the lower
// eight rows -- confirm against the helpers.
static void HFilter16iSSE2(uint8_t* p, int stride,
                           int thresh, int ithresh, int hev_thresh) {
  int edge;
  __m128i p3, p2, p1, p0;  // samples before the edge
  __m128i q0, q1, q2, q3;  // samples after the edge
  __m128i mask;

  for (edge = 0; edge < 3; ++edge, p += 4) {
    uint8_t* const before = p;      // beginning of p3
    uint8_t* const after = p + 4;   // beginning of q0
    uint8_t* const dst = p + 2;     // beginning of p1

    Load16x4(before, before + 8 * stride, stride, &p3, &p2, &p1, &p0);
    MAX_DIFF1(p3, p2, p1, p0, mask);

    Load16x4(after, after + 8 * stride, stride, &q0, &q1, &q2, &q3);
    MAX_DIFF2(q3, q2, q1, q0, mask);

    COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);

    Store16x4(dst, dst + 8 * stride, stride, &p1, &p0, &q0, &q1);
  }
}
Exemplo n.º 3
0
// Filters three inner edges, each 4 rows (4 * stride bytes) after the last.
// Per edge: the four 16-byte rows before it (p3..p0) and the four after it
// (q0..q3) are loaded, and the filtered p1, p0, q0, q1 are written back with
// unaligned 16-byte stores.
// thresh and ithresh feed the filter mask; hev_thresh is handed to DoFilter4().
static void VFilter16iSSE2(uint8_t* p, int stride,
                           int thresh, int ithresh, int hev_thresh) {
  int edge;
  __m128i p3, p2, p1, p0;  // rows before the edge
  __m128i q0, q1, q2, q3;  // rows after the edge
  __m128i mask;

  for (edge = 0; edge < 3; ++edge) {
    // Rows before the edge, and the partial difference mask.
    LOAD_H_EDGES4(p, stride, p3, p2, p1, p0);
    MAX_DIFF1(p3, p2, p1, p0, mask);

    // Step onto the q0 row; this is also where the next iteration starts.
    p += 4 * stride;

    // Rows after the edge complete the difference mask.
    LOAD_H_EDGES4(p, stride, q0, q1, q2, q3);
    MAX_DIFF2(q3, q2, q1, q0, mask);

    COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);

    // Write back the two rows on each side of the edge.
    _mm_storeu_si128((__m128i*)(p - 2 * stride), p1);
    _mm_storeu_si128((__m128i*)(p - stride), p0);
    _mm_storeu_si128((__m128i*)p, q0);
    _mm_storeu_si128((__m128i*)(p + stride), q1);
  }
}
Exemplo n.º 4
0
// Filters three edges spaced 4 bytes apart along p (p advances by 4 per
// iteration). The four vectors loaded after one edge are kept and reused as
// the vectors before the next edge, so Load16x4() runs once before the loop
// and once per iteration.
// thresh and ithresh are passed to ComplexMask(); hev_thresh to DoFilter4().
// NOTE(review): Load16x4()/Store16x4() are defined elsewhere; the second
// pointer argument (8 * stride further on) presumably addresses the lower
// eight rows -- confirm against the helpers.
static void HFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
    int k;
    __m128i p3, p2, p1, p0;   // carried from one iteration to the next

    Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0);  // prologue

    for (k = 3; k > 0; --k) {
        __m128i mask, tmp1, tmp2;
        uint8_t* const b = p + 2;   // beginning of p1

        p += 4;  // beginning of q0 (and next span)

        MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
        // Reload p3/p2 with the first two vectors after the edge (the q0/q1
        // role); the last two (q2/q3) are parked in tmp1/tmp2.
        Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
        MAX_DIFF2(p3, p2, tmp1, tmp2, mask);

        // p3 and p2 are passed in the q0/q1 positions here. They are handed
        // over by address, so presumably they are updated in place.
        ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
        DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);

        // Write p1, p0, q0, q1 back, starting 2 bytes before the edge.
        Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);

        // rotate samples: p3/p2 already hold the next span's first two
        // vectors, and tmp1/tmp2 become its p1/p0.
        p1 = tmp1;
        p0 = tmp2;
    }
}
Exemplo n.º 5
0
// on three inner edges
// Each edge lies 4 rows (4 * stride bytes) after the previous one. The four
// rows loaded after one edge are kept and reused as the rows before the next
// edge, so LOAD_H_EDGES4 runs once before the loop and once per iteration.
// thresh and ithresh are passed to ComplexMask(); hev_thresh to DoFilter4().
static void VFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
    int k;
    __m128i p3, p2, p1, p0;   // carried from one iteration to the next

    LOAD_H_EDGES4(p, stride, p3, p2, p1, p0);  // prologue

    for (k = 3; k > 0; --k) {
        __m128i mask, tmp1, tmp2;
        uint8_t* const b = p + 2 * stride;   // beginning of p1
        p += 4 * stride;   // beginning of q0 (and of the next span)

        MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
        // Reload p3/p2 with the first two rows after the edge (the q0/q1
        // role); the last two (q2/q3) are parked in tmp1/tmp2.
        LOAD_H_EDGES4(p, stride, p3, p2, tmp1, tmp2);
        MAX_DIFF2(p3, p2, tmp1, tmp2, mask);

        // p3 and p2 are not just temporary variables here: they will be
        // re-used for next span. And q2/q3 will become p1/p0 accordingly.
        ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
        DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);

        // Store p1, p0, q0, q1 to four consecutive rows starting at b, using
        // unaligned 16-byte stores.
        _mm_storeu_si128((__m128i*)&b[0 * stride], p1);
        _mm_storeu_si128((__m128i*)&b[1 * stride], p0);
        _mm_storeu_si128((__m128i*)&b[2 * stride], p3);
        _mm_storeu_si128((__m128i*)&b[3 * stride], p2);

        // rotate samples
        p1 = tmp1;
        p0 = tmp2;
    }
}
Exemplo n.º 6
0
// Filters one edge of the U and V planes in a single pass.
// The four vectors before the edge (p3..p0) are fetched from u/v and the four
// after it (q0..q3) from u + 4 / v + 4. The filtered p1, p0, q0, q1 are
// written back starting at u + 2 / v + 2.
// thresh and ithresh feed the filter mask; hev_thresh is handed to DoFilter4().
// NOTE(review): Load16x4()/Store16x4() are defined elsewhere; presumably the
// first pointer supplies eight rows of U and the second eight rows of V --
// confirm against the helpers.
static void HFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
                          int thresh, int ithresh, int hev_thresh) {
    __m128i p3, p2, p1, p0;  // samples before the edge
    __m128i q0, q1, q2, q3;  // samples after the edge
    __m128i mask;

    // Samples before the edge, and the partial difference mask.
    Load16x4(u, v, stride, &p3, &p2, &p1, &p0);
    MAX_DIFF1(p3, p2, p1, p0, mask);

    // Samples after the edge start 4 bytes further along each row.
    Load16x4(u + 4, v + 4, stride, &q0, &q1, &q2, &q3);
    MAX_DIFF2(q3, q2, q1, q0, mask);

    COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);

    // p1 begins 2 bytes into each row.
    Store16x4(u + 2, v + 2, stride, &p1, &p0, &q0, &q1);
}