// Finishes the statistics for the three distance collections by calling
// computeStdDev() once per collection (innie, outtie, same), passing the
// collection together with its matching mean and standard-deviation variables.
// NOTE(review): computeStdDev() is declared elsewhere; presumably it fills in
// the mean/sdev arguments from the distances -- confirm against its definition.
void  finalize(void) {

    // Debug trace of the three collection sizes, left disabled.
    //fprintf(stderr, "FINALIZE %ld %ld %ld\n", distInnie.size(), distOuttie.size(), distSame.size());
    computeStdDev(distInnie,  meanInnie,  sdevInnie);
    computeStdDev(distOuttie, meanOuttie, sdevOuttie);
    computeStdDev(distSame,   meanSame,   sdevSame);
  };
예제 #2
0
// Prints the one-line summary of a cold benchmark run to stdout.
//
// The first (time) parameter is unused. name, copies, num_buffers and size
// are echoed as "name copies x num_buffers x size bytes"; the average, the
// standard deviation, the minimum and the maximum are each divided by 1024
// before being printed (the rates carry an "MB/s" label).
//
// NOTE(review): computeStdDev() is declared elsewhere and is called here as
// (running_avg, square_avg); confirm that this matches its parameter order.
static void inline printColdSummary(
        uint64_t /*time_ns*/, const char *name, size_t size, size_t copies, size_t num_buffers,
        double running_avg, double square_avg, double min, double max) {
    const double scaled_avg = running_avg / 1024.0;
    const double scaled_std_dev = computeStdDev(running_avg, square_avg) / 1024.0;
    const double scaled_min = min / 1024.0;
    const double scaled_max = max / 1024.0;

    printf("  %s %zux%zux%zu bytes average %.2f MB/s std dev %.4f min %.2f MB/s max %.2f MB/s\n",
           name, copies, num_buffers, size,
           scaled_avg, scaled_std_dev, scaled_min, scaled_max);
}
예제 #3
0
// An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm.
// See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details.
//
// Estimates where the point (u_x, u_y) of frame 1 lies in frame 2. A
// displacement guess is refined from the coarsest pyramid level
// (NUM_LEVELS - 1) down to level 0, doubling the guess between levels.
//
// Params:
//   u_x, u_y:         point position in level-0 coordinates.
//   final_x, final_y: receive u + displacement. They are written only after
//                     all levels have been processed; an early "return false"
//                     leaves them untouched.
//
// Returns true when every sampled window position was a valid interpolation
// position and the final position is valid in level 0 of frame 1; returns
// false otherwise.
bool OpticalFlow::findFlowAtPoint(const float32 u_x, const float32 u_y,
                                  float32* final_x, float32* final_y) const {
  const float32 threshold_squared = square(THRESHOLD);

  // Initial guess.
  float32 g_x = 0.0f;
  float32 g_y = 0.0f;

  // For every level in the pyramid, update the coordinates of the best match.
  for (int32 l = NUM_LEVELS - 1; l >= 0; --l) {
    // Shrink factor from original.
    const int32 shrink_factor = (1 << l);

    // Images I (prev) and J (next).
    const Image<uint8>& img_I = *frame1_->pyramid_[l];
    const Image<uint8>& img_J = *frame2_->pyramid_[l];

    // Computed gradients.
    const Image<int32>& I_x = *frame1_->spatial_x_[l];
    const Image<int32>& I_y = *frame1_->spatial_y_[l];

    // Image position vector (p := u^l), scaled for this level.
    const float32 p_x = u_x / static_cast<float32>(shrink_factor);
    const float32 p_y = u_y / static_cast<float32>(shrink_factor);

    // LOGV("Level %d: (%d, %d) / %d -> (%d, %d)",
    //      l, u_x, u_y, shrink_factor, p_x, p_y);

    // Get values for frame 1.  They remain constant through the inner
    // iteration loop.
    // NOTE(review): assumes ARRAY_SIZE holds at least
    // (2 * WINDOW_SIZE + 1)^2 entries -- confirm against its definition.
    float32 vals_I[ARRAY_SIZE];
    float32 vals_I_x[ARRAY_SIZE];
    float32 vals_I_y[ARRAY_SIZE];

    // Single running index into the window arrays; reset before each pass.
    int32 val_idx = 0;
    for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) {
      for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) {
        const float32 x_pos = p_x + win_x;
        const float32 y_pos = p_y + win_y;

        if (!img_I.validInterpPixel(x_pos, y_pos)) {
          return false;
        }

        vals_I[val_idx] = img_I.getPixelInterp(x_pos, y_pos);

        vals_I_x[val_idx] = I_x.getPixelInterp(x_pos, y_pos);
        vals_I_y[val_idx] = I_y.getPixelInterp(x_pos, y_pos);

        ++val_idx;
      }
    }

    // Compute the spatial gradient matrix about point p.
    float32 G[] = { 0, 0, 0, 0 };
    calculateG(vals_I_x, vals_I_y, ARRAY_SIZE, G);

    // Find the inverse of G.
    float32 G_inv[4];
    if (!invert2x2(G, G_inv)) {
      // If we can't invert, hope that the next level will have better luck.
      // Note that this also skips the 2x scaling of the guess below.
      continue;
    }

#ifdef NORMALIZE
    const float32 mean_I = computeMean(vals_I, ARRAY_SIZE);
    const float32 std_dev_I = computeStdDev(vals_I, ARRAY_SIZE, mean_I);
#endif

    // Iterate NUM_ITERATIONS times or until we converge.
    for (int32 iteration = 0; iteration < NUM_ITERATIONS; ++iteration) {
      // Get values for frame 2, displaced by the current guess.
      float32 vals_J[ARRAY_SIZE];
      val_idx = 0;
      for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) {
        for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) {
          const float32 x_pos = p_x + win_x + g_x;
          const float32 y_pos = p_y + win_y + g_y;

          // Validate against the image that is actually sampled (J).
          if (!img_J.validInterpPixel(x_pos, y_pos)) {
            return false;
          }

          vals_J[val_idx] = img_J.getPixelInterp(x_pos, y_pos);

          ++val_idx;
        }
      }

#ifdef NORMALIZE
      const float32 mean_J = computeMean(vals_J, ARRAY_SIZE);
      const float32 std_dev_J = computeStdDev(vals_J, ARRAY_SIZE, mean_J);

      // NOTE(review): std_dev_J may be zero for a uniform window, which would
      // make this ratio non-finite -- confirm whether callers rely on that.
      const float32 std_dev_ratio = std_dev_I / std_dev_J;
#endif

      // Compute image mismatch vector.
      float32 b_x = 0.0f;
      float32 b_y = 0.0f;
      val_idx = 0;
      for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) {
        for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) {
          // Normalized Image difference.

#ifdef NORMALIZE
          const float32 dI = (vals_I[val_idx] - mean_I) -
                             (vals_J[val_idx] - mean_J) * std_dev_ratio;
#else
          const float32 dI = vals_I[val_idx] - vals_J[val_idx];
#endif

          b_x += dI * vals_I_x[val_idx];
          b_y += dI * vals_I_y[val_idx];

          ++val_idx;
        }
      }

      // Optical flow... solve n = G^-1 * b
      // (G_inv is used as a row-major 2x2 matrix.)
      const float32 n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y);
      const float32 n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y);

      // Update best guess with residual displacement from this level and
      // iteration.
      g_x += n_x;
      g_y += n_y;

      // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y);

      // Abort early if we're already below the threshold.
      if (square(n_x) + square(n_y) < threshold_squared) {
        break;
      }
    }  // Iteration.

    if (l > 0) {
      // Every lower level of the pyramid is 2x as large dimensionally.
      g_x = 2.0f * g_x;
      g_y = 2.0f * g_y;
    }
  }  // Level.

  // LOGV("Final displacement for feature %d was (%.2f, %.2f)",
  //      iFeat, g_x, g_y);

  *final_x = u_x + g_x;
  *final_y = u_y + g_y;

  // Report success only if the best guess is still in the image.
  return frame1_->pyramid_[0]->validInterpPixel(*final_x, *final_y);
}
예제 #4
0
    MAINLOOP_COLD(name, (cmd_data), size, num_incrs,                          \
                  buf1 = buffer1 + k * buf1_incr;                             \
                  buf2 = buffer2 + k * buf2_incr;                             \
                  for (l = 0; l < num_strides; l++) {                         \
                      BENCH;                                                  \
                      buf1 += buf1_stride_incr;                               \
                      buf2 += buf2_stride_incr;                               \
                  });

// Benchmark entry point that times sleep() calls.
//
// cmd_data.args[0] is used as the argument to sleep(); the name and func
// parameters are unused. Looping, timing and statistics are handled by the
// MAINLOOP macro, which is defined elsewhere. The identifiers time_ns, avg,
// running_avg, square_avg, min and max are not declared in this function and
// presumably come from that macro's expansion -- confirm against its
// definition.
//
// NOTE(review): computeStdDev() is declared elsewhere and is called here as
// (square_avg, running_avg); confirm that this matches its parameter order.
//
// Always returns 0.
int benchmarkSleep(const char* /*name*/, const command_data_t &cmd_data, void_func_t /*func*/) {
    int delay = cmd_data.args[0];  // Value passed to sleep() and echoed in the output.
    MAINLOOP(cmd_data, sleep(delay),
             (double)time_ns/NS_PER_SEC,
             printf("sleep(%d) took %.06f seconds\n", delay, avg);,
             printf("  sleep(%d) average %.06f seconds std dev %f min %.06f seconds max %0.6f seconds\n", \
                    delay, running_avg, computeStdDev(square_avg, running_avg), \
                    min, max));

    return 0;
}

// Benchmark entry point for a memset-style function.
//
// func is reinterpreted as memset_func_t and invoked as
// memset_func(buf, i, size) through the BENCH_ONE_BUF macro, which is defined
// elsewhere. buf, i and size are not declared in this function and presumably
// come from the macro's expansion -- confirm against its definition. The
// third macro argument is an empty statement.
//
// Always returns 0.
int benchmarkMemset(const char *name, const command_data_t &cmd_data, void_func_t func) {
    // The generic function pointer is cast to the memset-style signature.
    memset_func_t memset_func = reinterpret_cast<memset_func_t>(func);
    BENCH_ONE_BUF(name, cmd_data, ;, memset_func(buf, i, size));

    return 0;
}

int benchmarkMemsetCold(const char *name, const command_data_t &cmd_data, void_func_t func) {
    memset_func_t memset_func = reinterpret_cast<memset_func_t>(func);
    COLD_ONE_BUF(name, cmd_data, ;, memset_func(buf, l, size));