void finalize(void) { //fprintf(stderr, "FINALIZE %ld %ld %ld\n", distInnie.size(), distOuttie.size(), distSame.size()); computeStdDev(distInnie, meanInnie, sdevInnie); computeStdDev(distOuttie, meanOuttie, sdevOuttie); computeStdDev(distSame, meanSame, sdevSame); };
// Prints the one-line summary for a cold benchmark run.
//
//   name         label printed at the start of the line.
//   size         printed as the last factor of "copies x num_buffers x size bytes".
//   copies       printed as the first factor.
//   num_buffers  printed as the second factor.
//   running_avg  running average of the measured rate.
//   square_avg   running average of the squared rate.
//   min, max     smallest / largest measured rate.
//
// The first parameter (time_ns) is unused. All rates are divided by 1024.0
// before being printed with an "MB/s" label.
static inline void printColdSummary(
        uint64_t /*time_ns*/, const char *name, size_t size, size_t copies, size_t num_buffers,
        double running_avg, double square_avg, double min, double max) {
    // The standard deviation is derived from E[x^2] and E[x], so the squared
    // average goes first.  This is the same (square_avg, running_avg) order
    // used by the summary printf in benchmarkSleep; the previous call here
    // passed the two values the other way round.
    // NOTE(review): confirm against computeStdDev's declaration, which is not
    // visible from this part of the file.
    const double std_dev = computeStdDev(square_avg, running_avg);

    printf(" %s %zux%zux%zu bytes average %.2f MB/s std dev %.4f min %.2f MB/s max %.2f MB/s\n",
           name, copies, num_buffers, size, running_avg/1024.0,
           std_dev/1024.0, min/1024.0, max/1024.0);
}
// An implementation of the Pyramidal Lucas-Kanade Optical Flow algorithm. // See http://robots.stanford.edu/cs223b04/algo_tracking.pdf for details. bool OpticalFlow::findFlowAtPoint(const float32 u_x, const float32 u_y, float32* final_x, float32* final_y) const { const float32 threshold_squared = square(THRESHOLD); // Initial guess. float32 g_x = 0.0f; float32 g_y = 0.0f; // For every level in the pyramid, update the coordinates of the best match. for (int32 l = NUM_LEVELS - 1; l >= 0; --l) { // Shrink factor from original. const int32 shrink_factor = (1 << l); // Images I (prev) and J (next). const Image<uint8>& img_I = *frame1_->pyramid_[l]; const Image<uint8>& img_J = *frame2_->pyramid_[l]; // Computed gradients. const Image<int32>& I_x = *frame1_->spatial_x_[l]; const Image<int32>& I_y = *frame1_->spatial_y_[l]; // Image position vector (p := u^l), scaled for this level. const float32 p_x = u_x / static_cast<float32>(shrink_factor); const float32 p_y = u_y / static_cast<float32>(shrink_factor); // LOGV("Level %d: (%d, %d) / %d -> (%d, %d)", // l, u_x, u_y, shrink_factor, p_x, p_y); // Get values for frame 1. They remain constant through the inner // iteration loop. float32 vals_I[ARRAY_SIZE]; float32 vals_I_x[ARRAY_SIZE]; float32 vals_I_y[ARRAY_SIZE]; int32 val_idx = 0; for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) { for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) { const float32 x_pos = p_x + win_x; const float32 y_pos = p_y + win_y; if (!img_I.validInterpPixel(x_pos, y_pos)) { return false; } vals_I[val_idx] = img_I.getPixelInterp(x_pos, y_pos); vals_I_x[val_idx] = I_x.getPixelInterp(x_pos, y_pos); vals_I_y[val_idx] = I_y.getPixelInterp(x_pos, y_pos); ++val_idx; } } // Compute the spatial gradient matrix about point p. float32 G[] = { 0, 0, 0, 0 }; calculateG(vals_I_x, vals_I_y, ARRAY_SIZE, G); // Find the inverse of G. 
float32 G_inv[4]; if (!invert2x2(G, G_inv)) { // If we can't invert, hope that the next level will have better luck. continue; } #ifdef NORMALIZE const float32 mean_I = computeMean(vals_I, ARRAY_SIZE); const float32 std_dev_I = computeStdDev(vals_I, ARRAY_SIZE, mean_I); #endif // Iterate NUM_ITERATIONS times or until we converge. for (int32 iteration = 0; iteration < NUM_ITERATIONS; ++iteration) { // Get values for frame 2. float32 vals_J[ARRAY_SIZE]; int32 val_idx = 0; for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) { for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) { const float32 x_pos = p_x + win_x + g_x; const float32 y_pos = p_y + win_y + g_y; if (!img_I.validInterpPixel(x_pos, y_pos)) { return false; } vals_J[val_idx] = img_J.getPixelInterp(x_pos, y_pos); ++val_idx; } } #ifdef NORMALIZE const float32 mean_J = computeMean(vals_J, ARRAY_SIZE); const float32 std_dev_J = computeStdDev(vals_J, ARRAY_SIZE, mean_J); const float32 std_dev_ratio = std_dev_I / std_dev_J; #endif // Compute image mismatch vector. float32 b_x = 0.0f; float32 b_y = 0.0f; val_idx = 0; for (int32 win_x = -WINDOW_SIZE; win_x <= WINDOW_SIZE; ++win_x) { for (int32 win_y = -WINDOW_SIZE; win_y <= WINDOW_SIZE; ++win_y) { // Normalized Image difference. #ifdef NORMALIZE const float32 dI = (vals_I[val_idx] - mean_I) - (vals_J[val_idx] - mean_J) * std_dev_ratio; #else const float32 dI = vals_I[val_idx] - vals_J[val_idx]; #endif b_x += dI * vals_I_x[val_idx]; b_y += dI * vals_I_y[val_idx]; ++val_idx; } } // Optical flow... solve n = G^-1 * b const float32 n_x = (G_inv[0] * b_x) + (G_inv[1] * b_y); const float32 n_y = (G_inv[2] * b_x) + (G_inv[3] * b_y); // Update best guess with residual displacement from this level and // iteration. g_x += n_x; g_y += n_y; // LOGV("Iteration %d: delta (%.3f, %.3f)", iteration, n_x, n_y); // Abort early if we're already below the threshold. if (square(n_x) + square(n_y) < threshold_squared) { break; } } // Iteration. 
if (l > 0) { // Every lower level of the pyramid is 2x as large dimensionally. g_x = 2.0f * g_x; g_y = 2.0f * g_y; } } // Level. // LOGV("Final displacement for feature %d was (%.2f, %.2f)", // iFeat, g_x, g_y); *final_x = u_x + g_x; *final_y = u_y + g_y; // Assign the best guess, if we're still in the image. if (frame1_->pyramid_[0]->validInterpPixel(*final_x, *final_y)) { return true; } else { return false; } }
MAINLOOP_COLD(name, (cmd_data), size, num_incrs,                              \
                  buf1 = buffer1 + k * buf1_incr;                             \
                  buf2 = buffer2 + k * buf2_incr;                             \
                  for (l = 0; l < num_strides; l++) {                         \
                      BENCH;                                                  \
                      buf1 += buf1_stride_incr;                               \
                      buf2 += buf2_stride_incr;                               \
                  });

// Benchmark driver for sleep(): reads the delay from cmd_data.args[0] and
// hands sleep(delay) to the MAINLOOP macro as the statement to run.
//
// The remaining MAINLOOP arguments are, in order: the expression
// (double)time_ns/NS_PER_SEC, a printf reporting one run ("took ... seconds"),
// and a printf reporting average / std dev / min / max.  The names time_ns,
// avg, running_avg, square_avg, min and max are not declared in this function;
// presumably MAINLOOP provides them -- its definition is not visible here.
//
// The name and func parameters are unused.  Always returns 0.
int benchmarkSleep(const char* /*name*/, const command_data_t &cmd_data, void_func_t /*func*/) {
    int delay = cmd_data.args[0];
    MAINLOOP(cmd_data, sleep(delay), (double)time_ns/NS_PER_SEC,
             printf("sleep(%d) took %.06f seconds\n", delay, avg);,
             printf(" sleep(%d) average %.06f seconds std dev %f min %.06f seconds max %0.6f seconds\n", \
                    delay, running_avg, computeStdDev(square_avg, running_avg), \
                    min, max));
    return 0;
}

// Benchmark driver for a memset-style function: casts the generic func pointer
// to memset_func_t and passes the call memset_func(buf, i, size) to the
// BENCH_ONE_BUF macro along with name and cmd_data.  buf, i and size are not
// declared here; presumably BENCH_ONE_BUF supplies them (macro not visible).
// The third macro argument is an empty statement.  Always returns 0.
int benchmarkMemset(const char *name, const command_data_t &cmd_data, void_func_t func) {
    memset_func_t memset_func = reinterpret_cast<memset_func_t>(func);
    BENCH_ONE_BUF(name, cmd_data, ;, memset_func(buf, i, size));
    return 0;
}

int benchmarkMemsetCold(const char *name, const command_data_t &cmd_data, void_func_t func) {
    memset_func_t memset_func = reinterpret_cast<memset_func_t>(func);
    COLD_ONE_BUF(name, cmd_data, ;, memset_func(buf, l, size));