int main(const int argc, char ** argv) { shmem_init(); #ifdef EXTRA_STATS _timer_t total_time; if(shmem_my_pe() == 0) { printf("\n-----\nmkdir timedrun fake\n\n"); timer_start(&total_time); } #endif init_shmem_sync_array(pSync); char * log_file = parse_params(argc, argv); int err = bucket_sort(); log_times(log_file); #ifdef EXTRA_STATS if(shmem_my_pe() == 0) { just_timer_stop(&total_time); double tTime = ( total_time.stop.tv_sec - total_time.start.tv_sec ) + ( total_time.stop.tv_nsec - total_time.start.tv_nsec )/1E9; avg_time *= 1000; avg_time_all2all *= 1000; printf("\n============================ MMTk Statistics Totals ============================\n"); if(NUM_ITERATIONS == 1) { //TODO: fix time calculation below for more number of iterations printf("time.mu\tt.ATA_KEYS\tt.MAKE_INPUT\tt.COUNT_BUCKET_SIZES\tt.BUCKETIZE\tt.COMPUTE_OFFSETS\tt.LOCAL_SORT\tBARRIER_AT_START\tBARRIER_AT_EXCHANGE\tBARRIER_AT_END\tnWorkers\tnPEs\n"); double TIMES[TIMER_NTIMERS]; memset(TIMES, 0x00, sizeof(double) * TIMER_NTIMERS); for(int i=0; i<NUM_PES; i++) { for(int t = 0; t < TIMER_NTIMERS; ++t){ if(timers[t].all_times != NULL){ TIMES[t] += timers[t].all_times[i]; } } } for(int t = 0; t < TIMER_NTIMERS; ++t){ printf("%.3f\t", (TIMES[t]/NUM_PES)*1000); } printf("1\t%d\n",NUM_PES); printf("Total time: %.3f\n",(TIMES[0]/NUM_PES)*1000); } else { printf("time.mu\ttimeAll2All\tnWorkers\tnPEs\n"); printf("%.3f\t%.3f\t1\t%d\n",avg_time,avg_time_all2all,NUM_PES); printf("Total time: %.3f\n",avg_time); } printf("------------------------------ End MMTk Statistics -----------------------------\n"); printf("===== TEST PASSED in %.3f msec =====\n",(tTime*1000)); } #endif shmem_finalize(); return err; }
// Destructor: calls log_times() once more as the object goes away.
DynVolNN::~DynVolNN()
{
  log_times();
}
/**
 * Match every stored template against the image and emit one detection per
 * pixel.
 *
 * Steps:
 *  1. Build a SphericalOccupancyMap from the image.
 *  2. Build one MatchPacket per template in parallel (TaskBlock), recording
 *     the elapsed time of each match in performance_times/performance_counts
 *     keyed by (z_min - min_z)/50.
 *  3. Sort the packets and log the first one after sorting, if there is one.
 *  4. For each pixel, take the largest response over all packets that cover
 *     it and record a Detection with that response and bounding box.
 *
 * @param im     Image to run detection on.
 * @param filter Not used by this function.
 * @return The collected detections after being passed through sort().
 */
DetectionSet DynVolNN::detect(const ImRGBZ&im,DetectionFilter filter) const
{
  SphericalOccupancyMap SOM(im);

  // --- Stage 1: one matching task per template -----------------------------
  vector<MatchPacket> packets(templates.size());
  TaskBlock proc_templates("proc_templates");
  tbb::concurrent_unordered_set<size_t> checked_templates;
  // size_t index: avoids the signed/unsigned comparison against size().
  for(size_t iter = 0; iter < templates.size(); ++iter)
  {
    proc_templates.add_callee([&,iter]()
    {
      Timer timer;
      timer.tic();
      const DynVolTempl&t = templates.at(iter);
      // The callback returns true when the hash of the given Mat has already
      // been seen by any task (i.e. it is a duplicate).
      packets.at(iter) = MatchPacket(SOM,t,[&](const Mat&candidate)
      {
        size_t hash = hash_code(candidate);
        auto r = checked_templates.insert(hash);
        if(!r.second)
        {
          cout << "template duplicate skipped! " << hash << endl;
        }
        return !r.second;
      });
      long interval = timer.toc();

      // Timing statistics are shared between tasks; update them under lock.
      const auto bin = (t.z_min - min_z)/50;
      lock_guard<mutex> l(monitor);
      performance_times[bin] += interval;
      performance_counts[bin] ++;
    });
  }
  proc_templates.execute();
  log_file << safe_printf("info: % checked among % templates",checked_templates.size(),templates.size()) << endl;

  // --- Stage 2: order the packets and log the first one --------------------
  std::sort(packets.begin(),packets.end());
  // Guard against an empty template set: packets.at(0) would throw
  // std::out_of_range from what is only a logging step.
  if(!packets.empty())
  {
    const int logged_index = 0;
    string fn = im.filename;
    for(char&c : fn)
      if(c == '/')
        c = '_';
    packets.at(logged_index).log(safe_printf("packet_[%]_[%]_",fn,logged_index),SOM);
  }
  log_times();

  // --- Stage 3: per-pixel maximum over all packets -------------------------
  DetectionSet all_dets;
  mutex all_dets_mutex; // protects all_dets for this call only
  TaskBlock take_dets("take_dets");
  const int stride = 1;
  for(int yIter = stride/2; yIter < im.rows(); yIter += stride)
    take_dets.add_callee([&,yIter]()
    {
      // Gather this row's detections locally so the shared set is locked
      // once per row instead of once per pixel.
      std::vector<std::shared_ptr<Detection>> row_dets;
      for(int xIter = stride/2; xIter < im.cols(); xIter += stride)
      {
        float max_resp = -inf;
        Rect max_bb;
        for(auto && packet : packets)
        {
          // Skip packets with an empty bounding box.
          if(packet.bb.size().area() <= 0)
            continue;
          // Skip packets whose response map does not cover this pixel.
          if(yIter >= packet.r.rows || xIter >= packet.r.cols)
            continue;

          float resp = packet.r.at<float>(yIter,xIter);
          if(resp > max_resp)
          {
            max_resp = resp;
            max_bb = Rect(Point(xIter,yIter),packet.bb.size());
          }
        }//end for packet

        // A detection is emitted for every pixel; if no packet covered it,
        // resp stays at -inf and BB is a default-constructed Rect.
        auto det = make_shared<Detection>();
        det->BB = max_bb;
        det->resp = max_resp;
        row_dets.push_back(det);
      }// end for xIter

      lock_guard<mutex> l(all_dets_mutex);
      for(auto && det : row_dets)
        all_dets.push_back(det);
    });
  take_dets.execute();

  all_dets = sort(all_dets);
  return all_dets;
}