// initialize the group class to have sufficient space such that the largest nodeNumber can still be stored FAGeneral::FAGeneral(double* x,int K, double* ngroup, NumericVector xv, std::string weights, double gamma, NumericMatrix W, int mxSize, bool verb, double eps):K(K),Weights(K,vector<double>(K)),graphsize(K){ verbose = verb; mxSplitSize = mxSize; epsilon = eps; // calculate the weights we need calculateSlope(x,ngroup,xv,weights,gamma,W); calculateWeights(x,ngroup,xv,weights,gamma,W); // print_weights(); if (verbose){ Rprintf("Graph Construction \n"); } myGraph.Construct(K,Weights,epsilon); // myGraph.printGraph(cout); // set groups, prefuse and set events initializeGroups(&x[0],&ngroup[0]); initializeEvents(); // print_groups(); // print_events(); run(); }
void runbench(double *cd, long size){ if( memory_ratio>UNROLL_ITERATIONS ){ fprintf(stderr, "ERROR: memory_ratio exceeds UNROLL_ITERATIONS\n"); exit(1); } const long compute_grid_size = size/(UNROLLED_MEMORY_ACCESSES)/2; const int BLOCK_SIZE = 256; const int TOTAL_BLOCKS = compute_grid_size/BLOCK_SIZE; const long long computations = 2*(long long)(COMP_ITERATIONS)*REGBLOCK_SIZE*compute_grid_size; const long long memoryoperations = (long long)(COMP_ITERATIONS)*compute_grid_size; dim3 dimBlock(BLOCK_SIZE, 1, 1); dim3 dimGrid(TOTAL_BLOCKS, 1, 1); hipEvent_t start, stop; initializeEvents(&start, &stop); hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< float, BLOCK_SIZE, memory_ratio >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1.0f, (float*)cd); float kernel_time_mad_sp = finalizeEvents(start, stop); initializeEvents(&start, &stop); hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< double, BLOCK_SIZE, memory_ratio >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1.0, cd); float kernel_time_mad_dp = finalizeEvents(start, stop); initializeEvents(&start, &stop); hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< int, BLOCK_SIZE, memory_ratio >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1, (int*)cd); float kernel_time_mad_int = finalizeEvents(start, stop); const double memaccesses_ratio = (double)(memory_ratio)/UNROLL_ITERATIONS; const double computations_ratio = 1.0-memaccesses_ratio; printf(" %4d, %8.3f,%8.2f,%8.2f,%7.2f, %8.3f,%8.2f,%8.2f,%7.2f, %8.3f,%8.2f,%8.2f,%7.2f\n", UNROLL_ITERATIONS-memory_ratio, (computations_ratio*(double)computations)/(memaccesses_ratio*(double)memoryoperations*sizeof(float)), kernel_time_mad_sp, (computations_ratio*(double)computations)/kernel_time_mad_sp*1000./(double)(1000*1000*1000), (memaccesses_ratio*(double)memoryoperations*sizeof(float))/kernel_time_mad_sp*1000./(1000.*1000.*1000.), (computations_ratio*(double)computations)/(memaccesses_ratio*(double)memoryoperations*sizeof(double)), kernel_time_mad_dp, 
(computations_ratio*(double)computations)/kernel_time_mad_dp*1000./(double)(1000*1000*1000), (memaccesses_ratio*(double)memoryoperations*sizeof(double))/kernel_time_mad_dp*1000./(1000.*1000.*1000.), (computations_ratio*(double)computations)/(memaccesses_ratio*(double)memoryoperations*sizeof(int)), kernel_time_mad_int, (computations_ratio*(double)computations)/kernel_time_mad_int*1000./(double)(1000*1000*1000), (memaccesses_ratio*(double)memoryoperations*sizeof(int))/kernel_time_mad_int*1000./(1000.*1000.*1000.) ); }
// Realize handler for the dialog.
//
// Runs the Gtk::Window implementation first, then performs the dialog's own
// setup steps in a fixed order: initialize, load the input data, verify it,
// and finally initialize the events.
void EditNoteDialog::on_realize()
{
    // Base-class handling comes before any dialog-specific setup.
    Gtk::Window::on_realize();

    initialize();
    setInputData();
    verifyInputData();
    initializeEvents();
}
void runbench(double *cd, long size){ const long compute_grid_size = size/ELEMENTS_PER_THREAD; const int BLOCK_SIZE = 256; const int TOTAL_BLOCKS = compute_grid_size/BLOCK_SIZE; const long long computations = ELEMENTS_PER_THREAD*(long long)compute_grid_size+(2*ELEMENTS_PER_THREAD*compute_iterations)*(long long)compute_grid_size; const long long memoryoperations = size; dim3 dimBlock(BLOCK_SIZE, 1, 1); dim3 dimGrid(TOTAL_BLOCKS, 1, 1); hipEvent_t start, stop; initializeEvents(&start, &stop); hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< float, BLOCK_SIZE, ELEMENTS_PER_THREAD, compute_iterations >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1.0f, (float*)cd); float kernel_time_mad_sp = finalizeEvents(start, stop); initializeEvents(&start, &stop); hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< double, BLOCK_SIZE, ELEMENTS_PER_THREAD, compute_iterations >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1.0, cd); float kernel_time_mad_dp = finalizeEvents(start, stop); initializeEvents(&start, &stop); hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< int, BLOCK_SIZE, ELEMENTS_PER_THREAD, compute_iterations >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1, (int*)cd); float kernel_time_mad_int = finalizeEvents(start, stop); printf(" %4d, %8.3f,%8.2f,%8.2f,%7.2f, %8.3f,%8.2f,%8.2f,%7.2f, %8.3f,%8.2f,%8.2f,%7.2f\n", compute_iterations, ((double)computations)/((double)memoryoperations*sizeof(float)), kernel_time_mad_sp, ((double)computations)/kernel_time_mad_sp*1000./(double)(1000*1000*1000), ((double)memoryoperations*sizeof(float))/kernel_time_mad_sp*1000./(1000.*1000.*1000.), ((double)computations)/((double)memoryoperations*sizeof(double)), kernel_time_mad_dp, ((double)computations)/kernel_time_mad_dp*1000./(double)(1000*1000*1000), ((double)memoryoperations*sizeof(double))/kernel_time_mad_dp*1000./(1000.*1000.*1000.), ((double)computations)/((double)memoryoperations*sizeof(int)), kernel_time_mad_int, ((double)computations)/kernel_time_mad_int*1000./(double)(1000*1000*1000), 
((double)memoryoperations*sizeof(int))/kernel_time_mad_int*1000./(1000.*1000.*1000.) ); }
void Simulation::tick() { // The idea behind event driven simulation is quite // ingenious: we determine the time of all collisions // happening between all particles and walls assuming // that particles move by straight lines at constant // speed without any resistance. // // We keep the collision events arranged by time in priority // queue, so that we always know when and what collisions // are going to happen. // // The expensive calculations have to be done only // once when the priority queue is initialised. By the expensive // calculations I mean the calculation of all collisions between // all available particles O(n^2). Then the event driven model // requires to recalculate new events only after some event (collision) // happens, which requires no more than O(N). That's why this // model is so swift. // // Of course some of the events in the queue have to be cancelled after // the collision event happens (since particle's trajectories change), // that's why the system allows to detect whether the event // is stale/cancelled. if (events.empty()) { if (particles.size() == 0) { throw SimulationError("Simulation can not be launched " "with 0 particles"); } initializeEvents(); } if (is_paused) { SDL_Delay(delay_ms); return; } bool enough = false; while (!enough) { Event *ev = events.top(); events.pop(); if (ev->isStale()) { delete ev; continue; } // simulation system does time related calculations // in relative time, not absolute. So we have to // translate relative time time to absolute one // and vice versa. moveParticles(ev->getTime() - now); SDL_Delay(simulationTimeToMS(ev->getTime() - now)); now = ev->getTime(); switch (ev->getType()) { case EventType::WallCollision: { // Particle collides a wall. This requires to calculate // the collisions of this particle with all other particles // and walls. 
WallCollisionEvent *wc_ev = dynamic_cast<WallCollisionEvent*>(ev); wc_ev->getParticle().bounceWall(wc_ev->getWallType()); predictCollisions(wc_ev->getParticle()); break; } case EventType::ParticleCollision: { // Two particles collide each other. This requires to calculate // the collisions of these two particles with all other particles // and walls. ParticleCollisionEvent *pc_ev = dynamic_cast<ParticleCollisionEvent*>(ev); pc_ev->getFirstParticle().bounceParticle(pc_ev->getSecondParticle()); predictCollisions(pc_ev->getFirstParticle()); predictCollisions(pc_ev->getSecondParticle()); break; } case EventType::Refresh: refresh(); enough = true; events.push(new RefreshEvent(now + MSToSimulationTime(delay_ms))); break; } delete ev; } }
int test_amdgcn_wave_lshift_1 (const int n, const int blockSize, const int launch_iter=1, const int shfl_iter=1, const bool verify=true) { const int WIDTH = 64; const int DELTA = 1; std::vector<int> input(n); std::future<void> inputFuture = std::async([&]() { std::default_random_engine generator; std::uniform_int_distribution<int> input_dist(0, WIDTH-1); std::generate(std::begin(input), std::end(input),[&]() { return input_dist(generator); }); }); inputFuture.wait(); int* gpuInput; hipMalloc(&gpuInput, n * sizeof(int)); hipMemcpy(gpuInput, input.data(), n * sizeof(int), hipMemcpyHostToDevice); int* gpuOutput; hipMalloc(&gpuOutput, n * sizeof(int)); // warm up { hipEvent_t start, stop; initializeEvents(&start, &stop); hipLaunchKernel(HIP_KERNEL_NAME(run_amdgcn_wave_lshift_1) , dim3(n/blockSize), dim3(blockSize), 0, 0 , gpuInput, gpuOutput, shfl_iter); float time_ms = finalizeEvents(start, stop); } // measure the performance hipEvent_t start, stop; initializeEvents(&start, &stop); for (int i = 0; i < launch_iter; i++) { hipLaunchKernel(HIP_KERNEL_NAME(run_amdgcn_wave_lshift_1) , dim3(n/blockSize), dim3(blockSize), 0, 0 , gpuInput, gpuOutput, shfl_iter); } float time_ms = finalizeEvents(start, stop); std::vector<int> output(n); hipMemcpy(output.data(), gpuOutput, n * sizeof(int), hipMemcpyDeviceToHost); // verification int errors = 0; if (verify) { for (int i = 0; i < n; i+=WIDTH) { int local_output[WIDTH]; for (int j = 0; j < shfl_iter; j++) { for (int k = 0; k < WIDTH; k++) { unsigned int lane = ((k+(int)DELTA)<WIDTH)?(k+DELTA):k; local_output[k] = input[i+lane]; } for (int k = 0; k < WIDTH; k++) { input[i+k] = local_output[k]; } } for (int k = 0; k < WIDTH; k++) { if (input[i+k] != output[i+k]) { errors++; } } } } std::cout << __FUNCTION__ << "<" << DELTA << "," << WIDTH << "> total(" << launch_iter << " launches, " << shfl_iter << " wavefront_shift_left/lane/kernel): " << time_ms << "ms, " << time_ms/(double)launch_iter << " ms/kernel, " << errors << " errors" << 
std::endl; hipFree(gpuInput); hipFree(gpuOutput); return errors; }
// Default constructor.
//
// Works in three phases: every event is first marked invalid, then the
// events are initialized, and finally a verification pass runs over all of
// them. The order matters: the verification step is only meaningful after
// the invalidate-then-initialize sequence has completed.
FrameworkEventInfo::FrameworkEventInfo()
{
    initializeAllEventsToInvalid();
    initializeEvents();
    verifyAllEventsCorrectlyInitialized();
}
int main() { // initialize IRQ (interrupts) // this must come before everything else IRQ_INIT(); // Initialize global pointers GameStateManager gameStateMan; OamManager oamMan; AudioManager audioMan; PlayState playState(&gameStateMan); TitleScreenState titleState(&gameStateMan); PauseState pauseState(&gameStateMan); GameOverState gameOverState(&gameStateMan); StoreState storeState(&gameStateMan); StageEndState stageEndState(&gameStateMan); g_gameStateMan = &gameStateMan; g_oamMan = &oamMan; g_playState = &playState; g_titleState = &titleState; g_pauseState = &pauseState; g_gameOverState = &gameOverState; g_storeState = &storeState; g_stageEndState = &stageEndState; g_audioMan = &audioMan; // create stage events StageEvent endEvent; StageEvent event1; StageEvent event2; StageEvent event3; StageEvent event4; StageEvent event5; StageEvent event6; StageEvent firePowerPowerUpEvent; StageEvent invinciblePowerUpEvent; StageEvent bombPowerUpEvent; g_endEvent = &endEvent; g_event1 = &event1; g_event2 = &event2; g_event3 = &event3; g_event4 = &event4; g_event5 = &event5; g_event6 = &event6; g_firePowerPowerUpEvent = &firePowerPowerUpEvent; g_invinciblePowerUpEvent = &invinciblePowerUpEvent; g_bombPowerUpEvent = &bombPowerUpEvent; initializeEvents(); StageEvent * stage1Events[24]; int stage1Timing[24]; int stage1yOffset[24]; fillEventsStage1(stage1Events, stage1Timing, stage1yOffset); Stage stage1(&playState, stage1Events, stage1Timing, stage1yOffset, 24); g_stage1 = &stage1; StageEvent * stage2Events[20]; int stage2Timing[20]; int stage2yOffset[20]; fillEventsStage2(stage2Events, stage2Timing, stage2yOffset); Stage stage2(&playState, stage2Events, stage2Timing, stage2yOffset, 20); g_stage2 = &stage2; StageEvent * stage3Events[20]; int stage3Timing[20]; int stage3yOffset[20]; fillEventsStage3(stage3Events, stage3Timing, stage3yOffset); Stage stage3(&playState, stage3Events, stage3Timing, stage3yOffset, 20); g_stage3 = &stage3; videoInit(); 
g_gameStateMan->pushState(g_titleState); #ifdef DEBUG // timers used for debug display REG_TM1D = 0x10000 - 2808; // overflow into timer 2 every 2808 cycles, approx. 1% of a screen refresh REG_TM2D = 0; REG_TM2CNT = TM_CASCADE | TM_ENABLE; REG_TM1CNT = TM_FREQ_1 | TM_ENABLE; int oldPercent, diffPercent, oldFrac, diffFrac; char buf[15]; #endif // DEBUG while(true) { // wait until next VBlank // prefer this over vid_vsync() - it's // better for power consumption VBlankIntrWait(); #ifdef DEBUG // grab current percentage oldPercent = REG_TM2D; oldFrac = REG_TM1D; #endif // DEBUG // update shadow OAM to real OAM g_oamMan->update(); // mix the next frame's audio g_audioMan->sndMix(); // poll keys - do not do this in other places key_poll(); // update the game state g_gameStateMan->update(); #ifdef DEBUG // grab current percentage, and write it out diffPercent = REG_TM2D - oldPercent; diffFrac = REG_TM1D - oldFrac; // round the percent based on the fractional amount if (diffFrac > 1404) { diffPercent++; } else if (diffFrac < -1404) { diffPercent--; } gba_itoa(diffPercent, buf); // clear out characters from the last write write(" ", Vec2(0, 0)); write(buf, Vec2(0, 0)); // reset timer 2 to 0 REG_TM2CNT = 0; // disable timer REG_TM2D = 0; // set new value to 0 REG_TM2CNT = TM_CASCADE | TM_ENABLE; // reenable timer #endif // DEBUG } }