Beispiel #1
0
// initialize the group class to have sufficient space such that the largest nodeNumber can still be stored
FAGeneral::FAGeneral(double* x,int K, double* ngroup, NumericVector xv, std::string weights, double gamma, NumericMatrix W, int mxSize, bool verb, double eps):K(K),Weights(K,vector<double>(K)),graphsize(K){
  verbose = verb;
  mxSplitSize = mxSize;
  epsilon = eps;
  
  // calculate the weights we need
  calculateSlope(x,ngroup,xv,weights,gamma,W);
  calculateWeights(x,ngroup,xv,weights,gamma,W);
  // print_weights();
  
  if (verbose){
    Rprintf("Graph Construction \n");
  }
  
  myGraph.Construct(K,Weights,epsilon);
  // myGraph.printGraph(cout);
  
  // set groups, prefuse and set events
  initializeGroups(&x[0],&ngroup[0]);
  
  initializeEvents();
  
  // print_groups();
  // print_events();
  
  run();
  
}
Beispiel #2
0
void runbench(double *cd, long size){
	if( memory_ratio>UNROLL_ITERATIONS ){
		fprintf(stderr, "ERROR: memory_ratio exceeds UNROLL_ITERATIONS\n");
		exit(1);
	}

	const long compute_grid_size = size/(UNROLLED_MEMORY_ACCESSES)/2;
	const int BLOCK_SIZE = 256;
	const int TOTAL_BLOCKS = compute_grid_size/BLOCK_SIZE;
	const long long computations = 2*(long long)(COMP_ITERATIONS)*REGBLOCK_SIZE*compute_grid_size;
	const long long memoryoperations = (long long)(COMP_ITERATIONS)*compute_grid_size;

	dim3 dimBlock(BLOCK_SIZE, 1, 1);
    dim3 dimGrid(TOTAL_BLOCKS, 1, 1);
	hipEvent_t start, stop;

	initializeEvents(&start, &stop);
	hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< float, BLOCK_SIZE, memory_ratio >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1.0f, (float*)cd);
	float kernel_time_mad_sp = finalizeEvents(start, stop);

	initializeEvents(&start, &stop);
	hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< double, BLOCK_SIZE, memory_ratio >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1.0, cd);
	float kernel_time_mad_dp = finalizeEvents(start, stop);

	initializeEvents(&start, &stop);
	hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< int, BLOCK_SIZE, memory_ratio >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1, (int*)cd);
	float kernel_time_mad_int = finalizeEvents(start, stop);

	const double memaccesses_ratio = (double)(memory_ratio)/UNROLL_ITERATIONS;
	const double computations_ratio = 1.0-memaccesses_ratio;

	printf("         %4d,   %8.3f,%8.2f,%8.2f,%7.2f,   %8.3f,%8.2f,%8.2f,%7.2f,  %8.3f,%8.2f,%8.2f,%7.2f\n",
		UNROLL_ITERATIONS-memory_ratio,
		(computations_ratio*(double)computations)/(memaccesses_ratio*(double)memoryoperations*sizeof(float)),
		kernel_time_mad_sp,
		(computations_ratio*(double)computations)/kernel_time_mad_sp*1000./(double)(1000*1000*1000),
		(memaccesses_ratio*(double)memoryoperations*sizeof(float))/kernel_time_mad_sp*1000./(1000.*1000.*1000.),
		(computations_ratio*(double)computations)/(memaccesses_ratio*(double)memoryoperations*sizeof(double)),
		kernel_time_mad_dp,
		(computations_ratio*(double)computations)/kernel_time_mad_dp*1000./(double)(1000*1000*1000),
		(memaccesses_ratio*(double)memoryoperations*sizeof(double))/kernel_time_mad_dp*1000./(1000.*1000.*1000.),
		(computations_ratio*(double)computations)/(memaccesses_ratio*(double)memoryoperations*sizeof(int)),
		kernel_time_mad_int,
		(computations_ratio*(double)computations)/kernel_time_mad_int*1000./(double)(1000*1000*1000),
		(memaccesses_ratio*(double)memoryoperations*sizeof(int))/kernel_time_mad_int*1000./(1000.*1000.*1000.) );
}
Beispiel #3
0
void EditNoteDialog::on_realize()
{
    Gtk::Window::on_realize();

    initialize();

    setInputData();

    verifyInputData();

    initializeEvents();
}
void runbench(double *cd, long size){
	const long compute_grid_size = size/ELEMENTS_PER_THREAD;
	const int BLOCK_SIZE = 256;
	const int TOTAL_BLOCKS = compute_grid_size/BLOCK_SIZE;
	const long long computations = ELEMENTS_PER_THREAD*(long long)compute_grid_size+(2*ELEMENTS_PER_THREAD*compute_iterations)*(long long)compute_grid_size;
	const long long memoryoperations = size;

	dim3 dimBlock(BLOCK_SIZE, 1, 1);
	dim3 dimGrid(TOTAL_BLOCKS, 1, 1);
	hipEvent_t start, stop;

	initializeEvents(&start, &stop);
	hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< float, BLOCK_SIZE, ELEMENTS_PER_THREAD, compute_iterations >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1.0f, (float*)cd);
	float kernel_time_mad_sp = finalizeEvents(start, stop);

	initializeEvents(&start, &stop);
	hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< double, BLOCK_SIZE, ELEMENTS_PER_THREAD, compute_iterations >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1.0, cd);
	float kernel_time_mad_dp = finalizeEvents(start, stop);

	initializeEvents(&start, &stop);
	hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< int, BLOCK_SIZE, ELEMENTS_PER_THREAD, compute_iterations >), dim3(dimGrid), dim3(dimBlock ), 0, 0, 1, (int*)cd);
	float kernel_time_mad_int = finalizeEvents(start, stop);

	printf("         %4d,   %8.3f,%8.2f,%8.2f,%7.2f,   %8.3f,%8.2f,%8.2f,%7.2f,  %8.3f,%8.2f,%8.2f,%7.2f\n",
		compute_iterations,
		((double)computations)/((double)memoryoperations*sizeof(float)),
		kernel_time_mad_sp,
		((double)computations)/kernel_time_mad_sp*1000./(double)(1000*1000*1000),
		((double)memoryoperations*sizeof(float))/kernel_time_mad_sp*1000./(1000.*1000.*1000.),
		((double)computations)/((double)memoryoperations*sizeof(double)),
		kernel_time_mad_dp,
		((double)computations)/kernel_time_mad_dp*1000./(double)(1000*1000*1000),
		((double)memoryoperations*sizeof(double))/kernel_time_mad_dp*1000./(1000.*1000.*1000.),
		((double)computations)/((double)memoryoperations*sizeof(int)),
		kernel_time_mad_int,
		((double)computations)/kernel_time_mad_int*1000./(double)(1000*1000*1000),
		((double)memoryoperations*sizeof(int))/kernel_time_mad_int*1000./(1000.*1000.*1000.) );
}
Beispiel #5
0
void Simulation::tick()
{
    // The idea behind event driven simulation is quite
    // ingenious: we determine the time of all collisions
    // happening between all particles and walls assuming
    // that particles move by straight lines at constant
    // speed without any resistance.
    //
    // We keep the collision events arranged by time in priority
    // queue, so that we always know when and what collisions
    // are going to happen.
    //
    // The expensive calculations have to be done only
    // once when the priority queue is initialised. By the expensive
    // calculations I mean the calculation of all collisions between
    // all available particles O(n^2). Then the event driven model
    // requires to recalculate new events only after some event (collision)
    // happens, which requires no more than O(N). That's why this
    // model is so swift.
    //
    // Of course some of the events in the queue have to be cancelled after
    // the collision event happens (since particle's trajectories change),
    // that's why the system allows to detect whether the event
    // is stale/cancelled.

    if (events.empty()) {
        if (particles.size() == 0) {
            throw SimulationError("Simulation can not be launched "
                                  "with 0 particles");
        }

        initializeEvents();
    }
    if (is_paused) {
        SDL_Delay(delay_ms);
        return;
    }

    bool enough = false;
    while (!enough) {
        Event *ev = events.top();

        events.pop();
        if (ev->isStale()) {
            delete ev;
            continue;
        }

        // simulation system does time related calculations
        // in relative time, not absolute. So we have to
        // translate relative time time to absolute one
        // and vice versa.
        moveParticles(ev->getTime() - now);
        SDL_Delay(simulationTimeToMS(ev->getTime() - now));
        now = ev->getTime();

        switch (ev->getType()) {
        case EventType::WallCollision:
        {
            // Particle collides a wall. This requires to calculate
            // the collisions of this particle with all other particles
            // and walls.
            WallCollisionEvent *wc_ev = dynamic_cast<WallCollisionEvent*>(ev);
            wc_ev->getParticle().bounceWall(wc_ev->getWallType());
            predictCollisions(wc_ev->getParticle());
            break;
        }

        case EventType::ParticleCollision:
        {
            // Two particles collide each other. This requires to calculate
            // the collisions of these two particles with all other particles
            // and walls.
            ParticleCollisionEvent *pc_ev = dynamic_cast<ParticleCollisionEvent*>(ev);
            pc_ev->getFirstParticle().bounceParticle(pc_ev->getSecondParticle());
            predictCollisions(pc_ev->getFirstParticle());
            predictCollisions(pc_ev->getSecondParticle());
            break;
        }

        case EventType::Refresh:
            refresh();
            enough = true;
            events.push(new RefreshEvent(now + MSToSimulationTime(delay_ms)));
            break;
        }

        delete ev;
    }
}
Beispiel #6
0
int test_amdgcn_wave_lshift_1
              (const int n, const int blockSize, const int launch_iter=1, const int shfl_iter=1, const bool verify=true) {

  const int WIDTH = 64;
  const int DELTA = 1;

  std::vector<int> input(n);
  std::future<void> inputFuture = std::async([&]() {
    std::default_random_engine generator;
    std::uniform_int_distribution<int> input_dist(0, WIDTH-1);
    std::generate(std::begin(input), std::end(input),[&]() { return input_dist(generator); }); 
  });
  inputFuture.wait();

  int* gpuInput;
  hipMalloc(&gpuInput, n * sizeof(int));
  hipMemcpy(gpuInput, input.data(), n * sizeof(int), hipMemcpyHostToDevice);

  int* gpuOutput;
  hipMalloc(&gpuOutput, n * sizeof(int));

  
  // warm up
  {
	  hipEvent_t start, stop;

    initializeEvents(&start, &stop);

    hipLaunchKernel(HIP_KERNEL_NAME(run_amdgcn_wave_lshift_1)
                    , dim3(n/blockSize), dim3(blockSize), 0, 0
                    , gpuInput, gpuOutput, shfl_iter); 

    float time_ms = finalizeEvents(start, stop);
  }


  // measure the performance
  hipEvent_t start, stop;
  initializeEvents(&start, &stop);

  for (int i = 0; i < launch_iter; i++) {
    hipLaunchKernel(HIP_KERNEL_NAME(run_amdgcn_wave_lshift_1)
                    , dim3(n/blockSize), dim3(blockSize), 0, 0
                    , gpuInput, gpuOutput, shfl_iter); 
  }
  float time_ms = finalizeEvents(start, stop);
  std::vector<int> output(n);
  hipMemcpy(output.data(), gpuOutput, n * sizeof(int), hipMemcpyDeviceToHost);
  

  // verification
  int errors = 0;
  if (verify) {
    for (int i = 0; i < n; i+=WIDTH) {
      int local_output[WIDTH];
      for (int j = 0; j < shfl_iter; j++) {
        for (int k = 0; k < WIDTH; k++) {
          unsigned int lane = ((k+(int)DELTA)<WIDTH)?(k+DELTA):k;
          local_output[k] = input[i+lane];
        }
        for (int k = 0; k < WIDTH; k++) {
          input[i+k] = local_output[k];
        }
      }
      for (int k = 0; k < WIDTH; k++) {
        if (input[i+k] != output[i+k]) {
          errors++;
        }
      }
    }
  }

  std::cout << __FUNCTION__ << "<" << DELTA << "," << WIDTH 
            << "> total(" << launch_iter << " launches, " << shfl_iter << " wavefront_shift_left/lane/kernel): " 
            << time_ms << "ms, "
            << time_ms/(double)launch_iter << " ms/kernel, "
            << errors << " errors"
            << std::endl;

  hipFree(gpuInput);
  hipFree(gpuOutput);

  return errors;
}
Beispiel #7
0
FrameworkEventInfo::FrameworkEventInfo(void)
{
    initializeAllEventsToInvalid();
    initializeEvents();
    verifyAllEventsCorrectlyInitialized();
}
Beispiel #8
0
int main()
{
	// initialize IRQ (interrupts)
	// this must come before everything else
	IRQ_INIT();

	// Initialize global pointers
	GameStateManager gameStateMan;
	OamManager oamMan;
	AudioManager audioMan;
	PlayState playState(&gameStateMan);
	TitleScreenState titleState(&gameStateMan);
	PauseState pauseState(&gameStateMan);
	GameOverState gameOverState(&gameStateMan);
	StoreState storeState(&gameStateMan);
	StageEndState stageEndState(&gameStateMan);
	
	g_gameStateMan = &gameStateMan;
	g_oamMan = &oamMan;
	g_playState = &playState;
	g_titleState = &titleState;
	g_pauseState = &pauseState;
	g_gameOverState = &gameOverState;
	g_storeState = &storeState;
	g_stageEndState = &stageEndState;
	g_audioMan = &audioMan;

	// create stage events
	StageEvent endEvent;
	StageEvent event1;
	StageEvent event2;
	StageEvent event3;
	StageEvent event4;
	StageEvent event5;
	StageEvent event6;
	StageEvent firePowerPowerUpEvent;
	StageEvent invinciblePowerUpEvent;
	StageEvent bombPowerUpEvent;
	g_endEvent = &endEvent;
	g_event1 = &event1;
	g_event2 = &event2;
	g_event3 = &event3;
	g_event4 = &event4;
	g_event5 = &event5;
	g_event6 = &event6;
	g_firePowerPowerUpEvent = &firePowerPowerUpEvent;
	g_invinciblePowerUpEvent = &invinciblePowerUpEvent;
	g_bombPowerUpEvent = &bombPowerUpEvent;

	initializeEvents();

	StageEvent * stage1Events[24];
	int stage1Timing[24];
	int stage1yOffset[24];
	fillEventsStage1(stage1Events, stage1Timing, stage1yOffset);
	Stage stage1(&playState, stage1Events, stage1Timing, stage1yOffset, 24);
	g_stage1 = &stage1;

	StageEvent * stage2Events[20];
	int stage2Timing[20];
	int stage2yOffset[20];
	fillEventsStage2(stage2Events, stage2Timing, stage2yOffset);
	Stage stage2(&playState, stage2Events, stage2Timing, stage2yOffset, 20);
	g_stage2 = &stage2;
	
	StageEvent * stage3Events[20];
	int stage3Timing[20];
	int stage3yOffset[20];
	fillEventsStage3(stage3Events, stage3Timing, stage3yOffset);
	Stage stage3(&playState, stage3Events, stage3Timing, stage3yOffset, 20);
	g_stage3 = &stage3;

	videoInit();

	g_gameStateMan->pushState(g_titleState);

#ifdef DEBUG
	// timers used for debug display
	REG_TM1D = 0x10000 - 2808; // overflow into timer 2 every 2808 cycles, approx. 1% of a screen refresh
	REG_TM2D = 0;
	REG_TM2CNT = TM_CASCADE | TM_ENABLE;
	REG_TM1CNT = TM_FREQ_1 | TM_ENABLE;
	int oldPercent, diffPercent, oldFrac, diffFrac;
	char buf[15];
#endif // DEBUG

	while(true)
	{
		// wait until next VBlank
		// prefer this over vid_vsync() - it's
		// better for power consumption
		VBlankIntrWait();

#ifdef DEBUG
		// grab current percentage
		oldPercent = REG_TM2D;
		oldFrac = REG_TM1D;
#endif // DEBUG

		// update shadow OAM to real OAM
		g_oamMan->update();

		// mix the next frame's audio
		g_audioMan->sndMix();
		
		// poll keys - do not do this in other places
		key_poll();

		// update the game state
		g_gameStateMan->update();

#ifdef DEBUG
		// grab current percentage, and write it out
		diffPercent = REG_TM2D - oldPercent;
		diffFrac = REG_TM1D - oldFrac;

		// round the percent based on the fractional amount
		if (diffFrac > 1404)
		{
			diffPercent++;
		}
		else if (diffFrac < -1404)
		{
			diffPercent--;
		}

		gba_itoa(diffPercent, buf);

		// clear out characters from the last write
		write("  ", Vec2(0, 0));
		write(buf, Vec2(0, 0));

		// reset timer 2 to 0
		REG_TM2CNT = 0; // disable timer
		REG_TM2D = 0; // set new value to 0
		REG_TM2CNT = TM_CASCADE | TM_ENABLE; // reenable timer
#endif // DEBUG
	}
}