// Prepares the solver's CUDA side. The four steps below run in a fixed order:
// buffer generation, CUDA setup, solver setup, then the upload of the particle buffer.
void SPHSolver::SetupCUDASolver()
{
	// Size in bytes of one particle record; passed to both the setup and the transfer call.
	const auto fluidStride = sizeof(Fluid);

	// Step 1: create the CUDA buffer.
	GenerateCUDABuffer();

	// Step 2: CUDA setup, given the particle count and the per-particle size.
	SetupCUDA(mTotalParticles, fluidStride);

	// Step 3: solver setup, given the smoothing radius and the particle count.
	SetupSolver(mSmoothRadius, mTotalParticles);

	// Step 4: hand the particle buffer over to CUDA.
	TransferToCUDA(mPartBuffer, mTotalParticles, fluidStride);
}
// Runs one pass of the fluid solver.
//
// Order of work:
//   1. Optionally emit new particles (see the emit-rate handling below).
//   2. With NOGRID defined: the slow O(n^2) pressure and force routines only.
//      Otherwise: grid insert -> pressure -> force -> advance, either through the
//      CUDA routines (when m_Toggle[USE_CUDA] is set and BUILD_CUDA is defined)
//      or through the CPU grid routines.
//
// While bTiming is true, the elapsed time of every stage in the grid path is
// printed to stdout with printf.
void FluidSystem::Run ()
{
	const bool bTiming = true;		// per-stage timing output is always enabled

	mint::Time start, stop;
	
	// Particle spacing divided by the simulation scale; passed to Emit().
	float ss = m_Param [ SPH_PDIST ] / m_Param[ SPH_SIMSCALE ];

	// The emit rate is used as an integer modulo divisor. A rate in (0,1) is positive
	// but truncates to 0, and "% 0" is undefined behavior, so the guard has to test the
	// truncated value. The frame counter is only incremented while emission is enabled.
	const int emitRate = ( m_Vec[EMIT_RATE].x > 0 ) ? static_cast<int>( m_Vec[EMIT_RATE].x ) : 0;
	if ( emitRate > 0 && (++m_Frame) % emitRate == 0 ) {
		//m_Frame = 0;
		Emit ( ss ); 
	}
	
	#ifdef NOGRID
		// Slow method - O(n^2)
		SPH_ComputePressureSlow ();
		SPH_ComputeForceSlow ();
	#else

		if ( m_Toggle[USE_CUDA] ) {
			
			// NOTE(review): when BUILD_CUDA is not defined this branch is empty, so a
			// set USE_CUDA toggle means no solver stage runs at all in such builds.
			#ifdef BUILD_CUDA
				// -- GPU --
				// Upload the point buffer and grid to the device.
				start.SetSystemTime ( ACC_NSEC );		
				TransferToCUDA ( mBuf[0].data, (int*) &m_Grid[0], NumPoints() );
				if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "TO: %s\n", stop.GetReadableTime().c_str() ); }
			
				start.SetSystemTime ( ACC_NSEC );		
				Grid_InsertParticlesCUDA ();
				if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "INSERT (CUDA): %s\n", stop.GetReadableTime().c_str() ); }

				start.SetSystemTime ( ACC_NSEC );
				SPH_ComputePressureCUDA ();
				if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "PRESS (CUDA): %s\n", stop.GetReadableTime().c_str() ); }

				start.SetSystemTime ( ACC_NSEC );
				SPH_ComputeForceCUDA (); 
				if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "FORCE (CUDA): %s\n", stop.GetReadableTime().c_str() ); }

				//** CUDA integrator is incomplete..
				// Once integrator is done, we can remove TransferTo/From steps
				/*start.SetSystemTime ( ACC_NSEC );
				SPH_AdvanceCUDA( m_DT, m_DT/m_Param[SPH_SIMSCALE] );
				if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "ADV (CUDA): %s\n", stop.GetReadableTime().c_str() ); }*/

				// Download the results so the CPU integrator below can use them.
				start.SetSystemTime ( ACC_NSEC );		
				TransferFromCUDA ( mBuf[0].data, (int*) &m_Grid[0], NumPoints() );
				if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "FROM: %s\n", stop.GetReadableTime().c_str() ); }

				// .. Do advance on CPU (this call is not timed)
				Advance();

			#endif
			
		} else {
			// -- CPU only --

			start.SetSystemTime ( ACC_NSEC );
			Grid_InsertParticles ();
			if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "INSERT: %s\n", stop.GetReadableTime().c_str() ); }
		
			start.SetSystemTime ( ACC_NSEC );
			SPH_ComputePressureGrid ();
			if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "PRESS: %s\n", stop.GetReadableTime().c_str() ); }

			start.SetSystemTime ( ACC_NSEC );
			SPH_ComputeForceGridNC ();		
			if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "FORCE: %s\n", stop.GetReadableTime().c_str() ); }

			start.SetSystemTime ( ACC_NSEC );
			Advance();
			if ( bTiming) { stop.SetSystemTime ( ACC_NSEC ); stop = stop - start; printf ( "ADV: %s\n", stop.GetReadableTime().c_str() ); }
		}		
		
	#endif
}