Пример #1
0
cudaTimer::cudaTimer(){

	cudaError_t status;
	status = cudaEventCreate(&inicio);
	status = cudaEventCreate(&fin);

}
Пример #2
0
  double time_invocation_cuda(std::size_t num_trials, Function f, Arg1 arg1, Arg2 arg2, Arg3 arg3)
{
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);

  cudaEventRecord(start);
  for(std::size_t i = 0;
      i < num_trials;
      ++i)
  {
    f(arg1,arg2,arg3);
  }
  cudaEventRecord(stop);
  cudaThreadSynchronize();

  float msecs = 0;
  cudaEventElapsedTime(&msecs, start, stop);

  cudaEventDestroy(start);
  cudaEventDestroy(stop);

  // return mean msecs
  return msecs / num_trials;
}
Пример #3
0
float bench::ClockBenchmark::_determineCycleTime() {
	cudaEvent_t start, end;
	
	check( cudaEventCreate(&start) );
	check( cudaEventCreate(&end) );
	
	unsigned long long elapsedCycles;
	unsigned long long* deviceElapsedCycles;
	long long int* deviceDummyMem;
	const dim3 grid(1,1,1), block(1,1,1);
	
	check( cudaMalloc((void**)&deviceElapsedCycles, sizeof(unsigned long long)) );
	check( cudaMalloc((void**)&deviceDummyMem, sizeof(long long int)) );
	
	check( cudaEventRecord(start) );
	cudaDetermineCycleTimeWrapper(deviceElapsedCycles, deviceDummyMem, grid, block);
	check( cudaEventRecord(end) );
	
	check( cudaDeviceSynchronize() );
	
	check( cudaMemcpy(&elapsedCycles, deviceElapsedCycles, sizeof(unsigned long long), cudaMemcpyDeviceToHost) );
	
	float elapsedTime = 0;
	check( cudaEventElapsedTime(&elapsedTime, start, end) );
	
	report(util::Indents(2) << "elapsed time: " << elapsedTime << "ms");
	report(util::Indents(2) << "elapsed cycles: " << elapsedCycles);
	
	return elapsedTime * 1000000.0 / (float)elapsedCycles;
}
Пример #4
0
ProfilerDPD::ProfilerDPD(): count(0), tf(0)
{
    CUDA_CHECK(cudaEventCreate(&evstart));
    CUDA_CHECK(cudaEventCreate(&evforce));
    
    _flush(true);
}
Пример #5
0
int main()
{
	cudaEvent_t start;
	cudaEvent_t end;
	float duration;

	const float overestimateRate = 0.01f;
	const float errorRate = 0.01f;
	Tokenizer tokenizer( overestimateRate, errorRate );

	/************** Test counting string tokens *************/
	TextReader reader;

	cudaEventCreate( &start );
	cudaEventRecord( start, 0 );

	reader.Read();
	tokenizer.StartTokenizing( 
		reader.GetCharBuffer(), 
		reader.GetOffsetBuffer(), 
		reader.GetCharBufferSize(), 
		reader.GetOffsetBufferSize() );
	
	cudaEventCreate( &end );
	cudaEventRecord( end, 0 );
	cudaEventSynchronize( end );

	cudaEventElapsedTime( &duration, start, end );
	printf( "Time taken: %.3lf milliseconds\n", duration );

	tokenizer.GetFrequency( "a" );
}
Пример #6
0
void trainMethodsSpeedTestGPU(fann *ann, fann_train_data* train, unsigned int trainingAlgorithm, unsigned int epochCount)
{
    fann *gpunn = fann_copy(ann);
    gpunn->training_algorithm = (fann_train_enum)trainingAlgorithm;

    {
        cudaEvent_t start, stop;
        float time;

        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, 0);

        gpuann_fann_parallel_train_on_data(gpunn, train, epochCount);

        cudaEventRecord(stop, 0);
        cudaEventSynchronize(stop);
        cudaEventElapsedTime(&time, start, stop);
        cudaEventDestroy(start);
        cudaEventDestroy(stop);

        printf("%10.5f ", time);
    }

    fann_destroy(gpunn);
}
Пример #7
0
unsigned int StartTimer () {
  cudaEventCreate(&timerStart);
  cudaEventCreate(&timerStop);

  cudaEventRecord(timerStart,0);
  return 0;
}
Пример #8
0
void runCuda()
{
	//////////////////////
	// Timing cuda call //
	//////////////////////
	float time;
	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord(start, 0);

	// Map OpenGL buffer object for writing from CUDA on a single GPU
	// No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
	dptr=NULL;

	vbo = mesh->getVBO();
	vbosize = mesh->getVBOsize();

	nbo = mesh->getNBO();
	nbosize = mesh->getNBOsize();

#if RGBONLY == 1
	float newcbo[] = {0.0, 1.0, 0.0, 
					0.0, 0.0, 1.0, 
					1.0, 0.0, 0.0};
	cbo = newcbo;
	cbosize = 9;
#elif RGBONLY == 0
	vec3 defaultColor(0.5f, 0.5f, 0.5f);
	mesh->changeColor(defaultColor);
	cbo = mesh->getCBO();
	cbosize = mesh->getCBOsize();
#endif

	ibo = mesh->getIBO();
	ibosize = mesh->getIBOsize();

	cudaGLMapBufferObject((void**)&dptr, pbo);

	updateCamera();

	cudaRasterizeCore(cam, dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbo, nbosize, lights, lightsize, alpha, beta, displayMode);
	cudaGLUnmapBufferObject(pbo);

	vbo = NULL;
	cbo = NULL;
	ibo = NULL;

	frame++;
	fpstracker++;

	//////////////////////
	// Timing cuda call //
	//////////////////////
	cudaEventRecord(stop, 0);
	cudaEventSynchronize(stop);
	cudaEventElapsedTime(&time, start, stop);
	printf("runCuda runtime: %3.1f ms \n", time);
}
Пример #9
0
CudaTimer::CudaTimer(cudaStream_t stream)
	: start(nullptr)
	, end(nullptr)
	, stream(stream)
{
	CUDA_CHECK(cudaEventCreate(&start));
	CUDA_CHECK(cudaEventCreate(&end));
}
Пример #10
0
		CUDATimer() {
			start_ = 0.0; stop_ = 0.0; elapsed_ = 0.0; is_running_ = false;
			cudaEventCreate(&custart_);
			cudaEventCreate(&custop_);
			cudaEventCreate(&cubase_);

			cudaEventRecord(cubase_);
		} // CUDATimer()
Пример #11
0
void contractTT(sTensorGPU *TT1, sTensorGPU *TT2, const int n, const int size)
{
	cublasHandle_t handle;
	cublasCreate(&handle);
	type result=0;

	sTensorGPU temp1 = emptyTensor(size*size,2);
	sTensorGPU temp2 = emptyTensor(size*size*2,3);
	cudaEvent_t start;
	cudaEventCreate(&start);
	cudaEvent_t stop;
	cudaEventCreate(&stop);

	//printf("Start contractTT\n");

	cudaEventRecord(start, NULL);
	int indA = TT1[0].size[0];
	int indB = TT2[0].size[0];

	sTensorCPU tt1start = copyToCPU(TT1[0]);
	sTensorCPU tt2start = copyToCPU(TT2[0]);
	sTensorCPU tt1end = copyToCPU(TT1[n - 1]);
	sTensorCPU tt2end = copyToCPU( TT2[n - 1]);


	for (int i = 0; i < indA; i++){
		TT1[0] = prepareTensorStart(tt1start, i);
		TT1[n - 1] = prepareTensorEnd(tt1end, i);
		for (int j = 0; j < indB; j++){
			TT2[0] = prepareTensorStart(tt2start, j);
			TT2[n - 1] = prepareTensorEnd(tt2end, j);
			contractTensor(handle, TT1[0], TT2[0], temp1);
			for (int i = 1; i < n; i++){
				contractTensor(handle, temp1, TT1[i], temp2);
				contractTensor(handle, temp2, TT2[i], temp1, 2);
			}
			type add = 0;
			cudaMemcpy(&add, temp1.deviceData, sizeof(type), cudaMemcpyDeviceToHost);
			//printf("%e ", add);
			result += add;
		}
	}
	cudaEventRecord(stop, NULL);
	cudaEventSynchronize(stop);
	
	float msecTotal = 0.0f;
	cudaEventElapsedTime(&msecTotal, start, stop);
	printf("Time: %.3fms\n", msecTotal);
	printf("Ops: %.0f\n", bops);
	double gigaFlops = (bops * 1.0e-9f) / (msecTotal / 1000.0f);
	printf("Perf= %.2f GFlop/s\n", gigaFlops);

	cublasDestroy(handle);
	cudaDeviceReset();

	printf("%.5e \n", result);
	exit(0);
}
Пример #12
0
void Timer::Init() {
  if (!initted()) {
    if (Caffe::mode() == Caffe::GPU) {
      CUDA_CHECK(cudaEventCreate(&start_gpu_));
      CUDA_CHECK(cudaEventCreate(&stop_gpu_));
    }
    initted_ = true;
  }
}
Пример #13
0
// startTest ------------------------------------------------------------------
//		Initializes the cuda timer events and starts the timer.
//		@param start - Start time evet
//		@param end   - End time evet
//-----------------------------------------------------------------------------
void startTest(cudaEvent_t &start, cudaEvent_t &stop, char* msg){
	// Create Events
	cudaEventCreate( &start );
	cudaEventCreate( &stop );
	
	// Start Timer
	printf("%s\n", msg);
	cudaEventRecord( start, 0 );
}
Пример #14
0
void sobel1(int *h_result, unsigned int *h_pic, int xsize, int ysize, int thresh)
{

	
	int *d_result;
	unsigned int *d_pic;
	
	 
	int resultSize = xsize * ysize  * 3 * sizeof(int);
	int picSize = xsize * ysize * sizeof(int);

	 
	cudaMalloc( (void**)&d_result, resultSize);
	if( !d_result) {
		exit(-1);
	}
	cudaMalloc( (void**)&d_pic, picSize);
	if( !d_pic) {
		exit(-1);
	}

	 
	cudaMemcpy(d_result, h_result, resultSize, cudaMemcpyHostToDevice);
	cudaMemcpy(d_pic, h_pic, picSize, cudaMemcpyHostToDevice);
	
	 
	
	dim3 threadsPerBlock(BLOCKSIZE, BLOCKSIZE);
	dim3 numBlocks(ceil((float)ysize/(float)threadsPerBlock.x), ceil((float)xsize/(float)threadsPerBlock.y));
	
	 
	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
{	__set_CUDAConfig(numBlocks, threadsPerBlock ); 
          
	d_sobel1 (d_result, d_pic, xsize, ysize, thresh);}
          
	
	 
	cudaEventSynchronize(stop);
	float elapsedTime;
	cudaEventElapsedTime(&elapsedTime, start, stop);
	cudaEventDestroy(start);
	cudaEventDestroy(stop);

	 
	 
	cudaMemcpy(h_result, d_result, resultSize, cudaMemcpyDeviceToHost);
	cudaMemcpy(h_pic, d_pic, picSize, cudaMemcpyDeviceToHost);

	 
	cudaFree(d_result);
	cudaFree(d_pic);
	
}
Пример #15
0
void startTimer_GPU( MTime* mtime )
{
  mtime->type = GPU_TIME;
  if(mtime->gpustart==0 || mtime->gpustop==0)
  {
    CHECK_ERROR(cudaEventCreate(&mtime->gpustart));
    CHECK_ERROR(cudaEventCreate(&mtime->gpustop));
  }
  CHECK_ERROR( cudaEventRecord(mtime->gpustart) );
}
Пример #16
0
CCudaTimeMeasure::CCudaTimeMeasure(cudaStream_t csStreamID/* = 0*/):
	m_ceStartEvent(NULL),
	m_ceStopEvent(NULL),
	m_csStreamID(csStreamID)
{
	cudaCheckError(cudaEventCreate(&m_ceStartEvent));
	cudaCheckError(cudaEventCreate(&m_ceStopEvent));

	cudaCheckError(cudaEventRecord(m_ceStartEvent, m_csStreamID));
}
  void Mark(){
    // insert a "start" event into the command stream
    cudaEventCreate(&start); 

    // instert a "stop" event into the command stream
    cudaEventCreate(&stop);    

    // record the event
    cudaEventRecord( start, 0 );
    status = 1;
  }
Пример #18
0
// execute kernel
double dslashCUDA() {

  printfQuda("Executing %d kernel loops...\n", loops);
  fflush(stdout);

  if (test_type < 2)
    dirac->Tune(*cudaSpinorOut, *cudaSpinor, *tmp);
  else
    dirac->Tune(cudaSpinorOut->Even(), cudaSpinor->Even(), *tmp);

  cudaEvent_t start, end;
  cudaEventCreate(&start);
  cudaEventRecord(start, 0);
  cudaEventSynchronize(start);

  for (int i = 0; i < loops; i++) {
    switch (test_type) {
    case 0:
      if (transfer) {
	dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
      } else {
	dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
      }
      break;
    case 1:
    case 2:
      if (transfer) {
	MatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
	dirac->M(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    }
  }
    
  cudaEventCreate(&end);
  cudaEventRecord(end, 0);
  cudaEventSynchronize(end);
  float runTime;
  cudaEventElapsedTime(&runTime, start, end);
  cudaEventDestroy(start);
  cudaEventDestroy(end);

  double secs = runTime / 1000; //stopwatchReadSeconds();

  // check for errors
  cudaError_t stat = cudaGetLastError();
  if (stat != cudaSuccess)
    printf("with ERROR: %s\n", cudaGetErrorString(stat));

  printf("done.\n\n");

  return secs;
}
Пример #19
0
//----------------------------------------------------------------------------//
CUDAImpl::CUDAImpl()
        : ImageProcessor(CUDA), _cudaBuild(false)
{
    if (cudaSetDevice(_cudaGetMaxGflopsDeviceId()) != cudaSuccess) {
        throw std::logic_error("gpuip::CUDAImpl() could not set device id");
    };
    cudaFree(0); //use runtime api to create a CUDA context implicitly

    cudaEventCreate(&_start);
    cudaEventCreate(&_stop);
}
Пример #20
0
// execute kernel
double dslashCUDA(int niter) {

  cudaEvent_t start, end;
  cudaEventCreate(&start);
  cudaEventCreate(&end);
  cudaEventRecord(start, 0);

  for (int i = 0; i < niter; i++) {
    switch (test_type) {
    case 0:
      if (transfer) {
	dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
      } else {
	//inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
	//inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
	//dslashQuda(cudaSpinorOut->V(), cudaSpinor->V(), &inv_param, parity);
	dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
      }
      break;
    case 1:
    case 2:
      if (transfer) {
	MatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
	dirac->M(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    case 3:
    case 4:
      if (transfer) {
	MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
	dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    }
  }
    
  cudaEventRecord(end, 0);
  cudaEventSynchronize(end);
  float runTime;
  cudaEventElapsedTime(&runTime, start, end);
  cudaEventDestroy(start);
  cudaEventDestroy(end);

  double secs = runTime / 1000; //stopwatchReadSeconds();

  // check for errors
  cudaError_t stat = cudaGetLastError();
  if (stat != cudaSuccess)
    printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));

  return secs;
}
Пример #21
0
void Timer::Init() {
  if (!initted()) {
    if (Caffe::mode() == Caffe::GPU) {
#ifndef CPU_ONLY
      CUDA_CHECK(cudaEventCreate(&start_gpu_));
      CUDA_CHECK(cudaEventCreate(&stop_gpu_));
#else
      NO_GPU;
#endif
    }
    initted_ = true;
  }
}
Пример #22
0
int main(int argc, char **argv)
{
    // device memory
    real *psi_d, *z_d;

    size_t fSize = sizeof(real);

    /* grid dimensions */
    unsigned int Nx = 513, Ny = 513;
    // omitting boundaries
    unsigned int nGridPoints = (Nx-2)*(Ny-2);

    cudaMalloc((void **) &psi_d, (nGridPoints+1)*fSize);
    cudaMalloc((void **) &z_d,   (nGridPoints+1)*fSize);

    /* initialization */
    fillArray(psi_d, 0.0, nGridPoints+1);
    fillArray(z_d,   1.0, nGridPoints+1);
    checkCudaError("Initialization of grid");

    // for timing purposes
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    // start timer
    cudaEventRecord(start,0);

    /* Call the poisson solver, right hand side
     * is stored on the device in z_d (make sure the data
     * is copied from CPU to GPU!), result is stored in
     * psi_d (on the GPU/device).
     * Here NX-2 is the width of the grid's interior
     * (without the boundaries).
     */
    cuPoisson((Nx-2), psi_d, z_d);

    // stop timer
    cudaEventRecord(stop,0);
    cudaEventSynchronize(stop);
    float computationTime;
    cudaEventElapsedTime(&computationTime, start, stop);

    printf("Computation time was %.5f seconds.\n\n", computationTime/1000.0);

    printf("Writing result to disk...\n");
    // write result to file
    writeBinaryFile(Nx, Ny, psi_d, "data.dat");
    printf("done\n");

    return EXIT_SUCCESS;
}
Пример #23
0
double dslashCUDA(int niter) {
    
  cudaEvent_t start, end;
  cudaEventCreate(&start);
  cudaEventRecord(start, 0);
  cudaEventSynchronize(start);

  for (int i = 0; i < niter; i++) {
    switch (test_type) {
    case 0:
      parity = QUDA_EVEN_PARITY;
      if (transfer){
	//dslashQuda(spinorOdd, spinorEven, &inv_param, parity);
      } else {
	dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
      }	   
      break;
    case 1:
      parity = QUDA_ODD_PARITY;
      if (transfer){
	//MatPCQuda(spinorOdd, spinorEven, &inv_param);
      } else {
	dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
      }
      break;
    case 2:
      if (transfer){
	//MatQuda(spinorGPU, spinor, &inv_param);
      } else {
	dirac->M(*cudaSpinorOut, *cudaSpinor);
      }
    }
  }
    
  cudaEventCreate(&end);
  cudaEventRecord(end, 0);
  cudaEventSynchronize(end);
  float runTime;
  cudaEventElapsedTime(&runTime, start, end);
  cudaEventDestroy(start);
  cudaEventDestroy(end);

  double secs = runTime / 1000; //stopwatchReadSeconds();

  // check for errors
  cudaError_t stat = cudaGetLastError();
  if (stat != cudaSuccess)
    errorQuda("with ERROR: %s\n", cudaGetErrorString(stat));
    
  return secs;
}
Пример #24
0
void Timer::Init() {
  if (!initted()) {
    if (Caffe::mode() == Caffe::GPU
        && Caffe::GetDefaultDevice()->backend() == BACKEND_CUDA) {
#ifndef CPU_ONLY
#ifdef USE_CUDA
      CUDA_CHECK(cudaEventCreate(&start_gpu_));
      CUDA_CHECK(cudaEventCreate(&stop_gpu_));
#endif  // USE_CUDA
#else
      NO_GPU;
#endif
    }
    initted_ = true;
  }
}
Пример #25
0
static void _init_cuda_checkpoints(QSP_ARG_DECL  int n)
{
	//CUresult e;
	cudaError_t drv_err;
	int i;

	if( max_cuda_checkpoints > 0 ){
		sprintf(ERROR_STRING,
"init_cuda_checkpoints(%d):  already initialized with %d checpoints",
			n,max_cuda_checkpoints);
		warn(ERROR_STRING);
		return;
	}
	ckpt_tbl = (Cuda_Checkpoint *) getbuf( n * sizeof(*ckpt_tbl) );
	if( ckpt_tbl == NULL ) error1("failed to allocate checkpoint table");

	max_cuda_checkpoints = n;

	for(i=0;i<max_cuda_checkpoints;i++){
		drv_err=cudaEventCreate(&ckpt_tbl[i].ckpt_event);
		if( drv_err != cudaSuccess ){
			describe_cuda_driver_error2("init_cuda_checkpoints",
				"cudaEventCreate",drv_err);
			error1("failed to initialize checkpoint table");
		}
		ckpt_tbl[i].ckpt_tag=NULL;
	}
}
Пример #26
0
TEST(EventRecord, RecordAfterDestroy) {
    ::testing::FLAGS_gtest_death_test_style = "threadsafe";

    cudaError_t ret;
    cudaEvent_t event;
    cudaStream_t stream;

    ret = cudaEventCreate(&event);
    ASSERT_EQ(cudaSuccess, ret);

    ret = cudaEventDestroy(event);
    EXPECT_EQ(cudaSuccess, ret);

    ret = cudaStreamCreate(&stream);
    ASSERT_EQ(cudaSuccess, ret);

    #if CUDART_VERSION >= 5000
    ret = cudaEventRecord(event);
    EXPECT_EQ(cudaErrorUnknown, ret);
    #else
    EXPECT_EXIT(
        cudaEventRecord(event, stream),
        ::testing::KilledBySignal(SIGSEGV), "");
    #endif

    ret = cudaStreamDestroy(stream);
    EXPECT_EQ(cudaSuccess, ret);
}
Пример #27
0
        void _init(int numBodies, int numDevices, int p, int q,
                   bool bUsePBO, bool useHostMem, bool useCpu)
        {
            if (useCpu)
            {
                m_nbodyCpu = new BodySystemCPU<T>(numBodies);
                m_nbody = m_nbodyCpu;
                m_nbodyCuda = 0;
            }
            else
            {
                m_nbodyCuda = new BodySystemCUDA<T>(numBodies, numDevices, p, q, bUsePBO, useHostMem);
                m_nbody = m_nbodyCuda;
                m_nbodyCpu = 0;
            }

            // allocate host memory
            m_hPos = new T[numBodies*4];
            m_hVel = new T[numBodies*4];
            m_hColor = new float[numBodies*4];

            m_nbody->setSoftening(activeParams.m_softening);
            m_nbody->setDamping(activeParams.m_damping);

            if (useCpu)
            {
                sdkCreateTimer(&timer);
                sdkStartTimer(&timer);
            }
            else
            {
                checkCudaErrors(cudaEventCreate(&startEvent));
                checkCudaErrors(cudaEventCreate(&stopEvent));
                checkCudaErrors(cudaEventCreate(&hostMemSyncEvent));
            }

            if (!benchmark && !compareToCPU)
            {
                m_renderer = new ParticleRenderer;
                _resetRenderer();
            }

            sdkCreateTimer(&demoTimer);
            sdkStartTimer(&demoTimer);
        }
Пример #28
0
// ----------------------------------------
void trace_gpu_end( int dev, int s )
{
    int t = dev*glog.nstream + s;
    int id = glog.gpu_id[t];
    cudaEventCreate( &glog.gpu_end[t][id] );
    cudaEventRecord(  glog.gpu_end[t][id], glog.streams[t] );
    if ( id+1 < MAX_EVENTS ) {
        glog.gpu_id[t] = id+1;
    }
}
Пример #29
0
static void do_main()
{
  Matrix A;
  double *x, *b;

  printf("#############################################################################################\n");
  printf("**                           B I C G S T A B   S O L V E R                                 **\n");
  printf("#############################################################################################\n\n");

  cudaDeviceProp props;
  CUDA_SAFE_CALL( cudaGetDeviceProperties(&props, 0) );
  printf("** DEVICE      : %10s (ECC: %3s)                                                     **\n", props.name, props.ECCEnabled ? "ON" : "OFF");
  printf("\n#############################################################################################\n\n");

  Context ctx;
  ctx.read_from_file("config.txt");
  read_system_from_file(&ctx, "res/matrix.inp", &A, &x, &b);

  cudaEvent_t start, stop;
  CUDA_SAFE_CALL( cudaEventCreate(&start) );
  CUDA_SAFE_CALL( cudaEventCreate(&stop) );
  
  CUDA_SAFE_CALL( cudaEventRecord(start) );

  //Jacobi solver(0.6);
  Bicgstab solver;
  solver.setup(&ctx, &A);
  solver.solve(&ctx, &A, x, b);

  float elapsed_time = 0.0f;
  CUDA_SAFE_CALL( cudaEventRecord(stop) );
  CUDA_SAFE_CALL( cudaEventSynchronize(stop) );
  CUDA_SAFE_CALL( cudaEventElapsedTime(&elapsed_time, start, stop) );
  
  printf("** ELAPSED TIME: %9.3fms                                                               **\n", elapsed_time);
  printf("\n#############################################################################################\n\n");

  CUDA_SAFE_CALL( cudaEventDestroy(stop) );
  CUDA_SAFE_CALL( cudaEventDestroy(start) );

  CUDA_SAFE_CALL( cudaFree(x) );
  CUDA_SAFE_CALL( cudaFree(b) );
}
Пример #30
0
static int cutorch_Event_new(lua_State *L)
{
  cudaEvent_t *event = luaT_alloc(L, sizeof(cudaEvent_t));
  THCudaCheck(cudaEventCreate(event));

  THCState *state = cutorch_getstate(L);
  THCudaCheck(cudaEventRecord(*event, THCState_getCurrentStream(state)));
  luaT_pushudata(L, event, "cutorch.Event");

  return 1;
}