Example #1
/* GPUrand */
void GPUrand(const GPUtype &OUT) {

  // initialize so that an unsupported type falls through to the error check
  // below instead of reading an uninitialized status
  curandStatus_t status = CURAND_STATUS_TYPE_ERROR;

  gpuTYPE_t type = gm->gputype.getType(OUT);

  gm->gmat->control.cacheClean();


  const void *gpuptr = gm->gputype.getGPUptr(OUT); // pointer to GPU memory
  int numel = gm->gputype.getNumel(OUT);           // number of elements
  int datasize = gm->gputype.getDataSize(OUT);     // bytes for each element


  gen = 0;
  // TODO: implement a recovery procedure
  // (try, and on error try again; see the sketch after this function)

  // init curand
  if (curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_INIT);
  }
  //if (curandCreateGenerator(&gen,CURAND_RNG_QUASI_DEFAULT)!=CURAND_STATUS_SUCCESS) {
  //  mexErrMsgTxt(ERROR_CURAND_INIT);
  //}

  // seed
  seed++;
  if (curandSetPseudoRandomGeneratorSeed(gen, time(NULL)+seed)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_SEED);
  }

  if (type == gpuFLOAT) {
    status = curandGenerateUniform(gen, (float *) gpuptr, numel);
  } else if (type == gpuCFLOAT) {
    status = curandGenerateUniform(gen, (float *) gpuptr, numel*2);
  } else if (type == gpuDOUBLE) {
    status = curandGenerateUniformDouble(gen, (double *) gpuptr, numel);
  } else if (type == gpuCDOUBLE) {
    status = curandGenerateUniformDouble(gen, (double *) gpuptr, numel*2);
  }

  if (status!=CURAND_STATUS_SUCCESS) {
    curandDestroyGenerator(gen);
    mexErrMsgTxt(ERROR_CURAND_GEN);
  }


  // destroy
  if (curandDestroyGenerator(gen)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_DESTROY);
  }


}
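The TODO comment in GPUrand above mentions a recovery procedure (try, and on error try again) that is not implemented. The sketch below shows one way it could look; the helper name and retry count are hypothetical, while curandCreateGenerator and the status codes are standard cuRAND API.

#include <curand.h>

/* Hypothetical helper, not part of GPUmat: retry generator creation a few
   times before giving up, as suggested by the TODO comment above. */
static curandStatus_t createGeneratorWithRetry(curandGenerator_t *g,
                                               int maxAttempts) {
  curandStatus_t s = CURAND_STATUS_INITIALIZATION_FAILED;
  for (int i = 0; i < maxAttempts; i++) {
    s = curandCreateGenerator(g, CURAND_RNG_PSEUDO_DEFAULT);
    if (s == CURAND_STATUS_SUCCESS) {
      break;  /* generator created; stop retrying */
    }
  }
  return s;
}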
Example #2
void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().cusparse_descr_) CUSPARSE_CHECK(cusparseDestroyMatDescr(Get().cusparse_descr_));
  if (Get().cusparse_handle_) CUSPARSE_CHECK(cusparseDestroy(Get().cusparse_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUSPARSE_CHECK(cusparseCreate(&Get().cusparse_handle_));
  CUSPARSE_CHECK(cusparseCreateMatDescr(&Get().cusparse_descr_));
//  cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL);
//  cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO);
  LOG(INFO) << "set descr";
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
Example #3
 curand_generator::
 ~curand_generator()
 {
     if (handle)
     {
         curandDestroyGenerator((curandGenerator_t)handle);
     }
 }
Example #4
Caffe::~Caffe() {
  if (cusparse_descr_) CUSPARSE_CHECK(cusparseDestroyMatDescr(cusparse_descr_));
  if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (cusparse_handle_) CUSPARSE_CHECK(cusparseDestroy(cusparse_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
Example #5
void Caffe::set_random_seed(unsigned int seed)
{
  CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, seed));
  VSL_CHECK(vslDeleteStream(&Get().vsl_stream_));
  VSL_CHECK(vslNewStream(&Get().vsl_stream_, VSL_BRNG_MT19937, seed));
}
Example #6
Caffe::~Caffe()
{
  if (cublas_handle_)
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_)
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  if (vsl_stream_)
    VSL_CHECK(vslDeleteStream(&vsl_stream_));
}
Example #7
WIE::Random::~Random()
{
  assert(generator);
  curandDestroyGenerator(generator);
  generator = NULL;

  assert(samples);
  cudaFree(samples);
  samples = NULL;
}
Example #8
Caffe::~Caffe() {
  // Make sure all device contexts and
  // dependent memory blocks are freed properly
  device_contexts_.clear();
#ifdef USE_CUDA
  if (cublas_handle_)
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
#endif  // USE_CUDA
}
Example #9
Caffe::~Caffe() {
  for (vector<cublasHandle_t>& group_cublas_handles : cublas_handles_) {
    for (cublasHandle_t h : group_cublas_handles) {
      if (h) {
        CUBLAS_CHECK(cublasDestroy(h));
      }
    }
  }
  for_each(curand_generators_.begin(), curand_generators_.end(), [](curandGenerator_t h) {
    if (h) {
      CURAND_CHECK(curandDestroyGenerator(h));
    }
  });
}
Example #10
void Dragon::set_device(const int device_id) {
	int current_device;
	CUDA_CHECK(cudaGetDevice(&current_device));
	if (current_device == device_id) return;
	// The call to cudaSetDevice must come before any calls to Get, which
	// may perform initialization using the GPU.

	// does resetting the device also require resetting the handle and generator?
	CUDA_CHECK(cudaSetDevice(device_id));
	if (Get().cublas_handle) cublasDestroy_v2(Get().cublas_handle);
	if (Get().curand_generator) curandDestroyGenerator(Get().curand_generator);
	cublasCreate_v2(&Get().cublas_handle);
	curandCreateGenerator(&Get().curand_generator, CURAND_RNG_PSEUDO_DEFAULT);
	curandSetPseudoRandomGeneratorSeed(Get().curand_generator, cluster_seedgen());
}
Example #11
void Caffe::set_random_seed(const unsigned int seed) {
  // Curand seed
  // Yangqing's note: simply setting the generator seed does not seem to
  // work on the tesla K20s, so I wrote the ugly reset thing below.
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator()));
    CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
        CURAND_RNG_PSEUDO_DEFAULT));
    CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
        seed));
  } else {
    LOG(ERROR) << "Curand not available. Skipping setting the curand seed.";
  }
  // RNG seed
  Get().random_generator_.reset(new RNG(seed));
}
Example #12
void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUDA_CHECK(cudaSetDevice(device_id));
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
Example #13
void Engine::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
Example #14
void Caffe::SetSlaveDevice(const int slave_device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == slave_device_id) {
    return;
  }
  if (Get().slave_cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().slave_cublas_handle_));
  if (Get().slave_curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().slave_curand_generator_));
  }
  CUDA_CHECK(cudaSetDevice(slave_device_id));
  CUDA_CHECK(cudaStreamCreate (&Get().slave_cu_stream_));
  CUBLAS_CHECK(cublasCreate(&Get().slave_cublas_handle_));
  CUBLAS_CHECK(cublasSetStream(Get().slave_cublas_handle_, Get().slave_cu_stream_));
  CURAND_CHECK(curandCreateGenerator(&Get().slave_curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().slave_curand_generator_,
      cluster_seedgen()));
  Get().slave_device_id_ = slave_device_id;
  CUDA_CHECK(cudaSetDevice(current_device));
  Caffe::set_gpu_mode(Caffe::MASTER_SLAVE);
}
Example #15
void Caffe::SetDevice(const int device_id) {
  std::vector<int> devices;
  devices.push_back(device_id);
  Caffe::SetDevices(devices);

  Get().default_device_context_ = GetDeviceContext(device_id);

  if (Get().default_device_context_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
    int current_device;
    CUDA_CHECK(cudaGetDevice(&current_device));
    if (current_device == device_id) {
      return;
    }
// The call to cudaSetDevice must come before any calls to Get, which
// may perform initialization using the GPU.
    CUDA_CHECK(cudaSetDevice(device_id));
    if (Get().cublas_handle_)
      CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
    if (Get().curand_generator_) {
      CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
    }
    CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
    CURAND_CHECK(
        curandCreateGenerator(&Get().curand_generator_,
                              CURAND_RNG_PSEUDO_DEFAULT));
    CURAND_CHECK(
        curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
                                           cluster_seedgen()));
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
#ifdef USE_CLBLAS
    clblasSetup();
#endif  // USE_CLBLAS
#endif  // USE_GREENTEA
  }
}
Example #16
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    // Start logs
    shrQAStart(argc, argv);

    // initialize the GPU, either identified by --device
    // or by picking the device with highest flop rate.
    int devID = findCudaDevice(argc, (const char **)argv);

    // parsing the number of random numbers to generate
    int rand_n = DEFAULT_RAND_N;
    if( checkCmdLineFlag(argc, (const char**) argv, "count") )  
    {       
        rand_n = getCmdLineArgumentInt(argc, (const char**) argv, "count"); 
    }
    printf("Allocating data for %i samples...\n", rand_n);
     
    // parsing the seed
    int seed = DEFAULT_SEED;
    if( checkCmdLineFlag(argc, (const char**) argv, "seed") ) 
    {       
        seed = getCmdLineArgumentInt(argc, (const char**) argv, "seed"); 
    }
    printf("Seeding with %i ...\n", seed);
    

    float *d_Rand; 
    checkCudaErrors( cudaMalloc((void **)&d_Rand, rand_n * sizeof(float)) );
    
    curandGenerator_t prngGPU;
    checkCurandErrors( curandCreateGenerator(&prngGPU, CURAND_RNG_PSEUDO_MTGP32) ); 
    checkCurandErrors( curandSetPseudoRandomGeneratorSeed(prngGPU, seed) );

    curandGenerator_t prngCPU;
    checkCurandErrors( curandCreateGeneratorHost(&prngCPU, CURAND_RNG_PSEUDO_MTGP32) ); 
    checkCurandErrors( curandSetPseudoRandomGeneratorSeed(prngCPU, seed) );

    //
    // Example 1: Compare random numbers generated on GPU and CPU
    float *h_RandGPU  = (float *)malloc(rand_n * sizeof(float));

    printf("Generating random numbers on GPU...\n\n");
    checkCurandErrors( curandGenerateUniform(prngGPU, (float*) d_Rand, rand_n) );

    printf("\nReading back the results...\n");
    checkCudaErrors( cudaMemcpy(h_RandGPU, d_Rand, rand_n * sizeof(float), cudaMemcpyDeviceToHost) );

    
    float *h_RandCPU  = (float *)malloc(rand_n * sizeof(float));
     
    printf("Generating random numbers on CPU...\n\n");
    checkCurandErrors( curandGenerateUniform(prngCPU, (float*) h_RandCPU, rand_n) ); 
 
    printf("Comparing CPU/GPU random numbers...\n\n");
    float L1norm = compareResults(rand_n, h_RandGPU, h_RandCPU); 
    
    //
    // Example 2: Timing of random number generation on GPU
    const int numIterations = 10;
    int i;
    StopWatchInterface *hTimer;

    checkCudaErrors( cudaDeviceSynchronize() );
    sdkCreateTimer(&hTimer);
    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);

    for (i = 0; i < numIterations; i++)
    {
        checkCurandErrors( curandGenerateUniform(prngGPU, (float*) d_Rand, rand_n) );
    }

    checkCudaErrors( cudaDeviceSynchronize() );
    sdkStopTimer(&hTimer);

    double gpuTime = 1.0e-3 * sdkGetTimerValue(&hTimer)/(double)numIterations;

    printf("MersenneTwister, Throughput = %.4f GNumbers/s, Time = %.5f s, Size = %u Numbers\n", 
               1.0e-9 * rand_n / gpuTime, gpuTime, rand_n); 

    printf("Shutting down...\n");

    checkCurandErrors( curandDestroyGenerator(prngGPU) );
    checkCurandErrors( curandDestroyGenerator(prngCPU) );
    checkCudaErrors( cudaFree(d_Rand) );
    sdkDeleteTimer( &hTimer);
    free(h_RandGPU);
    free(h_RandCPU);

    cudaDeviceReset();	
    shrQAFinishExit(argc, (const char**)argv, (L1norm < 1e-6) ? QA_PASSED : QA_FAILED);
}
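compareResults() is not shown in this snippet. The sketch below is a hypothetical stand-in, assuming it returns the L1 distance between the two buffers normalized by the reference magnitude, which would be consistent with the (L1norm < 1e-6) pass/fail check above; the actual sample may compute it differently.

#include <math.h>

/* Hypothetical stand-in for the sample's compareResults(): normalized L1
   distance between the GPU- and CPU-generated buffers. */
static float compareResultsSketch(int n, const float *gpu, const float *cpu)
{
    double sumDelta = 0.0;   /* accumulated |gpu[i] - cpu[i]| */
    double sumRef   = 0.0;   /* accumulated |cpu[i]|, used for normalization */

    for (int i = 0; i < n; i++)
    {
        sumDelta += fabs((double)gpu[i] - (double)cpu[i]);
        sumRef   += fabs((double)cpu[i]);
    }

    return (float)(sumDelta / sumRef);
}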
Example #17
Engine::~Engine() {
  if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
Example #18
/* GPUrandn */
void GPUrandn(const GPUtype &OUT) {

  // initialize so that an unsupported type falls through to the error check
  // below instead of reading an uninitialized status
  curandStatus_t status = CURAND_STATUS_TYPE_ERROR;

  gpuTYPE_t type = gm->gputype.getType(OUT);


  gm->gmat->control.cacheClean();



  const void *gpuptr = gm->gputype.getGPUptr(OUT); // pointer to GPU memory
  int numel = gm->gputype.getNumel(OUT);           // number of elements
  int datasize = gm->gputype.getDataSize(OUT);     // bytes for each element


  gen = 0;
  // TODO: implement a recovery procedure
  // (try, and on error try again; see the sketch after GPUrand above)

  // init curand
  if (curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_INIT);
  }
  //if (curandCreateGenerator(&gen,CURAND_RNG_QUASI_DEFAULT)!=CURAND_STATUS_SUCCESS) {
  //  mexErrMsgTxt(ERROR_CURAND_INIT);
  //}

  // curandGenerateNormal/curandGenerateNormalDouble require an even count;
  // if the count is odd we generate count-1 values and patch the last
  // element from a small auxiliary buffer (see the type branches below)

  // seed
  seed++;
  if (curandSetPseudoRandomGeneratorSeed(gen, time(NULL)+seed)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_SEED);
  }

  unsigned int n = 0;

  if (type == gpuFLOAT) {
    n = numel;
  } else if (type == gpuCFLOAT) {
    n = numel*2;
  } else if (type == gpuDOUBLE) {
    n = numel;
  } else if (type == gpuCDOUBLE) {
    n = numel*2;
  }

  unsigned int even = (n%2) == 0;

  if (!even) {
    n = n-1;  // generate an even count; the last element is patched below
  }

  if (type == gpuFLOAT) {
    float mean = 0.0;
    float std = 1.0;
    status = curandGenerateNormal(gen, (float *) gpuptr, n, mean, std);

    if (!even) {
      float *devData;
      if((cudaMalloc((void **)&devData, 4 * sizeof(float))) != cudaSuccess) {
        status = CURAND_STATUS_LAUNCH_FAILURE;
      } else {
        status = curandGenerateNormal(gen, devData, 4, mean, std);
        if (status==CURAND_STATUS_SUCCESS) {
          void *dst = (void *) ((UINTPTR gpuptr)+n*datasize);
          if (cudaMemcpy(dst, (void *) devData, datasize, cudaMemcpyDeviceToDevice)!=cudaSuccess) {
            status = CURAND_STATUS_LAUNCH_FAILURE;
          }
        }
        if(cudaFree(devData) != cudaSuccess) {
          status = CURAND_STATUS_LAUNCH_FAILURE;
        }
      }
    }

  } else if (type == gpuCFLOAT) {
    float mean = 0.0;
    float std = 1.0;
    status = curandGenerateNormal(gen, (float *) gpuptr, n, mean, std);
  } else if (type == gpuDOUBLE) {
    double mean = 0.0;
    double std = 1.0;
    status = curandGenerateNormalDouble(gen, (double *) gpuptr, n, mean, std);
    if (!even) {
      double *devData;
      if((cudaMalloc((void **)&devData, 4 * sizeof(double))) != cudaSuccess) {
        status = CURAND_STATUS_LAUNCH_FAILURE;
      } else {
        status = curandGenerateNormalDouble(gen, devData, 4, mean, std);
        if (status==CURAND_STATUS_SUCCESS) {
          void *dst = (void *) ((UINTPTR gpuptr)+n*datasize);
          if (cudaMemcpy(dst, (void *) devData, datasize, cudaMemcpyDeviceToDevice)!=cudaSuccess) {
            status = CURAND_STATUS_LAUNCH_FAILURE;
          }
        }
        if(cudaFree(devData) != cudaSuccess) {
          status = CURAND_STATUS_LAUNCH_FAILURE;
        }
      }
    }


  } else if (type == gpuCDOUBLE) {
    double mean = 0.0;
    double std = 1.0;
    status = curandGenerateNormalDouble(gen, (double *) gpuptr, n, mean, std);

  }

  if (status!=CURAND_STATUS_SUCCESS) {
    curandDestroyGenerator(gen);
    mexErrMsgTxt(ERROR_CURAND_GEN);
  }


  // destroy
  if (curandDestroyGenerator(gen)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_DESTROY);
  }


}
Example #19
Dragon::~Dragon(){
	if (cublas_handle) cublasDestroy_v2(cublas_handle);
	if (curand_generator) curandDestroyGenerator(curand_generator);
}
Example #20
 prngenerator_cuda<TFloat>::~prngenerator_cuda() {
   CurandSafeCall(curandDestroyGenerator(_dev_bulk_prng_engine));
   CudaSafeCall(cudaFree(_dev_prng_engines));
 }
Example #21
ModelWPAMGPU::~ModelWPAMGPU()
{
	curandDestroyGenerator(gen);
}