void Caffe::SetDevice(const int device_id) { int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); if (current_device == device_id) { return; } // The call to cudaSetDevice must come before any calls to Get, which // may perform initialization using the GPU. CUDA_CHECK(cudaSetDevice(device_id)); if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_)); if (Get().cusparse_descr_)CUSPARSE_CHECK(cusparseDestroyMatDescr(Get().cusparse_descr_)); if (Get().cusparse_handle_)CUSPARSE_CHECK(cusparseDestroy(Get().cusparse_handle_)); if (Get().curand_generator_) { CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_)); } CUSPARSE_CHECK(cusparseCreate(&Get().cusparse_handle_)); CUSPARSE_CHECK(cusparseCreateMatDescr(&Get().cusparse_descr_)); // cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL); // cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO); LOG(INFO)<<"set descr"; CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_)); CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, cluster_seedgen())); }
// Default constructor: starts in CPU mode and best-effort initializes the
// GPU math libraries (cuBLAS, cuSPARSE, cuRAND).  Failures are only logged
// so that CPU-only use keeps working.
Caffe::Caffe()
    : cublas_handle_(NULL), cusparse_handle_(NULL), cusparse_descr_(NULL),
      curand_generator_(NULL), random_generator_(), mode_(Caffe::CPU),
      solver_count_(1), root_solver_(true) {
  LOG(INFO)<<"caffe init.";
  // Try to create a cublas handler, and report an error if failed (but we will
  // keep the program running as one might just want to run CPU code).
  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }
  // cuSPARSE handle, plus a general / zero-based matrix descriptor.
  if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
    LOG(ERROR) << "cannot create Cusparse handle,Cusparse won't be available.";
  }
  if (cusparseCreateMatDescr(&cusparse_descr_) != CUSPARSE_STATUS_SUCCESS) {
    LOG(ERROR) << "cannot create Cusparse descr,descr won't be available.";
  } else {
    cusparseSetMatType(cusparse_descr_, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatIndexBase(cusparse_descr_, CUSPARSE_INDEX_BASE_ZERO);
    LOG(INFO)<<"init descr";
  }
  // Try to create a curand handler (create + seed must both succeed).
  if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
          != CURAND_STATUS_SUCCESS ||
      curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
          != CURAND_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
  LOG(INFO)<<"caffe finish";
}
// Bring up the cuRAND state for this Random object: activate the owning
// device, build a generator with the configured rngMethod, seed it, and
// clear the cached sample buffer pointer.
void WIE::Random::Init() {
  device.activate();
  assertResult(curandCreateGenerator(&generator, rngMethod),
               "Could not create random number generator");
  assertResult(curandSetPseudoRandomGeneratorSeed(generator, seed),
               "Could not set seed value");
  samples = NULL;
}
// Re-seed both random sources: the cuRAND generator is destroyed and rebuilt
// with the new seed, and the MKL VSL stream is replaced by a fresh MT19937
// stream seeded the same way.
void Caffe::set_random_seed(unsigned int seed) {
  // cuRAND: tear down, recreate, then seed.
  CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
                                     CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
                                                  seed));
  // VSL: delete the current stream and open a new seeded one.
  VSL_CHECK(vslDeleteStream(&Get().vsl_stream_));
  VSL_CHECK(vslNewStream(&Get().vsl_stream_, VSL_BRNG_MT19937, seed));
}
// Constructor for the CUDA-backed PRNG engine collection.
// NOTE(review): the template<...> header for TFloat sits outside this view,
// so the definition is shown partially; code left untouched.
// Creates one host-API bulk cuRAND generator and allocates device memory for
// an array of per-engine curandState objects.  _NUM_ENGINES is presumably
// derived from num_engines via the base-class initialization -- TODO confirm.
prngenerator_cuda<TFloat>::prngenerator_cuda(uint32_t num_engines) : prngenerator<TFloat>::prngenerator(num_engines) { CurandSafeCall(curandCreateGenerator(&(_dev_bulk_prng_engine), CURAND_RNG_PSEUDO_DEFAULT)); CudaSafeCall(cudaMalloc((void **) &(_dev_prng_engines), _NUM_ENGINES * sizeof(curandState))); }
// Dragon constructor: CPU mode by default; best-effort creation of the
// cuBLAS handle and a cluster-seeded cuRAND generator (errors logged only).
Dragon::Dragon()
    : mode(Dragon::CPU), solver_count(1), root_solver(true),
      cublas_handle(NULL), curand_generator(NULL) {
  if (cublasCreate_v2(&cublas_handle) != CUBLAS_STATUS_SUCCESS)
    LOG(ERROR) << "Couldn't create cublas handle.";
  // Seeding is attempted only after creation succeeds (short-circuit &&),
  // matching the original create-then-seed ordering.
  bool curand_ok =
      curandCreateGenerator(&curand_generator, CURAND_RNG_PSEUDO_DEFAULT)
          == CURAND_STATUS_SUCCESS &&
      curandSetPseudoRandomGeneratorSeed(curand_generator, cluster_seedgen())
          == CURAND_STATUS_SUCCESS;
  if (!curand_ok)
    LOG(ERROR) << "Couldn't create curand generator.";
}
// Constructor for this single-GPU Caffe variant: CPU mode, TRAIN phase, and
// eagerly-created cuBLAS / cuRAND / VSL state, all checked fatally.  Both
// RNG sources use the fixed constant 1701 as seed.
Caffe::Caffe()
    : mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL),
      curand_generator_(NULL), vsl_stream_(NULL) {
  CUBLAS_CHECK(cublasCreate(&cublas_handle_));
  //TODO: original caffe code has bug here!
  CURAND_CHECK(curandCreateGenerator(&curand_generator_,
                                     CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator_, 1701ULL));
  VSL_CHECK(vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, 1701));
}
/* GPUrand */ void GPUrand(const GPUtype &OUT) { curandStatus_t status; gpuTYPE_t type = gm->gputype.getType(OUT); gm->gmat->control.cacheClean(); const void *gpuptr = gm->gputype.getGPUptr(OUT); // pointer to GPU memory int numel = gm->gputype.getNumel(OUT); // number of elements int datasize = gm->gputype.getDataSize(OUT); // bytes for each element gen = 0; // implement recovery procedure // try and if error try again // init curand if (curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT)!=CURAND_STATUS_SUCCESS) { mexErrMsgTxt(ERROR_CURAND_INIT); } //if (curandCreateGenerator(&gen,CURAND_RNG_QUASI_DEFAULT)!=CURAND_STATUS_SUCCESS) { // mexErrMsgTxt(ERROR_CURAND_INIT); //} // seed seed++; if (curandSetPseudoRandomGeneratorSeed(gen, time(NULL)+seed)!=CURAND_STATUS_SUCCESS) { mexErrMsgTxt(ERROR_CURAND_SEED); } if (type == gpuFLOAT) { status = curandGenerateUniform(gen, (float *) gpuptr, numel); } else if (type == gpuCFLOAT) { status = curandGenerateUniform(gen, (float *) gpuptr, numel*2); } else if (type == gpuDOUBLE) { status = curandGenerateUniformDouble(gen, (double *) gpuptr, numel); } else if (type == gpuCDOUBLE) { status = curandGenerateUniformDouble(gen, (double *) gpuptr, numel*2); } if (status!=CURAND_STATUS_SUCCESS) { curandDestroyGenerator(gen); mexErrMsgTxt(ERROR_CURAND_GEN); } // destroy if (curandDestroyGenerator(gen)!=CURAND_STATUS_SUCCESS) { mexErrMsgTxt(ERROR_CURAND_DESTROY); } }
// Construct an owning wrapper around a default cuRAND pseudo-generator.
// The raw generator is stored in `handle` before the seeding call --
// presumably so the object already owns it if CHECK_CURAND aborts/throws on
// the seed step (TODO confirm cleanup semantics of CHECK_CURAND).
curand_generator::curand_generator(unsigned long long seed)
    : handle(nullptr) {
  curandGenerator_t raw;
  CHECK_CURAND(curandCreateGenerator(&raw, CURAND_RNG_PSEUDO_DEFAULT));
  handle = raw;
  CHECK_CURAND(curandSetPseudoRandomGeneratorSeed(raw, seed));
}
void cuda_random(float *x_gpu, size_t n) { static curandGenerator_t gen; static int init = 0; if(!init){ curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); curandSetPseudoRandomGeneratorSeed(gen, time(0)); init = 1; } curandGenerateUniform(gen, x_gpu, n); check_error(cudaPeekAtLastError()); }
void cuda_random(float *x_gpu, size_t n) { static curandGenerator_t gen[16]; static int init[16] = { 0 }; int i = cuda_get_device(); if (!init[i]) { curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); init[i] = 1; } curandGenerateUniform(gen[i], x_gpu, n); check_error(cudaPeekAtLastError()); }
// Model constructor: set the scalar configuration members, run the setup
// helpers addF() / setUNoise(), then build a cuRAND generator with the
// fixed seed 1234 (deterministic across runs).
ModelWPAMGPU::ModelWPAMGPU()
{
    variance = 100e-6f;
    dimmeas  = 3;
    T        = 1.0f / 30.0f;
    addF();
    setUNoise();
    curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
    curandSetPseudoRandomGeneratorSeed(gen, 1234ULL);
}
// Return the cuRAND generator associated with the current device, creating
// it (and binding it to the per-device stream) on first use.  On creation
// failure an error is logged and the stored (null/partial) generator is
// still returned.
curandGenerator_t Caffe::device_curand_generator() {
  curandGenerator_t& gen = curand_generators_[current_device()];
  if (!gen) {
    // Try to create a curand handler.
    if (curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT)
            != CURAND_STATUS_SUCCESS ||
        curandSetPseudoRandomGeneratorSeed(gen, cluster_seedgen())
            != CURAND_STATUS_SUCCESS) {
      LOG(ERROR)
          << "Cannot create Curand generator. Curand won't be available.";
    }
    gen && curandSetStream(gen, device_pstream()->get());
  }
  return gen;
}
void Dragon::set_device(const int device_id) { int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); if (current_device == device_id) return; // The call to cudaSetDevice must come before any calls to Get, which // may perform initialization using the GPU. // reset Device must reset handle and generator??? CUDA_CHECK(cudaSetDevice(device_id)); if (Get().cublas_handle) cublasDestroy_v2(Get().cublas_handle); if (Get().curand_generator) curandDestroyGenerator(Get().curand_generator); cublasCreate_v2(&Get().cublas_handle); curandCreateGenerator(&Get().curand_generator, CURAND_RNG_PSEUDO_DEFAULT); curandSetPseudoRandomGeneratorSeed(Get().curand_generator, cluster_seedgen()); }
void Caffe::set_random_seed(const unsigned int seed) { // Curand seed // Yangqing's note: simply setting the generator seed does not seem to // work on the tesla K20s, so I wrote the ugly reset thing below. if (Get().curand_generator_) { CURAND_CHECK(curandDestroyGenerator(curand_generator())); CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(), seed)); } else { LOG(ERROR) << "Curand not available. Skipping setting the curand seed."; } // RNG seed Get().random_generator_.reset(new RNG(seed)); }
// Engine constructor: CPU mode by default, with best-effort creation of the
// cuBLAS handle and a cluster-seeded cuRAND generator so CPU-only runs keep
// working when no GPU is usable.
Engine::Engine()
    : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(),
      mode_(Engine::CPU) {
  // Try to create a cublas handler, and report an error if failed (but we will
  // keep the program running as one might just want to run CPU code).
  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }
  // Try to create a curand handler: create must succeed before seeding runs
  // (short-circuit &&), matching the original ||-style failure check.
  bool curand_ok =
      curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
          == CURAND_STATUS_SUCCESS &&
      curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
          == CURAND_STATUS_SUCCESS;
  if (!curand_ok) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
}
void Caffe::SetDevice(const int device_id) { int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); if (current_device == device_id) { return; } if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_)); if (Get().curand_generator_) { CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_)); } CUDA_CHECK(cudaSetDevice(device_id)); CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_)); CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, cluster_seedgen())); }
// Fill `ptr` (device memory) with `numel` uniform random values; `dbl`
// selects double precision, otherwise single.  The shared generator is
// created once on first call and never explicitly seeded, so every run
// uses cuRAND's default seed (deterministic across runs).
void cuda_rand(void *ptr, int numel, bool dbl)
{
    static curandGenerator_t stream;
    static bool is_init = false;
    if (!is_init) {
        curandCreateGenerator(&stream, CURAND_RNG_PSEUDO_DEFAULT);
        is_init = true;
    }
    if (dbl) {
        curandGenerateUniformDouble(stream, (double *)ptr, numel);
    } else {
        curandGenerateUniform(stream, (float *)ptr, numel);
    }
}
void Engine::SetDevice(const int device_id) { int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); if (current_device == device_id) { return; } // The call to cudaSetDevice must come before any calls to Get, which // may perform initialization using the GPU. CUDA_CHECK(cudaSetDevice(device_id)); if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_)); if (Get().curand_generator_) { CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_)); } CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_)); CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, cluster_seedgen())); }
void Caffe::SetSlaveDevice(const int slave_device_id) { int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); if (current_device == slave_device_id) { return; } if (Get().slave_cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().slave_cublas_handle_)); if (Get().slave_curand_generator_) { CURAND_CHECK(curandDestroyGenerator(Get().slave_curand_generator_)); } CUDA_CHECK(cudaSetDevice(slave_device_id)); CUDA_CHECK(cudaStreamCreate (&Get().slave_cu_stream_)); CUBLAS_CHECK(cublasCreate(&Get().slave_cublas_handle_)); CUBLAS_CHECK(cublasSetStream(Get().slave_cublas_handle_, Get().slave_cu_stream_)); CURAND_CHECK(curandCreateGenerator(&Get().slave_curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().slave_curand_generator_, cluster_seedgen())); Get().slave_device_id_ = slave_device_id; CUDA_CHECK(cudaSetDevice(current_device)); Caffe::set_gpu_mode(Caffe::MASTER_SLAVE); }
void Caffe::SetDevice(const int device_id) { std::vector<int> devices; devices.push_back(device_id); Caffe::SetDevices(devices); Get().default_device_context_ = GetDeviceContext(device_id); if (Get().default_device_context_->backend() == Backend::BACKEND_CUDA) { #ifdef USE_CUDA int current_device; CUDA_CHECK(cudaGetDevice(¤t_device)); if (current_device == device_id) { return; } // The call to cudaSetDevice must come before any calls to Get, which // may perform initialization using the GPU. CUDA_CHECK(cudaSetDevice(device_id)); if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_)); if (Get().curand_generator_) { CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_)); } CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_)); CURAND_CHECK( curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)); CURAND_CHECK( curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, cluster_seedgen())); #endif // USE_CUDA } else { #ifdef USE_GREENTEA #ifdef USE_CLBLAS clblasSetup(); #endif // USE_CLBLAS #endif // USE_GREENTEA } }
// Master/slave Caffe constructor: starts in CPU mode with all GPU handles,
// streams, and slave-device bookkeeping zeroed, then attempts to bring up
// cuBLAS and cuRAND for the master device (failures are logged, not fatal).
Caffe::Caffe()
    : mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL),
      curand_generator_(NULL), random_generator_(),
      slave_cublas_handle_(NULL), slave_curand_generator_(NULL),
      master_device_id_(0), slave_device_id_(-1),
      cu_stream_(NULL), slave_cu_stream_(NULL), current_cu_stream_(NULL) {
  // Try to create a cublas handler, and report an error if failed (but we will
  // keep the program running as one might just want to run CPU code).
  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }
  // Try to create a curand handler.
  if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
          != CURAND_STATUS_SUCCESS ||
      curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
          != CURAND_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
}
// Multi-backend Caffe constructor: CPU mode with no device context selected;
// when built with CUDA support it best-effort creates the cuBLAS handle and
// a cluster-seeded cuRAND generator (failures are logged, not fatal).
Caffe::Caffe()
    :
#ifdef USE_CUDA
      cublas_handle_(NULL),
      curand_generator_(NULL),
#endif  // USE_CUDA
      random_generator_(),
      mode_(Caffe::CPU),
      default_device_context_(nullptr) {
  // Try to create a cublas handler, and report an error if failed (but we will
  // keep the program running as one might just want to run CPU code).
#ifdef USE_CUDA
  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
    LOG(ERROR)<< "Cannot create Cublas handle. Cublas won't be available.";
  }
  // Try to create a curand handler.
  if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
          != CURAND_STATUS_SUCCESS ||
      curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
          != CURAND_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
#endif  // USE_CUDA
}
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
// CUDA SDK-style sample: generates uniform random numbers with cuRAND's
// MTGP32 generator on both GPU and host, compares the two sequences, then
// times repeated GPU generation.  Command-line flags: --device, --count
// (number of samples), --seed.
int main(int argc, char **argv) {
  // Start logs
  shrQAStart(argc, argv);
  // initialize the GPU, either identified by --device
  // or by picking the device with highest flop rate.
  int devID = findCudaDevice(argc, (const char **)argv);
  // parsing the number of random numbers to generate
  int rand_n = DEFAULT_RAND_N;
  if( checkCmdLineFlag(argc, (const char**) argv, "count") ) {
    rand_n = getCmdLineArgumentInt(argc, (const char**) argv, "count");
  }
  printf("Allocating data for %i samples...\n", rand_n);
  // parsing the seed
  int seed = DEFAULT_SEED;
  if( checkCmdLineFlag(argc, (const char**) argv, "seed") ) {
    seed = getCmdLineArgumentInt(argc, (const char**) argv, "seed");
  }
  printf("Seeding with %i ...\n", seed);
  // Device output buffer.
  float *d_Rand;
  checkCudaErrors( cudaMalloc((void **)&d_Rand, rand_n * sizeof(float)) );
  // GPU generator (device API output) and host generator, both MTGP32 and
  // seeded identically so their sequences can be compared.
  curandGenerator_t prngGPU;
  checkCurandErrors( curandCreateGenerator(&prngGPU, CURAND_RNG_PSEUDO_MTGP32) );
  checkCurandErrors( curandSetPseudoRandomGeneratorSeed(prngGPU, seed) );
  curandGenerator_t prngCPU;
  checkCurandErrors( curandCreateGeneratorHost(&prngCPU, CURAND_RNG_PSEUDO_MTGP32) );
  checkCurandErrors( curandSetPseudoRandomGeneratorSeed(prngCPU, seed) );
  //
  // Example 1: Compare random numbers generated on GPU and CPU
  float *h_RandGPU = (float *)malloc(rand_n * sizeof(float));
  printf("Generating random numbers on GPU...\n\n");
  checkCurandErrors( curandGenerateUniform(prngGPU, (float*) d_Rand, rand_n) );
  printf("\nReading back the results...\n");
  // Blocking copy: also synchronizes with the preceding generation.
  checkCudaErrors( cudaMemcpy(h_RandGPU, d_Rand, rand_n * sizeof(float),
                              cudaMemcpyDeviceToHost) );
  float *h_RandCPU = (float *)malloc(rand_n * sizeof(float));
  printf("Generating random numbers on CPU...\n\n");
  checkCurandErrors( curandGenerateUniform(prngCPU, (float*) h_RandCPU, rand_n) );
  printf("Comparing CPU/GPU random numbers...\n\n");
  float L1norm = compareResults(rand_n, h_RandGPU, h_RandCPU);
  //
  // Example 2: Timing of random number generation on GPU
  const int numIterations = 10;
  int i;
  StopWatchInterface *hTimer;
  // Drain pending work so the timed region covers only generation.
  checkCudaErrors( cudaDeviceSynchronize() );
  sdkCreateTimer(&hTimer);
  sdkResetTimer(&hTimer);
  sdkStartTimer(&hTimer);
  for (i = 0; i < numIterations; i++) {
    checkCurandErrors( curandGenerateUniform(prngGPU, (float*) d_Rand, rand_n) );
  }
  checkCudaErrors( cudaDeviceSynchronize() );
  sdkStopTimer(&hTimer);
  // Average per-iteration time in seconds (timer reports milliseconds).
  double gpuTime = 1.0e-3 * sdkGetTimerValue(&hTimer)/(double)numIterations;
  printf("MersenneTwister, Throughput = %.4f GNumbers/s, Time = %.5f s, Size = %u Numbers\n",
         1.0e-9 * rand_n / gpuTime, gpuTime, rand_n);
  printf("Shutting down...\n");
  // Release generators, device and host memory, and the timer.
  checkCurandErrors( curandDestroyGenerator(prngGPU) );
  checkCurandErrors( curandDestroyGenerator(prngCPU) );
  checkCudaErrors( cudaFree(d_Rand) );
  sdkDeleteTimer( &hTimer);
  free(h_RandGPU);
  free(h_RandCPU);
  cudaDeviceReset();
  // Pass when GPU and host sequences agree to within 1e-6 (L1 norm).
  shrQAFinishExit(argc, (const char**)argv, (L1norm < 1e-6) ? QA_PASSED : QA_FAILED);
}
/* GPUrandn */ void GPUrandn(const GPUtype &OUT) { curandStatus_t status; gpuTYPE_t type = gm->gputype.getType(OUT); gm->gmat->control.cacheClean(); const void *gpuptr = gm->gputype.getGPUptr(OUT); // pointer to GPU memory int numel = gm->gputype.getNumel(OUT); // number of elements int datasize = gm->gputype.getDataSize(OUT); // bytes for each element gen = 0; // implement recovery procedure // try and if error try again // init curand if (curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT)!=CURAND_STATUS_SUCCESS) { mexErrMsgTxt(ERROR_CURAND_INIT); } //if (curandCreateGenerator(&gen,CURAND_RNG_QUASI_DEFAULT)!=CURAND_STATUS_SUCCESS) { // mexErrMsgTxt(ERROR_CURAND_INIT); //} // randn requires even numbers // we split the execution in 2 parts (overlap if not even) // seed seed++; if (curandSetPseudoRandomGeneratorSeed(gen, time(NULL)+seed)!=CURAND_STATUS_SUCCESS) { mexErrMsgTxt(ERROR_CURAND_SEED); } unsigned int n = 0; if (type == gpuFLOAT) { n = numel; } else if (type == gpuCFLOAT) { n = numel*2; } else if (type == gpuDOUBLE) { n = numel; } else if (type == gpuCDOUBLE) { n = numel*2; } unsigned int even = (n%2) == 0; unsigned int offset = 0; unsigned int mysize = 0; unsigned int iter = 1; if (!even) { n = n-1; iter = 2; } if (type == gpuFLOAT) { float mean = 0.0; float std = 1.0; status = curandGenerateNormal(gen, (float *) gpuptr, n, mean, std); if (!even) { float *devData; if((cudaMalloc((void **)&devData, 4 * sizeof(float))) != cudaSuccess) { status = CURAND_STATUS_LAUNCH_FAILURE; } else { status = curandGenerateNormal(gen, devData, 4, mean, std); if (status==CURAND_STATUS_SUCCESS) { void *dst = (void *) ((UINTPTR gpuptr)+n*datasize); if (cudaMemcpy(dst, (void *) devData, datasize, cudaMemcpyDeviceToDevice)!=cudaSuccess) { status = CURAND_STATUS_LAUNCH_FAILURE; } } if(cudaFree(devData) != cudaSuccess) { status = CURAND_STATUS_LAUNCH_FAILURE; } } } } else if (type == gpuCFLOAT) { float mean = 0.0; float std = 1.0; status = curandGenerateNormal(gen, (float *) 
gpuptr, n, mean, std); } else if (type == gpuDOUBLE) { double mean = 0.0; double std = 1.0; status = curandGenerateNormalDouble(gen, (double *) gpuptr, n, mean, std); if (!even) { double *devData; if((cudaMalloc((void **)&devData, 4 * sizeof(double))) != cudaSuccess) { status = CURAND_STATUS_LAUNCH_FAILURE; } else { status = curandGenerateNormalDouble(gen, devData, 4, mean, std); if (status==CURAND_STATUS_SUCCESS) { void *dst = (void *) ((UINTPTR gpuptr)+n*datasize); if (cudaMemcpy(dst, (void *) devData, datasize, cudaMemcpyDeviceToDevice)!=cudaSuccess) { status = CURAND_STATUS_LAUNCH_FAILURE; } } if(cudaFree(devData) != cudaSuccess) { status = CURAND_STATUS_LAUNCH_FAILURE; } } } } else if (type == gpuCDOUBLE) { double mean = 0.0; double std = 1.0; status = curandGenerateNormalDouble(gen, (double *) gpuptr, n, mean, std); } if (status!=CURAND_STATUS_SUCCESS) { curandDestroyGenerator(gen); mexErrMsgTxt(ERROR_CURAND_GEN); } // destroy if (curandDestroyGenerator(gen)!=CURAND_STATUS_SUCCESS) { mexErrMsgTxt(ERROR_CURAND_DESTROY); } }