コード例 #1
0
ファイル: curand.cpp プロジェクト: stefan-k/julia-CuMatrix
void cuda_randn(void *ptr, int numel, bool dbl)
{
    static bool is_init = false;
    static curandGenerator_t stream;
    if (!is_init) {
        curandCreateGenerator(&stream, CURAND_RNG_PSEUDO_DEFAULT);
        is_init = true;
    }

    if (!dbl) {
        curandGenerateNormal(stream, (float *)ptr, numel, 0, 1);
    } else {
        curandGenerateNormalDouble(stream, (double *)ptr, numel, 0, 1);
    }

    return;
}
コード例 #2
0
/* GPUrandn */
void GPUrandn(const GPUtype &OUT) {

  curandStatus_t status;

  gpuTYPE_t type = gm->gputype.getType(OUT);


  gm->gmat->control.cacheClean();



  const void *gpuptr = gm->gputype.getGPUptr(OUT); // pointer to GPU memory
  int numel = gm->gputype.getNumel(OUT);           // number of elements
  int datasize = gm->gputype.getDataSize(OUT);     // bytes for each element


  gen = 0;
  // implement recovery procedure
  // try and if error try again

  // init curand
  if (curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_INIT);
  }
  //if (curandCreateGenerator(&gen,CURAND_RNG_QUASI_DEFAULT)!=CURAND_STATUS_SUCCESS) {
  //  mexErrMsgTxt(ERROR_CURAND_INIT);
  //}

  // randn requires even numbers
  // we split the execution in 2 parts (overlap if not even)

  // seed
  seed++;
  if (curandSetPseudoRandomGeneratorSeed(gen, time(NULL)+seed)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_SEED);
  }

  unsigned int n = 0;

  if (type == gpuFLOAT) {
    n = numel;
  } else if (type == gpuCFLOAT) {
    n = numel*2;
  } else if (type == gpuDOUBLE) {
    n = numel;
  } else if (type == gpuCDOUBLE) {
    n = numel*2;
  }

  unsigned int even = (n%2) == 0;

  unsigned int offset = 0;
  unsigned int mysize = 0;


  unsigned int iter = 1;
  if (!even) {
    n = n-1;
    iter = 2;
  }

  if (type == gpuFLOAT) {
    float mean = 0.0;
    float std = 1.0;
    status = curandGenerateNormal(gen, (float *) gpuptr, n, mean, std);

    if (!even) {
      float *devData;
      if((cudaMalloc((void **)&devData, 4 * sizeof(float))) != cudaSuccess) {
        status = CURAND_STATUS_LAUNCH_FAILURE;
      } else {
        status = curandGenerateNormal(gen, devData, 4, mean, std);
        if (status==CURAND_STATUS_SUCCESS) {
          void *dst = (void *) ((UINTPTR gpuptr)+n*datasize);
          if (cudaMemcpy(dst, (void *) devData, datasize, cudaMemcpyDeviceToDevice)!=cudaSuccess) {
            status = CURAND_STATUS_LAUNCH_FAILURE;
          }
        }
        if(cudaFree(devData) != cudaSuccess) {
          status = CURAND_STATUS_LAUNCH_FAILURE;
        }
      }
    }

  } else if (type == gpuCFLOAT) {
    float mean = 0.0;
    float std = 1.0;
    status = curandGenerateNormal(gen, (float *) gpuptr, n, mean, std);
  } else if (type == gpuDOUBLE) {
    double mean = 0.0;
    double std = 1.0;
    status = curandGenerateNormalDouble(gen, (double *) gpuptr, n, mean, std);
    if (!even) {
      double *devData;
      if((cudaMalloc((void **)&devData, 4 * sizeof(double))) != cudaSuccess) {
        status = CURAND_STATUS_LAUNCH_FAILURE;
      } else {
        status = curandGenerateNormalDouble(gen, devData, 4, mean, std);
        if (status==CURAND_STATUS_SUCCESS) {
          void *dst = (void *) ((UINTPTR gpuptr)+n*datasize);
          if (cudaMemcpy(dst, (void *) devData, datasize, cudaMemcpyDeviceToDevice)!=cudaSuccess) {
            status = CURAND_STATUS_LAUNCH_FAILURE;
          }
        }
        if(cudaFree(devData) != cudaSuccess) {
          status = CURAND_STATUS_LAUNCH_FAILURE;
        }
      }
    }


  } else if (type == gpuCDOUBLE) {
    double mean = 0.0;
    double std = 1.0;
    status = curandGenerateNormalDouble(gen, (double *) gpuptr, n, mean, std);

  }

  if (status!=CURAND_STATUS_SUCCESS) {
    curandDestroyGenerator(gen);
    mexErrMsgTxt(ERROR_CURAND_GEN);
  }


  // destroy
  if (curandDestroyGenerator(gen)!=CURAND_STATUS_SUCCESS) {
    mexErrMsgTxt(ERROR_CURAND_DESTROY);
  }


}