Exemple #1
0
/*
 * Fill `buffer` with `*n` uniform doubles generated on rng->m_stream using
 * the bounds 0.0 and 1.0.
 *
 * rng       - engine whose m_stream is used for generation
 * buffer    - destination for the generated values
 * n         - pointer to the number of values requested
 * isAligned - ignored
 * info      - receives the status returned by vdRngUniform, or 0 when
 *             *n is not positive (nothing is generated in that case)
 */
void drunif(RngEngine* rng, double* buffer, BlasInt* n, BlasInt* isAligned,
			RngErrorType* info) {
	UNUSED(isAligned);

	/* Nothing requested: report success without touching the stream. */
	if (!(*n > 0)) {
		*info = 0;
		return;
	}

	*info = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, rng->m_stream, *n,
						buffer, 0.0, 1.0);
}
Exemple #2
0
void MainWindow::randomizeSigma_1()
{   double x1;
    VSLStreamStatePtr stream;
    vslNewStream( &stream, VSL_BRNG_MCG31, this->seed );
    vdRngUniform( 0, stream, model->nI(), &model->Sigma[0], 0.0, 1.0 );
    vslDeleteStream( &stream );
    for (int i=0; i < model->nI(); ++i)
    {
        std::pair<int,int> ends = model->ends(i);
        int from = ends.first;
        int to   = ends.second;
        std::pair<double,double> xy0 = model->xy(from);
        std::pair<double,double> xy1 = model->xy(to);
 
        if (xy0.first==0 && xy1.first==0
            || xy0.first==0 && xy1.first==1
            || xy0.first==1 && xy1.first==0
            || xy0.first==1 && xy1.first==1
            || xy0.first==model->xmax() && xy1.first==model->xmax() 
            || xy0.first==model->xmax()-1 && xy1.first==model->xmax()
            || xy0.first==model->xmax()   && xy1.first==model->xmax()-1  
            || xy0.first==model->xmax()-1 && xy1.first==model->xmax()-1
            )
        {
            model->Sigma[i]=this->sigmaU;
        }
        else  {x1=model->Sigma[i];
            if (x1 < this->fraction) model->Sigma[i] = CUTOFF_SIGMA;
            else model->Sigma[i] =1;
        }
 
        }     
    
}
Exemple #3
0
/*
 * Fill `buffer` with `*n` normally distributed doubles (mean 0.0, sigma 1.0)
 * generated on rng->m_stream.
 *
 * The algorithm is chosen at compile time:
 *   USE_RNG_BOX_MULLER - MKL's Gaussian generator (BOXMULLER for a single
 *                        value, BOXMULLER2 for more than one);
 *   USE_RNG_MARSAGLIA  - polar rejection method built on uniform draws.
 *
 * rng       - engine whose m_stream is used for generation
 * buffer    - destination for the generated values
 * n         - pointer to the number of values requested
 * isAligned - ignored
 * info      - receives 0 on success (or when *n is not positive), otherwise
 *             the status returned by the failing MKL call
 *
 * NOTE(review): when neither macro is defined the function generates nothing
 * and leaves *info untouched, exactly as before.
 */
void drnorm(RngEngine* rng, double* buffer, BlasInt* n, BlasInt* isAligned,
			RngErrorType* info) {
	UNUSED(isAligned);
#if defined(USE_RNG_BOX_MULLER)
	if (*n == 1) {
		*info = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, rng->m_stream,
							1, buffer, 0.0, 1.0);
	} else if (*n > 1) {
		*info = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, rng->m_stream,
							*n, buffer, 0.0, 1.0);
	} else {
		*info = 0;
	}
#elif defined(USE_RNG_MARSAGLIA)
	/* Previously *info was only written when *n <= 0, so callers read an
	 * indeterminate status after a successful generation. */
	*info = 0;
	for (BlasInt iter = 0; iter < *n; ++iter) {
		double x[2];
		double r;
		do {
			const int status = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD,
											rng->m_stream, 2, x, 0.0, 1.0);
			if (status != 0) {
				/* x[] was not filled; stop instead of looping on garbage. */
				*info = status;
				return;
			}
			/* Map the pair from the unit square to [-1, 1) x [-1, 1). */
			x[0] = 2.0 * x[0] - 1.0;
			x[1] = 2.0 * x[1] - 1.0;
			r = x[0] * x[0] + x[1] * x[1];
			/* Reject points outside the unit disc and the origin (log(0)). */
		} while (r >= 1 || r == 0);
		buffer[iter] = x[0] * sqrt(- 2.0 * log(r) / r);
	}
#endif
}
// Draws a single uniform double between `left` and `right` from the global
// generator stream g_RNG.m_vslStream. If the generator leaves the output
// untouched, 0 is returned.
double pnlRand(double left, double right)
{
    //TODO: check argument range
    double sample[1] = { 0 };
    vdRngUniform( VSL_METHOD_DUNIFORM_STD, g_RNG.m_vslStream, 1, sample, left, right );
    return sample[0];
}
/*
 * JNI bridge for MKL vdRngUniform.
 *
 * Resolves the native stream behind `j_stream`, pins the Java double array
 * `j_r`, fills it with `n` uniform values between `a` and `b` using `method`,
 * and copies the result back to Java when the array is released (mode 0).
 *
 * Returns the MKL status code, or VSL_ERROR_MEM_FAILURE when the array
 * could not be pinned.
 */
JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngUniform
(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b) {

    /* Resolve the stream first: no other JNI calls are made while the
     * critical region below is open. */
    VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream);

    /* The third argument is an optional jboolean* out-parameter (isCopy);
     * NULL means "not interested". */
    jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, NULL);
    if (r == NULL) {
        /* Pinning failed; MKL must not write through a null pointer. */
        return VSL_ERROR_MEM_FAILURE;
    }

    jint retval = vdRngUniform(method, stream, n, r, a, b);

    (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0);
    return retval;
}
Exemple #6
0
/*
 * Advances the read cursor of the block-buffered generator __UTIL_sRNG.
 *
 * While the cursor `ind` has not reached the last slot of the block it is
 * simply incremented. Otherwise the whole buffer `dbuf` is refilled with
 * RNG_BLOCK_SIZE uniform doubles (bounds 0 and 1) and the cursor is reset
 * to the first slot.
 *
 * NOTE(review): the function is declared to return int but has no return
 * statement, so any caller that uses the result reads an indeterminate
 * value. Presumably it was meant to return a value derived from
 * __UTIL_sRNG.dbuf[__UTIL_sRNG.ind]; the intended scaling is not visible
 * here - confirm against the callers before fixing.
 */
int mkl_rand(void)
{
    /* Still values left in the current block: step to the next one. */
    if (__UTIL_sRNG.ind < RNG_BLOCK_SIZE -1)
    {
        __UTIL_sRNG.ind++;
    }
    else
    {
        /* Block exhausted: regenerate it and restart at slot 0. */
        vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD,__UTIL_sRNG.stream,RNG_BLOCK_SIZE,__UTIL_sRNG.dbuf,0,1);
        __UTIL_sRNG.ind = 0;
    }
}
Exemple #7
0
void MainWindow::randRcr()
{   
    int i_Rcr = model->index_of_Rcr();
    elementCr = fabs((model->I[ i_Rcr ]));
    this->sigmaMin=model->Sigma[i_Rcr ];
    VSLStreamStatePtr stream;
    vslNewStream( &stream, VSL_BRNG_MCG31, this->seed );
    vdRngUniform( 0, stream, model->nI(), &model->Sigma[0], 0.0, 1.0 );
    vslDeleteStream( &stream );
    double x1=model->Sigma[i_Rcr];
    this->randc=x1;
    this->rand=x1;
}
Exemple #8
0
/*
 * Monte Carlo estimate of pi with OpenMP and MKL.
 *
 * Each thread owns a private VSL stream and a private scratch array. Points
 * are generated in blocks of BLOCK_SIZE (x in the first half of the array,
 * y in the second half) and the number falling inside the unit quarter
 * circle is summed with an OpenMP reduction.
 */
int main(){
  const unsigned int iter=200000000;
  /* Only whole blocks are simulated, so normalise by what is really drawn. */
  const int numBlocks=(int)(iter/BLOCK_SIZE);
  const double samplesDrawn=(double)numBlocks*(double)BLOCK_SIZE;
  /* clock() is sampled once; threads derive distinct seeds from it below.
     Previously every thread called clock() itself and could easily obtain
     the same seed, i.e. the same random sequence. */
  const int baseSeed=(int)clock();
  int nextStreamId=0;
  double dUnderCurve=0.0;
  double pi=0.0;
  clock_t end_time,start_time;

  start_time=clock();
#pragma omp parallel reduction(+:dUnderCurve)
  {
    double r[BLOCK_SIZE*2];  //Careful!!!
  	//you need a private copy of whole array for each thread
    VSLStreamStatePtr stream;  //You need one stream for each thread
    double x, y;
    int i,j;
    int streamId;

    /* Hand every thread a unique id without depending on omp.h. */
#pragma omp critical
    {
      streamId = nextStreamId++;
    }

    /* NOTE(review): distinct seeds give distinct sequences, not guaranteed
       statistical independence; a BRNG family or skip-ahead would be the
       rigorous choice if BRNG supports it. */
    vslNewStream( &stream, BRNG, baseSeed + streamId );
#pragma omp for
    for(j=0;j<numBlocks;j++) {

        vdRngUniform( METHOD, stream, BLOCK_SIZE*2, r, 0.0, 1.0 );
		//Create random numbers into array r

        for (i=0;i<BLOCK_SIZE;i++) {
            x=r[i];                     //X Coordinate
            y=r[i+BLOCK_SIZE];          //Y Coordinate
            if (x*x + y*y <= 1.0) {	//is distance from Origin under Curve
                dUnderCurve++;
            }
        }
    }

    vslDeleteStream( &stream );
  }
  if (samplesDrawn > 0.0) {
    pi = dUnderCurve / samplesDrawn * 4 ;
  }

  end_time=clock();

  printf ("pi = %10.9f\n", pi);
  /* clock() counts in units of 1/CLOCKS_PER_SEC, which is not 1000 on every
     platform. NOTE(review): clock() reports processor time, which may be
     accumulated over all threads; use a wall-clock timer if that is what
     should be shown. */
  printf ("Seconds = %10.9f\n",(double)(end_time-start_time)/(double)CLOCKS_PER_SEC);

  return 0;
}
/*
 * Monte Carlo estimate of pi with MPI and MKL.
 *
 * The num_trials samples are split as evenly as possible across the ranks.
 * Every rank counts how many of its uniformly drawn points fall inside the
 * circle of radius r, the counts are summed on the manager rank, and the
 * manager prints the estimate.
 */
int main(int argc, char **argv) {
  long i;
  long Ncirc = 0;        /* hits counted by this rank */
  long Ncirc_total = 0;  /* sum over all ranks; only valid on the manager */
  double pi, xy[2];
  double r = 1.0; // radius of circle
  double r2 = r*r;

  int rank, size, manager = 0;
  long my_trials;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  VSLStreamStatePtr stream;

  /* The first (num_trials % size) ranks take one extra trial. */
  my_trials = num_trials/size;
  if (num_trials%(long)size > (long)rank) my_trials++;

  /* One member of the MT2203 generator family per rank. */
  vslNewStream(&stream, VSL_BRNG_MT2203+rank, 1);

  for (i = 0; i < my_trials; i++) {
    /* Use the method constant of the double-precision uniform generator;
       the UNIFORMBITS constant belongs to the integer bit generators. */
    vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 2, xy, 0.0, 1.0);
    if ((xy[0]*xy[0] + xy[1]*xy[1]) <= r2)
      Ncirc++;
  }

  /* The stream was previously never released. */
  vslDeleteStream(&stream);

  /* Sum the per-rank counts on the manager in a single collective. */
  MPI_Reduce(&Ncirc, &Ncirc_total, 1, MPI_LONG, MPI_SUM, manager,
             MPI_COMM_WORLD);

  if (rank == manager) {
    pi = 4.0 * ((double)Ncirc_total)/((double)num_trials);
    printf("\n \t Computing pi using MPI and MKL for random number generator: \n");
    printf("\t For %ld trials, pi = %f\n", num_trials, pi);
    printf("\n");
  }
  MPI_Finalize();
  return 0;
}
// Fills vec[0 .. numElem-1] with uniform doubles between `left` and `right`
// in a single batched call on the global generator stream.
void pnlRand(int numElem, double* vec, double left, double right)
{
    vdRngUniform( VSL_METHOD_DUNIFORM_STD,
                  g_RNG.m_vslStream,
                  numElem,
                  vec,
                  left,
                  right );
}
// Returns one uniform double between `min` and `max`, drawn from this
// object's `stream`.
double GeneticAlgorithm2::randomDouble(double min, double max){
	// The generator writes into an array; a one-element array holds the draw.
	double draw[1];
	vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1, draw, min, max);
	return draw[0];
}
Exemple #12
0
double integrateVegas(double * limits , int threads, double * params){
    //Setting the number of threads
     omp_set_num_threads(threads);
    //How many iterations to perform
    int iterations =15;
    //Which iteration to start sampling more
    int switchIteration = 7;
    //How many points to sample in total
    int samples = 100000;
    //How many points to sample after grid set up
    int samplesAfter = 5000000;
    //How many intervals for each dimension
    int intervals = 10;
    //How many subIntervals
    int subIntervals = 1000;
    //Parameter alpha controls convergence rate
    double alpha = 0.5;
    int seed = 40847516;
    //double to store volume integrated over
    double volume = 1.0;
    for(int i=0; i<dimensions; i++){
        volume*= (limits[(2*i)+1]-limits[2*i]);
    };
    //Number of boxes
    int numBoxes = intervals;
    for(int i=1; i<dimensions; i++){
        numBoxes *= intervals;
    }
    //CHANGE SEED WHEN YOU KNOW IT WORKS
    //Setting up one random number stream for each thread
    VSLStreamStatePtr * streams; 
    streams = ( VSLStreamStatePtr * )_mm_malloc(sizeof(VSLStreamStatePtr)*threads,64);
    for(int i=0; i<threads; i++){
        vslNewStream(&streams[i], VSL_BRNG_MT2203+i,seed);
    }
    //Arrays to store integral and uncertainty for each iteration
    double * integral = (double *)_mm_malloc(sizeof(double)*iterations,64);
    double * sigmas = (double *)_mm_malloc(sizeof(double)*iterations,64);
    for(int i=0; i<iterations; i++){
        integral[i] = 0;
        sigmas[i] = 0;
    }
    //Points per each box
    int pointsPerBox = samples/numBoxes;
    //Array storing the box limits (stores x limits then y limits and so on) intervals+1 to store all limits
    double * boxLimits = (double *)_mm_malloc(sizeof(double)*(intervals+1)*dimensions,64);
    //Array to store average function values for each box
    double * heights = (double *)_mm_malloc(sizeof(double)*dimensions*intervals,64);
    //Array storing values of m
    double * mValues = (double *)_mm_malloc(sizeof(double)*intervals,64);
    //Array storing widths of sub boxes
    double * subWidths = (double *) _mm_malloc(sizeof(double)*intervals,64);
    //Getting initial limits for the boxes 
    for(int i=0; i<dimensions; i++){
        double boxWidth = (limits[(2*i)+1]-limits[2*i])/intervals;
        //0th iteration
        boxLimits[i*(intervals+1)] = limits[2*i];
        for(int j=1; j<=intervals; j++){
            int x = (i*(intervals+1))+j;
            boxLimits[x] =  boxLimits[x-1]+boxWidth;
        }
    };
    //Pointer to store random generated  numbers
      double  randomNums[dimensions]__attribute__((aligned(64)));
      int  binNums[dimensions]__attribute__((aligned(64)));
    //Double to store p(x) denominator for monte carlo
    double prob;
    //Values to store integral and sigma for each thread so they can be reduced in OpenMp
    double integralTemp;
    double sigmaTemp;
    double heightsTemp[dimensions*intervals]__attribute__((aligned(64)));
    int threadNum;
#pragma omp parallel  default(none) private(sigmaTemp,integralTemp,binNums,randomNums,prob,threadNum,heightsTemp) shared(iterations,subIntervals,alpha,mValues,subWidths,streams,samples,boxLimits,intervals, integral, sigmas, heights, threads, volume, samplesAfter, switchIteration, params) 
    {
        for(int iter=0; iter<iterations; iter++){ 
            //Stepping up to more samples when grid calibrated
            if(iter==switchIteration){
                samples = samplesAfter;
            }
            //Performing  iterations
            for(int i=0; i<dimensions*intervals; i++){
                heightsTemp[i] = 0;
            }

            integralTemp = 0; 
            sigmaTemp = 0;
            //Getting chunk sizes for each thread
            threadNum = omp_get_thread_num();
            int seg = ceil((double)samples/threads);
            int lower = seg*threadNum;
            int upper = seg*(threadNum+1);
            if(upper > samples){
                upper = samples;
            };
            //Spliting monte carlo up
            for(int i=0; i<seg; i++){
                prob = 1;
                //Randomly choosing bins to sample from
                viRngUniform(VSL_RNG_METHOD_UNIFORM_STD,streams[threadNum],dimensions,binNums,0,intervals);
                vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD,streams[threadNum],dimensions,randomNums,0,1);
                //Getting samples from bins
                for(int j=0; j<dimensions; j++){
                    int x = ((intervals+1)*j)+binNums[j];
                    randomNums[j] *= (boxLimits[x+1]-boxLimits[x]);
                    randomNums[j] += boxLimits[x];
                    prob *= 1.0/(intervals*(boxLimits[x+1]-boxLimits[x]));
                }
                //Performing evaluation of function and adding it to the total integral
                double eval = evaluate(randomNums,params);
                integralTemp += eval/prob;
                sigmaTemp += (eval*eval)/(prob*prob);
                //Calculating the values of f for bin resising
                for(int j=0; j<dimensions; j++){
                    int x = binNums[j]+(j*intervals);
                    //May need to initialize heights
                    // #pragma omp atomic
                    // printf("heightsTemp before=%f\n",heightsTemp[x]);
                    heightsTemp[x] += eval;
                    // printf("heightsTemp=%f x=%d eval=%f thread=%d\n",heightsTemp[x],x,eval,omp_get_thread_num());
                }

            } 
#pragma omp critical
            {
                integral[iter] += integralTemp;
                sigmas[iter] += sigmaTemp;
                for(int k=0; k<dimensions*intervals; k++){
                    // printf("heightTemp[k]=%f k=%d\n",heightsTemp[k],k);
                    heights[k] += heightsTemp[k];
                }
            }
#pragma omp barrier
#pragma omp single
            {
                //Calculating the values of sigma and the integral
                integral[iter] /= samples;
                sigmas[iter] /= samples;
                sigmas[iter] -= (integral[iter]*integral[iter]);
                sigmas[iter] /= (samples-1);
                 // printf("integral=%f\n",integral[iter]);

                //Readjusting the box widths based on the heights
                //Creating array to store values of m and their sum 
                int totalM=0; 
                //Doing for each dimension seperately
                for(int i=0; i<dimensions; i++){
                    double sum = 0;
                    //Getting the sum of f*delta x
                    for(int j=0; j<intervals; j++){
                        int x = (i*(intervals))+j ;
                        //May be bug with these indicies
                        sum += heights[x]*(boxLimits[x+1+i]-boxLimits[x+i]);
                    }
                    //Performing the rescaling 
                    for(int j=0; j<intervals; j++){
                        int x = (i*(intervals))+j;
                        double value = heights[x]*(boxLimits[x+1+i]-boxLimits[x+i]);
                        mValues[j] = ceil(subIntervals*pow((value-1)*(1.0/log(value)),alpha));
                        subWidths[j] = (boxLimits[x+1+i]-boxLimits[x+i])/mValues[j];
                        totalM += mValues[j];
                    }
                    int mPerInterval = totalM/intervals;
                    int mValueIterator = 0;
                    //Adjusting the intervals going from 1 to less than intervals to keep the edges at the limits
                    for(int j=1; j<intervals; j++){
                        double width = 0;
                        for(int y=0; y<mPerInterval; y++){
                            width += subWidths[mValueIterator];
                            mValues[mValueIterator]--;
                            if(mValues[mValueIterator]==0){
                                mValueIterator++;
                            }
                        }
                        //NEED TO SET BOX LIMITS NOW  
                        int x = j+(i*(intervals+1));
                        boxLimits[x] = boxLimits[x-1]+width;    
                    }
                    //Setting mvalues etc. (reseting memory allocated before the dimensions loop to 0)
                    totalM = 0;
                    for(int k=0; k<intervals; k++){
                        subWidths[k] = 0;
                        mValues[k] = 0;

                    }
                }

                //Setting heights to zero for next iteration
                for(int i=0; i<intervals*dimensions; i++ ){
                    heights[i] = 0;
                }
            }

            
        }
    }
    //All iterations done 
    //Free stuff
    
    _mm_free(subWidths);
    _mm_free(mValues);
    _mm_free(boxLimits);
    _mm_free(streams);
    _mm_free(heights);
   
    //Calculating the final value of the integral
    double denom = 0;
    double numerator =0;
    for(int i=7; i<iterations; i++){
        numerator += integral[i]*((integral[i]*integral[i])/(sigmas[i]*sigmas[i]));
        denom += ((integral[i]*integral[i])/(sigmas[i]*sigmas[i]));
         // printf("integral=%f sigma=%f\n",integral[i],sigmas[i]);
    }
    double output  = numerator/denom;
    //Calculating value of x^2 to check if result can be trusted
    double chisq = 0;
    for(int i=0; i<iterations; i++){
       chisq += (((integral[i]-output)*(integral[i]-output))/(sigmas[i]*sigmas[i]));
    }
    if(chisq>iterations){
        printf("Chisq value is %f, it should be not much greater than %d (iterations-1) Integral:%f Analytical Value=%f\n",chisq,iterations-1,output,normValue(params));
    }
      _mm_free(integral);
      _mm_free(sigmas);
    return output;
    
}
Exemple #13
0
int main(int argc, char** argv){
    double* A;
    double* B;
    double* C;
    
    double alpha = 1.0;
    double beta = 0.0;
    int i;        
    struct timeval t1,t2, t3, t4;
    
    const int SEED = 1;
    const int METHOD = 0;
    const int BRNG = VSL_BRNG_MCG31;
    VSLStreamStatePtr stream;
    int errcode;
    
    cublasStatus_t status;
    cublasHandle_t handle;
    
    double a=0.0, b= 1.0; // Uniform distribution between 0 and 1
    
    errcode = vslNewStream(&stream, BRNG, SEED);
    
    int width = 100;
    if (argc > 1){
        width = atoi(argv[1]);
    }
    /* Allocate memory for A, B, and C */
    if (cudaMallocManaged(&A, width * width * sizeof(double)) != cudaSuccess){
        fprintf(stderr, "!!!! device memory alocation error (allocate A)\n");
        return EXIT_FAILURE;
    }
    if (cudaMallocManaged(&B, width * width * sizeof(double)) != cudaSuccess){
        fprintf(stderr, "!!!! device memory alocation error (allocate B)\n");
        return EXIT_FAILURE;
    }
    if (cudaMallocManaged(&C, width * width * sizeof(double)) != cudaSuccess){
        fprintf(stderr, "!!!! device memory alocation error (allocate C)\n");
        return EXIT_FAILURE;
    }
    /* Generate width * width random numbers between 0 and 1 to fill matrices A and B. */
    errcode = vdRngUniform(METHOD, stream, width * width, A, a, b);
    CheckVslError(errcode);
    errcode = vdRngUniform(METHOD, stream, width * width, B, a, b);
    CheckVslError(errcode);
    
    /* Now prepare the call to CUBLAS */
    status = cublasCreate(&handle);
    if (status != CUBLAS_STATUS_SUCCESS) {
        fprintf (stderr, "!!!! CUBLAS initialization error\n");
        return EXIT_FAILURE;
    }
    gettimeofday(&t3, NULL);

    
    /* Perform calculation */
    status = cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_T, width, width, width, &alpha, A,
        width, B, width, &beta, C, width);
    if (status != CUBLAS_STATUS_SUCCESS){
        fprintf(stderr, "!!!! kernel execution error.\n");
        return EXIT_FAILURE;
    }
    cudaDeviceSynchronize(); 
    gettimeofday(&t4, NULL);
    const double time = (double) (t4.tv_sec - t3.tv_sec) + 1e-6 * (t4.tv_usec -
        t3.tv_usec);
    const double Gflops = 2. * width * width * width / (double) time * 10e-9;
    printf("Call to cublasDGEMM took %lf\n", time);
    printf("Gflops: %lf\n", Gflops);
    cudaFree(A);
    cudaFree(B);
    cudaFree(C);
    
    status = cublasDestroy(handle);
    if (status != CUBLAS_STATUS_SUCCESS){
        fprintf(stderr, "!!!! shutdown error\n");
        return EXIT_FAILURE;
    }
    
    return 0;
}
Exemple #14
0
/*
 * Advances a packet of 8 photons by one hop / drop / spin step.
 *
 * In_Ptr     - simulation input (layer specs, grid spacing, thresholds)
 * Photon_Ptr - state of the 8 photons, stored as per-field arrays indexed by
 *              photon; updated in place
 * Out_Ptr    - accumulators for reflectance (Rd_ra), transmittance (Tt_ra)
 *              and absorption (A_rz)
 * stream     - VSL stream supplying the uniform random numbers
 *
 * For every photon i the step size is chosen (a fresh random one, or the
 * leftover from a previous boundary hit), then:
 *   - if the step crosses the layer boundary, the photon is moved to the
 *     boundary and is either reflected or transmitted (up or down depending
 *     on the sign of uz), possibly leaving the medium;
 *   - otherwise it is moved, part of its weight is deposited in A_rz and a
 *     new direction is computed.
 * Finally photons whose weight fell below In_Ptr->Wth go through roulette.
 *
 * Random numbers: 7 groups of 8 values are drawn up front; group k for
 * photon i is result_t[i + 8*k]
 *   k=0 step size, k=1 upward crossing, k=2 downward crossing,
 *   k=3 polar angle (g == 0), k=4 polar angle (g != 0),
 *   k=5 azimuthal angle, k=6 roulette.
 *
 * NOTE(review): the loop is marked `#pragma simd` while several photons may
 * add to the same Out_Ptr bin in one call - confirm that this is safe with
 * the compiler in use.
 */
void HopDropSpin(InputStruct  *  In_Ptr,
        Photon8Struct *  Photon_Ptr,
        OutStruct *  Out_Ptr,
        VSLStreamStatePtr  stream)
{
    int i;
    double s[8];
    short  layer[8];// = Photon_Ptr->layer;
    double mua[8];// = In_Ptr->layerspecs[layer].mua;
    double mus[8];// = In_Ptr->layerspecs[layer].mus;

    double dl_b[8];  /* length to boundary. */
    double uz[8];// = Photon_Ptr->uz;
    double mut[8];
    Boolean hit[8];
    double dwa[8];		/* absorbed weight.*/
    double x[8];// = Photon_Ptr->x;
    double y[8];// = Photon_Ptr->y;
    short  iz[8];	/* index to z & r. */

    double cost[8], sint[8];	/* cosine and sine of the */
    /* polar deflection angle theta. */
    double cosp[8], sinp[8];	/* cosine and sine of the */
    /* azimuthal angle psi. */
    double ux[8] ;
    double uy[8] ;//= Photon_Ptr->uy;
    double psi[8];
    double g[8];
    double temp[8];
    double cap[8], cam[8];	/* cosines of the sum ap or */
    /* difference am of the two */
    /* angles. ap = a1+a2 */
    /* am = a1 - a2. */
    double sap[8], sam[8];	/* sines. */
    double sa1[8], sa2[8];
    /* sine of the incident and transmission angles. */
    double ca2[8];
    double n1_temp[8] , n2_temp[8] , ca1_temp[8],* ca2_Ptr_temp[8] ;
    double uz1[8];	/* cosines of transmission alpha. always */
    /* positive. */
    double r[8];//=0.0;	/* reflectance */
    double ni[8];// = In_Ptr->layerspecs[layer].n;
    double nt[8];
    short  ir[8], ia[8];	/* index to r & angle. */
    double    result_t[8*7];
    /* Draw exactly as many values as the buffer holds. The previous call
     * requested 8*9 values into this 8*7 buffer, writing past its end, and
     * passed &result_t (pointer-to-array) instead of a double pointer.
     * The highest index read below is i+48 with i<8, i.e. 8*7-1. */
    vdRngUniform( VSL_RNG_METHOD_UNIFORM_STD, stream, 8*7 , result_t, 0.0, 1.0 );
    /////////////////////////////////////////////////////////////////////////////////////////////
#pragma simd
    for(i = 0 ; i<8 ; i++)
    {
        layer[i] = Photon_Ptr->layer[i];
        mua[i] = In_Ptr->layerspecs[layer[i]].mua;
        mus[i] = In_Ptr->layerspecs[layer[i]].mus;
        if(Photon_Ptr->sleft[i] == 0.0) {  /* make a new step. */

            Photon_Ptr->s[i] = -log(result_t[i])/(mua[i]+mus[i]);
        } else {	/* take the leftover. */
            Photon_Ptr->s[i] = Photon_Ptr->sleft[i]/(mua[i]+mus[i]);
            Photon_Ptr->sleft[i] = 0.0;
        }
        /////////////////////////////////////////////////////////////////////////////////////////////
        layer[i] = Photon_Ptr->layer[i];
        uz[i] = Photon_Ptr->uz[i];
        /* Distance to the boundary. */
        if(uz[i]>0.0)
            dl_b[i] = (In_Ptr->layerspecs[layer[i]].z1
                    - Photon_Ptr->z[i])/uz[i];	/* dl_b>0. */
        else if(uz[i]<0.0)
            dl_b[i] = (In_Ptr->layerspecs[layer[i]].z0
                    - Photon_Ptr->z[i])/uz[i];	/* dl_b>0. */

        /* dl_b[i] is only set when uz[i] != 0; the first operand guards the read. */
        if(uz[i] != 0.0 && Photon_Ptr->s[i] > dl_b[i]) {
            /* not horizontal & crossing. */
            mut[i] = In_Ptr->layerspecs[layer[i]].mua
                + In_Ptr->layerspecs[layer[i]].mus;

            Photon_Ptr->sleft[i] = (Photon_Ptr->s[i] - dl_b[i])*mut[i];
            Photon_Ptr->s[i]    = dl_b[i];
            hit[i] = 1;
        } else
            hit[i] = 0;


        if(hit[i]) {

            /* Hop to the boundary. */
            s[i] = Photon_Ptr->s[i];
            Photon_Ptr->x[i] += s[i]*Photon_Ptr->ux[i];
            Photon_Ptr->y[i] += s[i]*Photon_Ptr->uy[i];
            Photon_Ptr->z[i] += s[i]*Photon_Ptr->uz[i];
            //        CrossOrNot(In_Ptr, Photon_Ptr, tmpOut_Ptr, rand_seed);
            if(Photon_Ptr->uz[i] < 0.0)
            {
                /* Moving up: boundary with layer-1. */
                uz[i] = Photon_Ptr->uz[i]; /* z directional cosine. */
                r[i]=0.0;	/* reflectance */
                layer[i] = Photon_Ptr->layer[i];
                ni[i] = In_Ptr->layerspecs[layer[i]].n;
                nt[i] = In_Ptr->layerspecs[layer[i]-1].n;

                /* Get r. */
                if( - uz[i] <= In_Ptr->layerspecs[layer[i]].cos_crit0)
                    r[i]=1.0;		      /* total internal reflection. */
                else{
                    //     r = RFresnel(ni, nt, -uz, &uz1);
                    n1_temp[i] = ni[i];
                    n2_temp[i] = nt[i];
                    ca1_temp[i] = -uz[i];
                    ca2_Ptr_temp[i] = &uz1[i];
                    //    double r;

                    if(n1_temp[i]==n2_temp[i]) {			  	/** matched boundary. **/
                        *ca2_Ptr_temp[i] = ca1_temp[i];
                        r[i] = 0.0;
                    } else if(ca1_temp[i]>COSZERO) {	/** normal incident. **/
                        *(ca2_Ptr_temp[i]) = ca1_temp[i];
                        r[i] = (n2_temp[i]-n1_temp[i])/(n2_temp[i]+n1_temp[i]);
                        r[i] *= r[i];
                    } else if(ca1_temp[i]<COS90D)  {	/** very slant. **/
                        *(ca2_Ptr_temp[i]) = 0.0;
                        r[i] = 1.0;
                    } else  {			  		/** general. **/

                        sa1[i] = sqrt(1-ca1_temp[i]*ca1_temp[i]);
                        sa2[i] = n1_temp[i]*sa1[i]/n2_temp[i];
                        if(sa2[i]>=1.0) {
                            /* double check for total internal reflection. */
                            *ca2_Ptr_temp[i] = 0.0;
                            r[i] = 1.0;
                        } else  {

                            *(ca2_Ptr_temp[i]) = ca2[i] = sqrt(1-sa2[i]*sa2[i]);

                            cap[i] = ca1_temp[i]*ca2[i] - sa1[i]*sa2[i]; /* c+ = cc - ss. */
                            cam[i] = ca1_temp[i]*ca2[i] + sa1[i]*sa2[i]; /* c- = cc + ss. */
                            sap[i] = sa1[i]*ca2[i] + ca1_temp[i]*sa2[i]; /* s+ = sc + cs. */
                            sam[i] = sa1[i]*ca2[i] - ca1_temp[i]*sa2[i]; /* s- = sc - cs. */
                            r[i] = 0.5*sam[i]*sam[i]*(cam[i]*cam[i]+cap[i]*cap[i])/(sap[i]*sap[i]*cam[i]*cam[i]);
                            /* rearranged for speed. */
                        }
                    }

                }

                if(result_t[i+8] > r[i]) {		/* transmitted to layer-1. */
                    if(layer[i]==1)  {
                        /* Leaving through the top surface: score and kill. */
                        Photon_Ptr->uz[i] = -uz1[i];
                        //            RecordR(0.0, In_Ptr, Photon_Ptr, tmpOut_Ptr);
                        x[i] = Photon_Ptr->x[i];
                        y[i]  = Photon_Ptr->y[i];

                        ir[i] = (short)(sqrt(x[i]*x[i]+y[i]*y[i])/In_Ptr->dr);
                        if(ir[i]>In_Ptr->nr-1) ir[i]=In_Ptr->nr-1;

                        ia[i] = (short)(acos(-Photon_Ptr->uz[i])/In_Ptr->da);
                        if(ia[i]>In_Ptr->na-1) ia[i]=In_Ptr->na-1;

                        /* assign photon to the reflection array element. */
                        //    tmpOut_Ptr->Rd_ra[ir][ia] += Photon_Ptr->w*(1.0-Refl);
                        Out_Ptr->Rd_ra[ir[i]][ia[i]] += Photon_Ptr->w[i]*(1.0);

                        Photon_Ptr->w[i] *= 0.0;

                        Photon_Ptr->dead[i] = 1;
                    } else {
                        Photon_Ptr->layer[i]--;
                        Photon_Ptr->ux[i] *= ni[i]/nt[i];
                        Photon_Ptr->uy[i] *= ni[i]/nt[i];
                        Photon_Ptr->uz[i] = -uz1[i];
                    }
                } else 						/* reflected. */
                    Photon_Ptr->uz[i] = -uz[i];

            }
            else
            {
                /* Moving down: boundary with layer+1. */
                //        CrossDnOrNot(In_Ptr, Photon_Ptr, tmpOut_Ptr, rand_seed);
                uz[i] = Photon_Ptr->uz[i]; /* z directional cosine. */
                r[i]=0.0;	/* reflectance */
                layer[i] = Photon_Ptr->layer[i];
                ni[i] = In_Ptr->layerspecs[layer[i]].n;
                nt[i] = In_Ptr->layerspecs[layer[i]+1].n;

                /* Get r. */
                if( uz[i] <= In_Ptr->layerspecs[layer[i]].cos_crit1)
                    r[i]=1.0;		/* total internal reflection. */
                else{
                    //         r = RFresnel(ni, nt, uz, &uz1);
                    //       double r;
                    n1_temp[i] = ni[i];
                    n2_temp[i] = nt[i];
                    ca1_temp[i] = uz[i];
                    ca2_Ptr_temp[i] = &uz1[i];

                    if(n1_temp[i]==n2_temp[i]) {			  	/** matched boundary. **/
                        *ca2_Ptr_temp[i] = ca1_temp[i];
                        r[i] = 0.0;
                    } else if(ca1_temp[i]>COSZERO) {	/** normal incident. **/
                        *ca2_Ptr_temp[i] = ca1_temp[i];
                        r[i] = (n2_temp[i]-n1_temp[i])/(n2_temp[i]+n1_temp[i]);
                        r[i] *= r[i];
                    } else if(ca1_temp[i]<COS90D)  {	/** very slant. **/
                        *(ca2_Ptr_temp[i]) = 0.0;
                        r[i] = 1.0;
                    } else  {			  		/** general. **/

                        sa1[i] = sqrt(1-ca1_temp[i]*ca1_temp[i]);
                        sa2[i] = n1_temp[i]*sa1[i]/n2_temp[i];
                        if(sa2[i]>=1.0) {
                            /* double check for total internal reflection. */
                            *(ca2_Ptr_temp[i]) = 0.0;
                            r[i] = 1.0;
                        } else  {

                            *(ca2_Ptr_temp[i]) = ca2[i] = sqrt(1-sa2[i]*sa2[i]);

                            cap[i] = ca1_temp[i]*ca2[i] - sa1[i]*sa2[i]; /* c+ = cc - ss. */
                            cam[i] = ca1_temp[i]*ca2[i] + sa1[i]*sa2[i]; /* c- = cc + ss. */
                            sap[i] = sa1[i]*ca2[i] + ca1_temp[i]*sa2[i]; /* s+ = sc + cs. */
                            sam[i] = sa1[i]*ca2[i] - ca1_temp[i]*sa2[i]; /* s- = sc - cs. */
                            r[i] = 0.5*sam[i]*sam[i]*(cam[i]*cam[i]+cap[i]*cap[i])/(sap[i]*sap[i]*cam[i]*cam[i]);
                            /* rearranged for speed. */
                        }
                    }

                }

                if(result_t[i+16] > r[i]) {		/* transmitted to layer+1. */
                    if(layer[i] == In_Ptr->num_layers) {
                        /* Leaving through the bottom surface: score and kill. */
                        Photon_Ptr->uz[i] = uz1[i];
                        //            RecordT(0.0, In_Ptr, Photon_Ptr, tmpOut_Ptr);
                        x[i] = Photon_Ptr->x[i];
                        y[i] = Photon_Ptr->y[i];

                        ir[i] = (short)(sqrt(x[i]*x[i]+y[i]*y[i])/In_Ptr->dr);
                        if(ir[i]>In_Ptr->nr-1) ir[i]=In_Ptr->nr-1;

                        ia[i] = (short)(acos(Photon_Ptr->uz[i])/In_Ptr->da);
                        if(ia[i]>In_Ptr->na-1) ia[i]=In_Ptr->na-1;

                        /* assign photon to the transmittance array element. */
                        //    Out_Ptr->Tt_ra[ir][ia] += Photon_Ptr->w*(1.0-Refl);
                        Out_Ptr->Tt_ra[ir[i]][ia[i]] += Photon_Ptr->w[i]*(1.0);

                        Photon_Ptr->w[i] *= 0.0;

                        Photon_Ptr->dead[i] = 1;
                    } else {
                        Photon_Ptr->layer[i]++;
                        Photon_Ptr->ux[i] *= ni[i]/nt[i];
                        Photon_Ptr->uy[i] *= ni[i]/nt[i];
                        Photon_Ptr->uz[i] = uz1[i];
                    }
                } else 						/* reflected. */
                    Photon_Ptr->uz[i] = -uz[i];

            }
        } else {
            /* No boundary hit: hop the full step, then drop and spin. */
            s[i] = Photon_Ptr->s[i];
            Photon_Ptr->x[i] += s[i]*Photon_Ptr->ux[i];
            Photon_Ptr->y[i] += s[i]*Photon_Ptr->uy[i];
            Photon_Ptr->z[i] += s[i]*Photon_Ptr->uz[i];
            //        Drop(In_Ptr, Photon_Ptr, tmpOut_Ptr);
            /* compute array indices. */
            x[i] = Photon_Ptr->x[i];
            y[i] = Photon_Ptr->y[i];
            layer[i] = Photon_Ptr->layer[i];
            iz[i] = (short)(Photon_Ptr->z[i]/In_Ptr->dz);
            if(iz[i]>In_Ptr->nz-1) iz[i]=In_Ptr->nz-1;

            ir[i] = (short)(sqrt(x[i]*x[i]+y[i]*y[i])/In_Ptr->dr);
            if(ir[i]>In_Ptr->nr-1) ir[i]=In_Ptr->nr-1;

            /* update photon weight. */
            mua[i] = In_Ptr->layerspecs[layer[i]].mua;
            mus[i] = In_Ptr->layerspecs[layer[i]].mus;
            dwa[i] = Photon_Ptr->w[i] * mua[i]/(mua[i]+mus[i]);
            Photon_Ptr->w[i] -= dwa[i];

            /* assign dwa to the absorption array element. */
            //    Out_Ptr->A_rz[ir][iz]	+= dwa;

            Out_Ptr->A_rz[ir[i]][iz[i]] += dwa[i];
            //        Spin(In_Ptr->layerspecs[Photon_Ptr->layer].g,
            //             Photon_Ptr, rand_seed);
            g[i] = In_Ptr->layerspecs[Photon_Ptr->layer[i]].g;
            ux[i] = Photon_Ptr->ux[i];
            uy[i] = Photon_Ptr->uy[i];
            uz[i] = Photon_Ptr->uz[i];
            //    cost = SpinTheta(g, rand_seed);
            if(g[i] == 0.0)
                cost[i] = 2*result_t[i+24] -1;
            else {
                temp[i] = (1-g[i]*g[i])/(1-g[i]+2*g[i]*result_t[i+32]);
                cost[i] = (1+g[i]*g[i] - temp[i]*temp[i])/(2*g[i]);
                if(cost[i] < -1) cost[i] = -1;
                else if(cost[i] > 1) cost[i] = 1;
            }

            sint[i] = sqrt(1.0 - cost[i]*cost[i]);
            /* sqrt() is faster than sin(). */

            psi[i] = 2.0*PI*result_t[i+40]; /* spin psi 0-2pi. */
            cosp[i] = cos(psi[i]);
            if(psi[i]<PI)
                sinp[i] = sqrt(1.0 - cosp[i]*cosp[i]);
            /* sqrt() is faster than sin(). */
            else
                sinp[i] = - sqrt(1.0 - cosp[i]*cosp[i]);

            if(fabs(uz[i]) > COSZERO)  { 	/* normal incident. */
                Photon_Ptr->ux[i] = sint[i]*cosp[i];
                Photon_Ptr->uy[i] = sint[i]*sinp[i];
                Photon_Ptr->uz[i] = cost[i]*SIGN(uz[i]);
                /* SIGN() is faster than division. */
            } else  {		/* regular incident. */
                temp[i] = sqrt(1.0 - uz[i]*uz[i]);
                Photon_Ptr->ux[i] = sint[i]*(ux[i]*uz[i]*cosp[i] - uy[i]*sinp[i])
                    /temp[i] + ux[i]*cost[i];
                Photon_Ptr->uy[i] = sint[i]*(uy[i]*uz[i]*cosp[i] + ux[i]*sinp[i])
                    /temp[i] + uy[i]*cost[i];
                Photon_Ptr->uz[i] = -sint[i]*cosp[i]*temp[i] + uz[i]*cost[i];
            }

        }


        /* Roulette for photons whose weight dropped below the threshold. */
        if( Photon_Ptr->w[i] < In_Ptr->Wth && !Photon_Ptr->dead[i])
        {

            if(Photon_Ptr->w[i] == 0.0)
                Photon_Ptr->dead[i] = 1;
            else if(result_t[i+48] < CHANCE) /* survived the roulette.*/
                Photon_Ptr->w[i] /= CHANCE;
            else
                Photon_Ptr->dead[i] = 1;

        }
    }
}
// Returns one uniform double between `min` and `max`, drawn from this
// object's `stream`.
// TODO: offer a variant that fills a whole array per call, which is faster
// than one generator call per value.
double GeneticAlgorithm::randomDouble(double min, double max){
	// The generator writes into an array; a one-element array holds the draw.
	double draw[1];
	vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, 1, draw, min, max);
	return draw[0];
}