Esempio n. 1
0
void MainWindow::randomizeSigma_1()
{   double x1;
    VSLStreamStatePtr stream;
    vslNewStream( &stream, VSL_BRNG_MCG31, this->seed );
    vdRngUniform( 0, stream, model->nI(), &model->Sigma[0], 0.0, 1.0 );
    vslDeleteStream( &stream );
    for (int i=0; i < model->nI(); ++i)
    {
        std::pair<int,int> ends = model->ends(i);
        int from = ends.first;
        int to   = ends.second;
        std::pair<double,double> xy0 = model->xy(from);
        std::pair<double,double> xy1 = model->xy(to);
 
        if (xy0.first==0 && xy1.first==0
            || xy0.first==0 && xy1.first==1
            || xy0.first==1 && xy1.first==0
            || xy0.first==1 && xy1.first==1
            || xy0.first==model->xmax() && xy1.first==model->xmax() 
            || xy0.first==model->xmax()-1 && xy1.first==model->xmax()
            || xy0.first==model->xmax()   && xy1.first==model->xmax()-1  
            || xy0.first==model->xmax()-1 && xy1.first==model->xmax()-1
            )
        {
            model->Sigma[i]=this->sigmaU;
        }
        else  {x1=model->Sigma[i];
            if (x1 < this->fraction) model->Sigma[i] = CUTOFF_SIGMA;
            else model->Sigma[i] =1;
        }
 
        }     
    
}
Esempio n. 2
0
PNL_BEGIN
// Re-seeds the global VSL stream stored in g_RNG.m_vslStream: the current
// stream is deleted and a new one is created with generator id
// _VSL_UNI_METHOD_ and seed `s`.
// NOTE(review): the status codes returned by both VSL calls are ignored.
void pnlSeed(int s)
{

    vslDeleteStream(&g_RNG.m_vslStream);
    vslNewStream(&g_RNG.m_vslStream, _VSL_UNI_METHOD_, s); 
}
Esempio n. 3
0
// Fills r[0 .. n-1] with Bernoulli(p) draws from an MCG31 stream.
//
// The seed is 17 + caffe_rng_rand() % 4096. With OpenMP the range is split
// into contiguous chunks, one per thread; each thread opens its own stream
// with the same seed and skips ahead by its chunk offset.
//
// Parallel execution is used only when not in GPU mode, not already inside a
// parallel region, and n reaches a threshold derived from the thread count
// and the reported processor speed.
static void bernoulli_generate(int n, double p, int* r) {
  int seed = 17 + caffe_rng_rand() % 4096;

#ifdef _OPENMP
  int nthr = omp_get_max_threads();
  int threshold = nthr * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3;
  bool run_parallel =
    (Caffe::mode() != Caffe::GPU) &&
    (omp_in_parallel() == 0) &&
    (n >= threshold);
  if (!run_parallel) nthr = 1;

# pragma omp parallel num_threads(nthr)
  {
    // num_threads() is only a request: the runtime may hand out a smaller
    // team. Partition by the team actually running so no chunk is left
    // without an owner.
    const int team_size = omp_get_num_threads();
    const int ithr = omp_get_thread_num();
    const int avg_amount = (n + team_size - 1) / team_size;
    const int my_offset = ithr * avg_amount;
    const int my_amount = std::min(my_offset + avg_amount, n) - my_offset;
#else
  {
    const int my_amount = n;
    const int my_offset = 0;
#endif

    // Threads whose chunk starts at or beyond n have nothing to do; a
    // non-positive count must not be passed to the generator.
    if (my_amount > 0) {
      VSLStreamStatePtr stream;
      vslNewStream(&stream, VSL_BRNG_MCG31, seed);
      vslSkipAheadStream(stream, my_offset);
      viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, my_amount,
        r + my_offset, p);
      vslDeleteStream(&stream);
    }
  }
}
Esempio n. 4
0
void Caffe::set_random_seed(unsigned int seed)
{
  CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, seed));
  VSL_CHECK(vslDeleteStream(&Get().vsl_stream_));
  VSL_CHECK(vslNewStream(&Get().vsl_stream_, VSL_BRNG_MT19937, seed));
}
Esempio n. 5
0
/*
 * JNI bridge for vslNewStream.
 *
 * Creates a VSL stream with generator id `brng` and seed `seed`, stores the
 * resulting handle in `jstream` through setStream(), and returns the VSL
 * status code to the Java caller.
 *
 * The handle starts out as a null pointer so that, when stream creation
 * fails, a well-defined value (rather than an uninitialised one) is stored in
 * the Java object.
 */
JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vslNewStream
  (JNIEnv *env, jclass clazz, jobject jstream, jint brng, jint seed)
{
    VSLStreamStatePtr streamp = 0;
    int status = vslNewStream(&streamp, brng, seed);
    setStream(env, clazz, jstream, streamp);

    return (jint)status;
}
Esempio n. 6
0
/*
 * Molecular-dynamics driver.
 *
 * Allocates position, velocity and force arrays for n particles (3
 * components each), initialises positions and velocities with rand(), then
 * runs mt steps of Force() + Solver(), printing energies every step and an
 * xyz snapshot whenever the step index is a multiple of dtxyz, plus one
 * final snapshot.
 *
 * Returns 0 on success, 1 if the particle arrays cannot be allocated.
 */
int main()
{
  const int n     = 500;           // Number of atoms, molecules
  const int mt    = 20;         // Max time steps
  const int dtxyz = 100;           // Time interval to output xyz 

  int i;
  int j;

  double *x;
  double *v;
  double *f;

  const double domain = 300;       // Domain size (a.u.)
  const double dt     = 10;        // Time interval (a.u.)
  const double ms     = 0.0;       // Max speed (a.u.)
  const double em     = 1822.88839 * 28.0134; // Effective mass of N2
  const double lje    = 0.000313202;          // Lennard-Jones epsilon of N2
  const double ljs    = 6.908841465;          // Lennard-Jones sigma of N2

  #ifdef MKLRNG
  // NOTE(review): this stream is created but not used by the initialisation
  // below, which relies on rand(); it is released before returning.
  VSLStreamStatePtr stream; 

  vslNewStream(&stream, VSL_BRNG_MT19937,   5489); // Initiation, type, seed
  //vslNewStream(&stream, VSL_BRNG_SFMT19937, 5489); // Initiation, type, seed
  #endif

  x = (double *) malloc(n * 3 * sizeof(double));
  v = (double *) malloc(n * 3 * sizeof(double));
  f = (double *) malloc(n * 3 * sizeof(double));

  // Bail out cleanly instead of dereferencing a null buffer.
  if (x == NULL || v == NULL || f == NULL)
    {
      fprintf(stderr, "Error: cannot allocate arrays for %d particles\n", n);
      free(x);
      free(v);
      free(f);
      #ifdef MKLRNG
      vslDeleteStream(&stream);
      #endif
      return 1;
    }

  // Initialization: positions in [0, domain), velocities in [-ms/2, ms/2)

  for (i=0; i<n; i++)
    for (j=0; j<3; j++) x[i*3+j] = domain * rand() / (RAND_MAX + 1.0);

  for (i=0; i<n; i++)
    for (j=0; j<3; j++) v[i*3+j] = ms * (rand() / (RAND_MAX + 1.0) - 0.5);


  // Dynamics
  printf("#  Index    dTime    KinEng      PotEng       TotEng\n");
  for (i=0; i<mt; i++)
    {
      Force(n, lje, ljs, x, f);
      Solver(n, dt, em, x, v, f);

      Output_energy(i, n, dt, em, lje, ljs, x, v);
      if (i % dtxyz == 0) Output_xyz(i, n, x);
    }

  // Final snapshot; i == mt here.
  Output_xyz(i, n, x);

  // Release everything acquired above.
  free(x);
  free(v);
  free(f);
  #ifdef MKLRNG
  vslDeleteStream(&stream);
  #endif

  return 0;
}
Esempio n. 7
0
// Constructs the Caffe state object: CPU mode, TRAIN phase, all library
// handles null. The body then creates the cuBLAS handle, the cuRAND generator
// (seeded with 1701) and an MT19937 VSL stream (also seeded with 1701); every
// call is wrapped in the matching *_CHECK macro.
Caffe::Caffe()
  : mode_(Caffe::CPU),
    phase_(Caffe::TRAIN),
    cublas_handle_(NULL),
    curand_generator_(NULL),
    vsl_stream_(NULL)
{
  CUBLAS_CHECK(cublasCreate(&cublas_handle_));

  // TODO: the original Caffe code is reported to have a bug at this point.
  CURAND_CHECK(curandCreateGenerator(&curand_generator_,
                                     CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator_,
                                                  1701ULL));

  VSL_CHECK(vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, 1701));
}
Esempio n. 8
0
/*
 * Allocates `size` doubles and fills them with Gaussian samples (mean 1.0,
 * sigma 3.0, Box-Muller method) from an MT19937 stream seeded with
 * i*time(0).
 *
 * Returns the buffer, which the caller must release with free(), or NULL if
 * the allocation fails.
 *
 * NOTE(review): for i == 0 the seed is always 0, so every such call yields
 * the same sequence.
 */
double * get_vector(int size, int i)
{
	double *vec;
	VSLStreamStatePtr stream;

	/* Allocate first so that no stream is left open when allocation fails
	   and the generator is never handed a null buffer. */
	vec = (double *)calloc(size, sizeof(double));
	if (vec == NULL)
		return NULL;

	vslNewStream( &stream, VSL_BRNG_MT19937, i*time(0) );
	vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream,
	          size, vec, 1.0, 3.0 );
	vslDeleteStream( &stream );
	return vec;
}
// Creates the SFMT19937 stream used by the algorithm and allocates the four
// per-population integer work arrays (ints1, ints2, ints3, shuffleIndex),
// each with _nPopulation entries.
//
// The seed is the number of milliseconds elapsed since local midnight, so it
// can take 86,400,000 distinct values instead of the 1000 offered by the
// millisecond field alone.
//
// NOTE(review): the arrays are raw new[] allocations; their release is not
// visible in this block.
void GeneticAlgorithm::initializeRandomNumberGenerators(){
	SYSTEMTIME t;
	GetLocalTime(&t);

	const unsigned int seed = t.wMilliseconds
		+ 1000u * (t.wSecond + 60u * (t.wMinute + 60u * t.wHour));

	vslNewStream( & stream, VSL_BRNG_SFMT19937, seed );
	ints1 = new int[_nPopulation];
	ints2 = new int[_nPopulation];
	ints3 = new int[_nPopulation];
	shuffleIndex = new int[_nPopulation];
}
Esempio n. 10
0
void MainWindow::randRcr()
{   
    int i_Rcr = model->index_of_Rcr();
    elementCr = fabs((model->I[ i_Rcr ]));
    this->sigmaMin=model->Sigma[i_Rcr ];
    VSLStreamStatePtr stream;
    vslNewStream( &stream, VSL_BRNG_MCG31, this->seed );
    vdRngUniform( 0, stream, model->nI(), &model->Sigma[0], 0.0, 1.0 );
    vslDeleteStream( &stream );
    double x1=model->Sigma[i_Rcr];
    this->randc=x1;
    this->rand=x1;
}
// Creates the SFMT19937 stream used by the algorithm and allocates the three
// per-population integer work arrays (ints1, ints2, ints3), each with
// _nPopulation entries.
//
// The seed is the number of milliseconds elapsed since local midnight, so it
// can take 86,400,000 distinct values instead of the 1000 offered by the
// millisecond field alone.
//
// NOTE(review): the arrays are raw new[] allocations; their release is not
// visible in this block.
void GeneticAlgorithm2::initializeRandomNumberGenerators(){
	SYSTEMTIME t;
	GetLocalTime(&t);

	const unsigned int seed = t.wMilliseconds
		+ 1000u * (t.wSecond + 60u * (t.wMinute + 60u * t.wHour));

	vslNewStream( & stream, VSL_BRNG_SFMT19937, seed );
	ints1 = new int[_nPopulation];
	ints2 = new int[_nPopulation];
	ints3 = new int[_nPopulation];
}
Esempio n. 12
0
/*
 * Monte Carlo estimate of pi with OpenMP and MKL VSL.
 *
 * Points are generated in blocks of BLOCK_SIZE (x, y) pairs; each thread owns
 * a private stream and a private buffer. The fraction of points inside the
 * unit quarter circle, times 4, is printed together with the elapsed
 * processor time.
 */
int main(){
  unsigned int iter=200000000; 
  /* Only whole blocks are generated, so this is the real number of blocks. */
  unsigned int nblocks=iter/BLOCK_SIZE;
  int i,j;
  double x, y;
  double dUnderCurve=0.0;
  double pi=0.0;
  double samples;

  /* Seed bookkeeping: every thread takes seedBase plus a unique ordinal so
     that no two threads replay the same random sequence. */
  int seedBase;
  int streamsCreated=0;

  VSLStreamStatePtr stream;  //You need one stream for each thread

  double end_time,start_time;

  start_time=clock();
  seedBase=(int)clock();
#pragma omp parallel private(stream,x,y,i) reduction(+:dUnderCurve)
  {
    double r[BLOCK_SIZE*2];  //Careful!!! 
  	//you need a private copy of whole array for each thread  
    int mySeed;

    /* Seeding each thread with clock() directly can easily give all threads
       the same value; hand out distinct seeds instead. */
#pragma omp critical
    {
      mySeed = seedBase + streamsCreated;
      streamsCreated++;
    }

    vslNewStream( &stream, BRNG, mySeed );
#pragma omp for 
    for(j=0;j<(int)nblocks;j++) {

        vdRngUniform( METHOD, stream, BLOCK_SIZE*2, r, 0.0, 1.0 );   
		//Create random numbers into array r

        for (i=0;i<BLOCK_SIZE;i++) {
            x=r[i];                     //X Coordinate
            y=r[i+BLOCK_SIZE];          //Y Coordinate
            if (x*x + y*y <= 1.0) {	//is distance from Origin under Curve 
                dUnderCurve++;
            }
        } 
    }

    vslDeleteStream( &stream );
  }

  /* Divide by the number of points actually drawn; this equals iter whenever
     BLOCK_SIZE divides iter. */
  samples = (double)nblocks * (double)(BLOCK_SIZE);
  pi = dUnderCurve / samples * 4 ;

  end_time=clock();

  printf ("pi = %10.9f\n", pi);
  /* clock() counts in units of 1/CLOCKS_PER_SEC. It reports processor time
     per the C standard, which is not necessarily wall-clock time in a
     multi-threaded run. */
  printf ("Seconds = %10.9f\n",(end_time-start_time)/(double)CLOCKS_PER_SEC);

  return 0;
}
int main(int argc, char **argv) {
  long i;
  long Ncirc = 0;
  double pi, xy[2];
  double r = 1.0; // radius of circle
  double r2 = r*r;

  int rank, size, manager = 0;
  MPI_Status status;
  long my_trials, temp;
  int j;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  VSLStreamStatePtr stream;

  my_trials = num_trials/size;
  if (num_trials%(long)size > (long)rank) my_trials++;

  vslNewStream(&stream, VSL_BRNG_MT2203+rank, 1);

  for (i = 0; i < my_trials; i++) {
    vdRngUniform(VSL_RNG_METHOD_UNIFORMBITS_STD, stream, 2, xy, 0.0, 1.0);
    if ((xy[0]*xy[0] + xy[1]*xy[1]) <= r2)
      Ncirc++;
  }

  if (rank == manager) {
    for (j = 1; j < size; j++) {
      MPI_Recv(&temp, 1, MPI_LONG, j, j, MPI_COMM_WORLD, &status);
      Ncirc += temp;
    }
    pi = 4.0 * ((double)Ncirc)/((double)num_trials);
    printf("\n \t Computing pi using MPI and MKL for random number generator: \n");
    printf("\t For %ld trials, pi = %f\n", num_trials, pi);
    printf("\n");
  } else {
    MPI_Send(&Ncirc, 1, MPI_LONG, manager, rank, MPI_COMM_WORLD);
  }
  MPI_Finalize();
  return 0;
}
Esempio n. 14
0
/*
 * Benchmark of four MKL vector math functions.
 *
 * For each size i = 10, 100, ... up to ARRAY_SIZE, two input arrays are
 * filled with Gaussian samples (mean 1.0, sigma 3.0), then vdCos, vdLn,
 * vdPow and vdCosh are run EXPERIMENTS_NUM times and the elapsed wall time
 * is printed. The benchmark stops early if the work arrays cannot be
 * allocated.
 */
void hard_mkl()
{
    /*char *results_file = "hard_mkl.txt";
    FILE *res;
    if((res=fopen(results_file, "w"))==NULL)
    {
        printf("Can't open file %s.\n", results_file);
        exit(1);
    }*/
    for(int i = 10; i <= ARRAY_SIZE; i*=10)
    {
        VSLStreamStatePtr stream;
        double *ar1, *ar2, *ar3, *ar4, *ar5, *ar6;

        ar1 = (double *)malloc(i*sizeof(double));
        ar2 = (double *)malloc(i*sizeof(double));
        ar3 = (double *)malloc(i*sizeof(double));
        ar4 = (double *)malloc(i*sizeof(double));
        ar5 = (double *)malloc(i*sizeof(double));
        ar6 = (double *)malloc(i*sizeof(double));

        /* Never hand a null buffer to MKL: give up on the first size that
           does not fit in memory. free(NULL) is a no-op. */
        if (ar1 == NULL || ar2 == NULL || ar3 == NULL ||
            ar4 == NULL || ar5 == NULL || ar6 == NULL)
        {
            fprintf(stderr, "Can't allocate work arrays for i=%d.\n", i);
            free(ar1); free(ar2); free(ar3); free(ar4); free(ar5); free(ar6);
            return;
        }

        /* Stream is created only once the buffers exist, so the failure path
           above has nothing else to release. */
        vslNewStream( &stream, VSL_BRNG_MT19937, i*time(0) );
        vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream,
                        i, ar1, 1.0, 3.0 );
        vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream,
                        i, ar2, 1.0, 3.0 );

        double start = omp_get_wtime();
        for(int j = 0; j < EXPERIMENTS_NUM; j++)
        {
            vdCos (i, ar1, ar3);
            vdLn  (i, ar1, ar4);
            vdPow (i, ar1, ar2, ar5);
            vdCosh(i, ar2, ar6);
        }
        double end = omp_get_wtime();

        free(ar1); free(ar2); free(ar3); free(ar4); free(ar5); free(ar6);
        //fprintf(res, "%lf\n", end-start);
        printf("%lf, i=%d\n", end-start, i);
        vslDeleteStream( &stream );
    }
    //fclose(res);
}
/*
 * Random-walk simulation on a perturbed 1 x 300 periodic lattice.
 *
 * For every standard deviation in stddevs[] (and every repeat) the node
 * x-positions are set to their column index plus Gaussian noise, sorted, and
 * written to "output/". Then kRepeatCount simulations are run: agents are
 * placed on columns 130..170, performMotilityEvents() is applied for
 * timeSets[kTimeSetsNum-1] steps, and the lattice occupancy is written out
 * at each step count listed in timeSets[].
 */
int main(int argc, char* argv[]) {

	// construct lattice
	unsigned int rows = 1;
	unsigned int columns = 300;
	lattice_t* lattice = lattice_create(rows, columns, 
		periodic, periodic, periodic, periodic);

	// initialise lattice positioning
	unsigned int const kNumStdDevs = 5;
	unsigned int const kStdDevsRepeatCount = 1;
	unsigned int const kRepeatCount = 1000;
	double stddevs[] = { 0.1, 0.2, 0.3, 0.4, 0.5 };

	unsigned int const kTimeSetsNum = 3;
	unsigned int timeSets[] = { 200, 500, 1000 };

	// initialise random number storage
	VSLStreamStatePtr stream;
	float randomNumbers[columns];

	// initialise temporary node storage
	double xPosition;
	double yPosition = 0.0;
	coordinate_t coord;

	// initialise loop variables
	bool trackedLatticeLayout;
	char latticeLayoutFileName[50];
	char latticeProfileFilename[100];

	// initialise agent tracking information
	unsigned int numTrackedAgents = 0;
	coordinate_t* trackedPositions = NULL;
	int* trackedAgentIds = NULL;

	// set motility properties
	double motilityProbability = 1.0;
	double xShiftPreference = 0;
	double yShiftPreference = 0;
	bool agentExclusion = false;

	// generation random lattices
	for (int stdDevIndex = 0; stdDevIndex < kNumStdDevs; stdDevIndex++) {

		for (int boundRepeatCount = 0; 
			boundRepeatCount < kStdDevsRepeatCount; boundRepeatCount++) {

			// generate the required random numbers (Gaussian, mean 0,
			// sigma = current standard deviation)
			vslNewStream(&stream, BRNG, arc4random());	
			vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, 
				columns, randomNumbers, 0.0f, stddevs[stdDevIndex]);
			// a fresh stream is created on every pass, so release this
			// one as soon as its samples have been drawn
			vslDeleteStream(&stream);

			// perturb and sort node locations
			for (int col = 0; col < columns; col++) {
				randomNumbers[col] += col;
			}
			qsort(randomNumbers, columns, sizeof(float), compare);

			// specify node locations
			for (int row = 0; row < rows; row++) {
				for (int col = 0; col < columns; col++) {
					coord.row = row;
					coord.column = col;
					xPosition = (double)randomNumbers[col];
					lattice_specify_position(lattice, coord, xPosition, yPosition);
				}
			}

			// save node locations
			bool saveNodeLocations = true;
			if (saveNodeLocations) {
				sprintf(latticeLayoutFileName, "node_positions_%0.02f_%d_ghosts.txt", 
					stddevs[stdDevIndex], boundRepeatCount);
				trackedLatticeLayout = lattice_parser_node_positions(lattice, rows, 
					columns, latticeLayoutFileName, "output/");
				if (!trackedLatticeLayout) {
					printf("Error: failed storing lattice layout information (case: %0.02f %d).\n", 
						stddevs[stdDevIndex], boundRepeatCount);
				}
			}

			// perform simulations
			bool performSimulation = true;
			if (performSimulation) {

				// perform simulations
				for (int repeatCount = 0; repeatCount < kRepeatCount; repeatCount++) {

					// populate lattice: one agent per column in 130..170,
					// with ids counting up from 1
					int* agentId;
					int currentAgentId = 1;
					coordinate_t agentPos;

					for (int j = 130; j < 171; j++) {
						agentId = malloc(sizeof(int));
						*agentId = currentAgentId++;

						agentPos.row = 0;
						agentPos.column = j;

						lattice_push_agent(lattice, agentPos, agentId);
					}

					// perform simulation
					for (int timeStep = 0; timeStep < timeSets[kTimeSetsNum-1]; 
							timeStep++) {
						performMotilityEvents(lattice, rows, columns, motilityProbability, 
							xShiftPreference, yShiftPreference, agentExclusion, 
							trackedAgentIds, numTrackedAgents, trackedPositions);

						// dump the profile when a listed step count is reached
						for (int j = 0; j < kTimeSetsNum; j++) {
							if (timeStep == timeSets[j]-1) {
								// store lattice profile
								sprintf(latticeProfileFilename, "lattice_profile_%0.02f_%d_%d_%d_ghosts.txt", 
									stddevs[stdDevIndex], boundRepeatCount, repeatCount, timeStep+1);
								bool isTracked = lattice_occupancy_parser(lattice, rows, columns, 
									latticeProfileFilename, "output/");
								if (!isTracked) {
									printf("Error: failed to store lattice profile.\n");
								}
							}
						}
					}

					// clear lattice and deallocate memory
					lattice_clear(lattice, rows, columns, true);
				}
			}
		}
	}

	// deallocate memory
	lattice_destroy(&lattice, rows, columns, true);

	return EXIT_SUCCESS;
}
Esempio n. 16
0
// Monte Carlo pricing of European call options, one batch of OPT_PER_THREAD
// options per OpenMP thread.
//
// Each thread builds its own option parameters, draws Gaussian samples from
// its own MT2203 stream in blocks of RAND_BLOCK_LENGTH, accumulates the
// discounted payoff and a confidence value per option, and finally compares
// the result with BlackScholesBodyCPU(). The error statistics are combined
// across threads with OpenMP reductions and printed together with timing.
//
// NOTE(review): start_cyc, end_cyc, NTHREADS and the numeric constants used
// below are not declared in this function; presumably they are file-scope
// definitions.
int main(int argc, char* argv[])
{
    double
	sTime, eTime;

    // Accumulators combined across threads by the reduction clauses below.
    double sum_delta  = 0.0;
    double sum_ref    = 0.0;
    double max_delta  = 0.0;
    double sumReserve = 0.0;

    printf("Monte Carlo European Option Pricing Single Precision\n\n");
    printf("Compiler Version  = %d\n", __INTEL_COMPILER/100);
    printf("Release Update    = %d\n", __INTEL_COMPILER_UPDATE);
    printf("Build Time        = %s %s\n", __DATE__, __TIME__);
    printf("Path Length       = %d\n", RAND_N);
    printf("Number of Options = %d\n", OPT_N);
    printf("Block Size        = %d\n", RAND_BLOCK_LENGTH);
    // NOTE(review): NTHREADS is printed here before the non-OpenMP build
    // below forces it to 1.
    printf("Worker Threads    = %d\n\n", NTHREADS);

    // Size in bytes of each per-thread option array.
    const int mem_size  = sizeof(float)*OPT_PER_THREAD;

#ifndef _OPENMP
    NTHREADS = 1;
#endif

    // Per-thread sample buffers and VSL streams, indexed by thread id.
    // NOTE(review): assumes every thread id is below MAX_THREADS — confirm.
    float *samples[MAX_THREADS];
    VSLStreamStatePtr Streams[MAX_THREADS];
    const int nblocks = RAND_N/RAND_BLOCK_LENGTH;
#pragma omp parallel reduction(+ : sum_delta) reduction(+ : sum_ref) reduction(+ : sumReserve) reduction(max : max_delta)
{
#ifdef _OPENMP
    int threadID = omp_get_thread_num();
#else
    int threadID = 0;
#endif
    // Per-thread seed for the option parameters generated below.
    unsigned int randseed = RANDSEED + threadID;
    srand(randseed);
    // Five SIMDALIGN-aligned arrays of OPT_PER_THREAD floats per thread.
    float *CallResultList     = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    float *CallConfidenceList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    float *StockPriceList     = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    float *OptionStrikeList   = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    float *OptionYearsList    = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    // Zero the outputs and draw the option parameters.
    // NOTE(review): RandFloat_T presumably returns a value between its first
    // two arguments — verify against its definition.
    for(int i = 0; i < OPT_PER_THREAD; i++)
    {
        CallResultList[i]     = 0.0f;
        CallConfidenceList[i] = 0.0f;
        StockPriceList[i]     = RandFloat_T(5.0f, 50.0f, &randseed);
        OptionStrikeList[i]   = RandFloat_T(10.0f, 25.0f, &randseed);
        OptionYearsList[i]    = RandFloat_T(1.0f, 5.0f, &randseed);
    }
    // One block-sized sample buffer and one MT2203 generator per thread
    // (generator index = thread id, common seed RANDSEED).
    samples[threadID] = (float *)scalable_aligned_malloc(RAND_BLOCK_LENGTH * sizeof(float), SIMDALIGN);
    vslNewStream(&(Streams[threadID]), VSL_BRNG_MT2203 + threadID, RANDSEED);

    // Start timing only once every thread has finished its setup.
#pragma omp barrier
    if (threadID == 0)
    {
        printf("Starting options pricing...\n");
        sTime = second();
        start_cyc = _rdtsc();
    }

    // Price every option owned by this thread.
    for(int opt = 0; opt < OPT_PER_THREAD; opt++)
    {
        // Standard deviation and mean passed to the Gaussian generator for
        // this option.
        const float VBySqrtT = VLog2E * sqrtf(OptionYearsList[opt]);
	const float MuByT    = MuLog2E * OptionYearsList[opt];
        const float Y        = StockPriceList[opt];
        const float Z        = OptionStrikeList[opt];
		            
        // v0: sum of payoffs, v1: sum of squared payoffs.
        float v0 = 0.0f;
        float v1 = 0.0f;
        for(int block = 0; block < nblocks; ++block)
        {
            float *rand = samples[threadID];
            vsRngGaussian (VSL_RNG_METHOD_GAUSSIAN_ICDF, Streams[threadID], RAND_BLOCK_LENGTH, rand, MuByT, VBySqrtT); 
#pragma vector aligned
#pragma simd reduction(+:v0) reduction(+:v1)
#pragma unroll(4)
            for(int i=0; i < RAND_BLOCK_LENGTH; i++) 
            {
                // Payoff of one path, floored at zero.
                float callValue  = Y * exp2f(rand[i]) - Z;
                callValue = (callValue > 0.0) ? callValue : 0.0;
                v0 += callValue;
                v1 += callValue * callValue;
            }
        }
        // Scale the accumulated sums into the result and its confidence
        // value.
        const float  exprt      = exp2f(RLog2E*OptionYearsList[opt]);
        CallResultList[opt]     = exprt * v0 * INV_RAND_N;
        const float  stdDev     = sqrtf((F_RAND_N * v1 - v0 * v0) * STDDEV_DENOM);
        CallConfidenceList[opt] = (float)(exprt * stdDev * CONFIDENCE_DENOM);
    } //end of opt 

    // Stop timing once every thread has finished pricing.
#pragma omp barrier
    if (threadID == 0) {
        end_cyc = _rdtsc();
        eTime = second();
        printf("Parallel simulation completed in %f seconds.\n", eTime-sTime);
        printf("Validating the result...\n");
    }

    // Validation against the reference values from BlackScholesBodyCPU.
    // NOTE(review): L1norm and max_index declared here are not used inside
    // the parallel region.
    double delta = 0.0, ref = 0.0, L1norm = 0.0;
    int max_index = 0;
    double max_local  = 0.0;
    for(int i = 0; i < OPT_PER_THREAD; i++)
    {
        double callReference, putReference;
        BlackScholesBodyCPU(
            callReference,
            putReference,
            StockPriceList[i],
            OptionStrikeList[i], OptionYearsList[i],  RISKFREE, VOLATILITY );
        ref   = callReference;
        delta = fabs(callReference - CallResultList[i]);
        sum_delta += delta;
        sum_ref   += fabs(ref);
        // Skip near-zero errors to avoid dividing by a tiny delta.
        if(delta > 1e-6)
             sumReserve += CallConfidenceList[i] / delta;
        max_local = delta>max_local? delta: max_local;
    }
    // Fold this thread's maximum into the max-reduction variable.
    max_delta = max_local>max_delta? max_local: max_delta;
    // Release all per-thread resources.
    vslDeleteStream(&(Streams[threadID]));
    scalable_aligned_free(samples[threadID]);
    scalable_aligned_free(CallResultList);
    scalable_aligned_free(CallConfidenceList);
    scalable_aligned_free(StockPriceList);
    scalable_aligned_free(OptionStrikeList);
    scalable_aligned_free(OptionYearsList);
}//end of parallel block

    // Combine the reduced sums into the reported statistics.
    sumReserve          /= (double)OPT_N;
    const double L1norm  = sum_delta / sum_ref;

    printf("L1_Norm          = %4.3E\n", L1norm);
    printf("Average RESERVE  = %4.3f\n", sumReserve);
    printf("Max Error        = %4.3E\n", max_delta);

    const unsigned long long cyc       = end_cyc - start_cyc;
    const double             optcyc    = (double)cyc/(double)OPT_N;

    // NOTE(review): cyc is unsigned long long but is printed with %lld.
    printf("==========================================\n");
    printf("Total Cycles = %lld\n", cyc);
    printf("Cyc/opt      = %8.3f\n", optcyc);
    printf("Time Elapsed = %8.3f\n", eTime-sTime);
    printf("Options/sec  = %8.3f\n", OPT_N/(eTime-sTime));
    printf("==========================================\n");
    return 0;
}
Esempio n. 17
0
/*
 * Molecular-dynamics driver.
 *
 * Allocates position, velocity and force arrays for n particles (3
 * components each) and initialises them either with Gaussian samples from an
 * MKL MT19937 stream (when MKLRNG is defined) or with rand(). It then runs
 * mt steps of Force() + Solver(), reporting energies every step and an xyz
 * snapshot whenever the step index is a multiple of dtxyz, plus one final
 * snapshot.
 *
 * Returns 0 on success, 1 if the particle arrays cannot be allocated.
 */
int main()
{
  const int n     = 500;           // Number of atoms, molecules
  const int mt    = 100;          // Max time steps
  const int dtxyz = 100;           // Time interval to output xyz 

  int i;
  int j;

  double *x;
  double *v;
  double *f;

  const double domain = 300;       // Domain size (a.u.)
  const double dt     = 10;        // Time interval (a.u.)
  const double ms     = 0.00001;   // Max speed (a.u.)
  const double em     = 1822.88839 * 28.0134; // Effective mass of N2
  const double lje    = 0.000313202;          // Lennard-Jones epsilon of N2
  const double ljs    = 6.908841465;          // Lennard-Jones sigma of N2

  #ifdef MKLRNG
  VSLStreamStatePtr stream; 

  vslNewStream(&stream, VSL_BRNG_MT19937,   5489); // Initiation, type, seed
  //vslNewStream(&stream, VSL_BRNG_SFMT19937, 5489); // Initiation, type, seed
  #endif

  x = (double *) malloc(n * 3 * sizeof(double));
  v = (double *) malloc(n * 3 * sizeof(double));
  f = (double *) malloc(n * 3 * sizeof(double));

  // Bail out cleanly instead of dereferencing a null buffer.
  if (x == NULL || v == NULL || f == NULL)
    {
      fprintf(stderr, "Error: cannot allocate arrays for %d particles\n", n);
      free(x);
      free(v);
      free(f);
      #ifdef MKLRNG
      vslDeleteStream(&stream);
      #endif
      return 1;
    }

  // Initialization

  #ifdef MKLRNG
  for (i=0; i<n; i++)
    {
      int nRN = 3;

      double GRN[3];

      // Position: Gaussian with mean 0.5 * domain and sigma = domain
      vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, nRN, GRN, 0.5 * domain, domain);

      for (j=0; j<3; j++) x[i*3+j] = GRN[j];

      // Velocity: Gaussian with mean 0 and sigma = 0.5 * ms
      vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, nRN, GRN, 0.0, 0.5 * ms);

      for (j=0; j<3; j++) v[i*3+j] = GRN[j];
    }
  #else
  // Positions in [0, domain), velocities in [-ms/2, ms/2)
  for (i=0; i<n; i++)
    for (j=0; j<3; j++) x[i*3+j] = domain * rand() / (RAND_MAX + 1.0);

  for (i=0; i<n; i++)
    for (j=0; j<3; j++) v[i*3+j] = ms * (rand() / (RAND_MAX + 1.0) - 0.5);
  #endif

  // Dynamics

  for (i=0; i<mt; i++)
    {
      Force(n, lje, ljs, x, f);
      Solver(n, dt, em, x, v, f);

      Output_energy(i, n, dt, em, lje, ljs, x, v);
      if (i % dtxyz == 0) Output_xyz(i, n, x);
    }

  // Final snapshot; i == mt here.
  Output_xyz(i, n, x);

  // Release everything acquired above.
  free(x);
  free(v);
  free(f);
  #ifdef MKLRNG
  vslDeleteStream(&stream);
  #endif

  return 0;
}
Esempio n. 18
0
// Creates the VSL stream held in rng->m_stream, using generator id
// kVSLBRNGMethod and the seed pointed to by seedValue. The status returned by
// vslNewStream is written to *info.
// NOTE(review): none of the three pointers is checked for null.
void rngInit(RngEngine* rng, RngSeedType* seedValue, RngErrorType* info) {
	const RngErrorType status =
		vslNewStream(&(rng->m_stream), kVSLBRNGMethod, *seedValue);
	*info = status;
}
Esempio n. 19
0
 // Default constructor: clears the stream handle, then creates a VSL stream
 // with generator id _VSL_UNI_METHOD_ and seed 0.
 // NOTE(review): the status returned by vslNewStream is not checked.
 CRNG()
 {
     m_vslStream = 0;
     vslNewStream(&m_vslStream, _VSL_UNI_METHOD_, 0 );
 }
Esempio n. 20
0
/*
 * Monte Carlo study of an accumulated error term over simulated paths.
 *
 * For each of M paths: Gaussian increments are generated in chunks of Ncache,
 * the running sums BM[] and the path values PX[] are built from them, and
 * an error term is accumulated from the increments PX[i+1]-PX[i] weighted by
 * vNormalIntegral(). Three closed-form terms (two based on X0 and K, one on
 * the final path value PXend) are then added. A path counts as a success
 * when |err| < EPSILON. The success ratio and the elapsed time are printed.
 *
 * NOTE(review): X0, K, T, N, M, SIGMA, PI, Ncache, NRV, BM and PX are not
 * declared in this function; presumably they are file-scope definitions.
 */
int main(int argc, char *argv[])
{
  unsigned long long count = 0;
  double EPSILON = X0*1.0E-2;
  /* Initialised so the final printf is well-defined even when M == 0. */
  double err = 0.0;
  double PXend;
  /* Cast before dividing, as rootdt below already does, so the step is not
     truncated if T and N are integers. */
  const double dt = (double)T/N;
  const double rootdt = sqrt((double)T/N);
  int nCal = N/Ncache;
  const int left = N%Ncache;
  VSLStreamStatePtr stream;
  int errcode = vslNewStream(&stream, VSL_BRNG_MT2203, 0);//seed=0

  start_timer();
  for (unsigned long long m = 0; m < M; ++m){
    // one-time MC simulation
    err = 0.0;
    vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, Ncache, NRV, 0.0f, 1.0f);// leaves the rest of random numbers generated by the idle thread
    BM[0] = rootdt*NRV[0];
    PX[0] = X0;
    for (int k = 0; k < nCal; ++k){
      //rootdt:firstprivate???
#pragma omp parallel default(none) shared(NRV, BM, PX, stream, err, PXend, rootdt, dt, k)
      {
	double upbd, tmp;
	//GUIDED_CHUNK too large: load imbalance
	//GUIDED_CHUNK too small: scheduling overhead
	//#pragma omp for schedule(guided, GUIDED_CHUNK)
#pragma omp for schedule(guided) //tunable 
	for (int i = 1; i < Ncache; ++i){
	  // prefix sum of the increments 1..i of this chunk
	  tmp = 0.0;
#pragma simd reduction(+:tmp) vectorlengthfor(double) assert
	  for (int j = 1; j <= i; ++j){
	    tmp += NRV[j];
	  }
	  BM[i] = BM[0] + tmp*rootdt;
	  PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(k*Ncache+i+1)*dt+SIGMA*BM[i]);
	}
#pragma omp single
	{
	  PX[1] = X0*exp(-0.5*SIGMA*SIGMA*(k*Ncache+1)*dt+SIGMA*BM[0]);
	}

	//maybe vary the scheduling strategy?
	// nowait is safe here: err is not touched again before the barrier
	// that ends the single construct below.
#pragma omp for reduction(+:err) nowait
	for (int i = 0; i < Ncache; ++i){
	  int j = k*Ncache+i;
	  double Tj = j*(double)T/N;
	  upbd = (log(PX[i]/K)+0.5*SIGMA*SIGMA*(T-Tj))/(SIGMA*sqrt(T-Tj));
	  err += -1/(sqrt(2*PI))*(PX[i+1]-PX[i])*vNormalIntegral(upbd);
	}

#pragma omp single 
	{
	  // next chunk of increments, produced while the others finish
	  vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, Ncache, NRV, 0.0f, 1.0f);// leaves the rest of random numbers generated by the idle thread
	}//single
      }//parallel
      BM[0] = BM[Ncache-1] + rootdt*NRV[0];
      PX[0] = PX[Ncache];
    }//for nCal 
    PXend = PX[Ncache];

#pragma omp parallel default(none) shared(NRV, BM, PX, err, rootdt, dt, nCal, left, PXend)
    {
      double errloc = 0.0;
      double upbd, tmp;
      if(left!=0){
	//GUIDED_CHUNK too large: load imbalance
	//GUIDED_CHUNK too small: scheduling overhead
	//#pragma omp for schedule(guided, GUIDED_CHUNK)
#pragma omp for schedule(guided) //tunable
	for (int i = 1; i < left; ++i){
	  tmp = 0.0;
#pragma simd reduction(+:tmp) vectorlengthfor(double) assert
	  for (int j = 1; j <= i; ++j){
	    tmp += NRV[j];
	  }
	  BM[i] = BM[0] + tmp*rootdt;
	  PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(nCal*Ncache+i+1)*dt+SIGMA*BM[i]);	
	}
#pragma omp single
	{
	  PX[1] = X0*exp(-0.5*SIGMA*SIGMA*(nCal*Ncache+1)*dt+SIGMA*BM[0]);
	  PXend = PX[left];
	}

	//maybe vary the scheduling strategy?
	// No nowait here: the reduction into err must be complete before the
	// atomic updates of err in the single constructs below.
#pragma omp for reduction(+:err)
	for (int i = 0; i < left; ++i){
	  int j = nCal*Ncache+i;
	  double Tj = j*(double)T/N;
	  upbd = (log(PX[i]/K)+0.5*SIGMA*SIGMA*(T-Tj))/(SIGMA*sqrt(T-Tj));
	  err += -1/sqrt((2*PI))*(PX[i+1]-PX[i])*vNormalIntegral(upbd);
	}
      }//if

      // Each single block ASSIGNS errloc rather than accumulating into it.
      // One thread may execute several of these blocks (always the case
      // with a single thread), and an accumulated errloc would add the
      // earlier terms to err again.
#pragma omp single nowait
      {
	upbd = (log(X0/K) + 0.5*SIGMA*SIGMA*T)/(SIGMA*sqrt(T));
	errloc = -(X0/(sqrt(2*PI))*vNormalIntegral(upbd));
#pragma omp atomic
	err += errloc;
      }
#pragma omp single nowait
      {
	upbd = (log(X0/K) - 0.5*SIGMA*SIGMA*T)/(SIGMA*sqrt(T));
	errloc = K/(sqrt(2*PI))*vNormalIntegral(upbd);
#pragma omp atomic
	err += errloc;
      }
#pragma omp single nowait
      {
	errloc = 0.0;
	if(PXend > K)
	    errloc = PXend - K;
#pragma omp atomic
	err += errloc;
      }
    }//parallel

    err = fabs(err);
    if(err < EPSILON)
      count++;
    //printf("err=%.10lf\n",err);
  }//MC simulation
  printf ("time %g ms\n", stop_timer());

  printf("err=%.20lf\n",err);

  printf("count=%llu, M=%llu\n", count, M);
  printf("%.5g\n", (double)count/(double)M);

  vslDeleteStream(&stream);
  return 0;
}
Esempio n. 21
0
int main(int argc, char** argv){
    double* A;
    double* B;
    double* C;
    
    double alpha = 1.0;
    double beta = 0.0;
    int i;        
    struct timeval t1,t2, t3, t4;
    
    const int SEED = 1;
    const int METHOD = 0;
    const int BRNG = VSL_BRNG_MCG31;
    VSLStreamStatePtr stream;
    int errcode;
    
    cublasStatus_t status;
    cublasHandle_t handle;
    
    double a=0.0, b= 1.0; // Uniform distribution between 0 and 1
    
    errcode = vslNewStream(&stream, BRNG, SEED);
    
    int width = 100;
    if (argc > 1){
        width = atoi(argv[1]);
    }
    /* Allocate memory for A, B, and C */
    if (cudaMallocManaged(&A, width * width * sizeof(double)) != cudaSuccess){
        fprintf(stderr, "!!!! device memory alocation error (allocate A)\n");
        return EXIT_FAILURE;
    }
    if (cudaMallocManaged(&B, width * width * sizeof(double)) != cudaSuccess){
        fprintf(stderr, "!!!! device memory alocation error (allocate B)\n");
        return EXIT_FAILURE;
    }
    if (cudaMallocManaged(&C, width * width * sizeof(double)) != cudaSuccess){
        fprintf(stderr, "!!!! device memory alocation error (allocate C)\n");
        return EXIT_FAILURE;
    }
    /* Generate width * width random numbers between 0 and 1 to fill matrices A and B. */
    errcode = vdRngUniform(METHOD, stream, width * width, A, a, b);
    CheckVslError(errcode);
    errcode = vdRngUniform(METHOD, stream, width * width, B, a, b);
    CheckVslError(errcode);
    
    /* Now prepare the call to CUBLAS */
    status = cublasCreate(&handle);
    if (status != CUBLAS_STATUS_SUCCESS) {
        fprintf (stderr, "!!!! CUBLAS initialization error\n");
        return EXIT_FAILURE;
    }
    gettimeofday(&t3, NULL);

    
    /* Perform calculation */
    status = cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_T, width, width, width, &alpha, A,
        width, B, width, &beta, C, width);
    if (status != CUBLAS_STATUS_SUCCESS){
        fprintf(stderr, "!!!! kernel execution error.\n");
        return EXIT_FAILURE;
    }
    cudaDeviceSynchronize(); 
    gettimeofday(&t4, NULL);
    const double time = (double) (t4.tv_sec - t3.tv_sec) + 1e-6 * (t4.tv_usec -
        t3.tv_usec);
    const double Gflops = 2. * width * width * width / (double) time * 10e-9;
    printf("Call to cublasDGEMM took %lf\n", time);
    printf("Gflops: %lf\n", Gflops);
    cudaFree(A);
    cudaFree(B);
    cudaFree(C);
    
    status = cublasDestroy(handle);
    if (status != CUBLAS_STATUS_SUCCESS){
        fprintf(stderr, "!!!! shutdown error\n");
        return EXIT_FAILURE;
    }
    
    return 0;
}
Esempio n. 22
0
// CPU specialisation: stores the device id in did_ and creates an MT19937
// VSL stream with the fixed seed 1. The status returned by vslNewStream is
// passed to rand_check.
Random<CPU>::Random (const int did)
  : did_(did)
{
  rand_check (vslNewStream (&vStream_, VSL_BRNG_MT19937, 1));
}
Esempio n. 23
0
double integrateVegas(double * limits , int threads, double * params){
    //Setting the number of threads
     omp_set_num_threads(threads);
    //How many iterations to perform
    int iterations =15;
    //Which iteration to start sampling more
    int switchIteration = 7;
    //How many points to sample in total
    int samples = 100000;
    //How many points to sample after grid set up
    int samplesAfter = 5000000;
    //How many intervals for each dimension
    int intervals = 10;
    //How many subIntervals
    int subIntervals = 1000;
    //Parameter alpha controls convergence rate
    double alpha = 0.5;
    int seed = 40847516;
    //double to store volume integrated over
    double volume = 1.0;
    for(int i=0; i<dimensions; i++){
        volume*= (limits[(2*i)+1]-limits[2*i]);
    };
    //Number of boxes
    int numBoxes = intervals;
    for(int i=1; i<dimensions; i++){
        numBoxes *= intervals;
    }
    //CHANGE SEED WHEN YOU KNOW IT WORKS
    //Setting up one random number stream for each thread
    VSLStreamStatePtr * streams; 
    streams = ( VSLStreamStatePtr * )_mm_malloc(sizeof(VSLStreamStatePtr)*threads,64);
    for(int i=0; i<threads; i++){
        vslNewStream(&streams[i], VSL_BRNG_MT2203+i,seed);
    }
    //Arrays to store integral and uncertainty for each iteration
    double * integral = (double *)_mm_malloc(sizeof(double)*iterations,64);
    double * sigmas = (double *)_mm_malloc(sizeof(double)*iterations,64);
    for(int i=0; i<iterations; i++){
        integral[i] = 0;
        sigmas[i] = 0;
    }
    //Points per each box
    int pointsPerBox = samples/numBoxes;
    //Array storing the box limits (stores x limits then y limits and so on) intervals+1 to store all limits
    double * boxLimits = (double *)_mm_malloc(sizeof(double)*(intervals+1)*dimensions,64);
    //Array to store average function values for each box
    double * heights = (double *)_mm_malloc(sizeof(double)*dimensions*intervals,64);
    //Array storing values of m
    double * mValues = (double *)_mm_malloc(sizeof(double)*intervals,64);
    //Array storing widths of sub boxes
    double * subWidths = (double *) _mm_malloc(sizeof(double)*intervals,64);
    //Getting initial limits for the boxes 
    for(int i=0; i<dimensions; i++){
        double boxWidth = (limits[(2*i)+1]-limits[2*i])/intervals;
        //0th iteration
        boxLimits[i*(intervals+1)] = limits[2*i];
        for(int j=1; j<=intervals; j++){
            int x = (i*(intervals+1))+j;
            boxLimits[x] =  boxLimits[x-1]+boxWidth;
        }
    };
    //Pointer to store random generated  numbers
      double  randomNums[dimensions]__attribute__((aligned(64)));
      int  binNums[dimensions]__attribute__((aligned(64)));
    //Double to store p(x) denominator for monte carlo
    double prob;
    //Values to store integral and sigma for each thread so they can be reduced in OpenMp
    double integralTemp;
    double sigmaTemp;
    double heightsTemp[dimensions*intervals]__attribute__((aligned(64)));
    int threadNum;
#pragma omp parallel  default(none) private(sigmaTemp,integralTemp,binNums,randomNums,prob,threadNum,heightsTemp) shared(iterations,subIntervals,alpha,mValues,subWidths,streams,samples,boxLimits,intervals, integral, sigmas, heights, threads, volume, samplesAfter, switchIteration, params) 
    {
        for(int iter=0; iter<iterations; iter++){ 
            //Stepping up to more samples when grid calibrated
            if(iter==switchIteration){
                samples = samplesAfter;
            }
            //Performing  iterations
            for(int i=0; i<dimensions*intervals; i++){
                heightsTemp[i] = 0;
            }

            integralTemp = 0; 
            sigmaTemp = 0;
            //Getting chunk sizes for each thread
            threadNum = omp_get_thread_num();
            int seg = ceil((double)samples/threads);
            int lower = seg*threadNum;
            int upper = seg*(threadNum+1);
            if(upper > samples){
                upper = samples;
            };
            //Spliting monte carlo up
            for(int i=0; i<seg; i++){
                prob = 1;
                //Randomly choosing bins to sample from
                viRngUniform(VSL_RNG_METHOD_UNIFORM_STD,streams[threadNum],dimensions,binNums,0,intervals);
                vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD,streams[threadNum],dimensions,randomNums,0,1);
                //Getting samples from bins
                for(int j=0; j<dimensions; j++){
                    int x = ((intervals+1)*j)+binNums[j];
                    randomNums[j] *= (boxLimits[x+1]-boxLimits[x]);
                    randomNums[j] += boxLimits[x];
                    prob *= 1.0/(intervals*(boxLimits[x+1]-boxLimits[x]));
                }
                //Performing evaluation of function and adding it to the total integral
                double eval = evaluate(randomNums,params);
                integralTemp += eval/prob;
                sigmaTemp += (eval*eval)/(prob*prob);
                //Calculating the values of f for bin resising
                for(int j=0; j<dimensions; j++){
                    int x = binNums[j]+(j*intervals);
                    //May need to initialize heights
                    // #pragma omp atomic
                    // printf("heightsTemp before=%f\n",heightsTemp[x]);
                    heightsTemp[x] += eval;
                    // printf("heightsTemp=%f x=%d eval=%f thread=%d\n",heightsTemp[x],x,eval,omp_get_thread_num());
                }

            } 
#pragma omp critical
            {
                integral[iter] += integralTemp;
                sigmas[iter] += sigmaTemp;
                for(int k=0; k<dimensions*intervals; k++){
                    // printf("heightTemp[k]=%f k=%d\n",heightsTemp[k],k);
                    heights[k] += heightsTemp[k];
                }
            }
#pragma omp barrier
#pragma omp single
            {
                //Calculating the values of sigma and the integral
                integral[iter] /= samples;
                sigmas[iter] /= samples;
                sigmas[iter] -= (integral[iter]*integral[iter]);
                sigmas[iter] /= (samples-1);
                 // printf("integral=%f\n",integral[iter]);

                //Readjusting the box widths based on the heights
                //Creating array to store values of m and their sum 
                int totalM=0; 
                //Doing for each dimension seperately
                for(int i=0; i<dimensions; i++){
                    double sum = 0;
                    //Getting the sum of f*delta x
                    for(int j=0; j<intervals; j++){
                        int x = (i*(intervals))+j ;
                        //May be bug with these indicies
                        sum += heights[x]*(boxLimits[x+1+i]-boxLimits[x+i]);
                    }
                    //Performing the rescaling 
                    for(int j=0; j<intervals; j++){
                        int x = (i*(intervals))+j;
                        double value = heights[x]*(boxLimits[x+1+i]-boxLimits[x+i]);
                        mValues[j] = ceil(subIntervals*pow((value-1)*(1.0/log(value)),alpha));
                        subWidths[j] = (boxLimits[x+1+i]-boxLimits[x+i])/mValues[j];
                        totalM += mValues[j];
                    }
                    int mPerInterval = totalM/intervals;
                    int mValueIterator = 0;
                    //Adjusting the intervals going from 1 to less than intervals to keep the edges at the limits
                    for(int j=1; j<intervals; j++){
                        double width = 0;
                        for(int y=0; y<mPerInterval; y++){
                            width += subWidths[mValueIterator];
                            mValues[mValueIterator]--;
                            if(mValues[mValueIterator]==0){
                                mValueIterator++;
                            }
                        }
                        //NEED TO SET BOX LIMITS NOW  
                        int x = j+(i*(intervals+1));
                        boxLimits[x] = boxLimits[x-1]+width;    
                    }
                    //Setting mvalues etc. (reseting memory allocated before the dimensions loop to 0)
                    totalM = 0;
                    for(int k=0; k<intervals; k++){
                        subWidths[k] = 0;
                        mValues[k] = 0;

                    }
                }

                //Setting heights to zero for next iteration
                for(int i=0; i<intervals*dimensions; i++ ){
                    heights[i] = 0;
                }
            }

            
        }
    }
    //All iterations done 
    //Free stuff
    
    _mm_free(subWidths);
    _mm_free(mValues);
    _mm_free(boxLimits);
    _mm_free(streams);
    _mm_free(heights);
   
    //Calculating the final value of the integral
    double denom = 0;
    double numerator =0;
    for(int i=7; i<iterations; i++){
        numerator += integral[i]*((integral[i]*integral[i])/(sigmas[i]*sigmas[i]));
        denom += ((integral[i]*integral[i])/(sigmas[i]*sigmas[i]));
         // printf("integral=%f sigma=%f\n",integral[i],sigmas[i]);
    }
    double output  = numerator/denom;
    //Calculating value of x^2 to check if result can be trusted
    double chisq = 0;
    for(int i=0; i<iterations; i++){
       chisq += (((integral[i]-output)*(integral[i]-output))/(sigmas[i]*sigmas[i]));
    }
    if(chisq>iterations){
        printf("Chisq value is %f, it should be not much greater than %d (iterations-1) Integral:%f Analytical Value=%f\n",chisq,iterations-1,output,normValue(params));
    }
      _mm_free(integral);
      _mm_free(sigmas);
    return output;
    
}
Esempio n. 24
0
// Entry point: averages TE noisy realisations of a quadratic price curve
// over HISTORY points and writes the averaged curve to "res_history.txt".
//
// The accumulation runs inside an offload region targeting the MIC
// coprocessor; only ResultPrices is copied back to the host (out clause).
// The elapsed wall-clock time of the computation is printed to stdout.
//
// Returns 0 on completion, 1 if the host-side result buffer cannot be
// allocated.
int main()
{

	SetThreads();
	PrintInfo();

	double Start = omp_get_wtime();

	double * restrict ResultPrices;
	ResultPrices = malloc(sizeof(double) * HISTORY);
	if (!ResultPrices)
	{
		fprintf(stderr, "Failed to allocate the result buffer\n");
		return 1;
	}

#pragma offload target(mic) out(ResultPrices:length(HISTORY))
	{
		SetMICThreads();

		double * restrict Prices;
		double * restrict Epsilon;

		Prices = malloc(sizeof(double) * HISTORY);
		Epsilon = malloc(sizeof(double) * HISTORY);

		//The accumulator is updated with += below, but neither malloc nor
		//the out() clause gives it defined contents, so clear it first
		#pragma omp parallel for shared(ResultPrices)
		for (unsigned long long int k = 0; k < HISTORY; k++)
		{
			ResultPrices[k] = 0.0;
		}

		//Creating random stream
		//NOTE(review): RndStream is never released with vslDeleteStream
		VSLStreamStatePtr RndStream;
		vslNewStream(&RndStream, VSL_BRNG_SFMT19937, (int)time(NULL));

		//Curve coefficients do not depend on the loop indices, compute them once
		const long double QuadCoef = powl(10, (-24.65));
		const long double LinCoef = powl(10, (-13.65));

		for (unsigned int iter = 0; iter < TE; iter++)
		{

			//Draw a fresh Gaussian noise term for every point of the curve
			vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, RndStream, HISTORY, Epsilon, 0, 0.002);

			#pragma omp parallel for shared(Prices, ResultPrices)
			for (unsigned long long int i = 0; i < HISTORY; i++)
			{
				Prices[i] = (  (  i * i * QuadCoef - i * 4.5 * LinCoef + 1.095 ) + Epsilon[i]);
				ResultPrices[i] += Prices[i];
			}

		}

		//Turn the accumulated sums into per-point averages
		#pragma omp parallel for shared(ResultPrices)
		for (unsigned long long int j = 0; j < HISTORY; j++)
		{
			ResultPrices[j] = ResultPrices[j] / TE;
		}


		free(Prices);
		free(Epsilon);
		Prices = NULL;
		Epsilon = NULL;
	}

	double End = omp_get_wtime();
	printf("%lf\n", (End - Start));
	FILE *FpResultHistory;
	FpResultHistory = fopen("res_history.txt", "wb");
	if (FpResultHistory)
	{
		printf("//================================================================\n");
		printf("||	Result history file status : open\n");
		for (unsigned long long int i = 0; i < HISTORY; i++)
		{
			//First column is the point index scaled by 10, second the averaged price
			fprintf(FpResultHistory, "%llu %lf\n", (i * 10), ResultPrices[i]);
		}
		fclose(FpResultHistory);
		printf("||	Result history file status : close\n||\n");
		printf("\\================================================================\n\n");
	}
	else
	{
		//Report the failure instead of silently producing no output file
		perror("res_history.txt");
	}

	free(ResultPrices);
	ResultPrices = NULL;

	return 0;
}
Esempio n. 25
0
/* Create an MT19937 random stream seeded with SEED in `stream' (declared
   outside this function) and register deinit_random_bit to be called at
   normal program termination.

   Both calls are checked with assert, so in builds without NDEBUG a
   non-zero status from either one aborts the program.  */
void
init_random_bit (uint32_t seed)
{
  /* The calls must live outside assert(): when NDEBUG is defined the
     assert argument is not evaluated, which would skip the stream
     creation and the atexit registration altogether.  */
  const int stream_status
    = vslNewStream (&stream, VSL_BRNG_MT19937, (unsigned int) seed);
  assert (0 == stream_status);

  const int atexit_status = atexit (deinit_random_bit);
  assert (0 == atexit_status);

  /* Avoid unused-variable warnings when the asserts compile to nothing.  */
  (void) stream_status;
  (void) atexit_status;
}
Esempio n. 26
0
// Re-seeds the CPU generator: the stream held in vStream_ is deleted and a
// new MT19937 stream is created in its place from `seed`. The status of each
// VSL call is handed to rand_check.
void Random<CPU>::set_seed (int seed)
{
  // Drop the stream currently stored in vStream_.
  rand_check (vslDeleteStream (&vStream_));

  // Build the replacement stream from the requested seed.
  rand_check (vslNewStream (&vStream_, VSL_BRNG_MT19937, seed));
}
Esempio n. 27
0
/*
 * Seeds the __UTIL_sRNG generator state with s.
 *
 * A new VSL stream is created in __UTIL_sRNG.stream using the basic
 * generator stored in __UTIL_sRNG.brng, and __UTIL_sRNG.ind is set to
 * RNG_BLOCK_SIZE. The status returned by vslNewStream is not inspected.
 *
 * NOTE(review): a stream already stored in __UTIL_sRNG.stream is overwritten
 * without a vslDeleteStream call - confirm whether re-seeding is expected to
 * release it first.
 */
void mkl_srand(unsigned int s)
{
    /* presumably marks the buffered block as fully consumed so the next
       draw refills it - TODO confirm against the code that reads ind */
    __UTIL_sRNG.ind = RNG_BLOCK_SIZE;

    vslNewStream(&__UTIL_sRNG.stream, __UTIL_sRNG.brng, s);
}