// pnlSeed: re-seeds the generator state kept in g_RNG.
// The VSL stream currently stored in g_RNG.m_vslStream is deleted and a new one is
// created in its place, using _VSL_UNI_METHOD_ as the generator argument and `s` as the seed.
// The status codes returned by vslDeleteStream / vslNewStream are not inspected here.
// NOTE(review): PNL_BEGIN is a macro defined elsewhere (presumably opens the library namespace) - confirm.
PNL_BEGIN void pnlSeed(int s) { vslDeleteStream(&g_RNG.m_vslStream); vslNewStream(&g_RNG.m_vslStream, _VSL_UNI_METHOD_, s); }
void MainWindow::randomizeSigma_1() { double x1; VSLStreamStatePtr stream; vslNewStream( &stream, VSL_BRNG_MCG31, this->seed ); vdRngUniform( 0, stream, model->nI(), &model->Sigma[0], 0.0, 1.0 ); vslDeleteStream( &stream ); for (int i=0; i < model->nI(); ++i) { std::pair<int,int> ends = model->ends(i); int from = ends.first; int to = ends.second; std::pair<double,double> xy0 = model->xy(from); std::pair<double,double> xy1 = model->xy(to); if (xy0.first==0 && xy1.first==0 || xy0.first==0 && xy1.first==1 || xy0.first==1 && xy1.first==0 || xy0.first==1 && xy1.first==1 || xy0.first==model->xmax() && xy1.first==model->xmax() || xy0.first==model->xmax()-1 && xy1.first==model->xmax() || xy0.first==model->xmax() && xy1.first==model->xmax()-1 || xy0.first==model->xmax()-1 && xy1.first==model->xmax()-1 ) { model->Sigma[i]=this->sigmaU; } else {x1=model->Sigma[i]; if (x1 < this->fraction) model->Sigma[i] = CUTOFF_SIGMA; else model->Sigma[i] =1; } } }
static void bernoulli_generate(int n, double p, int* r) { int seed = 17 + caffe_rng_rand() % 4096; #ifdef _OPENMP int nthr = omp_get_max_threads(); int threshold = nthr * caffe::cpu::OpenMpManager::getProcessorSpeedMHz() / 3; bool run_parallel = (Caffe::mode() != Caffe::GPU) && (omp_in_parallel() == 0) && (n >= threshold); if (!run_parallel) nthr = 1; # pragma omp parallel num_threads(nthr) { const int ithr = omp_get_thread_num(); const int avg_amount = (n + nthr - 1) / nthr; const int my_offset = ithr * avg_amount; const int my_amount = std::min(my_offset + avg_amount, n) - my_offset; #else { const int my_amount = n; const int my_offset = 0; #endif VSLStreamStatePtr stream; vslNewStream(&stream, VSL_BRNG_MCG31, seed); vslSkipAheadStream(stream, my_offset); viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, my_amount, r + my_offset, p); vslDeleteStream(&stream); } }
// Re-seeds both random sources held by the Caffe singleton returned by Get().
//
// seed : value applied to the cuRAND pseudo-random generator and to the
//        MT19937 VSL stream.
//
// The existing cuRAND generator and VSL stream are released and recreated.
// Each release is skipped when the corresponding handle is null, matching the
// checks performed in ~Caffe(); handing a null handle to the destroy call would
// otherwise make the *_CHECK macro report a failure before the new generator
// could be created.
void Caffe::set_random_seed(unsigned int seed) {
  // cuRAND generator
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
                                     CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
                                                  seed));

  // VSL stream
  if (Get().vsl_stream_) {
    VSL_CHECK(vslDeleteStream(&Get().vsl_stream_));
  }
  VSL_CHECK(vslNewStream(&Get().vsl_stream_, VSL_BRNG_MT19937, seed));
}
/*
 * JNI entry point for edu.berkeley.bid.VSL.vslDeleteStream.
 *
 * Fetches the native stream handle associated with `jstream` via getStream,
 * deletes it with vslDeleteStream, writes the (possibly updated) handle back
 * with setStream, and returns the VSL status code to the Java caller.
 */
JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vslDeleteStream
(JNIEnv *env, jclass clazz, jobject jstream)
{
  VSLStreamStatePtr handle = getStream(env, clazz, jstream);
  const int rc = vslDeleteStream(&handle);

  /* Store the handle back so the Java object reflects what the delete call left in it. */
  setStream(env, clazz, jstream, handle);

  return (jint)rc;
}
// Releases the cuBLAS handle, the cuRAND generator and the VSL stream, in that
// order. Each one is released only if its handle is non-null, and every status
// is routed through the matching *_CHECK macro.
Caffe::~Caffe() {
  if (cublas_handle_) {
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  }

  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }

  if (vsl_stream_) {
    VSL_CHECK(vslDeleteStream(&vsl_stream_));
  }
}
/*
 * Allocates a vector of `size` doubles and fills it with Gaussian random
 * numbers (mean 1.0, sigma 3.0, Box-Muller method) from an MT19937 stream.
 *
 * size : number of elements to allocate and generate
 * i    : multiplier for the time-based seed (seed = i * time(0)); note that
 *        i == 0 therefore always yields seed 0
 *
 * Returns the newly allocated buffer, which the caller must release with
 * free(), or NULL when the allocation fails. The original code passed the
 * unchecked calloc result straight to vdRngGaussian.
 */
double * get_vector(int size, int i)
{
    double *vec;
    VSLStreamStatePtr stream;

    /* Allocate first so that a failed allocation never creates a stream. */
    vec = (double *)calloc(size, sizeof(double));
    if (vec == NULL)
    {
        return NULL;
    }

    vslNewStream( &stream, VSL_BRNG_MT19937, i*time(0) );
    vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, size, vec, 1.0, 3.0 );
    vslDeleteStream( &stream );

    return vec;
}
void MainWindow::randRcr() { int i_Rcr = model->index_of_Rcr(); elementCr = fabs((model->I[ i_Rcr ])); this->sigmaMin=model->Sigma[i_Rcr ]; VSLStreamStatePtr stream; vslNewStream( &stream, VSL_BRNG_MCG31, this->seed ); vdRngUniform( 0, stream, model->nI(), &model->Sigma[0], 0.0, 1.0 ); vslDeleteStream( &stream ); double x1=model->Sigma[i_Rcr]; this->randc=x1; this->rand=x1; }
int main(){ unsigned int iter=200000000; int i,j; double x, y; double dUnderCurve=0.0; double pi=0.0; VSLStreamStatePtr stream; //You need one stream for each thread double end_time,start_time; start_time=clock(); #pragma omp parallel private(stream,x,y,i) reduction(+:dUnderCurve) { double r[BLOCK_SIZE*2]; //Careful!!! //you need a private copy of whole array for each thread vslNewStream( &stream, BRNG, (int)clock() ); #pragma omp for for(j=0;j<iter/BLOCK_SIZE;j++) { vdRngUniform( METHOD, stream, BLOCK_SIZE*2, r, 0.0, 1.0 ); //Create random numbers into array r for (i=0;i<BLOCK_SIZE;i++) { x=r[i]; //X Coordinate y=r[i+BLOCK_SIZE]; //Y Coordinate if (x*x + y*y <= 1.0) { //is distance from Origin under Curve dUnderCurve++; } } } vslDeleteStream( &stream ); } pi = dUnderCurve / (double) iter * 4 ; end_time=clock(); printf ("pi = %10.9f\n", pi); printf ("Seconds = %10.9f\n",(double)((end_time-start_time)/1000.0)); return 0; }
void hard_mkl() { /*char *results_file = "hard_mkl.txt"; FILE *res; if((res=fopen(results_file, "w"))==NULL) { printf("Can't open file %s.\n", results_file); exit(1); }*/ for(int i = 10; i <= ARRAY_SIZE; i*=10) { VSLStreamStatePtr stream; vslNewStream( &stream, VSL_BRNG_MT19937, i*time(0) ); double *ar1, *ar2, *ar3, *ar4, *ar5, *ar6; ar1 = (double *)malloc(i*sizeof(double)); ar2 = (double *)malloc(i*sizeof(double)); ar3 = (double *)malloc(i*sizeof(double)); ar4 = (double *)malloc(i*sizeof(double)); ar5 = (double *)malloc(i*sizeof(double)); ar6 = (double *)malloc(i*sizeof(double)); vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, i, ar1, 1.0, 3.0 ); vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, i, ar2, 1.0, 3.0 ); double start = omp_get_wtime(); for(int j = 0; j < EXPERIMENTS_NUM; j++) { vdCos (i, ar1, ar3); vdLn (i, ar1, ar4); vdPow (i, ar1, ar2, ar5); vdCosh(i, ar2, ar6); } double end = omp_get_wtime(); free(ar1); free(ar2); free(ar3); free(ar4); free(ar5); free(ar6); //fprintf(res, "%lf\n", end-start); printf("%lf, i=%d\n", end-start, i); vslDeleteStream( &stream ); } //fclose(res); }
// Re-seeds the CPU random generator.
// The VSL stream currently held in vStream_ is deleted and a new MT19937 stream
// created from `seed` takes its place. The status returned by each VSL call is
// handed to rand_check.
void Random<CPU>::set_seed (int seed) { rand_check (vslDeleteStream (&vStream_)); rand_check (vslNewStream (&vStream_, VSL_BRNG_MT19937, seed)); }
// Destructor: deletes the VSL stream held in vStream_ and hands the returned
// status to rand_check.
Random<CPU>::~Random () { rand_check (vslDeleteStream (&vStream_)); }
/* Tears down the random-bit source: deletes the VSL stream referenced by
   `stream` (a variable declared outside this function) and then resets that
   handle to NULL. The status returned by vslDeleteStream is explicitly
   discarded via the (void) cast. */
static void deinit_random_bit () { (void) vslDeleteStream (&stream); stream = NULL; }
/*
 * Releases the VSL stream owned by `rng`.
 *
 * rng  : engine whose m_stream is to be deleted
 * info : receives the status - 0 when there was no stream to delete,
 *        otherwise the value returned by vslDeleteStream
 */
void rngDestroy(RngEngine* rng, RngErrorType* info)
{
    /* Nothing to release: report success. */
    if (rng->m_stream == NULL)
    {
        *info = 0;
        return;
    }

    *info = vslDeleteStream(&(rng->m_stream));
}
// Destructor: deletes the VSL stream in m_vslStream when one is set.
~CRNG()
{
    if (!m_vslStream)
    {
        return;
    }
    vslDeleteStream(&m_vslStream);
}
// Monte Carlo European option pricing benchmark (single precision).
//
// Prints the build/run configuration, then inside one OpenMP parallel region each
// thread: (1) allocates and randomly initialises OPT_PER_THREAD options,
// (2) prices them by Monte Carlo using its own VSL Gaussian stream,
// (3) compares the call prices against BlackScholesBodyCPU and accumulates error
// statistics through OpenMP reductions. Thread 0 takes the timing and cycle-count
// samples around the pricing phase. A summary is printed after the region.
//
// start_cyc and end_cyc are not declared in this function; they are declared
// elsewhere in the file.
int main(int argc, char* argv[])
{
    double sTime, eTime;          // written by thread 0 inside the parallel region
    double sum_delta = 0.0;       // reduction: sum of |reference - Monte Carlo|
    double sum_ref = 0.0;         // reduction: sum of |reference|
    double max_delta = 0.0;       // reduction: largest absolute error
    double sumReserve = 0.0;      // reduction: sum of confidence / error ratios

    printf("Monte Carlo European Option Pricing Single Precision\n\n");
    printf("Compiler Version = %d\n", __INTEL_COMPILER/100);
    printf("Release Update = %d\n", __INTEL_COMPILER_UPDATE);
    printf("Build Time = %s %s\n", __DATE__, __TIME__);
    printf("Path Length = %d\n", RAND_N);
    printf("Number of Options = %d\n", OPT_N);
    printf("Block Size = %d\n", RAND_BLOCK_LENGTH);
    printf("Worker Threads = %d\n\n", NTHREADS);

    // Size in bytes of each per-thread option array.
    const int mem_size = sizeof(float)*OPT_PER_THREAD;

    // NOTE(review): this assignment happens after NTHREADS was printed and after
    // mem_size was computed - confirm that is intended for non-OpenMP builds.
#ifndef _OPENMP
    NTHREADS = 1;
#endif

    // Per-thread scratch buffers and random streams, indexed by thread id.
    // NOTE(review): the parallel region below has no num_threads clause; presumably
    // the runtime thread count never exceeds MAX_THREADS - confirm.
    float *samples[MAX_THREADS];
    VSLStreamStatePtr Streams[MAX_THREADS];

    // Number of RAND_BLOCK_LENGTH-sized batches generated per option.
    const int nblocks = RAND_N/RAND_BLOCK_LENGTH;

#pragma omp parallel reduction(+ : sum_delta) reduction(+ : sum_ref) reduction(+ : sumReserve) reduction(max : max_delta)
    {
#ifdef _OPENMP
        int threadID = omp_get_thread_num();
#else
        int threadID = 0;
#endif
        // Per-thread seed used for the option parameters below.
        unsigned int randseed = RANDSEED + threadID;
        srand(randseed);

        // Thread-private option data, allocated with SIMDALIGN alignment.
        float *CallResultList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
        float *CallConfidenceList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
        float *StockPriceList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
        float *OptionStrikeList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
        float *OptionYearsList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);

        // Zero the outputs and draw random inputs: price in [5, 50], strike in
        // [10, 25], maturity in [1, 5] (bounds as passed to RandFloat_T).
        for(int i = 0; i < OPT_PER_THREAD; i++)
        {
            CallResultList[i] = 0.0f;
            CallConfidenceList[i] = 0.0f;
            StockPriceList[i] = RandFloat_T(5.0f, 50.0f, &randseed);
            OptionStrikeList[i] = RandFloat_T(10.0f, 25.0f, &randseed);
            OptionYearsList[i] = RandFloat_T(1.0f, 5.0f, &randseed);
        }

        // One batch buffer and one VSL stream per thread. The generator id is
        // VSL_BRNG_MT2203 offset by the thread id; all streams share RANDSEED.
        samples[threadID] = (float *)scalable_aligned_malloc(RAND_BLOCK_LENGTH * sizeof(float), SIMDALIGN);
        vslNewStream(&(Streams[threadID]), VSL_BRNG_MT2203 + threadID, RANDSEED);

        // Wait until every thread has finished its setup before timing starts.
#pragma omp barrier
        if (threadID == 0)
        {
            printf("Starting options pricing...\n");
            sTime = second();
            start_cyc = _rdtsc();
        }

        // Pricing loop: one Monte Carlo estimate per option.
        for(int opt = 0; opt < OPT_PER_THREAD; opt++)
        {
            // Parameters of the Gaussian draw: MuByT is passed as the mean and
            // VBySqrtT as the sigma of vsRngGaussian.
            const float VBySqrtT = VLog2E * sqrtf(OptionYearsList[opt]);
            const float MuByT = MuLog2E * OptionYearsList[opt];
            const float Y = StockPriceList[opt];
            const float Z = OptionStrikeList[opt];
            float v0 = 0.0f;      // running sum of payoffs
            float v1 = 0.0f;      // running sum of squared payoffs
            for(int block = 0; block < nblocks; ++block)
            {
                float *rand = samples[threadID];
                vsRngGaussian (VSL_RNG_METHOD_GAUSSIAN_ICDF, Streams[threadID], RAND_BLOCK_LENGTH, rand, MuByT, VBySqrtT);
#pragma vector aligned
#pragma simd reduction(+:v0) reduction(+:v1)
#pragma unroll(4)
                for(int i=0; i < RAND_BLOCK_LENGTH; i++)
                {
                    // Payoff max(Y * 2^rand - Z, 0).
                    float callValue = Y * exp2f(rand[i]) - Z;
                    callValue = (callValue > 0.0) ? callValue : 0.0;
                    v0 += callValue;
                    v1 += callValue * callValue;
                }
            }
            // Scale the accumulated payoff by exprt and INV_RAND_N, and derive the
            // confidence value from the sample standard deviation.
            const float exprt = exp2f(RLog2E*OptionYearsList[opt]);
            CallResultList[opt] = exprt * v0 * INV_RAND_N;
            const float stdDev = sqrtf((F_RAND_N * v1 - v0 * v0) * STDDEV_DENOM);
            CallConfidenceList[opt] = (float)(exprt * stdDev * CONFIDENCE_DENOM);
        } //end of opt

        // All threads must be done pricing before thread 0 stops the clock.
#pragma omp barrier
        if (threadID == 0)
        {
            end_cyc = _rdtsc();
            eTime = second();
            printf("Parallel simulation completed in %f seconds.\n", eTime-sTime);
            printf("Validating the result...\n");
        }

        // Validation: compare every Monte Carlo call price with the value from
        // BlackScholesBodyCPU. L1norm and max_index are declared here but not used
        // in this region; putReference is filled in but not read.
        double delta = 0.0, ref = 0.0, L1norm = 0.0;
        int max_index = 0;
        double max_local = 0.0;
        for(int i = 0; i < OPT_PER_THREAD; i++)
        {
            double callReference, putReference;
            BlackScholesBodyCPU( callReference, putReference, StockPriceList[i], OptionStrikeList[i], OptionYearsList[i], RISKFREE, VOLATILITY );
            ref = callReference;
            delta = fabs(callReference - CallResultList[i]);
            sum_delta += delta;
            sum_ref += fabs(ref);
            // Only errors above 1e-6 contribute to the reserve ratio.
            if(delta > 1e-6) sumReserve += CallConfidenceList[i] / delta;
            max_local = delta>max_local? delta: max_local;
        }
        // Fold this thread's maximum into the max-reduction variable.
        max_delta = max_local>max_delta? max_local: max_delta;

        // Release the per-thread stream and buffers.
        vslDeleteStream(&(Streams[threadID]));
        scalable_aligned_free(samples[threadID]);
        scalable_aligned_free(CallResultList);
        scalable_aligned_free(CallConfidenceList);
        scalable_aligned_free(StockPriceList);
        scalable_aligned_free(OptionStrikeList);
        scalable_aligned_free(OptionYearsList);
    }//end of parallel block

    // Summary statistics over all options.
    sumReserve /= (double)OPT_N;
    const double L1norm = sum_delta / sum_ref;
    printf("L1_Norm = %4.3E\n", L1norm);
    printf("Average RESERVE = %4.3f\n", sumReserve);
    printf("Max Error = %4.3E\n", max_delta);

    // Cycle and throughput figures for the pricing phase.
    // NOTE(review): cyc is unsigned long long but is printed with %lld below.
    const unsigned long long cyc = end_cyc - start_cyc;
    const double optcyc = (double)cyc/(double)OPT_N;
    printf("==========================================\n");
    printf("Total Cycles = %lld\n", cyc);
    printf("Cyc/opt = %8.3f\n", optcyc);
    printf("Time Elapsed = %8.3f\n", eTime-sTime);
    printf("Options/sec = %8.3f\n", OPT_N/(eTime-sTime));
    printf("==========================================\n");
    return 0;
}
int main(int argc, char *argv[]) { unsigned long long count = 0; double EPSILON = X0*1.0E-2; double err; double PXend; const double dt = T/N; const double rootdt = sqrt((double)T/N); int nCal = N/Ncache; const int left = N%Ncache; VSLStreamStatePtr stream; int errcode = vslNewStream(&stream, VSL_BRNG_MT2203, 0);//seed=0 start_timer(); for (unsigned long long m = 0; m < M; ++m){ // one-time MC simulation err = 0.0; vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, Ncache, NRV, 0.0f, 1.0f);// leaves the rest of random numbers generated by the idle thread BM[0] = rootdt*NRV[0]; PX[0] = X0; for (int k = 0; k < nCal; ++k){ //rootdt:firstprivate??? #pragma omp parallel default(none) shared(NRV, BM, PX, stream, err, PXend, rootdt, dt, k) { double errloc = 0.0; double upbd, tmp; //GUIDED_CHUNK too large: load imbalance //GUIDED_CHUNK too small: scheduling overhead //#pragma omp for schedule(guided, GUIDED_CHUNK) #pragma omp for schedule(guided) //tunable for (int i = 1; i < Ncache; ++i){ //tmp = BM[0]; tmp = 0.0; #pragma simd reduction(+:tmp) vectorlengthfor(double) assert for (int j = 1; j <= i; ++j){ //tmp += rootdt*NRV[j]; tmp += NRV[j]; } //BM[i] = tmp; BM[i] = BM[0] + tmp*rootdt; //PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(k*Ncache+i+1)*dt+SIGMA*tmp); PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(k*Ncache+i+1)*dt+SIGMA*BM[i]); } #pragma omp single { PX[1] = X0*exp(-0.5*SIGMA*SIGMA*(k*Ncache+1)*dt+SIGMA*BM[0]); } //maybe vary the scheduling strategy? 
#pragma omp for reduction(+:err) nowait for (int i = 0; i < Ncache; ++i){ int j = k*Ncache+i; double Tj = j*(double)T/N; upbd = (log(PX[i]/K)+0.5*SIGMA*SIGMA*(T-Tj))/(SIGMA*sqrt(T-Tj)); //errloc -= 1/(sqrt(2*PI))*(PX[i+1]-PX[i])*vNormalIntegral(upbd); err += -1/(sqrt(2*PI))*(PX[i+1]-PX[i])*vNormalIntegral(upbd); } #pragma omp single { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, Ncache, NRV, 0.0f, 1.0f);// leaves the rest of random numbers generated by the idle thread }//single }//parallel BM[0] = BM[Ncache-1] + rootdt*NRV[0]; PX[0] = PX[Ncache]; }//for nCal PXend = PX[Ncache]; #pragma omp parallel default(none) shared(NRV, BM, PX, err, rootdt, dt, nCal, left, PXend) { double errloc = 0.0; double upbd, tmp; if(left!=0){ //GUIDED_CHUNK too large: load imbalance //GUIDED_CHUNK too small: scheduling overhead //#pragma omp for schedule(guided, GUIDED_CHUNK) #pragma omp for schedule(guided) //tunable for (int i = 1; i < left; ++i){ //tmp = BM[0]; tmp = 0.0; #pragma simd reduction(+:tmp) vectorlengthfor(double) assert for (int j = 1; j <= i; ++j){ //tmp += rootdt*NRV[j]; tmp += NRV[j]; } //BM[i] = tmp; BM[i] = BM[0] + tmp*rootdt; //PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(nCal*Ncache+i+1)*dt+SIGMA*BM[i]); PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(nCal*Ncache+i+1)*dt+SIGMA*BM[i]); } #pragma omp single { PX[1] = X0*exp(-0.5*SIGMA*SIGMA*(nCal*Ncache+1)*dt+SIGMA*BM[0]); PXend = PX[left]; } //maybe vary the scheduling strategy? 
#pragma omp for reduction(+:err) nowait for (int i = 0; i < left; ++i){ int j = nCal*Ncache+i; double Tj = j*(double)T/N; upbd = (log(PX[i]/K)+0.5*SIGMA*SIGMA*(T-Tj))/(SIGMA*sqrt(T-Tj)); err += -1/sqrt((2*PI))*(PX[i+1]-PX[i])*vNormalIntegral(upbd); } }//if #pragma omp single nowait { upbd = (log(X0/K) + 0.5*SIGMA*SIGMA*T)/(SIGMA*sqrt(T)); errloc -= X0/(sqrt(2*PI))*vNormalIntegral(upbd); #pragma omp atomic err += errloc; } #pragma omp single nowait { upbd = (log(X0/K) - 0.5*SIGMA*SIGMA*T)/(SIGMA*sqrt(T)); errloc += K/(sqrt(2*PI))*vNormalIntegral(upbd); #pragma omp atomic err += errloc; } #pragma omp single nowait { if(PXend > K) errloc += PXend - K; #pragma omp atomic err += errloc; } }//parallel err = fabs(err); if(err < EPSILON) count++; //printf("err=%.10lf\n",err); }//MC simulation printf ("time %g ms\n", stop_timer()); printf("err=%.20lf\n",err); printf("count=%llu, M=%llu\n", count, M); printf("%.5g\n", (double)count/(double)M); vslDeleteStream(&stream); return 0; }