/*
 * Fill buffer[0 .. *n-1] with normally distributed doubles (mean 0.0,
 * sigma 1.0) drawn from rng->m_stream.
 *
 *   rng        engine whose m_stream is handed to the VSL generators
 *   buffer     output array; must hold at least *n doubles
 *   n          number of values requested; nothing is generated when *n <= 0
 *   isAligned  unused
 *   info       receives the VSL status of the generator call, or 0 when
 *              nothing had to be generated
 *
 * The algorithm is selected at compile time:
 *   USE_RNG_BOX_MULLER  - vdRngGaussian with the BOXMULLER method for a single
 *                         value and BOXMULLER2 for more than one.
 *   USE_RNG_MARSAGLIA   - polar rejection sampling built on vdRngUniform.
 * When neither macro is defined the function generates nothing and leaves
 * *info untouched.
 */
void drnorm(RngEngine* rng, double* buffer, BlasInt* n, BlasInt* isAligned, RngErrorType* info)
{
    UNUSED(isAligned);

#if defined(USE_RNG_BOX_MULLER)
    if (*n == 1) {
        *info = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, rng->m_stream, 1, buffer, 0.0, 1.0);
    } else if (*n > 1) {
        *info = vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, rng->m_stream, *n, buffer, 0.0, 1.0);
    } else {
        *info = 0;
    }
#elif defined(USE_RNG_MARSAGLIA)
    /* Always report a status: previously *info was only written when *n <= 0. */
    *info = 0;

    for (BlasInt iter = 0; iter < *n; ++iter) {
        double x[2];
        double r;

        do {
            int status = vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, rng->m_stream, 2, x, 0.0, 1.0);
            if (status != 0) {
                /* x was not filled; bail out instead of looping on garbage. */
                *info = status;
                return;
            }

            /* Map both samples from [0, 1) onto [-1, 1). */
            x[0] = 2.0 * x[0] - 1.0;
            x[1] = 2.0 * x[1] - 1.0;
            r = x[0] * x[0] + x[1] * x[1];
        } while (r >= 1 || r == 0); /* keep only points strictly inside the unit disc */

        buffer[iter] = x[0] * sqrt(-2.0 * log(r) / r);
    }
#endif
}
/*
 * JNI bridge for VSL.vdRngGaussian: fills the Java double[] j_r with n values
 * produced by vdRngGaussian(method, stream, n, r, a, b).
 *
 * The native stream handle is obtained from j_stream via getStream().
 * Returns the status reported by vdRngGaussian, or -1 when the array storage
 * could not be pinned.
 */
JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngGaussian
(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b)
{
    VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream);

    /* The third argument is the optional isCopy out-pointer; we do not need it. */
    jdouble *r = (*env)->GetPrimitiveArrayCritical(env, j_r, NULL);
    if (r == NULL) {
        /*
         * Pinning failed, so there is nothing to write into and nothing to
         * release. NOTE(review): the JVM is expected to have an exception
         * pending here, making the return value irrelevant - confirm.
         */
        return -1;
    }

    jint retval = vdRngGaussian(method, stream, n, r, a, b);

    /* Mode 0: copy back (if a copy was made) and release the buffer. */
    (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0);

    return retval;
}
/*
 * Allocate an array of `size` doubles and fill it with Gaussian samples
 * (arguments a = 1.0, sigma = 3.0, Box-Muller method) from a freshly created
 * MT19937 stream seeded with i * time(0).
 *
 * Returns the heap-allocated array, which the caller must free(), or NULL
 * when the allocation, the stream creation or the generation fails.
 */
double * get_vector(int size, int i)
{
    /* Allocate first so that an allocation failure cannot leak a stream. */
    double *vec = (double *)calloc(size, sizeof(double));
    if (vec == NULL) {
        return NULL;
    }

    VSLStreamStatePtr stream;
    /* VSL routines report success as status 0. */
    if (vslNewStream( &stream, VSL_BRNG_MT19937, i*time(0) ) != 0) {
        free(vec);
        return NULL;
    }

    int status = vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, size, vec, 1.0, 3.0 );
    vslDeleteStream( &stream );

    if (status != 0) {
        /* Do not hand back a buffer that was never filled. */
        free(vec);
        return NULL;
    }

    return vec;
}
/*
 * Draw n Gaussian values (mean 0.0, sigma = mon_stdev) from rngstream into
 * force[0 .. n-1] and pass the resulting VSL status to CheckVslError.
 */
void fluc_force(Float *force, int n, VSLStreamStatePtr rngstream)
{
    extern Float mon_stdev;

    /* Generate the fluctuations and validate the generator status. */
    int status = vdRngGaussian(METHOD, rngstream, n, force, 0.0, mon_stdev);
    CheckVslError(status);
}
void hard_mkl() { /*char *results_file = "hard_mkl.txt"; FILE *res; if((res=fopen(results_file, "w"))==NULL) { printf("Can't open file %s.\n", results_file); exit(1); }*/ for(int i = 10; i <= ARRAY_SIZE; i*=10) { VSLStreamStatePtr stream; vslNewStream( &stream, VSL_BRNG_MT19937, i*time(0) ); double *ar1, *ar2, *ar3, *ar4, *ar5, *ar6; ar1 = (double *)malloc(i*sizeof(double)); ar2 = (double *)malloc(i*sizeof(double)); ar3 = (double *)malloc(i*sizeof(double)); ar4 = (double *)malloc(i*sizeof(double)); ar5 = (double *)malloc(i*sizeof(double)); ar6 = (double *)malloc(i*sizeof(double)); vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, i, ar1, 1.0, 3.0 ); vdRngGaussian( VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, i, ar2, 1.0, 3.0 ); double start = omp_get_wtime(); for(int j = 0; j < EXPERIMENTS_NUM; j++) { vdCos (i, ar1, ar3); vdLn (i, ar1, ar4); vdPow (i, ar1, ar2, ar5); vdCosh(i, ar2, ar6); } double end = omp_get_wtime(); free(ar1); free(ar2); free(ar3); free(ar4); free(ar5); free(ar6); //fprintf(res, "%lf\n", end-start); printf("%lf, i=%d\n", end-start, i); vslDeleteStream( &stream ); } //fclose(res); }
int main() { SetThreads(); PrintInfo(); double Start = omp_get_wtime(); double * restrict ResultPrices; ResultPrices = malloc(sizeof(double) * HISTORY); #pragma offload target(mic) out(ResultPrices:length(HISTORY)) { SetMICThreads(); double * restrict Prices; double * restrict Epsilon; Prices = malloc(sizeof(double) * HISTORY); Epsilon = malloc(sizeof(double) * HISTORY); //Creating random stream VSLStreamStatePtr RndStream; vslNewStream(&RndStream, VSL_BRNG_SFMT19937, (int)time(NULL)); long double Buff; for (unsigned int iter = 0; iter < TE; iter++) { //Randomize volumes vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, RndStream, HISTORY, Epsilon, 0, 0.002); #pragma omp parallel for shared(Prices, ResultPrices) for (unsigned long long int i = 0; i < HISTORY; i++) { //Buff = i * i * powl(10, (-21.65) - i * 4.5 * powl(10, (-10.65)); //Prices[i] = (((i * i * powl(10, (-24.65))) - (i * 4.5 * powl(10, (-13.65))) + 1.095) + Epsilon[i]); Prices[i] = ( ( i * i * powl(10, (-24.65)) - i * 4.5 * powl(10, (-13.65)) + 1.095 ) + Epsilon[i]); ResultPrices[i] += Prices[i]; } } #pragma omp parallel for shared(ResultPrices) for (unsigned long long int j = 0; j < HISTORY; j++) { ResultPrices[j] = ResultPrices[j] / TE;; } free(Prices); free(Epsilon); Prices = NULL; Epsilon = NULL; } double End = omp_get_wtime(); printf("%lf\n", (End - Start)); FILE *FpResultHistory; //unsigned long long int Buff; FpResultHistory = fopen("res_history.txt", "wb"); if (FpResultHistory) { printf("//================================================================\n"); printf("|| Result history file status : open\n"); for (unsigned long long int i = 0; i < HISTORY; i++) { //Buff = (i); fprintf(FpResultHistory, "%llu %lf\n", (i * 10), ResultPrices[i]); //fprintf(fp_result, "%lf %lf %lf\n", ResultPrices[i], ResultVolumeUp[i], ResultVolumeDown[i]); } fclose(FpResultHistory); printf("|| Result history file status : close\n||\n"); printf("\\================================================================\n\n"); } 
free(ResultPrices); ResultPrices = NULL; return 0; }
/*
 * Small molecular-dynamics driver.
 *
 * Allocates position (x), velocity (v) and force (f) arrays for n particles
 * (3 components each), initialises x and v randomly, then runs mt time steps
 * of Force / Solver / Output_energy, writing an xyz frame every dtxyz steps
 * and once more after the last step.
 *
 * With MKLRNG defined the initial values come from vdRngGaussian on an
 * MT19937 stream with a fixed seed; otherwise they come from rand().
 *
 * Returns 0 on success, 1 when the arrays could not be allocated.
 */
int main()
{
    const int n = 500;      // Number of atoms, molecules
    const int mt = 100;     // Max time steps
    const int dtxyz = 100;  // Time interval to output xyz
    int i;
    int j;
    double *x;
    double *v;
    double *f;
    const double domain = 300;               // Domain size (a.u.)
    const double dt = 10;                    // Time interval (a.u.)
    const double ms = 0.00001;               // Max speed (a.u.)
    const double em = 1822.88839 * 28.0134;  // Effective mass of N2
    const double lje = 0.000313202;          // Lennard-Jones epsilon of N2
    const double ljs = 6.908841465;          // Lennard-Jones sigma of N2

    x = (double *) malloc(n * 3 * sizeof(double));
    v = (double *) malloc(n * 3 * sizeof(double));
    f = (double *) malloc(n * 3 * sizeof(double));
    if (x == NULL || v == NULL || f == NULL) {
        /* Nothing else has been acquired yet, so cleanup is just the arrays. */
        free(x);
        free(v);
        free(f);
        return 1;
    }

#ifdef MKLRNG
    VSLStreamStatePtr stream;
    vslNewStream(&stream, VSL_BRNG_MT19937, 5489); // Initiation, type, seed
#endif

    // Initialization
#ifdef MKLRNG
    for (i=0; i<n; i++) {
        int nRN = 3;
        double GRN[3];

        vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, nRN, GRN, 0.5 * domain, domain);
        for (j=0; j<3; j++) x[i*3+j] = GRN[j];

        vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, nRN, GRN, 0.0, 0.5 * ms);
        for (j=0; j<3; j++) v[i*3+j] = GRN[j];
    }
#else
    for (i=0; i<n; i++)
        for (j=0; j<3; j++)
            x[i*3+j] = domain * rand() / (RAND_MAX + 1.0);
    for (i=0; i<n; i++)
        for (j=0; j<3; j++)
            v[i*3+j] = ms * (rand() / (RAND_MAX + 1.0) - 0.5);
#endif

    // Dynamics
    for (i=0; i<mt; i++) {
        Force(n, lje, ljs, x, f);
        Solver(n, dt, em, x, v, f);
        Output_energy(i, n, dt, em, lje, ljs, x, v);
        if (i % dtxyz == 0) Output_xyz(i, n, x);
    }
    // Final frame; i == mt here
    Output_xyz(i, n, x);

    // Release everything acquired above (previously leaked)
#ifdef MKLRNG
    vslDeleteStream(&stream);
#endif
    free(x);
    free(v);
    free(f);

    return 0;
}
// Fills vec[0 .. numElem-1] with Gaussian samples drawn from the global
// stream g_RNG.m_vslStream (Box-Muller2 method).
// Note: the generator is given sqrt(sigma) as its last argument, not sigma
// itself.
void pnlRandNormal( int numElem, double* vec, double mean, double sigma )
{
    const double rootSigma = sqrt(sigma);

    vdRngGaussian( VSL_METHOD_DGAUSSIAN_BOXMULLER2,
                   g_RNG.m_vslStream,
                   numElem,
                   vec,
                   mean,
                   rootSigma );
}
// Returns a single Gaussian sample drawn from the global stream
// g_RNG.m_vslStream (Box-Muller2 method).
// Note: the generator is given sqrt(sigma) as its last argument, not sigma
// itself. The result starts at 0 and is returned as-is if the generator
// does not write it.
double pnlRandNormal( double mean, double sigma )
{
    double sample = 0;
    const double rootSigma = sqrt(sigma);

    vdRngGaussian( VSL_METHOD_DGAUSSIAN_BOXMULLER2,
                   g_RNG.m_vslStream,
                   1,
                   &sample,
                   mean,
                   rootSigma );

    return sample;
}
int main(int argc, char *argv[]) { unsigned long long count = 0; double EPSILON = X0*1.0E-2; double err; double PXend; const double dt = T/N; const double rootdt = sqrt((double)T/N); int nCal = N/Ncache; const int left = N%Ncache; VSLStreamStatePtr stream; int errcode = vslNewStream(&stream, VSL_BRNG_MT2203, 0);//seed=0 start_timer(); for (unsigned long long m = 0; m < M; ++m){ // one-time MC simulation err = 0.0; vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, Ncache, NRV, 0.0f, 1.0f);// leaves the rest of random numbers generated by the idle thread BM[0] = rootdt*NRV[0]; PX[0] = X0; for (int k = 0; k < nCal; ++k){ //rootdt:firstprivate??? #pragma omp parallel default(none) shared(NRV, BM, PX, stream, err, PXend, rootdt, dt, k) { double errloc = 0.0; double upbd, tmp; //GUIDED_CHUNK too large: load imbalance //GUIDED_CHUNK too small: scheduling overhead //#pragma omp for schedule(guided, GUIDED_CHUNK) #pragma omp for schedule(guided) //tunable for (int i = 1; i < Ncache; ++i){ //tmp = BM[0]; tmp = 0.0; #pragma simd reduction(+:tmp) vectorlengthfor(double) assert for (int j = 1; j <= i; ++j){ //tmp += rootdt*NRV[j]; tmp += NRV[j]; } //BM[i] = tmp; BM[i] = BM[0] + tmp*rootdt; //PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(k*Ncache+i+1)*dt+SIGMA*tmp); PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(k*Ncache+i+1)*dt+SIGMA*BM[i]); } #pragma omp single { PX[1] = X0*exp(-0.5*SIGMA*SIGMA*(k*Ncache+1)*dt+SIGMA*BM[0]); } //maybe vary the scheduling strategy? 
#pragma omp for reduction(+:err) nowait for (int i = 0; i < Ncache; ++i){ int j = k*Ncache+i; double Tj = j*(double)T/N; upbd = (log(PX[i]/K)+0.5*SIGMA*SIGMA*(T-Tj))/(SIGMA*sqrt(T-Tj)); //errloc -= 1/(sqrt(2*PI))*(PX[i+1]-PX[i])*vNormalIntegral(upbd); err += -1/(sqrt(2*PI))*(PX[i+1]-PX[i])*vNormalIntegral(upbd); } #pragma omp single { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, Ncache, NRV, 0.0f, 1.0f);// leaves the rest of random numbers generated by the idle thread }//single }//parallel BM[0] = BM[Ncache-1] + rootdt*NRV[0]; PX[0] = PX[Ncache]; }//for nCal PXend = PX[Ncache]; #pragma omp parallel default(none) shared(NRV, BM, PX, err, rootdt, dt, nCal, left, PXend) { double errloc = 0.0; double upbd, tmp; if(left!=0){ //GUIDED_CHUNK too large: load imbalance //GUIDED_CHUNK too small: scheduling overhead //#pragma omp for schedule(guided, GUIDED_CHUNK) #pragma omp for schedule(guided) //tunable for (int i = 1; i < left; ++i){ //tmp = BM[0]; tmp = 0.0; #pragma simd reduction(+:tmp) vectorlengthfor(double) assert for (int j = 1; j <= i; ++j){ //tmp += rootdt*NRV[j]; tmp += NRV[j]; } //BM[i] = tmp; BM[i] = BM[0] + tmp*rootdt; //PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(nCal*Ncache+i+1)*dt+SIGMA*BM[i]); PX[i+1] = X0*exp(-0.5*SIGMA*SIGMA*(nCal*Ncache+i+1)*dt+SIGMA*BM[i]); } #pragma omp single { PX[1] = X0*exp(-0.5*SIGMA*SIGMA*(nCal*Ncache+1)*dt+SIGMA*BM[0]); PXend = PX[left]; } //maybe vary the scheduling strategy? 
#pragma omp for reduction(+:err) nowait for (int i = 0; i < left; ++i){ int j = nCal*Ncache+i; double Tj = j*(double)T/N; upbd = (log(PX[i]/K)+0.5*SIGMA*SIGMA*(T-Tj))/(SIGMA*sqrt(T-Tj)); err += -1/sqrt((2*PI))*(PX[i+1]-PX[i])*vNormalIntegral(upbd); } }//if #pragma omp single nowait { upbd = (log(X0/K) + 0.5*SIGMA*SIGMA*T)/(SIGMA*sqrt(T)); errloc -= X0/(sqrt(2*PI))*vNormalIntegral(upbd); #pragma omp atomic err += errloc; } #pragma omp single nowait { upbd = (log(X0/K) - 0.5*SIGMA*SIGMA*T)/(SIGMA*sqrt(T)); errloc += K/(sqrt(2*PI))*vNormalIntegral(upbd); #pragma omp atomic err += errloc; } #pragma omp single nowait { if(PXend > K) errloc += PXend - K; #pragma omp atomic err += errloc; } }//parallel err = fabs(err); if(err < EPSILON) count++; //printf("err=%.10lf\n",err); }//MC simulation printf ("time %g ms\n", stop_timer()); printf("err=%.20lf\n",err); printf("count=%llu, M=%llu\n", count, M); printf("%.5g\n", (double)count/(double)M); vslDeleteStream(&stream); return 0; }