double tls_rand()
{
    /* Setup PRNG state for current thread. */
    UNUSED(pthread_once(&tls_prng_once, tls_prng_init));

    /* Create PRNG state if not exists. */
    dsfmt_t *s = pthread_getspecific(tls_prng_key);
    if (!s) {
        /* Initialize seed from system PRNG generator. */
        uint32_t seed = 0;
        FILE *fp = fopen("/dev/urandom", "r");
        if (fp == NULL) {
            fp = fopen("/dev/random", "r");
        }
        if (fp != NULL) {
            if (fread(&seed, sizeof(uint32_t), 1, fp) != 1) {
                fclose(fp);
                fp = NULL;
            }
        }
        if (fp == NULL) {
            fprintf(stderr, "error: PRNG: cannot seed from "
                    "/dev/urandom, seeding from local time\n");
            struct timeval tv;
            if (gettimeofday(&tv, NULL) == 0) {
                seed = (uint32_t)(tv.tv_sec ^ tv.tv_usec);
            } else {
                /* Last resort. */
                seed = (uint32_t)time(NULL);
            }
        } else {
            fclose(fp);
        }

        /* Initialize PRNG state. */
#ifdef HAVE_POSIX_MEMALIGN
        if (posix_memalign((void **)&s, 16, sizeof(dsfmt_t)) != 0) {
            fprintf(stderr, "error: PRNG: not enough memory\n");
            return 0.0;
        }
#else
        if ((s = malloc(sizeof(dsfmt_t))) == NULL) {
            fprintf(stderr, "error: PRNG: not enough memory\n");
            return 0.0;
        }
#endif
        dsfmt_init_gen_rand(s, seed);
        UNUSED(pthread_setspecific(tls_prng_key, s));
    }

    return dsfmt_genrand_close_open(s);
}
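/* Companion sketch (not from the source): tls_rand() above relies on
 * tls_prng_once, tls_prng_key and tls_prng_init() being defined elsewhere.
 * Assuming the per-thread state only needs to be freed on thread exit, the
 * declarations could look roughly like this. */
#include <pthread.h>
#include <stdlib.h>

static pthread_key_t tls_prng_key;
static pthread_once_t tls_prng_once = PTHREAD_ONCE_INIT;

/* Destructor: release the per-thread dsfmt_t when its thread exits. */
static void tls_prng_deinit(void *data)
{
    free(data);
}

/* Executed exactly once (via pthread_once) to create the TLS key. */
static void tls_prng_init(void)
{
    (void)pthread_key_create(&tls_prng_key, tls_prng_deinit);
}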
void rand_seed(ru32 s, void *d)
{
    // get time seed
    if( s == 0 ) {
        s = rand_time_seed(d);
    }

    // set seed
    if( d == NULL ) {
        dsfmt_gv_init_gen_rand(s);
    } else {
        dsfmt_t *p = (dsfmt_t*) d;
        dsfmt_init_gen_rand(p, s);
    }
}
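/* Usage sketch for rand_seed() above (hypothetical caller; ru32 is assumed to
 * be a 32-bit unsigned typedef).  Passing d == NULL seeds dSFMT's global
 * state, which pairs with the library's gv_ convenience calls; a non-NULL d
 * seeds a caller-owned dsfmt_t instead. */
void rand_seed_example(void)
{
    dsfmt_t local_state;

    rand_seed(12345u, NULL);                  /* seed the global generator  */
    double a = dsfmt_gv_genrand_close_open(); /* draw from the global state */

    rand_seed(12345u, &local_state);          /* seed a private generator   */
    double b = dsfmt_genrand_close_open(&local_state);

    (void)a;
    (void)b;
}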
int main(int argc, char* argv[])
{
    int i, inside, seed;
    double x, y, pi;
    const long n_steps = 1000000000;
    dsfmt_t dsfmt;

    seed = 142857;
    inside = 0;
    dsfmt_init_gen_rand(&dsfmt, seed);

    for (i = 0; i < n_steps; i++) {
        x = dsfmt_genrand_close_open(&dsfmt);
        y = dsfmt_genrand_close_open(&dsfmt);
        if (x * x + y * y < 1.0) {
            inside++;
        }
    }

    pi = (double)inside / n_steps * 4;
    printf("%.10g\n", pi);
    return 0;
}
microbialGA::microbialGA(unsigned int _populationSize, unsigned int _demeSize,
                         unsigned int _geneSize, float _recombinationRate,
                         float _mutationRate, objectiveFunctionEvaluator *_eval,
                         fitnessComparisonTypes comparisonType, int reportEvery)
    : populationSize(_populationSize), demeSize(_demeSize), geneSize(_geneSize),
      recombinationRate(_recombinationRate), mutationRate(_mutationRate),
      evaluator(_eval), fitnessComparisonType(comparisonType),
      reportPeriod(reportEvery)
{
    srand((int)time(NULL));
    mtRand = new dsfmt_t();
    dsfmt_init_gen_rand(mtRand, (int)time(NULL));

    //initialise the population randomly
    population.resize(populationSize);
    for(int i=0; i < populationSize; i++) {
        population[i].resize(geneSize);
        for(int j=0; j < geneSize; j++) {
            population[i][j] = randUF() * numeric_limits<unsigned int>::max();
        }
    }
    geneCharSize = geneSize * sizeof(unsigned int);
}
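// randUF() is used above but not shown. A plausible sketch (an assumption;
// the real class may differ): a uniform float in [0, 1) drawn from the
// per-instance dSFMT state.
float microbialGA::randUF() {
    return (float)dsfmt_genrand_close_open(mtRand);
}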
int main(int argc, char *argv[])
{
    // Initialize the random number generators
    dsfmt_init_gen_rand(&dsfmt, 0);
    dsfmt_gv_init_gen_rand(0);

    // Clip against the surrounding world region
    Polygon_2 world;
    world.push_back(Polygon_2::Point_2(1, 1));
    world.push_back(Polygon_2::Point_2(2400, 1));
    world.push_back(Polygon_2::Point_2(2400, 2400));
    world.push_back(Polygon_2::Point_2(1, 2400));

#include "treedata.inc"

    /*
    world.push_back(Polygon_2::Point_2(500.0, 0.0));
    world.push_back(Polygon_2::Point_2(1000.0, 1000.0));
    world.push_back(Polygon_2::Point_2(0.0, 1000.0));

    TreeNode tn1("tn1", 500);
    TreeNode tn2("tn2", 100);
    TreeNode tn3("tn3", 300);
    TreeNode tn4("tn4", 400);
    TreeNode tn5("tn5", 800);
    TreeNode tn6("tn6", 400);
    TreeNode tnroot("root", 0);
    tnroot.add_child(tn1);
    tnroot.add_child(tn2);
    tnroot.add_child(tn3);
    tnroot.add_child(tn4);
    tnroot.add_child(tn5);
    tnroot.add_child(tn6);
    #define ROOT_TREE_NODE tnroot
    */

    ROOT_TREE_NODE.region = world;
    voronoi_treemap(ROOT_TREE_NODE);
    ROOT_TREE_NODE.draw();
}
/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int Lanczos_EigenValue(struct BindStruct *X) { fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueStart); FILE *fp; char sdt[D_FileNameMax],sdt_2[D_FileNameMax]; int stp, iproc; long int i,iv,i_max; unsigned long int i_max_tmp, sum_i_max; int k_exct,Target; int iconv=-1; double beta1,alpha1; //beta,alpha1 should be real double complex temp1,temp2; double complex cbeta1; double E[5],ebefor; int mythread; // for GC double dnorm; double complex cdnorm; long unsigned int u_long_i; dsfmt_t dsfmt; #ifdef lapack double **tmp_mat; double *tmp_E; int int_i,int_j,mfint[7]; #endif sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead); i_max=X->Check.idim_max; k_exct = X->Def.k_exct; if(initial_mode == 0){ sum_i_max = SumMPI_li(X->Check.idim_max); X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv) % sum_i_max + 1; iv=X->Large.iv; fprintf(stdoutMPI, " initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct); #pragma omp parallel for default(none) private(i) shared(v0, v1) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v0[i]=0.0; v1[i]=0.0; } sum_i_max = 0; for (iproc = 0; iproc < nproc; iproc++) { i_max_tmp = BcastMPI_li(iproc, i_max); if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) { if (myrank == iproc) { v1[iv - sum_i_max+1] = 1.0; if (X->Def.iInitialVecType == 0) { v1[iv - sum_i_max+1] += 1.0*I; v1[iv - sum_i_max+1] /= sqrt(2.0); } }/*if (myrank == iproc)*/ }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/ sum_i_max += i_max_tmp; }/*for (iproc = 0; iproc < nproc; iproc++)*/ }/*if(initial_mode == 0)*/ else if(initial_mode==1){ iv = X->Def.initial_iv; fprintf(stdoutMPI, " initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct); #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \ shared(v0, v1, iv, X, nthreads, myrank) firstprivate(i_max) { #pragma omp for for (i = 1; i <= i_max; i++) { v0[i] = 0.0; } /* Initialise MT */ #ifdef _OPENMP mythread = omp_get_thread_num(); #else mythread = 0; #endif u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank; dsfmt_init_gen_rand(&dsfmt, u_long_i); if (X->Def.iInitialVecType == 0) { #pragma omp for for (i = 1; i <= i_max; i++) v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; } else { #pragma omp for for (i = 1; i <= i_max; i++) v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); } }/*#pragma omp parallel*/ cdnorm=0.0; #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: cdnorm) for(i=1;i<=i_max;i++){ cdnorm += conj(v1[i])*v1[i]; } cdnorm = SumMPI_dc(cdnorm); dnorm=creal(cdnorm); dnorm=sqrt(dnorm); #pragma omp parallel for default(none) private(i) shared(v1) firstprivate(i_max, dnorm) for(i=1;i<=i_max;i++){ v1[i] = v1[i]/dnorm; } }/*else if(initial_mode==1)*/ //Eigenvalues by Lanczos method TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); mltply(X, v0, v1); stp=1; TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1=creal(X->Large.prdct) ;// alpha = v^{\dag}*H*v alpha[1]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i = 1; i <= i_max; i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[1]=beta1; ebefor=0; /* Set Maximum 
number of loop to the dimention of the Wavefunction */ i_max_tmp = SumMPI_li(i_max); if(i_max_tmp < X->Def.Lanczos_max){ X->Def.Lanczos_max = i_max_tmp; } if(i_max_tmp < X->Def.LanczosTarget){ X->Def.LanczosTarget = i_max_tmp; } if(i_max_tmp == 1){ E[1]=alpha[1]; vec12(alpha,beta,stp,E,X); X->Large.itr=stp; X->Phys.Target_energy=E[k_exct]; iconv=0; fprintf(stdoutMPI," LanczosStep E[1] \n"); fprintf(stdoutMPI," stp=%d %.10lf \n",stp,E[1]); } else{ #ifdef lapack fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] E_Max/Nsite\n"); #else fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] \n"); #endif for(stp = 2; stp <= X->Def.Lanczos_max; stp++){ #pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(i=1;i<=i_max;i++){ temp1 = v1[i]; temp2 = (v0[i]-alpha1*v1[i])/beta1; v0[i] = -beta1*temp1; v1[i] = temp2; } mltply(X, v0, v1); TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1=creal(X->Large.prdct); alpha[stp]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i=1;i<=i_max;i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[stp]=beta1; Target = X->Def.LanczosTarget; if(stp==2){ #ifdef lapack d_malloc2(tmp_mat,stp,stp); d_malloc1(tmp_E,stp+1); for(int_i=0;int_i<stp;int_i++){ for(int_j=0;int_j<stp;int_j++){ tmp_mat[int_i][int_j] = 0.0; } } tmp_mat[0][0] = alpha[1]; tmp_mat[0][1] = beta[1]; tmp_mat[1][0] = beta[1]; tmp_mat[1][1] = alpha[2]; DSEVvalue(stp,tmp_mat,tmp_E); E[1] = tmp_E[0]; E[2] = tmp_E[1]; E[3] = tmp_E[2]; E[4] = tmp_E[3]; d_free1(tmp_E,stp+1); d_free2(tmp_mat,stp,stp); #else bisec(alpha,beta,stp,E,4,eps_Bisec); #endif ebefor=E[Target]; childfopenMPI(sdt_2,"w", &fp); #ifdef lapack fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] E_Max/Nsite\n"); fprintf(fp, "stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); #else fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] \n"); fprintf(fp,"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); #endif fclose(fp); } if(stp>2 && stp%2==0){ childfopenMPI(sdt_2,"a", &fp); #ifdef lapack d_malloc2(tmp_mat,stp,stp); d_malloc1(tmp_E,stp+1); for(int_i=0;int_i<stp;int_i++){ for(int_j=0;int_j<stp;int_j++){ tmp_mat[int_i][int_j] = 0.0; } } tmp_mat[0][0] = alpha[1]; tmp_mat[0][1] = beta[1]; for(int_i=1;int_i<stp-1;int_i++){ tmp_mat[int_i][int_i] = alpha[int_i+1]; tmp_mat[int_i][int_i+1] = beta[int_i+1]; tmp_mat[int_i][int_i-1] = beta[int_i]; } tmp_mat[int_i][int_i] = alpha[int_i+1]; tmp_mat[int_i][int_i-1] = beta[int_i]; DSEVvalue(stp,tmp_mat,tmp_E); E[1] = tmp_E[0]; E[2] = tmp_E[1]; E[3] = tmp_E[2]; E[4] = tmp_E[3]; E[0] = tmp_E[stp-1]; d_free1(tmp_E,stp+1); d_free2(tmp_mat,stp,stp); fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI); fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI); #else bisec(alpha,beta,stp,E,4,eps_Bisec); fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf \n",stp,E[1],E[2],E[3],E[4]); fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4]); #endif fclose(fp); if(fabs((E[Target]-ebefor)/E[Target])<eps_Lanczos || 
fabs(beta[stp])<pow(10.0, -14)){ vec12(alpha,beta,stp,E,X); X->Large.itr=stp; X->Phys.Target_energy=E[k_exct]; iconv=0; break; } ebefor=E[Target]; } } } sprintf(sdt,cFileNameTimeKeep,X->Def.CDataFileHead); if(iconv!=0){ sprintf(sdt, cLogLanczos_EigenValueNotConverged); return -1; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueFinish, "a"); fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd); return 0; }
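/* For reference (not from the source): the iteration above is the standard
 * Lanczos three-term recurrence.  In the code's notation, with v_k the current
 * v1 and H applied by mltply(),
 *
 *     alpha[k] = Re <v_k | H | v_k>
 *     w        = H v_k - beta[k-1] v_{k-1}
 *     beta[k]  = || w - alpha[k] v_k ||
 *     v_{k+1}  = (w - alpha[k] v_k) / beta[k]
 *
 * and E[1]..E[4] are the lowest eigenvalues of the tridiagonal matrix built
 * from alpha[] (diagonal) and beta[] (off-diagonal), computed either by
 * DSEVvalue() (LAPACK branch) or bisec(). */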
static void ran_set1(void *vstate, unsigned long int s)
{
    dsfmt_init_gen_rand((dsfmt_t *)vstate, (uint32_t)s);
}
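/* Illustrative sketch (an assumption, not from the source): ran_set1() has the
 * signature GSL expects for the "set" callback of a gsl_rng_type, so a dSFMT
 * wrapper is typically wired up along these lines.  The get/get_double
 * callbacks and the type instance below are hypothetical. */
#include <gsl/gsl_rng.h>
#include "dSFMT.h"

static double ran_get_double(void *vstate)
{
    return dsfmt_genrand_close_open((dsfmt_t *)vstate);
}

static unsigned long int ran_get(void *vstate)
{
    /* Map the [0,1) double onto the advertised [min, max] integer range. */
    return (unsigned long int)(ran_get_double(vstate) * 4294967296.0);
}

static const gsl_rng_type ran_type = {
    "dsfmt",          /* name       */
    0xffffffffUL,     /* max        */
    0,                /* min        */
    sizeof(dsfmt_t),  /* state size */
    &ran_set1,
    &ran_get,
    &ran_get_double
};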
int main(int argc, char *argv[]) { int ret = EXIT_FAILURE; // Set up the PRNG dsfmt_t *dsfmt = malloc(sizeof(dsfmt_t)); if(dsfmt == NULL) { fprintf(stdout, "unable to allocate PRNG\n"); goto skip_deallocate_prng; } dsfmt_init_gen_rand(dsfmt, SEED); // Set up the source values double *src = fftw_malloc(N*VL*sizeof(double)); if(src == NULL) { fprintf(stdout, "unable to allocate source vector\n"); goto skip_deallocate_src; } for(unsigned int i = 0; i < N*VL; ++i) { src[i] = dsfmt_genrand_open_close(dsfmt); } // Allocate the FFT destination array double complex *fft = fftw_malloc(N*VL*sizeof(double complex)); if(fft == NULL) { fprintf(stdout, "unable to allocate fft vector\n"); goto skip_deallocate_fft; } // Execute the forward FFT fftw_plan fwd_plan = fftw_plan_many_dft_r2c(1, &N, VL, src, NULL, VL, 1, fft, NULL, VL, 1, FFTW_ESTIMATE); if(fwd_plan == NULL) { fprintf(stdout, "unable to allocate fft forward plan\n"); goto skip_deallocate_fwd_plan; } fftw_execute(fwd_plan); // Fill in the rest of the destination values using the Hermitian property. fft_r2c_1d_vec_finish(fft, N, VL); // Allocate the reverse FFT destination array double complex *dst = fftw_malloc(N*VL*sizeof(double complex)); if(dst == NULL) { fprintf(stdout, "unable to allocate dst vector\n"); goto skip_deallocate_dst; } // Perform the reverse FFT fftw_plan rev_plan = fftw_plan_many_dft(1, &N, VL, fft, NULL, VL, 1, dst, NULL, VL, 1, FFTW_BACKWARD, FFTW_ESTIMATE); if(rev_plan == NULL) { fprintf(stdout, "unable to allocate fft reverse plan\n"); goto skip_deallocate_rev_plan; } fftw_execute(rev_plan); // Compare the two vectors by sup norm double norm = 0.0; for(unsigned int i = 0; i < N*VL; ++i) { // Divide the resulting by N, because FFTW computes the un-normalized DFT: // the forward followed by reverse transform scales the data by N. norm = fmax(norm, cabs(dst[i]/N - src[i])); } if(norm <= 1e-6) { ret = EXIT_SUCCESS; } fftw_destroy_plan(rev_plan); skip_deallocate_rev_plan: fftw_free(dst); skip_deallocate_dst: fftw_destroy_plan(fwd_plan); skip_deallocate_fwd_plan: fftw_free(fft); skip_deallocate_fft: fftw_free(src); skip_deallocate_src: free(dsfmt); skip_deallocate_prng: // Keep valgrind happy by having fftw clean up its internal structures. This // helps ensure we aren't leaking memory. fftw_cleanup(); return ret; }
void rngInit(RngEngine* rng, RngSeedType* seedValue, RngErrorType* info)
{
    dsfmt_init_gen_rand(&(rng->m_dsfmt), *seedValue);
    *info = 0;
}
int main(int argc, char **argv) { static const char **fields; static uint64_t *lengths; dsfmt_t state; Pvoid_t uuids = NULL; tdb_cons* c = tdb_cons_init(); test_cons_settings(c); uint64_t i, j; __uint128_t prev_uuid = 0; Word_t key; int tst; assert(tdb_cons_open(c, argv[1], fields, 0) == 0); dsfmt_init_gen_rand(&state, 2489); for (i = 0; i < NUM_TRAILS; i++){ uint8_t uuid[16]; gen_random_uuid(uuid, &state); memcpy(&key, uuid, 8); J1S(tst, uuids, key); if (!tst){ printf("half-word collision! change random seed!\n"); return -1; } for (j = 0; j < NUM_EVENTS; j++) tdb_cons_add(c, uuid, i * 100 + j, fields, lengths); } J1C(key, uuids, 0, -1); assert(key == NUM_TRAILS); assert(tdb_cons_finalize(c) == 0); tdb_cons_close(c); tdb* t = tdb_init(); assert(tdb_open(t, argv[1]) == 0); assert(tdb_num_trails(t) == NUM_TRAILS); assert(tdb_num_events(t) == NUM_TRAILS * NUM_EVENTS); for (i = 0; i < NUM_TRAILS; i++){ __uint128_t this_uuid; /* uuids must be monotonically increasing */ memcpy(&this_uuid, tdb_get_uuid(t, i), 16); assert(this_uuid > prev_uuid); prev_uuid = this_uuid; /* remove this uuid from the uuid set and make sure it exists */ memcpy(&key, &this_uuid, 8); J1U(tst, uuids, key); assert(tst == 1); } /* make sure we retrieved all uuids */ J1C(key, uuids, 0, -1); assert(key == 0); return 0; }
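/* Hypothetical sketch (not from the source) of the gen_random_uuid() helper
 * used above: fill 16 bytes from the seeded dSFMT stream.  The real test
 * harness may construct UUIDs differently. */
static void gen_random_uuid(uint8_t uuid[16], dsfmt_t *state)
{
    for (int i = 0; i < 16; i++)
        uuid[i] = (uint8_t)(dsfmt_genrand_close_open(state) * 256.0);
}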
struct double_pair randmatstat(int t) { dsfmt_t dsfmt; dsfmt_init_gen_rand(&dsfmt, 1234); int n = 5; struct double_pair r; double *v = (double*)calloc(t,sizeof(double)); double *w = (double*)calloc(t,sizeof(double)); double *a = (double*)malloc((n)*(n)*sizeof(double)); double *b = (double*)malloc((n)*(n)*sizeof(double)); double *c = (double*)malloc((n)*(n)*sizeof(double)); double *d = (double*)malloc((n)*(n)*sizeof(double)); double *P = (double*)malloc((n)*(4*n)*sizeof(double)); double *Q = (double*)malloc((2*n)*(2*n)*sizeof(double)); double *PtP1 = (double*)malloc((4*n)*(4*n)*sizeof(double)); double *PtP2 = (double*)malloc((4*n)*(4*n)*sizeof(double)); double *QtQ1 = (double*)malloc((2*n)*(2*n)*sizeof(double)); double *QtQ2 = (double*)malloc((2*n)*(2*n)*sizeof(double)); for (int i=0; i < t; i++) { randmtzig_fill_randn(&dsfmt, a, n*n); randmtzig_fill_randn(&dsfmt, b, n*n); randmtzig_fill_randn(&dsfmt, c, n*n); randmtzig_fill_randn(&dsfmt, d, n*n); memcpy(P+0*n*n, a, n*n*sizeof(double)); memcpy(P+1*n*n, b, n*n*sizeof(double)); memcpy(P+2*n*n, c, n*n*sizeof(double)); memcpy(P+3*n*n, d, n*n*sizeof(double)); for (int j=0; j < n; j++) { for (int k=0; k < n; k++) { Q[2*n*j+k] = a[k]; Q[2*n*j+n+k] = b[k]; Q[2*n*(n+j)+k] = c[k]; Q[2*n*(n+j)+n+k] = d[k]; } } cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, n, 4*n, 1.0, P, 4*n, P, 4*n, 0.0, PtP1, 4*n); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, 4*n, 4*n, 4*n, 1.0, PtP1, 4*n, PtP1, 4*n, 0.0, PtP2, 4*n); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, 4*n, 4*n, 4*n, 1.0, PtP2, 4*n, PtP2, 4*n, 0.0, PtP1, 4*n); for (int j=0; j < n; j++) { v[i] += PtP1[(n+1)*j]; } cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 2*n, 2*n, 2*n, 1.0, Q, 2*n, Q, 2*n, 0.0, QtQ1, 2*n); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, 2*n, 2*n, 2*n, 1.0, QtQ1, 2*n, QtQ1, 2*n, 0.0, QtQ2, 2*n); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, 2*n, 2*n, 2*n, 1.0, QtQ2, 2*n, QtQ2, 2*n, 0.0, QtQ1, 2*n); for (int j=0; j < 2*n; j++) { w[i] += QtQ1[(2*n+1)*j]; } } free(PtP1); free(PtP2); free(QtQ1); free(QtQ2); free(P); free(Q); free(a); free(b); free(c); free(d); double v1=0.0, v2=0.0, w1=0.0, w2=0.0; for (int i=0; i < t; i++) { v1 += v[i]; v2 += v[i]*v[i]; w1 += w[i]; w2 += w[i]*w[i]; } free(v); free(w); r.s1 = sqrt((t*(t*v2-v1*v1))/((t-1)*v1*v1)); r.s2 = sqrt((t*(t*w2-w1*w1))/((t-1)*w1*w1)); return r; }
/* * Main Function */ int run(int rank) { long long NP; // Number of particles pfloat sysLeng; // Length of system int numCells; // Number of cells in system pfloat tol_std; // Tolerance for standard deviation calculations //Random number generator dsfmt_t * dsfmt = (dsfmt_t*)malloc(sizeof(dsfmt_t)); //stores user input NP = opt.NP; sysLeng = opt.sysLen; numCells = opt.numCells; tol_std = opt.tol_std; int nprocs; #ifdef DO_MPI MPI_Comm_size(MPI_COMM_WORLD,&nprocs); #else nprocs = 1; #endif //Initialization of vital simulation parameters pfloat dx = sysLeng / numCells; //cell width pfloat eps = 0.1; //the threshold angle for tallying pfloat sig_t = opt.sig_t; //total collision cross-section pfloat sig_s = opt.sig_s; //scattering cross-section //The source term uniform for now pfloat q0_avg = 1.0; //Form data structure for simulation parameters struct data data; data.NP = NP; //Number of Particles data.lx = sysLeng; //Length of System data.nx = numCells; //Number of Cells data.dx = dx; //Cell Width data.dx2 = dx*dx; // Cell Width SQUARED data.dx_recip = 1.0/dx; // Pre-computed reciprocal data.sig_t = sig_t; //Total Collision Cross-Section data.sig_t_recip = 1.0/sig_t; // Pre-computed reciprocal data.sig_s = sig_s; //Scattering Collision Cross-Section data.q0_avg = q0_avg; //Weight of each particle data.eps = eps; //Threshold Angle for Tallying long long NPc = (long long )((pfloat)NP / numCells); // number of particles in cell long long NP_tot = NP; //total number of particles for the CMC data.NP_tot = NP_tot; // Total Number of Particles data.NPc = NPc; // Number of Particles per Cell //Initialize the average values int iter = 0; int iter_avg = 0 ; //tally for iteration for convergence int i; // MT RNG dsfmt_init_gen_rand(dsfmt, (int)time(NULL)); //time keeping structures struct timeval start_time, end_time, startPerIter, endPerIter; afloat * phi_n_avg = NULL; //Vector to hold avg value of phi_n across iterations afloat * phi_n_tot = NULL; //Vector to hold sum of phi_n across iterations afloat * phiS2_n_tot = NULL; #if defined(L2_1) || defined(L2_2) || defined(L2_3) afloat * anal_soln = NULL; //Holds the analytical solution if comparing with it pfloat l2; //Stores the l2 norm while comparing with analytical solution #else meanDev * phi_nStats = NULL; //Mean and standard deviation #endif colData all_tallies; //Stores the tallies for the current iteration if( rank == 0 ) { phi_n_avg = (afloat *) calloc(numCells, sizeof(afloat)); phi_n_tot = (afloat*) calloc(numCells,sizeof(afloat)); phiS2_n_tot = (afloat*) calloc(numCells,sizeof(afloat)); #if defined(L2_1) || defined(L2_2) || defined(L2_3) anal_soln = (afloat *) calloc(numCells, sizeof (afloat)); // Reference solution #if defined(L2_1) init_L2_norm_1(anal_soln); #elif defined(L2_2) init_L2_norm_2(anal_soln); #elif defined(L2_3) init_L2_norm_3(anal_soln); #endif #else //Initialize vector to hold stats of the monte carlo phi_nStats = (meanDev*) malloc(sizeof(meanDev)*opt.numCells); #endif } //Plot and print initial condition as well as simulation parameters if (rank == 0 && !opt.silent) sim_param_print(data); int flag_sim = 0; //Flag to singal convergence long long samp_cnt = 0; //total history tally //Allocate tallies on each process. 
Aligned to cache line ( not really necessary for MPI ) colData tallies = allocate_tallies(numCells); #ifdef DO_MPI //In MPI mode, allocate on process 0, all_tallies to hold sum of tallies across processes if( rank == 0 ) all_tallies = allocate_tallies(numCells); else { all_tallies.phi_n = NULL; all_tallies.phi_n2 = NULL; all_tallies.tot_col = NULL; all_tallies.full_buffer = NULL; all_tallies.buffer_size = tallies.buffer_size; } #else //For sequential mode, alias all_tallies to tallies all_tallies.phi_n = tallies.phi_n; all_tallies.phi_n2 = tallies.phi_n2; all_tallies.tot_col = tallies.tot_col; all_tallies.buffer_size = tallies.buffer_size; #endif double avg_col = 0; //Average collisions across all the particles streamed across all processes /* double tot_col = 0; */ /* double all_col = 0; */ double running_col = 0.0; //Calculation starts here gettimeofday(&start_time, NULL); #ifdef DO_PAPI PAPI_library_init(PAPI_VER_CURRENT); int EventSet = PAPI_NULL; long long start[4],stop[4]; PAPI_create_eventset(&EventSet); PAPI_add_event(EventSet,PAPI_TOT_CYC); PAPI_add_event(EventSet,PAPI_TOT_INS); PAPI_add_event(EventSet,PAPI_FP_OPS); PAPI_add_event(EventSet,PAPI_FP_INS); PAPI_start(EventSet); PAPI_read(EventSet,start); #endif // while not converged and total number of iterations < opt.numiters while (flag_sim == 0 && iter < opt.numIters) { /* //start time for iteration */ /* gettimeofday(&startPerIter, NULL); */ iter += 1; if(!opt.silent && rank==0) printf("This is the %dth iteration\n", iter); //calls collision and tally to stream particles and collect statistics *(tallies.tot_col) = collision_and_tally(data, &tallies,rank,nprocs,dsfmt); #ifdef DO_MPI //In MPI mode, need to reduce phi_n and phi_n2 across all processes MPI_Reduce(tallies.full_buffer,all_tallies.full_buffer,tallies.buffer_size,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); #endif //Do the averaging and convergence calculations on process 0 if( rank == 0 ) { running_col += *(all_tallies.tot_col); avg_col = running_col / iter / NP; //Normalizing the values of phi_n for( i = 0 ; i < numCells ; i++ ) { all_tallies.phi_n[i] /= (afloat)data.dx*NP; all_tallies.phi_n[i] *= data.lx; } /*************************************************** Calculates the averages **************************************************\/ */ iter_avg += 1; samp_cnt = NP_tot * ((long long) iter_avg); //accumulate phase -- add new data to the running averages for (i = 0; i < numCells; i++) { phi_n_tot[i] += all_tallies.phi_n[i]; phiS2_n_tot[i] += all_tallies.phi_n2[i]; } // for each cell, calculate the average for phi_n, phi_lo and E_n for (i = 0; i < numCells; i++) { phi_n_avg[i] = phi_n_tot[i] / iter_avg; } //prints out the necessary data if(!opt.silent && rank == 0){ printf("Running Average number of collisions = %lf\n",avg_col); } //check for convergence #if !defined(L2_1) && !defined(L2_2) && !defined(L2_3) //Use mean and standard deviation for convergence mean_std_calc(phi_n_tot, phiS2_n_tot, samp_cnt, opt.NP, opt.numCells, opt.dx, phi_nStats); if (maxFAS(&phi_nStats[0].stdDev, numCells, 2) <= tol_std) { flag_sim = 1; } if( !opt.silent ){ printf("The maximum standard deviation of flux at node is, max (sig_phi) = %f\n", maxFAS(&phi_nStats[0].stdDev, numCells, 2)); } #else //Use L2 norm of the analytical solution l2 = l2_norm_cmp(phi_n_avg, anal_soln, numCells, dx); flag_sim = l2 <= tol_std; if(rank == 0){ gettimeofday(&end_time, NULL); printf("L2: %f, Sofar: %ldu_sec\n", l2, (end_time.tv_sec - start_time.tv_sec)*1000000 + (end_time.tv_usec - start_time.tv_usec)); } #endif } 
#ifdef DO_MPI //Broadcast to other processes if convergence is reached MPI_Bcast(&flag_sim,1,MPI_INT,0,MPI_COMM_WORLD); #endif /* //end time per iteration */ /* gettimeofday(&endPerIter, NULL); */ /* printf("ID = %d, Time per Iteration: %ldu_sec, flags sim = %d\n\n", rank,(endPerIter.tv_sec - startPerIter.tv_sec)*1000000 + (endPerIter.tv_usec - startPerIter.tv_usec),flag_sim); */ } #ifdef DO_PAPI PAPI_read(EventSet,stop); printf("%lld %lld %lld %lld\n",stop[0] - start[0],stop[1] - start[1],stop[2] - start[2],stop[3] - start[3]); PAPI_cleanup_eventset(EventSet); PAPI_destroy_eventset(EventSet); #endif gettimeofday(&end_time, NULL); if( rank == 0 ) printf("Elapsed Time: %ldu_sec\n", (end_time.tv_sec - start_time.tv_sec)*1000000 + (end_time.tv_usec - start_time.tv_usec)); if(rank == 0 && !opt.silent){ printf("NODEAVG\n"); for (i = 0; i < numCells; i++) { printf("%d %lf\n", i, phi_n_avg[i]); } } /************************************************ * Free memory *************************************************/ deallocate_tallies(tallies); free(dsfmt); if( rank == 0 ) { #ifdef DO_MPI deallocate_tallies(all_tallies); #endif free(phi_n_tot); free(phiS2_n_tot); free(phi_n_avg); #if defined(L2_1) || defined(L2_2) || defined(L2_3) free(anal_soln); #else free(phi_nStats); #endif } return (EXIT_SUCCESS); }
void initRandomNumbers(int seed)
{
    dsfmt_init_gen_rand(&dsfmt_global_data, seed);
    initialised = true;
}
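/* Companion sketch (an assumption): dsfmt_global_data is the library-provided
 * state behind dSFMT's gv_ convenience macros, so a matching draw function
 * (hypothetical name) could simply be: */
double nextRandomNumber(void)
{
    /* Equivalent to dsfmt_gv_genrand_close_open(); uses the state seeded in
       initRandomNumbers(). */
    return dsfmt_genrand_close_open(&dsfmt_global_data);
}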
int main(int argc, char **argv) { int err; // Error handling scheme: this function has failed until proven otherwise. int ret = EXIT_FAILURE; err = MPI_Init(&argc, &argv); if(err != MPI_SUCCESS) { // Theoretically, an error at this point will abort the program, and this // code path is never followed. This is here for completeness. fprintf(stderr, "unable to initialize MPI\n"); goto die_immed; } // Install the MPI error handler that returns error codes, so we can perform // the usual process suicide ritual. err = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); if(err != MPI_SUCCESS) { // Again, theoretically, the previous error handler (MPI_Abort) gets called // instead of reaching this fail point. fprintf(stderr, "unable to reset MPI error handler\n"); goto die_finalize_mpi; } int size, rank; err = MPI_Comm_size(MPI_COMM_WORLD, &size); if(err == MPI_SUCCESS) err = MPI_Comm_rank(MPI_COMM_WORLD, &rank); if(err != MPI_SUCCESS) { fprintf(stderr, "unable to determine rank or size\n"); goto die_finalize_mpi; } /* Create cartestian communicator */ int dims[2] = {0, 0}; int periods[2] = {1, 1}; err = MPI_Dims_create(size, 2, dims); if(err != MPI_SUCCESS) { fprintf(stderr, "unable to create a cartestian topology\n"); goto die_finalize_mpi; } MPI_Comm cart; err = MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart); if(err != MPI_SUCCESS) { fprintf(stderr, "unable to create cartestian communicator\n"); goto die_finalize_mpi; } dsfmt_t *prng = malloc(sizeof(dsfmt_t)); if(prng == NULL) { fprintf(stderr, "unable to allocate PRNG\n"); goto die_free_cart_comm; } dsfmt_init_gen_rand(prng, SEED + rank); int const net_elems = proc_elems[0]*proc_elems[1]; // Allocate master source array for FFT. double *const master = fftw_malloc(net_elems*sizeof(double)); if(master == NULL) { fprintf(stderr, "unable to allocate master array\n"); goto die_free_prng; } for(unsigned int i = 0; i < net_elems; ++i) { master[i] = dsfmt_genrand_open_close(prng) * 10; } /* Allocate source array for serial array. We copy the master array to this * array, then transform it in place, then reverse transform it. The idea is * that we should get the original data back, and we use this as a consistency * check. We need the original data to compare to. 
 */
  double *const source = fftw_malloc(net_elems*sizeof(double));
  if(source == NULL) {
    fprintf(stderr, "unable to allocate source array\n");
    goto die_free_master;
  }
  for(int i = 0; i < net_elems; ++i) source[i] = master[i];

  /* Allocate the destination array */
  double complex *const dest = fftw_malloc(net_elems*sizeof(double complex));
  if(dest == NULL) {
    fprintf(stderr, "unable to allocate destination array\n");
    goto die_free_source;
  }

  /* Allocate a plan to compute the FFT */
  fft_par_plan plan = fft_par_plan_r2c(cart, proc_elems, 1, source, dest, &err);
  if(plan == NULL) {
    fprintf(stderr, "unable to initialize parallel FFT plan\n");
    goto die_free_dest;
  }

  /* Execute the forward plan */
  err = fft_par_execute_fwd(plan);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "error computing forward plan\n");
    goto die_free_plan;
  }

  /* Execute the reverse plan */
  err = fft_par_execute_rev(plan);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "error computing reverse plan\n");
    goto die_free_plan;
  }

  /* Compare source to master, use supremum norm */
  double norm = 0.0;
  for(int i = 0; i < net_elems; ++i) {
    /* Each FFT effectively multiplies by sqrt(net_elems*num_procs) */
    norm = fmax(norm, fabs(master[i] - source[i]/net_elems/size));
  }
  if(norm < 1.0e-6) {
    ret = EXIT_SUCCESS;
  }

die_free_plan:
  fft_par_plan_destroy(plan);
die_free_dest:
  fftw_free(dest);
die_free_source:
  fftw_free(source);
die_free_master:
  fftw_free(master);
die_free_prng:
  free(prng);
die_free_cart_comm:
  if(err == MPI_SUCCESS) err = MPI_Comm_free(&cart);
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "unable to free cartestian communicator\n");
    ret = EXIT_FAILURE;
  }
die_finalize_mpi:
  if(err == MPI_SUCCESS) err = MPI_Finalize();
  if(err != MPI_SUCCESS) {
    fprintf(stderr, "unable to finalize MPI\n");
    ret = EXIT_FAILURE;
  }
die_immed:
  fftw_cleanup();
  return ret;
}
/**
 * @param X
 * @author Takahiro Misawa (The University of Tokyo)
 * @author Kazuyoshi Yoshimi (The University of Tokyo)
 */
void Lanczos_EigenVector(struct BindStruct *X){

  printf("%s", cLogLanczos_EigenVectorStart);

  int i,j,i_max,iv;
  int k_exct;
  double beta1,alpha1,dnorm, dnorm_inv;
  double complex temp1,temp2;

  // for GC
  long unsigned int u_long_i;
  dsfmt_t dsfmt;

  k_exct = X->Def.k_exct;
  iv=X->Large.iv;
  i_max=X->Check.idim_max;

  //Eigenvectors by Lanczos method
  //initialization: initialization should be identical to that of Lanczos_EigenValue.c
#pragma omp parallel for default(none) private(i) shared(v0, v1, vg) firstprivate(i_max)
  for(i=1;i<=i_max;i++){
    v0[i]=0.0+0.0*I;
    v1[i]=0.0+0.0*I;
    vg[i]=0.0+0.0*I;
  }

  if(initial_mode == 0){
    v1[iv]=1.0;
    vg[iv]=vec[k_exct][1];
  }else if(initial_mode==1){
    iv = X->Def.initial_iv;
    u_long_i = 123432 + abs(iv);
    dsfmt_init_gen_rand(&dsfmt, u_long_i);
    for(i = 1; i <= i_max; i++){
      v1[i]=2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)+2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)*I;
    }
    dnorm=0;
#pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: dnorm)
    for(i=1;i<=i_max;i++){
      dnorm += conj(v1[i])*v1[i];
    }
    dnorm=sqrt(dnorm);
    dnorm_inv=1.0/dnorm;
#pragma omp parallel for default(none) private(i) shared(v1,vg,vec,k_exct) firstprivate(i_max, dnorm_inv)
    for(i=1;i<=i_max;i++){
      v1[i] = v1[i]*dnorm_inv;
      vg[i] = v1[i]*vec[k_exct][1];
    }
  }

  mltply(X, v0, v1);

  alpha1=alpha[1];
  beta1=beta[1];

#pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct)
  for(j=1;j<=i_max;j++){
    vg[j]+=vec[k_exct][2]*(v0[j]-alpha1*v1[j])/beta1;
  }

  //iteration
  for(i=2;i<=X->Large.itr-1;i++){
#pragma omp parallel for default(none) private(j, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1)
    for(j=1;j<=i_max;j++){
      temp1=v1[j];
      temp2=(v0[j]-alpha1*v1[j])/beta1;
      v0[j]=-beta1*temp1;
      v1[j]=temp2;
    }

    mltply(X, v0, v1);

    alpha1 = alpha[i];
    beta1 = beta[i];

#pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct, i)
    for(j=1;j<=i_max;j++){
      vg[j] += vec[k_exct][i+1]*(v0[j]-alpha1*v1[j])/beta1;
    }
  }

#pragma omp parallel for default(none) private(j) shared(v0, vg) firstprivate(i_max)
  for(j=1;j<=i_max;j++){
    v0[j] = vg[j];
  }

  //normalization
  dnorm=0.0;
#pragma omp parallel for default(none) reduction(+:dnorm) private(j) shared(v0) firstprivate(i_max)
  for(j=1;j<=i_max;j++){
    dnorm += conj(v0[j])*v0[j];
  }
  dnorm=sqrt(dnorm);
  dnorm_inv=1.0/dnorm;
#pragma omp parallel for default(none) private(j) shared(v0) firstprivate(i_max, dnorm_inv)
  for(j=1;j<=i_max;j++){
    v0[j] = v0[j]*dnorm_inv;
  }

  TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenVectorFinish, "a");
  printf("%s", cLogLanczos_EigenVectorEnd);
}
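/* For reference (not from the source): Lanczos_EigenVector() rebuilds the
 * target eigenvector from the same Krylov basis used in Lanczos_EigenValue(),
 * schematically
 *
 *     x  ~=  sum_k  vec[k_exct][k] * v_k ,
 *
 * where the v_k are regenerated by replaying the recurrence with the stored
 * alpha[] and beta[].  This is why the comment insists that the initialization
 * be identical to Lanczos_EigenValue.c: a different starting vector would
 * produce a different basis and an inconsistent reconstruction. */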
int main(int argc, char **argv)
{
  // Error handling scheme: this function has failed until proven otherwise.
  int ret = EXIT_FAILURE;

  if(MPI_Init(&argc, &argv) != MPI_SUCCESS) {
    // Theoretically, an error at this point will abort the program, and this
    // code path is never followed. This is here for completeness.
    fprintf(stderr, "unable to initialize MPI\n");
    goto die_immed;
  }

  // Install the MPI error handler that returns error codes, so we can perform
  // the usual process suicide ritual.
  if(MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN) != MPI_SUCCESS) {
    // Again, theoretically, the previous error handler (MPI_Abort) gets called
    // instead of reaching this fail point.
    fprintf(stderr, "unable to reset MPI error handler\n");
    goto die_finalize_mpi;
  }

  int size, rank;
  if(MPI_Comm_size(MPI_COMM_WORLD, &size) != MPI_SUCCESS ||
     MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) {
    fprintf(stderr, "unable to determine rank and size\n");
    goto die_finalize_mpi;
  }

  dsfmt_t *prng = malloc(sizeof(dsfmt_t));
  if(prng == NULL) {
    fprintf(stderr, "unable to allocate PRNG\n");
    goto die_finalize_mpi;
  }
  dsfmt_init_gen_rand(prng, SEED);

  const int master_elems = proc_elems * size;
  double *const master = fftw_malloc(VL*master_elems*sizeof(double));
  if(master == NULL) {
    fprintf(stderr, "unable to allocate master array\n");
    goto die_free_prng;
  }
  for(int i = 0; i < master_elems*VL; ++i) {
    master[i] = 2*dsfmt_genrand_open_close(prng) - 1;
  }

  /* Allocate the array holding the serial result */
  double complex *const serial = fftw_malloc(VL*master_elems*sizeof(*serial));
  if(serial == NULL) {
    fprintf(stderr, "unable to allocate serial array\n");
    goto die_free_master;
  }

  /* Perform serial transform */
  fftw_plan serial_plan = fftw_plan_many_dft_r2c(1, &master_elems, VL, master,
      NULL, VL, 1, serial, NULL, VL, 1, FFTW_ESTIMATE);
  if(serial_plan == NULL) {
    fprintf(stderr, "unable to construct forward transform plan\n");
    goto die_free_serial;
  }

  /* Perform the serial transform, and complete it */
  fftw_execute(serial_plan);
  fft_r2c_1d_vec_finish(serial, master_elems, VL);

  /* Allocate space to hold the parallel transform result */
  double complex *const parallel = fftw_malloc(
      proc_elems*VL*sizeof(double complex));
  if(parallel == NULL) {
    fprintf(stderr, "unable to allocate space for parallel array\n");
    goto die_destroy_serial_plan;
  }

  /* Create the parallel plan */
  fft_par_plan par_plan = fft_par_plan_r2c_1d(MPI_COMM_WORLD, proc_elems, VL,
      master + rank*proc_elems*VL, parallel, NULL);
  if(par_plan == NULL) {
    fprintf(stderr, "unable to create parallel transform plan\n");
    goto die_free_parallel;
  }

  /* Execute the parallel transform */
  if(fft_par_execute_fwd(par_plan) != MPI_SUCCESS) {
    fprintf(stderr, "unable to execute parallel transform\n");
    goto die_destroy_par_plan;
  }

  /* Compare values */
  double sup = 0.0;
  for(int i = 0; i < proc_elems*VL; ++i) {
    sup = fmax(sup, cabs(serial[rank*proc_elems*VL + i] - parallel[i]));
  }
  if(sup < 1.0e-6) {
    ret = EXIT_SUCCESS;
  }

die_destroy_par_plan:
  fft_par_plan_destroy(par_plan);
die_free_parallel:
  fftw_free(parallel);
die_destroy_serial_plan:
  fftw_destroy_plan(serial_plan);
die_free_serial:
  fftw_free(serial);
die_free_master:
  fftw_free(master);
die_free_prng:
  free(prng);
die_finalize_mpi:
  if(MPI_Finalize() != MPI_SUCCESS) {
    fprintf(stderr, "unable to finalize MPI\n");
    ret = EXIT_FAILURE;
  }
die_immed:
  fftw_cleanup();
  return ret;
}