/**
 * Fill buffer[0 .. *n-1] with normally distributed doubles (zero mean, unit
 * variance) built from uniform draws of rng->m_dsfmt.
 *
 * The algorithm is selected at compile time:
 *  - USE_RNG_BOX_MULLER: Box-Muller transform; each pair of uniform draws
 *    yields two outputs (the second is dropped for the last slot of an odd *n).
 *  - USE_RNG_MARSAGLIA:  Marsaglia polar method; one output per accepted point.
 * When neither macro is defined the buffer is left untouched.
 *
 * @param rng        Engine whose m_dsfmt state supplies the uniform draws.
 * @param buffer     Destination array with room for at least *n doubles.
 * @param n          Number of values to produce; nothing is written if *n <= 0.
 * @param isAligned  Unused.
 * @param info       Always set to 0 on return.
 */
void drnorm(RngEngine* rng, double* buffer, BlasInt* n, BlasInt* isAligned, RngErrorType* info)
{
    UNUSED(isAligned);

    if (*n > 0) {
#if defined(USE_RNG_BOX_MULLER)
        for (BlasInt iter = 0; iter < *n; iter += 2) {
            double x = 2.0 * M_PI * dsfmt_genrand_close_open(&(rng->m_dsfmt));
            /* The draw lies in [0, 1); mirroring it to (0, 1] keeps log() finite. */
            double u = 1.0 - dsfmt_genrand_close_open(&(rng->m_dsfmt));
            /* Box-Muller radius is sqrt(-2 ln u); dropping the 2 halves the
               variance and disagrees with the Marsaglia branch below. */
            double z = sqrt(-2.0 * log(u));
            double c = cos(x);
            buffer[iter] = z * c;
            if (iter + 1 < *n) {
                double s = sin(x);
                buffer[iter + 1] = z * s;
            }
        }
#elif defined(USE_RNG_MARSAGLIA)
        for (BlasInt iter = 0; iter < *n; ++iter) {
            double x, y, r;
            /* Rejection-sample a point strictly inside the unit disc, origin excluded. */
            do {
                x = 2.0 * dsfmt_genrand_close_open(&(rng->m_dsfmt)) - 1.0;
                y = 2.0 * dsfmt_genrand_close_open(&(rng->m_dsfmt)) - 1.0;
                r = x * x + y * y;
            } while (r >= 1 || r == 0);
            buffer[iter] = x * sqrt(- 2.0 * log(r) / r);
        }
#endif
    }

    *info = 0;
}
/**
 * Shade a surface point with the Phong material.
 *
 * The result starts from the emitted colour. If the incoming direction lies on
 * the same side as the normal (n.dot(in) > 0) only the emission is returned.
 * Otherwise one uniform draw picks a single continuation: a diffuse bounce
 * when it is below `diffuse`, a specular bounce when it is below
 * `diffuse + specular`, and no bounce at all otherwise.
 *
 * @param p      Surface point the secondary ray starts from.
 * @param m      Local frame; rows 0 and 1 are the tangent vectors, row 2 the normal.
 * @param in     Incoming ray direction.
 * @param scene  Scene queried for the radiance along the secondary ray.
 * @param depth  Current recursion depth; the secondary ray uses depth + 1.
 * @param dsfmt  Random generator state, advanced by every draw.
 * @return Emitted colour plus the weighted radiance of the sampled bounce.
 */
Color Phong::color(const SIMD::Point &p, const SIMD::Matrix &m, const SIMD::Vec &in,
                   Scene &scene, size_t depth, dsfmt_t &dsfmt) const
{
    const SIMD::Vec tangent = m[0];
    const SIMD::Vec bitangent = m[1];
    const SIMD::Vec normal = m[2];

    Color result;
    result.add(c_emit);

    if (normal.dot(in) > 0) {
        return result;
    }

    const RT_FLOAT xi = dsfmt_genrand_close_open(&dsfmt);

    if (diffuse > xi) {
        // Cosine weighted random hemisphere sampling
        // from smallpt by Kevin Beason
        const RT_FLOAT r1 = 2*M_PI*dsfmt_genrand_close_open(&dsfmt);
        const RT_FLOAT r2 = dsfmt_genrand_close_open(&dsfmt);
        const RT_FLOAT r2s = std::sqrt(r2);
        SIMD::Vec bounce = tangent*std::cos(r1)*r2s + bitangent*std::sin(r1)*r2s + normal*std::sqrt(1-r2);
        bounce.normalize();
        // End of CWRHS

        Raytracer::Color weighted = c_diffuse;
        weighted.mult(scene.radiance(SIMD::Ray(p, bounce), depth+1, dsfmt));
        result.add(weighted);
    } else if (diffuse+specular > xi) {
        // Importance sampling of Phong reflection model by Jason Lawrence
        SIMD::Vec lobeU, lobeV;
        SIMD::Vec out = in-2*in.dot(normal)*normal;
        SIMD::Vec bounce;

        // Pick the helper axis that is not nearly parallel to the mirror direction.
        lobeU = (std::abs(out[0]) > 0.1 ? SIMD::Vec(0, 1, 0) : SIMD::Vec(1, 0, 0)).cross(out);
        lobeU.normalize();
        lobeV = out.cross(lobeU);

        // Resample until the direction no longer points below the surface.
        for (;;) {
            const RT_FLOAT cos_theta = std::pow(dsfmt_genrand_close_open(&dsfmt), 1/(spec_pow+2));
            const RT_FLOAT phi = 2*M_PI*dsfmt_genrand_close_open(&dsfmt);
            const RT_FLOAT sin_theta = std::sqrt(1-cos_theta*cos_theta);
            bounce = lobeU*std::cos(phi)*sin_theta+lobeV*std::sin(phi)*sin_theta+out*cos_theta;
            if (!(bounce.dot(normal) < 0)) {
                break;
            }
        }
        bounce.normalize();
        // End of ISoPRM

        Color weighted = scene.radiance(SIMD::Ray(p, bounce), depth+1, dsfmt);
        weighted.mult(c_specular);
        result.add(weighted);
    }

    return result;
}
/*
 * Monte-Carlo estimate of pi: draw points uniformly in the unit square with a
 * fixed-seed dSFMT generator, count those with x*x + y*y < 1, and print four
 * times the hit ratio with ten significant digits.
 */
int main(int argc, char* argv[])
{
    const long n_steps = 1000000000;
    dsfmt_t dsfmt;
    int seed = 142857;
    int inside = 0;
    int i = 0;
    double x, y, pi;

    dsfmt_init_gen_rand(&dsfmt, seed);

    while (i < n_steps) {
        x = dsfmt_genrand_close_open(&dsfmt);
        y = dsfmt_genrand_close_open(&dsfmt);
        /* Count the sample when it falls strictly inside the quarter circle. */
        inside += (x * x + y * y < 1.0) ? 1 : 0;
        i++;
    }

    pi = (double)inside / n_steps * 4;
    printf("%.10g\n", pi);

    return 0;
}
/**
 * Fill buffer[0 .. *n-1] with uniform doubles in [0, 1) drawn from
 * rng->m_dsfmt.
 *
 * The block generator dsfmt_fill_array_close_open() is only used when the
 * caller declares the buffer aligned AND the request meets dSFMT's size
 * contract (an even count of at least dsfmt_get_min_array_size()). Any
 * remainder, or the whole request when the contract is not met, is produced
 * one value at a time.
 *
 * @param rng        Engine whose m_dsfmt state supplies the draws.
 * @param buffer     Destination array with room for at least *n doubles.
 * @param n          Number of values to produce; nothing is written if *n <= 0.
 * @param isAligned  Non-zero when the caller allows the block generator.
 * @param info       Always set to 0 on return.
 */
void drunif(RngEngine* rng, double* buffer, BlasInt* n, BlasInt* isAligned, RngErrorType* info)
{
    BlasInt filled = 0;

    if (*isAligned != 0) {
        /* Largest even count not exceeding *n: the block generator rejects odd sizes. */
        BlasInt bulk = *n - (*n % 2);
        /* The size argument is an int, so also require that bulk survives the
           narrowing unchanged. */
        if (bulk >= (BlasInt)dsfmt_get_min_array_size() && (BlasInt)(int)bulk == bulk) {
            dsfmt_fill_array_close_open(&(rng->m_dsfmt), buffer, (int)bulk);
            filled = bulk;
        }
    }

    /* Scalar path: the odd tail, short requests, and unaligned buffers. */
    for (BlasInt iter = filled; iter < *n; ++iter) {
        buffer[iter] = dsfmt_genrand_close_open(&(rng->m_dsfmt));
    }

    *info = 0;
}
double tls_rand() { /* Setup PRNG state for current thread. */ UNUSED(pthread_once(&tls_prng_once, tls_prng_init)); /* Create PRNG state if not exists. */ dsfmt_t* s = pthread_getspecific(tls_prng_key); if (!s) { /* Initialize seed from system PRNG generator. */ uint32_t seed = 0; FILE *fp = fopen("/dev/urandom", "r"); if (fp == NULL) { fp = fopen("/dev/random", "r"); } if (fp != NULL) { if (fread(&seed, sizeof(uint32_t), 1, fp) != 1) { fclose(fp); fp = NULL; } } if (fp == NULL) { fprintf(stderr, "error: PRNG: cannot seed from " "/dev/urandom, seeding from local time\n"); struct timeval tv; if (gettimeofday(&tv, NULL) == 0) { seed = (uint32_t)(tv.tv_sec ^ tv.tv_usec); } else { /* Last resort. */ seed = (uint32_t)time(NULL); } } else { fclose(fp); } /* Initialize PRNG state. */ #ifdef HAVE_POSIX_MEMALIGN if (posix_memalign((void **)&s, 16, sizeof(dsfmt_t)) != 0) { fprintf(stderr, "error: PRNG: not enough memory\n"); return .0; } #else if ((s = malloc(sizeof(dsfmt_t))) == NULL) { fprintf(stderr, "error: PRNG: not enough memory\n"); return .0; } #endif dsfmt_init_gen_rand(s, seed); UNUSED(pthread_setspecific(tls_prng_key, s)); } return dsfmt_genrand_close_open(s); }
/*
 * Draw one index from the discrete distribution probs[0 .. n-1].
 *
 * A uniform value u in [0, 1) is drawn and the first index whose running sum
 * of probabilities reaches u is returned. The last index, n - 1, is the
 * fallback and its probability is never read. For n == 0 the function reads
 * nothing from probs and returns 0; the uniform draw is still consumed.
 */
static size_t sample1(const double *probs, size_t n, dsfmt_t * dsfmt)
{
    size_t i;
    double u = dsfmt_genrand_close_open(dsfmt);
    double sum = 0;

    /* Written as i + 1 < n rather than i < n - 1: n is unsigned, so n - 1
       would wrap for n == 0 and walk off the end of probs. */
    for (i = 0; i + 1 < n; i++) {
        sum += probs[i];
        if (sum >= u) {
            break;
        }
    }

    return i;
}
/** * * * @param X * * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) * @return */ int Lanczos_EigenValue(struct BindStruct *X) { fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueStart); FILE *fp; char sdt[D_FileNameMax],sdt_2[D_FileNameMax]; int stp, iproc; long int i,iv,i_max; unsigned long int i_max_tmp, sum_i_max; int k_exct,Target; int iconv=-1; double beta1,alpha1; //beta,alpha1 should be real double complex temp1,temp2; double complex cbeta1; double E[5],ebefor; int mythread; // for GC double dnorm; double complex cdnorm; long unsigned int u_long_i; dsfmt_t dsfmt; #ifdef lapack double **tmp_mat; double *tmp_E; int int_i,int_j,mfint[7]; #endif sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead); i_max=X->Check.idim_max; k_exct = X->Def.k_exct; if(initial_mode == 0){ sum_i_max = SumMPI_li(X->Check.idim_max); X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv) % sum_i_max + 1; iv=X->Large.iv; fprintf(stdoutMPI, " initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct); #pragma omp parallel for default(none) private(i) shared(v0, v1) firstprivate(i_max) for(i = 1; i <= i_max; i++){ v0[i]=0.0; v1[i]=0.0; } sum_i_max = 0; for (iproc = 0; iproc < nproc; iproc++) { i_max_tmp = BcastMPI_li(iproc, i_max); if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) { if (myrank == iproc) { v1[iv - sum_i_max+1] = 1.0; if (X->Def.iInitialVecType == 0) { v1[iv - sum_i_max+1] += 1.0*I; v1[iv - sum_i_max+1] /= sqrt(2.0); } }/*if (myrank == iproc)*/ }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/ sum_i_max += i_max_tmp; }/*for (iproc = 0; iproc < nproc; iproc++)*/ }/*if(initial_mode == 0)*/ else if(initial_mode==1){ iv = X->Def.initial_iv; fprintf(stdoutMPI, " initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct); #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \ shared(v0, v1, iv, X, nthreads, myrank) firstprivate(i_max) 
{ #pragma omp for for (i = 1; i <= i_max; i++) { v0[i] = 0.0; } /* Initialise MT */ #ifdef _OPENMP mythread = omp_get_thread_num(); #else mythread = 0; #endif u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank; dsfmt_init_gen_rand(&dsfmt, u_long_i); if (X->Def.iInitialVecType == 0) { #pragma omp for for (i = 1; i <= i_max; i++) v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; } else { #pragma omp for for (i = 1; i <= i_max; i++) v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); } }/*#pragma omp parallel*/ cdnorm=0.0; #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: cdnorm) for(i=1;i<=i_max;i++){ cdnorm += conj(v1[i])*v1[i]; } cdnorm = SumMPI_dc(cdnorm); dnorm=creal(cdnorm); dnorm=sqrt(dnorm); #pragma omp parallel for default(none) private(i) shared(v1) firstprivate(i_max, dnorm) for(i=1;i<=i_max;i++){ v1[i] = v1[i]/dnorm; } }/*else if(initial_mode==1)*/ //Eigenvalues by Lanczos method TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); mltply(X, v0, v1); stp=1; TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1=creal(X->Large.prdct) ;// alpha = v^{\dag}*H*v alpha[1]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i = 1; i <= i_max; i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[1]=beta1; ebefor=0; /* Set Maximum number of loop to the dimention of the Wavefunction */ i_max_tmp = SumMPI_li(i_max); if(i_max_tmp < X->Def.Lanczos_max){ X->Def.Lanczos_max = i_max_tmp; } if(i_max_tmp < X->Def.LanczosTarget){ X->Def.LanczosTarget = i_max_tmp; } if(i_max_tmp == 1){ E[1]=alpha[1]; vec12(alpha,beta,stp,E,X); X->Large.itr=stp; X->Phys.Target_energy=E[k_exct]; iconv=0; fprintf(stdoutMPI," LanczosStep E[1] \n"); fprintf(stdoutMPI," stp=%d %.10lf \n",stp,E[1]); } 
else{ #ifdef lapack fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] E_Max/Nsite\n"); #else fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] \n"); #endif for(stp = 2; stp <= X->Def.Lanczos_max; stp++){ #pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(i=1;i<=i_max;i++){ temp1 = v1[i]; temp2 = (v0[i]-alpha1*v1[i])/beta1; v0[i] = -beta1*temp1; v1[i] = temp2; } mltply(X, v0, v1); TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1=creal(X->Large.prdct); alpha[stp]=alpha1; cbeta1=0.0; #pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) for(i=1;i<=i_max;i++){ cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]); } cbeta1 = SumMPI_dc(cbeta1); beta1=creal(cbeta1); beta1=sqrt(beta1); beta[stp]=beta1; Target = X->Def.LanczosTarget; if(stp==2){ #ifdef lapack d_malloc2(tmp_mat,stp,stp); d_malloc1(tmp_E,stp+1); for(int_i=0;int_i<stp;int_i++){ for(int_j=0;int_j<stp;int_j++){ tmp_mat[int_i][int_j] = 0.0; } } tmp_mat[0][0] = alpha[1]; tmp_mat[0][1] = beta[1]; tmp_mat[1][0] = beta[1]; tmp_mat[1][1] = alpha[2]; DSEVvalue(stp,tmp_mat,tmp_E); E[1] = tmp_E[0]; E[2] = tmp_E[1]; E[3] = tmp_E[2]; E[4] = tmp_E[3]; d_free1(tmp_E,stp+1); d_free2(tmp_mat,stp,stp); #else bisec(alpha,beta,stp,E,4,eps_Bisec); #endif ebefor=E[Target]; childfopenMPI(sdt_2,"w", &fp); #ifdef lapack fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] E_Max/Nsite\n"); fprintf(fp, "stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); #else fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] \n"); fprintf(fp,"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]); #endif fclose(fp); } if(stp>2 && stp%2==0){ childfopenMPI(sdt_2,"a", &fp); #ifdef lapack 
d_malloc2(tmp_mat,stp,stp); d_malloc1(tmp_E,stp+1); for(int_i=0;int_i<stp;int_i++){ for(int_j=0;int_j<stp;int_j++){ tmp_mat[int_i][int_j] = 0.0; } } tmp_mat[0][0] = alpha[1]; tmp_mat[0][1] = beta[1]; for(int_i=1;int_i<stp-1;int_i++){ tmp_mat[int_i][int_i] = alpha[int_i+1]; tmp_mat[int_i][int_i+1] = beta[int_i+1]; tmp_mat[int_i][int_i-1] = beta[int_i]; } tmp_mat[int_i][int_i] = alpha[int_i+1]; tmp_mat[int_i][int_i-1] = beta[int_i]; DSEVvalue(stp,tmp_mat,tmp_E); E[1] = tmp_E[0]; E[2] = tmp_E[1]; E[3] = tmp_E[2]; E[4] = tmp_E[3]; E[0] = tmp_E[stp-1]; d_free1(tmp_E,stp+1); d_free2(tmp_mat,stp,stp); fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI); fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI); #else bisec(alpha,beta,stp,E,4,eps_Bisec); fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf \n",stp,E[1],E[2],E[3],E[4]); fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4]); #endif fclose(fp); if(fabs((E[Target]-ebefor)/E[Target])<eps_Lanczos || fabs(beta[stp])<pow(10.0, -14)){ vec12(alpha,beta,stp,E,X); X->Large.itr=stp; X->Phys.Target_energy=E[k_exct]; iconv=0; break; } ebefor=E[Target]; } } } sprintf(sdt,cFileNameTimeKeep,X->Def.CDataFileHead); if(iconv!=0){ sprintf(sdt, cLogLanczos_EigenValueNotConverged); return -1; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueFinish, "a"); fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd); return 0; }
/* Draw the next uniform double from the generator state g_dsfmt. */
double eqdist()
{
    double sample = dsfmt_genrand_close_open(&g_dsfmt);

    return sample;
}
/*
 * Fill the 16 bytes at `uuid` with random data drawn from `state`.
 *
 * Each byte comes from its own uniform draw scaled to [0, 256), so all
 * 128 bits vary. Copying the raw bytes of two scaled doubles, as done
 * previously, left the sign bit at zero and the exponent field almost
 * constant. No UUID version or variant bits are stamped; the output is 16
 * opaque random bytes, as before.
 */
static void gen_random_uuid(uint8_t *uuid, dsfmt_t *state)
{
    int i;

    for (i = 0; i < 16; i++) {
        /* The draw is in [0, 1), so the product is in [0, 256) and the cast
           yields 0..255. */
        uuid[i] = (uint8_t)(dsfmt_genrand_close_open(state) * 256.0);
    }
}
/** * * * @param X * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ void Lanczos_EigenVector(struct BindStruct *X){ printf("%s", cLogLanczos_EigenVectorStart); int i,j,i_max,iv; int k_exct; double beta1,alpha1,dnorm, dnorm_inv; double complex temp1,temp2; // for GC long unsigned int u_long_i; dsfmt_t dsfmt; k_exct = X->Def.k_exct; iv=X->Large.iv; i_max=X->Check.idim_max; //Eigenvectors by Lanczos method //initialization: initialization should be identical to that of Lanczos_EigenValue.c #pragma omp parallel for default(none) private(i) shared(v0, v1, vg) firstprivate(i_max) for(i=1;i<=i_max;i++){ v0[i]=0.0+0.0*I; v1[i]=0.0+0.0*I; vg[i]=0.0+0.0*I; } if(initial_mode == 0){ v1[iv]=1.0; vg[iv]=vec[k_exct][1]; }else if(initial_mode==1){ iv = X->Def.initial_iv; u_long_i = 123432 + abs(iv); dsfmt_init_gen_rand(&dsfmt, u_long_i); for(i = 1; i <= i_max; i++){ v1[i]=2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)+2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)*I; } dnorm=0; #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: dnorm) for(i=1;i<=i_max;i++){ dnorm += conj(v1[i])*v1[i]; } dnorm=sqrt(dnorm); dnorm_inv=1.0/dnorm; #pragma omp parallel for default(none) private(i) shared(v1,vg,vec,k_exct) firstprivate(i_max, dnorm_inv) for(i=1;i<=i_max;i++){ v1[i] = v1[i]*dnorm_inv; vg[i] = v1[i]*vec[k_exct][1]; } } mltply(X, v0, v1); alpha1=alpha[1]; beta1=beta[1]; #pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct) for(j=1;j<=i_max;j++){ vg[j]+=vec[k_exct][2]*(v0[j]-alpha1*v1[j])/beta1; } //iteration for(i=2;i<=X->Large.itr-1;i++){ #pragma omp parallel for default(none) private(j, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) for(j=1;j<=i_max;j++){ temp1=v1[j]; temp2=(v0[j]-alpha1*v1[j])/beta1; v0[j]=-beta1*temp1; v1[j]=temp2; } mltply(X, v0, v1); alpha1 = alpha[i]; beta1 = beta[i]; #pragma omp parallel for default(none) 
private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct, i) for(j=1;j<=i_max;j++){ vg[j] += vec[k_exct][i+1]*(v0[j]-alpha1*v1[j])/beta1; } } #pragma omp parallel for default(none) private(j) shared(v0, vg) firstprivate(i_max) for(j=1;j<=i_max;j++){ v0[j] = vg[j]; } //normalization dnorm=0.0; #pragma omp parallel for default(none) reduction(+:dnorm) private(j) shared(v0) firstprivate(i_max) for(j=1;j<=i_max;j++){ dnorm += conj(v0[j])*v0[j]; } dnorm=sqrt(dnorm); dnorm_inv=dnorm; #pragma omp parallel for default(none) private(j) shared(v0) firstprivate(i_max, dnorm_inv) for(j=1;j<=i_max;j++){ v0[j] = v0[j]*dnorm_inv; } TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenVectorFinish, "a"); printf("%s", cLogLanczos_EigenVectorEnd); }