예제 #1
0
파일: conv.cpp 프로젝트: igkiou/mkl_ext
/**
 * Fill `buffer` with `*n` standard-normal variates (mean 0, variance 1).
 *
 * The sampling algorithm is chosen at compile time:
 *  - USE_RNG_BOX_MULLER: Box-Muller transform, two outputs per pair of
 *    uniform draws (the second output is dropped for an odd trailing slot).
 *  - USE_RNG_MARSAGLIA: Marsaglia polar method, one output per accepted
 *    point inside the unit disc.
 * If neither macro is defined, `buffer` is left untouched.
 *
 * @param rng       engine whose `m_dsfmt` state is advanced.
 * @param buffer    output array with room for at least `*n` doubles.
 * @param n         number of variates requested; nothing is written if <= 0.
 * @param isAligned unused by this routine.
 * @param info      always set to 0.
 */
void drnorm(RngEngine* rng, double* buffer, BlasInt* n, BlasInt* isAligned,
			RngErrorType* info) {
	UNUSED(isAligned);
	if (*n > 0) {
#if defined(USE_RNG_BOX_MULLER)
		for (BlasInt iter = 0; iter < *n; iter += 2) {
			const double angle = 2.0 * M_PI * dsfmt_genrand_close_open(&(rng->m_dsfmt));
			// close_open yields [0, 1); map it to (0, 1] so log() never sees 0.
			const double u = 1.0 - dsfmt_genrand_close_open(&(rng->m_dsfmt));
			// Box-Muller radius is sqrt(-2 ln u); without the factor 2 the
			// output variance would be 1/2 and disagree with the Marsaglia branch.
			const double radius = sqrt(-2.0 * log(u));
			buffer[iter] = radius * cos(angle);
			if (iter + 1 < *n) {
				buffer[iter + 1] = radius * sin(angle);
			}
		}
#elif defined(USE_RNG_MARSAGLIA)
		for (BlasInt iter = 0; iter < *n; ++iter) {
			double x, y, r;
			// Rejection-sample a point in the open unit disc, excluding the origin.
			do {
				x = 2.0 * dsfmt_genrand_close_open(&(rng->m_dsfmt)) - 1.0;
				y = 2.0 * dsfmt_genrand_close_open(&(rng->m_dsfmt)) - 1.0;
				r = x * x + y * y;
			} while (r >= 1 || r == 0);
			buffer[iter] = x * sqrt(- 2.0 * log(r) / r);
		}
#endif
	}
	*info = 0;
}
예제 #2
0
// Returns the emitted colour plus the contribution of at most one randomly
// chosen bounce. A single uniform draw `xi` selects the event:
//   xi < diffuse              -> cosine-weighted diffuse bounce,
//   xi < diffuse + specular   -> Phong-lobe specular bounce,
//   otherwise                 -> no further bounce.
// Rows 0..2 of `m` are used as the frame (u, v, n); presumably n is the
// surface normal -- confirm against the caller. Nothing beyond emission is
// returned when n.dot(in) > 0.
Color Phong::color(const SIMD::Point &p, const SIMD::Matrix &m, const SIMD::Vec &in, Scene &scene, size_t depth, dsfmt_t &dsfmt) const
{
    const SIMD::Vec u = m[0];
    const SIMD::Vec v = m[1];
    const SIMD::Vec n = m[2];

    Color ret;
    ret.add(c_emit);

    if(n.dot(in) > 0)
        return ret;

    const RT_FLOAT xi = dsfmt_genrand_close_open(&dsfmt);

    if(xi < diffuse)
    {
        // Cosine weighted random hemisphere sampling
        // from smallpt by Kevin Beason
        const RT_FLOAT r1 = 2*M_PI*dsfmt_genrand_close_open(&dsfmt);
        const RT_FLOAT r2 = dsfmt_genrand_close_open(&dsfmt);
        const RT_FLOAT r2s = std::sqrt(r2);

        SIMD::Vec bounce = u*std::cos(r1)*r2s + v*std::sin(r1)*r2s + n*std::sqrt(1-r2);
        bounce.normalize();

        // Diffuse albedo scaled by the incoming radiance along the bounce.
        Raytracer::Color weighted = c_diffuse;
        weighted.mult(scene.radiance(SIMD::Ray(p, bounce), depth+1, dsfmt));
        ret.add(weighted);
        return ret;
    }

    if(xi < diffuse+specular)
    {
        // Importance sampling of Phong reflection model by Jason Lawrence
        const SIMD::Vec out = in-2*in.dot(n)*n;

        // Build a frame around the mirror direction; the helper axis is
        // picked depending on the size of out's first component.
        SIMD::Vec tangent;
        if(std::abs(out[0]) > 0.1)
            tangent = SIMD::Vec(0, 1, 0).cross(out);
        else
            tangent = SIMD::Vec(1, 0, 0).cross(out);
        tangent.normalize();
        const SIMD::Vec bitangent = out.cross(tangent);

        // Redraw until the sampled direction is not below the surface.
        SIMD::Vec bounce;
        do
        {
            const RT_FLOAT cos_theta = std::pow(dsfmt_genrand_close_open(&dsfmt), 1/(spec_pow+2));
            const RT_FLOAT phi = 2*M_PI*dsfmt_genrand_close_open(&dsfmt);
            const RT_FLOAT sin_theta = std::sqrt(1-cos_theta*cos_theta);

            bounce = tangent*std::cos(phi)*sin_theta+bitangent*std::sin(phi)*sin_theta+out*cos_theta;
        }
        while(bounce.dot(n) < 0);
        bounce.normalize();

        Color weighted = scene.radiance(SIMD::Ray(p, bounce), depth+1, dsfmt);
        weighted.mult(c_specular);
        ret.add(weighted);
    }

    return ret;
}
예제 #3
0
파일: single.c 프로젝트: xyos/paralela
/*
 * Monte Carlo estimate of pi: draw n_steps points from the unit square with
 * a fixed-seed dSFMT generator, count those with x*x + y*y < 1, and print
 * 4 * (hits / n_steps).
 */
int main(int argc, char* argv[]) {
    const long n_steps = 1000000000;
    dsfmt_t dsfmt;
    int seed = 142857;
    int inside = 0;
    int i;
    double x, y, pi;

    dsfmt_init_gen_rand(&dsfmt, seed);

    i = 0;
    while (i < n_steps) {
        x = dsfmt_genrand_close_open(&dsfmt);
        y = dsfmt_genrand_close_open(&dsfmt);
        if (x * x + y * y < 1.0) {
            inside++;
        }
        i++;
    }

    pi = (double)inside / n_steps * 4;
    printf("%.10g\n", pi);
    return 0;
}
예제 #4
0
파일: conv.cpp 프로젝트: igkiou/mkl_ext
/**
 * Fill `buffer` with `*n` uniform variates produced by the dSFMT
 * close_open generators.
 *
 * The bulk routine dsfmt_fill_array_close_open() is used only when the
 * caller flags the buffer as aligned AND the request satisfies dSFMT's
 * preconditions for array generation (even size, at least
 * dsfmt_get_min_array_size() elements). Every other request is served one
 * value at a time, so small or odd sizes no longer reach the bulk routine
 * with an invalid size.
 *
 * @param rng       engine whose `m_dsfmt` state is advanced.
 * @param buffer    output array with room for at least `*n` doubles.
 * @param n         number of variates requested; nothing is written if <= 0.
 * @param isAligned non-zero permits the bulk fill path.
 * @param info      always set to 0.
 */
void drunif(RngEngine* rng, double* buffer, BlasInt* n, BlasInt* isAligned,
			RngErrorType* info) {

	const BlasInt count = *n;
	const bool bulkAllowed = (*isAligned != 0)
			&& (count >= static_cast<BlasInt>(dsfmt_get_min_array_size()))
			&& (count % 2 == 0);

	if (bulkAllowed) {
		dsfmt_fill_array_close_open(&(rng->m_dsfmt), buffer, *n);
	} else {
		for (BlasInt iter = 0; iter < count; ++iter) {
			buffer[iter] = dsfmt_genrand_close_open(&(rng->m_dsfmt));
		}
	}
	*info = 0;
}
예제 #5
0
/*
 * Return one value from dsfmt_genrand_close_open() using a generator state
 * stored under the thread-specific key tls_prng_key.
 *
 * On the first call in a thread the state is allocated and seeded from
 * /dev/urandom (falling back to /dev/random, then to the wall clock).
 * Returns .0 if the state cannot be allocated.
 */
double tls_rand()
{
	/* Run the one-time PRNG setup routine. */
	UNUSED(pthread_once(&tls_prng_once, tls_prng_init));

	/* Fast path: this thread already has a generator state. */
	dsfmt_t *state = pthread_getspecific(tls_prng_key);
	if (state != NULL) {
		return dsfmt_genrand_close_open(state);
	}

	/* Try to read a 32-bit seed from the system random devices. */
	uint32_t seed = 0;
	int have_seed = 0;
	FILE *src = fopen("/dev/urandom", "r");
	if (src == NULL) {
		src = fopen("/dev/random", "r");
	}
	if (src != NULL) {
		have_seed = (fread(&seed, sizeof(uint32_t), 1, src) == 1);
		fclose(src);
	}

	/* No device or short read: derive the seed from the current time. */
	if (!have_seed) {
		fprintf(stderr, "error: PRNG: cannot seed from "
			"/dev/urandom, seeding from local time\n");
		struct timeval now;
		if (gettimeofday(&now, NULL) == 0) {
			seed = (uint32_t)(now.tv_sec ^ now.tv_usec);
		} else {
			/* Last resort. */
			seed = (uint32_t)time(NULL);
		}
	}

	/* Allocate the state, 16-byte aligned when posix_memalign is available. */
	int alloc_failed;
#ifdef HAVE_POSIX_MEMALIGN
	alloc_failed = (posix_memalign((void **)&state, 16, sizeof(dsfmt_t)) != 0);
#else
	state = malloc(sizeof(dsfmt_t));
	alloc_failed = (state == NULL);
#endif
	if (alloc_failed) {
		fprintf(stderr, "error: PRNG: not enough memory\n");
		return .0;
	}

	/* Seed the new state and remember it for this thread. */
	dsfmt_init_gen_rand(state, seed);
	UNUSED(pthread_setspecific(tls_prng_key, state));

	return dsfmt_genrand_close_open(state);
}
예제 #6
0
파일: recv_boot.c 프로젝트: patperry/iproc
/*
 * Draw one index in [0, n) by inverse-CDF sampling over `probs`.
 *
 * A uniform value u is drawn from `dsfmt` and the running sum of probs[]
 * is scanned; the first index whose cumulative sum exceeds u is returned.
 * If no prefix exceeds u (e.g. rounding leaves the total below u), the
 * last index n - 1 is returned without reading probs[n - 1].
 *
 * probs  array of n weights, presumably summing to 1 -- confirm at caller.
 * n      number of entries; for n == 0 nothing is read and 0 is returned.
 * dsfmt  generator state, advanced by one draw.
 */
static size_t sample1(const double *probs, size_t n, dsfmt_t * dsfmt)
{
	size_t i;
	double u = dsfmt_genrand_close_open(dsfmt);
	double sum = 0;

	/* `i + 1 < n` instead of `i < n - 1`: n is unsigned, so n - 1 would
	 * wrap to SIZE_MAX for n == 0 and run off the end of probs. */
	for (i = 0; i + 1 < n; i++) {
		sum += probs[i];
		/* Strict comparison: u lies in [0, 1), so `>=` would pick a
		 * leading zero-probability entry whenever u == 0. */
		if (sum > u)
			break;
	}

	return i;
}
예제 #7
0
/** 
 * Run the Lanczos iteration and track the eigenvalue estimates E[1..4]
 * obtained from the alpha[]/beta[] coefficients until E[Target] converges.
 *
 * Outline of the code below:
 *  1. Build the start vector v1: a single non-zero component
 *     (initial_mode == 0) or a normalised random vector (initial_mode == 1).
 *  2. Apply mltply() once to obtain alpha[1] and beta[1].
 *  3. Iterate up to X->Def.Lanczos_max steps. On every even step > 2 the
 *     estimates are recomputed (DSEVvalue when `lapack` is defined, bisec
 *     otherwise), logged, and the relative change of E[Target] is compared
 *     with eps_Lanczos.
 * 
 * @param X binding structure. Reads X->Def and X->Check; writes X->Large.iv
 *          (initial_mode == 0 only), X->Large.itr and X->Phys.Target_energy,
 *          and may lower X->Def.Lanczos_max / X->Def.LanczosTarget to the
 *          global vector dimension.
 * 
 * @author Takahiro Misawa (The University of Tokyo)
 * @author Kazuyoshi Yoshimi (The University of Tokyo)
 * @return 0 if the convergence test succeeded (or the global dimension is 1),
 *         -1 if the loop ended without convergence.
 */
int Lanczos_EigenValue(struct BindStruct *X)
{

  fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueStart);
  FILE *fp;
  char sdt[D_FileNameMax],sdt_2[D_FileNameMax];
  int stp, iproc;
  long int i,iv,i_max;      
  unsigned long int i_max_tmp, sum_i_max;
  int k_exct,Target;
  // iconv stays -1 until one of the convergence paths sets it to 0.
  int iconv=-1;
  double beta1,alpha1; //beta,alpha1 should be real
  double  complex temp1,temp2;
  double complex cbeta1;
  // E[1..4] hold the eigenvalue estimates; E[0] is only written in the lapack path.
  double E[5],ebefor;
  int mythread;

// for GC
  double dnorm;
  double complex cdnorm;
  long unsigned int u_long_i;
  dsfmt_t dsfmt;

#ifdef lapack
  double **tmp_mat;
  double *tmp_E;
  int    int_i,int_j,mfint[7];
#endif
      
  // sdt_2 is the per-step log file opened further below.
  sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead);

  i_max=X->Check.idim_max;      
  k_exct = X->Def.k_exct;

  if(initial_mode == 0){

    // Pick one global component index iv from the summed dimension and initial_iv.
    sum_i_max = SumMPI_li(X->Check.idim_max);
    X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv) % sum_i_max + 1;
    iv=X->Large.iv;
    fprintf(stdoutMPI, "  initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct);       
#pragma omp parallel for default(none) private(i) shared(v0, v1) firstprivate(i_max)
    for(i = 1; i <= i_max; i++){
      v0[i]=0.0;
      v1[i]=0.0;
    }

    // Walk the processes in rank order, accumulating their local sizes, and
    // set the component only on the process whose index range contains iv.
    sum_i_max = 0;
    for (iproc = 0; iproc < nproc; iproc++) {

      i_max_tmp = BcastMPI_li(iproc, i_max);
      if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {

        if (myrank == iproc) {
          v1[iv - sum_i_max+1] = 1.0;
          // iInitialVecType == 0: use (1 + i)/sqrt(2) instead of a real 1.
          if (X->Def.iInitialVecType == 0) {
            v1[iv - sum_i_max+1] += 1.0*I;
            v1[iv - sum_i_max+1] /= sqrt(2.0);
          }
        }/*if (myrank == iproc)*/
      }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/

      sum_i_max += i_max_tmp;

    }/*for (iproc = 0; iproc < nproc; iproc++)*/
  }/*if(initial_mode == 0)*/
  else if(initial_mode==1){
    iv = X->Def.initial_iv;
    fprintf(stdoutMPI, "  initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n",initial_mode,iv,i_max,k_exct);       
    // Each thread owns a private generator (dsfmt is listed as private).
    #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \
            shared(v0, v1, iv, X, nthreads, myrank) firstprivate(i_max)
    {

#pragma omp for
      for (i = 1; i <= i_max; i++) {
        v0[i] = 0.0;
      }
      /*
       Initialise MT
      */
#ifdef _OPENMP
      mythread = omp_get_thread_num();
#else
      mythread = 0;
#endif
      // Seed differs per thread and per myrank so the streams are not identical.
      u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank;
      dsfmt_init_gen_rand(&dsfmt, u_long_i);

      // Components are drawn from [-1, 1): complex for iInitialVecType == 0, real otherwise.
      if (X->Def.iInitialVecType == 0) {
#pragma omp for
        for (i = 1; i <= i_max; i++)
          v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I;
      }
      else {
#pragma omp for
        for (i = 1; i <= i_max; i++)
          v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5);
      }

    }/*#pragma omp parallel*/

    // Normalise v1 using the squared norm summed through SumMPI_dc.
    cdnorm=0.0;
#pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: cdnorm) 
    for(i=1;i<=i_max;i++){
     cdnorm += conj(v1[i])*v1[i];
    }
    cdnorm = SumMPI_dc(cdnorm);
    dnorm=creal(cdnorm);
    dnorm=sqrt(dnorm);
    #pragma omp parallel for default(none) private(i) shared(v1) firstprivate(i_max, dnorm)
    for(i=1;i<=i_max;i++){
      v1[i] = v1[i]/dnorm;
    }
  }/*else if(initial_mode==1)*/
  
  //Eigenvalues by Lanczos method
  TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a");
  // First step: mltply() is followed by reading X->Large.prdct as alpha.
  mltply(X, v0, v1);
  stp=1;
  TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp);

    alpha1=creal(X->Large.prdct) ;// alpha = v^{\dag}*H*v

  alpha[1]=alpha1;
  cbeta1=0.0;
  
  // beta = || v0 - alpha * v1 ||, summed through SumMPI_dc.
#pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1)
  for(i = 1; i <= i_max; i++){
    cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]);
  }
  cbeta1 = SumMPI_dc(cbeta1);
  beta1=creal(cbeta1);
  beta1=sqrt(beta1);
  beta[1]=beta1;
  ebefor=0;

/*
      Limit the maximum number of iterations to the dimension of the wavefunction
    */
  i_max_tmp = SumMPI_li(i_max);
  if(i_max_tmp < X->Def.Lanczos_max){
    X->Def.Lanczos_max = i_max_tmp;
  }
  if(i_max_tmp < X->Def.LanczosTarget){
    X->Def.LanczosTarget = i_max_tmp;
  }
  // Global dimension 1: alpha[1] is taken as the eigenvalue, no iteration.
  if(i_max_tmp == 1){
    E[1]=alpha[1];
    vec12(alpha,beta,stp,E,X);		
    X->Large.itr=stp;
    X->Phys.Target_energy=E[k_exct];
    iconv=0;
    fprintf(stdoutMPI,"  LanczosStep  E[1] \n");
    fprintf(stdoutMPI,"  stp=%d %.10lf \n",stp,E[1]);
  }
  else{
#ifdef lapack
    fprintf(stdoutMPI, "  LanczosStep  E[1] E[2] E[3] E[4] E_Max/Nsite\n");
#else
    fprintf(stdoutMPI, "  LanczosStep  E[1] E[2] E[3] E[4] \n");
#endif
  for(stp = 2; stp <= X->Def.Lanczos_max; stp++){
    // Recurrence update: v1 <- (v0 - alpha*v1)/beta, v0 <- -beta * (old v1).
#pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1)
    for(i=1;i<=i_max;i++){
      temp1 = v1[i];
      temp2 = (v0[i]-alpha1*v1[i])/beta1;
      v0[i] = -beta1*temp1;
      v1[i] =  temp2;
    }

      mltply(X, v0, v1);
      TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp);
    alpha1=creal(X->Large.prdct);
    alpha[stp]=alpha1;
    cbeta1=0.0;

#pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1)
    for(i=1;i<=i_max;i++){
      cbeta1+=conj(v0[i]-alpha1*v1[i])*(v0[i]-alpha1*v1[i]);
    }
    cbeta1 = SumMPI_dc(cbeta1);
    beta1=creal(cbeta1);
    beta1=sqrt(beta1);
    beta[stp]=beta1;

    Target  = X->Def.LanczosTarget;
        
    // Step 2: first estimate from the 2x2 tridiagonal matrix; creates the step log file.
    if(stp==2){      
     #ifdef lapack
      d_malloc2(tmp_mat,stp,stp);
      d_malloc1(tmp_E,stp+1);

       for(int_i=0;int_i<stp;int_i++){
         for(int_j=0;int_j<stp;int_j++){
           tmp_mat[int_i][int_j] = 0.0;
         }
       } 
       tmp_mat[0][0]   = alpha[1]; 
       tmp_mat[0][1]   = beta[1]; 
       tmp_mat[1][0]   = beta[1]; 
       tmp_mat[1][1]   = alpha[2]; 
       DSEVvalue(stp,tmp_mat,tmp_E);
       E[1] = tmp_E[0];
       E[2] = tmp_E[1];
       // NOTE(review): stp == 2 here, so if d_malloc1 allocates stp+1 (= 3)
       // elements, tmp_E[3] is past the end and tmp_E[2] may be unset -- confirm.
       E[3] = tmp_E[2];
       E[4] = tmp_E[3];
       d_free1(tmp_E,stp+1);
       d_free2(tmp_mat,stp,stp);
     #else
       bisec(alpha,beta,stp,E,4,eps_Bisec);
     #endif
       ebefor=E[Target];
       
       childfopenMPI(sdt_2,"w", &fp);
#ifdef lapack
       fprintf(stdoutMPI, "  stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]);

       fprintf(fp, "LanczosStep  E[1] E[2] E[3] E[4] E_Max/Nsite\n");
       fprintf(fp, "stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]);
#else
       fprintf(stdoutMPI, "  stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]);
       fprintf(fp, "LanczosStep  E[1] E[2] E[3] E[4] \n");
       fprintf(fp,"stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx \n",stp,E[1],E[2]);
#endif
       fclose(fp);
    }
            
    // Every even step after the second: recompute estimates, log, test convergence.
    if(stp>2 && stp%2==0){
      
      childfopenMPI(sdt_2,"a", &fp);
      
#ifdef lapack
      d_malloc2(tmp_mat,stp,stp);
      d_malloc1(tmp_E,stp+1);

       for(int_i=0;int_i<stp;int_i++){
         for(int_j=0;int_j<stp;int_j++){
           tmp_mat[int_i][int_j] = 0.0;
         }
       } 
       // Fill the stp x stp tridiagonal matrix: alpha on the diagonal, beta beside it.
       tmp_mat[0][0]   = alpha[1]; 
       tmp_mat[0][1]   = beta[1]; 
       for(int_i=1;int_i<stp-1;int_i++){
         tmp_mat[int_i][int_i]     = alpha[int_i+1]; 
         tmp_mat[int_i][int_i+1]   = beta[int_i+1]; 
         tmp_mat[int_i][int_i-1]   = beta[int_i]; 
       }
       // int_i == stp-1 after the loop: last row has no super-diagonal entry.
       tmp_mat[int_i][int_i]       = alpha[int_i+1]; 
       tmp_mat[int_i][int_i-1]     = beta[int_i]; 
       DSEVvalue(stp,tmp_mat,tmp_E);
       E[1] = tmp_E[0];
       E[2] = tmp_E[1];
       E[3] = tmp_E[2];
       E[4] = tmp_E[3];
       // Last entry is reported as E_Max in the log line below.
       E[0] = tmp_E[stp-1];
       d_free1(tmp_E,stp+1);
       d_free2(tmp_mat,stp,stp);       
       fprintf(stdoutMPI, "  stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI);
       fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4],E[0]/(double)X->Def.NsiteMPI);
#else
       bisec(alpha,beta,stp,E,4,eps_Bisec);
       fprintf(stdoutMPI, "  stp = %d %.10lf %.10lf %.10lf %.10lf \n",stp,E[1],E[2],E[3],E[4]);
       fprintf(fp,"stp=%d %.10lf %.10lf %.10lf %.10lf\n",stp,E[1],E[2],E[3],E[4]);
#endif 
       fclose(fp);

      // Converged when the relative change of E[Target] since the previous
      // check is below eps_Lanczos, or when beta[stp] is below 1e-14.
      if(fabs((E[Target]-ebefor)/E[Target])<eps_Lanczos || fabs(beta[stp])<pow(10.0, -14)){
        vec12(alpha,beta,stp,E,X);		
        X->Large.itr=stp;       
        X->Phys.Target_energy=E[k_exct];
	iconv=0;
	break;
      }

      ebefor=E[Target];            
    }
  }        
  }

  sprintf(sdt,cFileNameTimeKeep,X->Def.CDataFileHead);
  if(iconv!=0){
    // NOTE(review): the not-converged message is formatted into sdt, but sdt is
    // not used again before returning -- presumably it was meant to be printed; confirm.
    sprintf(sdt,  cLogLanczos_EigenValueNotConverged);
    return -1;
  }

  TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueFinish, "a");
  fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd);

  return 0;
}
예제 #8
0
파일: random.c 프로젝트: swishart/potfit
/* Return the next value drawn from the file-level generator g_dsfmt
 * via dsfmt_genrand_close_open(). */
double eqdist() {
    const double sample = dsfmt_genrand_close_open(&g_dsfmt);
    return sample;
}
예제 #9
0
/*
 * Write 16 bytes into `uuid`: the in-memory representation of two doubles,
 * each a draw from `state` scaled by DBL_MAX.
 * The fixed length of 16 assumes two 8-byte doubles.
 */
static void gen_random_uuid(uint8_t *uuid, dsfmt_t *state)
{
    double halves[2];

    halves[0] = dsfmt_genrand_close_open(state) * DBL_MAX;
    halves[1] = dsfmt_genrand_close_open(state) * DBL_MAX;

    memcpy(uuid, halves, 16);
}
예제 #10
0
/** 
 * Rebuild the eigenvector for state k_exct by replaying the Lanczos
 * recurrence and accumulating the basis vectors weighted by vec[k_exct][.].
 *
 * The start vector is constructed again according to initial_mode, then
 * X->Large.itr - 1 recurrence steps are replayed using the stored alpha[]
 * and beta[] coefficients. The accumulated vector is copied into the global
 * v0 and normalised to unit length.
 * 
 * @param X binding structure; reads X->Def.k_exct, X->Def.initial_iv,
 *          X->Large.iv, X->Large.itr and X->Check.idim_max.
 * @author Takahiro Misawa (The University of Tokyo)
 * @author Kazuyoshi Yoshimi (The University of Tokyo) 
 */
void Lanczos_EigenVector(struct BindStruct *X){

  printf("%s", cLogLanczos_EigenVectorStart);
  
  int i,j,i_max,iv;  	 
  int k_exct;
  double beta1,alpha1,dnorm, dnorm_inv;
  double complex temp1,temp2;

// for GC
  long unsigned int u_long_i;
  dsfmt_t dsfmt;

  k_exct = X->Def.k_exct;
	
  iv=X->Large.iv;
  i_max=X->Check.idim_max;
 
  //Eigenvectors by Lanczos method
  //initialization: initialization should be identical to that of Lanczos_EigenValue.c
#pragma omp parallel for default(none) private(i) shared(v0, v1, vg) firstprivate(i_max)
  for(i=1;i<=i_max;i++){
    v0[i]=0.0+0.0*I;
    v1[i]=0.0+0.0*I;
    vg[i]=0.0+0.0*I;
  }
    
  if(initial_mode == 0){
    // Single non-zero component at iv; vg starts with its weighted copy.
    v1[iv]=1.0;
    vg[iv]=vec[k_exct][1];
  }else if(initial_mode==1){      
    // Random start vector with components drawn from [-1, 1) + i[-1, 1).
    iv = X->Def.initial_iv;
    u_long_i = 123432 + abs(iv);
    dsfmt_init_gen_rand(&dsfmt, u_long_i);    
    for(i = 1; i <= i_max; i++){
      v1[i]=2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)+2.0*(dsfmt_genrand_close_open(&dsfmt)-0.5)*I;
    }
    dnorm=0;
    #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: dnorm) 
    for(i=1;i<=i_max;i++){
      dnorm += conj(v1[i])*v1[i];
    }    
    dnorm=sqrt(dnorm);
    dnorm_inv=1.0/dnorm;
#pragma omp parallel for default(none) private(i) shared(v1,vg,vec,k_exct) firstprivate(i_max, dnorm_inv)
    for(i=1;i<=i_max;i++){
      v1[i]        = v1[i]*dnorm_inv;
      vg[i]        = v1[i]*vec[k_exct][1];
    }
  }
  
  mltply(X, v0, v1);
  
  alpha1=alpha[1];
  beta1=beta[1];

  // Add the second basis vector (v0 - alpha*v1)/beta with weight vec[k_exct][2].
#pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct)
  for(j=1;j<=i_max;j++){
    vg[j]+=vec[k_exct][2]*(v0[j]-alpha1*v1[j])/beta1;
  }
    
  //iteration
  for(i=2;i<=X->Large.itr-1;i++){
    // Recurrence update: v1 <- (v0 - alpha*v1)/beta, v0 <- -beta * (old v1).
#pragma omp parallel for default(none) private(j, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1)
    for(j=1;j<=i_max;j++){
      temp1=v1[j];
      temp2=(v0[j]-alpha1*v1[j])/beta1;
      v0[j]=-beta1*temp1;
      v1[j]=temp2;        
    }
    mltply(X, v0, v1);   
	
    alpha1 = alpha[i];
    beta1  = beta[i];

#pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct, i)
    for(j=1;j<=i_max;j++){
      vg[j] += vec[k_exct][i+1]*(v0[j]-alpha1*v1[j])/beta1;
    }	
  }

  // The accumulated vector is handed back through v0.
#pragma omp parallel for default(none) private(j) shared(v0, vg) firstprivate(i_max)
    for(j=1;j<=i_max;j++){
      v0[j] = vg[j];
    } 
      
  //normalization
  dnorm=0.0;
#pragma omp parallel for default(none) reduction(+:dnorm) private(j) shared(v0) firstprivate(i_max)
  for(j=1;j<=i_max;j++){
    dnorm += conj(v0[j])*v0[j];
  }
  dnorm=sqrt(dnorm);
  // Scale by the reciprocal of the norm; assigning dnorm itself here would
  // multiply the vector by its norm instead of normalising it.
  dnorm_inv=1.0/dnorm;
#pragma omp parallel for default(none) private(j) shared(v0) firstprivate(i_max, dnorm_inv)
  for(j=1;j<=i_max;j++){
    v0[j] = v0[j]*dnorm_inv;
  }
  
  TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenVectorFinish, "a");
  printf("%s", cLogLanczos_EigenVectorEnd);
}