//------------------------------------------------------------------------------
double MonteCarloImportanceSampled::integrate(const vec &p, const vec &q, const
                                              vec &r, const vec &s) {
    double I; double x, theta, cosTheta, sinTheta;

    I = 0;
    for (int i = 0; i < mcSamples; i++) {
        x = sqrt(-log(1.0 - ran3(&idum)));
        theta = 2*PI*ran3(&idum);

        cosTheta = cos(theta);
        sinTheta = sin(theta);

        I +=      hermitePolynomial(p[2], x*cosTheta)
                * hermitePolynomial(q[2], x*sinTheta)
                * hermitePolynomial(r[2], x*cosTheta)
                * hermitePolynomial(s[2], x*sinTheta)
                / (sqrt( x*x*(1 - sin(2*theta))/w  + aa));
    }

    // The PI-factor has been reomved due to the change of variables.
    I *= 1 / (sqrt(pow(2, p[2] + q[2] + r[2] + s[2])
            * factorial(p[2])
            * factorial(q[2])
            * factorial(r[2])
            * factorial(s[2])));

    I /= mcSamples;

#if DEBUG
    cout << "p = " << p[2] << " q = " << q[2] << " r = " << r[2] << " s = " << s[2] << endl;
    cout << "MonteCarloIntegrator \t= " << I << "\t L = " << L << "\t MC samples = " << mcSamples << endl;
#endif
    return I;
}
Ejemplo n.º 2
0
//------------------------------------------------------------------------------
// Modified from NumRep 2 p289-290
double gasdev(long *idum)
{
    double ran3(long *idum);
    // One call produces two deviates; the second is cached between calls.
    static int haveSpare = 0;
    static double spare;
    double scale, radiusSq, a, b;

    // A negative seed discards any cached deviate.
    if (*idum < 0) haveSpare = 0;

    if (haveSpare != 0) {
        // Hand out the deviate saved by the previous call.
        haveSpare = 0;
        return spare;
    }

    // Draw points in the square [-1,1]x[-1,1] until one lies strictly inside
    // the unit circle and away from the origin.
    for (;;) {
        a = 2.0*ran3(idum)-1.0;
        b = 2.0*ran3(idum)-1.0;
        radiusSq = a*a + b*b;
        if (radiusSq >= 1.0 || radiusSq == 0.0) continue;
        break;
    }

    scale = sqrt(-2.0*log(radiusSq)/radiusSq);
    spare = a*scale;
    haveSpare = 1;
    return b*scale;
}
Ejemplo n.º 3
0
/* Returns a normally distributed deviate built from two uniform deviates
   obtained through ran3(idum). Each pass through the transformation yields
   two deviates: one is returned immediately, the other is kept in a static
   variable and returned by the next call. */
static double gasdev (int *idum)
{
    static int iset = 0;   /* non-zero when gset holds a saved deviate */
    static double gset;    /* deviate saved for the next call */
    double fac, r, v1, v2;

    if(iset==0)
    {
        do /* no deviate */
        {
            /* get two uniform numbers in the square extending from -1 to +1
               in each direction */
            v1=2.0*ran3(idum)-1.0;
            v2=2.0*ran3(idum)-1.0;
            r=v1*v1+v2*v2; /* see if they are in the unit circle */
        }
        /* if not, try again; r == 0 must be rejected as well, because
           log(r)/r below is not finite at the origin */
        while (r >= 1.0 || r == 0.0);
        /* make the Box-Muller transformation to get two normal deviates,
           return one, save the other for next call */
        fac=sqrt(-2.0*log(r)/r);
        gset=v1*fac;
        iset=1; /* set flag */
        return(v2*fac);
    }

    else
    {
        iset=0; /* extra deviate from last time, unset flag, return it */
        return (gset);
    }

}
Ejemplo n.º 4
0
double gasdev(long*idum)
/* Returns a normally distributed deviate with zero mean and unit */
/* variance, using ran3(idum) as the source of uniform deviates.  */
{
  static int cached=0;     /* non-zero while 'saved' holds an unused deviate */
  static double saved;
  double scale,radius2,a,b;

  if(cached!=0) {
    /* second deviate of the previous pair */
    cached=0;
    return saved;
  }

  /* pick a point inside the unit circle, excluding the origin */
  do {
    a=2*ran3(idum)-1;
    b=2*ran3(idum)-1;
    radius2=a*a+b*b;
  } while(radius2>=1||radius2==0);

  scale=sqrt(-2*log(radius2)/radius2);
  saved=a*scale;
  cached=1;
  return b*scale;
}
Ejemplo n.º 5
0
Archivo: qvmc.cpp Proyecto: sarahr/QVMC
/**
 * Proposes a trial position for one particle (Metropolis move): every
 * coordinate of the current position is shifted by a uniform random amount
 * scaled by delta, and the trial position object is updated afterwards.
 * @param p - particle that has been moved compared to previous configuration
 * @param alpha - first variational parameter (not read in this function)
 * @param beta - second variational parameter (not read in this function)
 */
void Metropolis::trial_pos(int p, double alpha, double beta) {

    int coord = 0;
    while (coord < dim) {
        // One ran3 draw per coordinate, mapped by 2*u - 1 before scaling.
        R_tr(p, coord) = R_cur(p, coord) + delta * (2 * ran3(&idum) - 1);
        ++coord;
    }

    // Let the trial wave function's position object pick up the new row.
    Trial->Pos_tr->update_p(p, R_tr);
}
Ejemplo n.º 6
0
//To be deleted... cvh 
// Returns v displaced by a random step (ran3() scaled by 0.5). Steps are
// redrawn until every component of the displaced point lies within
// [-x, x], [-y, y] and [-z, z] respectively.
Vector3d move(Vector3d v, double x, double y, double z){
  const double scale =.5;
  Vector3d step;
  for(;;){
    step = ran3()*scale;
    Vector3d candidate = v+step;
    if(!(fabs(candidate[0]) <= x)) continue;
    if(!(fabs(candidate[1]) <= y)) continue;
    if(!(fabs(candidate[2]) <= z)) continue;
    return step + v;
  }
}
Ejemplo n.º 7
0
//To be deleted... cvh
// Returns v displaced by a random step (ran3() scaled by 0.05). Steps are
// redrawn until the displaced point is no farther than R from the origin.
Vector3d move(Vector3d v, double R){
  const double scale = .05;
  Vector3d step;
  do {
    step = ran3()*scale;
  } while(!(distance(v+step,Vector3d(0,0,0))<=R));
  return step + v;
}
Ejemplo n.º 8
0
extern void FindSolution(float *dX, float *dY, float *Orient,
                         float *X1, float *Y1, float *M1, int NL1,
                         float *X2, float *Y2, float *M2, int NL2,
                         int *NNeigh, int **NeighList,
                         SuperAlignParmRec *PSAPP)
/* Refine the offsets *dX, *dY and the angle *Orient that match the
   (X1,Y1,M1) list to the (X2,Y2,M2) list.  The incoming solution is taken
   as the starting point; NNeigh and NeighList are passed through to
   CalcSigma unchanged.  On return *dX, *dY and *Orient hold the
   best-scoring trial that was found. */
{
  float TrialScore,TrialDX,TrialDY,TrialOrient,Step,TopScore,NumC,TopNumC;
  int Iter;
  long Seed;

  /* score of the starting solution */
  TrialScore = CalcSigma(*dX,*dY,*Orient,X1,Y1,M1,NL1,X2,Y2,M2,NL2,
                         NNeigh,NeighList,PSAPP,0,NULL,NULL,&NumC);
  TopScore = TrialScore;
  TopNumC = NumC;
  Seed = -1;

  /* 1000 random trials around the current best; the step size shrinks
     exponentially with the iteration number */
  for (Iter=0;Iter<1000;Iter++) {
    Step = PSAPP->MaxDist*exp(-(float)Iter/200)/4;
    /* draw order (angle, then x, then y) fixes which random number each gets */
    TrialOrient = *Orient + (0.5-ran3(&Seed))*Step/100;
    TrialDX = *dX + (0.5-ran3(&Seed))*Step;
    TrialDY = *dY + (0.5-ran3(&Seed))*Step;
    TrialScore = CalcSigma(TrialDX,TrialDY,TrialOrient,X1,Y1,M1,NL1,X2,Y2,M2,NL2,
                           NNeigh,NeighList,PSAPP,0,NULL,NULL,&NumC);
    if (!(TrialScore > TopScore)) {
      /* rejected: fall back to the values of the best trial so far */
      TrialScore = TopScore;
      NumC = TopNumC;
      continue;
    }
    /* accepted: this trial becomes the new best solution.
       NOTE(review): TopNumC is not refreshed here, so it keeps the value
       obtained for the starting solution -- confirm this is intended. */
    TopScore = TrialScore;
    *dX = TrialDX;
    *dY = TrialDY;
    *Orient = TrialOrient;
  }
}
// Returns a normally distributed deviate built from two uniform deviates
// obtained through ran3(idum). Each transformation yields two deviates: one
// is returned at once, the other is kept in a static variable and handed out
// by the following call.
float Ctest_stream_power_model::gasdev(int *idum)
{
	static int iset=0;    // non-zero when gset holds a saved deviate
	static float gset;    // deviate saved for the next call
	float fac,r,v1,v2;
	//float ran3();

	if  (iset == 0) {
		do {
			v1=2.0*ran3(idum)-1.0;
			v2=2.0*ran3(idum)-1.0;
			r=v1*v1+v2*v2;
		// Reject points outside the unit circle and the origin itself:
		// log(r)/r below is not finite for r == 0.
		} while (r >= 1.0 || r == 0.0);
		fac=sqrt(-2.0*log(r)/r);
		gset=v1*fac;
		iset=1;
		return v2*fac;
	} else {
		iset=0;
		return gset;
	}
}
Ejemplo n.º 10
0
/****************************************************************
 gamma deviate for integer shape argument.  Code modified from pp
 292-293 of Numerical Recipes in C, 2nd edition.
 ****************************************************************/
double igamma_dev(int ia)
{
    int k;
    double am,e,s,v1,v2,x,y;
    long lidum=1L;

    if (ia < 1)
    {
        /* only reported: the exit call is commented out, so execution
           continues below */
        printf("Error: arg of igamma_dev was <1\n");

       // exit(1);
    }

    if (ia < 6)
    {
        /* small shape: minus the log of a product of ia uniform deviates */
        x=1.0;
        for (k=0; k<ia; k++)
            x *= ran3(&lidum);
        return(-log(x));
    }

    /* larger shape: rejection method; am and s depend only on ia */
    am=ia-1;
    s=sqrt(2.0*am+1.0);
    for (;;)
    {
        /* a point inside the unit circle gives y = v2/v1, which is */
        /* equivalent to y = tan(Pi * uni()).                        */
        do
        {
            v1=2.0*ran3(&lidum)-1.0;
            v2=2.0*ran3(&lidum)-1.0;
        }while (v1*v1+v2*v2 > 1.0);
        y=v2/v1;
        x=s*y+am;
        if (x <= 0.0)
            continue;               /* candidate not positive: redraw */
        e=(1.0+y*y)*exp(am*log(x/am)-s*y);
        if (ran3(&lidum) > e)
            continue;               /* candidate rejected: redraw */
        return(x);
    }
}
Ejemplo n.º 11
0
/****************************************************************
 Random deviates from standard gamma distribution with density

          x^(a-1) exp[ -x ]
   f(x) = -----------------
              Gamma[a]
 
 where a is the shape parameter.  The algorithm for integer a comes
 from numerical recipes, 2nd edition, pp 292-293.  The algorithm for
 a<1 uses code from p 213 of Statistical Computing, by Kennedy and
 Gentle, 1980 edition.  This algorithm was originally published in:
 
 Ahrens, J.H. and U. Dieter (1974), "Computer methods for sampling from
 Gamma, Beta, Poisson, and Binomial Distributions".  COMPUTING
 12:223-246.
 
 The mean and variance of these values are both supposed to equal a.
 My tests indicate that they do.
 
 This algorithm has problems when a is small.  In single precision, the
 problem  arises when a<0.1, roughly.  That is why I have declared
 everything as double below.  Trouble is, I still don't know how small
 a can be without causing trouble.  Mean and variance are ok at least
 down to a=0.01.  f(x) doesn't seem to have a series expansion around
 x=0.
 ****************************************************************/
double
gamma_dev(double a) {

    int whole;
    double u, b, p, x, y=0.0, recip_a;
    long lidum=1L;

    if(a <= 0) {
        /* only reported: the exit call is commented out, so execution
           continues below */
        printf("\ngamma_dev: parameter must be positive\n");
      //  exit(1);
    }

    /* split the shape into its integer and fractional parts */
    whole = (int) (floor(a));
    a -= whole;
    if(whole > 0) {
        /* the integer part is handled by igamma_dev */
        y = igamma_dev(whole);
        if(a==0.0) return(y);
    }

    /* gamma deviate for the fractional part: draw candidates until one
       passes its acceptance test */
    b = (M_E + a)/M_E;
    recip_a = 1.0/a;
    while(1) {
        u = ran3(&lidum);
        p = b*u;
        if(p > 1) {
            x = -log( (b-p)/a );
            if( !(ran3(&lidum) > pow(x, a-1)) ) break;
        }
        else {
            x = pow(p, recip_a);
            if( !(ran3(&lidum) > exp(-x)) ) break;
        }
    }
    return(x+y);
}
/* Fills the spin lattice with random +1/-1 values and sets up the
   neighbour index tables.  The grid argument is not read here. */
void initialize(GRID_INFO_TYPE* grid,int spin[][LENGTH], int nbr1[], int nbr2[])
{
  int site, col, row;
  float draw;

  /* start with random spins: one ran3 draw per site, second index outermost */
  for (row = 0; row < LENGTH; row++) {
    for (col = 0; col < LENGTH; col++) {
      draw = ran3(&iseed);
      spin[col][row] = (draw > 0.5) ? 1 : -1;
    }
  }

  /* set up neighbor list; only indices 1 .. LENGTH-2 are filled in */
  for (site = 1; site < LENGTH-1; site++) {
    nbr1[site] = site - 1;
    nbr2[site] = site + 1;
  }
}
Ejemplo n.º 13
0
/* Demo driver: solves a fixed N x N linear system by LU decomposition,
   perturbs the solution with random noise and lets mprove restore it. */
int main(void)
{
	int row,col,*perm;
	long seed=(-13);
	float parity,*sol,**coef,**lu;
	static float coef_init[NP][NP]=
		{1.0,2.0,3.0,4.0,5.0,
		2.0,3.0,4.0,5.0,1.0,
		1.0,1.0,1.0,1.0,1.0,
		4.0,5.0,1.0,2.0,3.0,
		5.0,1.0,2.0,3.0,4.0};
	static float rhs[N+1]={0.0,1.0,1.0,1.0,1.0,1.0};

	/* all work arrays use indices 1..N */
	perm=ivector(1,N);
	sol=vector(1,N);
	coef=convert_matrix(&coef_init[0][0],1,N,1,N);
	lu=matrix(1,N,1,N);

	/* start from the right-hand side and a working copy of the matrix */
	for (row=1;row<=N;row++) {
		sol[row]=rhs[row];
		for (col=1;col<=N;col++) {
			lu[row][col]=coef[row][col];
		}
	}

	/* decompose the copy, then back-substitute in place */
	ludcmp(lu,N,perm,&parity);
	lubksb(lu,N,perm,sol);
	printf("\nSolution vector for the equations:\n");
	for (row=1;row<=N;row++) {
		printf("%12.6f",sol[row]);
	}
	printf("\n");

	/* now phoney up the solution and let mprove fix it */
	for (row=1;row<=N;row++) {
		sol[row] *= (1.0+0.2*ran3(&seed));
	}
	printf("\nSolution vector with noise added:\n");
	for (row=1;row<=N;row++) {
		printf("%12.6f",sol[row]);
	}
	printf("\n");

	mprove(coef,lu,N,perm,rhs,sol);
	printf("\nSolution vector recovered by mprove:\n");
	for (row=1;row<=N;row++) {
		printf("%12.6f",sol[row]);
	}
	printf("\n");

	/* release everything in the same order as the original demo */
	free_matrix(lu,1,N,1,N);
	free_convert_matrix(coef,1,N,1,N);
	free_vector(sol,1,N);
	free_ivector(perm,1,N);
	return 0;
}
Ejemplo n.º 14
0
// Returns v displaced by one random step: ran3() scaled by 0.5.
Vector3d move(Vector3d v){
  const double stepScale = .5;
  return v+stepScale*ran3();
}
Ejemplo n.º 15
0
/*==============================================================================
 * MAIN: where everything starts ....
 *==============================================================================*/
/*
 * Command-line driver. After reading the simulation via startrun(), it
 * places Nobserver observers (how they are chosen depends on the OBSERVER_*
 * compile-time switches), gathers for each observer the particles inside
 * Nspheres spheres with radii between Rsphere_min and Rsphere_max, and
 * averages the line-of-sight Hubble parameter over those particles. The
 * per-observer values and the per-sphere summary are written to files whose
 * names start with the configured outfile prefix.
 *
 * Usage: <prog> sigmaH.input <Nobserver|observer_file> Nspheres Rsphere_min Rsphere_max
 *    or: <prog> --parameterfile
 */
int main(int argc, char **argv)
{
    FILE    *fpout;
    char     AMIGA_input[MAXSTRING], outfile[MAXSTRING];
    int      no_timestep;
    double   timecounter;
    double   timestep;
    uint64_t Nobserver;
    uint64_t iobserver, jobserver;
    partptr  cur_part;
    double   Rsphere,Rsphere_min,Rsphere_max,Rfrac;
    int      Nspheres;
    uint64_t isphere;
    int      iseed=ISEED;
    HALO     halo;
    double   x_fac, r_fac, v_fac;
    uint64_t ineighbour;
    double   dx,dy,dz,d;
    double   vx,vy,vz,vlos;
    double   Hubble;
    double  *Hlos, *Hsphere, *sigmaHsphere;
    uint64_t norm;
    int      omp_id;
    time_t   elapsed = (time_t)0;

#ifdef OBSERVER_FROM_FILE
    FILE    *fpin;
    char     observer_file[MAXSTRING], line[MAXSTRING];
    double  *xobs, *yobs, *zobs, *vxobs, *vyobs, *vzobs;
    int      idummy;
#endif

    //========================================================
    //               deal with command line
    //========================================================

    // maybe the user only wants the parameterfile?
    // (this has to be tested before the argument count, otherwise the
    //  two-argument "--parameterfile" form is rejected by the usage check)
    if(argc > 1 && strcmp(argv[1],"--parameterfile") == 0)  {
        global_io.params                 = (io_parameter_t) calloc(1,sizeof(io_parameter_struct_t));
        global_io.params->outfile_prefix = (char *) calloc(MAXSTRING,sizeof(char));
        global.a                         = 1;
        strcpy(global_io.params->outfile_prefix,"AHF");
        write_parameterfile();
        exit(0);
    }

#ifdef OBSERVER_FROM_FILE
    if(argc<6) {
        fprintf(stderr,"usage:    %s sigmaH.input observer_file Nspheres Rsphere_min Rsphere_max\n", argv[0]);
        fprintf(stderr,"       or %s --parameterfile\n", argv[0]);
        exit(1);
    }
#else
    if(argc<6) {
        fprintf(stderr,"usage:    %s sigmaH.input Nobserver Nspheres Rsphere_min Rsphere_max\n", argv[0]);
        fprintf(stderr,"       or %s --parameterfile\n", argv[0]);
        exit(1);
    }
#endif

    strcpy(AMIGA_input, argv[1]);
    Nspheres     = (uint64_t) atoi(argv[3]);
    Rsphere_min  = (double) atof(argv[4]);
    Rsphere_max  = (double) atof(argv[5]);

#ifdef OBSERVER_FROM_FILE
    strcpy(observer_file, argv[2]);
#else
    Nobserver    = (uint64_t) atoi(argv[2]);
#endif

#ifdef OBSERVER_FROM_FILE
    //========================================================
    //               get observers from file
    //========================================================
    fpin = fopen(observer_file,"r");
    if(fpin == NULL) {
        fprintf(stderr,"FATAL: cannot open %s for reading\n",observer_file);
        exit(0);
    }

    Nobserver = 0;
    xobs      = NULL;
    yobs      = NULL;
    zobs      = NULL;
    vxobs     = NULL;
    vyobs     = NULL;
    vzobs     = NULL;

    // read line by line; testing the fgets() result (instead of feof())
    // also handles a last line without trailing newline and stops on
    // read errors
    while(fgets(line,MAXSTRING,fpin) != NULL) {
        if(strncmp(line,"#",1) != 0) {
            Nobserver++;
            xobs  = (double *) realloc(xobs, Nobserver*sizeof(double));
            yobs  = (double *) realloc(yobs, Nobserver*sizeof(double));
            zobs  = (double *) realloc(zobs, Nobserver*sizeof(double));
            vxobs = (double *) realloc(vxobs,Nobserver*sizeof(double));
            vyobs = (double *) realloc(vyobs,Nobserver*sizeof(double));
            vzobs = (double *) realloc(vzobs,Nobserver*sizeof(double));

            // scan line for positions and velocities (if there are no velocities, do not try to use them!)

            // generic input format
//      sscanf(line,"%lf %lf %lf %lf %lf %lf",
//             (xobs+(Nobserver-1)),
//             (yobs+(Nobserver-1)),
//             (zobs+(Nobserver-1)),
//             (vxobs+(Nobserver-1)),
//             (vyobs+(Nobserver-1)),
//             (vzobs+(Nobserver-1))  );

            // void files from Stefan
            sscanf(line,"%d %lf %lf %lf",
                   &idummy,
                   (xobs+(Nobserver-1)),
                   (yobs+(Nobserver-1)),
                   (zobs+(Nobserver-1)));
            vxobs[Nobserver-1] = 0.0;
            vyobs[Nobserver-1] = 0.0;
            vzobs[Nobserver-1] = 0.0;
        }
    }

    fclose(fpin);
#endif

    //========================================================
    //                     startrun
    //========================================================
    elapsed  = (time_t)0;
    elapsed -= time(NULL);

    timing.io       -= time(NULL);
    timing.startrun -= time(NULL);

    startrun((argc > 1) ? argv[1] : NULL, &timecounter, &timestep, &no_timestep);

    timing.startrun += time(NULL);
    timing.io       += time(NULL);

    fprintf(stderr,"\nsigmaH parameters:\n");
    fprintf(stderr,"------------------\n");
    fprintf(stderr,"Nobserver     = %" PRIu64 "\n",Nobserver);
    fprintf(stderr,"Nspheres      = %d\n",Nspheres);
    fprintf(stderr,"Rsphere_min   = %lf\n",Rsphere_min);
    fprintf(stderr,"Rsphere_max   = %lf\n\n",Rsphere_max);


    elapsed += time(NULL);
    fprintf(stderr,"o startrun done in %ld sec.\n",elapsed);

    // some relevant stuff
    global.fst_part = global_info.fst_part;

    //========================================================
    //                 generate SFC keys
    //========================================================
    elapsed  = (time_t)0;
    elapsed -= time(NULL);
    fprintf(stderr,"o generating sfc keys ... ");

    timing.sfckey -= time(NULL);
    for (uint64_t i=0; i<global_info.no_part; i++) {
        partptr part=global_info.fst_part+i;
        part->sfckey = sfc_curve_calcKey(global_info.ctype,
                                         (double)(part->pos[0]),
                                         (double)(part->pos[1]),
                                         (double)(part->pos[2]),
                                         BITS_PER_DIMENSION);
    }
    // sorting all particles to have fast access later on
    qsort(global_info.fst_part,
          global_info.no_part,
          sizeof(part),
          &cmp_sfckey_part);
    timing.sfckey += time(NULL);

    elapsed += time(NULL);
    fprintf(stderr,"   done in %ld sec.\n",elapsed);


    //========================================================
    //                    analysis
    //========================================================
    elapsed  = (time_t)0;
    elapsed -= time(NULL);
    fprintf(stderr,"o placing observers:\n");

    ahf.time -= time(NULL);

    // derive mean Hlos for every observer and every sphere radius
    //=============================================================

    // dump observer positions to file
    sprintf(outfile,"%s-Nobserver%07" PRIu64,global_io.params->outfile_prefix,Nobserver);
    fpout = fopen(outfile,"w");
    if(fpout == NULL) {
        fprintf(stderr,"FATAL: cannot open %s for writing\n",outfile);
        exit(0);
    }


    // prepare some conversion factors
    x_fac   = simu.boxsize;
    r_fac   = simu.boxsize * global.a;
    v_fac   = simu.boxsize / simu.t_unit / global.a;   //NOTE: AHF stores a^2 \dot{x} as velocity!!!!
    Hubble  = calc_Hubble(global.a);                   // [km/sec/Mpc]

    fprintf(stderr,"    Hubble  = %lf\n",Hubble);
    fprintf(stderr,"    boxsize = %lf\n",simu.boxsize);
    fprintf(stderr,"    a       = %lf\n",global.a);
    fprintf(stderr,"    r_fac   = %lf\n",r_fac);
    fprintf(stderr,"    v_fac   = %lf\n",v_fac);


    // array to hold all Hlos values
    Hlos = (double *) calloc(Nobserver*Nspheres, sizeof(double));

#ifdef WITH_OPENMP
    // obtain thread number to serve as seed
    // NOTE(review): this call sits outside any parallel region, and iseed is
    // listed as private in the pragma below, so the value computed here does
    // not reach the threads -- the per-thread seeding needs to be revisited.
    omp_id = omp_get_thread_num();
    iseed *= omp_id;
#endif

    // loop over all observers
#ifdef WITH_OPENMP
    #pragma omp parallel for schedule(dynamic) private(jobserver,iobserver,cur_part,halo,isphere,Rsphere,ineighbour,dx,dy,dz,d,vx,vy,vz,vlos,iseed,norm) shared(Hlos,Nobserver,global_info,v_fac,r_fac)
#endif
    for(iobserver=0; iobserver<Nobserver; iobserver++) {

        /*===========================================================
         *                  OBSERVER_FROM_FILE
         *===========================================================*/
#ifdef OBSERVER_FROM_FILE
        halo.pos.x = xobs[iobserver]   /x_fac;
        halo.pos.y = yobs[iobserver]   /x_fac;
        halo.pos.z = zobs[iobserver]   /x_fac;
        halo.vel.x = vxobs[iobserver]  /v_fac;
        halo.vel.y = vyobs[iobserver]  /v_fac;
        halo.vel.z = vzobs[iobserver]  /v_fac;
#endif // OBSERVER_FROM_FILE


        /*===========================================================
         *              OBSERVER_CLOSEST_RANDOM_HALO
         *===========================================================*/
#ifdef OBSERVER_CLOSEST_RANDOM_HALO

        // pick random position throughout box
        halo.pos.x     = (float)(ran3(&iseed));
        halo.pos.y     = (float)(ran3(&iseed));
        halo.pos.z     = (float)(ran3(&iseed));
        halo.npart     = 0;
        Rfrac          = 0.25;

        while(halo.npart == 0) {
#ifdef DEBUG
            fprintf(stderr,"   observer at %f %f %f\n",halo.pos.x*simu.boxsize,halo.pos.y*simu.boxsize,halo.pos.z*simu.boxsize);
#endif

            // search for neighbours about that position
            halo.gatherRad = Rfrac*Rsphere_min/simu.boxsize;
            halo.npart     = 0;
            halo.ipart     = NULL;
            ahf_halos_sfc_gatherParts(&halo);
#ifdef DEBUG
            fprintf(stderr,"    -> found %ld nearest neighbours to observer\n",halo.npart);
#endif

            // pick closest particle
            if(halo.npart > 0) {
                sort_halo_particles(&halo);
                cur_part = global_info.fst_part + halo.ipart[0];
#ifdef DEBUG
                fprintf(stderr,"      -> using ipart=%ld\n",halo.ipart[0]);
#endif
            }
            else {
                Rfrac *= 2.;
#ifdef DEBUG
                fprintf(stderr,"      -> increasing neighbour search radius to %lf\n",Rfrac*Rsphere_min);
#endif
            }
        }

        // remove from halo structure again
        if(halo.npart > 0) free(halo.ipart);
        halo.ipart = NULL;
        halo.npart = 0;

        // prepare halo structure as we are re-using ahf_halos_sfc_gatherParts()
        halo.pos.x     = cur_part->pos[X];
        halo.pos.y     = cur_part->pos[Y];
        halo.pos.z     = cur_part->pos[Z];
        halo.vel.x     = cur_part->mom[X];
        halo.vel.y     = cur_part->mom[Y];
        halo.vel.z     = cur_part->mom[Z];
#endif // OBSERVER_CLOSEST_RANDOM_HALO


        /*===========================================================
         *                  OBSERVER_RANDOM_HALO
         *===========================================================*/
#ifdef OBSERVER_RANDOM_HALO
        // pick a random particle as the observer
        jobserver = (uint64_t) (ran3(&iseed) * global_info.no_part);
        while(jobserver >= global_info.no_part)
            jobserver = (uint64_t) (ran3(&iseed) * global_info.no_part);
        cur_part  = global_info.fst_part + jobserver;

        // prepare halo structure as we are re-using ahf_halos_sfc_gatherParts()
        halo.pos.x     = cur_part->pos[X];
        halo.pos.y     = cur_part->pos[Y];
        halo.pos.z     = cur_part->pos[Z];
        //fprintf(stderr,"cur_part=%ld jobserver=%ld global_info.no_part=%ld global_info.fst_part=%ld...",cur_part,jobserver,global_info.no_part,global_info.fst_part);
        halo.vel.x     = cur_part->mom[X];
        halo.vel.y     = cur_part->mom[Y];
        halo.vel.z     = cur_part->mom[Z];
        //fprintf(stderr,"set\n");
#endif


        /*===========================================================
         *                  OBSERVER_RANDOM_POINT
         *===========================================================*/
#ifdef OBSERVER_RANDOM_POINT
        halo.pos.x     = (float)(ran3(&iseed));
        halo.pos.y     = (float)(ran3(&iseed));
        halo.pos.z     = (float)(ran3(&iseed));
        halo.vel.x     = 0.0;
        halo.vel.y     = 0.0;
        halo.vel.z     = 0.0;
#endif


        /*===========================================================
         *                    OBSERVER_AT_REST
         *===========================================================*/
#ifdef OBSERVER_AT_REST
        halo.vel.x     = 0.0;
        halo.vel.y     = 0.0;
        halo.vel.z     = 0.0;
#endif

        fprintf(fpout,"%f %f %f %f %f %f\n",
                halo.pos.x*simu.boxsize,
                halo.pos.y*simu.boxsize,
                halo.pos.z*simu.boxsize,
                halo.vel.x*v_fac,
                halo.vel.y*v_fac,
                halo.vel.z*v_fac);
        fflush(fpout);


        /*===========================================================
         *                      SPHERE LOOP
         *===========================================================*/
        // loop over all spheres for this observer
        for(isphere=0; isphere<Nspheres; isphere++) {

            // sphere radius in internal units
            // (with a single sphere there is no spacing: use Rsphere_min
            //  instead of evaluating 0/0)
            Rsphere = (Rsphere_min + ((Nspheres > 1) ? (double)isphere*(Rsphere_max-Rsphere_min)/((double)(Nspheres-1)) : 0.0))/simu.boxsize;

            // add gathering radius and prepare array to hold neighbours
            halo.gatherRad = Rsphere;
            halo.npart     = 0;
            halo.ipart     = NULL;

            // collect all particles inside the gathering radius
            ahf_halos_sfc_gatherParts(&halo);


#ifdef DEBUG
            fprintf(stderr," (sfc gathered %ld neighbours)",halo.npart);
#endif

            // perform statistic for all neighbours
            Hlos[Hidx(isphere,iobserver,Nspheres)] = 0.0;
            norm                                   = 0;

            for(ineighbour=0; ineighbour<halo.npart; ineighbour++) {

                cur_part = global_info.fst_part+halo.ipart[ineighbour];

                // 3D distance
                dx  = (cur_part->pos[X] - halo.pos.x);
                dy  = (cur_part->pos[Y] - halo.pos.y);  // distance in internal units
                dz  = (cur_part->pos[Z] - halo.pos.z);

                if (dx >  0.5) dx -= 1.0;
                if (dy >  0.5) dy -= 1.0;
                if (dz >  0.5) dz -= 1.0;
                if (dx < -0.5) dx += 1.0;   // take care of periodic boundary conditions
                if (dy < -0.5) dy += 1.0;
                if (dz < -0.5) dz += 1.0;

                dx  *= r_fac;
                dy  *= r_fac;  // distance (eventually) in physical units
                dz  *= r_fac;

                d   = sqrt(pow2(dx)+pow2(dy)+pow2(dz));

                // 3D velocity
                vx  = (cur_part->mom[X] - halo.vel.x) * v_fac;
                vy  = (cur_part->mom[Y] - halo.vel.y) * v_fac; // peculiar velocity in physical units
                vz  = (cur_part->mom[Z] - halo.vel.z) * v_fac;
                vx += Hubble * (dx);
                vy += Hubble * (dy); // add correct Hubble flow
                vz += Hubble * (dz);

                // accumulate Hlos (skip particles at the observer position,
                // for which the line of sight is undefined)
                if(d > MACHINE_ZERO) {
                    // line-of-sight velocity as measured by present observer
                    vlos = (vx*dx + vy*dy + vz*dz)/d;

                    // Hubble parameter as measured by present observer
                    Hlos[Hidx(isphere,iobserver,Nspheres)] += vlos/d;
                    norm++;
                }
            } // ineighbour

            // mean Hlos for this sphere
            if(norm > 0)
                Hlos[Hidx(isphere,iobserver,Nspheres)] /= (double)norm;
            else
                Hlos[Hidx(isphere,iobserver,Nspheres)] = -1.;

            // physically remove particles from memory
            if(halo.ipart) {
                free(halo.ipart);
                halo.npart = 0;
                halo.ipart = NULL;
            }
        } // isphere

    } // iobserver

    fclose(fpout);
    elapsed += time(NULL);
    fprintf(stderr,"   done in %ld sec.\n",elapsed);


    // collapse Hlos values obtaining mean and stddev for every sphere
    //=================================================================
    elapsed  = (time_t)0;
    elapsed -= time(NULL);
    fprintf(stderr,"o calculating means and stddevs ... ");

    Hsphere      = (double *) calloc(Nspheres, sizeof(double));
    sigmaHsphere = (double *) calloc(Nspheres, sizeof(double));

    // norm is a per-sphere counter and therefore has to be private
#ifdef WITH_OPENMP
    #pragma omp parallel for schedule(static) private(isphere,iobserver,norm) shared(Nobserver,Hlos,Hsphere)
#endif
    for(isphere=0; isphere<Nspheres; isphere++) {
        norm = 0;
        for(iobserver=0; iobserver<Nobserver; iobserver++) {
            // negative entries mark spheres without usable particles
            if(Hlos[Hidx(isphere,iobserver,Nspheres)] > 0.) {
                Hsphere[isphere] += Hlos[Hidx(isphere,iobserver,Nspheres)];
                norm++;
            }
        }
        if(norm > 0)
            Hsphere[isphere] /= (double)norm;
        else
            Hsphere[isphere] = -1.;
    }

    // NOTE(review): the sum below uses the sphere mean Hsphere[isphere] for
    // every observer rather than the per-observer Hlos value -- confirm that
    // this is the intended definition of sigmaH.
#ifdef WITH_OPENMP
    #pragma omp parallel for schedule(static) private(isphere,iobserver,norm) shared(Nobserver,Hlos,Hsphere,Hubble)
#endif
    for(isphere=0; isphere<Nspheres; isphere++) {
        norm = 0;
        for(iobserver=0; iobserver<Nobserver; iobserver++) {
            if(Hsphere[isphere] > 0.) {
                sigmaHsphere[isphere] += pow2((Hsphere[isphere]-Hubble)/Hubble);
                norm++;
            }
        }
        if(norm > 0)
            sigmaHsphere[isphere] /= (double)norm;
        else
            sigmaHsphere[isphere] = -1.;   // flag "no data", as done for Hsphere
    }

    elapsed += time(NULL);
    fprintf(stderr,"   done in %ld sec.\n",elapsed);

    ahf.time += time(NULL);


    //========================================================
    //                       output
    //========================================================
    elapsed  = (time_t)0;
    elapsed -= time(NULL);

    // full information for each sphere
    //==================================
    for(isphere=0; isphere<Nspheres; isphere++) {

        // sphere radius
        Rsphere = (Rsphere_min + ((Nspheres > 1) ? (double)isphere*(Rsphere_max-Rsphere_min)/((double)(Nspheres-1)) : 0.0));

        // construct outfile name
        sprintf(outfile,"%s-Nobserver%07" PRIu64 "-Nspheres%03d-Rpshere%lf",global_io.params->outfile_prefix,Nobserver,Nspheres,Rsphere);
        fprintf(stderr,"o writing Rsphere=%lf information to: %s ... ",Rsphere,outfile);

        // open output file
        fpout = fopen(outfile,"w");
        if(fpout == NULL) {
            fprintf(stderr,"FATAL: cannot open %s for writing\n",outfile);
            exit(0);
        }

        fprintf(fpout,"# Hlos(1)\n");

        for(iobserver=0; iobserver<Nobserver; iobserver++) {
            fprintf(fpout,"%lf\n",Hlos[Hidx(isphere,iobserver,Nspheres)]);
        }

        // one file per sphere: close it before the next one is opened
        fclose(fpout);
    }


    // reduced information
    //=====================
    sprintf(outfile,"%s-Nobserver%07" PRIu64 "-Nspheres%03d-Rsphere_min%lf-Rpshere_max%lf",global_io.params->outfile_prefix,Nobserver,Nspheres,Rsphere_min,Rsphere_max);
    fprintf(stderr,"o writing sphere information to: %s ... ",outfile);

    // open output file
    fpout = fopen(outfile,"w");
    if(fpout == NULL) {
        fprintf(stderr,"FATAL: cannot open %s for writing\n",outfile);
        exit(0);
    }

    fprintf(fpout,"# Rsphere(1) Hsphere(2) sigmaH(3)\n");
    for(isphere=0; isphere<Nspheres; isphere++) {
        Rsphere = (Rsphere_min + ((Nspheres > 1) ? (double)isphere*(Rsphere_max-Rsphere_min)/((double)(Nspheres-1)) : 0.0));
        fprintf(fpout,"%lf %lf %lf\n",Rsphere,Hsphere[isphere],sigmaHsphere[isphere]);
    }
    fclose(fpout);

    elapsed += time(NULL);
    fprintf(stderr," done in %ld sec.\n",elapsed);


    //========================================================
    //                 update logfile
    //========================================================
    //write_logfile(timecounter, timestep, no_timestep);

    //========================================================
    //                      BYE BYE
    //========================================================
    free(Hlos);
    free(Hsphere);
    free(sigmaHsphere);
#ifdef OBSERVER_FROM_FILE
    free(xobs);
    free(yobs);
    free(zobs);
    free(vxobs);
    free(vyobs);
    free(vzobs);
#endif

    if(io.icfile_name)       free(io.icfile_name);
    if(io.dumpfile_name)     free(io.dumpfile_name);
    if(io.logfile_name)      free(io.logfile_name);
    if(io.outfile_prefix)    free(io.outfile_prefix);
    if(global.termfile_name) free(global.termfile_name);

    if(global.fst_part)      free(global.fst_part);
    if(global.fst_gas)       free(global.fst_gas);
    if(global.fst_star)      free(global.fst_star);

    fprintf(io.logfile, "==========================================================\n");
    fprintf(io.logfile, "                       FINISHED (v%3.1f/%03d)\n",VERSION,BUILD);
    fprintf(io.logfile, "==========================================================\n");
    fclose(io.logfile);


    return EXIT_SUCCESS;
}
void mcmove(GRID_INFO_TYPE* grid, int spin[][LENGTH], int nbr1[] , int nbr2[])
{

/* ONE MONTE CARLO STEP by Metropolis: Flip probability 1 if Enew < Eold, 
   else prob is exp -(Enew-Eold)/T.  Simplified here since only there 
   are five cases in d=2 for external field = 0.
   FLIP WITH prob1   prob2    1.0     1.0     1.0   (Below spins called)
               +       -       -       -       -           ss2
             + + +   + + +   + + -   + + -   - + -      ss1 ss0 ss3
               +       +       +       -       -           ss4          

   Parameters:
     grid  - process grid; only grid->comm is used here (for MPI_Cart_shift
             and to derive a per-rank random seed).
     spin  - local LENGTH x LENGTH spin block.  Rows/columns 0 and LENGTH-1
             are overwritten with the data received from the four
             neighbouring processes before the sweep starts.
     nbr1, nbr2 - neighbour index tables used to look up the four spins
             around the picked site.

   The function blocks in four MPI_Recv calls (tag 50), so the matching
   sends must already have been posted by the neighbours.                 */

  int i, m, n, tag=50;
  int my_rank;
  int ixpick, iypick;
  int ss0, ss1, ss2, ss3, ss4, de;
  int flag;
  /* The generator state has to survive between calls and start from a
     defined value; it used to be an uninitialised automatic variable.
     Assumes the Numerical-Recipes convention that a negative *idum
     (re)initialises ran3 -- TODO confirm against this project's ran3. */
  static long int idum = 0;
  static int seeded = 0;
  double prob1 , prob2;
  int *U1,*D1,*R1,*L1;
  MPI_Status status;

  if (!seeded)
  {
   /* Different negative seed on every rank so the streams differ. */
   MPI_Comm_rank(grid->comm, &my_rank);
   idum = -(long int)(my_rank + 1);
   seeded = 1;
  }

/* arrays to receive the spin configuration from neighbors */
     U1=(int *)malloc(LENGTH*sizeof(int));
     D1=(int *)malloc(LENGTH*sizeof(int));
     R1=(int *)malloc(LENGTH*sizeof(int));
     L1=(int *)malloc(LENGTH*sizeof(int));
  if (U1 == NULL || D1 == NULL || R1 == NULL || L1 == NULL)
  {
   /* Without the halo buffers the receives below would write through
      NULL; stop the whole job instead. */
   MPI_Abort(MPI_COMM_WORLD, 1);
  }

  /* NOTE(review): the ranks returned by MPI_Cart_shift are ranks in
     grid->comm, but the receives are posted on MPI_COMM_WORLD.  This is
     only correct if both communicators number the processes identically
     -- confirm. */
  MPI_Cart_shift(grid->comm,0,-1,&m,&n);
  MPI_Recv(U1,LENGTH, MPI_INT,n,tag,MPI_COMM_WORLD,&status); /* receive U1 buffer with dimension LENGTH of type MPI_INT from process "n" with "tag" */
  MPI_Cart_shift(grid->comm,0,1,&m,&n);
  MPI_Recv(D1,LENGTH, MPI_INT,n,tag,MPI_COMM_WORLD,&status);
  MPI_Cart_shift(grid->comm,1,-1,&m,&n);
  MPI_Recv(L1,LENGTH, MPI_INT,n,tag,MPI_COMM_WORLD,&status);
  MPI_Cart_shift(grid->comm,1,1,&m,&n);
  MPI_Recv(R1,LENGTH, MPI_INT,n,tag,MPI_COMM_WORLD,&status);

  /* Acceptance probabilities for the two energy-raising flips. */
  prob1 = exp(-8.0/TEMP);
  prob2 = exp(-4.0/TEMP);

  /* Copy the received neighbour data into the border of the local block
     (corners are left untouched). */
  for (i = 1 ; i < LENGTH-1; i++)
  {
   spin[i][0]=U1[i];
   spin[i][LENGTH-1]=D1[i];
   spin[0][i]=R1[i];
   spin[LENGTH-1][i]=L1[i];
  }
  free(U1);
  free(D1);
  free(L1);
  free(R1);

  for (i = 1 ; i <= (LENGTH-1)*(LENGTH-1) ; i++)
  {
      /* NOTE(review): with ran3 in [0,1) this rounds to 0..LENGTH-2, so
         the border index 0 can be picked and LENGTH-2 gets half weight.
         Left unchanged because the intended range depends on how
         nbr1/nbr2 are built -- confirm. */
      ixpick = (int)floor((LENGTH-2) * ran3(&idum)+0.5);
      iypick = (int)floor((LENGTH-2) * ran3(&idum)+0.5);
      ss0 = spin [ixpick]       [iypick]       ;
      ss1 = spin [nbr1[ixpick]] [iypick]       ;
      ss2 = spin [ixpick]       [nbr1[iypick]] ;
      ss3 = spin [nbr2[ixpick]] [iypick]       ;
      ss4 = spin [ixpick]       [nbr2[iypick]] ;

      de =  2*ss0*(ss1+ss2+ss3+ss4);

      flag = 1;                     /* flip spin if flag = 1 */

      /* Only the two energy-raising cases can be rejected; at most one
         random number is drawn per site because of short-circuiting. */
      if ( ((de == 8) && (ran3(&idum) > prob1))
           ||
           ((de == 4) && (ran3(&idum) > prob2)) )
         flag = 0;

      spin[ixpick][iypick] = (1 - 2*flag )*spin[ixpick][iypick];
  }

}
Ejemplo n.º 17
0
/*******************************************************************
 * 
 * NAME :               run_importance_sampling(int cycles, int& accepted, 
 *                                      double& energy, double& energy_sq)
 *
 * DESCRIPTION :        Importance sampling (Metropolis-Hastings with a
 *                      quantum-force drift term), moving one particle at
 *                      a time.
 *
 * PARAMETERS :         cycles     number of sampled sweeps; the loop runs
 *                                 `thermalization` extra sweeps first that
 *                                 are not sampled.
 *                      accepted   out: accepted moves during the sampled
 *                                 sweeps, divided by the particle count
 *                                 (integer division).
 *                      energy     out: sum of sampled local energies
 *                                 divided by cycles * n_particles.
 *                      energy_sq  out: same for the squared local energy.
 *
 * NOTES :              With BLOCKING enabled every sampled local energy is
 *                      also appended as a raw double to
 *                      Blocking/files/blocking_<rank>.dat.
 */
void MC_Importance_Sampling::run_importance_sampling(int cycles, int& accepted, double& energy, double& energy_sq) {
    int dum = (int) idum;
    RanNormalSetSeedZigVec(&dum, 200);

    // Special case for blocking. We need the rank when writing to file.
#if BLOCKING
    int my_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    ostringstream filename;
    filename << "Blocking/files/blocking_" << my_rank << ".dat";
    ofstream blockfile(filename.str().c_str(), ios::out | ios::binary);
#endif
    double delta_e, greens_function, R;
    int n_particles = wf->getNParticles();
    int dim = wf->getDim();

    // Initiating variables
    energy = 0;
    energy_sq = 0;
    accepted = 0;
    delta_e = 0;

    // Quantum Force.
    mat q_force_old = zeros(n_particles, dim);
    mat q_force_new = zeros(n_particles, dim);

    // Initial position of the electrons
    mat r_old = randn(n_particles, dim) * sqrt(dt);
    mat r_new = r_old;

    // Evaluating the Quantum Force and Wave Function in the initial position.
    wf->set_r_new(r_old, 0);
    wf->init_slater();
    q_force_old = wf->q_force();
    wf->accept_move();
    
    // Monte Carlo cycles: `thermalization` unsampled sweeps followed by
    // exactly `cycles` sampled sweeps.
    for (int sample = 0; sample < (cycles + thermalization); sample++) {

        // The first sampled sweep is sample == thermalization. Using '>'
        // here would sample only cycles-1 sweeps while the results below
        // are normalised by `cycles`.
        const bool sampling = (sample >= thermalization);

        // Looping over all particles.   
        for (int active = 0; active < n_particles; active++) {

            // Using the quantum force to calculate a new position.
            for (int i = 0; i < dim; i++)
                r_new(active, i) = r_old(active, i) + D * q_force_old(active, i) * dt + DRanNormalZigVec() * sqrt(dt);

            // Evaluating the Wave Function in r_new.
            wf->set_r_new(r_new, active);
            wf->evaluate_new();

            // Updating the quantum force.
            q_force_new = wf->q_force();

            // Calculating the ratio between the Green's functions.     
            greens_function = 0;
            for (int j = 0; j < dim; j++) {
                greens_function += (q_force_old(active, j) + q_force_new(active, j))
                        * (D * dt * 0.5 * (q_force_old(active, j)
                        - q_force_new(active, j)) - r_new(active, j) + r_old(active, j));
            }

            greens_function = exp(0.5 * greens_function);

            // Metropolis-Hastings acceptance test.
            R = wf->get_ratio();
            R = R * R *greens_function;

            if (ran3(&idum) <= R) {
                r_old = r_new;
                q_force_old = q_force_new;

                wf->accept_move();

                if (sampling) {
                    accepted++;
                    delta_e = ht->get_energy(r_old);
                }
            } else {
                // If the move is not accepted the position and quantum force is reset.
                r_new = r_old;
                q_force_new = q_force_old;
            }

            // Sampling the energy.
            // NOTE(review): delta_e is only refreshed on an accepted move
            // inside the sampled phase, so until the first such move the
            // placeholder value 0 is what gets accumulated here.
            if (sampling) {
                energy += delta_e;
                energy_sq += delta_e*delta_e;
#if BLOCKING
                blockfile.write((char*) &delta_e, sizeof (double));
#endif
            }
        }
    } // End MC cycles.

    // Scaling the results.
    energy = energy / cycles / n_particles;
    energy_sq = energy_sq / cycles / n_particles;
    accepted /= n_particles;

#if BLOCKING
    blockfile.close();
#endif
}
Ejemplo n.º 18
0
Archivo: anneal.c Proyecto: gnovak/bin
void anneal(float x[], float y[], int iorder[], int ncity)
{
	/* Simulated-annealing search for a short closed tour through the
	   cities (x[k], y[k]).  iorder[1..ncity] holds the visiting order on
	   entry and is rearranged in place by the accepted moves.  The
	   temperature starts at 0.5 and is multiplied by TFACTR after each of
	   at most 100 temperature steps; the routine returns early as soon as
	   a step accepts no move at all.  Progress is printed per step. */

	/* Helpers defined elsewhere. */
	int irbit1(unsigned long *iseed);
	int metrop(float de, float t);
	float ran3(long *idum);
	float revcst(float x[], float y[], int iorder[], int ncity, int n[]);
	void reverse(int iorder[], int ncity, int n[]);
	float trncst(float x[], float y[], int iorder[], int ncity, int n[]);
	void trnspt(int iorder[], int ncity, int n[]);

	static int n[7];                       /* segment description handed to the helpers */
	const int max_trials = 100*ncity;      /* attempted moves per temperature */
	const int max_accepted = 10*ncity;     /* accepted moves that end a temperature early */
	long rng_state = -1;
	unsigned long bit_seed = 111;
	float tour_len = 0.0;
	float temperature = 0.5;
	float delta;
	int step, trial, accepted, span;
	int leg, from, to;

	/* Length of the initial tour: all consecutive legs ... */
	leg = 1;
	while (leg < ncity) {
		from = iorder[leg];
		to = iorder[leg+1];
		tour_len += ALEN(x[from],x[to],y[from],y[to]);
		leg++;
	}
	/* ... plus the leg that closes the loop. */
	from = iorder[ncity];
	to = iorder[1];
	tour_len += ALEN(x[from],x[to],y[from],y[to]);

	for (step = 1; step <= 100; step++) {
		accepted = 0;
		for (trial = 1; trial <= max_trials; trial++) {
			/* Draw segment end points until at least three cities
			   lie outside the segment. */
			for (;;) {
				n[1] = 1 + (int) (ncity*ran3(&rng_state));
				n[2] = 1 + (int) ((ncity-1)*ran3(&rng_state));
				if (n[2] >= n[1]) ++n[2];
				span = 1 + ((n[1]-n[2]+ncity-1) % ncity);
				if (span >= 3) break;
			}

			if (irbit1(&bit_seed) != 0) {
				/* Propose reversing the segment. */
				delta = revcst(x,y,iorder,ncity,n);
				if (metrop(delta,temperature)) {
					++accepted;
					tour_len += delta;
					reverse(iorder,ncity,n);
				}
			} else {
				/* Propose transporting the segment to a random
				   position outside of it. */
				n[3] = n[2] + (int) (abs(span-2)*ran3(&rng_state)) + 1;
				n[3] = 1 + ((n[3]-1) % ncity);
				delta = trncst(x,y,iorder,ncity,n);
				if (metrop(delta,temperature)) {
					++accepted;
					tour_len += delta;
					trnspt(iorder,ncity,n);
				}
			}

			if (accepted >= max_accepted) break;
		}
		printf("\n %s %10.6f %s %12.6f \n","T =",temperature,
			"	 Path Length =",tour_len);
		printf("Successful Moves: %6d\n",accepted);
		temperature *= TFACTR;
		if (accepted == 0) return;
	}
}
// Hard-sphere Monte Carlo driver.
//
// Usage: <prog> packing-fraction uncertainty_goal dr filename
//
// Places N spheres at random, relaxes them until countOverLaps() reports
// zero, then performs `iterations` single-sphere trial moves.  Once every
// N accepted moves all spheres are binned by shell(); the resulting
// density profile is periodically rewritten to `filename`, and the sphere
// positions are dumped to `filename.debug`.
//
// Returns 0 on success and 1 on a usage error, a non-positive sphere count
// or when the output file cannot be created.  Exits with status 1 if no
// overlap-free starting configuration is found.
int main(int argc, char *argv[]){
  if (argc != 5) {
    printf("usage:  %s packing-fraction uncertainty_goal dr filename\n", argv[0]);
    return 1;
  }
  const char *outfilename = argv[4];
  const double packing_fraction = atof(argv[1]);
  const double mean_density = packing_fraction/(4*M_PI/3*R*R*R);
  const long N = (mean_density*lenx*leny*lenz - 1) + 0.5;
  printf("density %g, packing fraction %g gives N %ld\n", mean_density, packing_fraction, N);
  fflush(stdout);
  if (N <= 0) {
    // Everything below indexes with i%N / j%N and allocates N spheres, so
    // a non-positive count cannot be simulated.
    printf("Error: packing fraction %g gives no spheres to simulate\n", packing_fraction);
    return 1;
  }
  const double dr = atof(argv[3]);
  Vector3d *spheres = new Vector3d[N];

  // Size of the scratch buffer for the "<filename>.debug" name; snprintf
  // truncates instead of overflowing when the file name is longer.
  const size_t debugname_size = 10000;

  //////////////////////////////////////////////////////////////////////////////////////////
  // We start with randomly-placed spheres, and then gradually wiggle
  // them around until they are all within bounds and are not
  // overlapping.  We do this by creating an "overlap" value which we
  // constrain to never increase.  Note that this may not work at all
  // for high filling fractions, since we could get stuck in a local
  // minimum.
  for(long i=0; i<N; i++) {
    spheres[i]=10*lenx*ran3();
  }
  clock_t start = clock();
  long num_to_time = 100*N;
  long num_timed = 0;
  long i = 0;
  double scale = .005;

  // At this stage, we'll set up our output grid...
  long div = long((lenx/2 - innerRad)/dr);
  if (div < 10) div = 10;
  printf("Using %ld divisions, dx ~ %g\n", div, (lenx/2 - innerRad)/div);

  double *radius = new double[div+1];
  for (long k=0;k<div+1;k++) radius[k] = innerRad + (lenx/2 - innerRad)*double(k)/div;

  const double uncertainty_goal = atof(argv[2]);
  const double minvolume = M_PI*(radius[1]*radius[1]*radius[1] - innerRad*innerRad*innerRad)/2;
  const double num_in_min_volume = minvolume*N/lenx/leny/lenz;
  const long iterations = 2.0/uncertainty_goal/uncertainty_goal/num_in_min_volume;
  printf("running with %ld spheres for %ld iterations.\n", N, iterations);
  fflush(stdout);


  // Let's move each sphere once, so they'll all start within our
  // periodic cell!
  for (i=0;i<N;i++) spheres[i] = move(spheres[i], scale);

  clock_t starting_initial_state = clock();
  printf("Initial countOverLaps is %g\n", countOverLaps(spheres, N, R));
  while (countOverLaps(spheres, N, R)>0){
    // long, not int: 100*N is a long and may exceed INT_MAX.
    for (long movethis=0;movethis < 100*N; movethis++) {
      if (num_timed++ > num_to_time) {
        clock_t now = clock();
        //printf("took %g seconds per initialising iteration\n",
        //       (now - double(start))/CLOCKS_PER_SEC/num_to_time);
        num_timed = 0;
        start = now;
      }
      // Trial move of sphere i%N; undo it if it increased that sphere's overlap.
      Vector3d old =spheres[i%N];
      double oldoverlap = countOneOverLap(spheres, N, i%N, R);
      spheres[i%N]=move(spheres[i%N],scale);
      double newoverlap = countOneOverLap(spheres, N, i%N, R);
      if(newoverlap>oldoverlap){
        spheres[i%N]=old;
      }
      i++;
      if (i%(100*N) == 0) {
        if (i>iterations/4) {
          for(long k=0; k<N; k++) {
            printf("%g\t%g\t%g\n", spheres[k][0],spheres[k][1],spheres[k][2]);
          }
          printf("couldn't find good state\n");
          exit(1);
        }
        char *debugname = new char[debugname_size];
        snprintf(debugname, debugname_size, "%s.debug", outfilename);
        FILE *spheredebug = fopen(debugname, "w");
        if (spheredebug == NULL) {
          // The debug dump is best-effort; keep relaxing without it.
          printf("Error creating debug file %s\n", debugname);
        } else {
          for(long k=0; k<N; k++) {
            fprintf(spheredebug, "%g\t%g\t%g\n", spheres[k][0],spheres[k][1],spheres[k][2]);
          }
          fclose(spheredebug);
        }
        printf("numOverLaps=%g (debug file: %s)\n",countOverLaps(spheres,N,R), debugname);
        delete[] debugname;
        fflush(stdout);
      }
    }
  }
  assert(countOverLaps(spheres, N, R) == 0);
  {
    clock_t now = clock();
    //printf("took %g seconds per initialising iteration\n",
    //       (now - double(start))/CLOCKS_PER_SEC/num_to_time);
    printf("\nFound initial state in %g days!\n", (now - double(starting_initial_state))/CLOCKS_PER_SEC/60.0/60.0/24.0);
  }

  // Here we use a hokey heuristic to decide on an average move
  // distance, which is proportional to the mean distance between
  // spheres.
  const double mean_spacing = pow(lenx*leny*lenz/N, 1.0/3);
  if (mean_spacing > 2*R) {
    scale = 2*(mean_spacing - 2*R);
  } else {
    scale = 0.1;
  }
  printf("Using scale of %g\n", scale);
  long count = 0;
  long *shells = new long[div];
  for (long l=0; l<div; l++) shells[l] = 0;

  double *density = new double[div];

  /////////////////////////////////////////////////////////////////////////////
  num_to_time = 1000;
  start = clock();
  num_timed = 0;
  double secs_per_iteration = 0;
  long workingmoves=0;

  // Report progress roughly every percent.  For fewer than 100 iterations
  // iterations/100 is 0, which must not be used as a modulus.
  const long progress_stride = (iterations >= 100) ? iterations/100 : 1;

  clock_t output_period = CLOCKS_PER_SEC*60; // start at outputting every minute
  clock_t max_output_period = clock_t(CLOCKS_PER_SEC)*60*30; // top out at half hour interval
  clock_t last_output = clock(); // when we last output data
  for (long j=0; j<iterations; j++){
    num_timed = num_timed + 1;
    if (num_timed > num_to_time || j == iterations - 1) {
      num_timed = 0;
      ///////////////////////////////////////////start of print.dat
      const clock_t now = clock();
      secs_per_iteration = (now - double(start))/CLOCKS_PER_SEC/num_to_time;
      if (secs_per_iteration*num_to_time < 1) {
        printf("took %g microseconds per iteration\n", 1000000*secs_per_iteration);
        num_to_time *= 2;
      } else {
        // Set the number of iterations to time to a minute, so we
        // won't check *too* many times.
        num_to_time = long(60/secs_per_iteration);
      }
      start = now;
      if (now - last_output > output_period || j == iterations - 1) {
        last_output = now;
        if (output_period < max_output_period/2) {
          output_period *= 2;
        } else if (output_period < max_output_period) {
          output_period = max_output_period;
        }
        {
          double secs_done = double(now)/CLOCKS_PER_SEC;
          long mins_done = secs_done / 60;
          long hours_done = mins_done / 60;
          mins_done = mins_done % 60;
          if (hours_done > 50) {
            printf("Saved data after %ld hours\n", hours_done);
          } else if (mins_done < 1) {
            printf("Saved data after %.1f seconds\n", secs_done);
          } else if (hours_done < 1) {
            printf("Saved data after %ld minutes\n", mins_done);
          } else if (hours_done < 2) {
            printf("Saved data after one hour %ld minutes\n", mins_done);
          } else {
            printf("Saved data after %ld hours, %ld minutes\n", hours_done, mins_done);
          }
          fflush(stdout);
        }
        // Convert the shell counts collected so far into a density profile.
        for(long k=0; k<div; k++){
          const double rmax = radius[k+1];
          const double rmin = radius[k];
          const double num_counted = (j+1)/double(N);
          const double dV = 4/3.*M_PI*(rmax*rmax*rmax - rmin*rmin*rmin);
          density[k]=shells[k]/dV/num_counted*(4*M_PI/3);
        }

        FILE *out = fopen((const char *)outfilename,"w");
        if (out == NULL) {
          printf("Error creating file %s\n", outfilename);
          return 1;
        }
        // We will just extrapolate to the contact point with a linear
        // extrapolation.  We could do better than this, but this
        // should be good enough once we have solid statistics.  And
        // in a pinch we could always delete this first line.
        fprintf(out, "%g\t%g\n", radius[0], 1.5*density[0] - 0.5*density[1]);
        fprintf(out, "%g\t%g\n", 0.5*(radius[0]+radius[1]), density[0]);
        // The outermost shell is not written.
        const long divtoprint = div - 1;
        for(long k=1; k<divtoprint; k++){
          fprintf(out, "%g\t%g\n", 0.5*(radius[k]+radius[k+1]), density[k]);
        }
        fflush(stdout);
        fclose(out);
      }
      ///////////////////////////////////////////end of print.dat
    }

    // only write out the sphere positions after they've all had a
    // chance to move
    if (workingmoves%N == 0) {
      for (long k=0;k<N;k++) {
        //printf("Sphere at %.1f %.1f %.1f\n", spheres[k][0], spheres[k][1], spheres[k][2]);
        shells[shell(spheres[k], div, radius)]++;
      }
    }
    if(j % progress_stride==0 && j != 0){
      double secs_to_go = secs_per_iteration*(iterations - j);
      long mins_to_go = secs_to_go / 60;
      long hours_to_go = mins_to_go / 60;
      mins_to_go = mins_to_go % 60;
      if (hours_to_go > 5) {
        printf("%.0f%% complete... (%ld hours to go)\n",j/(iterations*1.0)*100, hours_to_go);
      } else if (mins_to_go < 1) {
        printf("%.0f%% complete... (%.1f seconds to go)\n",j/(iterations*1.0)*100, secs_to_go);
      } else if (hours_to_go < 1) {
        printf("%.0f%% complete... (%ld minutes to go)\n",j/(iterations*1.0)*100, mins_to_go);
      } else if (hours_to_go < 2) {
        printf("%.0f%% complete... (1 hour, %ld minutes to go)\n",j/(iterations*1.0)*100, mins_to_go);
      } else {
        printf("%.0f%% complete... (%ld hours, %ld minutes to go)\n",j/(iterations*1.0)*100, hours_to_go, mins_to_go);
      }
      char *debugname = new char[debugname_size];
      snprintf(debugname, debugname_size, "%s.debug", outfilename);
      FILE *spheredebug = fopen(debugname, "w");
      if (spheredebug == NULL) {
        // The debug dump is best-effort; keep simulating without it.
        printf("Error creating debug file %s\n", debugname);
      } else {
        for(long k=0; k<N; k++) {
          fprintf(spheredebug, "%g\t%g\t%g\n", spheres[k][0],spheres[k][1],spheres[k][2]);
        }
        fclose(spheredebug);
      }
      delete[] debugname;
      fflush(stdout);
    }
    // Trial move of sphere j%N, accepted only if it creates no overlap.
    Vector3d temp = move(spheres[j%N],scale);
    count++;
    if(!overlap(spheres, temp, N, R, j%N)){
      spheres[j%N] = temp;
      workingmoves++;
    }
  }

  //////////////////////////////////////////////////////////////////////////////////////////

  printf("Total number of attempted moves = %ld\n",count);
  printf("Total number of successful moves = %ld\n",workingmoves);
  printf("Acceptance rate = %g\n", workingmoves/double(count));
  fflush(stdout);
  delete[] shells;
  delete[] density;
  delete[] radius;
  delete[] spheres;
  return 0;
}
// Displace v by scale times a vector drawn from ran3() and pass the
// result through fixPeriodic() before returning it.
Vector3d move(Vector3d v, double scale)
{
  Vector3d displaced = v + scale*ran3();
  return fixPeriodic(displaced);
}
Ejemplo n.º 21
0
int main(int argc, char **argv) {
  int nmax, nprocs, me, me_plus;
  int g_a_data, g_a_i, g_a_j, isize;
  int gt_a_data, gt_a_i, gt_a_j;
  int g_b, g_c;
  int i, j, jj, k, one, jcnt;
  int chunk, kp1, ld;
  int *p_i, *p_j;
  double *p_data, *p_b, *p_c;
  double t_beg, t_beg2, t_ga_tot, t_get, t_mult, t_cnstrct, t_mpi_in, t_ga_in;
  double t_hypre_strct, t_ga_trans, t_gp_get;
  double t_get_blk_csr, t_trans_blk_csr, t_trans_blk, t_create_csr_ga, t_beg3;
  double t_gp_tget, t_gp_malloc, t_gp_assign, t_beg4;
  double prdot, dotga, dothypre, tempc;
  double prtot, gatot, hypretot, gatot2, hypretot2;
  double prdot2, prtot2;
  int status;
  int idim, jdim, kdim, idum, memsize;
  int lsize, ntot;
  int heap=200000, fudge=100, stack=200000, ma_heap;
  double *cbuf, *vector;
  int pdi, pdj, pdk, ip, jp, kp, ncells;
  int lo[3],hi[3];
  int blo[3], bhi[3];
  int ld_a, ld_b, ld_c, ld_i, ld_j, irows, ioff, joff, total_procs;
  int iproc, iblock, btot;
  double *amat, *bvec;
  int *ivec, *jvec;
  int *proclist, *proc_inv, *icnt;
  int *voffset, *offset, *mapc;
  int iloop, lo_bl, hi_bl;
  char *buf, **buf_ptr;
  int *iparams, *jval, *ival;
  double *rval, *rvalt;
  int imin, imax, jmin, jmax, irow, icol, nnz;
  int nrows, kmin, kmax, lmin, lmax, jdx;
  int LOOPNUM = 100;
  void **blk_ptr;
  void *blk;
  int blk_size, tsize, zero;
  int *iblk, *jblk, *blkidx;
  int *tblk_ptr;
  int *ivalt, *jvalt, *iparamst;
  int *iblk_t, *jblk_t, *blkidx_t;
/*
   Hypre declarations
*/
  int ierr;
#if USE_HYPRE
  HYPRE_StructGrid grid;
  HYPRE_StructStencil stencil;
  HYPRE_StructMatrix matrix;
  HYPRE_StructVector vec_x, vec_y;
  int i4, j4, ndim, nelems, offsets[7][3];
  int stencil_indices[7], hlo[3], hhi[3];
  double weights[7];
  double *values;
  double alpha, beta;
  int *rows, *cols;
#endif
/*
  ***  Intitialize a message passing library
*/
  zero = 0;
  one = 1;
  ierr = MPI_Init(&argc, &argv);
/*
 ***  Initialize GA
 
      There are 2 choices: ga_initialize or ga_initialize_ltd.
      In the first case, there is no explicit limit on memory usage.
      In the second, user can set limit (per processor) in bytes.
*/
  t_beg = GA_Wtime();
  NGA_Initialize();
  GP_Initialize();
  t_ga_in = GA_Wtime() - t_beg;
  NGA_Dgop(&t_ga_in,one,"+");

  t_ga_tot = 0.0;
  t_ga_trans = 0.0;
  t_get_blk_csr = 0.0;
  t_create_csr_ga = 0.0;
  t_trans_blk_csr = 0.0;
  t_trans_blk = 0.0;
  t_gp_get = 0.0;
  t_gp_malloc = 0.0;
  t_gp_assign = 0.0;
  t_mult = 0.0;
  t_get = 0.0;
  t_gp_tget = 0.0;
  t_hypre_strct = 0.0;
  prtot = 0.0;
  prtot2 = 0.0;
  gatot = 0.0;
  hypretot = 0.0;

  me = NGA_Nodeid();
  me_plus = me + 1;
  nprocs = NGA_Nnodes();
  if (me == 0) {
   printf("Time to initialize GA:                                 %12.4f\n",
          t_ga_in/((double)nprocs));
  }
/*
     we can also use GA_set_memory_limit BEFORE first ga_create call
*/
  ma_heap = heap + fudge;
/*      call GA_set_memory_limit(util_mdtob(ma_heap)) */
 
  if (me == 0) {
    printf("\nNumber of cores used: %d\n\nGA initialized\n\n",nprocs);
  }
/*
 ***  Initialize the MA package
      MA must be initialized before any global array is allocated
*/
  if (!MA_init(MT_DBL, stack, ma_heap)) NGA_Error("ma_init failed",-1);
/*
     create a sparse LMAX x LMAX matrix and two vectors of length
     LMAX. The matrix is stored in compressed row format.
     One of the vectors is filled with random data and the other
     is filled with zeros.
*/
  idim = IMAX;
  jdim = JMAX;
  kdim = KMAX;
  ntot = idim*jdim*kdim;
  if (me == 0) {
    printf("\nDimension of matrix: %d\n\n",ntot);
  }
  t_beg = GA_Wtime();
  grid_factor(nprocs,idim,jdim,kdim,&pdi,&pdj,&pdk);
  if (me == 0) {
    printf("\nProcessor grid configuration\n");
    printf("  PDX: %d\n",pdi);
    printf("  PDY: %d\n",pdj);
    printf("  PDZ: %d\n\n",pdk);
    printf(" Number of Loops: %d\n",LOOPNUM);
  }

  create_laplace_mat(idim,jdim,kdim,pdi,pdj,pdk,&g_a_data,&g_a_j,&g_a_i,&mapc);
  t_cnstrct = GA_Wtime() - t_beg;

  g_b = NGA_Create_handle();
  NGA_Set_data(g_b,one,&ntot,MT_DBL);
  NGA_Set_irreg_distr(g_b,mapc,&nprocs);
  status = NGA_Allocate(g_b);
/*
    fill g_b with random values
*/
  NGA_Distribution(g_b,me,blo,bhi);
  NGA_Access(g_b,blo,bhi,&p_b,&ld);
  ld = bhi[0]-blo[0]+1;
  btot = ld;
  vector = (double*)malloc(ld*sizeof(double));
  for (i=0; i<ld; i++) {
    idum  = 0;
    p_b[i] = ran3(&idum);
    vector[i] = p_b[i];
  }
  NGA_Release(g_b,blo,bhi);
  NGA_Sync();

  g_c = NGA_Create_handle();
  NGA_Set_data(g_c,one,&ntot,MT_DBL);
  NGA_Set_irreg_distr(g_c,mapc,&nprocs);
  status = NGA_Allocate(g_c);
  NGA_Zero(g_c);
#if USE_HYPRE
/*
    Assemble HYPRE grid and use that to create matrix. Start by creating
    grid partition
*/
  ndim = 3;
  i = me;
  ip = i%pdi;
  i = (i-ip)/pdi;
  jp = i%pdj;
  kp = (i-jp)/pdj;
  lo[0] = (int)(((double)idim)*((double)ip)/((double)pdi));
  if (ip < pdi-1) {
    hi[0] = (int)(((double)idim)*((double)(ip+1))/((double)pdi)) - 1;
  } else {
    hi[0] = idim - 1;
  }
  lo[1] = (int)(((double)jdim)*((double)jp)/((double)pdj));
  if (jp < pdj-1) {
    hi[1] = (int)(((double)jdim)*((double)(jp+1))/((double)pdj)) - 1;
  } else {
    hi[1] = jdim - 1;
  }
  lo[2] = (int)(((double)kdim)*((double)kp)/((double)pdk));
  if (kp < pdk-1) {
    hi[2] = (int)(((double)kdim)*((double)(kp+1))/((double)pdk)) - 1;
  } else {
    hi[2] = kdim - 1;
  }
/*
   Create grid
*/
  hlo[0] = lo[0];
  hlo[1] = lo[1];
  hlo[2] = lo[2];
  hhi[0] = hi[0];
  hhi[1] = hi[1];
  hhi[2] = hi[2];
  ierr = HYPRE_StructGridCreate(MPI_COMM_WORLD, ndim, &grid);
  ierr = HYPRE_StructGridSetExtents(grid, hlo, hhi);
  ierr = HYPRE_StructGridAssemble(grid);
/*
   Create stencil
*/
  offsets[0][0] = 0;
  offsets[0][1] = 0;
  offsets[0][2] = 0;

  offsets[1][0] = 1;
  offsets[1][1] = 0;
  offsets[1][2] = 0;

  offsets[2][0] = 0;
  offsets[2][1] = 1;
  offsets[2][2] = 0;

  offsets[3][0] = 0;
  offsets[3][1] = 0;
  offsets[3][2] = 1;

  offsets[4][0] = -1;
  offsets[4][1] = 0;
  offsets[4][2] = 0;

  offsets[5][0] = 0;
  offsets[5][1] = -1;
  offsets[5][2] = 0;

  offsets[6][0] = 0;
  offsets[6][1] = 0;
  offsets[6][2] = -1;

  nelems = 7;
  ierr = HYPRE_StructStencilCreate(ndim, nelems, &stencil);
  for (i=0; i<nelems; i++) {
    ierr = HYPRE_StructStencilSetElement(stencil, i, offsets[i]);
  }

  ncells = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
  jcnt = 7*ncells;
  values = (double*)malloc(jcnt*sizeof(double));
  jcnt = 0;
  weights[0] = 6.0;
  weights[1] = -1.0;
  weights[2] = -1.0;
  weights[3] = -1.0;
  weights[4] = -1.0;
  weights[5] = -1.0;
  weights[6] = -1.0;
  for (i=0; i<ncells; i++) {
    for (j=0; j<7; j++) {
      values[jcnt] = weights[j];
      jcnt++;
    }
  }

  ierr = HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &matrix);
  ierr = HYPRE_StructMatrixInitialize(matrix);
  for (i=0; i<7; i++) {
    stencil_indices[i] = i;
  }
  ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, 7, stencil_indices, values);
  free(values);
/*
   Check all six sides of current box to see if any are boundaries.
   Set values to zero if they are.
*/
  if (hi[0] == idim-1) {
    ncells = (hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
    hlo[0] = idim-1;
    hhi[0] = idim-1;
    hlo[1] = lo[1];
    hhi[1] = hi[1];
    hlo[2] = lo[2];
    hhi[2] = hi[2];
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 1;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  if (hi[1] == jdim-1) {
    ncells = (hi[0]-lo[0]+1)*(hi[2]-lo[2]+1);
    hlo[0] = lo[0];
    hhi[0] = hi[0];
    hlo[1] = jdim-1;
    hhi[1] = jdim-1;
    hlo[2] = lo[2];
    hhi[2] = hi[2];
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 2;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  } 
  if (hi[2] == kdim-1) {
    ncells = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1);
    hlo[0] = lo[0];
    hhi[0] = hi[0];
    hlo[1] = lo[1];
    hhi[1] = hi[1];
    hlo[2] = kdim-1;
    hhi[2] = kdim-1;
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 3;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  if (lo[0] == 0) {
    ncells = (hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
    hlo[0] = 0;
    hhi[0] = 0;
    hlo[1] = lo[1];
    hhi[1] = hi[1];
    hlo[2] = lo[2];
    hhi[2] = hi[2];
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 4;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  if (lo[1] == 0) {
    ncells = (hi[0]-lo[0]+1)*(hi[2]-lo[2]+1);
    hlo[0] = lo[0];
    hhi[0] = hi[0];
    hlo[1] = 0;
    hhi[1] = 0;
    hlo[2] = lo[2];
    hhi[2] = hi[2];
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 5;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  if (lo[2] == 1) {
    ncells = (hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
    hlo[0] = lo[0];
    hhi[0] = hi[0];
    hlo[1] = lo[1];
    hhi[1] = hi[1];
    hlo[2] = 0;
    hhi[2] = 0;
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 6;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  ierr = HYPRE_StructMatrixAssemble(matrix);
/*
    Create vectors for matrix-vector multiply
*/
  ierr = HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &vec_x);
  ierr = HYPRE_StructVectorInitialize(vec_x);
  hlo[0] = lo[0];
  hlo[1] = lo[1];
  hlo[2] = lo[2];
  hhi[0] = hi[0];
  hhi[1] = hi[1];
  hhi[2] = hi[2];
  ierr = HYPRE_StructVectorSetBoxValues(vec_x, hlo, hhi, vector);
  ierr = HYPRE_StructVectorAssemble(vec_x);
  NGA_Distribution(g_a_i,me,blo,bhi);

  if (bhi[1] > ntot-1) {
    bhi[1] = ntot-1;
  }

  btot = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);

  for (i=0; i<btot; i++) vector[i] = 0.0;
  hlo[0] = lo[0];
  hlo[1] = lo[1];
  hlo[2] = lo[2];
  hhi[0] = hi[0];
  hhi[1] = hi[1];
  hhi[2] = hi[2];
  ierr = HYPRE_StructVectorGetBoxValues(vec_x, hlo, hhi, vector);

  for (i=0; i<btot; i++) vector[i] = 0.0;
  ierr = HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &vec_y);
  ierr = HYPRE_StructVectorInitialize(vec_y);
  ierr = HYPRE_StructVectorSetBoxValues(vec_y, hlo, hhi, vector);
  ierr = HYPRE_StructVectorAssemble(vec_y);
#endif
/* Multiply sparse matrix. Start by accessing pointers to local portions of
   g_a_data, g_a_j, g_a_i */

  NGA_Sync();
  for (iloop=0; iloop<LOOPNUM; iloop++) {
    t_beg2 = GA_Wtime();

    NGA_Distribution(g_c,me,blo,bhi);
    NGA_Access(g_c,blo,bhi,&p_c,&ld_c);
    for (i = 0; i<bhi[0]-blo[0]+1; i++) {
      p_c[i] = 0.0;
    }

/* get number of matrix blocks coupled to this process */
    NGA_Get(g_a_i,&me,&me,&lo_bl,&one);
#if 1
    NGA_Get(g_a_i,&me_plus,&me_plus,&hi_bl,&one);
    hi_bl--;
    total_procs = hi_bl - lo_bl + 1;
    blk_ptr = (void**)malloc(sizeof(void*));
/* Loop through matrix blocks */
    ioff = 0;
    for (iblock = 0; iblock<total_procs; iblock++) {
      t_beg = GA_Wtime();
      jdx = lo_bl+iblock;
#if 0
      GP_Access_element(g_a_data, &jdx, &blk_ptr[0], &isize);
#endif
#if 1
      GP_Get_size(g_a_data, &jdx, &jdx, &isize);
#endif
      blk = (void*)malloc(isize);
#if 1
      GP_Get(g_a_data, &jdx, &jdx, blk, blk_ptr, &one, &blk_size, &one, &tsize, 0); 
#endif
      t_gp_get = t_gp_get + GA_Wtime() - t_beg;
      iparams = (int*)blk_ptr[0];
      rval = (double*)(iparams+7);
      imin = iparams[0];
      imax = iparams[1];
      jmin = iparams[2];
      jmax = iparams[3];
      irow = iparams[4];
      icol = iparams[5];
      nnz = iparams[6];
      jval = (int*)(rval+nnz);
      ival = (int*)(jval+nnz);
      nrows = imax - imin + 1;
      bvec = (double*)malloc((jmax-jmin+1)*sizeof(double));
      j = 0;
      t_beg = GA_Wtime();
      NGA_Get(g_b,&jmin,&jmax,bvec,&j);
      t_get = t_get + GA_Wtime() - t_beg;
      t_beg = GA_Wtime();
      for (i=0; i<nrows; i++) {
        kmin = ival[i];
        kmax = ival[i+1]-1;
        tempc = 0.0;
        for (j = kmin; j<=kmax; j++) {
          jj = jval[j];
          tempc = tempc + rval[j]*bvec[jj];
        }
        p_c[i] = p_c[i] + tempc;
      }
      t_mult = t_mult + GA_Wtime() - t_beg;
      free(bvec);
      free(blk);
    }
    NGA_Sync();
    t_ga_tot = t_ga_tot + GA_Wtime() - t_beg2;

    NGA_Distribution(g_c,me,blo,bhi);
    NGA_Release(g_c,blo,bhi);

#if USE_HYPRE
    alpha = 1.0;
    beta = 0.0;
    t_beg = GA_Wtime();
    ierr = HYPRE_StructMatrixMatvec(alpha, matrix, vec_x, beta, vec_y);
    t_hypre_strct = t_hypre_strct + GA_Wtime() - t_beg;
    hlo[0] = lo[0];
    hlo[1] = lo[1];
    hlo[2] = lo[2];
    hhi[0] = hi[0];
    hhi[1] = hi[1];
    hhi[2] = hi[2];
    ierr = HYPRE_StructVectorGetBoxValues(vec_y, hlo, hhi, vector);
    NGA_Distribution(g_c,me,hlo,hhi);
    cbuf = (double*)malloc((hhi[0]-hlo[0]+1)*sizeof(double));
    NGA_Get(g_c,hlo,hhi,cbuf,&one);
    prdot = 0.0;
    dotga = 0.0;
    dothypre = 0.0;
    for (i=0; i<(hhi[0]-hlo[0]+1); i++) {
      dothypre = dothypre + vector[i]*vector[i];
      dotga = dotga + cbuf[i]*cbuf[i];
      prdot = prdot + (vector[i]-cbuf[i])*(vector[i]-cbuf[i]);
    }
    NGA_Dgop(&dotga,1,"+");
    NGA_Dgop(&dothypre,1,"+");
    NGA_Dgop(&prdot,1,"+");
    gatot += sqrt(dotga);
    hypretot += sqrt(dothypre);
    prtot += sqrt(prdot);
    free(cbuf);
#endif

/* Transpose matrix. Start by making local copies of ival and jval arrays for
   the sparse matrix of blocks stored in the GP array */
#if 1
    t_beg2 = GA_Wtime();
    t_beg3 = GA_Wtime();
    iblk = (int*)malloc((nprocs+1)*sizeof(int));
    iblk_t = (int*)malloc((nprocs+1)*sizeof(int));
#if 0
    NGA_Get(g_a_i,&zero,&nprocs,iblk,&one);
#else
    if (me == 0) {
      NGA_Get(g_a_i,&zero,&nprocs,iblk,&one);
    } else {
      for (i=0; i<nprocs+1; i++) {
        iblk[i] = 0;
      }
    }
    GA_Igop(iblk,nprocs+1,"+");
#endif
    jblk = (int*)malloc(iblk[nprocs]*sizeof(int));
    jblk_t = (int*)malloc(iblk[nprocs]*sizeof(int));
    iblock = iblk[nprocs]-1;
#if 0
    NGA_Get(g_a_j,&zero,&iblock,jblk,&one);
#else
    if (me == 0) {
      NGA_Get(g_a_j,&zero,&iblock,jblk,&one);
    } else {
      for (i=0; i<iblock+1; i++) {
        jblk[i] = 0;
      }
    }
    GA_Igop(jblk,iblock+1,"+");
#endif
    iblock++;
    blkidx = (int*)malloc(iblk[nprocs]*sizeof(int));
    blkidx_t = (int*)malloc(iblk[nprocs]*sizeof(int));
    for (i=0; i<iblock; i++) {
      blkidx[i] = i;
    }
    iblock = nprocs;
    t_get_blk_csr = t_get_blk_csr + GA_Wtime() - t_beg3;
    t_beg3 = GA_Wtime();
    stran(iblock, iblock, iblk, jblk, blkidx, iblk_t, jblk_t, blkidx_t);
    t_trans_blk_csr = t_trans_blk_csr + GA_Wtime() - t_beg3;
    t_beg3 = GA_Wtime();
    gt_a_data = GP_Create_handle();
    i = iblk_t[nprocs];
    GP_Set_dimensions(gt_a_data, one, &i);
    GP_Set_irreg_distr(gt_a_data, iblk_t, &nprocs);
    GP_Allocate(gt_a_data);

    gt_a_j = NGA_Create_handle();
    i = iblk_t[nprocs];
    NGA_Set_data(gt_a_j, one, &i, C_INT);
    NGA_Set_irreg_distr(gt_a_j, iblk_t, &nprocs);
    NGA_Allocate(gt_a_j);

    gt_a_i = NGA_Create_handle();
    i = nprocs+1;
    NGA_Set_data(gt_a_i,one,&i,C_INT);
    for (i=0; i<nprocs; i++) mapc[i] = i;
    NGA_Set_irreg_distr(gt_a_i, mapc, &nprocs);
    NGA_Allocate(gt_a_i);

    /* copy i and j arrays of transposed matrix into distributed arrays */
    if (me==0) {
      lo_bl = 0;
      hi_bl = nprocs;
      NGA_Put(gt_a_i,&lo_bl,&hi_bl,iblk_t,&one);
      lo_bl = 0;
      hi_bl = iblk_t[nprocs]-1;
      NGA_Put(gt_a_j,&lo_bl,&hi_bl,jblk_t,&one);
    }
    NGA_Sync();
    lo_bl = iblk[me];
    hi_bl = iblk[me+1];
    total_procs = hi_bl - lo_bl + 1;
    total_procs = hi_bl - lo_bl;
    t_create_csr_ga = t_create_csr_ga + GA_Wtime() - t_beg3;
    for (iblock = lo_bl; iblock < hi_bl; iblock++) {
      t_beg4 = GA_Wtime();
      jdx = blkidx_t[iblock];
      GP_Get_size(g_a_data, &jdx, &jdx, &isize);
      blk = (void*)malloc(isize);
      GP_Get(g_a_data, &jdx, &jdx, blk, blk_ptr, &one, &blk_size, &one, &tsize, 0); 
      /* Parameters for original block */
      iparams = (int*)blk_ptr[0];
      rval = (double*)(iparams+7);
      imin = iparams[0];
      imax = iparams[1];
      jmin = iparams[2];
      jmax = iparams[3];
      irow = iparams[4];
      icol = iparams[5];
      nnz = iparams[6];
      jval = (int*)(rval+nnz);
      ival = (int*)(jval+nnz);

      /* Create transposed block */
      isize = 7*sizeof(int) + nnz*(sizeof(double)+sizeof(int))
            + (jmax-jmin+2)*sizeof(int);
      t_gp_tget = t_gp_tget + GA_Wtime() - t_beg4;
      t_beg4 = GA_Wtime();
      tblk_ptr = (int*)GP_Malloc(isize);
      t_gp_malloc = t_gp_malloc + GA_Wtime() - t_beg4;
      t_beg3 = GA_Wtime();
      iparamst = (int*)tblk_ptr;
      rvalt = (double*)(iparamst+7);
      jvalt = (int*)(rvalt+nnz);
      ivalt = (int*)(jvalt+nnz);
      iparamst[0] = jmin;
      iparamst[1] = jmax;
      iparamst[2] = imin;
      iparamst[3] = imax;
      iparamst[4] = icol;
      iparamst[5] = irow;
      iparamst[6] = nnz;
      i = imax-imin+1;
      j = jmax-jmin+1;
      stranr(i, j, ival, jval, rval, ivalt, jvalt, rvalt);
      t_trans_blk = t_trans_blk + GA_Wtime() - t_beg3;
      t_beg4 = GA_Wtime();
      GP_Assign_local_element(gt_a_data, &iblock, (void*)tblk_ptr, isize);
      t_gp_assign = t_gp_assign + GA_Wtime() - t_beg4;
#if 1
      free(blk);
#endif
    }

    /* Clean up after transpose */
#if 1
    free(iblk);
    free(iblk_t);
    free(jblk);
    free(jblk_t);
    free(blkidx);
    free(blkidx_t);
#endif
    NGA_Sync();
    t_ga_trans = t_ga_trans + GA_Wtime() - t_beg2;
#if USE_HYPRE
    alpha = 1.0;
    beta = 0.0;
    ierr = HYPRE_StructMatrixMatvec(alpha, matrix, vec_x, beta, vec_y);
    hlo[0] = lo[0];
    hlo[1] = lo[1];
    hlo[2] = lo[2];
    hhi[0] = hi[0];
    hhi[1] = hi[1];
    hhi[2] = hi[2];
    ierr = HYPRE_StructVectorGetBoxValues(vec_y, hlo, hhi, vector);
    NGA_Distribution(g_c,me,hlo,hhi);
    cbuf = (double*)malloc((hhi[0]-hlo[0]+1)*sizeof(double));
    NGA_Get(g_c,hlo,hhi,cbuf,&one);
    dothypre = 0.0;
    dotga = 0.0;
    prdot2 = 0.0;
    for (i=0; i<(hhi[0]-hlo[0]+1); i++) {
      dothypre = dothypre + vector[i]*vector[i];
      dotga = dotga + cbuf[i]*cbuf[i];
      if (fabs(vector[i]-cbuf[i]) > 1.0e-10) {
        printf("p[%d] i: %d vector: %f cbuf: %f\n",me,i,vector[i],cbuf[i]);
      }
      prdot2 = prdot2 + (vector[i]-cbuf[i])*(vector[i]-cbuf[i]);
    }
    NGA_Dgop(&dotga,1,"+");
    NGA_Dgop(&dothypre,1,"+");
    NGA_Dgop(&prdot2,1,"+");
    prtot2 += sqrt(prdot2);
    gatot2 += sqrt(dotga);
    hypretot2 += sqrt(dothypre);
    free(cbuf);
    free(blk_ptr);
#endif
    /* Clean up transposed matrix */
    GP_Distribution(gt_a_data,me,blo,bhi);
    for (i=blo[0]; i<bhi[0]; i++) {
      GP_Free(GP_Free_local_element(gt_a_data,&i));
    }
    GP_Destroy(gt_a_data);
    NGA_Destroy(gt_a_i);
    NGA_Destroy(gt_a_j);
#endif
#endif
  }
  free(vector);
#if USE_HYPRE
  if (me == 0) {
    printf("Magnitude of GA solution:                         %e\n",
        gatot/((double)LOOPNUM));
    printf("Magnitude of HYPRE solution:                      %e\n",
        hypretot/((double)LOOPNUM));
    printf("Magnitude of GA solution(2):                      %e\n",
        gatot2/((double)LOOPNUM));
    printf("Magnitude of HYPRE solution(2):                   %e\n",
        hypretot2/((double)LOOPNUM));
    printf("Difference between GA and HYPRE (Struct) results: %e\n",
        prtot/((double)LOOPNUM));
    printf("Difference between transpose and HYPRE results:   %e\n",
        prtot2/((double)LOOPNUM));
  }
#endif

/*
   Clean up arrays
*/
  NGA_Destroy(g_b);
  NGA_Destroy(g_c);
  GP_Distribution(g_a_data,me,blo,bhi);
  for (i=blo[0]; i<bhi[0]; i++) {
    GP_Free(GP_Free_local_element(g_a_data,&i));
  }
  GP_Destroy(g_a_data);
  NGA_Destroy(g_a_i);
  NGA_Destroy(g_a_j);
#if USE_HYPRE
  ierr = HYPRE_StructStencilDestroy(stencil);
  ierr = HYPRE_StructGridDestroy(grid);
  ierr = HYPRE_StructMatrixDestroy(matrix);
  ierr = HYPRE_StructVectorDestroy(vec_x);
  ierr = HYPRE_StructVectorDestroy(vec_y);
#endif

  NGA_Dgop(&t_cnstrct,1,"+");
  NGA_Dgop(&t_get,1,"+");
  NGA_Dgop(&t_gp_get,1,"+");
  NGA_Dgop(&t_mult,1,"+");
  NGA_Dgop(&t_ga_tot,1,"+");
  NGA_Dgop(&t_ga_trans,1,"+");
  NGA_Dgop(&t_get_blk_csr,1,"+");
  NGA_Dgop(&t_trans_blk_csr,1,"+");
  NGA_Dgop(&t_trans_blk,1,"+");
  NGA_Dgop(&t_create_csr_ga,1,"+");
  NGA_Dgop(&t_gp_tget,1,"+");
  NGA_Dgop(&t_gp_malloc,1,"+");
  NGA_Dgop(&t_gp_assign,1,"+");
#if USE_HYPRE
  NGA_Dgop(&t_hypre_strct,1,"+");
#endif
  free(mapc);

  if (me == 0) {
    printf("Time to create sparse matrix:                         %12.4f\n",
      t_cnstrct/((double)(nprocs*LOOPNUM)));
    printf("Time to get right hand side vector:                   %12.4f\n",
      t_get/((double)(nprocs*LOOPNUM)));
    printf("Time to get GP blocks:                                %12.4f\n",
      t_gp_get/((double)(nprocs*LOOPNUM)));
    printf("Time for sparse matrix block multiplication:          %12.4f\n",
      t_mult/((double)(nprocs*LOOPNUM)));
    printf("Time for total sparse matrix multiplication:          %12.4f\n",
      t_ga_tot/((double)(nprocs*LOOPNUM)));
#if USE_HYPRE
    printf("Total time for HYPRE (Struct)  matrix-vector multiply:%12.4f\n",
      t_hypre_strct/((double)(nprocs*LOOPNUM)));
#endif
    printf("Time to get block CSR distribution:                   %12.4f\n",
      t_get_blk_csr/((double)(nprocs*LOOPNUM)));
    printf("Time for transposing block CSR distribution:          %12.4f\n",
      t_trans_blk_csr/((double)(nprocs*LOOPNUM)));
    printf("Time for creating transposed block CSR GA:            %12.4f\n",
      t_create_csr_ga/((double)(nprocs*LOOPNUM)));
    printf("Time for transposing blocks:                          %12.4f\n",
      t_trans_blk/((double)(nprocs*LOOPNUM)));
    printf("Time to get GP blocks for transpose:                  %12.4f\n",
      t_gp_tget/((double)(nprocs*LOOPNUM)));
    printf("Time to malloc GP blocks for transpose:               %12.4f\n",
      t_gp_malloc/((double)(nprocs*LOOPNUM)));
    printf("Time to assign GP blocks for transpose:               %12.4f\n",
      t_gp_assign/((double)(nprocs*LOOPNUM)));
    printf("Time for total sparse matrix transpose:               %12.4f\n",
      t_ga_trans/((double)(nprocs*LOOPNUM)));
  }
  if (me==0) {
    printf("Terminating GA library\n");
  }
  NGA_Terminate();
/*
 ***  Tidy up after message-passing library
 */
  ierr = MPI_Finalize();
}
Ejemplo n.º 22
0
/*
    Create a distributed sparse matrix, stored as compressed-row sub-blocks,
    corresponding to a 7-point finite-difference Laplacian stencil (diagonal
    weight 6.0, neighbor weight -1.0) on a lattice of idim X jdim X kdim grid
    points.
*/
void create_laplace_mat(int idim, int jdim, int kdim, int pdi, int pdj, int pdk,
                        int *gp_block, int *g_j, int *g_i, int **imapc) {
/*
    Build the local rows of a 7-point Laplacian on an idim X jdim X kdim grid,
    split them into one compressed-row sub-block per coupled process, and store
    the sub-blocks in a Global Pointer array together with a block-level CSR
    description (g_i, g_j). This routine calls collective GA operations
    (NGA_Igop, NGA_Sync, array allocation).

    Input:
    idim: i-dimension of grid
    jdim: j-dimension of grid
    kdim: k-dimension of grid
    pdi: i-dimension of processor grid
    pdj: j-dimension of processor grid
    pdk: k-dimension of processor grid

    Output:
    gp_block: handle of the global pointer array containing the non-zero
              sparse sub-blocks of the matrix (one element per block)
    g_j: handle of the global array containing the column (process) index of
         each sub-block
    g_i: handle of the global array containing the starting location in g_j of
         each block row; it has nprocs+1 entries and the last one holds the
         total number of blocks
    imapc: receives a malloc'ed array of nprocs ints holding the global index
           of the first row owned by each process. This routine does not free
           it, so ownership passes to the caller.

    Layout of one sub-block (see the packing loop at the end of the routine):
      7 ints   : imin, imax, jmin, jmax, row block, column block, nnz
      nnz doubles : matrix values
      nnz ints    : column indices, local to the owning process of the column
      (imax-imin+2) ints : row pointers, relative to the start of the block
    NOTE(review): the doubles start 7*sizeof(int) bytes into the buffer, which
    is not a multiple of sizeof(double) for 4-byte ints - confirm that
    unaligned double access is acceptable on the target platforms.
*/
  int ltotal_procs;
  int *lproclist, *lproc_inv,  *lvoffset, *lnsize, *loffset, *licnt, *limapc;
  int *nnz_list;
  int nnz, offset, b_nnz;
  int nprocs, me, imin, imax, jcnt;
  int *jmin, *jmax;
  int ix, iy, iz, idx;
  double x, dr;
  double *rval, *gp_rval;
  int isize, idbg;
  int *jval, *gp_jval, *ival, *gp_ival, *ivalt;
  int i, j, k, itmp, one, tlo, thi, ld;
  int idum, ntot, indx, nghbrs[7], ncnt, nsave;
  int ixn[7],iyn[7],izn[7], procid[7];
  int status;
  int lo[3], hi[3], ip, jp, kp, ldi, ldj, jdx, joff;
  int il, jl, kl, ldmi, ldpi, ldmj, ldpj;
  int *xld, *yld, *zld, *tmapc;
  int *ecnt, *total_distr;
  int total_max, toffset;
  int *iparams, *blk_ptr;
  int *iparamst, *jvalt;
  double *rvalt;
  FILE *fp, *fopen();

  me = NGA_Nodeid();
  nprocs = NGA_Nnodes();
  /* Seed the generator with a per-process value; the returned number itself
     is not used anywhere below */
  idum = -(12345+me);
  x = ran3(&idum);
  one = 1;

  if (me == 0) {
    printf("\n Dimension of grid: \n\n");
    printf(" I Dimension: %d\n",idim);
    printf(" J Dimension: %d\n",jdim);
    printf(" K Dimension: %d\n\n",kdim);
  }
/*
   Find position of processor in processor grid and calculate minimum
   and maximum values of indices. Process ranks are laid out with the i index
   of the processor grid running fastest: me = kp*pdi*pdj + jp*pdi + ip
*/
  i = me;
  ip = i%pdi;
  i = (i-ip)/pdi;
  jp = i%pdj;
  kp = (i-jp)/pdj;
 
  lo[0] = (int)((((double)idim)*((double)ip))/((double)pdi));
  if (ip < pdi-1) {
    hi[0] = (int)((((double)idim)*((double)(ip+1)))/((double)pdi))-1;
  } else {
    hi[0] = idim - 1;
  } 

  lo[1] = (int)((((double)jdim)*((double)jp))/((double)pdj));
  if (jp < pdj-1) {
    hi[1] = (int)((((double)jdim)*((double)(jp+1)))/((double)pdj))-1;
  } else {
    hi[1] = jdim - 1;
  } 

  lo[2] = (int)((((double)kdim)*((double)kp))/((double)pdk));
  if (kp < pdk-1) {
    hi[2] = (int)((((double)kdim)*((double)(kp+1)))/((double)pdk))-1;
  } else {
    hi[2] = kdim - 1;
  } 
 
  /* Local extents in i and j; used to convert between a local row number and
     local (i,j,k) coordinates */
  ldi = hi[0]-lo[0]+1;
  ldj = hi[1]-lo[1]+1;
 
  /* Evaluate xld, yld, zld. These contain the number of elements in each
     division along the x, y, z axes */
  xld = (int*)malloc(pdi*sizeof(int));
  for (i=0; i<pdi; i++) {
    if (i<pdi-1) {
      xld[i] = (int)((((double)idim)*((double)(i+1)))/((double)pdi));
    } else {
      xld[i] = idim;
    }
    xld[i] = xld[i] - (int)((((double)idim)*((double)(i)))/((double)pdi));
  }

  yld = (int*)malloc(pdj*sizeof(int));
  for (i=0; i<pdj; i++) {
    if (i<pdj-1) {
      yld[i] = (int)((((double)jdim)*((double)(i+1)))/((double)pdj));
    } else {
      yld[i] = jdim;
    }
    yld[i] = yld[i] - (int)((((double)jdim)*((double)(i)))/((double)pdj));
  }

  zld = (int*)malloc(pdk*sizeof(int));
  for (i=0; i<pdk; i++) {
    if (i<pdk-1) {
      zld[i] = (int)((((double)kdim)*((double)(i+1)))/((double)pdk));
    } else {
      /* The last division ends at the k-extent of the grid (this previously
         used jdim, which is wrong whenever jdim != kdim) */
      zld[i] = kdim;
    }
    zld[i] = zld[i] - (int)((((double)kdim)*((double)(i)))/((double)pdk));
  }

/* Determine number of rows per processor
   lnsize[i]: number of rows associated with process i
   loffset[i]: global offset to location of first row associated
               with process i */

  lnsize = (int*)malloc(nprocs*sizeof(int));
  loffset = (int*)malloc(nprocs*sizeof(int));
  for (i=0; i<nprocs; i++) {
    lnsize[i] = 0;
    loffset[i] = 0;
  }
  /* Each process fills in only its own slot; the global sum then gives every
     process the complete table */
  lnsize[me] = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
  NGA_Igop(lnsize,nprocs,"+");
  loffset[0] = 0;
  for (i=1; i<nprocs; i++) {
    loffset[i] = loffset[i-1] + lnsize[i-1];
  }
 
  ntot = idim*jdim*kdim;
  NGA_Sync();
/*
    scan over rows of lattice
    imin: minimum global index of rows associated with this process (me)
    imax: maximum global index of rows associated with this process (me)
*/
  imin = loffset[me];
  imax = loffset[me]+lnsize[me]-1;
  free(loffset);
/*
    find out how many other processors couple to this row of blocks
    ecnt[i]: the number of matrix elements that couple columns on processor i
    to rows on this process
*/
  ecnt = (int*)malloc(nprocs*sizeof(int));
  for (i=0; i<nprocs; i++) {
    ecnt[i] = 0;
  }

  for (i=imin; i<=imax; i++) {
/*
    compute global grid indices of grid point corresponding to row i
*/
    indx = i - imin;
    ix = indx%ldi;
    indx = (indx - ix)/ldi;
    iy = indx%ldj;
    iz = (indx - iy)/ldj;
    ix = ix + lo[0];
    iy = iy + lo[1];
    iz = iz + lo[2];
 
    /* diagonal element */
    ecnt[me] = ecnt[me] + 1;
    /* For each of the six neighbors that lies on the grid, charge the element
       either to the neighboring process (neighbor outside the local box) or
       to this process */
    if (ix+1 <= idim-1) {
      if (ix+1 > hi[0]) {
        jdx = kp*pdi*pdj + jp*pdi + ip + 1;
        ecnt[jdx] = ecnt[jdx] + 1;
      } else {
        ecnt[me] = ecnt[me] + 1;
      }
    }
    if (ix-1 >= 0) {
      if (ix-1 < lo[0]) {
        jdx = kp*pdi*pdj + jp*pdi + ip - 1;
        ecnt[jdx] = ecnt[jdx] + 1;
      } else {
        ecnt[me] = ecnt[me] + 1;
      }
    }
    if (iy+1 <= jdim-1) {
      if (iy+1 > hi[1]) {
        jdx = kp*pdi*pdj + (jp+1)*pdi + ip;
        ecnt[jdx] = ecnt[jdx] + 1;
      } else {
        ecnt[me] = ecnt[me] + 1;
      }
    }
    if (iy-1 >= 0) {
      if (iy-1 < lo[1]) {
        jdx = kp*pdi*pdj + (jp-1)*pdi + ip;
        ecnt[jdx] = ecnt[jdx] + 1;
      } else {
        ecnt[me] = ecnt[me] + 1;
      }
    }
    if (iz+1 <= kdim-1) {
      if (iz+1 > hi[2]) {
        jdx = (kp+1)*pdi*pdj + jp*pdi + ip;
        ecnt[jdx] = ecnt[jdx] + 1;
      } else {
        ecnt[me] = ecnt[me] + 1;
      }
    }
    if (iz-1 >= 0) {
      if (iz-1 < lo[2]) {
        jdx = (kp-1)*pdi*pdj + jp*pdi + ip;
        ecnt[jdx] = ecnt[jdx] + 1;
      } else {
        ecnt[me] = ecnt[me] + 1;
      }
    }
  }

/* Create list of processors that this processor is coupled to.
   If ecnt[i] is greater than zero then process i is coupled to this process.
   ltotal_procs: the total number of other processor that this process is coupled
                 to. This includes this process (the diagonal term).
   lproclist[i]: the IDs of the processor that this processor is coupled to
   lproc_inv[i]: the location in lproclist of processor i. If processor i is not
                 coupled to this process, the lproc_inv[i] = -1
   ncnt: total number of non-zero elements held by this process
   nnz_list[i]: number of processes coupled to process i by sparse blocks
   nnz: total number of sparse blocks */

  ltotal_procs = 0;
  ncnt = 0;
  for (i=0; i<nprocs; i++) {
    if (ecnt[i] > 0) {
      ltotal_procs++;
      ncnt += ecnt[i];
    }
  }
  /* ncnt is reused as a scratch counter below; keep the total for the bounds
     check in the fill loop */
  nsave = ncnt;

  lproclist = (int*)malloc(ltotal_procs*sizeof(int));
  lproc_inv = (int*)malloc(nprocs*sizeof(int));
  licnt = (int*)malloc(ltotal_procs*sizeof(int));
  for (i=0; i<ltotal_procs; i++) {
    licnt[i] = 0;
  }

  rval = (double*)malloc(ncnt*sizeof(double));
  idbg = ncnt;
  jval = (int*)malloc(ncnt*sizeof(int));
  /* One row-pointer array of (number of local rows + 1) entries per block */
  ival = (int*)malloc((imax-imin+2)*ltotal_procs*sizeof(int));
  ivalt = (int*)malloc((imax-imin+2)*ltotal_procs*sizeof(int));

  for (i=0; i<ncnt; i++) {
    rval[i] = 0.0;
    jval[i] = 0;
  }

  j = (imax-imin+2)*ltotal_procs;
  for (i=0; i<j; i++) {
    ival[i] = 0;
    ivalt[i] = 0;
  }

  nnz_list = (int*)malloc(nprocs*sizeof(int));
  for (i=0; i<nprocs; i++) {
    nnz_list[i] = 0;
  }

  /* nnz is total number of non-zero sparse blocks */
  nnz_list[me] = ltotal_procs;
  NGA_Igop(nnz_list, nprocs, "+");
  nnz = 0;
  for (i=0; i<nprocs; i++) {
    nnz += nnz_list[i];
  }

/*  lvoffset[i]: local offset into the arrays rval and jval to get to elements
    associated with block i (i runs from 0 to ltotal_procs-1)
    isize: number of rows (plus 1) that reside on this processor */
  isize = (imax-imin+2);
  for (i=0; i<nprocs; i++) {
    lproc_inv[i] = -1;
  }
  lvoffset = (int*)malloc(ltotal_procs*sizeof(int));
  lvoffset[0] = 0;
  j = 0;
  for (i=0; i<nprocs; i++) {
    if (ecnt[i] > 0) {
      lproclist[j] = i;
      if (j > 0) {
        lvoffset[j] = ecnt[lproclist[j-1]]+lvoffset[j-1];
      }
      lproc_inv[i] = j;
      j++;
    }
  }

/* Create arrays the hold the sparse block representation of the sparse matrix
   gp_block[nnz]: Global Pointer array holding the sparse sub-matrices
   g_j[nnz]: column block indices for the element in gp_block
   g_i[nprocs+1]: row block indices for the elements in g_j */

  /* tmapc[i] is the index of the first block owned by process i, so each
     process holds exactly the blocks of its own block row */
  tmapc = (int*)malloc((nprocs+1)*sizeof(int));
  tmapc[0] = 0;
  for (i=1; i<=nprocs; i++) {
    tmapc[i] = tmapc[i-1]+nnz_list[i-1];
  }
  *gp_block = GP_Create_handle();
  GP_Set_dimensions(*gp_block,one,&nnz);
  GP_Set_irreg_distr(*gp_block, tmapc, &nprocs);
  GP_Allocate(*gp_block);

  *g_j = NGA_Create_handle();
  NGA_Set_data(*g_j,one,&nnz,C_INT);
  NGA_Set_irreg_distr(*g_j, tmapc, &nprocs);
  NGA_Allocate(*g_j);

  /* g_i is distributed with block i starting at element i; the array has
     nprocs+1 elements in total */
  for (i=0; i<nprocs; i++) {
    tmapc[i] = i;
  }
  *g_i = NGA_Create_handle();
  i = nprocs+1;
  NGA_Set_data(*g_i,one,&i,C_INT);
  NGA_Set_irreg_distr(*g_i, tmapc, &nprocs);
  NGA_Allocate(*g_i);
  free(tmapc);

  /* jmin[i], jmax[i]: first and last global row index owned by process i,
     gathered from all processes with a global sum */
  jmin = (int*)malloc(nprocs*sizeof(int));
  jmax = (int*)malloc(nprocs*sizeof(int));
  for (i=0; i<nprocs; i++) {
    jmin[i] = 0;
    jmax[i] = 0;
  }
  jmin[me] = imin;
  jmax[me] = imax;
  NGA_Igop(jmin, nprocs, "+");
  NGA_Igop(jmax, nprocs, "+");

/*
   Create the sparse blocks holding actual data. All the elements within each block
   couple this processor to one other processor
   rval[i]: values of matrix elements
   jval[i]: column indices of matrix elements
   ival[i]: index of first elements in rval and jval for the row represented by
            the index i.
   ivalt[i]: temporary array used in the construction of ival[i]
*/
  for (i=imin; i<=imax; i++) {
    /*
    compute global grid indices of grid point corresponding to row i
     */
    indx = i - imin;
    ix = indx%ldi;
    indx = (indx - ix)/ldi;
    iy = indx%ldj;
    iz = (indx - iy)/ldj;
    ix = ix + lo[0];
    iy = iy + lo[1];
    iz = iz + lo[2];
    /*
    find locations of neighbors in 7-point stencil (if they are on the grid).
    For every neighbor, nghbrs[] holds its row index local to the process that
    owns it and procid[] holds that process. Entry 0 is the grid point itself.
     */
    ncnt = 0;
    ixn[ncnt] = ix;
    iyn[ncnt] = iy;
    izn[ncnt] = iz;
    il = ix - lo[0];
    jl = iy - lo[1];
    kl = iz - lo[2];
    idx = kl*ldi*ldj + jl*ldi + il;
    nghbrs[ncnt] = idx;
    procid[ncnt] = me;
    if (ix+1 <= idim - 1) {
      ncnt++;
      ixn[ncnt] = ix + 1;
      iyn[ncnt] = iy;
      izn[ncnt] = iz;
      if (ix+1 > hi[0]) {
        /* neighbor is the first i-plane of the next process in i */
        jdx = kp*pdi*pdj + jp*pdi + ip + 1;
        il = 0;
        jl = iy - lo[1];
        kl = iz - lo[2];
        ldpi = xld[ip+1];
      } else {
        jdx = me;
        il = ix - lo[0] + 1;
        jl = iy - lo[1];
        kl = iz - lo[2];
        ldpi = ldi;
      }
      idx = kl*ldpi*ldj + jl*ldpi + il;
      nghbrs[ncnt] = idx;
      procid[ncnt] = jdx;
    }
    if (ix-1 >= 0) {
      ncnt++;
      ixn[ncnt] = ix - 1;
      iyn[ncnt] = iy;
      izn[ncnt] = iz;
      if (ix-1 < lo[0]) {
        /* neighbor is the last i-plane of the previous process in i */
        jdx = kp*pdi*pdj + jp*pdi + ip - 1;
        il = xld[ip-1] - 1;
        jl = iy - lo[1];
        kl = iz - lo[2];
        ldmi = xld[ip-1];
      } else {
        jdx = me;
        il = ix - lo[0] - 1;
        jl = iy - lo[1];
        kl = iz - lo[2];
        ldmi = ldi;
      }
      idx = kl*ldmi*ldj + jl*ldmi + il;
      nghbrs[ncnt] = idx;
      procid[ncnt] = jdx;
    }
    if (iy+1 <= jdim-1) {
      ncnt++;
      ixn[ncnt] = ix; 
      iyn[ncnt] = iy + 1;
      izn[ncnt] = iz;
      if (iy+1 > hi[1]) {
        /* neighbor is the first j-plane of the next process in j */
        jdx = kp*pdi*pdj + (jp+1)*pdi + ip;
        il = ix - lo[0];
        jl = 0;
        kl = iz - lo[2];
        ldpj = yld[jp+1];
      } else {
        jdx = me;
        il = ix - lo[0];
        jl = iy - lo[1] + 1;
        kl = iz - lo[2];
        ldpj = ldj;
      }
      idx = kl*ldi*ldpj + jl*ldi + il;
      nghbrs[ncnt] = idx;
      procid[ncnt] = jdx;
    }
    if (iy-1 >= 0) {
      ncnt++;
      ixn[ncnt] = ix;
      iyn[ncnt] = iy - 1;
      izn[ncnt] = iz;
      if (iy-1 < lo[1]) {
        /* neighbor is the last j-plane of the previous process in j */
        jdx = kp*pdi*pdj + (jp-1)*pdi + ip;
        il = ix - lo[0];
        jl = yld[jp-1] - 1;
        kl = iz - lo[2];
        ldmj = yld[jp-1];
      } else {
        jdx = me;
        il = ix - lo[0];
        jl = iy - lo[1] - 1;
        kl = iz - lo[2];
        ldmj = ldj;
      }
      idx = kl*ldi*ldmj + jl*ldi + il;
      nghbrs[ncnt] = idx;
      procid[ncnt] = jdx;
    }
    if (iz+1 <= kdim-1) {
      ncnt++;
      ixn[ncnt] = ix;
      iyn[ncnt] = iy;
      izn[ncnt] = iz + 1;
      if (iz+1 > hi[2]) {
        /* neighbor is the first k-plane of the next process in k */
        jdx = (kp+1)*pdi*pdj + jp*pdi + ip;
        il = ix - lo[0];
        jl = iy - lo[1];
        kl = 0;
      } else {
        jdx = me;
        il = ix - lo[0];
        jl = iy - lo[1];
        kl = iz - lo[2] + 1;
      }
      idx = kl*ldi*ldj + jl*ldi + il;
      nghbrs[ncnt] = idx;
      procid[ncnt] = jdx;
    }
    if (iz-1 >= 0) {
      ncnt++;
      ixn[ncnt] = ix;
      iyn[ncnt] = iy;
      izn[ncnt] = iz - 1;
      if (iz-1 < lo[2]) {
        /* neighbor is the last k-plane of the previous process in k */
        jdx = (kp-1)*pdi*pdj + jp*pdi + ip;
        il = ix - lo[0];
        jl = iy - lo[1];
        kl = zld[kp-1] - 1;
      } else {
        jdx = me;
        il = ix - lo[0];
        jl = iy - lo[1];
        kl = iz - lo[2] - 1;
      }
      idx = kl*ldi*ldj + jl*ldi + il;
      nghbrs[ncnt] = idx;
      procid[ncnt] = jdx;
    }
    /*
    sort indices so that neighbors run from lowest to highest local index. This sort
    is not particularly efficient but ncnt is generally small
     */
    ncnt++;   /* convert index of last entry into number of entries */
    for (j=0; j<ncnt; j++) {
      for (k=j+1; k<ncnt; k++) {
        if (nghbrs[j] > nghbrs[k]) {
          itmp = nghbrs[j];
          nghbrs[j] = nghbrs[k];
          nghbrs[k] = itmp;
          itmp = ixn[j];
          ixn[j] = ixn[k];
          ixn[k] = itmp;
          itmp = iyn[j];
          iyn[j] = iyn[k];
          iyn[k] = itmp;
          itmp = izn[j];
          izn[j] = izn[k];
          izn[k] = itmp;
          itmp = procid[j];
          procid[j] = procid[k];
          procid[k] = itmp;
        }
      }
    }
    for (k=0; k<ncnt; k++) {
      if (nghbrs[k] < 0 || nghbrs[k] >= ntot) {
        printf("p[%d] Invalid neighbor %d\n",me,nghbrs[k]);
      }
    }

/* set weights corresponding to a finite difference Laplacian on a 7-point
   stencil */

    for (j=0; j<ncnt; j++) {
      jdx = procid[j];
      idx = lproc_inv[jdx];   /* block that couples this process to jdx */
      if (ix == ixn[j] && iy == iyn[j] && iz == izn[j]) {
        rval[lvoffset[idx]+licnt[idx]] = 6.0;
      } else {
        rval[lvoffset[idx]+licnt[idx]] = -1.0;
      }
      /* Diagnostic only: the store above has already happened when this
         message is printed */
      if (lvoffset[idx]+licnt[idx] < 0 || lvoffset[idx]+licnt[idx] >= nsave) {
        printf("p[%d] Out of bounds (lvoffset+licnt)[%d]: %d\n",me,idx,lvoffset[idx]+licnt[idx]);
      }
      if (lvoffset[idx]+licnt[idx]>=idbg) {
      }
      /* TODO: Check this carefully */
      jval[lvoffset[idx]+licnt[idx]] = nghbrs[j];
      /* count the elements of this row that fall into block idx */
      ivalt[idx*isize+i-imin] = ivalt[idx*isize+i-imin]+1;
      licnt[idx]++;
    }
  }

/* finish evaluating ival array: turn the per-row counts in ivalt into running
   offsets, starting each block at its offset into rval/jval */

  for (i=0; i<ltotal_procs; i++) {
    ival[i*isize] = lvoffset[i];
    for (j=1; j<isize; j++) {
      ival[i*isize+j] = ival[i*isize+j-1] + ivalt[i*isize+j-1];
    }
  }
  /* isize is reused here as the total number of local non-zero elements */
  isize = 0;
  for (i=0; i<ltotal_procs; i++) {
    isize = isize + licnt[i];
  }
  if (isize > MAXVEC)
    NGA_Error("ISIZE exceeds MAXVEC in local arrays ",isize);

/* Local portion of sparse matrix has been evaluated and decomposed into blocks
   that match partitioning of right hand side across processors. The following
   data is available at this point:
      1) ltotal_procs: the number of processors that are coupled to this one via
         the sparse matrix
      2) lproclist(ltotal_procs): a list of processor IDs that are coupled to
         this processor
      3) lproc_inv(nprocs): The entry in lproclist that corresponds to a given
         processor. If the entry is -1 then that processor does not couple to
         this processor.
      4) licnt(ltotal_procs): The number of non-zero entries in the sparse matrix
         that couple the process represented by lproclist(j) to this process
      5) lvoffset(ltotal_procs): The offsets for the non-zero data in the arrays
         rval and jval for the blocks that couple this processor to other
         processes in lproclist
      6) lnsize(nprocs): the number of rows owned by each process, from which
         the offsets of the distributed right hand side vector follow
    These arrays describe how the sparse matrix is laid out both locally and
    across processors. In addition, the actual data for the distributed sparse
    matrix is found in the following arrays:
      1) rval: values of matrix for all blocks on this processor
      2) jval: j-indices of matrix for all blocks on this processor
      3) ival(ltotal_procs*(lnsize(me)+1)): starting index in rval and
         jval for each row in each block */
 
  NGA_Sync();

/* Create a sparse array of sparse blocks.
   Each block element is divided into four sections.
   The first section consists of 7 ints and contains the parameters
     imin: minimum i index represented by block
     imax: maximum i index represented by block
     jmin: minimum j index represented by block
     jmax: maximum j index represented by block
     iblock: row index of block
     jblock: column index of block
     nnz: number of non-zero elements in block
   The next section consists of nnz doubles that represent the non-zero values
   in the block. The third section consists of nnz ints and contains the local
   j indices of all values. The final section consists of (imax-imin+2) ints
   and contains the starting index in jval and rval for the each row between
   imin and imax. An extra value is included at the end and is set equal to
   the number of non-zero elements in the block. This is included to simplify
   some coding.
 */

  /* offset: global index of the first block owned by this process */
  offset = 0;
  for (i=0; i<me; i++) {
    offset += nnz_list[i];
  }
  NGA_Put(*g_i, &me, &me, &offset, &one);
  if (me==nprocs-1) {
    /* the last process also writes the closing entry (total block count) */
    NGA_Put(*g_i, &nprocs, &nprocs, &nnz, &one);
  }
  NGA_Sync();
  for (i = 0; i<ltotal_procs; i++) {
    /* evaluate total size of block */
    b_nnz = ecnt[lproclist[i]];
    isize = 7*sizeof(int) + b_nnz*(sizeof(double)+sizeof(int))
          + (imax-imin+2)*sizeof(int);
    blk_ptr = (int*)GP_Malloc(isize);

    /* carve the four sections out of the single allocation */
    iparams = blk_ptr;
    gp_rval = (double*)(iparams+7);
    gp_jval = (int*)(gp_rval+b_nnz);
    gp_ival = (gp_jval+b_nnz);

    iparams[0] = imin;
    iparams[1] = imax;
    iparams[2] = jmin[lproclist[i]];
    iparams[3] = jmax[lproclist[i]];
    iparams[4] = me;
    iparams[5] = lproclist[i];
    iparams[6] = b_nnz;

    /* ldj is reused here as the length of one row-pointer array */
    ldj = (imax-imin+2);
    k = 0;
    toffset = lvoffset[i];
    for (j=0; j<b_nnz; j++) {
      gp_jval[j] = jval[toffset+j];
      gp_rval[j] = rval[toffset+j];
    }

    /* make the row pointers relative to the start of this block */
    toffset = ival[i*ldj];
    for (k=0; k<ldj; k++) {
      gp_ival[k] = ival[i*ldj+k]-toffset;
    }

    /* Assign blk_ptr to GP array element */
    GP_Assign_local_element(*gp_block, &offset, (void*)blk_ptr, isize);
    j = 1;
    NGA_Put(*g_j,&offset,&offset,&lproclist[i],&j);
    offset++;
  }
  NGA_Sync();

  /* Row map returned to the caller: tmapc[i] is the global index of the first
     row owned by process i */
  tmapc = (int*)malloc(nprocs*sizeof(int));
  tmapc[0] = 0;
  for (i=1; i<nprocs; i++) {
    tmapc[i] = tmapc[i-1] + lnsize[i-1];
  }
    i = nprocs-1;
  *imapc = tmapc;

  free(rval);
  free(jval);
  free(ival);
  free(ivalt);
  free(xld);
  free(yld);
  free(zld);
  free(lnsize);
  free(lvoffset);
  free(ecnt);
  free(licnt);
  free(lproclist);
  free(lproc_inv);
  free(jmin);
  free(jmax);
  free(nnz_list);
  return;
}
Ejemplo n.º 23
0
Archivo: qvmc.cpp Proyecto: sarahr/QVMC
/**
 * Run the Metropolis sampling: a thermalization phase followed by the
 * measured Monte Carlo cycles.
 *
 * Every cycle proposes one move per particle. A move is accepted when the
 * acceptance ratio is at least 1, or otherwise when a number drawn from ran3
 * is smaller than the ratio. During the measured phase the local energy is
 * recomputed after each accepted move and update_statistics is called once
 * per particle move, whether or not the move was accepted.
 *
 * The member counter `accepted` is reset before thermalization and, when
 * N > 0, again before the measured phase.
 *
 * @param N - number of MC cycles
 * @param N_therm - number of thermalization steps
 * @param alpha - first variational parameter
 * @param beta - second variational parameter
 * @param myrank - MPI rank, used to build the names of the optional output
 *                 files (density / paircorVMC / position)
 */
void VMC::run_algo(int N, int N_therm, double alpha, double beta, int myrank) {

    int i;
    double rat;
    double del_E = 0;
    double del_Epot = 0;
    double del_Ekin = 0;

    // Each optional output uses its own name stream so that any combination
    // of DENSITY, PAIRCOR and POSITION can be enabled together.
#if DENSITY
    ofstream ofile2;
    ostringstream ost_density;
    ost_density << "density" << myrank << ".dat";
    ofile2.open(ost_density.str().c_str(), ios::out);
#endif
#if PAIRCOR
    ofstream ofile4;
    ostringstream ost_paircor;
    ost_paircor << "paircorVMC" << myrank << ".dat";
    ofile4.open(ost_paircor.str().c_str(), ios::out);
#endif
#if POSITION
    ofstream ofile5;
    ostringstream ost_position;
    ost_position << "position" << myrank << ".dat";
    ofile5.open(ost_position.str().c_str(), ios::out);
#endif


    //*************************  Thermalization  ******************************

    accepted = 0;

    initialize(alpha, beta); // Initialize the system


    for (i = 0; i < N_therm; i++) {

        for (int p = 0; p < numpart; p++) {// Loop over all particles

            // Calculate trial position
            trial_pos(p, alpha, beta);

            // Compute acceptance ratio
            rat = ratio(p, alpha, beta);

            // Accept outright when rat >= 1; otherwise compare against a
            // random number. The short-circuit keeps the draw from ran3
            // restricted to the rat < 1 case.
            if (rat >= 1.0 || ran3(&idum) < rat) {
                accept(p, alpha, beta);
                accepted++;
            } else {
                not_accept(p); // Do not accept
            }
        }
    }


    //********************** After thermalization *****************************


    if (N > 0) accepted = 0; // For function delta_opt()

    for (i = 0; i < N; i++) {

        for (int p = 0; p < numpart; p++) { // Loop over particles

            // Calculate trial position
            trial_pos(p, alpha, beta);

            // Compute acceptance ratio
            rat = ratio(p, alpha, beta);

            // Same acceptance rule as during thermalization
            if (rat >= 1.0 || ran3(&idum) < rat) {
                accept(p, alpha, beta);
                del_E = E_local(alpha, beta);
#if E_POT_KIN
                E_Pot_Kin(alpha, beta, del_Epot, del_Ekin);
#endif

#if MINIMIZE
                // part_psi(alpha, beta);
                part_psi_analytic(alpha, beta);
#endif
                accepted++;
            } else {
                not_accept(p); // Do not accept
            }

            // Updating statistics. After a rejected move del_E (and
            // del_Epot / del_Ekin) still hold the values of the last
            // accepted configuration.
#if E_POT_KIN
            update_statistics(del_E, del_Epot, del_Ekin);
#else
            update_statistics(del_E);
#endif

        }

#if DENSITY
        if (i % 100 == 0) {
            for (int l = 0; l < numpart; l++) {
                ofile2 << sqrt(Trial->Pos->r(l)) << " ";
            }

            ofile2 << 1.0 << endl; // weight "1" for walker in VMC
        }
#endif
#if PAIRCOR
        if ((i % 500) == 0) {
            for (int h = 1; h < numpart; h++) {
                for (int m = 0; m < h; m++) {
                    ofile4 << Trial->Pos->r_int(m, h) << " ";
                }
            }

            ofile4 << 1 << endl;
        }

#endif
#if POSITION
        if ((i % 500) == 0)
            for (int part = 0; part < numpart; part++) {
                for (int m = 0; m < dim; m++) {
                    ofile5 << Trial->Pos->current(part, m) << " ";
                }
                ofile5 << endl;
            }
#endif

    }

    //cout << accepted/(N*numpart) << endl;

#if DENSITY
    ofile2.close();
#endif
#if PAIRCOR
    ofile4.close();
#endif
#if POSITION
    ofile5.close();
#endif


    return;

}
Ejemplo n.º 24
0
/**
 * Draws the next value from ran3(), using this object's random_seed as the
 * generator state (ran3 receives its address and may update it).
 *
 * @return the value produced by ran3()
 */
double Diffusion::call_RNG() {
    const double draw = ran3(&random_seed);
    return draw;
}
Ejemplo n.º 25
0
// Main entry point.
//
// Reads "config.ann" and, depending on its first value, either trains a
// network with one hidden layer of sigmoid units by on-line back-propagation
// (Training == 1) or applies a previously trained network to new inputs.
//
// Configuration values are read in this order: Training, NumPattern,
// NumInput, NumHidden, NumOutput and, in training mode only, MaxEpoca, eta,
// alpha, smallwt, ErroStop, seed.
//
// Training mode reads "trainInput.dat" / "trainOutput.dat" and writes
// "saidaTreina.dat" (network outputs), "pesos.dat" (weights),
// "curvaErro.dat" (error every 100 epochs) and "fullErro.dat" (error every
// epoch). Application mode reads "aplicInput.dat" and "pesos.dat" and writes
// "saidaAplic.dat".
//
// Returns 0 on success and 1 when a file cannot be opened or the weights
// file cannot be parsed.
int main(void)
{
  // Network configuration values
  int Training, NumPattern, NumInput, NumHidden, NumOutput;

  // Announce that the configuration is being read
  printf("\nLendo arquivo de configuracao.\n");

  // Configuration file
  CONFIG = fopen("config.ann","r");
  if (CONFIG == NULL)
  {
    printf("Falha ao abrir arquivo de configuracao.");
    return(1);
  }

  // Read the configuration shared by both modes
  Training = leConfigInt(CONFIG);
  NumPattern = leConfigInt(CONFIG);
  NumInput = leConfigInt(CONFIG);
  NumHidden = leConfigInt(CONFIG);
  NumOutput = leConfigInt(CONFIG);

////////////////////////////////////////////////////////////////////////
// Training a network
////////////////////////////////////////////////////////////////////////
  if (Training == 1)
  {
    // Announce that the network is being trained
    printf("\nTreinando a rede!!!\n");

    // Data files
    ENTRADA = fopen("trainInput.dat","r");
    if (ENTRADA == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao abrir arquivo de entrada.\n\n");
      return(1);
    }
    OBJETIVO = fopen("trainOutput.dat","r");
    if (OBJETIVO == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao abrir arquivo de objetivos.\n\n");
      return(1);
    }
    SAIDA = fopen("saidaTreina.dat","w+");
    if (SAIDA == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao criar arquivo de saida.\n\n");
      return(1);
    }
    PESOS = fopen("pesos.dat","w+");
    if (PESOS == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao criar arquivo de pesos.\n\n");
      return(1);
    }
    CURVAERRO = fopen("curvaErro.dat","w+");
    if (CURVAERRO == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao criar arquivo de curva de erro.\n\n");
      return(1);
    }
    FULLERRO = fopen("fullErro.dat","w+");
    if (FULLERRO == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao criar arquivo de erro completo.\n\n");
      return(1);
    }

    // Working variables
    int    *ranpat;
    int    i, j, k, p, np, op, epoca;
    long   seed, *idum;
    float  Temp;
    double *DeltaO, *SumDOW, *DeltaH;
    double **Input, **Target, **SumH, **Hidden, **SumO, **Output;
    double **WeightIH, **WeightHO, **DeltaWeightIH, **DeltaWeightHO;
    double Erro, Esse;
    int    MaxEpoca;
    double eta, alpha, smallwt, ErroStop;

    // Read the training parameters
    MaxEpoca = leConfigInt(CONFIG);
    eta = leConfigFloat(CONFIG);
    alpha = leConfigFloat(CONFIG);
    smallwt = leConfigFloat(CONFIG);
    ErroStop = leConfigFloat(CONFIG);
    seed = leConfigInt(CONFIG);

    // Dynamic allocation. The outer tables hold row pointers, so they are
    // sized with sizeof(double *).
    ranpat = malloc(NumPattern*sizeof(int));
    DeltaO = malloc(NumOutput*sizeof(double));
    SumDOW = malloc(NumHidden*sizeof(double));
    DeltaH = malloc(NumHidden*sizeof(double));
    Input = malloc(NumPattern*sizeof(double *));
    Target = malloc(NumPattern*sizeof(double *));
    SumH = malloc(NumPattern*sizeof(double *));
    Hidden = malloc(NumPattern*sizeof(double *));
    SumO = malloc(NumPattern*sizeof(double *));
    Output = malloc(NumPattern*sizeof(double *));
    for (i = 0; i < NumPattern; i++)
    {
        Input[i] = malloc(NumInput*sizeof(double));
        Target[i] = malloc(NumOutput*sizeof(double));
        SumH[i] = malloc(NumHidden*sizeof(double));
        Hidden[i] = malloc(NumHidden*sizeof(double));
        SumO[i] = malloc(NumOutput*sizeof(double));
        Output[i] = malloc(NumOutput*sizeof(double));
    }
    WeightIH = malloc(NumInput*sizeof(double *));
    DeltaWeightIH = malloc(NumInput*sizeof(double *));
    for (i = 0; i < NumInput; i++)
    {
        WeightIH[i] = malloc(NumHidden*sizeof(double));
        DeltaWeightIH[i] = malloc(NumHidden*sizeof(double));
    }
    WeightHO = malloc(NumHidden*sizeof(double *));
    DeltaWeightHO = malloc(NumHidden*sizeof(double *));
    for (i = 0; i < NumHidden; i++)
    {
        WeightHO[i] = malloc(NumOutput*sizeof(double));
        DeltaWeightHO[i] = malloc(NumOutput*sizeof(double));
    }

    // Read the training inputs and targets, one pattern per iteration
    for (i = 0; i < NumPattern; i++)
    {
      for (j = 0; j < NumInput; j++)
      {
        fscanf(ENTRADA, "%f", &Temp);
        Input[i][j] = (double)Temp;
      }
      for (j = 0; j < NumOutput; j++)
      {
        fscanf(OBJETIVO, "%f", &Temp);
        Target[i][j] = (double)Temp;
      }
    }

    // Random number generator seed
    idum = &seed;

    // Initialise WeightIH and DeltaWeightIH (weights in [-smallwt, smallwt]
    // when ran3() is in [0, 1])
    for(j = 0; j < NumHidden; j++)
    {
      for(i =0; i < NumInput; i++)
      {
        DeltaWeightIH[i][j] = 0.0;
        WeightIH[i][j] = 2.0 * ( ran3(idum) - 0.5 ) * smallwt;
      }
    }

    // Initialise WeightHO and DeltaWeightHO
    for(k = 0; k < NumOutput; k ++)
    {
      for(j = 0; j < NumHidden; j++)
      {
        DeltaWeightHO[j][k] = 0.0;
        WeightHO[j][k] = 2.0 * ( ran3(idum) - 0.5 ) * smallwt;
      }
    }

    // Weight-update loop, one pass per epoch
    for(epoca = 0; epoca < MaxEpoca; epoca++)
    {

      // Shuffle the presentation order of the patterns
      for(p = 0; p < NumPattern; p++)
      {
        ranpat[p] = p;
      }
      for(p = 0; p < NumPattern; p++)
      {
        np = p + ran3(idum) * ( NumPattern - p );
        // Keep the swap index in range in case ran3() ever yields 1.0
        if (np >= NumPattern)
        {
          np = NumPattern - 1;
        }
        op = ranpat[p];
        ranpat[p] = ranpat[np];
        ranpat[np] = op;
      }

      // Reset the error accumulators for this epoch
      Erro = 0.0;
      Esse = 0.0;

      // Loop over all training patterns in shuffled order
      for(np = 0; np < NumPattern; np++)
      {
        p = ranpat[np];
        // Compute the hidden-unit activations.
        // NOTE(review): WeightIH[0][j] is used as the starting value and then
        // again as the weight of input 0 inside the loop — confirm intended.
        for(j = 0; j < NumHidden; j++)
        {
          SumH[p][j] = WeightIH[0][j];
          for(i = 0; i < NumInput; i++)
          {
            SumH[p][j] += Input[p][i] * WeightIH[i][j];
          }
          Hidden[p][j] = 1.0/(1.0 + exp(-SumH[p][j]));
        }

        // Compute the output-unit activations and the errors.
        // NOTE(review): WeightHO[0][k] is likewise counted twice.
        for(k = 0; k < NumOutput; k++)
        {
          SumO[p][k] = WeightHO[0][k];
          for(j = 0; j < NumHidden; j++)
          {
            SumO[p][k] += Hidden[p][j] * WeightHO[j][k];
          }
          // Sigmoidal output
          Output[p][k] = 1.0/(1.0 + exp(-SumO[p][k]));
          // Linear output
  //      Output[p][k] = SumO[p][k];
          // Compute the errors
          // Sum of squared errors (SSE)
          Esse += 0.5 * (Target[p][k] - Output[p][k]) * (Target[p][k] - Output[p][k]);
          // Cross-entropy error
  //      Ecc -= ( Target[p][k] * log( Output[p][k] ) + ( 1.0 - Target[p][k] ) * log( 1.0 - Output[p][k] ) );
          // Sigmoidal output, SSE
          DeltaO[k] = (Target[p][k] - Output[p][k]) * Output[p][k] * (1.0 - Output[p][k]);
          // Sigmoidal output, cross-entropy error
  //      DeltaO[k] = Target[p][k] - Output[p][k];
          // Linear output, SSE
  //      DeltaO[k] = Target[p][k] - Output[p][k];
        }

        // Skip the very last weight update so that the saved weights are the
        // ones that produced the final outputs
        if ((epoca == MaxEpoca-1) && (np == NumPattern-1))
        {
          fprintf(stdout,"\n\nFim do treinamento.");
          break;
        }

        // Epoch error so far (accumulated SSE, not averaged)
        Erro = Esse;

        // Back-propagate the errors to the hidden layer
        for(j = 0; j < NumHidden; j++)
        {
          SumDOW[j] = 0.0;
          for(k = 0; k < NumOutput; k++)
          {
            SumDOW[j] += WeightHO[j][k] * DeltaO[k];
          }
          DeltaH[j] = SumDOW[j] * Hidden[p][j] * (1.0 - Hidden[p][j]);
        }

        // Update the weights in WeightIH.
        // NOTE(review): row 0 receives the momentum update below and then a
        // second, momentum-free update when i == 0 — confirm intended.
        for(j = 0; j < NumHidden; j++)
        {
          DeltaWeightIH[0][j] = eta * DeltaH[j] + alpha * DeltaWeightIH[0][j];
          WeightIH[0][j] += DeltaWeightIH[0][j];
          for(i = 0; i < NumInput; i++)
          {
            // Weight adjustment
//          DeltaWeightIH[i][j] = eta * Input[p][i] * DeltaH[j] + alpha * DeltaWeightIH[i][j];
            DeltaWeightIH[i][j] = eta * Input[p][i] * DeltaH[j];
            WeightIH[i][j] += DeltaWeightIH[i][j];
          }
        }

        // Update the weights in WeightHO (same row-0 pattern as above)
        for(k = 0; k < NumOutput; k ++)
        {
          DeltaWeightHO[0][k] = eta * DeltaO[k] + alpha * DeltaWeightHO[0][k];
          WeightHO[0][k] += DeltaWeightHO[0][k];
          for(j = 0; j < NumHidden; j++)
          {
            // Weight adjustment
//          DeltaWeightHO[j][k] = eta * Hidden[p][j] * DeltaO[k] + alpha * DeltaWeightHO[j][k];
            DeltaWeightHO[j][k] = eta * Hidden[p][j] * DeltaO[k];
            WeightHO[j][k] += DeltaWeightHO[j][k];
          }
        }

      } // End of the loop over training patterns

      if(epoca%100 == 0)
      {
        fprintf(stdout, "\nEpoca %-5d :   Erro = %f", epoca, Erro);
        fprintf(CURVAERRO, "%f\n", Erro);
      }
      fprintf(FULLERRO, "%f\n", Erro);

      // Stop once the network has 'learned enough'
      if(Erro < ErroStop)
      {
        fprintf(CURVAERRO, "%f\n", Erro);
        break;
      }

    } // End of the weight-update loop

    // Write the network outputs
    fprintf(stdout, "\n\nDados finais do treinamento -> Epoca = %d; Erro = %f", epoca, Erro);
    for(p = 0; p < NumPattern; p++)
    {
      for(k = 0; k < NumOutput; k++)
      {
        fprintf(SAIDA, "%f ", Output[p][k]);
      }
      fprintf(SAIDA, "\n");
    }
    fprintf(stdout, "\n\nFinalizado!\n\n");

    // Save the WeightIH weights, one input row per line
    for(i = 0; i < NumInput; i++)
    {
      for(j = 0; j < NumHidden; j++)
      {
        fprintf(PESOS, "%3.40f ", WeightIH[i][j]);
      }
      fprintf(PESOS, "\n");
    }

    // Save the WeightHO weights, one hidden row per line
    for(j = 0; j < NumHidden; j++)
    {
      for(k = 0; k < NumOutput; k ++)
      {
        fprintf(PESOS, "%3.40f ", WeightHO[j][k]);
      }
      fprintf(PESOS, "\n");
    }

    // Close the files
    fclose(CONFIG);
    fclose(ENTRADA);
    fclose(OBJETIVO);
    fclose(SAIDA);
    fclose(PESOS);
    fclose(CURVAERRO);
    fclose(FULLERRO);

    // Release the dynamic memory
    free(ranpat);
    free(DeltaO);
    free(SumDOW);
    free(DeltaH);
    for (i = 0; i < NumPattern; i++)
    {
        free(Input[i]);
        free(Target[i]);
        free(SumH[i]);
        free(Hidden[i]);
        free(SumO[i]);
        free(Output[i]);
    }
    free(Input);
    free(Target);
    free(SumH);
    free(Hidden);
    free(SumO);
    free(Output);
    for (i = 0; i < NumInput; i++)
    {
        free(WeightIH[i]);
        free(DeltaWeightIH[i]);
    }
    free(WeightIH);
    free(DeltaWeightIH);
    for (i = 0; i < NumHidden; i++)
    {
        free(WeightHO[i]);
        free(DeltaWeightHO[i]);
    }
    free(WeightHO);
    free(DeltaWeightHO);
  }

////////////////////////////////////////////////////////////////////////
// Applying an already trained network
////////////////////////////////////////////////////////////////////////
  else
  {
    // Announce that the network is being applied
    printf("\nAplicando a rede!!!\n\n");
    
    // Data files
    ENTRADA = fopen("aplicInput.dat","r");
    if (ENTRADA == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao abrir arquivo de entrada.\n\n");
      return(1);
    }
    SAIDA = fopen("saidaAplic.dat","w+");
    if (SAIDA == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao criar arquivo de saida.\n\n");
      return(1);
    }
    PESOS = fopen("pesos.dat","r");
    if (PESOS == NULL)
    {
      fprintf(stderr,"ERRO: Falha ao abrir arquivo de pesos da rede.\n\n");
      return(1);
    }

    // Working variables
    int    i, j, k, p;
    float  Temp;
    double **Input, **SumH, **Hidden, **SumO, **Output, **WeightIH, **WeightHO;

    // Dynamic allocation (outer tables hold row pointers)
    Input = malloc(NumPattern*sizeof(double *));
    SumH = malloc(NumPattern*sizeof(double *));
    Hidden = malloc(NumPattern*sizeof(double *));
    SumO = malloc(NumPattern*sizeof(double *));
    Output = malloc(NumPattern*sizeof(double *));
    for (i = 0; i < NumPattern; i++)
    {
        Input[i] = malloc(NumInput*sizeof(double));
        SumH[i] = malloc(NumHidden*sizeof(double));
        Hidden[i] = malloc(NumHidden*sizeof(double));
        SumO[i] = malloc(NumOutput*sizeof(double));
        Output[i] = malloc(NumOutput*sizeof(double));
    }
    WeightIH = malloc(NumInput*sizeof(double *));
    for (i = 0; i < NumInput; i++)
    {
        WeightIH[i] = malloc(NumHidden*sizeof(double));
    }
    WeightHO = malloc(NumHidden*sizeof(double *));
    for (i = 0; i < NumHidden; i++)
    {
        WeightHO[i] = malloc(NumOutput*sizeof(double));
    }

    // Read the input patterns the network is applied to
    for (p = 0; p < NumPattern; p++)
    {
      for (i = 0; i < NumInput; i++)
      {
        fscanf(ENTRADA, "%f", &Temp);
        Input[p][i] = (double)Temp;
      }
      //fprintf(stdout,"%f %f %f\n",Input[p][0],Input[p][1],Input[p][2]);
    }

    // Load WeightIH. The weights are read straight into the double so the
    // precision written by the training branch ("%3.40f") is not truncated
    // to float on the way back in.
    for(i = 0; i < NumInput; i++)
    {
      for(j =0; j < NumHidden ; j++)
      {
        if (fscanf(PESOS, "%lf", &WeightIH[i][j]) != 1)
        {
          fprintf(stderr,"ERRO: Falha ao ler arquivo de pesos da rede.\n\n");
          return(1);
        }
      }
      //fprintf(stdout,"%f %f %f\n",WeightIH[i][0],WeightIH[i][1],WeightIH[i][2]);
    }

    // Load WeightHO (same full-precision read as above)
    for(j = 0; j < NumHidden; j ++)
    {
      for(k = 0; k < NumOutput; k++)
      {
        if (fscanf(PESOS, "%lf", &WeightHO[j][k]) != 1)
        {
          fprintf(stderr,"ERRO: Falha ao ler arquivo de pesos da rede.\n\n");
          return(1);
        }
      }
      //fprintf(stdout,"%f %f \n",WeightHO[j][0],WeightHO[j][1]);
    }

    // Loop over all patterns
    for(p = 0; p < NumPattern; p++)
    {
      // Compute the hidden-unit activations (same formula as in training,
      // including the repeated use of weight row 0)
      for(j = 0; j < NumHidden; j++)
      {
        SumH[p][j] = WeightIH[0][j];
        for(i = 0; i < NumInput; i++)
        {
          SumH[p][j] += Input[p][i] * WeightIH[i][j];
        }
        Hidden[p][j] = 1.0/(1.0 + exp(-SumH[p][j]));
      }

      // Compute the output-unit activations
      for(k = 0; k < NumOutput; k++)
      {
        SumO[p][k] = WeightHO[0][k];
        for(j = 0; j < NumHidden; j++)
        {
          SumO[p][k] += Hidden[p][j] * WeightHO[j][k];
        }
        // Sigmoidal output
        Output[p][k] = 1.0/(1.0 + exp(-SumO[p][k]));
        // Linear output
//      Output[p][k] = SumO[p][k];
        // Write the result to the output file
        fprintf(SAIDA, "%f ", Output[p][k]);
      }
      fprintf(SAIDA, "\n");
    }

    // Close the files
    fclose(CONFIG);
    fclose(ENTRADA);
    fclose(SAIDA);
    fclose(PESOS);

    // Release the dynamic memory
    for (i = 0; i < NumPattern; i++)
    {
        free(Input[i]);
        free(SumH[i]);
        free(Hidden[i]);
        free(SumO[i]);
        free(Output[i]);
    }
    free(Input);
    free(SumH);
    free(Hidden);
    free(SumO);
    free(Output);
    for (i = 0; i < NumInput; i++)
    {
        free(WeightIH[i]);
    }
    free(WeightIH);
    for (i = 0; i < NumHidden; i++)
    {
        free(WeightHO[i]);
    }
    free(WeightHO);
  }

  return (0);

} // End of main
Ejemplo n.º 26
0
// Returns the next ran3() draw (seeded through the member idum), capped from
// above at 0.999999 via ElMin.
REAL cElRanGen::cNRrandom3 ()
{
     const double upperCap = 0.999999;
     double draw = ran3(&idum);
     return ElMin(draw, upperCap);
}
Ejemplo n.º 27
0
int main (int argc,char *argv[])
{
    int             number, j, i, l, k, n, mid, iter;
    long          idem=-1;
    char          arg[64], outfn[64], *eqptr;
    double     *data, *olddata, h, h1, h2, p, lr, dt, dp, rp;

    if( argc > 1 )
    {
        for(i = 1; i < argc; i++)
	{
	    eqptr = strchr(argv[i], '=');
	    strcpy(arg, eqptr + 1);
	    switch(argv[i][0])
	    {
	       case 'p':
	       case 'P':
                  p = atof(arg);
		  break;
	       case 'n':
	       case 'N':
		  number = atoi(arg);
		  break;
	       case 'h':
	       case 'H':
		  h = atof(arg);
		  break;
	       case 'o':
	       case 'O':
	          strcpy(outfn, arg);
		  break;
	    }
	}
    }
    printf("Input parameters: %f  %d  %f  %s\n", p, number, h, outfn);
    /* We need number to be odd so it easier to devide to 2 */
    if(number%2==0) number++;
    data=(double*)malloc(sizeof(double));
    olddata=(double*)malloc(sizeof(double));
    if( (data==NULL)&&(olddata==NULL) )
    {
          printf("Could not allocate memory!\n");
	  exit(-1); 
    }
    
    /* Estimate number of iteration needed to get number points */
    iter=(int)(log2(number));
   
    if(argc>0) /* enough arguments? */
    {
        olddata[0]=h;
        for(i=1; i<=iter; i++)
	{
	      printf("Iteration %d . ", i);
	      n=(int)pow(2.0, (double)i);
	      mid=n/2; k=0;
	      data=realloc(data, n*sizeof(double));
	      for(j=0; j<mid; j++)
	      {
	            idem=(long)(-ABSV(ran3(&idem)));
		    lr=ran3(&idem); 
		    dp=gasdev((int*)(&idem)); rp=p+(dp/10);
		    cascade(olddata[j], &h1, &h2, rp, lr);
		    /*printf("%.3f %3.3f %3.3f |",rp,h1,h2);*/
		    data[2*j]=h1; data[2*j+1]=h2;
		    if((j>0)&&(j%(10000*mid)==0)) printf(". ");
	      }
	      printf("completed\n");
	      olddata=(double*)realloc(olddata, n*sizeof(double));
	      for(j=0; j<n; j++) olddata[j]=data[j];
	}

	/* Now write this out */
	out = fopen(outfn, "w");
	dt=1/number;
	for(i=0; i <number; i++) fprintf(out,"%f %f\n", (double)i, data[i]);
    }
    fclose(out);
    free(data); free(olddata);
    return (0); /* for lint formalities */

}
Ejemplo n.º 28
0
/*******************************************************************
 * 
 * NAME :               mc_sampling( int cycles, double step_length, 
 *                      int& accepted, double& energy, 
 *                      double& energy_sq)
 *
 * DESCRIPTION :        Brute-force Metropolis sampling. Runs
 *                      (cycles + thermalization) sweeps; in every sweep
 *                      each particle in turn is moved by
 *                      step_length * (ran3 - 0.5) per coordinate and the
 *                      move is accepted when ran3 <= (wave function
 *                      ratio)^2. Statistics are gathered only for the
 *                      sweeps after the first `thermalization` ones.
 *
 * INPUTS :             cycles       number of production sweeps
 *                      step_length  width of the trial displacement
 *
 * OUTPUTS :            accepted     accepted production moves divided by
 *                                   the number of particles
 *                      energy       mean of ht->get_energy() over all
 *                                   production steps
 *                      energy_sq    mean of its square
 *                      All three are overwritten; their incoming values
 *                      are ignored.
 */
void MC_Brute_Force::mc_sampling(int cycles, double step_length, int& accepted, double& energy, double& energy_sq) {
    const int n_particles = wf->getNParticles();
    const int dim = wf->getDim();

    // Local accumulators, so the result does not depend on whatever the
    // caller left in the output arguments.
    double loc_energy = 0;
    double loc_energy_sq = 0;
    double delta_e = 0;
    accepted = 0;

    // Initial position of the electrons: a random offset around the origin.
    mat r_old = zeros(n_particles, dim);
    for (int i = 0; i < n_particles; i++)
        for (int j = 0; j < dim; j++)
            r_old(i, j) += step_length * (ran3(&idum) - 0.5);

    mat r_new = r_old;

    // Evaluate the wave function in the initial position.
    wf->set_r_new(r_old, 0);
    wf->init_slater();
    wf->accept_move();

    // Monte Carlo cycles
    for (int sample = 0; sample < (cycles + thermalization); sample++) {

        // The first `thermalization` sweeps are discarded, which leaves
        // exactly `cycles` production sweeps for the averages below.
        const bool production = (sample >= thermalization);

        // Loop over all particles.
        for (int active = 0; active < n_particles; active++) {

            // Trial position for the active particle.
            for (int d = 0; d < dim; d++) {
                r_new(active, d) = r_old(active, d) + step_length * (ran3(&idum) - 0.5);
            }

            // Evaluate the wave function in r_new.
            wf->set_r_new(r_new, active);
            wf->evaluate_new();

            // Metropolis acceptance test on the squared ratio.
            double R = wf->get_ratio();
            R = R * R;

            if (ran3(&idum) <= R) {
                r_old = r_new;
                wf->accept_move();

                if (production) {
                    accepted++;
                }
            } else {
                // A rejected move resets the trial position.
                r_new = r_old;
            }

            // Local energy of the current configuration.
            if (production) {
                delta_e = ht->get_energy(r_old);
                loc_energy += delta_e;
                loc_energy_sq += delta_e * delta_e;
            }
        } // End particles.
    } // End MC cycles.

    // Averages per production step.
    energy = loc_energy / cycles / n_particles;
    energy_sq = loc_energy_sq / cycles / n_particles;
    accepted /= n_particles;
}
Ejemplo n.º 29
0
Archivo: qvmc.cpp Proyecto: sarahr/QVMC
/**
 * The VMC algorithm adapted to the blocking procedure.
 *
 * Runs N_therm thermalization sweeps followed by N sampling sweeps. In every
 * sweep each particle gets one trial move, accepted when the ratio is >= 1
 * or, otherwise, when a ran3() draw falls below the ratio. After each
 * single-particle step of the sampling phase the current local energy is
 * stored, so the result holds N * numpart values ordered by sweep and then
 * by particle.
 *
 * The stored energy is refreshed only when a move is accepted; after a
 * rejected move the previous value is stored again (0 until the first
 * accepted move of the sampling phase).
 *
 * The member `accepted` is reset before thermalization and, when N > 0,
 * again before sampling.
 *
 * @param N - Number of MC cycles
 * @param N_therm - Number of thermalization steps
 * @param alpha - first variational parameter
 * @param beta - second variational parameter
 * @return local energies of all samples (N * numpart entries)
 */
vec VMC::run_algo_blocking(int N, int N_therm, double alpha, double beta) {

    double del_E = 0;
    vec all_energies(N * numpart);


    // ************************* Thermalization ******************************

    accepted = 0;

    initialize(alpha, beta); // Initialize the system

    for (int i = 0; i < N_therm; i++) {

        for (int p = 0; p < numpart; p++) { // Loop over all particles

            trial_pos(p, alpha, beta);
            const double rat = ratio(p, alpha, beta);

            // A random number is drawn only when rat < 1 (short-circuit),
            // so the random stream is consumed exactly as before.
            if (rat >= 1.0 || ran3(&idum) < rat) {
                accept(p, alpha, beta);
                accepted++;
            } else {
                not_accept(p);
            }
        }
    }


    // *********************** After thermalization  **************************

    if (N > 0) accepted = 0; // For function delta_opt()

    for (int i = 0; i < N; i++) {

        for (int p = 0; p < numpart; p++) { // Loop over all particles

            trial_pos(p, alpha, beta);
            const double rat = ratio(p, alpha, beta);

            if (rat >= 1.0 || ran3(&idum) < rat) {
                accept(p, alpha, beta);
                del_E = E_local(alpha, beta);
                accepted++;
            } else {
                not_accept(p);
            }

            // One slot per (sweep, particle) step: indexing by the sweep
            // alone would overwrite the same entry numpart times and leave
            // the remaining entries unset.
            all_energies(i * numpart + p) = del_E;
        }
    }

    return all_energies;

}