/*
 * Serial elimination benchmark.
 *
 * Usage: <program> N
 *
 * Reads the matrix dimension N from argv[1], allocates and fills an N x N
 * matrix through malloc2D()/init2D(), then reduces it in place: for every
 * pivot row k, each row i below it has (A[i][k]/A[k][k]) times row k
 * subtracted from it.  No pivoting is performed, so a zero on the diagonal
 * leads to a division by zero.  Only the elimination loop is timed.
 *
 * Prints "LU-Serial <N> <seconds>" on stdout and hands the resulting matrix
 * to print2DFile() with the file name "output_serial".
 *
 * Returns 0 on success, 1 when the size argument is missing.
 */
int main(int argc, char * argv[])
{
    /* Without this guard argv[1] is NULL when no argument is supplied
       and atoi() would dereference it. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <matrix size>\n",
                argc > 0 ? argv[0] : "lu_serial");
        return 1;
    }

    int X = atoi(argv[1]);
    int Y = X;                      /* the matrix is square */
    double ** A = malloc2D(X, Y);
    int i, j, k;
    double l;
    struct timeval ts, tf;
    double total_time;

    init2D(A, X, Y);

    gettimeofday(&ts, NULL);
    for (k = 0; k < X - 1; k++) {
        for (i = k + 1; i < X; i++) {
            /* multiplier that cancels A[i][k] against pivot row k */
            l = A[i][k] / A[k][k];
            for (j = k; j < Y; j++)
                A[i][j] -= l * A[k][j];
        }
    }
    gettimeofday(&tf, NULL);

    /* elapsed wall-clock time in seconds (whole seconds + microseconds) */
    total_time = (tf.tv_sec - ts.tv_sec) + (tf.tv_usec - ts.tv_usec) * 0.000001;
    printf("LU-Serial\t%d\t%.3lf\n", X, total_time);

    char * filename = "output_serial";
    print2DFile(A, X, Y, filename);
    return 0;
}
Esempio n. 2
0
/*
 * Generates an initial matrix and dumps it to a file.
 *
 * Usage: <program> N OUTPUT
 *
 * Allocates an N x N matrix with malloc2D(), fills it with init2D(), passes
 * it to print2DFile() together with the file name given in argv[2], and
 * releases it with free2D().
 *
 * Returns 0 on success, 1 when either argument is missing.
 */
int main(int argc, char **argv){
    /* Both argv[1] (size) and argv[2] (output name) are read below, so
       make sure they exist before touching them. */
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <matrix size> <output file>\n",
                argc > 0 ? argv[0] : "generate");
        return 1;
    }

    int x = atoi(argv[1]);
    int y = x;                      /* the matrix is square */

    double ** A = malloc2D(x, y);
    init2D(A, x, y);

    print2DFile(A, x, y, argv[2]);
    free2D(A, x, y);
    return 0;
}
Esempio n. 3
0
/* Creates a CartesianGrid holding an nx-by-ny table of GtsVertex pointers
 * (obtained from malloc2D) and records the dimensions in the grid.
 *
 * The bounds start "inverted": each minimum is G_MAXDOUBLE and each maximum
 * is -G_MAXDOUBLE -- presumably so that the first coordinate compared
 * against them replaces both; confirm against the code that fills the grid.
 *
 * The returned structure comes from g_malloc(). */
static CartesianGrid * cartesian_grid_new (guint nx, guint ny)
{
  CartesianGrid * result = g_malloc (sizeof (CartesianGrid));

  /* dimensions and vertex storage */
  result->nx = nx;
  result->ny = ny;
  result->vertices = (GtsVertex ***) malloc2D (nx, ny, sizeof (GtsVertex *));

  /* x extent */
  result->xmin = G_MAXDOUBLE;
  result->xmax = - G_MAXDOUBLE;

  /* y extent */
  result->ymin = G_MAXDOUBLE;
  result->ymax = - G_MAXDOUBLE;

  return result;
}
Esempio n. 4
0
/*------------------------ main() ---------------------*/
/*
    Interactive driver that builds a focused probe wave function and
    writes it to an image file.

    All inputs are read from stdin in this order: output file name,
    image size in pixels (nx, ny), image size in Angstroms (ax, by),
    the probe parameters (V0, Cs3, Cs5, df, apert), the smooth-aperture
    flag, and the probe position (dx, dy).  When MANY_ABERR is defined a
    list of named higher-order aberrations terminated by "END" is read
    as well.

    The complex probe is produced by makeProbe() in cpix, copied into
    myFile with the real part in columns 0..nx-1 and the imaginary part in
    columns nx..2*nx-1, and rescaled so that the summed intensity equals
    nx*ny.  The image and its parameter block are then written with
    myFile.write(); the value range, the probe size returned by prbSize()
    and the CPU time are printed on stdout.

    Returns EXIT_SUCCESS.  An unrecognized aberration name also leaves
    through exit( EXIT_SUCCESS ).
*/
int main()
{
    char fileout[NCMAX], cline[NCMAX];
    const char version[] = "06-apr-2013 (ejk)";
    /*  multiMode is only assigned inside the MANY_ABERR section below but
        is always handed to makeProbe(), so it must start at 0 to avoid
        reading an indeterminate value in the default build  */
    int ix, iy, nx, ny, ixmid, iymid, i, ismoth, npixels, ns,
        done, status, multiMode = 0, NPARAM;
    float rmin, rmax, aimin, aimax, p2;
    float *param, pixr, pixi, **pixsq;
    double k2max, keV, wavlen, ax, by, rx, ry,
        rx2, ry2, pi, dx, dy, scale, scale2, pixel,
        Cs3, Cs5, df, sum, time, apert;
    double  x;

    cfpix cpix;

    floatTIFF myFile;

/*  Echo version date etc.  */
    printf( "probe version dated %s\n", version );
    printf("Copyright (C) 1998-2013 Earl J. Kirkland\n" );
    printf( "This program is provided AS-IS with ABSOLUTELY NO WARRANTY\n "
        " under the GNU general public license\n\n" );

#ifdef MANY_ABERR
    printf( "calculate a focused probe wave function including multiple aberr.\n\n");
#else
    printf( "calculate a focused probe wave function\n\n");
#endif

    pi = 4.0 * atan( 1.0 );

    /*  memory to store parameters, cleared to zero  */
    NPARAM = myFile.maxParam();
    param = (float*) malloc1D( NPARAM, sizeof(float), "probe-param" );
    for( i=0; i<NPARAM; i++) param[i] = 0.0F;

/* ---- Get desired image size, parameters etc. ------------- */

    /*  NOTE(review): the scanf() results stored in ns are never checked
        and "%s" is unbounded with respect to fileout[NCMAX]  */
    printf("Name of file to get focused probe wave function:\n");
    ns = scanf("%s", fileout );

    printf("Desired size of output image in pixels Nx,Ny:\n");
    ns = scanf("%d %d", &nx, &ny );

    printf("Size of output image in Angstroms ax,by:\n");
    ns = scanf("%lf %lf", &ax, &by );

    printf("Probe parameters, V0(kv), Cs3(mm), Cs5(mm),"
           " df(Angstroms), apert(mrad):\n");
    ns = scanf("%lg %lg %lg %lg %lg",
          &keV, &Cs3, &Cs5, &df, &apert );
    param[pDEFOCUS] = (float) df;
    /*  Cs3 and Cs5 are typed in mm and stored multiplied by 1.0e7  */
    param[pCS]  = (float) ( Cs3*1.0e7 );
    param[pCS5] = (float) ( Cs5*1.0e7 );

    printf("Type 1 for smooth aperture:\n");
    ns = scanf("%d", &ismoth );

    printf("Probe position x,y in Ang.:\n");
    ns = scanf("%lf %lf", &dx, &dy );

#ifdef MANY_ABERR
    /*   get higher order aberrations if necessary */
    printf("type higher order aber. name (as C32a, etc.) followed\n"
        " by a value in mm. (END to end)\n");
    done = multiMode = 0;
    do{
        ns = scanf( "%20s", cline );
        if( strstr( cline, "END" ) != NULL ) {
            done = 1;
        } else {
            ns = scanf( "%lg", &x );
            /* printf("%s, %f\n", cline, x );  testing */
            status = readCnm( cline, param, x );
            if( status < 0 ) {
                printf( "unrecognized aberration, exit...\n");
                exit( EXIT_SUCCESS );
            } else multiMode = 1;
        }
    } while( !done );

#endif

/* ------- Calculate misc constants ------------ */

    time = cputim( );

    rx  = 1.0/ax;
    rx2 = rx * rx;
    ry  = 1.0/by;
    ry2 = ry * ry;

    ixmid = nx/2;
    iymid = ny/2;

    wavlen = wavelength( keV );
    printf("electron wavelength = %g Angstroms\n", wavlen);

    /*  aperture (mrad -> rad) divided by the wavelength, then squared  */
    k2max = apert*0.001/wavlen;
    k2max = k2max * k2max;

/* ------- allocate memory ------------ */

    pixsq = (float**) malloc2D( nx, ny, sizeof(float), "pixsq" );

    /*  kx, kx2, xpos, ky, ky2, ypos are not declared in this function;
        they have to be provided at file scope  */
    kx   = (float*) malloc1D( nx, sizeof(float), "kx" );
    kx2  = (float*) malloc1D( nx, sizeof(float), "kx2" );
    xpos = (float*) malloc1D( nx, sizeof(float), "xpos" );
    freqn( kx, kx2, xpos, nx, ax );

    ky   = (float*) malloc1D( ny, sizeof(float), "ky" );
    ky2  = (float*) malloc1D( ny, sizeof(float), "ky2" );
    ypos = (float*) malloc1D( ny, sizeof(float), "ypos" );
    freqn( ky, ky2, ypos, ny, by );

    cpix.resize( nx, ny );
    cpix.init( 1 );        //  only fast init and slow execution  needed here

    /* --------- calculate probe wavefunction -------- */
    pixel = ( rx2 + ry2 );
    npixels = makeProbe( cpix, nx, ny, dx, dy,
        param, wavlen, k2max, pixel, multiMode, ismoth );
    printf("there were %d pixels inside the aperture\n", npixels );

    /* -----  copy back for output ----- */
    /*  real part goes into columns 0..nx-1, imaginary part into columns
        nx..2*nx-1; pixsq keeps the intensity and sum its total  */
    sum = 0.0;
    myFile.resize( 2*nx, ny);
    myFile.setnpix( 2 );
    for( ix=0; ix<nx; ix++)
    for( iy=0; iy<ny; iy++) {
        myFile(ix,iy)    = pixr = cpix.re(ix,iy);  // real
        myFile(ix+nx,iy) = pixi = cpix.im(ix,iy);  // imag
        pixsq[ix][iy] = p2 = pixr*pixr + pixi*pixi;
        sum +=  p2;
    }

/* ----- Normalize probe intensity ------------ */

    /*  scale = sqrt( nx*ny/sum ): after scaling the amplitudes the summed
        intensity equals nx*ny  */
    scale = 1.0 / sum;
    scale = scale * ((double)nx) * ((double)ny);
    scale = (double) sqrt( scale );
    scale2 = scale*scale;

    for( ix=0; ix<nx; ix++)
    for( iy=0; iy<ny; iy++) {
        myFile(ix,iy)    *= (float) scale;
        myFile(ix+nx,iy) *= (float) scale;
        pixsq[ix][iy]    *= (float) scale2;
    }

/*------- Output results and find min and max to echo ---------------
*/
    rmin  = myFile.min(0);    // real part
    rmax  = myFile.max(0);
    aimin = myFile.min(1);   // imaginary
    aimax = myFile.max(1);

    param[pRMAX] = rmax;
    param[pIMAX] = aimax;
    param[pRMIN] = rmin;
    param[pIMIN] = aimin;
    param[pDEFOCUS]= (float) df;
    param[pDX]= (float) (ax / nx);
    param[pDY]= (float) (by / ny);
    param[pENERGY]= (float) keV;
    param[pWAVEL]= (float) ( sqrt(k2max) * wavlen);
    /*  NOTE(review): this replaces the Cs3*1.0e7 value stored earlier with
        the unscaled Cs3 -- confirm which one the output file should carry  */
    param[pCS]= (float) Cs3;
    /*  NOTE(review): pPPOSX is written twice, so dx is overwritten by dy;
        the second store presumably belongs to a y-position index --
        confirm against the parameter index definitions  */
    param[pPPOSX]= (float) dx;
    param[pPPOSX]= (float) dy;

    for( i=0; i<NPARAM; i++) myFile.setParam( i, param[i] );  // not very efficient

    if( myFile.write( fileout, rmin, rmax, aimin, aimax, param[pDX], param[pDY] ) != 1 )
        printf( "probe cannot write an output file.\n");

    printf( "Pix range %15.7g to %15.7g real,\n"
        "      and %15.7g to %15.7g imaginary\n",
        rmin, rmax, aimin, aimax );

/*------- calculate probe size ---------------*/

    x = prbSize( pixsq, nx, ny, dx, dy, ax, by );
    printf("probe size (FWHM-II) = %g Ang.\n", x);

/*------- exit ---------------*/

    time = cputim() - time;
    printf("\nCPU time = %f sec\n", time );

    return EXIT_SUCCESS;

}  /* end main() */
/*
 * Block-distributed elimination benchmark using MPI point-to-point messages.
 *
 * Usage: <program> N
 *
 * Rank 0 builds the N x N matrix, which is scattered in contiguous blocks of
 * x = X_ext/size rows per rank (X_ext is N rounded up to a multiple of the
 * number of ranks; the extra "ghost" rows are allocated but not initialised
 * by init2D and are never printed).  For every pivot row k the owning rank
 * sends that row to all higher ranks, and every rank holding rows below k
 * subtracts the scaled pivot row from them.  Ranks whose rows all lie above
 * the pivot skip the step entirely.
 *
 * Communication and computation are timed separately; rank 0 prints the
 * maximum and average times over all ranks and hands the gathered matrix to
 * print2DFile() with the file name "output_block_p2p".
 *
 * Passing &A[0][0] / &localA[0][0] to MPI_Scatter/MPI_Gather assumes that
 * malloc2D() returns rows laid out in one contiguous buffer -- confirm
 * against its implementation.
 *
 * Returns 0 on success, 1 when the size argument is missing.
 */
int main (int argc, char * argv[]) {
    int rank,size;
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    /* argv[1] is read below; bail out cleanly on every rank if it is absent */
    if (argc < 2) {
        if (rank==0)
            fprintf(stderr,"Usage: %s <matrix size>\n",
                    argc > 0 ? argv[0] : "lu_block_p2p");
        MPI_Finalize();
        return 1;
    }

    int X,Y,x,y,X_ext,i,j,k;
    /* A stays NULL on every rank except 0 */
    double ** A=NULL, ** localA, l, *msg;
    X=atoi(argv[1]);
    Y=X;

    //Extend dimension X with ghost cells if X%size!=0
    if (X%size!=0)
        X_ext=X+size-X%size;
    else
        X_ext=X;

    if (rank==0) {
        //Allocate and init matrix A
        A=malloc2D(X_ext,Y);
        init2D(A,X,Y);
    }

    //Local dimensions x,y
    x=X_ext/size;
    y=Y;

    //Allocate local matrix and scatter global matrix
    localA=malloc2D(x,y);
    /* The send buffer only matters on the root, but the pointer value is
       still read when it is passed, so give it a defined value everywhere */
    double * idx=NULL;
    if (rank==0)
        idx=&A[0][0];
    MPI_Scatter(idx,x*y,MPI_DOUBLE,&localA[0][0],x*y,MPI_DOUBLE,0,MPI_COMM_WORLD);

    if (rank==0) {
        free2D(A,X_ext,Y);
        A=NULL;
    }

    //Timers
    struct timeval ts,tf,comps,compf,comms,commf;
    double total_time=0,computation_time=0,communication_time=0;

    MPI_Barrier(MPI_COMM_WORLD);
    gettimeofday(&ts,NULL);

    /******************************************************************************
     The matrix A is distributed in contiguous blocks to the local matrices localA
     You have to use point-to-point communication routines
     Don't forget to set the timers for computation and communication!
    ******************************************************************************/

//********************************************************************************
    /* msg holds the current pivot row on every rank that needs it */
    msg = malloc(y * sizeof(double));
    int tag =55, dest, dif, srank;
    MPI_Status status;

    for (k = 0; k < X - 1; k++) {
        // this rank owns pivot row k when x*rank <= k < x*(rank+1)
        if ( ( x*rank <= k ) && ( k < (x * (rank + 1)) ) ) {
            // copy the pivot row out of the local block
            memcpy(msg, localA[ k%x ], y * sizeof(double) );
            // only higher ranks hold rows below the pivot
            for (dest = rank + 1; dest < size; dest++) {
                gettimeofday(&comms,NULL);
                MPI_Send(msg,y,MPI_DOUBLE,dest,tag,MPI_COMM_WORLD);
                gettimeofday(&commf,NULL);
                communication_time+=commf.tv_sec-comms.tv_sec+(commf.tv_usec-comms.tv_usec)*0.000001;
            }
        }
        else {
            // rank that owns pivot row k
            srank = k / x;
            // all of this rank's rows lie above the pivot: nothing to do
            if (rank <= srank)
                continue;
            gettimeofday(&comms,NULL);
            MPI_Recv(msg,y,MPI_DOUBLE,srank,tag,MPI_COMM_WORLD,&status);
            gettimeofday(&commf,NULL);
            communication_time+=commf.tv_sec-comms.tv_sec+(commf.tv_usec-comms.tv_usec)*0.000001;
        }

        // eliminate column k from the local rows below the pivot
        gettimeofday(&comps,NULL);
        if ( k < ( x * (rank + 1) - 1 ) ) {
            // dif = number of local rows whose global index exceeds k,
            // capped at the block height
            dif = ( x * (rank + 1) - 1 ) - k;
            if (dif > x)
                dif = x;

            for ( i = x - dif; i < x; i++ ) {
                l = localA[i][k] / msg[k];
                for ( j=k; j<y; j++ )
                    localA[i][j] -= l * msg[j];
            }
        }
        gettimeofday(&compf,NULL);
        computation_time+=compf.tv_sec-comps.tv_sec+(compf.tv_usec-comps.tv_usec)*0.000001;
    }

    free(msg);

    MPI_Barrier(MPI_COMM_WORLD);
//********************************************************************************

    gettimeofday(&tf,NULL);
    total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001;


    //Gather local matrices back to the global matrix
    if (rank==0) {
        A=malloc2D(X_ext,Y);
        idx=&A[0][0];
    }
    MPI_Gather(&localA[0][0],x*y,MPI_DOUBLE,idx,x*y,MPI_DOUBLE,0,MPI_COMM_WORLD);

    /* The reduce results are only defined on the root; initialise them so
       no rank ever holds an indeterminate value */
    double avg_total=0,avg_comp=0,avg_comm=0,max_total=0,max_comp=0,max_comm=0;
    MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);

    if (rank==0) {
        /* turn the sums into averages where they are actually available */
        avg_total/=size;
        avg_comp/=size;
        avg_comm/=size;

        printf("LU-Block-p2p\tSize\t%d\tProcesses\t%d\n",X,size);
        printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm);
        printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm);

        //Print triangular matrix U to file
        char * filename="output_block_p2p";
        print2DFile(A,X,Y,filename);

        free2D(A,X_ext,Y);
    }

    free2D(localA,x,y);

    MPI_Finalize();

    return 0;
}
Esempio n. 6
0
int main()
{
    char filein[NCMAX], fileout[NCMAX], filestart[NCMAX],
        filebeam[NCMAX], description[NCMAX],
        filecross[NCMAX], cline[NCMAX];
  
    const char version[] = "21-nov-2012 (ejk)";

    int lstart=0, lpartl=0, lbeams=0, lwobble=0, lcross=0, nwobble=1;
    int ix, iy, iz, nx, ny, nz, nzout, ixmid, iymid, i, nslic0, islice,
        nacx,nacy, iqx, iqy, iwobble, ndf, idf, nbout, ib,
        ncellx, ncelly, ncellz, iycross, ns, NPARAM;
    int *hbeam, *kbeam;
    int natom, *Znum, *Znum2, istart, na, done, status, multiMode;
    long nbeams, nillum;
    long  ltime;
    unsigned long iseed;
    long32 nxl, nyl;   /*  tiffsubs 32 bit integer type */

    float *x, *y, *z, *occ, *wobble;
    float *x2, *y2, *z2, *occ2;
   
    float wmin, wmax, xmin,xmax, ymin, ymax, zmin, zmax;

    float *kx, *ky, *xpos, *ypos, *param, *sparam;
    float k2, k2max, scale, v0, mm0, wavlen, rx, ry,
        ax, by, cz, pi, rmin, rmax, aimin, aimax,
        rx2,ry2, ctiltx, ctilty, tctx, tcty,
        acmin, acmax, Cs3, Cs5, df, df0, sigmaf, dfdelt, aobj,
        qx, qy, qy2, q2, q2min, q2max, sumdf, pdf, k2maxo,
        temperature, ycross, dx, dy;

    float tr, ti, wr, wi;

    float **wave0r, **wave0i, **pix, **depthpix,
        *propxr, *propxi, *propyr, *propyi;

    cfpix wave;            /* complex probe wave functions */
    cfpix trans;           /* complex transmission functions */
    cfpix temp ;           /* complex scratch wavefunction */

    double sum, timer, xdf, chi0, chi1, chi2, chi3, t, zslice,
        deltaz, phirms, rsq, vz, alx, aly;
        
    FILE *fp1;

    floatTIFF myFile;

/*  echo version date and get input file name */

    printf("autoslic(e) version dated %s\n", version);
    printf("Copyright (C) 1998-2012 Earl J. Kirkland\n" );
    printf( "This program is provided AS-IS with ABSOLUTELY NO WARRANTY\n "
        " under the GNU general public license\n\n" );

    printf("perform CTEM multislice with automatic slicing and FFTW\n");
#ifdef USE_OPENMP
    printf( "and multithreaded using openMP\n");
#endif
    printf( "\n" );
    
    pi = (float) (4.0 * atan( 1.0 ));
    NPARAM = myFile.maxParam();
    param = (float*) malloc1D( NPARAM, sizeof(float), "param" );
    sparam = (float*) malloc1D( NPARAM, sizeof(float), "sparam" );
    for( ix=0; ix<NPARAM; ix++ ) param[ix] = 0.0F;

    printf("Name of file with input atomic "
           "potential in x,y,z format:\n");
    ns = scanf("%s", filein );

/*  get simulation options */

    printf("Replicate unit cell by NCELLX,NCELLY,NCELLZ :\n");
    ns = scanf("%d %d %d", &ncellx, &ncelly, &ncellz);
    if( ncellx < 1 ) ncellx = 1;
    if( ncelly < 1 ) ncelly = 1;
    if( ncellz < 1 ) ncellz = 1;

    printf("Name of file to get binary output of multislice result:\n");
    ns = scanf("%s", fileout );

    lpartl = askYN("Do you want to include partial coherence");

    if( lpartl == 1 ) {
        printf("Illumination angle min, max in mrad:\n");
        ns = scanf("%f %f", &acmin, &acmax);
        acmin  = acmin  * 0.001F;
        acmax  = acmax  * 0.001F;
        printf("Spherical aberration Cs3, Cs5(in mm.):\n");
        ns = scanf("%g %g", &Cs3, &Cs5);
        param[pCS]  = (float) ( Cs3*1.0e7 );
        param[pCS5] = (float) ( Cs5*1.0e7 );
        printf("Defocus, mean, standard deviation, and"
               " sampling size (in Angstroms) =\n");
        ns = scanf("%f %f %f", &df0, &sigmaf, &dfdelt);
        param[pDEFOCUS] = (float) df0;
        printf("Objective aperture (in mrad) =\n");
        ns = scanf("%f", &aobj);
        aobj = aobj * 0.001F;
#ifdef MANY_ABERR
    /*   get higher order aberrations if necessary */
    printf("type higher order aber. name (as C32a, etc.) followed\n"
        " by a value in mm. (END to end)\n");
    done = multiMode = 0;
    do{
        ns = scanf( "%20s", cline );
        if( strstr( cline, "END" ) != NULL ) {
            done = 1;
        } else {
            ns = scanf( "%lg", &vz );
            status = readCnm( cline, param, vz );        
            if( status < 0 ) {
                printf( "unrecognized aberration, exit...\n");
                exit( EXIT_SUCCESS );
            } else multiMode = 1;
        }
    } while( !done );

#endif
        lstart = 0;
    } else {
        printf("NOTE, the program image must also be run.\n");
        lstart = askYN("Do you want to start from previous result");
    }

    if ( lstart == 1 ) {
        printf("Name of file to start from:\n");
        ns = scanf("%s", filestart);
    } else {
        printf("Incident beam energy in kev:\n");
        ns = scanf("%g", &v0);
        printf("Wavefunction size in pixels, Nx,Ny:\n");
        ns = scanf("%d %d", &nx, &ny );
    }

    printf("Crystal tilt x,y in mrad.:\n");
    ns = scanf("%f %f", &ctiltx, &ctilty);
    ctiltx = ctiltx /1000;
    ctilty = ctilty /1000;

    /*  remember that the slice thickness must be > atom size
        to use projected atomic potential */
    printf("Slice thickness (in Angstroms):\n");
    ns = scanf("%lf", &deltaz );
    if( deltaz < 1.0 ) {
        printf("WARNING: this slice thickness is probably too thin"
            " for autoslice to work properly.\n");
    }

    if( lpartl == 0 ) {
        lbeams = askYN("Do you want to record the (real,imag) value\n"
        " of selected beams vs. thickness");
        if( lbeams == 1 ) {
            printf("Name of file for beams info:\n");
            ns = scanf("%s", filebeam );
            printf("Number of beams:\n");
            ns = scanf("%d", &nbout);
            if( nbout<1 ) nbout = 1;
            hbeam = (int*) malloc1D( nbout, sizeof(int), "hbeam" );
            kbeam = (int*) malloc1D( nbout, sizeof(int), "kbeam" );
            for( ib=0; ib<nbout; ib++) {
                printf("Beam %d, h,k=\n", ib+1);
                ns = scanf("%d %d", &hbeam[ib], &kbeam[ib] );
            }
        }
    }

    lwobble = askYN("Do you want to include thermal vibrations");
    if( lwobble == 1 ) {
        printf( "Type the temperature in degrees K:\n");
        ns = scanf( "%g", &temperature );
        printf( "Type number of configurations to average over:\n");
        ns = scanf( "%d", &nwobble );
        if( nwobble < 1 ) nwobble = 1;
        /* get random number seed from time if available 
            otherwise ask for a seed */
        ltime = (long) time( NULL );
        iseed = (unsigned) ltime;
        if( ltime == -1 ) {
            printf("Type initial seed for random number generator:\n");
            ns = scanf("%ld", &iseed);
        } else
            printf( "Random number seed initialized to %ld\n", iseed );
    } else temperature = 0.0F;

    if( lpartl == 0 ) {
        lcross = askYN("Do you want to output intensity vs. depth cross section");
        if( lcross == 1 ){
            printf( "Type name of file to get depth profile image:\n");
            ns = scanf("%s", filecross );
            printf( "Type y position of depth cross section (in Ang.):\n");
            ns = scanf("%f", &ycross );
        }
    }

/* start timing the actual computation just for fun */

    timer = cputim();
#ifdef USE_OPENMP
    walltimer = walltim();  /* wall time for opneMP */
#endif

/*  get starting value of transmitted wavefunction if required
   (this can only be used in coherent mode)
    remember to save params for final output pix  */

    if ( lstart == 1 ) {
        if( myFile.read( filestart ) != 1 ) {
            printf("Cannot open input file: %s .\n", filestart ); 
            exit( 0 );
        }

        if( myFile.getnpix() != 2 ) {
           printf("Input file %s must be complex, can't continue.\n",
               filestart );
           exit( 0 );
        }

        nx =  myFile.nx();
        ny =  myFile.ny();

        wave0r = (float**) malloc2D( nx, ny, sizeof(float), "waver" );
        nx = nx/2;
        wave0i = wave0r + nx;

        //  save starting pix for later
        for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) {
                wave0r[ix][iy] = myFile(ix,iy);
                wave0i[ix][iy] = myFile(ix+nx,iy);
        }

        //  save parameters to verify successive images are same size etc.
        for( i=0; i<NPARAM; i++) sparam[i] = myFile.getParam( i );

        ax = sparam[pDX] * nx;
        by = sparam[pDY] * ny;
        v0     = sparam[pENERGY];
        nslic0 = (int) sparam[pNSLICES];
        printf("Starting pix range %g to %g real\n"
               "           %g to %g imag\n",
               sparam[pRMIN], sparam[pRMAX], sparam[pIMIN], sparam[pIMAX] );
        printf("Beam voltage = %g kV\n", v0);
        printf("Old crystal tilt x,y = %g, %g mrad\n", 1000.*sparam[pXCTILT],
            1000.*sparam[pYCTILT]);

    } else nslic0 = 0;     /* end if( lstart...) */

/*  calculate relativistic factor and electron wavelength */

    mm0 = 1.0F + v0/511.0F;
    wavlen = (float) wavelength( v0 );
    printf("electron wavelength = %g Angstroms\n", wavlen);

/*  read in specimen coordinates and scattering factors */

    natom = ReadXYZcoord( filein, ncellx, ncelly, ncellz,
        &ax, &by, &cz, &Znum, &x, &y, &z, &occ, &wobble,
        description, NCMAX );

    printf("%d atomic coordinates read in\n", natom );
    printf("%s", description );

    printf("Size in pixels Nx, Ny= %d x %d = %d beams\n",
           nx,ny, nx*ny);
    printf("Lattice constant a,b = %12.4f, %12.4f\n", ax,by);

    /*  calculate the total specimen volume and echo */
    xmin = xmax = x[0];
    ymin = ymax = y[0];
    zmin = zmax = z[0];
    wmin = wmax = wobble[0];

    for( i=0; i<natom; i++) {
        if( x[i] < xmin ) xmin = x[i];
        if( x[i] > xmax ) xmax = x[i];
        if( y[i] < ymin ) ymin = y[i];
        if( y[i] > ymax ) ymax = y[i];
        if( z[i] < zmin ) zmin = z[i];
        if( z[i] > zmax ) zmax = z[i];
        if( wobble[i] < wmin ) wmin = wobble[i];
        if( wobble[i] > wmax ) wmax = wobble[i];
    }
    printf("Total specimen range is\n %g to %g in x\n"
           " %g to %g in y\n %g to %g in z\n", xmin, xmax,
           ymin, ymax, zmin, zmax );
    if( lwobble == 1 )
        printf("Range of thermal rms displacements (300K) = %g to %g\n",
            wmin, wmax );
    
#ifdef USE_OPENMP
    /*  force LUT init. to avoid redundant init in parallel form */ 
    rsq = 0.5;  /* arbitrary position */   
    for( i=0; i<natom; i++) vz =  vzatomLUT( Znum[i], rsq );
#endif

/*  calculate spatial frequencies and positions for future use */

    rx = 1.0F/ax;
    rx2= rx*rx;
    ry = 1.0F/by;
    ry2= ry*ry;
    ixmid = nx/2;
    iymid = ny/2;
    nxl = nx;
    nyl = ny;

    kx   = (float*) malloc1D( nx, sizeof(float), "kx" );
    kx2  = (float*) malloc1D( nx, sizeof(float), "kx2" );
    xpos = (float*) malloc1D( nx, sizeof(float), "xpos" );
    freqn( kx, kx2, xpos, nx, ax );

    ky   = (float*) malloc1D( ny, sizeof(float), "ky" );
    ky2  = (float*) malloc1D( ny, sizeof(float), "ky2" );
    ypos = (float*) malloc1D( ny, sizeof(float), "ypos" );
    freqn( ky, ky2, ypos, ny, by );

/*  allocate some more arrays and initialize wavefunction */

    trans.resize( nx, ny );
    trans.init();

    wave.resize( nx, ny );
    wave.copyInit( trans );

    if( lstart == 0 ) {
        for( ix=0; ix<nx; ix++)
        for( iy=0; iy<ny; iy++) {
            wave.re(ix,iy) = 1.0F;  /*  real part */
            wave.im(ix,iy) = 1.0F;  /*  imag part */
        }
    } else {
        for( ix=0; ix<nx; ix++)
        for( iy=0; iy<ny; iy++) {
            wave.re(ix,iy) = wave0r[ix][iy];  /*  real part */
            wave.im(ix,iy) = wave0i[ix][iy];  /*  imag part */
        }
    }

    if( lcross == 1 ) {
        /* nz may be too small with thermal vibrations so add a few extra */
        nz = (int) ( (zmax-zmin)/ deltaz + 3.5);
        depthpix = (float**) malloc2D( nx, nz,
            sizeof(float), "depthpix" );
        for( ix=0; ix<nx; ix++)
        for( iz=0; iz<nz; iz++)  depthpix[ix][iz] = 0.0F;
        iycross = (int) ( 0.5 + (ny * ycross / by));
        while( iycross < 0 ) iycross += ny;
        iycross = iycross%ny;  /* make periodic in ny */
        printf("save xz cross section at iy= %d pixels\n", iycross );
    }

 /*  calculate propagator function  */
 
    k2max = nx/(2.0F*ax);
    tctx = ny/(2.0F*by);
    if( tctx < k2max ) k2max = tctx;
    k2max = BW * k2max;
    printf("Bandwidth limited to a real space resolution of %f Angstroms\n",
                     1.0F/k2max);
    printf("   (= %.2f mrad)  for symmetrical anti-aliasing.\n",
         wavlen*k2max*1000.0F);
    k2max = k2max*k2max;

    tctx = (float) (2.0 * tan(ctiltx));
    tcty = (float) (2.0 * tan(ctilty));
    
    propxr = (float*) malloc1D( nx, sizeof(float), "propxr" );
    propxi = (float*) malloc1D( nx, sizeof(float), "propxi" );
    propyr = (float*) malloc1D( ny, sizeof(float), "propyr" );
    propyi = (float*) malloc1D( ny, sizeof(float), "propyi" );

    scale = pi * ((float)deltaz);

    for( ix=0; ix<nx; ix++) {
        t = scale * ( kx2[ix]*wavlen - kx[ix]*tctx );
        propxr[ix] = (float)  cos(t);
        propxi[ix] = (float) -sin(t);
    }
    for( iy=0; iy<ny; iy++) {
        t = scale * ( ky2[iy]*wavlen - ky[iy]*tcty );
        propyr[iy] = (float)  cos(t);
        propyi[iy] = (float) -sin(t);
    }


/*  iterate the multislice algorithm proper

   NOTE: zero freg is in the bottom left corner and
     expands into all other corners - not in the center
     this is required for the FFT - don't waste time rearranging

  partial coherence method
   force the integrals to include the origin and to be symmetric
   about the origin and to have the same periodic boundary
   conditions as the sampling grid
*/
    if( lpartl == 1 ) {

        printf("Illumination angle sampling (in mrad) = %f, %f\n\n",
            1000.*rx*wavlen, 1000.*ry*wavlen);

        pix = (float**) malloc2D( nx, ny, sizeof(float), "pix" );
        for( ix=0; ix<nx; ix++)
        for( iy=0; iy<ny; iy++) pix[ix][iy] = 0.0F;
        
        temp.resize( nx, ny );
	temp.copyInit( trans );

        ndf = (int) ( ( 2.5F * sigmaf ) / dfdelt );

        nacx = (int) ( ( acmax / ( wavlen * rx ) ) + 1.5F );
        nacy = (int) ( ( acmax / ( wavlen * ry ) ) + 1.5F );

        q2max = acmax / wavlen;
        q2max = q2max*q2max;

        q2min = acmin / wavlen;
        q2min = q2min*q2min;

        k2maxo = aobj / wavlen;
        k2maxo = k2maxo*k2maxo;

        chi1 = pi * wavlen;
        chi2 = 0.5 * Cs3 * wavlen *wavlen;
        chi3 = Cs5 * wavlen*wavlen*wavlen*wavlen /3.0;
        nillum = 0;

        /* for Monte Carlo stuff */
        x2      = (float*) malloc1D( natom, sizeof(float), "x2" );
        y2      = (float*) malloc1D( natom, sizeof(float), "y2" );
        z2      = (float*) malloc1D( natom, sizeof(float), "z2" );
        occ2    = (float*) malloc1D( natom, sizeof(float), "occ2" );
        Znum2   =   (int*) malloc1D( natom, sizeof(int), "Znum2" );

        if( lwobble == 0 )
            sortByZ( x, y, z, occ, Znum, natom );

/*  integrate over the illumination angles */

        for( iwobble=0; iwobble<nwobble; iwobble++) {
            if( lwobble == 1 ) printf("configuration # %d\n", iwobble+1 );
            for( iqy= -nacy; iqy<=nacy; iqy++) {
                qy = iqy * ry;
                qy2 = qy * qy;
        
                for( iqx= -nacx; iqx<=nacx; iqx++) {
                    qx = iqx * rx;
                    q2 = qx*qx + qy2;
        
                    if( (q2 <= q2max) && (q2 >= q2min) ) {
                        nillum += 1;
                        for( ix=0; ix<nx; ix++) {
                            for( iy=0; iy<ny; iy++) {
                                t = 2.0*pi*( qx*xpos[ix] + qy*ypos[iy] );
                                wave.re(ix,iy) = (float) cos(t);  /* real */
                                wave.im(ix,iy) = (float) sin(t);  /* imag */
                            }
                        }
                        /*  add random thermal displacements scaled by temperature
                                if requested 
                            remember that initial wobble is at 300K for each direction */
                        if( lwobble == 1 ){
                            scale = (float) sqrt(temperature/300.0) ;
                            for( i=0; i<natom; i++) {
                                x2[i] = x[i] + (float)(wobble[i]*rangauss(&iseed)*scale);
                                y2[i] = y[i] + (float)(wobble[i]*rangauss(&iseed)*scale);
                                z2[i] = z[i] + (float)(wobble[i]*rangauss(&iseed)*scale);
                                occ2[i] = occ[i];
                                Znum2[i] = Znum[i];
                            }
                            printf( "Sorting atoms by depth...\n");
                            sortByZ( x2, y2, z2, occ2, Znum2, natom );
                            zmin = z2[0];       /* reset zmin/max after wobble */
                            zmax = z2[natom-1];
                            printf("Thickness range with thermal displacements"
                                   " is %g to %g (in z)\n", zmin, zmax );
                        } else for( i=0; i<natom; i++) {
                            x2[i] = x[i];
                            y2[i] = y[i];
                            z2[i] = z[i];
                            occ2[i] = occ[i];
                            Znum2[i] = Znum[i];
                        }
            
                        zslice = 0.75*deltaz;  /*  start a little before top of unit cell */
                        istart = 0;
            
                        while( istart < natom ) {
            
                            /* find range of atoms for current slice */
                            na = 0;
                            for(i=istart; i<natom; i++) 
                            if( z2[i] < zslice ) na++; else break;
            
                            /* calculate transmission function, skip if layer empty */
                            if( na > 0 ) {
                                trlayer( &x2[istart], &y2[istart], &occ2[istart],
                                    &Znum2[istart],na, ax, by, v0, 
                                    trans,  nxl, nyl, &phirms, &nbeams, k2max );
                
                                wave *= trans;  // transmit
                            }
            
                            /* remember: prop needed here to get anti-aliasing
                                    right */
 			    wave.fft();
                            propagate( wave, propxr, propxi,
                                propyr, propyi,
                                kx2,  ky2,  k2max, nx, ny );
			    wave.ifft();
            
                            zslice += deltaz;
                            istart += na;
            
                        } /* end while(zslice<=..) */
          
                        scale = 1.0F / ( ((float)nx) * ((float)ny) );
                        sum = 0.0;
                        for( ix=0; ix<nx; ix++) {
                            for( iy=0; iy<ny; iy++)
                                sum += wave.re(ix,iy)*wave.re(ix,iy)
                                    + wave.im(ix,iy)*wave.im(ix,iy);
                        }
                        sum = sum * scale;
             
                        printf("Illumination angle = %7.3f, %7.3f mrad",
                            1000.*qx*wavlen, 1000.*qy*wavlen);
                        printf(", integrated intensity= %f\n", sum );
            
                        /*-------- integrate over +/- 2.5 sigma of defocus ------------ */
    
                        wave.fft();
                        sumdf = 0.0F;
            
                        for( idf= -ndf; idf<=ndf; idf++) {
                            param[pDEFOCUS] = df = df0 + idf*dfdelt;
            
                            for( ix=0; ix<nx; ix++) {
                                alx = wavlen * kx[ix];  /* x component of angle alpha */
                                for( iy=0; iy<ny; iy++) {
                                    aly = wavlen * ky[iy];  /* y component of angle alpha */
                                    k2 = kx2[ix] + ky2[iy];
                                    if( k2 <= k2maxo ) {
                                        chi0 = (2.0*pi/wavlen) * chi( param, 
                                                alx, aly, multiMode );
                                        tr = (float)  cos(chi0);
                                        ti = (float) -sin(chi0);
                                        wr = wave.re(ix,iy);
                                        wi = wave.im(ix,iy);
                                        temp.re(ix,iy) = wr*tr - wi*ti;
                                        temp.im(ix,iy) = wr*ti + wi*tr;
                                    } else {
                                        temp.re(ix,iy) = 0.0F;  /* real */
                                        temp.im(ix,iy) = 0.0F;  /* imag */
                                    }
                                }  /*  end for( iy=0... ) */
                            }   /*  end for( ix=0... ) */

                            temp.ifft();
            
                            xdf = (double) ( (df - df0) /sigmaf );
                            pdf = (float) exp( -0.5F * xdf*xdf );
                            sumdf += pdf;
            
                            for( ix=0; ix<nx; ix++) {
                                for( iy=0; iy<ny; iy++) {
                                    wr = temp.re(ix,iy);
                                    wi = temp.im(ix,iy);
                                    pix[ix][iy] += pdf* ( wr*wr + wi*wi );
                                }
                            }
            
                        }/* end for(idf..) */
                    }/* end if( q2...) */
        
                } /* end for( iqx..) */
            } /* end for( iqy..) */
        } /* end for( iwobble...) */


        printf("Total number of illumination angle = %ld\n",
                nillum);
        printf("Total number of defocus values = %d\n", 2*ndf+1);
        /*  remember that nillum already includes nwobble so don't
             divide by nwobble! */
        scale = 1.0F / ( ((float)nillum) * sumdf );
        rmin  = pix[0][0] * scale;
        rmax  = rmin;
        aimin = 0.0F;
        aimax = 0.0F;

        for( ix=0; ix<nx; ix++)
        for( iy=0; iy<ny; iy++) {
            pix[ix][iy] = pix[ix][iy] * scale;
            if( pix[ix][iy] < rmin ) rmin = pix[ix][iy];
            if( pix[ix][iy] > rmax ) rmax = pix[ix][iy];
        }

/* ---- start coherent method below ----------------
        (remember that waver,i[][] was initialize above) */

    } else {

        if( lbeams ==1 ) {
            fp1 = fopen( filebeam, "w" );
            if( NULL == fp1 ) {
                printf("can't open file %s\n", filebeam);
                exit(0);
            }
            fprintf( fp1, " (h,k) = " );
            for(ib=0; ib<nbout; ib++)
                fprintf(fp1," (%d,%d)", hbeam[ib], kbeam[ib]);
            fprintf( fp1, "\n" );
            fprintf( fp1, "nslice, (real,imag) (real,imag) ...\n\n");
            for( ib=0; ib<nbout; ib++) {
                if( hbeam[ib] < 0 ) hbeam[ib] = nx + hbeam[ib];
                if( kbeam[ib] < 0 ) kbeam[ib] = ny + kbeam[ib];
                if( hbeam[ib] < 0 ) hbeam[ib] = 0;
                if( kbeam[ib] < 0 ) kbeam[ib] = 0;
                if( hbeam[ib] > nx-1 ) hbeam[ib] = nx-1;
                if( kbeam[ib] > ny-1 ) kbeam[ib] = ny-1;
            }
        }

        /*  add random thermal displacements scaled by temperature if requested 
            remember that initial wobble is at 300K for each direction */
        if( lwobble == 1 ){
            scale = (float) sqrt(temperature/300.0) ;
            for( i=0; i<natom; i++) {
                x[i] += (float) (wobble[i] * rangauss( &iseed ) * scale);
                y[i] += (float) (wobble[i] * rangauss( &iseed ) * scale);
                z[i] += (float) (wobble[i] * rangauss( &iseed ) * scale);
            }
        }

        printf( "Sorting atoms by depth...\n");
        sortByZ( x, y, z, occ, Znum, natom );

        if( lwobble == 1 ){
            zmin = z[0];        /* reset zmin/max after wobble */
            zmax = z[natom-1];
            printf("Thickness range with thermal displacements"
                " is %g to %g (in z)\n", zmin, zmax );
        }

        scale = 1.0F / ( ((float)nx) * ((float)ny) );

        zslice = 0.75*deltaz;  /*  start a little before top of unit cell */
        istart = 0;
        islice = 1;

        while( (istart < natom) && ( zslice < (zmax+deltaz) ) ) {

            /* find range of atoms for current slice */
            na = 0;
            for(i=istart; i<natom; i++) 
            if( z[i] < zslice ) na++; else break;

            /* calculate transmission function, skip if layer empty */
            if( na > 0 ) {
                trlayer( &x[istart], &y[istart], &occ[istart],
                    &Znum[istart], na, ax, by, v0, trans,
                    nxl, nyl, &phirms, &nbeams, k2max );
    
                /*??? printf("average atompot comparison = %g\n", 
                           phirms/(wavlen*mm0) ); */
    
                wave *= trans;    //  transmit
            }       

            /*  bandwidth limit */
            wave.fft();
            if( lbeams== 1 )  {
                fprintf( fp1, "%5d", islice);
                for( ib=0; ib<nbout; ib++) 
                    fprintf(fp1, "%10.6f %10.6f",
                        scale*wave.re(hbeam[ib],kbeam[ib]),   /* real */
                        scale*wave.im(hbeam[ib],kbeam[ib]) ); /* imag */
                    fprintf( fp1, "\n");
            }
            /* remember: prop needed here to get anti-aliasing right */
            propagate( wave, propxr, propxi,
                propyr, propyi, kx2,  ky2,  k2max, nx, ny );
            wave.ifft();

            /* save depth cross section if requested */
            if( (lcross == 1) && (islice<=nz) ) {
                for( ix=0; ix<nx; ix++) {
                    depthpix[ix][islice-1] = 
                        wave.re(ix,iycross)*wave.re(ix,iycross)
                           + wave.im(ix,iycross)*wave.im(ix,iycross);
                }
                nzout = islice;
            }

            sum = 0.0;
            for( ix=0; ix<nx; ix++) {
                for( iy=0; iy<ny; iy++)
                    sum += wave.re(ix,iy)*wave.re(ix,iy) +
                        wave.im(ix,iy)*wave.im(ix,iy);
            }
            sum = sum * scale;

            printf("z= %f A, %ld beams, %d coord., \n"
               "     aver. phase= %f, total intensity = %f\n",
               zslice, nbeams, na, phirms, sum );

            zslice += deltaz;
            istart += na;
            islice++;

        } /* end while(istart<natom..) */
    
        rmin  = wave.re(0,0);
        rmax  = rmin;
        aimin = wave.im(0,0);
        aimax = aimin;

        for( ix=0; ix<nx; ix++) {
            for( iy=0; iy<ny; iy++) {
                wr = wave.re(ix,iy);
                wi = wave.im(ix,iy);
                if( wr < rmin ) rmin = wr;
                if( wr > rmax ) rmax = wr;
                if( wi < aimin ) aimin = wi;
                if( wi > aimax ) aimax = wi;
            }
        }

    } /* end else .. coherent section */

/*  output results and find min and max to echo
    remember that complex pix are stored in the file in FORTRAN
        order for compatibility */

    if( lstart == 1 )
        for( ix=0; ix<NPARAM; ix++ ) param[ix] = sparam[ix];
    param[pRMAX]  = rmax;
    param[pIMAX]  = aimax;
    param[pRMIN]  = rmin;
    param[pIMIN]  = aimin;
    param[pXCTILT]  = ctiltx;
    param[pYCTILT] = ctilty;
    param[pENERGY] = v0;
    param[pDX] = dx = (float) ( ax/((float)nx) );
    param[pDY] = dy = (float) ( by/((float)ny) );
    param[pWAVEL] = wavlen;
    param[pNSLICES] = 0.0F;  /* ??? */
    if ( lpartl == 1 ) {
        param[pDEFOCUS] = df0;
        param[pOAPERT] = aobj;
        param[pCAPERT] = acmax;
        param[pDDF] = sigmaf;
    }

    for( ix=0; ix<NPARAM; ix++ ) myFile.setParam( ix, param[ix] );

    if ( lpartl == 1 ) {
        myFile.resize( nx, ny );
        myFile.setnpix( 1 );
        for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++)
            myFile(ix,iy) = pix[ix][iy];
        i = myFile.write( fileout, rmin, rmax, aimin, aimax, dx, dy );

    } else {

        myFile.resize( 2*nx, ny );
        myFile.setnpix( 2 );
        for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) {
            myFile(ix,iy)    = wave.re(ix,iy);
            myFile(ix+nx,iy) = wave.im(ix,iy);
        }

        i = myFile.write( fileout, rmin, rmax, aimin, aimax, dx, dy );
    }

    if( i != 1 ) printf( "autoslice cannot write TIF file %s\n",
            fileout );
    printf( "pix range %g to %g real,\n"
        "      %g to %g imag\n",  rmin,rmax,aimin,aimax );

    /*  output depth cross section if requested */
    if( lcross == 1 ){
        rmin  = depthpix[0][0];
        rmax  = rmin;

        for( ix=0; ix<nx; ix++)
        for( iz=0; iz<nzout; iz++) {
            wr = depthpix[ix][iz];
            if( wr < rmin ) rmin = wr;
            if( wr > rmax ) rmax = wr;
        }
        myFile.setParam( pRMAX, rmax );
        myFile.setParam( pIMAX, 0.0F );
        myFile.setParam( pRMIN, rmin );
        myFile.setParam( pIMIN, 0.0F );
        myFile.setParam( pDY, dy = (float) ( deltaz ) );

        myFile.resize( nx, nzout );
        myFile.setnpix( 1 );
        for( ix=0; ix<nx; ix++) for( iz=0; iz<nzout; iz++) {
            myFile(ix,iz) = depthpix[ix][iz];
        }
        i = myFile.write( filecross, rmin, rmax, aimin, aimax, dx, dy );

        if( i != 1 ) printf( "autoslice cannot write TIF file %s\n",
                filecross );
        printf( "depth pix range %g to %g real,\n",  rmin,rmax );
    }

    printf("Total CPU time = %f sec.\n", cputim()-timer );
#ifdef USE_OPENMP
    printf("wall time = %g sec.\n", walltim() - walltimer);
#endif

    return 0;

} /* end main() */
Esempio n. 7
0
/*
  sumpix: interactively read a list of floatTIFF image or wave
  function files, add them together pixel by pixel (complex input is
  converted to squared magnitude first), optionally convert every
  input to a power spectrum and/or display on a log scale, and write
  the sum to one output floatTIFF file.

  In power spectrum mode the azimuthally averaged intensity is also
  written to the text file azimuth.dat.

  All prompts are answered on stdin.  Returns EXIT_SUCCESS on normal
  completion and exits with EXIT_FAILURE on bad input.
*/
int main()
{
    char **filein, fileout[NCMAX];
    char datetime[20];
    char namefmt[32];   /* bounded "%<n>s" scanf format for file names */

    int i, ipix, ix, iy, nx = 0, ny = 0, nxold = 0, nyold = 0,
        ixmid, iymid, npix, npixold = 0, ninput = 0, nsum, nh, nbins,
        logpix, ns, PowerSpectra, pixtype, NPARAM;
    long *nhist;

    float pixc, rmin, rmin2, rmax, aimin, aimax, tr, ti, dx, dy;
    float *param;
    float  **pixr = NULL, **pixi = NULL, **pixout = NULL;
    double sum, *hist, ax = 0.0, by = 0.0, rx, ry2, radius;

    FILE *fp;

    floatTIFF myFile;

    /*--------  get input file names etc. ------------ */
    printf( "sumpix version dated 6-nov-2012 ejk\n");
    printf("Copyright (C) 1998-2012 Earl J. Kirkland\n" );
    printf( "This program is provided AS-IS with ABSOLUTELY NO WARRANTY\n "
        " under the GNU general public license\n\n" );

     printf( "Sum multiple image or wave function files,\n"
        "complex images will be converted to squared "
        "magnitude before summing.\n");
    printf( "All input images must be the same type and size.\n\n" );
    printf( "Type number of input image files\n");
    ns = scanf( "%d", &ninput );
    /* at least one image is required: the first one fixes nx, ny and
       the pixel type used by everything that follows */
    if( (ns != 1) || (ninput < 1) ) {
        printf( "bad number of input image files\n" );
        exit( EXIT_FAILURE );
    }

    /* limit every file name to the NCMAX-1 characters its buffer holds */
    snprintf( namefmt, sizeof(namefmt), "%%%ds", ((int) NCMAX) - 1 );

    filein = (char**) malloc2D( ninput, NCMAX, sizeof(char), "filein" );
    for( ipix=0; ipix<ninput; ipix++) {
        printf("input %d : ", ipix );
        ns = scanf( namefmt, filein[ipix] );
        if( ns != 1 ) {
            printf( "cannot read name of input file %d\n", ipix );
            exit( EXIT_FAILURE );
        }
    }
    printf("\n");

    printf("Type name of output file:\n");
    ns = scanf( namefmt, fileout );
    if( ns != 1 ) {
        printf( "cannot read name of output file\n" );
        exit( EXIT_FAILURE );
    }

    logpix = askYN( "Do you want to display on log scale");

    PowerSpectra = askYN( "Do you want to convert to a power spectra");

/* get image size and type from the first input pix
    all successive images have to be the same type and size !!! 
   -remember that floatTIFF cannot handle plain integer TIFF images

 -------- read floating point images and average --------

   remember that complex images are stacked side by side
    with npix=2 and nx twice its real value 
    (real images have npix=1 and nx its normal value)
*/
    NPARAM = myFile.maxParam();
    param = (float*) malloc1D( NPARAM, sizeof(float), "param" );

    for( ipix=0; ipix<ninput; ipix++) {

        for( ix=0; ix<NPARAM; ix++) param[ix] = 0.0F;
        if( myFile.read( filein[ipix] ) != 1 ) {
            printf("Cannot open file %s\n", filein[ipix] );
            exit( EXIT_FAILURE );
        }
        myFile.getDateTime( datetime );
        nx = (int) myFile.nx();
        ny = (int) myFile.ny();
        npix = myFile.getnpix();
        if( 0 == ipix ) {
            npixold = npix;
            nxold = nx;
            nyold = ny;
            /* pixr gets twice the stored width so that rows [nx,2*nx)
               (nx = true image width) can always hold an imaginary part,
               even when a real image is promoted to complex for the FFT */
            pixr = (float**) malloc2D( 2*nx, ny, sizeof(float), "pixr-1" );
            pixout = (float**) malloc2D( nx, ny, sizeof(float), "pixout-1" );
            for( ix=0; ix<nx; ix++) 
                for( iy=0; iy<ny; iy++) pixout[ix][iy] = 0.0F;
            pixtype = floatPIX;
            printf( "Image size : Nx= %d, Ny= %d\n", nx, ny );
        } else if( (nx != nxold) || (ny != nyold) ) {
            printf( "different size in file %s, "
                " nx= %d, ny= %d\n", filein[ipix], nx, ny );
            exit( EXIT_FAILURE );
        }
        if( npix != npixold ) {
            printf( "Can't mix real and complex images"
                " in file: %s\n", filein[ipix] );
            exit( EXIT_FAILURE );
        }
        if( (npix<1) || (npix>2) ) {
            printf( "bad npix = %d in TIFF file %s\n",
                npix, filein[ipix] );
            exit( EXIT_FAILURE );
        }

        //  copy both real+imag back to old style array to re-use old code
        //        (not optimal but works for now)
        for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++)
                pixr[ix][iy] = myFile(ix,iy);

        nx = nx /npix;    /* true image width from here on */
        ax = myFile.getParam(pDX) * ((float)nx);
        by = myFile.getParam(pDY) * ((float)ny);
        rmin = myFile.getParam(pRMIN);
        rmax = myFile.getParam(pRMAX);
        aimin = myFile.getParam(pIMIN);
        aimax = myFile.getParam(pIMAX);
        if( npix == 2 ) {
            printf( "pix %d created %s, range: %g to %g (real),"
                "\n     and %g to %g (imag)\n",
                ipix, datetime, rmin, rmax, aimin, aimax);
        } else if( npix == 1 ) {
            printf( "pix %d created %s, range: %g to %g (real)\n",
                ipix, datetime, rmin, rmax );
        }

        /*  The imaginary part always lives in the upper half of pixr.
            Using one location for every image avoids transforming into
            a separate buffer and then summing from a different,
            never-written one for the first real image. */
        pixi = pixr + nx;

        if( PowerSpectra == 1 ) {
            if( npix == 1 ) {
                /* real input: supply a zero imaginary part for the FFT */
                for( ix=0; ix<nx; ix++) 
                    for( iy=0; iy<ny; iy++)   pixi[ix][iy] = 0.0F;
            }
            npix = 2;   /* the transform is complex whatever the input was */
            fft2d ( pixr, pixi, nx, ny, +1);
        }

        if( npix == 1 ) {       /* real pix */
            for( ix=0; ix<nx; ix++) 
            for( iy=0; iy<ny; iy++) 
                pixout[ix][iy] += pixr[ix][iy];
        } else if( npix == 2 ) {    /* complex pix */
            for( ix=0; ix<nx; ix++) 
            for( iy=0; iy<ny; iy++) {
                tr = pixr[ix][iy];
                ti = pixi[ix][iy];
                pixout[ix][iy] += ( tr*tr + ti*ti);
            }
        }

    }  // end for(ipix=... )
    

/*  Output results and find min and max to echo
     NOTE the logarithmic scaling of diffraction pattern
    is taken from Gonzalez and Wintz pg 48
    added scaling trick from showpix.f  9-aug-1995 ejk
*/
    printf("Output pix size : Nx= %d, Ny= %d\n", nx, ny );

    if( (PowerSpectra == 1) && ( pixtype == floatPIX ) ) {

        /* put (0,0) in the center */
        invert2D( pixout, nx, ny);

        /* histogram the azimuthal average */
        nbins = nx + ny;
        hist = (double*) malloc1D( nbins, sizeof(double), "hist" );
        nhist = (long*) malloc1D( nbins, sizeof(long), "nhist" );
        for( ix=0; ix<nbins; ix++) {
            hist[ix] = 0.0;
            nhist[ix] = 0;
        }

        sum = 0.0;
        nsum = 0;
        nh = 0;
        ixmid = nx/2;
        iymid = ny/2;

        for( iy=0; iy<ny; iy++) {
            ry2 = (double) ( iy-iymid);
            ry2 = ry2*(ax/by);
            ry2 = ry2*ry2;
            for( ix=0; ix<nx; ix++) {
                pixc = pixout[ix][iy];
                rx = (double) (ix-ixmid);
                /*  compare as double before converting: a large or
                    non-finite ax/by would otherwise index past the end
                    of hist[] and nhist[]; such pixels are left out of
                    the azimuthal average */
                radius = sqrt( rx*rx + ry2 ) + 0.5;
                if( radius < (double) nbins ) {
                    i = (int) radius;
                    hist[i] += pixc;
                    nhist[i]++;
                    if( i > nh ) nh = i;
                }
                if( logpix == 1 ) {
                    if( pixc > 1.e-10F)  pixc = 
                        (float) log( (double) fabs(pixc) );
                    else pixc = -23.0F;
                    pixout[ix][iy] = pixc;
                }
                /* NOTE(review): this skips the whole centre row and
                   centre column when ranging, not only the centre
                   pixel - confirm that is intended */
                if( (ix == 0) && (iy == 0) ) {
                    rmin = pixc;
                    rmax = rmin;
                } else if( (ix != ixmid) && (iy != iymid) ) {
                    if( pixc < rmin ) rmin = pixc;
                    if( pixc > rmax ) rmax = pixc;
                }
                /* mean of the central region, used below to raise the
                   displayed minimum */
                if( (ix>(3*nx)/8) && (ix<(5*nx)/8) &&
                    (iy>(3*ny)/8) && (iy<(5*ny)/8) ) {
                    sum = sum + pixc;
                    nsum += 1;
                }

            }  /* end for ix... */
        } /* end for iy... */

        printf( "write azimuthal averaged intensity vs. \n"
            "  spatial frequency k, into file azimuth.dat\n");
        fp = fopen( "azimuth.dat", "w+" );
        if( fp == NULL ) {
            printf("cannot open file azimuth.dat\n");
            exit( EXIT_FAILURE );
        }
        /* bins that received no pixel come out as NaN (0/0) */
        for( i=0; i<=nh; i++) {
            hist[i] = hist[i] / nhist[i];
            fprintf( fp, "%16.8g  %16.8g\n", ((double)i)/ax,
                 hist[i] );
        }
        fclose( fp );

        myFile.resize( nx, ny );  // in case it was complex
        myFile.setnpix( 1 );
        myFile.setParam( pRMAX, rmax);
        myFile.setParam( pIMAX, aimax = 0.0F);
        myFile.setParam( pIMIN, aimin = 0.0F);
        myFile.setParam( pRMIN, rmin );
        myFile.setParam( pDX,  dx = 1.0F / ((float)ax) );
        myFile.setParam( pDY,  dy = 1.0F / ((float)by) );
        printf("output image size: %f to %f /Angstroms\n", nx*dx, ny*dy );
        printf("Power Spectra range %f to %f\n",  rmin, rmax );

        for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++)
            myFile(ix,iy) = pixout[ix][iy];

        /*  blend the minimum with the central mean (sometimes better);
            very small images have no central region, so fall back to
            the plain minimum instead of dividing by zero */
        if( nsum > 0 ) rmin2 = (float) (0.05*rmin + 0.95*sum/nsum);
        else rmin2 = rmin;
        if( myFile.write( fileout, rmin2, rmax, aimin, aimax, dx, dy ) != 1 )
            printf( "sumpix cannot write TIF file %s\n", fileout );

    } else if(  (pixtype == floatPIX) && (PowerSpectra == 0) ) {

        for( iy=0; iy<ny; iy++) {
            for( ix=0; ix<nx; ix++) {
                pixc = pixout[ix][iy];
                if( logpix == 1 ) {
                    if( pixc > 1.e-30F)  pixc = 
                        (float) log( (double) fabs(pixc) );
                    else pixc = -100.0F;
                    pixout[ix][iy] = pixc;
                }
                if( (ix == 0) && (iy == 0) ) {
                    rmin = pixc;
                    rmax = rmin;
                } else {
                    if( pixc < rmin ) rmin = pixc;
                    if( pixc > rmax ) rmax = pixc;
                }

            }  /* end for ix... */
        } /* end for iy... */

        myFile.resize( nx, ny );  // in case it was complex
        myFile.setnpix( 1 );
        myFile.setParam( pRMAX, rmax);
        myFile.setParam( pIMAX, 0.0F);
        myFile.setParam( pIMIN, 0.0F);
        myFile.setParam( pRMIN, rmin);
        printf("Summed pix range %f to %f\n",  rmin, rmax );

        for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++)
            myFile(ix,iy) = pixout[ix][iy];
        dx = myFile.getParam( pDX );
        dy = myFile.getParam( pDY );
        aimin = aimax = 0.0F;
        if( myFile.write( fileout, rmin, rmax, aimin, aimax, dx, dy ) != 1 )
            printf( "sumpix cannot write TIF file %s\n", fileout );

    }

    return EXIT_SUCCESS;

} /* end main() */
Esempio n. 8
0
/*
 * LU decomposition (elimination to the upper triangular factor U) of an
 * X-by-X matrix whose rows are dealt out round-robin to the MPI ranks.
 * Pivot rows are distributed with point-to-point MPI_Send / MPI_Recv.
 *
 * argv[1] : matrix dimension X
 *
 * Rank 0 prints the maximum and average total / computation /
 * communication times and writes the result to "output_cyclic_p2p".
 * Returns 0 on success, 1 on a missing or invalid dimension argument.
 */
int main (int argc, char * argv[]) {
    int rank,size;
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    int X,Y,x,y,X_ext,i;
    double **A=NULL, **localA;

    if (argc<2 || (X=atoi(argv[1]))<1) {
        if (rank==0)
            fprintf(stderr,"Usage: %s <matrix dimension>\n",argv[0]);
        MPI_Finalize();
        return 1;
    }
    Y=X;

    //Extend dimension X with ghost cells if X%size!=0
    if (X%size!=0)
        X_ext=X+size-X%size;
    else
        X_ext=X;

    if (rank==0) {
        //Allocate and init matrix A (only the first X rows are initialised)
        A=malloc2D(X_ext,Y);
        init2D(A,X,Y);
    }

    //Local dimensions x,y
    x=X_ext/size;
    y=Y;

    //Allocate local matrix and scatter global matrix.
    //Each call deals rows i*size .. i*size+size-1 out to ranks 0 .. size-1.
    //NOTE(review): passing &A[i*size][0] as one send buffer assumes
    //malloc2D stores the rows contiguously - confirm against malloc2D.
    localA=malloc2D(x,y);
    double * idx=NULL;      //send/receive buffer, only significant at root
    for (i=0;i<x;i++) {
        if (rank==0)
            idx=&A[i*size][0];
        MPI_Scatter(idx,Y,MPI_DOUBLE,&localA[i][0],y,MPI_DOUBLE,0,MPI_COMM_WORLD);
    }
    if (rank==0) {
        free2D(A,X_ext,Y);
        A=NULL;
    }

    //Timers; the accumulators must start at zero because they are only
    //ever updated with += inside the elimination loop
    struct timeval ts,tf,comps,compf,comms,commf;
    double total_time,computation_time=0.0,communication_time=0.0;

    MPI_Barrier(MPI_COMM_WORLD);
    gettimeofday(&ts,NULL);

    /******************************************************************************
     The matrix A is distributed in a round-robin fashion to the local matrices localA
     You have to use point-to-point communication routines.
     Don't forget the timers for computation and communication!

    ******************************************************************************/

    int line_index, line_owner;
    int k, start;
    double *k_row, *temp;
    MPI_Status status;

    //Receive buffer for the pivot row on ranks that do not own it
    temp = malloc(y * sizeof(*temp));
    if (temp == NULL) {
        fprintf(stderr,"rank %d: cannot allocate pivot row buffer\n",rank);
        MPI_Abort(MPI_COMM_WORLD,1);
    }

    for (k=0; k<y-1; k++){
        /* global row k is held by rank k%size at local index k/size */
        line_owner = k % size;
        line_index = k / size;

        /* first local row whose global index is greater than k */
        if (rank <= line_owner)
            start = k / size + 1;
        else
            start = k / size;

        if (rank == line_owner)
            k_row = localA[line_index];
        else
            k_row = temp;

        /* set communication timer */
        gettimeofday(&comms, NULL);

        /* the line owner sends the pivot row to every other rank,
         * all other ranks receive it */
        if (rank == line_owner){
            for (i=0; i<size; i++)
                if (i != line_owner)
                    MPI_Send( k_row, y, MPI_DOUBLE, i, line_owner, MPI_COMM_WORLD);
        }
        else
            MPI_Recv(k_row, y, MPI_DOUBLE, line_owner, line_owner, MPI_COMM_WORLD, &status);

        /* stop communication timer */
        gettimeofday(&commf, NULL);
        communication_time += commf.tv_sec - comms.tv_sec + (commf.tv_usec - comms.tv_usec)*0.000001;

        /* set computation timer */
        gettimeofday(&comps, NULL);

        /* Compute: go_to_work is defined elsewhere; presumably it
         * eliminates column k from local rows start .. x-1 using k_row */
        go_to_work( localA, k_row, x, y, rank, start, k );

        /* stop computation timer */
        gettimeofday(&compf, NULL);
        computation_time += compf.tv_sec - comps.tv_sec + (compf.tv_usec - comps.tv_usec)*0.000001;
    }

    gettimeofday(&tf,NULL);
    total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001;


    //Gather local matrices back to the global matrix
    if (rank==0)
        A=malloc2D(X_ext,Y);
    for (i=0;i<x;i++) {
        if (rank==0)
            idx=&A[i*size][0];
        MPI_Gather(&localA[i][0],y,MPI_DOUBLE,idx,Y,MPI_DOUBLE,0,MPI_COMM_WORLD);
    }

    double avg_total,avg_comp,avg_comm,max_total,max_comp,max_comm;
    MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);

    if (rank==0) {
        //The reduced values exist only on the root, so average them here
        avg_total/=size;
        avg_comp/=size;
        avg_comm/=size;

        printf("LU-Cyclic-p2p\tSize\t%d\tProcesses\t%d\n",X,size);
        printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm);
        printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm);

        //Print triangular matrix U to file
        char * filename="output_cyclic_p2p";
        print2DFile(A,X,Y,filename);
        free2D(A,X_ext,Y);
    }

    free(temp);
    free2D(localA,x,y);

    MPI_Finalize();

    return 0;
}
Esempio n. 9
0
/*
 * LU decomposition (elimination to the upper triangular factor U) of an
 * X-by-X matrix whose rows are split into contiguous blocks, one block
 * per MPI rank.  Pivot rows are distributed with point-to-point
 * MPI_Send / MPI_Recv.
 *
 * argv[1] : matrix dimension X
 *
 * Rank 0 prints the maximum and average total / computation /
 * communication times and writes the result to "output_block_p2p".
 * Returns 0 on success, 1 on a missing or invalid dimension argument.
 */
int main (int argc, char * argv[]) {
    int rank,size;
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    int X,Y,x,y,X_ext, i;
    double ** A=NULL, ** localA;

    if (argc<2 || (X=atoi(argv[1]))<1) {
        if (rank==0)
            fprintf(stderr,"Usage: %s <matrix dimension>\n",argv[0]);
        MPI_Finalize();
        return 1;
    }
    Y=X;

    //Extend dimension X with ghost cells if X%size!=0
    if (X%size!=0)
        X_ext=X+size-X%size;
    else
        X_ext=X;

    if (rank==0) {
        //Allocate and init matrix A (only the first X rows are initialised)
        A=malloc2D(X_ext,Y);
        init2D(A,X,Y);
    }

    //Local dimensions x,y
    x=X_ext/size;
    y=Y;


    //Allocate local matrix and scatter global matrix: rank r receives
    //global rows r*x .. r*x+x-1.
    //NOTE(review): using &A[0][0] / &localA[0][0] as buffers of x*y
    //doubles assumes malloc2D stores the rows contiguously - confirm.
    localA=malloc2D(x,y);
    double * idx=NULL;      //send/receive buffer, only significant at root
    if (rank==0)
        idx=&A[0][0];
    MPI_Scatter(idx,x*y,MPI_DOUBLE,&localA[0][0],x*y,MPI_DOUBLE,0,MPI_COMM_WORLD);

    if (rank==0) {
        free2D(A,X_ext,Y);
        A=NULL;
    }

    //Timers; the accumulators must start at zero because they are only
    //ever updated with += inside the elimination loop
    struct timeval ts,tf,comps,compf,comms,commf;
    double total_time,computation_time=0.0,communication_time=0.0;

    MPI_Barrier(MPI_COMM_WORLD);
    gettimeofday(&ts,NULL);

    /******************************************************************************
     The matrix A is distributed in contiguous blocks to the local matrices localA
     You have to use point-to-point communication routines    
     Don't forget to set the timers for computation and communication!
    ******************************************************************************/

    int line_index, line_owner;
    int k, start;
    MPI_Status status;
    double *k_row, *temp;

    //Receive buffer for the pivot row on ranks that do not own it
    temp = malloc(y * sizeof(*temp));
    if (temp == NULL) {
        fprintf(stderr,"rank %d: cannot allocate pivot row buffer\n",rank);
        MPI_Abort(MPI_COMM_WORLD,1);
    }

    for (k=0; k<y-1; k++){
        /* global row k is held by rank k/x at local index k%x */
        start = 0;
        line_owner = k / x;
        line_index = k % x;

        if (rank == line_owner){
            /* the owner only updates its rows below the pivot row */
            start = line_index+1;
            k_row = localA[line_index];
        }
        else
            k_row = temp;

        /* set communication timer */
        gettimeofday(&comms, NULL);

        /* the line owner sends the pivot row to every other rank,
         * all other ranks receive it */
        if (rank == line_owner){
            for (i=0; i<size; i++)
                if (i != line_owner)
                    MPI_Send( k_row, y, MPI_DOUBLE, i, line_owner, MPI_COMM_WORLD);
        }
        else
            MPI_Recv(k_row, y, MPI_DOUBLE, line_owner, line_owner, MPI_COMM_WORLD, &status);


        /* stop communication timer */
        gettimeofday(&commf, NULL);
        communication_time += commf.tv_sec - comms.tv_sec + (commf.tv_usec - comms.tv_usec)*0.000001;

        /* set computation timer */
        gettimeofday(&comps, NULL);

        /* Compute: go_to_work is defined elsewhere; presumably it
         * eliminates column k from the local rows using k_row and
         * decides from rank / line_owner which ranks have work left */
        go_to_work( localA, k_row, x, y, rank, line_owner, start, k );

        /* stop computation timer */
        gettimeofday(&compf, NULL);
        computation_time += compf.tv_sec - comps.tv_sec + (compf.tv_usec - comps.tv_usec)*0.000001;
    }

    gettimeofday(&tf,NULL);
    total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001;


    //Gather local matrices back to the global matrix
    if (rank==0) {
        A=malloc2D(X_ext,Y);
        idx=&A[0][0];
    }
    MPI_Gather(&localA[0][0],x*y,MPI_DOUBLE,idx,x*y,MPI_DOUBLE,0,MPI_COMM_WORLD);

    double avg_total,avg_comp,avg_comm,max_total,max_comp,max_comm;
    MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
    MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);

    if (rank==0) {
        //The reduced values exist only on the root, so average them here
        avg_total/=size;
        avg_comp/=size;
        avg_comm/=size;

        printf("LU-Block-p2p\tSize\t%d\tProcesses\t%d\n",X,size);
        printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm);
        printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm);

        //Print triangular matrix U to file
        char * filename="output_block_p2p";
        print2DFile(A,X,Y,filename);
        free2D(A,X_ext,Y);
    }

    free(temp);
    free2D(localA,x,y);

    MPI_Finalize();

    return 0;
}