/*
 * Serial LU benchmark driver.
 *
 * Usage: <program> <matrix_size>
 *
 * Allocates and initialises a square matrix of the requested size with
 * malloc2D()/init2D(), performs an in-place row elimination without
 * pivoting, prints the elapsed wall-clock time of the elimination and
 * writes the resulting matrix to the file "output_serial".
 *
 * Returns 0 on success and 1 when the size argument is missing or is not
 * a positive integer.
 */
int main(int argc, char * argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <matrix_size>\n",
                argc > 0 ? argv[0] : "lu_serial");
        return 1;
    }

    int X = atoi(argv[1]);
    if (X <= 0) {
        /* atoi() yields 0 for non-numeric input; reject it together with
           negative sizes instead of allocating a degenerate matrix */
        fprintf(stderr, "matrix size must be a positive integer\n");
        return 1;
    }
    int Y = X;

    double ** A = malloc2D(X, Y);
    int i, j, k;
    double l;
    struct timeval ts, tf;
    double total_time;

    init2D(A, X, Y);

    gettimeofday(&ts, NULL);
    for (k = 0; k < X - 1; k++) {
        for (i = k + 1; i < X; i++) {
            /* multiplier that cancels A[i][k] using pivot row k */
            l = A[i][k] / A[k][k];
            for (j = k; j < Y; j++)
                A[i][j] -= l * A[k][j];
        }
    }
    gettimeofday(&tf, NULL);

    /* seconds plus microseconds converted to seconds */
    total_time = (tf.tv_sec - ts.tv_sec) + (tf.tv_usec - ts.tv_usec) * 0.000001;
    printf("LU-Serial\t%d\t%.3lf\n", X, total_time);

    char * filename = "output_serial";
    print2DFile(A, X, Y, filename);

    return 0;
}
/*
 * Matrix generator.
 *
 * Usage: <program> <matrix_size> <output_file>
 *
 * Allocates a square matrix of the requested size, fills it with init2D()
 * and writes it to <output_file> with print2DFile(), then releases it.
 *
 * Returns 0 on success and 1 when an argument is missing or the size is
 * not a positive integer.
 */
int main(int argc, char **argv)
{
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <matrix_size> <output_file>\n",
                argc > 0 ? argv[0] : "generate");
        return 1;
    }

    int x = atoi(argv[1]);
    if (x <= 0) {
        /* atoi() yields 0 for non-numeric input */
        fprintf(stderr, "matrix size must be a positive integer\n");
        return 1;
    }
    int y = x;

    double ** A = malloc2D(x, y);
    init2D(A, x, y);
    print2DFile(A, x, y, argv[2]);
    free2D(A, x, y);

    return 0;
}
/*
 * Creates a new CartesianGrid holding an nx-by-ny table of GtsVertex
 * pointers obtained from malloc2D().
 *
 * The minimum fields start at G_MAXDOUBLE and the maximum fields at
 * -G_MAXDOUBLE (presumably so that the first coordinate compared against
 * them replaces both -- confirm against the code that fills the grid).
 *
 * The grid itself comes from g_malloc(); this function does not show how
 * it is meant to be released.
 */
static CartesianGrid * cartesian_grid_new (guint nx, guint ny)
{
  CartesianGrid * result = g_malloc (sizeof (CartesianGrid));

  /* vertex table first, then the dimensions it was sized with */
  result->vertices = (GtsVertex ***) malloc2D (nx, ny, sizeof (GtsVertex *));
  result->nx = nx;
  result->ny = ny;

  /* x extent */
  result->xmin = G_MAXDOUBLE;
  result->xmax = - G_MAXDOUBLE;

  /* y extent */
  result->ymin = G_MAXDOUBLE;
  result->ymax = - G_MAXDOUBLE;

  return result;
}
/*------------------------ main() ---------------------*/ int main() { char fileout[NCMAX], cline[NCMAX]; const char version[] = "06-apr-2013 (ejk)"; int ix, iy, nx, ny, ixmid, iymid, i, ismoth, npixels, ns, done, status, multiMode, NPARAM; float rmin, rmax, aimin, aimax, p2; float *param, pixr, pixi, **pixsq; double k2max, keV, wavlen, ax, by, rx, ry, rx2, ry2, pi, dx, dy, scale, scale2, pixel, Cs3, Cs5, df, sum, time, apert; double x; cfpix cpix; floatTIFF myFile; /* Echo version date etc. */ printf( "probe version dated %s\n", version ); printf("Copyright (C) 1998-2013 Earl J. Kirkland\n" ); printf( "This program is provided AS-IS with ABSOLUTELY NO WARRANTY\n " " under the GNU general public license\n\n" ); #ifdef MANY_ABERR printf( "calculate a focused probe wave function including multiple aberr.\n\n"); #else printf( "calculate a focused probe wave function\n\n"); #endif pi = 4.0 * atan( 1.0 ); /* memory to store parameters */ NPARAM = myFile.maxParam(); param = (float*) malloc1D( NPARAM, sizeof(float), "probe-param" ); for( i=0; i<NPARAM; i++) param[i] = 0.0F; /* ---- Get desired image size, parameters etc. ------------- */ printf("Name of file to get focused probe wave function:\n"); ns = scanf("%s", fileout ); printf("Desired size of output image in pixels Nx,Ny:\n"); ns = scanf("%d %d", &nx, &ny ); printf("Size of output image in Angstroms ax,by:\n"); ns = scanf("%lf %lf", &ax, &by ); printf("Probe parameters, V0(kv), Cs3(mm), Cs5(mm)," " df(Angstroms), apert(mrad):\n"); ns = scanf("%lg %lg %lg %lg %lg", &keV, &Cs3, &Cs5, &df, &apert ); param[pDEFOCUS] = (float) df; param[pCS] = (float) ( Cs3*1.0e7 ); param[pCS5] = (float) ( Cs5*1.0e7 ); printf("Type 1 for smooth aperture:\n"); ns = scanf("%d", &ismoth ); printf("Probe position x,y in Ang.:\n"); ns = scanf("%lf %lf", &dx, &dy ); #ifdef MANY_ABERR /* get higher order aberrations if necessary */ printf("type higher order aber. name (as C32a, etc.) followed\n" " by a value in mm. 
(END to end)\n"); done = multiMode = 0; do{ ns = scanf( "%20s", cline ); if( strstr( cline, "END" ) != NULL ) { done = 1; } else { ns = scanf( "%lg", &x ); /* printf("%s, %f\n", cline, x ); testing */ status = readCnm( cline, param, x ); if( status < 0 ) { printf( "unrecognized aberration, exit...\n"); exit( EXIT_SUCCESS ); } else multiMode = 1; } } while( !done ); #endif /* ------- Calculate misc constants ------------ */ time = cputim( ); rx = 1.0/ax; rx2 = rx * rx; ry = 1.0/by; ry2 = ry * ry; ixmid = nx/2; iymid = ny/2; wavlen = wavelength( keV ); printf("electron wavelength = %g Angstroms\n", wavlen); k2max = apert*0.001/wavlen; k2max = k2max * k2max; /* ------- allocate memory ------------ */ pixsq = (float**) malloc2D( nx, ny, sizeof(float), "pixsq" ); kx = (float*) malloc1D( nx, sizeof(float), "kx" ); kx2 = (float*) malloc1D( nx, sizeof(float), "kx2" ); xpos = (float*) malloc1D( nx, sizeof(float), "xpos" ); freqn( kx, kx2, xpos, nx, ax ); ky = (float*) malloc1D( ny, sizeof(float), "ky" ); ky2 = (float*) malloc1D( ny, sizeof(float), "ky2" ); ypos = (float*) malloc1D( ny, sizeof(float), "ypos" ); freqn( ky, ky2, ypos, ny, by ); cpix.resize( nx, ny ); cpix.init( 1 ); // only fast init and slow execution needed here /* --------- calculate probe wavefunction -------- */ pixel = ( rx2 + ry2 ); npixels = makeProbe( cpix, nx, ny, dx, dy, param, wavlen, k2max, pixel, multiMode, ismoth ); printf("there were %d pixels inside the aperture\n", npixels ); /* ----- copy back for output ----- */ sum = 0.0; myFile.resize( 2*nx, ny); myFile.setnpix( 2 ); for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) { myFile(ix,iy) = pixr = cpix.re(ix,iy); // real myFile(ix+nx,iy) = pixi = cpix.im(ix,iy); // imag pixsq[ix][iy] = p2 = pixr*pixr + pixi*pixi; sum += p2; } /* ----- Normalize probe intensity to unity ------------ */ scale = 1.0 / sum; scale = scale * ((double)nx) * ((double)ny); scale = (double) sqrt( scale ); scale2 = scale*scale; for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) 
{ myFile(ix,iy) *= (float) scale; myFile(ix+nx,iy) *= (float) scale; pixsq[ix][iy] *= (float) scale2; } /*------- Output results and find min and max to echo --------------- */ rmin = myFile.min(0); // real part rmax = myFile.max(0); aimin = myFile.min(1); // imaginary aimax = myFile.max(1); param[pRMAX] = rmax; param[pIMAX] = aimax; param[pRMIN] = rmin; param[pIMIN] = aimin; param[pDEFOCUS]= (float) df; param[pDX]= (float) (ax / nx); param[pDY]= (float) (by / ny); param[pENERGY]= (float) keV; param[pWAVEL]= (float) ( sqrt(k2max) * wavlen); param[pCS]= (float) Cs3; param[pPPOSX]= (float) dx; param[pPPOSX]= (float) dy; for( i=0; i<NPARAM; i++) myFile.setParam( i, param[i] ); // not very efficient if( myFile.write( fileout, rmin, rmax, aimin, aimax, param[pDX], param[pDY] ) != 1 ) printf( "probe cannot write an output file.\n"); printf( "Pix range %15.7g to %15.7g real,\n" " and %15.7g to %15.7g imaginary\n", rmin, rmax, aimin, aimax ); /*------- calculate probe size ---------------*/ x = prbSize( pixsq, nx, ny, dx, dy, ax, by ); printf("probe size (FWHM-II) = %g Ang.\n", x); /*------- exit ---------------*/ time = cputim() - time; printf("\nCPU time = %f sec\n", time ); return EXIT_SUCCESS; } /* end main() */
int main (int argc, char * argv[]) { int rank,size; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&size); MPI_Comm_rank(MPI_COMM_WORLD,&rank); int X,Y,x,y,X_ext,i,j,k; double ** A, ** localA, l, *msg; X=atoi(argv[1]); Y=X; //Extend dimension X with ghost cells if X%size!=0 if (X%size!=0) X_ext=X+size-X%size; else X_ext=X; if (rank==0) { //Allocate and init matrix A A=malloc2D(X_ext,Y); init2D(A,X,Y); } //Local dimensions x,y x=X_ext/size; y=Y; //Allocate local matrix and scatter global matrix localA=malloc2D(x,y); double * idx; if (rank==0) idx=&A[0][0]; MPI_Scatter(idx,x*y,MPI_DOUBLE,&localA[0][0],x*y,MPI_DOUBLE,0,MPI_COMM_WORLD); if (rank==0) { free2D(A,X_ext,Y); } //Timers struct timeval ts,tf,comps,compf,comms,commf; double total_time=0,computation_time=0,communication_time=0; MPI_Barrier(MPI_COMM_WORLD); gettimeofday(&ts,NULL); /****************************************************************************** The matrix A is distributed in contiguous blocks to the local matrices localA You have to use point-to-point communication routines Don't forget to set the timers for computation and communication! 
******************************************************************************/ //******************************************************************************** msg = malloc(y * sizeof(double)); int tag =55, dest, dif, srank; MPI_Status status; MPI_Request request; for(k = 0; k < X - 1; k++){ // if is owner_of_pivot_line(k) - x*rank <= k < x*(rank+1) if ( ( x*rank <= k ) && ( k < (x * (rank + 1)) ) ) { //pack_data(lA, send_buffer); memcpy(msg, localA[ k%x ], y * sizeof(double) ); //send_data_to_all for(dest=0;dest<size;dest++) { if ((dest==rank) || (dest<rank)) continue; gettimeofday(&comms,NULL); MPI_Send(msg,y,MPI_DOUBLE,dest,tag,MPI_COMM_WORLD); gettimeofday(&commf,NULL); communication_time+=commf.tv_sec-comms.tv_sec+(commf.tv_usec-comms.tv_usec)*0.000001; } } else { //receive_data_from_owner //unpack_data(receive_buffer, lA); srank = k / x; if ((rank<srank) || (rank==srank)) continue; gettimeofday(&comms,NULL); MPI_Recv(msg,y,MPI_DOUBLE,srank,tag,MPI_COMM_WORLD,&status); gettimeofday(&commf,NULL); communication_time+=commf.tv_sec-comms.tv_sec+(commf.tv_usec-comms.tv_usec)*0.000001; } //compute(k, lA); gettimeofday(&comps,NULL); if ( k < ( x * (rank + 1) - 1 ) ) { dif = ( x * (rank + 1) - 1 ) - k; if (dif > x) dif = x; for ( i = x - dif; i < x; i++ ) { l = localA[i][k] / msg[k]; for ( j=k; j<y; j++ ) localA[i][j] -= l * msg[j]; } } gettimeofday(&compf,NULL); computation_time+=compf.tv_sec-comps.tv_sec+(compf.tv_usec-comps.tv_usec)*0.000001; } free(msg); MPI_Barrier(MPI_COMM_WORLD); //******************************************************************************** gettimeofday(&tf,NULL); total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001; //Gather local matrices back to the global matrix if (rank==0) { A=malloc2D(X_ext,Y); idx=&A[0][0]; } MPI_Gather(&localA[0][0],x*y,MPI_DOUBLE,idx,x*y,MPI_DOUBLE,0,MPI_COMM_WORLD); double avg_total,avg_comp,avg_comm,max_total,max_comp,max_comm; 
MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); avg_total/=size; avg_comp/=size; avg_comm/=size; if (rank==0) { printf("LU-Block-p2p\tSize\t%d\tProcesses\t%d\n",X,size); printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm); printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm); } //Print triangular matrix U to file if (rank==0) { char * filename="output_block_p2p"; print2DFile(A,X,Y,filename); } MPI_Finalize(); return 0; }
int main() { char filein[NCMAX], fileout[NCMAX], filestart[NCMAX], filebeam[NCMAX], description[NCMAX], filecross[NCMAX], cline[NCMAX]; const char version[] = "21-nov-2012 (ejk)"; int lstart=0, lpartl=0, lbeams=0, lwobble=0, lcross=0, nwobble=1; int ix, iy, iz, nx, ny, nz, nzout, ixmid, iymid, i, nslic0, islice, nacx,nacy, iqx, iqy, iwobble, ndf, idf, nbout, ib, ncellx, ncelly, ncellz, iycross, ns, NPARAM; int *hbeam, *kbeam; int natom, *Znum, *Znum2, istart, na, done, status, multiMode; long nbeams, nillum; long ltime; unsigned long iseed; long32 nxl, nyl; /* tiffsubs 32 bit integer type */ float *x, *y, *z, *occ, *wobble; float *x2, *y2, *z2, *occ2; float wmin, wmax, xmin,xmax, ymin, ymax, zmin, zmax; float *kx, *ky, *xpos, *ypos, *param, *sparam; float k2, k2max, scale, v0, mm0, wavlen, rx, ry, ax, by, cz, pi, rmin, rmax, aimin, aimax, rx2,ry2, ctiltx, ctilty, tctx, tcty, acmin, acmax, Cs3, Cs5, df, df0, sigmaf, dfdelt, aobj, qx, qy, qy2, q2, q2min, q2max, sumdf, pdf, k2maxo, temperature, ycross, dx, dy; float tr, ti, wr, wi; float **wave0r, **wave0i, **pix, **depthpix, *propxr, *propxi, *propyr, *propyi; cfpix wave; /* complex probe wave functions */ cfpix trans; /* complex transmission functions */ cfpix temp ; /* complex scratch wavefunction */ double sum, timer, xdf, chi0, chi1, chi2, chi3, t, zslice, deltaz, phirms, rsq, vz, alx, aly; FILE *fp1; floatTIFF myFile; /* echo version date and get input file name */ printf("autoslic(e) version dated %s\n", version); printf("Copyright (C) 1998-2012 Earl J. 
Kirkland\n" ); printf( "This program is provided AS-IS with ABSOLUTELY NO WARRANTY\n " " under the GNU general public license\n\n" ); printf("perform CTEM multislice with automatic slicing and FFTW\n"); #ifdef USE_OPENMP printf( "and multithreaded using openMP\n"); #endif printf( "\n" ); pi = (float) (4.0 * atan( 1.0 )); NPARAM = myFile.maxParam(); param = (float*) malloc1D( NPARAM, sizeof(float), "param" ); sparam = (float*) malloc1D( NPARAM, sizeof(float), "sparam" ); for( ix=0; ix<NPARAM; ix++ ) param[ix] = 0.0F; printf("Name of file with input atomic " "potential in x,y,z format:\n"); ns = scanf("%s", filein ); /* get simulation options */ printf("Replicate unit cell by NCELLX,NCELLY,NCELLZ :\n"); ns = scanf("%d %d %d", &ncellx, &ncelly, &ncellz); if( ncellx < 1 ) ncellx = 1; if( ncelly < 1 ) ncelly = 1; if( ncellz < 1 ) ncellz = 1; printf("Name of file to get binary output of multislice result:\n"); ns = scanf("%s", fileout ); lpartl = askYN("Do you want to include partial coherence"); if( lpartl == 1 ) { printf("Illumination angle min, max in mrad:\n"); ns = scanf("%f %f", &acmin, &acmax); acmin = acmin * 0.001F; acmax = acmax * 0.001F; printf("Spherical aberration Cs3, Cs5(in mm.):\n"); ns = scanf("%g %g", &Cs3, &Cs5); param[pCS] = (float) ( Cs3*1.0e7 ); param[pCS5] = (float) ( Cs5*1.0e7 ); printf("Defocus, mean, standard deviation, and" " sampling size (in Angstroms) =\n"); ns = scanf("%f %f %f", &df0, &sigmaf, &dfdelt); param[pDEFOCUS] = (float) df0; printf("Objective aperture (in mrad) =\n"); ns = scanf("%f", &aobj); aobj = aobj * 0.001F; #ifdef MANY_ABERR /* get higher order aberrations if necessary */ printf("type higher order aber. name (as C32a, etc.) followed\n" " by a value in mm. 
(END to end)\n"); done = multiMode = 0; do{ ns = scanf( "%20s", cline ); if( strstr( cline, "END" ) != NULL ) { done = 1; } else { ns = scanf( "%lg", &vz ); status = readCnm( cline, param, vz ); if( status < 0 ) { printf( "unrecognized aberration, exit...\n"); exit( EXIT_SUCCESS ); } else multiMode = 1; } } while( !done ); #endif lstart = 0; } else { printf("NOTE, the program image must also be run.\n"); lstart = askYN("Do you want to start from previous result"); } if ( lstart == 1 ) { printf("Name of file to start from:\n"); ns = scanf("%s", filestart); } else { printf("Incident beam energy in kev:\n"); ns = scanf("%g", &v0); printf("Wavefunction size in pixels, Nx,Ny:\n"); ns = scanf("%d %d", &nx, &ny ); } printf("Crystal tilt x,y in mrad.:\n"); ns = scanf("%f %f", &ctiltx, &ctilty); ctiltx = ctiltx /1000; ctilty = ctilty /1000; /* remember that the slice thickness must be > atom size to use projected atomic potential */ printf("Slice thickness (in Angstroms):\n"); ns = scanf("%lf", &deltaz ); if( deltaz < 1.0 ) { printf("WARNING: this slice thickness is probably too thin" " for autoslice to work properly.\n"); } if( lpartl == 0 ) { lbeams = askYN("Do you want to record the (real,imag) value\n" " of selected beams vs. 
thickness"); if( lbeams == 1 ) { printf("Name of file for beams info:\n"); ns = scanf("%s", filebeam ); printf("Number of beams:\n"); ns = scanf("%d", &nbout); if( nbout<1 ) nbout = 1; hbeam = (int*) malloc1D( nbout, sizeof(int), "hbeam" ); kbeam = (int*) malloc1D( nbout, sizeof(int), "kbeam" ); for( ib=0; ib<nbout; ib++) { printf("Beam %d, h,k=\n", ib+1); ns = scanf("%d %d", &hbeam[ib], &kbeam[ib] ); } } } lwobble = askYN("Do you want to include thermal vibrations"); if( lwobble == 1 ) { printf( "Type the temperature in degrees K:\n"); ns = scanf( "%g", &temperature ); printf( "Type number of configurations to average over:\n"); ns = scanf( "%d", &nwobble ); if( nwobble < 1 ) nwobble = 1; /* get random number seed from time if available otherwise ask for a seed */ ltime = (long) time( NULL ); iseed = (unsigned) ltime; if( ltime == -1 ) { printf("Type initial seed for random number generator:\n"); ns = scanf("%ld", &iseed); } else printf( "Random number seed initialized to %ld\n", iseed ); } else temperature = 0.0F; if( lpartl == 0 ) { lcross = askYN("Do you want to output intensity vs. 
depth cross section"); if( lcross == 1 ){ printf( "Type name of file to get depth profile image:\n"); ns = scanf("%s", filecross ); printf( "Type y position of depth cross section (in Ang.):\n"); ns = scanf("%f", &ycross ); } } /* start timing the actual computation just for fun */ timer = cputim(); #ifdef USE_OPENMP walltimer = walltim(); /* wall time for opneMP */ #endif /* get starting value of transmitted wavefunction if required (this can only be used in coherent mode) remember to save params for final output pix */ if ( lstart == 1 ) { if( myFile.read( filestart ) != 1 ) { printf("Cannot open input file: %s .\n", filestart ); exit( 0 ); } if( myFile.getnpix() != 2 ) { printf("Input file %s must be complex, can't continue.\n", filestart ); exit( 0 ); } nx = myFile.nx(); ny = myFile.ny(); wave0r = (float**) malloc2D( nx, ny, sizeof(float), "waver" ); nx = nx/2; wave0i = wave0r + nx; // save starting pix for later for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) { wave0r[ix][iy] = myFile(ix,iy); wave0i[ix][iy] = myFile(ix+nx,iy); } // save parameters to verify successive images are same size etc. for( i=0; i<NPARAM; i++) sparam[i] = myFile.getParam( i ); ax = sparam[pDX] * nx; by = sparam[pDY] * ny; v0 = sparam[pENERGY]; nslic0 = (int) sparam[pNSLICES]; printf("Starting pix range %g to %g real\n" " %g to %g imag\n", sparam[pRMIN], sparam[pRMAX], sparam[pIMIN], sparam[pIMAX] ); printf("Beam voltage = %g kV\n", v0); printf("Old crystal tilt x,y = %g, %g mrad\n", 1000.*sparam[pXCTILT], 1000.*sparam[pYCTILT]); } else nslic0 = 0; /* end if( lstart...) 
*/ /* calculate relativistic factor and electron wavelength */ mm0 = 1.0F + v0/511.0F; wavlen = (float) wavelength( v0 ); printf("electron wavelength = %g Angstroms\n", wavlen); /* read in specimen coordinates and scattering factors */ natom = ReadXYZcoord( filein, ncellx, ncelly, ncellz, &ax, &by, &cz, &Znum, &x, &y, &z, &occ, &wobble, description, NCMAX ); printf("%d atomic coordinates read in\n", natom ); printf("%s", description ); printf("Size in pixels Nx, Ny= %d x %d = %d beams\n", nx,ny, nx*ny); printf("Lattice constant a,b = %12.4f, %12.4f\n", ax,by); /* calculate the total specimen volume and echo */ xmin = xmax = x[0]; ymin = ymax = y[0]; zmin = zmax = z[0]; wmin = wmax = wobble[0]; for( i=0; i<natom; i++) { if( x[i] < xmin ) xmin = x[i]; if( x[i] > xmax ) xmax = x[i]; if( y[i] < ymin ) ymin = y[i]; if( y[i] > ymax ) ymax = y[i]; if( z[i] < zmin ) zmin = z[i]; if( z[i] > zmax ) zmax = z[i]; if( wobble[i] < wmin ) wmin = wobble[i]; if( wobble[i] > wmax ) wmax = wobble[i]; } printf("Total specimen range is\n %g to %g in x\n" " %g to %g in y\n %g to %g in z\n", xmin, xmax, ymin, ymax, zmin, zmax ); if( lwobble == 1 ) printf("Range of thermal rms displacements (300K) = %g to %g\n", wmin, wmax ); #ifdef USE_OPENMP /* force LUT init. 
to avoid redundant init in parallel form */ rsq = 0.5; /* arbitrary position */ for( i=0; i<natom; i++) vz = vzatomLUT( Znum[i], rsq ); #endif /* calculate spatial frequencies and positions for future use */ rx = 1.0F/ax; rx2= rx*rx; ry = 1.0F/by; ry2= ry*ry; ixmid = nx/2; iymid = ny/2; nxl = nx; nyl = ny; kx = (float*) malloc1D( nx, sizeof(float), "kx" ); kx2 = (float*) malloc1D( nx, sizeof(float), "kx2" ); xpos = (float*) malloc1D( nx, sizeof(float), "xpos" ); freqn( kx, kx2, xpos, nx, ax ); ky = (float*) malloc1D( ny, sizeof(float), "ky" ); ky2 = (float*) malloc1D( ny, sizeof(float), "ky2" ); ypos = (float*) malloc1D( ny, sizeof(float), "ypos" ); freqn( ky, ky2, ypos, ny, by ); /* allocate some more arrays and initialize wavefunction */ trans.resize( nx, ny ); trans.init(); wave.resize( nx, ny ); wave.copyInit( trans ); if( lstart == 0 ) { for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) { wave.re(ix,iy) = 1.0F; /* real part */ wave.im(ix,iy) = 1.0F; /* imag part */ } } else { for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) { wave.re(ix,iy) = wave0r[ix][iy]; /* real part */ wave.im(ix,iy) = wave0i[ix][iy]; /* imag part */ } } if( lcross == 1 ) { /* nz may be too small with thermal vibrations so add a few extra */ nz = (int) ( (zmax-zmin)/ deltaz + 3.5); depthpix = (float**) malloc2D( nx, nz, sizeof(float), "depthpix" ); for( ix=0; ix<nx; ix++) for( iz=0; iz<nz; iz++) depthpix[ix][iz] = 0.0F; iycross = (int) ( 0.5 + (ny * ycross / by)); while( iycross < 0 ) iycross += ny; iycross = iycross%ny; /* make periodic in ny */ printf("save xz cross section at iy= %d pixels\n", iycross ); } /* calculate propagator function */ k2max = nx/(2.0F*ax); tctx = ny/(2.0F*by); if( tctx < k2max ) k2max = tctx; k2max = BW * k2max; printf("Bandwidth limited to a real space resolution of %f Angstroms\n", 1.0F/k2max); printf(" (= %.2f mrad) for symmetrical anti-aliasing.\n", wavlen*k2max*1000.0F); k2max = k2max*k2max; tctx = (float) (2.0 * tan(ctiltx)); tcty = (float) (2.0 * 
tan(ctilty)); propxr = (float*) malloc1D( nx, sizeof(float), "propxr" ); propxi = (float*) malloc1D( nx, sizeof(float), "propxi" ); propyr = (float*) malloc1D( ny, sizeof(float), "propyr" ); propyi = (float*) malloc1D( ny, sizeof(float), "propyi" ); scale = pi * ((float)deltaz); for( ix=0; ix<nx; ix++) { t = scale * ( kx2[ix]*wavlen - kx[ix]*tctx ); propxr[ix] = (float) cos(t); propxi[ix] = (float) -sin(t); } for( iy=0; iy<ny; iy++) { t = scale * ( ky2[iy]*wavlen - ky[iy]*tcty ); propyr[iy] = (float) cos(t); propyi[iy] = (float) -sin(t); } /* iterate the multislice algorithm proper NOTE: zero freg is in the bottom left corner and expands into all other corners - not in the center this is required for the FFT - don't waste time rearranging partial coherence method force the integrals to include the origin and to be symmetric about the origin and to have the same periodic boundary conditions as the sampling grid */ if( lpartl == 1 ) { printf("Illumination angle sampling (in mrad) = %f, %f\n\n", 1000.*rx*wavlen, 1000.*ry*wavlen); pix = (float**) malloc2D( nx, ny, sizeof(float), "pix" ); for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) pix[ix][iy] = 0.0F; temp.resize( nx, ny ); temp.copyInit( trans ); ndf = (int) ( ( 2.5F * sigmaf ) / dfdelt ); nacx = (int) ( ( acmax / ( wavlen * rx ) ) + 1.5F ); nacy = (int) ( ( acmax / ( wavlen * ry ) ) + 1.5F ); q2max = acmax / wavlen; q2max = q2max*q2max; q2min = acmin / wavlen; q2min = q2min*q2min; k2maxo = aobj / wavlen; k2maxo = k2maxo*k2maxo; chi1 = pi * wavlen; chi2 = 0.5 * Cs3 * wavlen *wavlen; chi3 = Cs5 * wavlen*wavlen*wavlen*wavlen /3.0; nillum = 0; /* for Monte Carlo stuff */ x2 = (float*) malloc1D( natom, sizeof(float), "x2" ); y2 = (float*) malloc1D( natom, sizeof(float), "y2" ); z2 = (float*) malloc1D( natom, sizeof(float), "z2" ); occ2 = (float*) malloc1D( natom, sizeof(float), "occ2" ); Znum2 = (int*) malloc1D( natom, sizeof(int), "Znum2" ); if( lwobble == 0 ) sortByZ( x, y, z, occ, Znum, natom ); /* integrate over 
the illumination angles */ for( iwobble=0; iwobble<nwobble; iwobble++) { if( lwobble == 1 ) printf("configuration # %d\n", iwobble+1 ); for( iqy= -nacy; iqy<=nacy; iqy++) { qy = iqy * ry; qy2 = qy * qy; for( iqx= -nacx; iqx<=nacx; iqx++) { qx = iqx * rx; q2 = qx*qx + qy2; if( (q2 <= q2max) && (q2 >= q2min) ) { nillum += 1; for( ix=0; ix<nx; ix++) { for( iy=0; iy<ny; iy++) { t = 2.0*pi*( qx*xpos[ix] + qy*ypos[iy] ); wave.re(ix,iy) = (float) cos(t); /* real */ wave.im(ix,iy) = (float) sin(t); /* imag */ } } /* add random thermal displacements scaled by temperature if requested remember that initial wobble is at 300K for each direction */ if( lwobble == 1 ){ scale = (float) sqrt(temperature/300.0) ; for( i=0; i<natom; i++) { x2[i] = x[i] + (float)(wobble[i]*rangauss(&iseed)*scale); y2[i] = y[i] + (float)(wobble[i]*rangauss(&iseed)*scale); z2[i] = z[i] + (float)(wobble[i]*rangauss(&iseed)*scale); occ2[i] = occ[i]; Znum2[i] = Znum[i]; } printf( "Sorting atoms by depth...\n"); sortByZ( x2, y2, z2, occ2, Znum2, natom ); zmin = z2[0]; /* reset zmin/max after wobble */ zmax = z2[natom-1]; printf("Thickness range with thermal displacements" " is %g to %g (in z)\n", zmin, zmax ); } else for( i=0; i<natom; i++) { x2[i] = x[i]; y2[i] = y[i]; z2[i] = z[i]; occ2[i] = occ[i]; Znum2[i] = Znum[i]; } zslice = 0.75*deltaz; /* start a little before top of unit cell */ istart = 0; while( istart < natom ) { /* find range of atoms for current slice */ na = 0; for(i=istart; i<natom; i++) if( z2[i] < zslice ) na++; else break; /* calculate transmission function, skip if layer empty */ if( na > 0 ) { trlayer( &x2[istart], &y2[istart], &occ2[istart], &Znum2[istart],na, ax, by, v0, trans, nxl, nyl, &phirms, &nbeams, k2max ); wave *= trans; // transmit } /* remember: prop needed here to get anti-aliasing right */ wave.fft(); propagate( wave, propxr, propxi, propyr, propyi, kx2, ky2, k2max, nx, ny ); wave.ifft(); zslice += deltaz; istart += na; } /* end while(zslice<=..) 
*/ scale = 1.0F / ( ((float)nx) * ((float)ny) ); sum = 0.0; for( ix=0; ix<nx; ix++) { for( iy=0; iy<ny; iy++) sum += wave.re(ix,iy)*wave.re(ix,iy) + wave.im(ix,iy)*wave.im(ix,iy); } sum = sum * scale; printf("Illumination angle = %7.3f, %7.3f mrad", 1000.*qx*wavlen, 1000.*qy*wavlen); printf(", integrated intensity= %f\n", sum ); /*-------- integrate over +/- 2.5 sigma of defocus ------------ */ wave.fft(); sumdf = 0.0F; for( idf= -ndf; idf<=ndf; idf++) { param[pDEFOCUS] = df = df0 + idf*dfdelt; for( ix=0; ix<nx; ix++) { alx = wavlen * kx[ix]; /* x component of angle alpha */ for( iy=0; iy<ny; iy++) { aly = wavlen * ky[iy]; /* y component of angle alpha */ k2 = kx2[ix] + ky2[iy]; if( k2 <= k2maxo ) { chi0 = (2.0*pi/wavlen) * chi( param, alx, aly, multiMode ); tr = (float) cos(chi0); ti = (float) -sin(chi0); wr = wave.re(ix,iy); wi = wave.im(ix,iy); temp.re(ix,iy) = wr*tr - wi*ti; temp.im(ix,iy) = wr*ti + wi*tr; } else { temp.re(ix,iy) = 0.0F; /* real */ temp.im(ix,iy) = 0.0F; /* imag */ } } /* end for( iy=0... ) */ } /* end for( ix=0... ) */ temp.ifft(); xdf = (double) ( (df - df0) /sigmaf ); pdf = (float) exp( -0.5F * xdf*xdf ); sumdf += pdf; for( ix=0; ix<nx; ix++) { for( iy=0; iy<ny; iy++) { wr = temp.re(ix,iy); wi = temp.im(ix,iy); pix[ix][iy] += pdf* ( wr*wr + wi*wi ); } } }/* end for(idf..) */ }/* end if( q2...) */ } /* end for( iqx..) */ } /* end for( iqy..) */ } /* end for( iwobble...) */ printf("Total number of illumination angle = %ld\n", nillum); printf("Total number of defocus values = %d\n", 2*ndf+1); /* remember that nillum already includes nwobble so don't divide by nwobble! 
*/ scale = 1.0F / ( ((float)nillum) * sumdf ); rmin = pix[0][0] * scale; rmax = rmin; aimin = 0.0F; aimax = 0.0F; for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) { pix[ix][iy] = pix[ix][iy] * scale; if( pix[ix][iy] < rmin ) rmin = pix[ix][iy]; if( pix[ix][iy] > rmax ) rmax = pix[ix][iy]; } /* ---- start coherent method below ---------------- (remember that waver,i[][] was initialize above) */ } else { if( lbeams ==1 ) { fp1 = fopen( filebeam, "w" ); if( NULL == fp1 ) { printf("can't open file %s\n", filebeam); exit(0); } fprintf( fp1, " (h,k) = " ); for(ib=0; ib<nbout; ib++) fprintf(fp1," (%d,%d)", hbeam[ib], kbeam[ib]); fprintf( fp1, "\n" ); fprintf( fp1, "nslice, (real,imag) (real,imag) ...\n\n"); for( ib=0; ib<nbout; ib++) { if( hbeam[ib] < 0 ) hbeam[ib] = nx + hbeam[ib]; if( kbeam[ib] < 0 ) kbeam[ib] = ny + kbeam[ib]; if( hbeam[ib] < 0 ) hbeam[ib] = 0; if( kbeam[ib] < 0 ) kbeam[ib] = 0; if( hbeam[ib] > nx-1 ) hbeam[ib] = nx-1; if( kbeam[ib] > ny-1 ) kbeam[ib] = ny-1; } } /* add random thermal displacements scaled by temperature if requested remember that initial wobble is at 300K for each direction */ if( lwobble == 1 ){ scale = (float) sqrt(temperature/300.0) ; for( i=0; i<natom; i++) { x[i] += (float) (wobble[i] * rangauss( &iseed ) * scale); y[i] += (float) (wobble[i] * rangauss( &iseed ) * scale); z[i] += (float) (wobble[i] * rangauss( &iseed ) * scale); } } printf( "Sorting atoms by depth...\n"); sortByZ( x, y, z, occ, Znum, natom ); if( lwobble == 1 ){ zmin = z[0]; /* reset zmin/max after wobble */ zmax = z[natom-1]; printf("Thickness range with thermal displacements" " is %g to %g (in z)\n", zmin, zmax ); } scale = 1.0F / ( ((float)nx) * ((float)ny) ); zslice = 0.75*deltaz; /* start a little before top of unit cell */ istart = 0; islice = 1; while( (istart < natom) && ( zslice < (zmax+deltaz) ) ) { /* find range of atoms for current slice */ na = 0; for(i=istart; i<natom; i++) if( z[i] < zslice ) na++; else break; /* calculate transmission function, skip 
if layer empty */ if( na > 0 ) { trlayer( &x[istart], &y[istart], &occ[istart], &Znum[istart], na, ax, by, v0, trans, nxl, nyl, &phirms, &nbeams, k2max ); /*??? printf("average atompot comparison = %g\n", phirms/(wavlen*mm0) ); */ wave *= trans; // transmit } /* bandwidth limit */ wave.fft(); if( lbeams== 1 ) { fprintf( fp1, "%5d", islice); for( ib=0; ib<nbout; ib++) fprintf(fp1, "%10.6f %10.6f", scale*wave.re(hbeam[ib],kbeam[ib]), /* real */ scale*wave.im(hbeam[ib],kbeam[ib]) ); /* imag */ fprintf( fp1, "\n"); } /* remember: prop needed here to get anti-aliasing right */ propagate( wave, propxr, propxi, propyr, propyi, kx2, ky2, k2max, nx, ny ); wave.ifft(); /* save depth cross section if requested */ if( (lcross == 1) && (islice<=nz) ) { for( ix=0; ix<nx; ix++) { depthpix[ix][islice-1] = wave.re(ix,iycross)*wave.re(ix,iycross) + wave.im(ix,iycross)*wave.im(ix,iycross); } nzout = islice; } sum = 0.0; for( ix=0; ix<nx; ix++) { for( iy=0; iy<ny; iy++) sum += wave.re(ix,iy)*wave.re(ix,iy) + wave.im(ix,iy)*wave.im(ix,iy); } sum = sum * scale; printf("z= %f A, %ld beams, %d coord., \n" " aver. phase= %f, total intensity = %f\n", zslice, nbeams, na, phirms, sum ); zslice += deltaz; istart += na; islice++; } /* end while(istart<natom..) */ rmin = wave.re(0,0); rmax = rmin; aimin = wave.im(0,0); aimax = aimin; for( ix=0; ix<nx; ix++) { for( iy=0; iy<ny; iy++) { wr = wave.re(ix,iy); wi = wave.im(ix,iy); if( wr < rmin ) rmin = wr; if( wr > rmax ) rmax = wr; if( wi < aimin ) aimin = wi; if( wi > aimax ) aimax = wi; } } } /* end else .. 
coherent section */ /* output results and find min and max to echo remember that complex pix are stored in the file in FORTRAN order for compatibility */ if( lstart == 1 ) for( ix=0; ix<NPARAM; ix++ ) param[ix] = sparam[ix]; param[pRMAX] = rmax; param[pIMAX] = aimax; param[pRMIN] = rmin; param[pIMIN] = aimin; param[pXCTILT] = ctiltx; param[pYCTILT] = ctilty; param[pENERGY] = v0; param[pDX] = dx = (float) ( ax/((float)nx) ); param[pDY] = dy = (float) ( by/((float)ny) ); param[pWAVEL] = wavlen; param[pNSLICES] = 0.0F; /* ??? */ if ( lpartl == 1 ) { param[pDEFOCUS] = df0; param[pOAPERT] = aobj; param[pCAPERT] = acmax; param[pDDF] = sigmaf; } for( ix=0; ix<NPARAM; ix++ ) myFile.setParam( ix, param[ix] ); if ( lpartl == 1 ) { myFile.resize( nx, ny ); myFile.setnpix( 1 ); for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) myFile(ix,iy) = pix[ix][iy]; i = myFile.write( fileout, rmin, rmax, aimin, aimax, dx, dy ); } else { myFile.resize( 2*nx, ny ); myFile.setnpix( 2 ); for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) { myFile(ix,iy) = wave.re(ix,iy); myFile(ix+nx,iy) = wave.im(ix,iy); } i = myFile.write( fileout, rmin, rmax, aimin, aimax, dx, dy ); } if( i != 1 ) printf( "autoslice cannot write TIF file %s\n", fileout ); printf( "pix range %g to %g real,\n" " %g to %g imag\n", rmin,rmax,aimin,aimax ); /* output depth cross section if requested */ if( lcross == 1 ){ rmin = depthpix[0][0]; rmax = rmin; for( ix=0; ix<nx; ix++) for( iz=0; iz<nzout; iz++) { wr = depthpix[ix][iz]; if( wr < rmin ) rmin = wr; if( wr > rmax ) rmax = wr; } myFile.setParam( pRMAX, rmax ); myFile.setParam( pIMAX, 0.0F ); myFile.setParam( pRMIN, rmin ); myFile.setParam( pIMIN, 0.0F ); myFile.setParam( pDY, dy = (float) ( deltaz ) ); myFile.resize( nx, nzout ); myFile.setnpix( 1 ); for( ix=0; ix<nx; ix++) for( iz=0; iz<nzout; iz++) { myFile(ix,iz) = depthpix[ix][iz]; } i = myFile.write( filecross, rmin, rmax, aimin, aimax, dx, dy ); if( i != 1 ) printf( "autoslice cannot write TIF file %s\n", filecross ); 
printf( "depth pix range %g to %g real,\n", rmin,rmax ); } printf("Total CPU time = %f sec.\n", cputim()-timer ); #ifdef USE_OPENMP printf("wall time = %g sec.\n", walltim() - walltimer); #endif return 0; } /* end main() */
int main() { char **filein, fileout[NCMAX]; char datetime[20]; int i, ipix, ix, iy, nx, ny, nxold, nyold, ixmid, iymid, npix, npixold, ninput, nsum, nh, logpix, ns, PowerSpectra, pixtype, NPARAM; long *nhist; float scale, pixc, rmin,rmin2,rmax, aimin,aimax,tr, ti, dx, dy; float *param; float **pixr, **pixi, **pixout; double sum, *hist, ax, by, rx, ry2; FILE *fp; floatTIFF myFile; /*-------- get input file names etc. ------------ */ printf( "sumpix version dated 6-nov-2012 ejk\n"); printf("Copyright (C) 1998-2012 Earl J. Kirkland\n" ); printf( "This program is provided AS-IS with ABSOLUTELY NO WARRANTY\n " " under the GNU general public license\n\n" ); printf( "Sum multiple image or wave function files,\n" "complex images will be converted to squared " "magnitude before summing.\n"); printf( "All input images must be the same type and size.\n\n" ); printf( "Type number of input image files\n"); ns = scanf( "%d", &ninput ); filein = (char**) malloc2D( ninput, NCMAX, sizeof(char), "filein" ); for( ipix=0; ipix<ninput; ipix++) { printf("input %d : ", ipix ); ns = scanf("%s", filein[ipix] ); } printf("\n"); printf("Type name of output file:\n"); ns = scanf( "%s", fileout ); logpix = askYN( "Do you want to display on log scale"); PowerSpectra = askYN( "Do you want to convert to a power spectra"); /* get image size and type from the first input pix all successive images have to be the same type and size !!! 
-remember that floatTIFF cannot handle plain integer TIFF images -------- read floating point images and average -------- remember that complex images are stacked side by side with npix=2 and nx twice its real value (real images have npix=1 and nx its normal value) */ NPARAM = myFile.maxParam(); param = (float*) malloc1D( NPARAM, sizeof(float), "param" ); for( ipix=0; ipix<ninput; ipix++) { for( ix=0; ix<NPARAM; ix++) param[ix] = 0.0F; if( myFile.read( filein[ipix] ) != 1 ) { printf("Cannot open file %s\n", filein[ipix] ); exit( 0 ); } myFile.getDateTime( datetime ); nx = (int) myFile.nx(); ny = (int) myFile.ny(); npix = myFile.getnpix(); if( 0 == ipix ) { npixold = npix; nxold = nx; nyold = ny; pixr = (float**) malloc2D( 2*nx, ny, sizeof(int), "pixr-1" ); //npix ???? pixout = (float**) malloc2D( nx, ny, sizeof(int), "pixout-1" ); for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) pixout[ix][iy] = 0.0F; pixtype = floatPIX; printf( "Image size : Nx= %d, Ny= %d\n", nx, ny ); } else if( (nx != nxold) || (ny != nyold) ) { printf( "different size in file %s, " " nx= %d, ny= %d\n", filein[ipix], nx, ny ); exit( 0 ); } if( npix != npixold ) { printf( "Can't mix real and complex images" " in file: %s\n", filein[ipix] ); exit( 0 ); } if( (npix<1) || (npix>2) ) { printf( "bad npix = %d in TIFF file %s\n", npix, filein[ipix] ); exit( 0 ); } // copy both real+imag back to old style array to re-use old code // (not optimal but works for now) for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) pixr[ix][iy] = myFile(ix,iy); nx = nx /npix; ax = myFile.getParam(pDX) * ((float)nx); by = myFile.getParam(pDY) * ((float)ny); rmin = myFile.getParam(pRMIN); rmax = myFile.getParam(pRMAX); aimin = myFile.getParam(pIMIN); aimax = myFile.getParam(pIMAX); if( npix == 2 ) { printf( "pix %d created %s, range: %g to %g (real)," "\n and %g to %g (imag)\n", ipix, datetime, rmin, rmax, aimin, aimax); } else if( npix == 1 ) { printf( "pix %d created %s, range: %g to %g (real)\n", ipix, datetime, rmin, rmax 
); } if( PowerSpectra == 1 ) { if( npix == 1 ) { if( 0 == ipix ) pixi = (float**) malloc2D( nx, ny, sizeof(float), "pixi-2" ); for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) pixi[ix][iy] = 0.0F; } else if( (npix==2) && (ipix==0) ) pixi = pixr + nx; npix = 2; fft2d ( pixr, pixi, nx, ny, +1); } if( npix == 1 ) { /* real pix */ for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) pixout[ix][iy] += pixr[ix][iy]; } else if( npix == 2 ) { /* complex pix */ if( 0 == ipix ) pixi = pixr + nx; for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) { tr = pixr[ix][iy]; ti = pixi[ix][iy]; pixout[ix][iy] += ( tr*tr + ti*ti); } } } // end for(ipix=... ) /* Output results and find min and max to echo NOTE the logarithmic scaling of diffraction pattern is taken from Gonzalez and Wintz pg 48 added scaling trick from showpix.f 9-aug-1995 ejk */ printf("Output pix size : Nx= %d, Ny= %d\n", nx, ny ); if( (PowerSpectra == 1) && ( pixtype == floatPIX ) ) { /* put (0,0) in the center */ invert2D( pixout, nx, ny); /* histogram the azimutal average */ hist = (double*) malloc1D( (nx+ny), sizeof(double), "hist" ); nhist = (long*) malloc1D( (nx+ny), sizeof(long), "nhist" ); for( ix=0; ix<(nx+ny); ix++) { hist[ix] = 0.0; nhist[ix] = 0; } scale = 1.0F / ( ((float)nx) * ((float)ny) ); sum = 0.0; nsum = 0; nh = 0; ixmid = nx/2; iymid = ny/2; for( iy=0; iy<ny; iy++) { ry2 = (double) ( iy-iymid); ry2 = ry2*(ax/by); ry2 = ry2*ry2; for( ix=0; ix<nx; ix++) { pixc = pixout[ix][iy]; rx = (double) (ix-ixmid); i = (int) ( sqrt( rx*rx + ry2 ) + 0.5); hist[i] += pixc; nhist[i]++; if( i > nh ) nh = i; if( logpix == 1 ) { if( pixc > 1.e-10F) pixc = (float) log( (double) fabs(pixc) ); else pixc = -23.0F; pixout[ix][iy] = pixc; } if( (ix == 0) && (iy == 0) ) { rmin = pixc; rmax = rmin; } else if( (ix != ixmid) && (iy != iymid) ) { if( pixc < rmin ) rmin = pixc; if( pixc > rmax ) rmax = pixc; } if( (ix>(3*nx)/8) && (ix<(5*nx)/8) && (iy>(3*ny)/8) && (iy<(5*ny)/8) ) { sum = sum + pixc; nsum += 1; } } /* end for ix... 
*/ } /* end for iy... */ printf( "write azimuthal averaged intensity vs. \n" " spatial frequency k, into file azimuth.dat\n"); fp = fopen( "azimuth.dat", "w+" ); if( fp == NULL ) { printf("cannot open file azimuthal.dat\n"); exit( 0 ); } for( i=0; i<=nh; i++) { hist[i] = hist[i] / nhist[i]; fprintf( fp, "%16.8g %16.8g\n", ((double)i)/ax, hist[i] ); } fclose( fp ); myFile.resize( nx, ny ); // in case it was complex myFile.setnpix( 1 ); myFile.setParam( pRMAX, rmax); myFile.setParam( pIMAX, aimax = 0.0F); myFile.setParam( pIMIN, aimin = 0.0F); myFile.setParam( pRMIN, rmin ); myFile.setParam( pDX, dx = 1.0F / ((float)ax) ); myFile.setParam( pDY, dy = 1.0F / ((float)by) ); printf("output image size: %f to %f /Angstroms\n", nx*dx, ny*dy ); printf("Power Spectra range %f to %f\n", rmin, rmax ); for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) myFile(ix,iy) = pixout[ix][iy]; rmin2= (float) (0.05*rmin + 0.95*sum/nsum); // somtimes better //myFile.write( fileout, rmin, rmax, aimin, aimax, dx, dy ); myFile.write( fileout, rmin2, rmax, aimin, aimax, dx, dy ); } else if( (pixtype == floatPIX) && (PowerSpectra == 0) ) { for( iy=0; iy<ny; iy++) { for( ix=0; ix<nx; ix++) { pixc = pixout[ix][iy]; if( logpix == 1 ) { if( pixc > 1.e-30F) pixc = (float) log( (double) fabs(pixc) ); else pixc = -100.0F; pixout[ix][iy] = pixc; } if( (ix == 0) && (iy == 0) ) { rmin = pixc; rmax = rmin; } else { if( pixc < rmin ) rmin = pixc; if( pixc > rmax ) rmax = pixc; } } /* end for ix... */ } /* end for iy... 
*/ myFile.resize( nx, ny ); // in case it was complex myFile.setnpix( 1 ); myFile.setParam( pRMAX, rmax); myFile.setParam( pIMAX, 0.0F); myFile.setParam( pIMIN, 0.0F); myFile.setParam( pRMIN, rmin); printf("Summed pix range %f to %f\n", rmin, rmax ); for( ix=0; ix<nx; ix++) for( iy=0; iy<ny; iy++) myFile(ix,iy) = pixout[ix][iy]; dx = myFile.getParam( pDX ); dy = myFile.getParam( pDY ); aimin = aimax = 0.0F; myFile.write( fileout, rmin, rmax, aimin, aimax, dx, dy ); } return EXIT_SUCCESS; } /* end main() */
int main (int argc, char * argv[]) { int rank,size; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&size); MPI_Comm_rank(MPI_COMM_WORLD,&rank); int X,Y,x,y,X_ext,i; double **A, **localA; X=atoi(argv[1]); Y=X; //Extend dimension X with ghost cells if X%size!=0 if (X%size!=0) X_ext=X+size-X%size; else X_ext=X; if (rank==0) { //Allocate and init matrix A A=malloc2D(X_ext,Y); init2D(A,X,Y); } //Local dimensions x,y x=X_ext/size; y=Y; //Allocate local matrix and scatter global matrix localA=malloc2D(x,y); double * idx; for (i=0;i<x;i++) { if (rank==0) idx=&A[i*size][0]; MPI_Scatter(idx,Y,MPI_DOUBLE,&localA[i][0],y,MPI_DOUBLE,0,MPI_COMM_WORLD); } if (rank==0) free2D(A,X_ext,Y); //Timers struct timeval ts,tf,comps,compf,comms,commf; double total_time,computation_time,communication_time; MPI_Barrier(MPI_COMM_WORLD); gettimeofday(&ts,NULL); /****************************************************************************** The matrix A is distributed in a round-robin fashion to the local matrices localA You have to use point-to-point communication routines. Don't forget the timers for computation and communication! 
******************************************************************************/ int line_index, line_owner; int k, start; double *k_row, *temp; MPI_Status status; temp = malloc(y * sizeof(*temp)); // k_row = malloc(y * sizeof(*k_row)); /* omoia me to allo cyclic, vriskoume ton line_owner */ for (k=0; k<y-1; k++){ line_owner = k % size; line_index = k / size; if (rank <= line_owner) start = k / size + 1; else start = k / size; if (rank == line_owner) k_row = localA[line_index]; else k_row = temp; /* set communication timer */ gettimeofday(&comms, NULL); /* COMM */ // if (rank != line_owner){ // if (rank == 0) // MPI_Recv( k_row, y, MPI_DOUBLE, size-1, MPI_ANY_SOURCE, MPI_COMM_WORLD, &status); // else // MPI_Recv( k_row, y, MPI_DOUBLE, rank-1, MPI_ANY_SOURCE, MPI_COMM_WORLD, &status); // } // // /* autos pou einai prin ton line_owner den prepei na steilei */ // if (rank != line_owner -1){ // /* o teleutaios prepei na steilei ston prwto, ektos an o prwtos einai o line_owner */ // if (rank == size-1) { // if (line_owner != 0) // MPI_Send( k_row, y, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD); // } // else // MPI_Send(k_row, y, MPI_DOUBLE, rank+1, rank, MPI_COMM_WORLD); // } /* o line_owner stelnei se olous (ektos tou eautou tou) kai oloi oi alloi kanoun * receive */ if (rank == line_owner){ for (i=0; i<size; i++) if (i != line_owner) MPI_Send( k_row, y, MPI_DOUBLE, i, line_owner, MPI_COMM_WORLD); } else MPI_Recv(k_row, y, MPI_DOUBLE, line_owner, line_owner, MPI_COMM_WORLD, &status); /* stop communication timer */ gettimeofday(&commf, NULL); communication_time += commf.tv_sec - comms.tv_sec + (commf.tv_usec - comms.tv_usec)*0.000001; /* set computation timer */ gettimeofday(&comps, NULL); /* Compute */ go_to_work( localA, k_row, x, y, rank, start, k ); /* stop computation timer */ gettimeofday(&compf, NULL); computation_time += compf.tv_sec - comps.tv_sec + (compf.tv_usec - comps.tv_usec)*0.000001; } gettimeofday(&tf,NULL); 
total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001; //Gather local matrices back to the global matrix if (rank==0) A=malloc2D(X_ext,Y); for (i=0;i<x;i++) { if (rank==0) idx=&A[i*size][0]; MPI_Gather(&localA[i][0],y,MPI_DOUBLE,idx,Y,MPI_DOUBLE,0,MPI_COMM_WORLD); } double avg_total,avg_comp,avg_comm,max_total,max_comp,max_comm; MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); avg_total/=size; avg_comp/=size; avg_comm/=size; if (rank==0) { printf("LU-Cyclic-p2p\tSize\t%d\tProcesses\t%d\n",X,size); printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm); printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm); } //Print triangular matrix U to file if (rank==0) { char * filename="output_cyclic_p2p"; print2DFile(A,X,Y,filename); } MPI_Finalize(); return 0; }
int main (int argc, char * argv[]) { int rank,size; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&size); MPI_Comm_rank(MPI_COMM_WORLD,&rank); int X,Y,x,y,X_ext, i; double ** A, ** localA; X=atoi(argv[1]); Y=X; //Extend dimension X with ghost cells if X%size!=0 if (X%size!=0) X_ext=X+size-X%size; else X_ext=X; if (rank==0) { //Allocate and init matrix A A=malloc2D(X_ext,Y); init2D(A,X,Y); } //Local dimensions x,y x=X_ext/size; y=Y; //Allocate local matrix and scatter global matrix localA=malloc2D(x,y); double * idx; if (rank==0) idx=&A[0][0]; MPI_Scatter(idx,x*y,MPI_DOUBLE,&localA[0][0],x*y,MPI_DOUBLE,0,MPI_COMM_WORLD); if (rank==0) { free2D(A,X_ext,Y); } //Timers struct timeval ts,tf,comps,compf,comms,commf; double total_time,computation_time,communication_time; MPI_Barrier(MPI_COMM_WORLD); gettimeofday(&ts,NULL); /****************************************************************************** The matrix A is distributed in contiguous blocks to the local matrices localA You have to use point-to-point communication routines Don't forget to set the timers for computation and communication! 
******************************************************************************/ int line_index, line_owner; int k, start; MPI_Status status; double *k_row, *temp; temp = malloc(y * sizeof(*k_row)); for (k=0; k<y-1; k++){ start = 0; line_owner = k / x; line_index = k % x; if (rank == line_owner){ start = line_index+1; k_row = localA[line_index]; } else k_row = temp; /* set communication timer */ gettimeofday(&comms, NULL); /* o line_owner stelnei se olous (ektos tou eautou tou) kai oi alloi * kanoun receive th k_row */ if (rank == line_owner){ for (i=0; i<size; i++) if (i != line_owner) MPI_Send( k_row, y, MPI_DOUBLE, i, line_owner, MPI_COMM_WORLD); } else MPI_Recv(k_row, y, MPI_DOUBLE, line_owner, line_owner, MPI_COMM_WORLD, &status); /* stop communication timer */ gettimeofday(&commf, NULL); communication_time += commf.tv_sec - comms.tv_sec + (commf.tv_usec - comms.tv_usec)*0.000001; /* set computation timer */ gettimeofday(&comps, NULL); /* Compute */ go_to_work( localA, k_row, x, y, rank, line_owner, start, k ); /* stop computation timer */ gettimeofday(&compf, NULL); computation_time += compf.tv_sec - comps.tv_sec + (compf.tv_usec - comps.tv_usec)*0.000001; } gettimeofday(&tf,NULL); total_time=tf.tv_sec-ts.tv_sec+(tf.tv_usec-ts.tv_usec)*0.000001; //Gather local matrices back to the global matrix if (rank==0) { A=malloc2D(X_ext,Y); idx=&A[0][0]; } MPI_Gather(&localA[0][0],x*y,MPI_DOUBLE,idx,x*y,MPI_DOUBLE,0,MPI_COMM_WORLD); double avg_total,avg_comp,avg_comm,max_total,max_comp,max_comm; MPI_Reduce(&total_time,&max_total,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&computation_time,&max_comp,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&communication_time,&max_comm,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&total_time,&avg_total,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&computation_time,&avg_comp,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&communication_time,&avg_comm,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); avg_total/=size; 
avg_comp/=size; avg_comm/=size; if (rank==0) { printf("LU-Block-p2p\tSize\t%d\tProcesses\t%d\n",X,size); printf("Max times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",max_total,max_comp,max_comm); printf("Avg times:\tTotal\t%lf\tComp\t%lf\tComm\t%lf\n",avg_total,avg_comp,avg_comm); } //Print triangular matrix U to file if (rank==0) { char * filename="output_block_p2p"; print2DFile(A,X,Y,filename); } MPI_Finalize(); return 0; }