/*
 * Demo driver: prints a SIZE-element array of doubles with show_array(),
 * hands it to mult_array() together with the factor 2.5, then prints it again.
 */
int main(void)
{
    const double factor = 2.5;
    double values[SIZE] = {20.0, 17.66, 8.2, 15.3, 22.22};

    /* Contents before the call. */
    printf("The original array:\n");
    show_array(values, SIZE);

    mult_array(values, SIZE, factor);

    /* Contents after the call. */
    printf("The array after calling mult_array():\n");
    show_array(values, SIZE);

    return EXIT_SUCCESS;
}
/*
 * Demo driver for the "dip" array: shows the SIZE initial values, passes the
 * array to mult_array() with a multiplier of 2.5, and shows the values again.
 */
int main(void)
{
    double samples[SIZE] = {20.0, 17.66, 8.2, 15.3, 22.22};
    const double multiplier = 2.5;

    printf("The original dip array:\n");
    show_array(samples, SIZE);

    mult_array(samples, SIZE, multiplier);

    printf("The dip array after calling mult_array():\n");
    show_array(samples, SIZE);

    return 0;
}
int main(int argc,char **argv) { #ifndef SINGLE_PREC double *A,*B,*p,*C; #else float *A,*B,*p,*C; #endif int i,j,k,x,y,z,nx,ny,nz,proc_id,nproc,dims[2],ndim,nu; int istart[3],isize[3],iend[3]; int fstart[3],fsize[3],fend[3]; int iproc,jproc,ng[3],kmax,iex,conf,m,n; long int Nglob,Ntot; double pi,twopi,sinyz; double *sinx,*siny,*sinz,factor; double rtime1,rtime2,gt[12],gt1[12],gt2[12],timers[12]; double tcomm,gtcomm[3]; double cdiff,ccdiff,ans,prec; FILE *fp; unsigned char op_f[]="fft", op_b[]="tff"; int memsize[3]; #ifndef SINGLE_PREC void print_all(double *,long int,int,long int),mult_array(double *,long int,double); #else void print_all(float *,long int,int,long int),mult_array(float *,long int,double); #endif MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&nproc); MPI_Comm_rank(MPI_COMM_WORLD,&proc_id); pi = atan(1.0)*4.0; twopi = 2.0*pi; for(i=0; i< 12; i++) { gt[i] = 0.0; gt1[i] = 0.0; gt2[i] = 1E10; } Cset_timers(); if(proc_id == 0) { if((fp=fopen("stdin", "r"))==NULL){ printf("Cannot open file. Setting to default nx=ny=nz=128, ndim=2, n=1.\n"); nx=ny=nz=128; n=1; } else { fscanf(fp,"%d %d %d %d %d\n",&nx,&ny,&nz,&ndim,&n); fclose(fp); } #ifndef SINGLE_PREC printf("Double precision\n (%d %d %d) grid\n %d proc. dimensions\n%d repetitions\n",nx,ny,nz,ndim,n); #else printf("Single precision\n (%d %d %d) grid\n %d proc. dimensions\n%d repetitions\n",nx,ny,nz,ndim,n); #endif } MPI_Bcast(&nx,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&ny,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nz,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&ndim,1,MPI_INT,0,MPI_COMM_WORLD); if(ndim == 1) { dims[0] = 1; dims[1] = nproc; } else if(ndim == 2) { fp = fopen("dims","r"); if(fp != NULL) { if(proc_id == 0) printf("Reading proc. grid from file dims\n"); fscanf(fp,"%d %d\n",dims,dims+1); fclose(fp); if(dims[0]*dims[1] != nproc) dims[1] = nproc / dims[0]; } else { if(proc_id == 0) printf("Creating proc. 
grid with mpi_dims_create\n"); dims[0]=dims[1]=0; MPI_Dims_create(nproc,2,dims); if(dims[0] > dims[1]) { dims[0] = dims[1]; dims[1] = nproc/dims[0]; } } } if(proc_id == 0) printf("Using processor grid %d x %d\n",dims[0],dims[1]); /* Initialize P3DFFT */ Cp3dfft_setup(dims,nx,ny,nz,MPI_Comm_c2f(MPI_COMM_WORLD),nx,ny,nz,1,memsize); /* Get dimensions for input array - real numbers, X-pencil shape. Note that we are following the Fortran ordering, i.e. the dimension with stride-1 is X. */ /* printf("Calling get_dims 1\n"); */ conf = 1; Cp3dfft_get_dims(istart,iend,isize,conf); /* Get dimensions for output array - complex numbers, Z-pencil shape. Stride-1 dimension could be X or Z, depending on how the library was compiled (stride1 option) */ /* printf("Calling get_dims 2\n"); */ conf = 2; Cp3dfft_get_dims(fstart,fend,fsize,conf); /* printf("Allocating\n"); */ /* Allocate and Initialize */ #ifndef SINGLE_PREC A = (double *) malloc(sizeof(double) * isize[0]*isize[1]*isize[2]); B = (double *) malloc(sizeof(double) * fsize[0]*fsize[1]*fsize[2]*2); C = (double *) malloc(sizeof(double) * isize[0]*isize[1]*isize[2]); #else A = (float *) malloc(sizeof(float) * isize[0]*isize[1]*isize[2]); B = (float *) malloc(sizeof(float) * fsize[0]*fsize[1]*fsize[2]*2); C = (float *) malloc(sizeof(float) * isize[0]*isize[1]*isize[2]); #endif if(A == NULL) printf("%d: Error allocating array A (%d)\n",proc_id,isize[0]*isize[1]*isize[2]); if(B == NULL) printf("%d: Error allocating array B (%d)\n",proc_id,fsize[0]*fsize[1]*fsize[2]*2); if(C == NULL) printf("%d: Error allocating array C (%d)\n",proc_id,isize[0]*isize[1]*isize[2]); /* printf("Initializing\n"); */ sinx = malloc(sizeof(double)*nx); siny = malloc(sizeof(double)*ny); sinz = malloc(sizeof(double)*nz); for(z=0;z < isize[2];z++) sinz[z] = sin((z+istart[2]-1)*twopi/nz); for(y=0;y < isize[1];y++) siny[y] = sin((y+istart[1]-1)*twopi/ny); for(x=0;x < isize[0];x++) sinx[x] = sin((x+istart[0]-1)*twopi/nx); p = A; for(z=0;z < isize[2];z++) 
for(y=0;y < isize[1];y++) { sinyz = siny[y]*sinz[z]; for(x=0;x < isize[0];x++) *p++ = sinx[x]*sinyz; } Ntot = fsize[0]*fsize[1]; Ntot *= fsize[2]*2; Nglob = nx * ny; Nglob *= nz; factor = 1.0/Nglob; rtime1 = 0.0; for(m=0;m < n;m++) { if(proc_id == 0) printf("Iteration %d\n",m); MPI_Barrier(MPI_COMM_WORLD); rtime1 = rtime1 - MPI_Wtime(); /* compute forward Fourier transform on A, store results in B */ Cp3dfft_ftran_r2c(A,B,op_f); rtime1 = rtime1 + MPI_Wtime(); if(proc_id == 0) printf("Result of forward transform\n"); print_all(B,Ntot,proc_id,Nglob); /* normalize */ mult_array(B,Ntot,factor); /* Compute backward transform on B, store results in C */ MPI_Barrier(MPI_COMM_WORLD); rtime1 = rtime1 - MPI_Wtime(); Cp3dfft_btran_c2r(B,C,op_b); rtime1 = rtime1 + MPI_Wtime(); } /* free work space */ Cp3dfft_clean(); /* Check results */ cdiff = 0.0; p = C; for(z=0;z < isize[2];z++) for(y=0;y < isize[1];y++) { sinyz =siny[y]*sinz[z]; for(x=0;x < isize[0];x++) { ans = sinx[x]*sinyz; if(cdiff < fabs(*p - ans)) cdiff = fabs(*p - ans); p++; } } Cget_timers(timers); #ifndef SINGLE_PREC MPI_Reduce(&cdiff,&ccdiff,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); #else MPI_Reduce(&cdiff,&ccdiff,1,MPI_REAL,MPI_MAX,0,MPI_COMM_WORLD); #endif if(proc_id == 0) { #ifndef SINGLE_PREC prec = 1.0e-14; #else prec = 1.0e-5; #endif if(ccdiff > prec * Nglob*0.25) printf("Results are incorrect\n"); else printf("Results are correct\n"); printf("max diff =%g\n",ccdiff); } /* Gather timing statistics */ MPI_Reduce(&rtime1,&rtime2,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); for (i=0;i < 12;i++) { timers[i] = timers[i] / ((double) n); } MPI_Reduce(&timers,>,12,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&timers,>1,12,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); MPI_Reduce(&timers,>2,12,MPI_DOUBLE,MPI_MIN,0,MPI_COMM_WORLD); tcomm = (timers[1]+timers[2]+timers[3]+timers[4]); MPI_Reduce(&timers,>,12,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); for (i=0;i < 12;i++) { gt[i] = gt[i]/ ((double) nproc); } if(proc_id == 0) { 
printf("Time per loop=%lg\n",rtime2/((double) n)); for(i=0;i < 12;i++) { printf("timer[%d] (avg/max/min): %lE %lE %lE\n",i+1,gt[i],gt1[i],gt2[i]); } } MPI_Finalize(); }