int main(int argc, char **argv) { int status; char* env = NULL; env = getenv("GPAW_OFFLOAD"); if (env) { errno = 0; gpaw_offload_enabled = strtol(env, NULL, 10); if (errno) { fprintf(stderr, "Wrong value for GPAW_OFFLOAD.\nShould be either 0 or 1, but was %s\n", env); } } fprintf(stderr, "GPAW info: GPAW_OFFLOAD=%d\n", gpaw_offload_enabled); #ifdef CRAYPAT PAT_region_begin(1, "C-Initializations"); #endif #ifndef GPAW_OMP MPI_Init(&argc, &argv); #else int granted; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &granted); if(granted != MPI_THREAD_MULTIPLE) exit(1); #endif // GPAW_OMP // Get initial timing double t0 = MPI_Wtime(); #ifdef GPAW_PERFORMANCE_REPORT gpaw_perf_init(); #endif #ifdef GPAW_MPI_MAP int tag = 99; int myid, numprocs, i, procnamesize; char procname[MPI_MAX_PROCESSOR_NAME]; MPI_Comm_size(MPI_COMM_WORLD, &numprocs ); MPI_Comm_rank(MPI_COMM_WORLD, &myid ); MPI_Get_processor_name(procname, &procnamesize); if (myid > 0) { MPI_Send(&procnamesize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); MPI_Send(procname, procnamesize, MPI_CHAR, 0, tag, MPI_COMM_WORLD); } else { printf("MPI_COMM_SIZE is %d \n", numprocs); printf("%s \n", procname); for (i = 1; i < numprocs; ++i) { MPI_Recv(&procnamesize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(procname, procnamesize, MPI_CHAR, i, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); printf("%s \n", procname); } } #endif // GPAW_MPI_MAP #ifdef GPAW_MPI_DEBUG // Default Errhandler is MPI_ERRORS_ARE_FATAL MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN); #endif // Progname seems to be needed in some circumstances to resolve // correct default sys.path Py_SetProgramName(argv[0]); Py_Initialize(); #pragma offload target(mic) if(gpaw_offload_enabled) { init_openmp(); } if (PyType_Ready(&MPIType) < 0) return -1; if (PyType_Ready(&LFCType) < 0) return -1; if (PyType_Ready(&LocalizedFunctionsType) < 0) return -1; if (PyType_Ready(&OperatorType) < 0) return -1; if (PyType_Ready(&SplineType) < 0) return -1; if (PyType_Ready(&TransformerType) < 0) return -1; if (PyType_Ready(&XCFunctionalType) < 0) return -1; if (PyType_Ready(&lxcXCFunctionalType) < 0) return -1; PyObject* m = Py_InitModule3("_gpaw", functions, "C-extension for GPAW\n\n...\n"); if (m == NULL) return -1; Py_INCREF(&MPIType); PyModule_AddObject(m, "Communicator", (PyObject *)&MPIType); // Add initial time to _gpaw object PyModule_AddObject(m, "time0", PyFloat_FromDouble(t0)); Py_INCREF(&LFCType); Py_INCREF(&LocalizedFunctionsType); Py_INCREF(&OperatorType); Py_INCREF(&SplineType); Py_INCREF(&TransformerType); Py_INCREF(&XCFunctionalType); Py_INCREF(&lxcXCFunctionalType); #ifdef GPAW_WITH_HDF5 init_gpaw_hdf5(); #endif import_array1(-1); MPI_Barrier(MPI_COMM_WORLD); #ifdef CRAYPAT PAT_region_end(1); PAT_region_begin(2, "all other"); #endif status = Py_Main(argc, argv); #ifdef CRAYPAT PAT_region_end(2); #endif #ifdef GPAW_PERFORMANCE_REPORT gpaw_perf_finalize(); #endif MPI_Finalize(); return status; }
int main(int argc, char* argv[]){ time_t time1 = time(0), time2; //-------MPI initialization------------- int numprocs, myid, namelen; char processor_name[MPI_MAX_PROCESSOR_NAME]; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Get_processor_name(processor_name, &namelen); fprintf(stderr, "Process %d running on %s\n", myid, processor_name); string numbers = "0123456789"; // !!!!! np <= 10 string myid_str(numbers, myid, 1); MPI_Status status; // define a new MPI data type for particles MPI_Datatype particletype; MPI_Type_contiguous(18, MPI_DOUBLE, &particletype); // !!! 14->18 changed MPI_Type_commit(&particletype); //-------- end MPI init---------------- // wait for gdb waitforgdb(myid); // read input file (e.g. patric.cfg): if(argv[1] == 0){ printf("No input file name !\n"); MPI_Abort(MPI_COMM_WORLD, 0); } input_from_file(argv[1], myid); double eps_x = rms_emittance_x0; // handy abbreviation double eps_y = rms_emittance_y0; // same // Synchronous particle: SynParticle SP; SP.Z = Z; SP.A = A; SP.gamma0 = 1.0 + (e_kin*1e6*qe)/(mp*clight*clight) ; SP.beta0 = sqrt((SP.gamma0*SP.gamma0-1.0)/(SP.gamma0*SP.gamma0)) ; SP.eta0 = 1.0/pow(gamma_t, 2)-1.0/pow(SP.gamma0, 2); //-------Init Lattice------- BeamLine lattice; double tunex, tuney; SectorMap CF(CF_advance_h/NCF, CF_advance_v/NCF, CF_R, CF_length/NCF, SP.gamma0); BeamLine CF_cell; if(madx_input_file == 1){ // read madx sectormap and twiss files cout << "madx sectormap" << endl; string data_dir_in = input; lattice.init(data_dir_in+"/mad/", circum, tunex, tuney); } else{ // init constant focusing (CF) sectormap and cell: cout << "constant focusing" << endl; for(int j=0; j<NCF; j++) CF_cell.add_map(CF); lattice.init(CF_cell); } // Other variables: double dx = 2.0*piperadius/(NX-1.0); // needed for Poisson solver and grids double dy = 2.0*piperadius/(NY-1.0); // needed for Poisson solver and grids double dz = circum/NZ; double ds = 0.4; // value needed here only for setting dxs, dys. double dxs = 4.0*(dx/ds)/(NX-1.0); // only for plotting xs, not for tracking double dys = 4.0*(dx/ds)/(NX-1.0); // only for plotting ys, not for tracking double charge = current*circum/(NPIC*SP.beta0*clight*qe); // macro-particle charge Q/e double zm = 0.5*circum*bunchfactor; // (initial) bunch length if(init_pic_z == 1 || init_pic_z == 3 || init_pic_z == 4 || init_pic_z == 6) zm = 1.5*0.5*circum*bunchfactor; // for parabolic bunch double zm1 = -zm*1.0; // left bunch boundary double zm2 = zm*1.0; // right bunch boundary if(init_pic_z==7) zm=0.25; double rmsToFull; // ratio of rms to full emittance for Bump; SP // open output file patric.dat: string data_dir = ausgabe; data_dir = data_dir + "/"; string outfile = data_dir + "patric.dat"; FILE *out = fopen(outfile.c_str(), "w"); // init random number generator: long d = -11*(myid+1); // was -1021 transverse distribution: each slice needs a different initialization ! 
long dl = -103; // was -103 longitudinal plane: same random set needed long dran = -101; // for BTF noise excitation: same random sets needed // set some global lattice parameters double cell_length = lattice.get_L(); int Nelements = lattice.get_size(); if(myid == 0){ cout << "Nelements:" << Nelements << endl; cout << "Cell length:" << cell_length << endl; } // define pointers to first/last element in beam line: const list<SectorMap>::iterator first_elem = lattice.get_first_element(); const list<SectorMap>::iterator last_elem = --lattice.get_end_element(); TwissP twiss0, twiss_TK; lattice.first_element(); twiss0 = last_elem->get_twiss(); twiss_TK = first_elem->get_twiss(); double Ds0 = 0.0; // Dispersion derivative if(madx_input_file == 0){ // machine tunes from lattice lattice.phase_advance(tunex, tuney); tunex = circum/cell_length*tunex/(2.0*PI); tuney = circum/cell_length*tuney/(2.0*PI); bumpI=0; if(myid == 0){ cout << "advancex: " << tunex*180.0/PI << endl; cout << "tunex0: " << tunex << endl; cout << "tuney0: " << tuney << endl; } } // Chromatic correction kick: Chrom Chrom0; // Octupole: Octupole Oct0(koct); // Amplitude detuning; works only for constant focusing; SA //if(madx_input_file == 0) //AmplitudeDetuning Amp0(tunex, tuney, dqx_detune/(1.0e-6*eps_x), dqy_detune/(1.0e-6*eps_y), circum/(2.0*PI), CF); //--------end lattice---------- // set matched RF voltage: int linrf = 0; if (cavity == 3) linrf = 1; double Ym = circum/(2.0*PI)*(1.0-cos(2.0*PI*zm/circum)); if (linrf == 1) Ym = circum/(2.0*PI)*0.5*pow(2.0*PI*zm/circum, 2); double velm = abs(SP.eta0)*SP.beta0*clight*sqrt(5.0)*momentum_spread*2.0*PI/(circum); double fsyn = 1.0/(2.0*PI)*velm*sqrt(circum/(2.0*PI))/sqrt(2.0*Ym); double V0rf = pow(2.0*PI*fsyn, 2)*pow(circum, 2)/(2.0*PI)*mp*SP.A*SP.gamma0/(qe*SP.Z*abs(SP.eta0)); // Init particle distribution: Pic Pics(&SP, charge, NPIC/numprocs, data_dir + "pics_" + myid_str + ".dat"); Pics.z1 = zm1+myid*(zm2-zm1)/numprocs; // left boundary in z for this slice Pics.z2 = Pics.z1+(zm2-zm1)/numprocs; // right boundary double slice_length = Pics.z2-Pics.z1; // slice length Pic NewPics(&SP, charge, NPIC/numprocs); NewPics.z1 = Pics.z1; NewPics.z2 = Pics.z2; // Init 1D longitudinal grids Grid1D rho_z_tmp(NZ, dz, -0.5*circum); Grid1D rho_z(NZ, dz, -0.5*circum, data_dir + "rho_z.dat"); Grid1D dipole_current_x_tmp(NZ, dz, -0.5*circum); Grid1D dipole_current_x(NZ, dz, -0.5*circum, data_dir + "dipole_x.dat"); Grid1D dipole_current_xs_tmp(NZ, dz, -0.5*circum); Grid1D dipole_current_xs(NZ, dz, -0.5*circum); Grid1D dipole_kick_x(NZ, dz, -0.5*circum, data_dir + "dipole_kick_x.dat"); Grid1D dipole_current_y_tmp(NZ, dz, -0.5*circum); Grid1D dipole_current_y(NZ, dz, -0.5*circum, data_dir + "dipole_y.dat"); // Init 2D transverse grids: Grid2D rho_xy(NX, NY, dx, dy, data_dir + "rho_xy.dat"); Grid2D rho_xy_tmp(NX, NY, dx, dy); Grid2D xxs(NX, NX, dx, dxs, data_dir + "xxs.dat"); Grid2D xxs_tmp(NX, NX, dx, dxs); Grid2D yys(NY, NY, dy, dys, data_dir + "yys.dat"); Grid2D yys_tmp(NY, NY, dy, dys); Grid2D xsys(NX, NY, dxs, dys, data_dir + "xsys.dat"); Grid2D xsys_tmp(NX, NY, dxs, dys); Grid2D zx(NZ, NX, dz, dx, data_dir + "zx.dat"); Grid2D zx_tmp(NZ, NX, dz, dx); Grid2D Ex(NX, NY, dx, dy, data_dir + "Ex.dat"); Grid2D Ey(NX, NY, dx, dy, data_dir + "Ey.dat"); // Init 3D sliced grids (for 3D space charge calculation) if( fmod((float)NZ_bunch, (float)numprocs) != 0.0 ){ cout << "NZ_bunch kein Vielfaches von numprocs" << endl; MPI_Abort(MPI_COMM_WORLD, 0); } Grid3D rho_xyz(NZ_bunch/numprocs, Pics.z1, Pics.z2, rho_xy); 
Grid3D Ey3(NZ_bunch/numprocs, Pics.z1, Pics.z2, rho_xy); Grid3D Ex3(NZ_bunch/numprocs, Pics.z1, Pics.z2, rho_xy); // Init 2D Greens function for poisson solver Greenfb gf1(rho_xy, image_x, image_y); // open boundary condition // for the beam radius cacluation; factor for rms equivalent switch(init_pic_xy){ case 0: // Waterbag rmsToFull = 6; break; case 1: // KV rmsToFull = 4; break; case 2: // Semi-Gauss rmsToFull = 4; // approximate break; case 3: // Gauss rmsToFull = 4; // approximate break; default: printf("Invalid option for transverse particle distribution. Aborting.\n"); MPI_Abort(MPI_COMM_WORLD, 0); } // injection bump initialize Bump lob(tunex); double a; // beam radius horizontal switch(bumpI){ case 0: cout << "no mti" << endl; max_inj = 1; amp0=0; break; case 1: // The bump height is defined by user given offcenter parameter. The injection angle is equal to the septum tilt angle (as done in SIS18). cout << "mti version SP" << endl; a = sqrt(twiss_TK.betx*eps_x*rmsToFull)*0.001+twiss_TK.Dx*momentum_spread; // half width of injected beam [m] with WB distribution, change to Main, SA offcenter_x=x_septum + d_septum + a; amp0=offcenter_x; amp=amp0; ampp0=inj_angle; delAmp=(amp0-2*a)/double(max_inj); //0.0041*3;// lob.BumpSp(&lattice,max_inj, myid, amp0, ampp0, delAmp); // local orbit bump for beam injection; SP break; case 2: amp=amp0; cout << "mti flexibility version" << endl; lob.BumpModi(&lattice,amp); break; case 3: amp=amp0; cout << "mti flexibility version exponential decrease" << "tau" << tau << endl; lob.BumpModi(&lattice,amp); break; case 4: amp=amp0; cout << "mti flexibility version sin decrease" << "tau" << tau << endl; lob.BumpModi(&lattice,amp); break; default: printf("Invalid option for bump injection. Aborting.\n"); MPI_Abort(MPI_COMM_WORLD, 0); } //if(myid == 0) //cout << "Expected single beamlett tune shifts: dQ_x=" //<< rp*SP.Z*current*circum / (rmsToFull*PI*clight*qe*SP.A*pow(SP.beta0*SP.gamma0, 3)*(eps_x+sqrt(eps_x*eps_y*tunex/tuney)))*1e6 //<< ", dQ_y=" //<< rp*SP.Z*current*circum / (rmsToFull*PI*clight*qe*SP.A*pow(SP.beta0*SP.gamma0, 3)*(eps_y+sqrt(eps_x*eps_y*tuney/tunex)))*1e6 //<< endl; // print IDL parameter file idl.dat: if(myid == 0){ //cout << "Vrf [kV]: " << V0rf*1.0e-3 << " fsyn [kHz]: " << fsyn*1.0e-3 << endl; print_IDL(data_dir, numprocs, cell_length, Nelements, tunex, tuney, lattice, cells, max_inj); } //----------------counters and other variables-------------------------- int Nexchange = 1; // exchange of particles between slices after every sector map. int Nprint = print_cell*Nelements; // output of particles every cell*print_cell //int Nibs = 1; // correct for IBS every Nibs steps double Ntot; // total number of particles: for screen output int counter = 0; // counts sector maps double s = 0.0; // path length double Nslice; // total number of slices double emitx; // emittance: for screen output double dtheta = 0.0; // btf dipole kick double pickup_h, pickup_v; // horizontal/vertical pickup signals double rms_advancex = 0.0, rms_advancey = 0.0; // rms phase advance: for output int inj_counter = 0; // number of injected beamletts; SP long N_inj = 0; // number of injected particles //---------parameters for exchange of particles between slices------- int destl; //!< ID of left neighbour slice (-1: no neighbour). 
int destr; //!< ID of right neighbour //---finite bunch: no exchange between ends--- if(bc_end == 0){ if(myid == 0){ destl =-1; destr = myid+1; }else if(myid == numprocs-1){ destl = myid-1; destr =-1; }else{ destl = myid-1; destr = myid+1; } } //---periodic (in z) boundary condition--- if(bc_end == 1){ if(myid == 0){ destl = numprocs-1; destr = myid+1; }else if(myid == numprocs-1){ destl = myid-1; destr = 0; } else{ destl = myid-1; destr = myid+1; } } //--------------------- end-parameters for particle exchange --------------- long *septLoss = new long; long *sl_slice = new long; double *momenta = new double[19]; double *momenta_tot = new double[19]; double tmp=0; long size_old; offcenter_y=0.0; inj_phase_y=0.0e-3; //-------------------------------------------------------------------------- //----------------------- start loop (do...while) -------------------------- //-------------------------------------------------------------------------- double z0; do{ // injection; SP if(!(counter%Nelements)) { // at beginning each turn... if(inj_counter < max_inj) { size_old=Pics.get_size(); // set longitudinal distribution: switch(init_pic_z){ case 0: // coasting + Elliptic Pics.parabolic_dc(bunchfactor, circum, momentum_spread, NPIC, &dl); break; case 1: // bunch + Elliptic (1.5 correction factor for bunching) Pics.parabolic(zm, 0, momentum_spread, NPIC, &dl); break; case 2: // coasting + Gauss Pics.coast_gauss(bunchfactor, circum, momentum_spread, NPIC, &dl); break; case 3: // bunch + Gauss Pics.bunch_gauss(zm, circum, momentum_spread, NPIC, &dl); break; case 4: // const. bunch dist. Pics.bunch_const(zm, circum, momentum_spread, NPIC, &dl,linrf); break; case 5: // air bag dist. Pics.barrier_air_bag(zm, momentum_spread, NPIC, &dl); break; case 6: // bunch air bag dist. Pics.bunch_air_bag(zm, circum, momentum_spread, NPIC, &dl); break; case 7: // 168 mirco bunches, injection z0=-circum/2.; int l; for (l=0; l<168; l++){ Pics.parabolic(zm, z0, momentum_spread, NPIC/168, &dl); z0+=1.286; } break; default: printf("Invalid option for longitudinal particle distribution. Aborting.\n"); MPI_Abort(MPI_COMM_WORLD, 0); } // set transverse distribution: switch(init_pic_xy){ case 0: // Waterbag rmsToFull = 6; Pics.waterbag_xy(1.e-6*eps_x, 1.0e-6*eps_y, twiss_TK.alpx, twiss_TK.alpy, pow(mismatch_x, 2)*twiss_TK.betx, pow(mismatch_y, 2)*twiss_TK.bety, twiss_TK.Dx, Ds0, offcenter_x, inj_angle, offcenter_y, inj_phase_y, size_old, &d); break; case 1: // KV rmsToFull = 4; Pics.KV_xy(1.e-6*eps_x, 1.0e-6*eps_y, twiss_TK.alpx, twiss_TK.alpy, pow(mismatch_x, 2)*twiss_TK.betx, pow(mismatch_y, 2)*twiss_TK.bety, twiss_TK.Dx, Ds0, offcenter_x, inj_angle, offcenter_y, inj_phase_y, size_old, &d); break; case 2: // Semi-Gauss rmsToFull = 4; // approximate Pics.SG(1.e-6*eps_x, 1.0e-6*eps_y, twiss_TK.alpx, twiss_TK.alpy, pow(mismatch_x, 2)*twiss_TK.betx, pow(mismatch_y, 2)*twiss_TK.bety, twiss_TK.Dx, Ds0, offcenter_x, inj_angle, offcenter_y, inj_phase_y, size_old, &d); break; case 3: // Gauss rmsToFull = 4; // approximate Pics.Gauss_xy(1.e-6*eps_x, 1.0e-6*eps_y, twiss_TK.alpx, twiss_TK.alpy, pow(mismatch_x, 2)*twiss_TK.betx, pow(mismatch_y, 2)*twiss_TK.bety, twiss_TK.Dx, Ds0, offcenter_x, inj_angle, offcenter_y, inj_phase_y, size_old, &d); break; default: printf("Invalid option for transverse particle distribution. 
Aborting.\n"); MPI_Abort(MPI_COMM_WORLD, 0); } if (bumpI!=0) { *sl_slice = NewPics.localLoss_x(x_septum, 100.); // loss on septum loss+=*sl_slice; MPI_Reduce(sl_slice, septLoss, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD); if(myid == 0) cout<<"The incoming beamlet number "<<inj_counter+1<< " lost "<<loss<< " macro particles on the septum.\n"; } N_inj += NPIC; inj_counter +=1; } // bump reduction if (amp > 0.001 ) { if(bumpI==1) { amp-=delAmp; ampp0-=delAmp*ampp0/amp0; lob.decrement(); } if (bumpI==2) { amp-=delAmp; lob.decrementModi(amp); } if (bumpI==3) { amp=amp0*exp(-tau*counter/Nelements); lob.decrementModi(amp); } if (bumpI==4) { amp=amp0*(1+sin(-tau*counter/Nelements)); lob.decrementModi(amp); } } } //------------ Start Output---------------------------------------- // store rms momenta every time step in patric.dat: if(counter%1 == 0){ Nslice = Pics.get_size(); // number of particles in this slice momenta[0] = Nslice*Pics.rms_emittance_x(); momenta[1] = Nslice*Pics.rms_emittance_y(); momenta[2] = Nslice*Pics.x_max(); momenta[3] = Nslice*Pics.y_max(); momenta[4] = Nslice*Pics.x_rms(); momenta[5] = Nslice*Pics.y_rms(); momenta[6] = Nslice*Pics.rms_momentum_spread(); momenta[7] = Nslice*Pics.xzn(2.0, zm); momenta[8] = Nslice*Pics.xzn(1.0, zm); momenta[9] = Nslice; momenta[10] = Nslice*rms_advancex; // rms phase advance in x momenta[11] = Nslice*rms_advancey; momenta[12] = Nslice*Pics.offset_x(); momenta[13] = Nslice*Pics.offset_y(); momenta[14] = Nslice*dtheta; // btf noise signal momenta[15] = Nslice*pickup_h; momenta[16] = Nslice*pickup_v; momenta[17] = Nslice*loss; momenta[18] = Nslice*N_inj; // mpi_reduce for summation of all 19 moments over all slices MPI_Reduce(momenta, momenta_tot, 19, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); Ntot = momenta_tot[9]; // total number of particles over all slices emitx = momenta_tot[0]/Ntot; // total rms emittance // stop when loss tolerance level is exceeded (1-Ntot/(max_inj*NPIC))*100. if(myid == 0 && Ntot/N_inj <= lossTol){ // test on number of injected particles; SP cout<<"Loss tolerance exceeded within "<<counter/Nelements+1<<" turns ("<< Ntot<<" of "<<N_inj<<" macro particles left). 
Exiting.\n"; cout.flush(); MPI_Abort(MPI_COMM_WORLD, 0); } // cout<<counter<<' '<<lattice.get_element()->get_name()<<' '<<lattice.get_element()->get_K(1)<<endl; //tmp // write momenta if(myid == 0){ fprintf(out, "%g", s); for(int i=0; i<19; i++) if(i != 9){ fprintf(out, "%15g", momenta_tot[i]/Ntot);} else{ fprintf(out, "%15g", momenta_tot[i]);} fprintf(out, "\n"); fflush(out); } } //------output every Nprint*sectormap--------- if(counter%Nprint == 0){ if(myid == 0){ // to screen //printf("saving at s=%g (m) eps_t=%g dp/p=%g zm2=%g Ntotal=%g\n", s, 1.0e6*emitx, Pics.rms_momentum_spread(), zm2, Ntot); cout.flush(); // electric fields Ex.print(); Ey.print(); } // paricle coordinates to pic.dat: Pics.print(pic_subset); // collect densities for output only: Pics.gatherZ(charge*qe/dz, rho_z_tmp); Pics.gatherX(SP.beta0*clight*charge*qe/dz, dipole_current_x_tmp); Pics.gatherY(SP.beta0*clight*charge*qe/dz, dipole_current_y_tmp); Pics.gatherXY(charge*qe/circum, rho_xy_tmp); Pics.gatherXXs(charge*qe/circum, xxs_tmp); Pics.gatherYYs(charge*qe/circum, yys_tmp); Pics.gatherXsYs(charge*qe/circum, xsys_tmp); Pics.gatherZX(charge*qe/circum, zx_tmp); // summation over all slices: MPI_Allreduce(rho_z_tmp.get_grid(), rho_z.get_grid(), NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(dipole_current_x_tmp.get_grid(), dipole_current_x.get_grid(), NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(dipole_current_y_tmp.get_grid(), dipole_current_y.get_grid(), NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(rho_xy_tmp.get_grid(), rho_xy.get_grid(), NX*NY, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(xxs_tmp.get_grid(), xxs.get_grid(), NX*NX, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(yys_tmp.get_grid(), yys.get_grid(), NY*NY, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(xsys_tmp.get_grid(), xsys.get_grid(), NX*NY, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(zx_tmp.get_grid(), zx.get_grid(), NZ*NX, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); // output to density files: if(myid == 0){ dipole_current_x.print(); dipole_kick_x.print(); dipole_current_y.print(); rho_z.print(); rho_xy.print(); xxs.print(); yys.print(); xsys.print(); zx.print(); } } //-----------------end output-------------------------------------------- // at beginning of a cell: calculate advance per (last) cell, // store old coordinates if(lattice.get_element() == first_elem){ rms_advancex = Pics.rms_phaseadvance_h(); // Pics.rms_wavelength_h(); rms_advancey = Pics.rms_phaseadvance_v(); // Pics.rms_wavelength_v(); if(footprint == 0) Pics.store_old_coordinates(); } if(lattice.get_element()->get_name() == "\"SEPTUM\""){ // losses at septum; SP loss += Pics.localLoss_x(-piperadius, coll_halfgap); } if(lattice.get_element()->get_name() == "\"ACCEPTANCE\""){ // losses at limiting acceptance; SA double tmp = lattice.get_element()->get_betx(); Pics.localLoss_x(-sqrt(180e-6*tmp), sqrt(180e-6*tmp)); } // Transport particles through sectormap, update slice position s: ds = lattice.get_element()->get_L(); s += ds; Pics.transport(lattice.get_element()->get_map(), piperadius); //-----exchange particles between slices------------------------ if(counter != 0 && counter%Nexchange == 0 && numprocs > 1){ int Npl; //!< Number of particles to be exchanged with left neighbour int Npr; //!< particles exchanged with right neighbour //! 
vector of particles to be exchanged vector<Particle> pl, pr; // send particle to neighbor slices: if(destl >= 0){ pl = Pics.get_particles_left(circum); Npl = pl.size(); MPI_Send(&Npl, 1, MPI_INT, destl, 1, MPI_COMM_WORLD); MPI_Send(&pl[0], Npl, particletype, destl, 1, MPI_COMM_WORLD); } if(destr >= 0){ pr = Pics.get_particles_right(circum); Npr = pr.size(); MPI_Send(&Npr, 1, MPI_INT, destr, 0, MPI_COMM_WORLD); MPI_Send(&pr[0], Npr, particletype, destr, 0, MPI_COMM_WORLD); } // receive from neighbour slices: Npl = 0; Npr = 0; vector<Particle> pl_in, pr_in; if( destl >= 0 ){ MPI_Recv(&Npl, 1, MPI_INT, destl, 0, MPI_COMM_WORLD, &status); pl_in = vector<Particle>(Npl); MPI_Recv(&pl_in[0], Npl, particletype, destl, 0, MPI_COMM_WORLD, &status); } if(destr >= 0){ MPI_Recv(&Npr, 1, MPI_INT, destr, 1, MPI_COMM_WORLD, &status); pr_in = vector<Particle>(Npr); MPI_Recv(&pr_in[0], Npr, particletype, destr, 1, MPI_COMM_WORLD, &status); } Pics.add_particles(pl_in); Pics.add_particles(pr_in); } //-----end exchange of particles------------- // periodic bc without exchange if(numprocs == 1) Pics.periodic_bc(circum); // update wave lengths //if( footprint == 1){ //Pics.update_wavelength_h(ds, 0.0); //Pics.update_wavelength_v(ds);} // nonlinear thin lens kick: if(octupole_kick == 1) Pics.kick(Oct0, lattice.get_element()->get_twiss(), ds); //if(ampdetun_kick == 1) // works only for constant focusing //Pics.kick(Amp0, lattice.get_element()->get_twiss()ds); // correct for chromaticity if(chroma == 1) Pics.kick(Chrom0,lattice.get_element()->get_twiss(), ds); // cavity kick every cell: if(cavity == 1 && counter%Nelements == 0.0) Pics.cavity_kick(V0rf*cell_length/circum, 1, circum/(2.0*PI)); if(cavity == 2 && counter%Nelements == 0.0) Pics.barrier_kick(zm1, zm2); if(cavity == 3 && counter%Nelements == 0.0) Pics.cavity_kick_linear(V0rf*cell_length/circum, 1, circum/(2.0*PI)); // Pickup signals Pics.gatherX(SP.beta0*clight*charge*qe/dz, dipole_current_x_tmp); Pics.gatherY(SP.beta0*clight*charge*qe/dz, dipole_current_y_tmp); MPI_Allreduce(dipole_current_x_tmp.get_grid(), dipole_current_x.get_grid(), NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(dipole_current_y_tmp.get_grid(), dipole_current_y.get_grid(), NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); pickup_h = Pics.pickup_signal(dipole_current_x, circum, s/(SP.beta0*clight))/current; pickup_v = Pics.pickup_signal(dipole_current_y, circum, s/(SP.beta0*clight))/current; //---------------impedance kicks----------------------- komplex dqc_t(dqcr, dqci); // for sliced == 0 if(imp_kick == 1){ if(sliced == 0) Pics.kick(ds/circum*InducedKick(Pics.offset_x(), ds, dqc_t, SP.beta0, tunex, circum), 0.0); else{ dipole_kick_x.reset(); if(Rs > 0.0 || leit > 0.0){ Pics.gatherXs(SP.beta0*clight*charge*qe/dz, dipole_current_xs_tmp); MPI_Allreduce(dipole_current_xs_tmp.get_grid(), dipole_current_xs.get_grid(), NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); InducedWakeKick(dipole_kick_x, dipole_current_x, dipole_current_xs, tunex, 2.0*PI*SP.beta0*clight/circum, nres, Rs, Qs, piperadius, leit, SP.beta0, SP.gamma0*mp*SP.A*pow(clight, 2), SP.Z*qe); } if(Zimage != 0.0) InducedKick(dipole_kick_x, dipole_current_x, Zimage, SP.beta0, SP.gamma0*mp*SP.A*pow(clight, 2), SP.Z*qe); Pics.impedance_kick(dipole_kick_x, circum, ds); } } //---------------end impedance kicks----------------------- //------------self-consistent space charge kicks after every sectormap---- if(space_charge == 1){ // PIC -> charge density for Poisson solver: if (sliced == 0){ Pics.gatherXY(charge*qe/circum, rho_xy_tmp); 
MPI_Allreduce(rho_xy_tmp.get_grid(), rho_xy.get_grid(), NX*NY, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); }else{ Pics.gatherXYZ(charge*qe/rho_xyz.get_dz(), rho_xyz); // send and receive density ghost grids to neighbor slices: // what is exchanged here ??? if(destl >= 0) MPI_Send(rho_xyz.get_ghostl(), NX*NY, MPI_DOUBLE, destl, 2, MPI_COMM_WORLD); if(destr >= 0){ MPI_Recv(rho_xy_tmp.get_grid(), NX*NY, MPI_DOUBLE, destr, 2, MPI_COMM_WORLD, &status); rho_xyz[NZ_bunch/numprocs-1] += rho_xy_tmp; } if(destr >= 0) MPI_Send(rho_xyz.get_ghostr(), NX*NY, MPI_DOUBLE, destr, 3, MPI_COMM_WORLD); if(destl >= 0){ MPI_Recv(rho_xy_tmp.get_grid(), NX*NY, MPI_DOUBLE, destl, 3, MPI_COMM_WORLD, &status); rho_xyz[0]+= rho_xy_tmp; } } // Poisson solver if(sliced == 0) poisson_xy(Ex, Ey, rho_xy, gf1); else{ poisson_xyz(Ex3, Ey3, rho_xyz, gf1); // send and receive efield ghost grids to neighbor slices: if(destl >= 0){ MPI_Send(Ex3.get_ghostl(), NX*NY, MPI_DOUBLE, destl, 2, MPI_COMM_WORLD); MPI_Send(Ey3.get_ghostl(), NX*NY, MPI_DOUBLE, destl, 4, MPI_COMM_WORLD); } if(destr >= 0){ MPI_Recv(Ex3[NZ_bunch/numprocs-1].get_grid(), NX*NY, MPI_DOUBLE, destr, 2, MPI_COMM_WORLD, &status); MPI_Recv(Ey3[NZ_bunch/numprocs-1].get_grid(), NX*NY, MPI_DOUBLE, destr, 4, MPI_COMM_WORLD, &status); } if(destr >= 0){ MPI_Send(Ex3.get_ghostr(), NX*NY, MPI_DOUBLE, destr, 3, MPI_COMM_WORLD); MPI_Send(Ey3.get_ghostr(), NX*NY, MPI_DOUBLE, destr, 5, MPI_COMM_WORLD); } if(destl >= 0){ MPI_Recv(Ex3[0].get_grid(), NX*NY, MPI_DOUBLE, destl, 3, MPI_COMM_WORLD, &status); MPI_Recv(Ey3[0].get_grid(), NX*NY, MPI_DOUBLE, destl, 5, MPI_COMM_WORLD, &status); } } } // Shift xs and ys: if(space_charge == 1 && ds > 0.0){ if(sliced == 0) Pics.kick(Ex, Ey, ds); else Pics.kick(Ex3, Ey3, ds); } //---------------end self-consistent space charge kicks--------------- // linear sc kicks: if(space_charge == 2 && ds > 0.0) Pics.linear_SC_kick(dQxm, dQym, tunex, tuney, rho_z, current/(SP.beta0*clight), dipole_current_x, dipole_current_y, circum, ds); // nonlinear sc kicks: if(space_charge == 3 && ds > 0.0) Pics.nonlinear_SC_kick(sqrt(1.0e-6*twiss0.betx*eps_x), sqrt(1.0e-6*twiss0.bety*eps_y), dQxm, dQym, tunex, tuney, rho_z, current/(SP.beta0*clight), circum, ds); // dipole noise modulation kick: double dnoiseamp = 1.0e-6; double nus = fsyn/(SP.beta0*clight/circum); if(btf == 1) dtheta = Pics.dipole_mod_kick(s/(SP.beta0*clight), ds, circum, dnoiseamp, (tunex+nus)*SP.beta0*clight/circum, btf_harmonic); // correct for ibs: /*if(counter != 0 && counter%Nibs == 0){ double rate_ibs = 1.0e4; double Dz = rate_ibs*pow(Pics.rms_momentum_spread(), 2); double Dxy = rate_ibs*0.5*(Pics.rms_emittance_x()+Pics.rms_emittance_y()); double betx = lattice.get_element()->get_betx(); double bety = lattice.get_element()->get_bety(); Pics.langevin(rate_ibs, rate_ibs*0.0, Dxy, Dz*0.0, Nibs*ds, betx, bety, &d); }*/ // For bunch compression: Update slice boundaries z1 and z2 from // new bunch boundaries zm1, zm2: /*if(counter != 0 && counter%Nexchange == 0){ if(myid == 0) zm1 = Pics.z_min(); MPI_Bcast(&zm1, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); if(myid == numprocs-1) zm2 = Pics.z_max(); MPI_Bcast(&zm2, 1, MPI_DOUBLE, numprocs-1, MPI_COMM_WORLD); Pics.z1 = zm1+myid*(zm2-zm1)/numprocs; Pics.z2 = Pics.z1+(zm2-zm1)/numprocs; slice_length = Pics.z2-Pics.z1; rho_xyz.get_zleft() = zm1; rho_xyz.get_zright() = zm2; Ex3.get_zleft() = zm1; Ex3.get_zright() = zm2; Ey3.get_zleft() = zm1; Ey3.get_zright() = zm2; }*/ // advance in beam line, go to next element: lattice.next_element(); ++counter; }while(counter != 
cells*Nelements); //loop check, cells (turns) given by user SA //------------------end of loop------------------------------- // close files, free heap: delete septLoss; delete sl_slice; delete[] momenta; delete[] momenta_tot; // [] needed here for the arrays; each pointer needs its own delete; SP fclose(out); // MPI end: MPI_Finalize(); time2 = time(0); double sec = difftime(time2, time1); double h = floor(sec/3600); double min = floor(sec/60-60.*h); sec -= 3600.*h+60.*min; if(myid == 0) {cout << "Total losses: " << (1-Ntot/(max_inj*NPIC))*100. << " %\n" << "Stored particles: " << current*circum*Ntot/(qe*Z*SP.beta0*clight*NPIC) << endl << "Computation time: " << h << ":" << min << ":" << sec << endl; } }
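/*
 * Hedged sketch (illustration only, not part of PATRIC): the slice-to-slice particle
 * exchange above registers MPI_Type_contiguous(18, MPI_DOUBLE) and then sends a count
 * followed by a vector of particles with blocking MPI_Send/MPI_Recv. A minimal
 * self-contained version of that pattern is shown below; the Particle layout and the
 * function name are assumptions. MPI_Sendrecv is used here so the exchange cannot
 * deadlock when both neighbours post large sends at the same time; a missing neighbour
 * (the bc_end == 0 case) can be passed as MPI_PROC_NULL and that side becomes a no-op.
 */
#include <mpi.h>
#include <vector>

struct Particle { double coord[18]; };  // 18 doubles, matching MPI_Type_contiguous(18, MPI_DOUBLE)

void exchange_particles(std::vector<Particle>& outgoing, int dest,
                        std::vector<Particle>& incoming, int src, MPI_Comm comm)
{
    MPI_Datatype particletype;
    MPI_Type_contiguous(18, MPI_DOUBLE, &particletype);
    MPI_Type_commit(&particletype);

    int nout = (int)outgoing.size(), nin = 0;
    // exchange the counts first, then the particle payloads
    MPI_Sendrecv(&nout, 1, MPI_INT, dest, 0, &nin, 1, MPI_INT, src, 0, comm, MPI_STATUS_IGNORE);
    incoming.resize(nin);
    MPI_Sendrecv(nout ? &outgoing[0] : NULL, nout, particletype, dest, 1,
                 nin  ? &incoming[0] : NULL, nin,  particletype, src,  1,
                 comm, MPI_STATUS_IGNORE);

    MPI_Type_free(&particletype);
}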
int main(int argc, char *argv[]) { struct plat_opts_config_mpilogme config; SDF_boolean_t success = SDF_TRUE; uint32_t numprocs; int tmp, namelen, mpiv = 0, mpisubv = 0, i; char processor_name[MPI_MAX_PROCESSOR_NAME]; int msg_init_flags = SDF_MSG_MPI_INIT; config.inputarg = 0; config.msgtstnum = 500; /* We may not need to gather anything from here but what the heck */ loadProperties("/opt/schooner/config/schooner-med.properties"); // TODO get filename from command line /* make sure this is first in order to get the mpi init args */ success = plat_opts_parse_mpilogme(&config, argc, argv) ? SDF_FALSE : SDF_TRUE; printf("input arg %d msgnum %d success %d\n", config.inputarg, config.msgtstnum, success); fflush(stdout); myid = sdf_msg_init_mpi(argc, argv, &numprocs, &success, msg_init_flags); if ((!success) || (myid < 0)) { printf("Node %d: MPI Init failure... exiting - errornum %d\n", myid, success); fflush(stdout); MPI_Finalize(); return (EXIT_FAILURE); } int debug = 0; while(debug); tmp = init_msgtest_sm((uint32_t)myid); /* Enable this process to run threads across 2 cpus, MPI will default to running all threads * on only one core which is not what we really want as it forces the msg thread to time slice * with the fth threads that send and receive messages * first arg is the number of the processor you want to start off on and arg #2 is the sequential * number of processors from there */ lock_processor(0, 7); sleep(1); msg_init_flags = msg_init_flags | SDF_MSG_RTF_DISABLE_MNGMT; /* Startup SDF Messaging Engine FIXME - dual node mode still - pnodeid is passed and determined * from the number of processes mpirun sees. */ sdf_msg_init(myid, &pnodeid, msg_init_flags); MPI_Get_version(&mpiv, &mpisubv); MPI_Get_processor_name(processor_name, &namelen); printf("Node %d: MPI Version: %d.%d Name %s \n", myid, mpiv, mpisubv, processor_name); fflush(stdout); plat_log_msg( PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE, "\nNode %d: Completed Msg Init.. numprocs %d pnodeid %d Starting Test\n", myid, numprocs, pnodeid); for (i = 0; i < 2; i++) { sleep(2); plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE, "\nNode %d: Number of sleeps %d\n", myid, i); } fthInit(); sdf_msg_startmsg(myid, 0, NULL); /* SAVE THIS may need to play with the priority later */ #if 0 struct sched_param param; int newprio = 60; pthread_attr_t hi_prior_attr; pthread_attr_init(&hi_prior_attr); pthread_attr_setschedpolicy(&hi_prior_attr, SCHED_FIFO); pthread_attr_getschedparam(&hi_prior_attr, &param); param.sched_priority = newprio; pthread_attr_setschedparam(&hi_prior_attr, &param); pthread_create(&fthPthread, &hi_prior_attr, &fthPthreadRoutine, NULL); #endif pthread_attr_t attr; pthread_attr_init(&attr); pthread_create(&fthPthread, &attr, &MultiNodeMultiPtlMstosrPthreadRoutine, &numprocs); plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE, "\nNode %d: Created pthread for FTH %d\n", myid, i); pthread_join(fthPthread, NULL); plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE, "\nNode %d: SDF Messaging Test Complete - i %d\n", myid, i); /* Let's stop the messaging engine this will block until they complete */ /* FIXME arg is the threadlvl */ #if 0 if (numprocs > 1) { if (!myid) { for (int index = 1; index < numprocs; index ++) sdf_msg_nsync(myid, index); } else { sdf_msg_nsync(myid, 0); } } #endif sdf_msg_stopmsg(myid, SYS_SHUTDOWN_SELF); plat_shmem_detach(); if (myid == 0) { sched_yield(); printf("Node %d: Exiting message test after yielding... 
Calling MPI_Finalize\n", myid); fflush(stdout); sched_yield(); MPI_Finalize(); } else { printf("Node %d: Exiting message test... Calling MPI_Finalize\n", myid); fflush(stdout); sched_yield(); MPI_Finalize(); } printf("Successfully ends\n"); return (EXIT_SUCCESS); }
int OSPU_Comm_split_node(MPI_Comm oldcomm, MPI_Comm * newcomm) { int rc = MPI_SUCCESS; #if MPI_VERSION >= 3 rc = MPI_Comm_split_type(oldcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, newcomm); if (rc!=MPI_SUCCESS) return rc; #elif defined(MPICH2) && (MPICH2_NUMVERSION>10500000) rc = MPIX_Comm_split_type(oldcomm, MPIX_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, newcomm); if (rc!=MPI_SUCCESS) return rc; #else /* This code was authored by Jim Dinan */ char my_name[MPI_MAX_PROCESSOR_NAME]; MPI_Comm node_comm = MPI_COMM_NULL; MPI_Comm parent_comm; int len; /* Dup so we don't leak communicators */ rc = MPI_Comm_dup(oldcomm, &parent_comm); if (rc!=MPI_SUCCESS) return rc; rc = MPI_Get_processor_name(my_name, &len); if (rc!=MPI_SUCCESS) return rc; while (node_comm == MPI_COMM_NULL) { char root_name[MPI_MAX_PROCESSOR_NAME]; int rank; MPI_Comm old_parent; rc = MPI_Comm_rank(parent_comm, &rank); if (rc!=MPI_SUCCESS) return rc; if (rank == 0) { rc = MPI_Bcast(my_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, parent_comm); if (rc!=MPI_SUCCESS) return rc; strncpy(root_name, my_name, MPI_MAX_PROCESSOR_NAME); } else { rc = MPI_Bcast(root_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, parent_comm); if (rc!=MPI_SUCCESS) return rc; } old_parent = parent_comm; if (strncmp(my_name, root_name, MPI_MAX_PROCESSOR_NAME) == 0) { /* My group splits off, I'm done after this */ rc = MPI_Comm_split(parent_comm, 1, rank, &node_comm); if (rc!=MPI_SUCCESS) return rc; } else { /* My group keeps going, separate from the others */ rc = MPI_Comm_split(parent_comm, 0, rank, &parent_comm); if (rc!=MPI_SUCCESS) return rc; } /* Old parent is no longer needed */ rc = MPI_Comm_free(&old_parent); if (rc!=MPI_SUCCESS) return rc; } *newcomm = node_comm; #endif return rc = MPI_SUCCESS; }
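/*
 * Hedged usage sketch for the helper above (illustration only; the variable and
 * function names below are assumptions, not part of the original source): build a
 * node-local communicator and let rank 0 of each node act as the node leader.
 */
#include <mpi.h>
#include <stdio.h>

int OSPU_Comm_split_node(MPI_Comm oldcomm, MPI_Comm *newcomm);  /* defined above */

int example_node_leader(void)
{
    MPI_Comm node_comm;
    int world_rank, node_rank, rc;

    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    rc = OSPU_Comm_split_node(MPI_COMM_WORLD, &node_comm);
    if (rc != MPI_SUCCESS) return rc;

    MPI_Comm_rank(node_comm, &node_rank);
    if (node_rank == 0)
        printf("world rank %d is the leader of its node\n", world_rank);

    MPI_Comm_free(&node_comm);
    return MPI_SUCCESS;
}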
int main (int argc, char *argv[]) { int rank, nprocs, ilen; char processor[MPI_MAX_PROCESSOR_NAME]; double tstart = 0.0, tend = 0.0; MPI_Status reqstat; MPI_Request send_request; MPI_Request recv_request; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Get_processor_name(processor, &ilen); if (nprocs != 2) { if(rank == 0) printf("This test requires exactly two processes\n"); MPI_Finalize(); exit(EXIT_FAILURE); } int other_proc = (rank == 1 ? 0 : 1); // Hard code GPU affinity since this example only works with 2 GPUs. int igpu = 0; // if(rank == 0 ) /* printf("%s allocates %d MB pinned memory with regual mpi and " "bidirectional bandwidth\n", argv[0], MAX_MSG_SIZE / 1024 / 1024); */ /*printf("node=%d(%s): my other _proc = %d and using GPU=%d\n", rank, processor, other_proc, igpu); */ char *h_src, *h_rcv; // h_src=(char *)malloc(MYBUFSIZE*100*sizeof(char)); // h_rcv=(char *)malloc(MYBUFSIZE*100*sizeof(char)); CHECK(cudaSetDevice(igpu)); CHECK(cudaMallocHost((void**)&h_src, MYBUFSIZE)); CHECK(cudaMallocHost((void**)&h_rcv, MYBUFSIZE)); char *d_src, *d_rcv; CHECK(cudaSetDevice(igpu)); CHECK(cudaMalloc((void **)&d_src, MYBUFSIZE)); CHECK(cudaMalloc((void **)&d_rcv, MYBUFSIZE)); initalData(h_src, h_rcv, MYBUFSIZE); CHECK(cudaMemcpy(d_src, h_src, MYBUFSIZE, cudaMemcpyDefault)); CHECK(cudaMemcpy(d_rcv, h_rcv, MYBUFSIZE, cudaMemcpyDefault)); // latency test for(int size = 1; size <= MAX_MSG_SIZE; size = size * 2) { MPI_Barrier(MPI_COMM_WORLD); if(rank == 0) { tstart = MPI_Wtime(); for(int i = 0; i < loop; i++) { /* * Transfer data from the GPU to the host to be transmitted to * the other MPI process. */ CHECK(cudaMemcpy(h_src, d_src, size, cudaMemcpyDeviceToHost)); // bi-directional transmission MPI_Isend(h_src, size, MPI_CHAR, other_proc, 100, MPI_COMM_WORLD, &send_request); MPI_Irecv(h_rcv, size, MPI_CHAR, other_proc, 10, MPI_COMM_WORLD, &recv_request); MPI_Waitall(1, &recv_request, &reqstat); MPI_Waitall(1, &send_request, &reqstat); /* * Transfer the data received from the other MPI process to * the device. */ CHECK(cudaMemcpy(d_rcv, h_rcv, size, cudaMemcpyHostToDevice)); } tend = MPI_Wtime(); } else { for(int i = 0; i < loop; i++) { /* * Transfer data from the GPU to the host to be transmitted to * the other MPI process. */ CHECK(cudaMemcpy(h_src, d_src, size, cudaMemcpyDeviceToHost)); // bi-directional transmission MPI_Isend(h_src, size, MPI_CHAR, other_proc, 10, MPI_COMM_WORLD, &send_request); MPI_Irecv(h_rcv, size, MPI_CHAR, other_proc, 100, MPI_COMM_WORLD, &recv_request); MPI_Waitall(1, &recv_request, &reqstat); MPI_Waitall(1, &send_request, &reqstat); /* * Transfer the data received from the other MPI process to * the device. */ CHECK(cudaMemcpy(d_rcv, h_rcv, size, cudaMemcpyHostToDevice)); } } MPI_Barrier(MPI_COMM_WORLD); if(rank == 0) { double latency = (tend - tstart) * 1e6 / (2.0 * loop); float performance = (float) size / (float) latency; printf("%6d , %10.2f , %10.2f \n", (size >= 1024 * 1024) ? size : size , latency, performance); fflush(stdout); } } CHECK(cudaFreeHost(h_src)); CHECK(cudaFreeHost(h_rcv)); CHECK(cudaSetDevice(igpu)); CHECK(cudaFree(d_src)); CHECK(cudaFree(d_rcv)); MPI_Finalize(); return EXIT_SUCCESS; }
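/*
 * Hedged alternative to the staging loop above (an assumption about the MPI build, not
 * something the benchmark establishes): if the MPI library is CUDA-aware, the device
 * buffers d_src/d_rcv can be handed to MPI directly and the explicit cudaMemcpy
 * staging copies disappear. One bidirectional exchange then looks like this.
 */
#include <mpi.h>

void exchange_device_buffers(char *d_src, char *d_rcv, int size,
                             int other_proc, int sendtag, int recvtag)
{
    MPI_Request reqs[2];
    MPI_Isend(d_src, size, MPI_CHAR, other_proc, sendtag, MPI_COMM_WORLD, &reqs[0]);
    MPI_Irecv(d_rcv, size, MPI_CHAR, other_proc, recvtag, MPI_COMM_WORLD, &reqs[1]);
    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
}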
/* ADIOI_cb_gather_name_array() - gather a list of processor names from all processes * in a communicator and store them on rank 0. * * This is a collective call on the communicator(s) passed in. * * Obtains a rank-ordered list of processor names from the processes in * "dupcomm". * * Returns 0 on success, -1 on failure. * * NOTE: Needs some work to cleanly handle out of memory cases! */ int ADIOI_cb_gather_name_array(MPI_Comm comm, MPI_Comm dupcomm, ADIO_cb_name_array *arrayp) { char my_procname[MPI_MAX_PROCESSOR_NAME], **procname = 0; int *procname_len = NULL, my_procname_len, *disp = NULL, i; int commsize, commrank, found; ADIO_cb_name_array array = NULL; int alloc_size; if (ADIOI_cb_config_list_keyval == MPI_KEYVAL_INVALID) { /* cleaned up by ADIOI_End_call */ MPI_Keyval_create((MPI_Copy_function *) ADIOI_cb_copy_name_array, (MPI_Delete_function *) ADIOI_cb_delete_name_array, &ADIOI_cb_config_list_keyval, NULL); } else { MPI_Attr_get(comm, ADIOI_cb_config_list_keyval, (void *) &array, &found); if (found) { ADIOI_Assert(array != NULL); *arrayp = array; return 0; } } MPI_Comm_size(dupcomm, &commsize); MPI_Comm_rank(dupcomm, &commrank); MPI_Get_processor_name(my_procname, &my_procname_len); /* allocate space for everything */ array = (ADIO_cb_name_array) ADIOI_Malloc(sizeof(*array)); if (array == NULL) { return -1; } array->refct = 2; /* we're going to associate this with two comms */ if (commrank == 0) { /* process 0 keeps the real list */ array->namect = commsize; array->names = (char **) ADIOI_Malloc(sizeof(char *) * commsize); if (array->names == NULL) { return -1; } procname = array->names; /* simpler to read */ procname_len = (int *) ADIOI_Malloc(commsize * sizeof(int)); if (procname_len == NULL) { return -1; } } else { /* everyone else just keeps an empty list as a placeholder */ array->namect = 0; array->names = NULL; } /* gather lengths first */ MPI_Gather(&my_procname_len, 1, MPI_INT, procname_len, 1, MPI_INT, 0, dupcomm); if (commrank == 0) { #ifdef CB_CONFIG_LIST_DEBUG for (i=0; i < commsize; i++) { FPRINTF(stderr, "len[%d] = %d\n", i, procname_len[i]); } #endif alloc_size = 0; for (i=0; i < commsize; i++) { /* add one to the lengths because we need to count the * terminator, and we are going to use this list of lengths * again in the gatherv. */ alloc_size += ++procname_len[i]; } procname[0] = ADIOI_Malloc(alloc_size); if (procname[0] == NULL) { return -1; } for (i=1; i < commsize; i++) { procname[i] = procname[i-1] + procname_len[i-1]; } /* create our list of displacements for the gatherv. we're going * to do everything relative to the start of the region allocated * for procname[0] */ disp = ADIOI_Malloc(commsize * sizeof(int)); disp[0] = 0; for (i=1; i < commsize; i++) { disp[i] = (int) (procname[i] - procname[0]); } } /* now gather strings */ if (commrank == 0) { MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, procname[0], procname_len, disp, MPI_CHAR, 0, dupcomm); } else { /* if we didn't do this, we would need to allocate procname[] * on all processes...which seems a little silly. 
*/ MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, NULL, NULL, NULL, MPI_CHAR, 0, dupcomm); } if (commrank == 0) { /* no longer need the displacements or lengths */ ADIOI_Free(disp); ADIOI_Free(procname_len); #ifdef CB_CONFIG_LIST_DEBUG for (i=0; i < commsize; i++) { FPRINTF(stderr, "name[%d] = %s\n", i, procname[i]); } #endif } /* store the attribute; we want to store SOMETHING on all processes * so that they can all tell if we have gone through this procedure * or not for the given communicator. * * specifically we put it on both the original comm, so we can find * it next time an open is performed on this same comm, and on the * dupcomm, so we can use it in I/O operations. */ MPI_Attr_put(comm, ADIOI_cb_config_list_keyval, array); MPI_Attr_put(dupcomm, ADIOI_cb_config_list_keyval, array); *arrayp = array; return 0; }
int main (int argc, char **argv) { char pname[MPI_MAX_PROCESSOR_NAME]; int iter; int counter; int c; int tnum = 0; int resultlen; int ret; double value; extern char *optarg; while ((c = getopt (argc, argv, "p:")) != -1) { switch (c) { case 'p': if ((ret = GPTLevent_name_to_code (optarg, &counter)) != 0) { printf ("Failure from GPTLevent_name_to_code\n"); return 1; } if (GPTLsetoption (counter, 1) < 0) { printf ("Failure from GPTLsetoption (%s,1)\n", optarg); return 1; } break; default: printf ("unknown option %c\n", c); printf ("Usage: %s [-p option_name]\n", argv[0]); return 2; } } ret = GPTLsetoption (GPTLabort_on_error, 1); ret = GPTLsetoption (GPTLoverhead, 1); ret = GPTLsetoption (GPTLnarrowprint, 1); if (MPI_Init (&argc, &argv) != MPI_SUCCESS) { printf ("Failure from MPI_Init\n"); return 1; } ret = GPTLinitialize (); ret = GPTLstart ("total"); ret = MPI_Comm_rank (MPI_COMM_WORLD, &iam); ret = MPI_Comm_size (MPI_COMM_WORLD, &nproc); ret = MPI_Get_processor_name (pname, &resultlen); printf ("Rank %d is running on processor %s\n", iam, pname); #ifdef THREADED_OMP nthreads = omp_get_max_threads (); #pragma omp parallel for private (iter, ret, tnum) #endif for (iter = 1; iter <= nthreads; iter++) { #ifdef THREADED_OMP tnum = omp_get_thread_num (); #endif printf ("Thread %d of rank %d on processor %s\n", tnum, iam, pname); value = sub (iter); } ret = GPTLstop ("total"); ret = GPTLpr (iam); if (iam == 0) { printf ("summary: testing GPTLpr_summary...\n"); printf ("Number of threads was %d\n", nthreads); printf ("Number of tasks was %d\n", nproc); } // NOTE: if ENABLE_PMPI is set, 2nd pr call below will show some extra send/recv calls // due to MPI calls from within GPTLpr_summary_file if (GPTLpr_summary (MPI_COMM_WORLD) != 0) return 1; if (GPTLpr_summary_file (MPI_COMM_WORLD, "timing.summary.duplicate") != 0) return 1; ret = MPI_Finalize (); if (GPTLfinalize () != 0) return 1; return 0; }
int main(int argc, char *argv[]) { info = (struct test_info *)malloc(sizeof(struct test_info)); test_info_init(info); info->test_type = 0; info->msg_count=50; struct plat_opts_config_mpilogme config; SDF_boolean_t success = SDF_TRUE; uint32_t numprocs; int tmp, namelen, mpiv = 0, mpisubv = 0; char processor_name[MPI_MAX_PROCESSOR_NAME]; int msg_init_flags = SDF_MSG_MPI_INIT; config.inputarg = 0; config.msgtstnum = 50; /* We may not need to gather anything from here but what the heck */ loadProperties("/opt/schooner/config/schooner-med.properties"); // TODO get filename from command line /* make sure this is first in order to get the the mpi init args */ success = plat_opts_parse_mpilogme(&config, argc, argv) ? SDF_FALSE : SDF_TRUE; printf("input arg %d msgnum %d success %d\n", config.inputarg, config.msgtstnum, success); fflush(stdout); myid = sdf_msg_init_mpi(argc, argv, &numprocs, &success, msg_init_flags); info->myid = myid; if ((!success)||(myid < 0)) { printf("Node %d: MPI Init failure... exiting - errornum %d\n", myid, success); fflush(stdout); MPI_Finalize(); return (EXIT_FAILURE); } tmp = init_msgtest_sm((uint32_t)myid); /* Enable this process to run threads across 2 cpus, MPI will default to running all threads * on only one core which is not what we really want as it forces the msg thread to time slice * with the fth threads that send and receive messsages * first arg is the number of the processor you want to start off on and arg #2 is the sequential * number of processors from there */ lock_processor(0, 2); info->lock_cpu = 2; /* Startup SDF Messaging Engine FIXME - dual node mode still - pnodeid is passed and determined * from the number of processes mpirun sees. */ sleep(1); msg_init_flags = msg_init_flags | SDF_MSG_RTF_DISABLE_MNGMT; sdf_msg_init(myid, &pnodeid, msg_init_flags); MPI_Get_version(&mpiv, &mpisubv); MPI_Get_processor_name(processor_name, &namelen); printf("Node %d: MPI Version: %d.%d Name %s \n", myid, mpiv, mpisubv, processor_name); fflush(stdout); plat_log_msg( PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE, "\nNode %d: Completed Msg Init.. numprocs %d pnodeid %d Starting Test\n", myid, numprocs, pnodeid); info->pnodeid = pnodeid; for (msgCount = 0; msgCount < 2; msgCount++) { sleep(2); plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE, "\nNode %d: Number of sleeps %d\n", myid, msgCount); } /* create the fth test threads */ fthInit(); // Init pthread_attr_t attr; pthread_attr_init(&attr); pthread_create(&fthPthread, &attr, &SystemPthreadRoutine, &myid); plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE, "\nNode %d: Created pthread for System protocol\n", myid); info->pthread_info = 1; info->fth_info = 2; pthread_join(fthPthread, NULL); plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE, "\nNode %d: SDF Messaging Test Complete\n", myid); /* Lets stop the messaging engine this will block until they complete */ /* FIXME arg is the threadlvl */ sdf_msg_stopmsg(myid, SYS_SHUTDOWN_SELF); plat_shmem_detach(); info->success++; if (myid == 0) { sched_yield(); printf("Node %d: Exiting message test after yielding... Calling MPI_Finalize\n", myid); fflush(stdout); sched_yield(); MPI_Finalize(); print_test_info(info); test_info_final(info); } else { printf("Node %d: Exiting message test... Calling MPI_Finalize\n", myid); fflush(stdout); sched_yield(); MPI_Finalize(); } printf("Successfully ends\n"); return (EXIT_SUCCESS); }
int main(int argc,char* argv[]) { int numtasks, rank, rc; int micros=35; int minsec=42; const int buf_size = 60; /* Size of the buffer for timestamp */ /* initialize MPI and check for success*/ rc = MPI_Init(&argc,&argv); if (rc != MPI_SUCCESS) { printf ("Error starting MPI program. Terminating.\n"); MPI_Abort(MPI_COMM_WORLD, rc); } /* get size of comm and rank in that comm */ MPI_Comm_size(MPI_COMM_WORLD,&numtasks ); MPI_Comm_rank(MPI_COMM_WORLD,&rank); /* Make sure we have at least 2 processes (need at least that many). */ if (numtasks < 2) { fprintf(stderr, "World size must be at least two for %s to run properly!\n", argv[0]); MPI_Abort(MPI_COMM_WORLD, 1); } /* get hostname */ char hostname[MPI_MAX_PROCESSOR_NAME]; int resultlength=0; MPI_Get_processor_name(hostname,&resultlength); /* get current time */ struct tm *Tm; struct timeval detail_time; time_t timer = time(NULL); Tm=localtime(&timer); gettimeofday(&detail_time,NULL); micros = detail_time.tv_usec; /* workernodes do */ if (rank != 0) { /* make formatted string from time */ char timestamp[buf_size]; snprintf(timestamp,buf_size,"%s(%d):%d %d %d, %d:%d:%d and %dus\n", //Tm->tm_wday, /* Mon - Sun */ hostname, rank, Tm->tm_mday, Tm->tm_mon+1, Tm->tm_year+1900, Tm->tm_hour, Tm->tm_min, Tm->tm_sec, (int) detail_time.tv_usec); /* /1000 for ms */ /* send timestamp to Master */ MPI_Send(timestamp, buf_size, MPI_CHAR, 0, 0, MPI_COMM_WORLD); }else if (rank == 0) { /* print received messages */ printf("The masternode received the following timestamps:\n"); char buf[buf_size]; for (int i = 1; i < numtasks; i++) { MPI_Recv(buf, buf_size, MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); fprintf(stdout, "%s", buf); } } /* collect the minimum from all processes */ MPI_Reduce(&micros, &minsec, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); if (rank == 0) { printf("Minimum of all microsecond counts was: %dus\n",minsec); } MPI_Barrier(MPI_COMM_WORLD); fprintf(stdout,"Rank %d is finishing now!\n",rank); /* finalize the MPI environment */ MPI_Finalize(); return 0; }
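/*
 * Hedged extension of the MPI_MIN reduction above (illustration only; the function and
 * variable names are assumptions): if the root should also learn which rank produced
 * the minimum microsecond count, MPI_MINLOC over {value, rank} pairs does that.
 */
#include <mpi.h>
#include <stdio.h>

void report_min_with_owner(int my_value, int my_rank)
{
    struct { int value; int rank; } in, out;
    in.value = my_value;
    in.rank  = my_rank;
    MPI_Reduce(&in, &out, 1, MPI_2INT, MPI_MINLOC, 0, MPI_COMM_WORLD);
    if (my_rank == 0)
        printf("Minimum value %d came from rank %d\n", out.value, out.rank);
}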
int initParallelEnv(){ omp_set_num_threads(THREADS); /* Setup MPI programming environment */ MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &threadSupport); comm = MPI_COMM_WORLD; MPI_Comm_size(comm, &numMPIprocs); MPI_Comm_rank(comm, &myMPIRank); /*Find the number of bytes for an int */ sizeInteger = sizeof(int); /* Find the processor name of each MPI process */ MPI_Get_processor_name(myProcName, &procNameLen); /* Use processor name to create a communicator * across node boundaries. */ setupCommunicators(); /* setup OpenMP programming environment */ #pragma omp parallel shared(numThreads,globalIDarray,myMPIRank) { numThreads = omp_get_num_threads(); myThreadID = omp_get_thread_num(); /* Allocate space for globalIDarray */ #pragma omp single { globalIDarray = (int *)malloc(numThreads * sizeof(int)); } /*calculate the globalID for each thread */ globalIDarray[myThreadID] = (myMPIRank * numThreads) + myThreadID; } MPI_Barrier(comm); gaspi_config_t config; GASPI(config_get(&config)); config.qp_count = THREADS; GASPI(config_set(config)); /* GASPI setup */ GASPI(proc_init(GASPI_BLOCK)); gaspi_rank_t totalRanks; GASPI(proc_num(&totalRanks)); gaspi_rank_t rank; GASPI(proc_rank(&rank)); gaspi_number_t q_num; GASPI(queue_num(&q_num)); assert (q_num == THREADS); GASPI(barrier (GASPI_GROUP_ALL, GASPI_BLOCK)); // ok, we will continue to use the MPI ranks, just make sure GASPI and MPI ranks are identical // this is not guaranteed, so depending on the setup this may fail. assert (totalRanks == numMPIprocs); assert (rank == myMPIRank); /* set parallel info in benchmark report type */ setParallelInfo(numMPIprocs,threadSupport,numThreads); return 0; }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; MPI_Comm comm = MPI_COMM_WORLD; char processor_name[128]; int namelen = 128; int bbuf[(BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES]; int buf[BUF_SIZE * 2 * NUM_SEND_TYPES]; int i, j, k, at_size, send_t_number, index, outcount, total, flag; int num_errors, error_count, indices[2 * NUM_SEND_TYPES]; MPI_Request aReq[2 * NUM_SEND_TYPES]; MPI_Status aStatus[2 * NUM_SEND_TYPES]; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (comm, &nprocs); MPI_Comm_rank (comm, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Buffer_attach (bbuf, sizeof(int) * (BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES); if (rank == 0) { /* set up persistent sends... */ send_t_number = NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES; MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); } for (k = 0; k < (NUM_COMPLETION_MECHANISMS * 2); k++) { if (rank == 0) { /* initialize all of the send buffers */ for (j = 0; j < NUM_SEND_TYPES; j++) { for (i = 0; i < BUF_SIZE; i++) { buf[2 * j * BUF_SIZE + i] = i; buf[((2 * j + 1) * BUF_SIZE) + i] = BUF_SIZE - 1 - i; } } } else if (rank == 1) { /* zero out all of the receive buffers */ bzero (buf, sizeof(int) * BUF_SIZE * 2 * NUM_SEND_TYPES); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* set up transient sends... */ send_t_number = 0; MPI_Isend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Isend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Ibsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Ibsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; /* Barrier to ensure receives are posted for rsends... 
*/ MPI_Barrier(MPI_COMM_WORLD); MPI_Irsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Irsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); send_t_number++; MPI_Issend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2, comm, &aReq[send_t_number * 2]); MPI_Issend (&buf[(send_t_number * 2 + 1) * BUF_SIZE], BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, comm, &aReq[send_t_number * 2 + 1]); /* just to be paranoid */ send_t_number++; assert (send_t_number == NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES); /* start the persistent sends... */ if (k % 2) { MPI_Startall (NUM_PERSISTENT_SEND_TYPES * 2, &aReq[2 * send_t_number]); } else { for (j = 0; j < NUM_PERSISTENT_SEND_TYPES * 2; j++) { MPI_Start (&aReq[2 * send_t_number + j]); } } /* NOTE: Changing the send buffer of a Bsend is NOT an error... */ for (j = 0; j < NUM_SEND_TYPES; j++) { /* muck the buffers */ buf[j * 2 * BUF_SIZE + (BUF_SIZE >> 1)] = BUF_SIZE; } printf ("USER MSG: 6 change send buffer errors in iteration #%d:\n", k); /* complete the sends */ switch (k/2) { case 0: /* use MPI_Wait */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Wait (&aReq[j], &aStatus[j]); } break; case 1: /* use MPI_Waitall */ MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus); break; case 2: /* use MPI_Waitany */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus); } break; case 3: /* use MPI_Waitsome */ total = 0; while (total < NUM_SEND_TYPES * 2) { MPI_Waitsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus); total += outcount; } break; case 4: /* use MPI_Test */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { flag = 0; while (!flag) { MPI_Test (&aReq[j], &flag, &aStatus[j]); } } break; case 5: /* use MPI_Testall */ flag = 0; while (!flag) { MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus); } break; case 6: /* use MPI_Testany */ for (j = 0; j < NUM_SEND_TYPES * 2; j++) { flag = 0; while (!flag) { MPI_Testany (NUM_SEND_TYPES * 2, aReq, &index, &flag, aStatus); } } break; case 7: /* use MPI_Testsome */ total = 0; while (total < NUM_SEND_TYPES * 2) { outcount = 0; while (!outcount) { MPI_Testsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus); } total += outcount; } break; default: assert (0); break; } } else if (rank == 1) {
int main (int argc, char *argv[]) { int numproc, rank, len,i; char hostname[MPI_MAX_PROCESSOR_NAME]; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Get_processor_name(hostname, &len); FP_PREC *yc, *dyc, *derr, *fullerr; FP_PREC *xc, dx, intg, davg_err, dstd_dev, intg_err; FP_PREC globalSum = 0.0; // MPI variables MPI_Request *requestList,request; MPI_Status *status; //"real" grid indices int imin, imax; imin = 1 + (rank * (NGRID/numproc)); if(rank == numproc - 1) imax = NGRID; else imax = (rank+1) * (NGRID/numproc); int range = imax - imin + 1; xc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); yc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); dyc = (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC)); dx = (XF - XI)/(double)NGRID; for (i = 1; i <= range ; i++) { //xc[i] = imin + (XF - XI) * (FP_PREC)(i - 1)/(FP_PREC)(NGRID - 1); xc[i] = XI + dx * (imin + i - 2); } xc[0] = xc[1] - dx; xc[range + 1] = xc[range] + dx; for( i = 1; i <= range; i++ ) { yc[i] = fn(xc[i]); } yc[0] = fn(xc[0]); yc[range + 1] = fn(xc[range + 1]); for (i = 1; i <= range; i++) { dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx); } intg = 0.0; for (i = 1; i <= range; i++) { intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]); } MPI_Reduce(&intg, &globalSum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); //compute the error, average error of the derivatives derr = (FP_PREC*)malloc((range + 2) * sizeof(FP_PREC)); //compute the errors for(i = 1; i <= range; i++) { derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i])); } derr[0] = derr[range + 1] = 0.0; if(rank == 0) { fullerr = (FP_PREC *)malloc(sizeof(FP_PREC)*NGRID); requestList =(MPI_Request*)malloc((numproc-1)*sizeof(MPI_Request)); for(i = 0;i<range;i++) { fullerr[i] = derr[i+1]; } for(i = 1; i<numproc; i++) { int rmin, rmax, *indx; rmin = 1 + (i * (NGRID/numproc)); if(i == numproc - 1) rmax = NGRID; else rmax = (i+1) * (NGRID/numproc); MPI_Irecv(fullerr+rmin-1, rmax-rmin+1, MPI_DOUBLE, i, 1, MPI_COMM_WORLD, &(requestList[i-1])); } MPI_Waitall(numproc-1, requestList, MPI_STATUSES_IGNORE); // all remote error blocks must have arrived before fullerr is used double sum = 0.0; for(i=0; i<NGRID; i++) { sum+=fullerr[i]; } davg_err = sum/(FP_PREC)NGRID; dstd_dev = 0.0; for(i = 0; i< NGRID; i++) { dstd_dev += pow(fullerr[i] - davg_err, 2); } dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID); intg_err = fabs((ifn(XI, XF) - globalSum)/ifn(XI, XF)); printf("%0.4e: %0.4e: %0.4e\n", davg_err, dstd_dev, intg_err); } else { MPI_Isend(derr+1, imax-imin+1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &request); // tag 1 matches the MPI_Irecv posted by rank 0 MPI_Wait(&request, MPI_STATUS_IGNORE); fflush(stdout); } MPI_Finalize(); }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; char processor_name[128]; int namelen = 128; int buf0[buf_size]; int buf1[buf_size]; MPI_Status status; MPI_Comm comm; int drank, dnprocs; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); if (nprocs < 3) { printf ("not enough tasks\n"); } else { MPI_Comm_split (MPI_COMM_WORLD, rank % 2, nprocs - rank, &comm); if (comm != MPI_COMM_NULL) { MPI_Comm_size (comm, &dnprocs); MPI_Comm_rank (comm, &drank); if (dnprocs > 1) { if (drank == 0) { memset (buf0, 0, buf_size); MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, comm, &status); MPI_Send (buf0, buf_size, MPI_INT, 1, 0, comm); } else if (drank == 1) { memset (buf1, 1, buf_size); MPI_Recv (buf0, buf_size, MPI_INT, 0, 0, comm, &status); MPI_Send (buf1, buf_size, MPI_INT, 0, 0, comm); } } else { printf ("(%d) Derived communicator too small (size = %d)\n", rank, dnprocs); } MPI_Comm_free (&comm); } else { printf ("(%d) Got MPI_COMM_NULL\n", rank); } } MPI_Barrier (MPI_COMM_WORLD); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
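/* Note on the exchange above: both ranks of the split communicator call
 * MPI_Recv before MPI_Send, so each blocks waiting for the other (quite
 * possibly a deliberate deadlock test).  For reference, a deadlock-free
 * version of the same exchange can be written with MPI_Sendrecv, reusing
 * the buffers and communicator above:
 */
if (drank == 0) {
    memset(buf0, 0, buf_size * sizeof(int));
    MPI_Sendrecv(buf0, buf_size, MPI_INT, 1, 0,
                 buf1, buf_size, MPI_INT, 1, 0, comm, &status);
} else if (drank == 1) {
    memset(buf1, 1, buf_size * sizeof(int));
    MPI_Sendrecv(buf1, buf_size, MPI_INT, 0, 0,
                 buf0, buf_size, MPI_INT, 0, 0, comm, &status);
}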
int main ( int argc, char *argv[] ) { int *messList = NULL; int testIdx, doTestLoop; int i; executableName = "com"; MPI_Init ( &argc, &argv ); MPI_Get_processor_name ( hostName, &i ); /* Set global wsize and rank values */ MPI_Comm_size ( MPI_COMM_WORLD, &wsize ); MPI_Comm_rank ( MPI_COMM_WORLD, &rank ); if ( !initAllTestTypeParams ( &testParams ) ) { MPI_Finalize ( ); exit ( 1 ); } argStruct.testList = "Bidirectional, BidirAsync"; if ( !processArgs ( argc, argv ) ) { if ( rank == 0 ) printUse ( ); MPI_Finalize ( ); exit ( 1 ); } /* If using a source directory of process rank target files, * get the next appropriate file. */ if ( targetDirectory != NULL && getNextTargetFile ( ) == 0 ) { prestaAbort ( "Failed to open target file in target directory %s\n", targetDirectory ); } doTestLoop = 1; while ( doTestLoop ) { if ( !setupTestListParams ( ) || !initAllTestTypeParams ( &testParams ) ) { if ( rank == 0 ) printUse ( ); MPI_Finalize ( ); exit ( 1 ); } #ifdef PRINT_ENV if ( rank == 0 ) printEnv(); #endif printReportHeader ( ); for ( testIdx = 0; testIdx < TYPETOT; testIdx++ ) { if ( argStruct.testList == NULL || ( argStruct.testList != NULL && strstr ( argStruct.testList, testParams[testIdx].name ) != NULL ) ) { prestaRankDebug ( 0, "running test index %d\n", testIdx ); runTest ( &testParams[testIdx] ); } } if ( presta_check_data == 1 ) { MPI_Reduce ( &presta_data_err_total, &presta_global_data_err_total, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD ); } if ( targetDirectory == NULL || getNextTargetFile ( ) == 0 ) { doTestLoop = 0; } } printSeparator ( ); freeBuffers ( &testParams ); free ( messList ); MPI_Finalize ( ); exit ( 0 ); }
int main(int argc, char *argv[]) { int sendbuf[COUNT], recvbuf[COUNT], i; int err = 0, rank, nprocs, errs = 0; MPI_Comm intercomm; int listenfd, connfd, port, namelen; struct sockaddr_in cliaddr, servaddr; struct hostent *h; char hostname[MPI_MAX_PROCESSOR_NAME]; socklen_t len, clilen; MTest_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (nprocs != 2) { printf("Run this program with 2 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } if (rank == 1) { /* server */ listenfd = socket(AF_INET, SOCK_STREAM, 0); if (listenfd < 0) { printf("server cannot open socket\n"); MPI_Abort(MPI_COMM_WORLD, 1); } memset(&servaddr, 0, sizeof(servaddr)); servaddr.sin_family = AF_INET; servaddr.sin_addr.s_addr = htonl(INADDR_ANY); servaddr.sin_port = 0; err = bind(listenfd, (struct sockaddr *) &servaddr, sizeof(servaddr)); if (err < 0) { errs++; printf("bind failed\n"); MPI_Abort(MPI_COMM_WORLD, 1); } len = sizeof(servaddr); err = getsockname(listenfd, (struct sockaddr *) &servaddr, &len); if (err < 0) { errs++; printf("getsockname failed\n"); MPI_Abort(MPI_COMM_WORLD, 1); } port = ntohs(servaddr.sin_port); MPI_Get_processor_name(hostname, &namelen); err = listen(listenfd, 5); if (err < 0) { errs++; printf("listen failed\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Send(hostname, namelen + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD); MPI_Send(&port, 1, MPI_INT, 0, 1, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); clilen = sizeof(cliaddr); connfd = accept(listenfd, (struct sockaddr *) &cliaddr, &clilen); if (connfd < 0) { printf("accept failed\n"); MPI_Abort(MPI_COMM_WORLD, 1); } } else { /* client */ MPI_Recv(hostname, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(&port, 1, MPI_INT, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Barrier(MPI_COMM_WORLD); h = gethostbyname(hostname); if (h == NULL) { printf("gethostbyname failed\n"); MPI_Abort(MPI_COMM_WORLD, 1); } servaddr.sin_family = h->h_addrtype; memcpy((char *) &servaddr.sin_addr.s_addr, h->h_addr_list[0], h->h_length); servaddr.sin_port = htons(port); /* create socket */ connfd = socket(AF_INET, SOCK_STREAM, 0); if (connfd < 0) { printf("client cannot open socket\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /* connect to server */ err = connect(connfd, (struct sockaddr *) &servaddr, sizeof(servaddr)); if (err < 0) { errs++; printf("client cannot connect\n"); MPI_Abort(MPI_COMM_WORLD, 1); } } MPI_Barrier(MPI_COMM_WORLD); /* To improve reporting of problems about operations, we * change the error handler to errors return */ MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); err = MPI_Comm_join(connfd, &intercomm); if (err) { errs++; printf("Error in MPI_Comm_join %d\n", err); } /* To improve reporting of problems about operations, we * change the error handler to errors return */ MPI_Comm_set_errhandler(intercomm, MPI_ERRORS_RETURN); for (i = 0; i < COUNT; i++) { recvbuf[i] = -1; sendbuf[i] = i + COUNT * rank; } err = MPI_Sendrecv(sendbuf, COUNT, MPI_INT, 0, 0, recvbuf, COUNT, MPI_INT, 0, 0, intercomm, MPI_STATUS_IGNORE); if (err != MPI_SUCCESS) { errs++; printf("Error in MPI_Sendrecv on new communicator\n"); } for (i = 0; i < COUNT; i++) { if (recvbuf[i] != ((rank + 1) % 2) * COUNT + i) errs++; } MPI_Barrier(MPI_COMM_WORLD); err = MPI_Comm_disconnect(&intercomm); if (err != MPI_SUCCESS) { errs++; printf("Error in MPI_Comm_disconnect\n"); } MTest_Finalize(errs); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int ret; char *buf; char processor_name[MPI_MAX_PROCESSOR_NAME]; int namelen; double start_time; double used_time; double avg_time; double barrier_time; double us_rate; int max_len, lenbuf; int j; int me, nproc; FILE *fparam ; /* * begining ... */ setbuf(stdout, NULL) ; /* * max_len ... */ /* if (argc != 2) { printf("Use: bcast <max_len> \n") ; exit(1) ; } max_len =atoi(argv[1]) ; */ /* #if defined(__LINUX__) fparam = fopen("bcast.in","rt") ; #endif #if defined(__SUNOS__) fparam = fopen("bcast.in","rt") ; #endif #if defined(__SP2__) fparam = fopen("/u/fperez/XMP/MiMPI/test/mp/mpi/performance/bcast/bcast.in","rt") ; #endif if (fparam == NULL) { printf("ERROR: can not open bcast.in, sorry.\n") ; exit(1) ; } ret = fscanf(fparam,"max_len=%i",&max_len) ; fclose(fparam) ; if (ret != 1) { printf("ERROR: can not read a valid 'max_len' value from bcast.in, sorry.\n") ; exit(1) ; } */ max_len = 1024 * 1024; if ( (max_len <= 0) || (max_len >= 8*1024*1024) ) { printf("ERROR: (max_len <= 0) || (max_len >= 8*1024*1024)\n") ; exit(1) ; } /* * MPI init ... */ ret = MPI_Init(&argc, &argv); if (ret < 0) { printf("Can't init\n") ; exit(1) ; } MPI_Comm_rank(MPI_COMM_WORLD,&me) ; MPI_Get_processor_name(processor_name,&namelen) ; MPI_Comm_size(MPI_COMM_WORLD, &nproc) ; #if (0) printf("Process %d; total %d is alive on %s\n",me,nproc,processor_name) ; #endif buf = (char *) malloc((unsigned) max_len) ; if (buf == NULL) { perror("Error en malloc") ; exit(1) ; } memset(buf,'x',max_len) ; printf("barrier\n") ; MPI_Barrier(MPI_COMM_WORLD) ; /* ... Barrier ... */ start_time = MPI_Wtime() ; for(j = 0; j < 10; j++) { MPI_Barrier(MPI_COMM_WORLD) ; } barrier_time = (MPI_Wtime() - start_time) ; barrier_time = barrier_time / 2000.0; if (me == 0) printf(">>>>>>>>> BARRERA1 =%e\n", barrier_time); /*barrier_time = 0;*/ /* ... test ... */ lenbuf = 1; while (lenbuf <= max_len) { MPI_Barrier(MPI_COMM_WORLD) ; avg_time = 0.0; if (me != 0) { for(j = 0; j < PRUEBAS; j++) { ret = MPI_Bcast(buf,lenbuf,MPI_CHAR,0, MPI_COMM_WORLD) ; if (ret != MPI_SUCCESS) printf("ERROR EN BCAST \n"); } } else { start_time = MPI_Wtime() ; for(j = 0; j < PRUEBAS; j++) { ret = MPI_Bcast(buf,lenbuf,MPI_CHAR,0, MPI_COMM_WORLD) ; if (ret != MPI_SUCCESS) printf("ERROR EN BCAST \n"); } used_time = (MPI_Wtime() - start_time) ; avg_time = used_time / (float) PRUEBAS; if (avg_time > 0) /* rate is megabytes per second */ us_rate = (double)((nproc * lenbuf)/ (avg_time*(double)1000000)) ; else us_rate = 0.0; printf("len_bytes=%e avg_time_sec=%e rate_Mbytes_sec=%e\n", (double)lenbuf, (double)avg_time, (double)us_rate) ; } lenbuf *= 2; } if (me == 0) { char c; read(0, &c, 1); } #if (0) if (me == 0) printf("\nclock resolution in seconds: %10.8f\n", MPI_Wtick()) ; #endif MPI_Finalize() ; free(buf) ; exit(0) ; }
std::string processor_name() const
{
    char name[MPI_MAX_PROCESSOR_NAME];
    int len;
    MPI_Get_processor_name(name, &len);
    return std::string(name);
}
// argc = cpu count, argv = file.cpp int main(int argc, char *argv[]) { // create win object, this is used for locks MPI_Win win; // needed for MPI int namelen = 0; int myid, numprocs = 0; // processor name char processor_name[MPI_MAX_PROCESSOR_NAME]; //initialize MPI execution environment MPI_Init(&argc, &argv); //each process get total # of processes //the total # of processes specified in mpirun �np n MPI_Comm_size(MPI_COMM_WORLD, &numprocs); //each process gets its own id MPI_Comm_rank(MPI_COMM_WORLD, &myid); // needed for times double program_start = 0; double program_end = 0; double process_start = 0; double process_end = 0; // take time if (myid == 0) // get start program time program_start = MPI_Wtime(); // Gets the name of the processor MPI_Get_processor_name(processor_name, &namelen); // number of processes int n = 0; // display info fprintf(stderr, "process %d on %s\n", myid, processor_name); fflush(stderr); // create win object for locks MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); // declare array to hold char from words plus \0 char* arr; // list to keep track of length of each word short* list; // size of entire array int arr_size = 0; // size of the list int list_size = 0; // new list of words that are not palindromes char* new_words; // size of new array of words eahc process will // have inorder to send back to root after finding // all none plaindrome words int new_size = 0; // this will be the total size of non-palidrome words // which will be recieved from each process int total_size = 0; // temp vector to hold arrays in file std::vector<std::string>* words; // root does if (myid == 0) { // stream to open file std::fstream in; // vector to dynamically grow as we add strings to it // this makes it so we don't need to open file twice since // we would normally open file and count number of words // then reopen it to get the actually words to put in an array // we just declared based off the size we got the first time words = new std::vector<std::string>(); // open file as instream in.open("Palindromes.txt", std::ios::in); // if error opening file if (in.fail()) { // display message and close std::cout << "Error Opening File" << std::endl; MPI_Abort(MPI_COMM_WORLD, 1); } // no error while opening file else { // temp string to hold each word std::string temp; // grab each word from each line while (getline(in, temp)) { // put word into vector words->push_back(temp); // loop each string (word) and get it's length for (int i = 0; i < temp.size(); i++) //increment size arr_size++; // increment one last time since we will be adding a // \0 for each word arr_size++; } // done, close file in.close(); } // set size depending on word size list_size = words->size(); // we added one since later on in the program // we use the next index to mark where the loop stops // without one at end, there is no way to mark the end // and last word never gets processed list_size++; } // take time if (myid == 0) // get start program time process_start = MPI_Wtime(); // barrier MPI_Barrier(MPI_COMM_WORLD); // broadcast the size of char array and list to other processes // they will be used to allocate the needed space per process MPI_Bcast(&arr_size, 1, MPI_INT, 0, MPI_COMM_WORLD); // barrier MPI_Barrier(MPI_COMM_WORLD); // broadcats list size MPI_Bcast(&list_size, 1, MPI_SHORT, 0, MPI_COMM_WORLD); // barrier MPI_Barrier(MPI_COMM_WORLD); // allocate list, list should be number of \0 // since there is one per word, it should be the number of words list = new short[list_size]; // 
barrier MPI_Barrier(MPI_COMM_WORLD); // allocate array arr = new char[arr_size]; // barrier MPI_Barrier(MPI_COMM_WORLD); // root does this if (myid == 0) { // put the values into array // using a counter int counter = 0; // loop entier array, while looping each word // and put them sequentially into array // with null terminator ending each word // we do list_size-1 since list_size is increased by 1 // to fix an earlier problem where we need to mark // last element in list to be able to end it // without it, it crashes, not sure why for (int i = 0; i < list_size - 1; i++) { //mark start of word arr[counter] = '\0'; // put null terminator index into list list[i] = counter; // incremenet counter counter++; // loop to get count of the next word for (int j = 0; j < words->at(i).size(); j++) { // get word from vector at i (string is returned) // get char at j from string arr[counter++] = words->at(i).at(j); } } // make last element to stop loops later in program list[list_size - 1] = counter; // free up memory, this object is no longer used delete words; } // broadcast array of char (basically all the words // in a char array where each word ends in \0) // also broadcast list of word indexes MPI_Barrier(MPI_COMM_WORLD); // send list of indexes to all processes MPI_Bcast(list, list_size, MPI_SHORT, 0, MPI_COMM_WORLD); // send array ofwords to processes MPI_Bcast(arr, arr_size, MPI_CHAR, 0, MPI_COMM_WORLD); // run function for each process to create a new list of non-palindromes // this is using cyclic partiioning new_words = markParalindromes(myid, arr_size, list_size, arr, list, numprocs, new_size); // use lock MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); // create out stream object std::fstream out; // open file out.open("Non-Palindromes.txt", std::ios::out | std::ios::app); // if there is an error creating/opening if (out.fail()) { // display message and close std::cout << "Error Opening File" << std::endl; MPI_Abort(MPI_COMM_WORLD, 1); } else { // loop non-palindrome words for (int i = 0; i < new_size; i++) { // if char is null terminator // replace it with newline if (new_words[i] == '\0') { // if I write one newline, it does not work // but two does out << std::endl; out << std::endl; } // if not null terminator else // write char of current word out << new_words[i]; } // needed for end of file out << std::endl; out << std::endl; // close file out.close(); } // unlock MPI_Win_unlock(1, win); // barrier MPI_Barrier(MPI_COMM_WORLD); if (myid == 0) // get start program time process_end = MPI_Wtime(); // clean up and display results if (myid == 0) { // clean up if(arr != NULL) delete[] arr; if (list != NULL) delete[] list; if (new_words != NULL) delete[] new_words; } // barrier MPI_Barrier(MPI_COMM_WORLD); if (myid == 0) // get start program time program_end = MPI_Wtime(); if(myid == 0) { // get total time std::cout << "Program Time: " << (program_end - program_start) << "s" << std::endl; // get processe stime std::cout << "Process Time: " << (process_end - process_start) << "s" << std::endl; } // needed to clean up MPI_Win_free(&win); MPI_Finalize(); }
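/* Note on the program above: list_size is declared as an int but broadcast
 * with MPI_SHORT, so the datatype does not match the C type of the buffer
 * and only part of the value is transferred.  MPI_LOCK_SHARED also lets
 * several ranks hold the window lock at the same time, so it does not by
 * itself serialize the writes to Non-Palindromes.txt the way an exclusive
 * lock would.  A minimal sketch of the corrected calls, assuming the same
 * variable names:
 */
MPI_Bcast(&list_size, 1, MPI_INT, 0, MPI_COMM_WORLD);  /* int buffer -> MPI_INT */
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win);            /* exclusive access for the file section */
/* ... append this rank's non-palindrome words to Non-Palindromes.txt ... */
MPI_Win_unlock(1, win);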
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; int i, j; int *granks; char processor_name[128]; int namelen = 128; int buf[buf_size]; MPI_Status status; MPI_Comm temp; MPI_Comm intercomm = MPI_COMM_NULL; MPI_Comm dcomms[DCOMM_CALL_COUNT]; MPI_Group world_group, dgroup; int intersize, dnprocs[DCOMM_CALL_COUNT], drank[DCOMM_CALL_COUNT]; int dims[TWOD], periods[TWOD], remain_dims[TWOD]; int graph_index[] = { 2, 3, 4, 6 }; int graph_edges[] = { 1, 3, 0, 3, 0, 2 }; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); /* probably want number to be higher... */ if (nprocs < 4) { printf ("not enough tasks\n"); } else { if (DCOMM_CALL_COUNT > 0) { #ifdef RUN_COMM_DUP /* create all of the derived communicators... */ /* simplest is created by MPI_Comm_dup... */ MPI_Comm_dup (MPI_COMM_WORLD, &dcomms[0]); #else dcomms[0] = MPI_COMM_NULL; #endif } if (DCOMM_CALL_COUNT > 1) { #ifdef RUN_COMM_CREATE /* use subset of MPI_COMM_WORLD group for MPI_Comm_create... */ MPI_Comm_group (MPI_COMM_WORLD, &world_group); granks = (int *) malloc (sizeof(int) * (nprocs/2)); for (i = 0; i < nprocs/2; i++) granks [i] = 2 * i; MPI_Group_incl (world_group, nprocs/2, granks, &dgroup); MPI_Comm_create (MPI_COMM_WORLD, dgroup, &dcomms[1]); MPI_Group_free (&world_group); MPI_Group_free (&dgroup); free (granks); #else dcomms[1] = MPI_COMM_NULL; #endif } if (DCOMM_CALL_COUNT > 2) { #ifdef RUN_COMM_SPLIT /* split into thirds with inverted ranks... */ MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &dcomms[2]); #else dcomms[2] = MPI_COMM_NULL; #endif } #ifdef RUN_INTERCOMM_CREATE if ((DCOMM_CALL_COUNT < 2) || (dcomms[2] == MPI_COMM_NULL)) { MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &temp); } else { temp = dcomms[2]; } if (rank % 3) { MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD, (((nprocs % 3) == 2) && ((rank % 3) == 2)) ? nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3), INTERCOMM_CREATE_TAG, &intercomm); } if ((DCOMM_CALL_COUNT < 2) || (dcomms[2] == MPI_COMM_NULL)) { MPI_Comm_free (&temp); } #endif if (DCOMM_CALL_COUNT > 3) { #ifdef RUN_CART_CREATE /* create a 2 X nprocs/2 torus topology, allow reordering */ dims[0] = 2; dims[1] = nprocs/2; periods[0] = periods[1] = 1; MPI_Cart_create (MPI_COMM_WORLD, TWOD, dims, periods, 1, &dcomms[3]); #else dcomms[3] = MPI_COMM_NULL; #endif } if (DCOMM_CALL_COUNT > 4) { #ifdef RUN_GRAPH_CREATE /* create the graph on p.268 MPI: The Complete Reference... */ MPI_Graph_create (MPI_COMM_WORLD, GRAPH_SZ, graph_index, graph_edges, 1, &dcomms[4]); #else dcomms[4] = MPI_COMM_NULL; #endif } if (DCOMM_CALL_COUNT > 5) { #ifdef RUN_CART_SUB #ifndef RUN_CART_CREATE /* need to make cartesian communicator temporarily... */ /* create a 2 X nprocs/2 torus topology, allow reordering */ dims[0] = 2; dims[1] = nprocs/2; periods[0] = periods[1] = 1; MPI_Cart_create (MPI_COMM_WORLD, TWOD, dims, periods, 1, &dcomms[3]); #endif if (dcomms[3] != MPI_COMM_NULL) { /* create 2 1 X nprocs/2 topologies... */ remain_dims[0] = 0; remain_dims[1] = 1; MPI_Cart_sub (dcomms[3], remain_dims, &dcomms[5]); #ifndef RUN_CART_CREATE /* free up temporarily created cartesian communicator... 
*/ MPI_Comm_free (&dcomms[3]); #endif } else { dcomms[5] = MPI_COMM_NULL; } #else dcomms[5] = MPI_COMM_NULL; #endif } if (DCOMM_CALL_COUNT > 6) { #ifdef RUN_INTERCOMM_MERGE #ifndef RUN_INTERCOMM_CREATE #ifndef RUN_COMM_SPLIT /* need to make split communicator temporarily... */ /* split into thirds with inverted ranks... */ MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &dcomms[2]); #endif #endif /* create an intercommunicator and merge it... */ if (rank % 3) { #ifndef RUN_INTERCOMM_CREATE MPI_Intercomm_create (dcomms[2], 0, MPI_COMM_WORLD, (((nprocs % 3) == 2) && ((rank % 3) == 2)) ? nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3), INTERCOMM_CREATE_TAG, &intercomm); #endif MPI_Intercomm_merge (intercomm, ((rank % 3) == 1), &dcomms[6]); #ifndef RUN_INTERCOMM_CREATE /* we are done with intercomm... */ MPI_Comm_free (&intercomm); #endif } else { dcomms[6] = MPI_COMM_NULL; } #ifndef RUN_INTERCOMM_CREATE #ifndef RUN_COMM_SPLIT if (dcomms[2] != MPI_COMM_NULL) /* free up temporarily created split communicator... */ MPI_Comm_free (&dcomms[2]); #endif #endif #else dcomms[6] = MPI_COMM_NULL; #endif } /* get all of the sizes and ranks... */ for (i = 0; i < DCOMM_CALL_COUNT; i++) { if (dcomms[i] != MPI_COMM_NULL) { MPI_Comm_size (dcomms[i], &dnprocs[i]); MPI_Comm_rank (dcomms[i], &drank[i]); } else { dnprocs[i] = 0; drank[i] = -1; } } #ifdef RUN_INTERCOMM_CREATE /* get the intercomm remote size... */ if (rank % 3) { MPI_Comm_remote_size (intercomm, &intersize); } #endif /* do some point to point on all of the dcomms... */ for (i = 0; i < DCOMM_CALL_COUNT; i++) { if (dnprocs[i] > 1) { if (drank[i] == 0) { for (j = 1; j < dnprocs[i]; j++) { MPI_Recv (buf, buf_size, MPI_INT, j, 0, dcomms[i], &status); } } else { memset (buf, 1, buf_size*sizeof(int)); MPI_Send (buf, buf_size, MPI_INT, 0, 0, dcomms[i]); } } } #ifdef RUN_INTERCOMM_CREATE /* do some point to point on the intercomm... */ if ((rank % 3) == 1) { for (j = 0; j < intersize; j++) { MPI_Recv (buf, buf_size, MPI_INT, j, 0, intercomm, &status); } } else if ((rank % 3) == 2) { for (j = 0; j < intersize; j++) { memset (buf, 1, buf_size*sizeof(int)); MPI_Send (buf, buf_size, MPI_INT, j, 0, intercomm); } } #endif /* do a bcast on all of the dcomms... */ for (i = 0; i < DCOMM_CALL_COUNT; i++) { /* IBM's implementation gets error with comm over MPI_COMM_NULL... */ if (dnprocs[i] > 0) MPI_Bcast (buf, buf_size, MPI_INT, 0, dcomms[i]); } /* use any source receives... */ for (i = 0; i < DCOMM_CALL_COUNT; i++) { if (dnprocs[i] > 1) { if (drank[i] == 0) { for (j = 1; j < dnprocs[i]; j++) { MPI_Recv (buf, buf_size, MPI_INT, MPI_ANY_SOURCE, 0, dcomms[i], &status); } } else { memset (buf, 1, buf_size*sizeof(int)); MPI_Send (buf, buf_size, MPI_INT, 0, 0, dcomms[i]); } } } #ifdef RUN_INTERCOMM_CREATE /* do any source receives on the intercomm... */ if ((rank % 3) == 1) { for (j = 0; j < intersize; j++) { MPI_Recv (buf, buf_size, MPI_INT, MPI_ANY_SOURCE, 0, intercomm, &status); } } else if ((rank % 3) == 2) { for (j = 0; j < intersize; j++) { memset (buf, 1, buf_size*sizeof(int)); MPI_Send (buf, buf_size, MPI_INT, j, 0, intercomm); } } #endif /* do a barrier on all of the dcomms... */ for (i = 0; i < DCOMM_CALL_COUNT; i++) { /* IBM's implementation gets with communication over MPI_COMM_NULL... */ if (dnprocs[i] > 0) MPI_Barrier (dcomms[i]); } /* free all of the derived communicators... */ for (i = 0; i < DCOMM_CALL_COUNT; i++) { /* freeing MPI_COMM_NULL is explicitly defined as erroneous... 
*/ if (dnprocs[i] > 0) MPI_Comm_free (&dcomms[i]); } #ifdef RUN_INTERCOMM_CREATE if (rank % 3) /* we are done with intercomm... */ MPI_Comm_free (&intercomm); #endif } MPI_Barrier (MPI_COMM_WORLD); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
int main (int argc, char * argv[]) { static int const maxlen = 200, rowsize = 521, colsize = 428, linelen = 12; char str[maxlen], lines[5][maxlen]; FILE *fp, *fout; int icheck, nlines = 0; unsigned int h1, h2, h3; char *sptr; int **R, **G, **B, **Rnew, **Gnew, **Bnew; int *Rrow, *Grow, *Brow, *sendbuf, *recvbuf; int row = 0, col = 0, nblurs, lineno=0, k; struct timeval tim; int bufsize, coords[2]; int myrowsize, mycolsize, myrowstart, myrowend, mycolstart, mycolend; int len, tag = 99, dest, prowsize, pcolsize, lastcolsize, nsend, localrow, localcol, coffset; char name[MPI_MAX_PROCESSOR_NAME]; int nprocs, rank, nprows, npcols, myrow, mycol, left, right, up, down; MPI_Comm new_comm; MPI_Status status; /* Initialize MPI */ MPI_Init (&argc, &argv); /* Initialising more variables. */ int dimensions[2], periods[2]; /* Determining process count and rank. */ MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* Creating the 2D topology. */ dimensions[0] = dimensions[1] = 0; periods[0] = periods[1] = 0; MPI_Dims_create(nprocs, 2, dimensions); // Determining processes in each dimension. nprows = dimensions[0]; npcols = dimensions[1]; MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions, periods, 1,&new_comm); // Creating new_comm. MPI_Cart_coords(new_comm, rank, 2, coords); // Getting coordinates of this process in the grid. myrow = coords[0]; mycol = coords[1]; /* Determining ranks of neighbours. */ MPI_Cart_shift(new_comm, 0, 1, &up, &down); MPI_Cart_shift(new_comm, 1, 1, &left, &right); /* Do data decomposition */ prowsize = ((rowsize-1)/nprows) + 1; myrowstart = myrow*prowsize; myrowend = (myrow+1)*prowsize - 1; if (myrowend >= rowsize) myrowend = rowsize - 1; pcolsize = ((colsize-1)/npcols) + 1; mycolstart = mycol*pcolsize; mycolend = (mycol+1)*pcolsize - 1; if (mycolend >= colsize) mycolend = colsize - 1; myrowsize = myrowend - myrowstart + 1; mycolsize = mycolend - mycolstart + 1; printf("rank = %d: (myrow,mycol) = (%d,%d), (left,right,up,down) = (%d,%d,%d,%d), row(start,end) = (%d,%d), col(start,end) = (%d,%d)\n",rank,myrow,mycol,left,right,up,down,myrowstart,myrowend,mycolstart,mycolend); /* Allocate arrays */ R = (int **)malloc((sizeof(int*)*(myrowsize+2))); G = (int **)malloc((sizeof(int*)*(myrowsize+2))); B = (int **)malloc((sizeof(int*)*(myrowsize+2))); Rnew = (int **)malloc((sizeof(int*)*(myrowsize+2))); Gnew = (int **)malloc((sizeof(int*)*(myrowsize+2))); Bnew = (int **)malloc((sizeof(int*)*(myrowsize+2))); for (k=0;k<myrowsize+2;k++){ R[k] = (int *)malloc(sizeof(int)*(mycolsize+2)); G[k] = (int *)malloc(sizeof(int)*(mycolsize+2)); B[k] = (int *)malloc(sizeof(int)*(mycolsize+2)); Rnew[k] = (int *)malloc(sizeof(int)*(mycolsize+2)); Gnew[k] = (int *)malloc(sizeof(int)*(mycolsize+2)); Bnew[k] = (int *)malloc(sizeof(int)*(mycolsize+2)); } bufsize = myrowsize > mycolsize ? myrowsize : mycolsize; sendbuf = (int *)malloc(sizeof(int)*(bufsize)); recvbuf = (int *)malloc(sizeof(int)*(bufsize)); /* Read input on process 0 and distribute to processes */ if (rank==0){ localrow = 1; lastcolsize = colsize - pcolsize*(npcols-1); Rrow = (int *)malloc(sizeof(int)*(colsize)); Grow = (int *)malloc(sizeof(int)*(colsize)); Brow = (int *)malloc(sizeof(int)*(colsize)); fp = fopen("David.ps", "r"); while(! 
feof(fp)) { icheck = fscanf(fp, "\n%[^\n]", str); if (nlines < 5) {strcpy((char *)lines[nlines++],(char *)str);} else if(icheck>0){ for (sptr=&str[0];*sptr != '\0';sptr+=6){ sscanf(sptr,"%2x",&h1); sscanf(sptr+2,"%2x",&h2); sscanf(sptr+4,"%2x",&h3); if (row < rowsize) { Rrow[col] = h1; Grow[col] = h2; Brow[col] = h3; col++; } if (col==colsize){ coords[0] = row/prowsize; for(k=0;k<npcols;k++){ nsend = (k<npcols-1 ? pcolsize : lastcolsize); coffset = k*pcolsize; coords[1] = k; MPI_Cart_rank(new_comm, coords, &dest); if(dest!=0){ MPI_Send(Rrow+coffset,nsend,MPI_INT,dest,tag,new_comm); MPI_Send(Grow+coffset,nsend,MPI_INT,dest,tag,new_comm); MPI_Send(Brow+coffset,nsend,MPI_INT,dest,tag,new_comm); } else{ for(localcol=1;localcol<=mycolsize;localcol++){ R[localrow][localcol] = Rrow[coffset+localcol-1]; G[localrow][localcol] = Grow[coffset+localcol-1]; B[localrow][localcol] = Brow[coffset+localcol-1]; } localrow++; } } col = 0; row++; } } } } fclose(fp); } else{ for(localrow=1;localrow<=myrowsize;localrow++){ MPI_Recv(&R[localrow][1],mycolsize,MPI_INT,0,tag,new_comm,&status); MPI_Recv(&G[localrow][1],mycolsize,MPI_INT,0,tag,new_comm,&status); MPI_Recv(&B[localrow][1],mycolsize,MPI_INT,0,tag,new_comm,&status); } } nblurs = 10; MPI_Barrier(new_comm); double t1; if(rank==0){ gettimeofday(&tim, NULL); t1=tim.tv_sec+(tim.tv_usec/1000000.0); } for(k=0;k<nblurs;k++){ /* Shifting the data up */ MPI_Sendrecv(&R[1][1], mycolsize, MPI_INT, up, tag, &R[myrowsize+1][1], mycolsize, MPI_INT, down, tag, new_comm, &status); MPI_Sendrecv(&G[1][1], mycolsize, MPI_INT, up, tag, &G[myrowsize+1][1], mycolsize, MPI_INT, down, tag, new_comm, &status); MPI_Sendrecv(&B[1][1], mycolsize, MPI_INT, up, tag, &B[myrowsize+1][1], mycolsize, MPI_INT, down, tag, new_comm, &status); /* Shifting the data down */ MPI_Sendrecv(&R[myrowsize][1], mycolsize, MPI_INT, down, tag, &R[0][1], mycolsize, MPI_INT, up, tag, new_comm, &status); MPI_Sendrecv(&G[myrowsize][1], mycolsize, MPI_INT, down, tag, &G[0][1], mycolsize, MPI_INT, up, tag, new_comm, &status); MPI_Sendrecv(&B[myrowsize][1], mycolsize, MPI_INT, down, tag, &B[0][1], mycolsize, MPI_INT, up, tag, new_comm, &status); /* Shifting the data left */ int i; for (i = 0; i < myrowsize; i++) sendbuf[i] = R[i+1][1]; MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, left, tag, recvbuf, myrowsize, MPI_INT, right, tag, new_comm, &status); for (i = 0; i < myrowsize; i++) R[i+1][mycolsize+1] = recvbuf[i]; for (i = 0; i < myrowsize; i++) sendbuf[i] = G[i+1][1]; MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, left, tag, recvbuf, myrowsize, MPI_INT, right, tag, new_comm, &status); for (i = 0; i < myrowsize; i++) G[i+1][mycolsize+1] = recvbuf[i]; for (i = 0; i < myrowsize; i++) sendbuf[i] = B[i+1][1]; MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, left, tag, recvbuf, myrowsize, MPI_INT, right, tag, new_comm, &status); for (i = 0; i < myrowsize; i++) B[i+1][mycolsize+1] = recvbuf[i]; /* Shifting the data right */ for (i = 0; i < myrowsize; i++) sendbuf[i] = R[i+1][mycolsize]; MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, right, tag, recvbuf, myrowsize, MPI_INT, left, tag, new_comm, &status); for (i = 0; i < myrowsize; i++) R[i+1][0] = recvbuf[i]; for (i = 0; i < myrowsize; i++) sendbuf[i] = G[i+1][mycolsize]; MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, right, tag, recvbuf, myrowsize, MPI_INT, left, tag, new_comm, &status); for (i = 0; i < myrowsize; i++) G[i+1][0] = recvbuf[i]; for (i = 0; i < myrowsize; i++) sendbuf[i] = B[i+1][mycolsize]; MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, right, tag, recvbuf, myrowsize, MPI_INT, 
left, tag, new_comm, &status); for (i = 0; i < myrowsize; i++) B[i+1][0] = recvbuf[i]; for(localrow=1;localrow<=myrowsize;localrow++){ row = prowsize*myrow + localrow - 1; for (localcol=1;localcol<=mycolsize;localcol++){ col = pcolsize*mycol + localcol - 1; if (row != 0 && row != (rowsize-1) && col != 0 && col != (colsize-1)){ Rnew[localrow][localcol] = (R[localrow+1][localcol]+R[localrow-1][localcol]+R[localrow][localcol+1]+R[localrow][localcol-1])/4; Gnew[localrow][localcol] = (G[localrow+1][localcol]+G[localrow-1][localcol]+G[localrow][localcol+1]+G[localrow][localcol-1])/4; Bnew[localrow][localcol] = (B[localrow+1][localcol]+B[localrow-1][localcol]+B[localrow][localcol+1]+B[localrow][localcol-1])/4; } else if (row == 0 && col != 0 && col != (colsize-1)){ Rnew[localrow][localcol] = (R[localrow+1][localcol]+R[localrow][localcol+1]+R[localrow][localcol-1])/3; Gnew[localrow][localcol] = (G[localrow+1][localcol]+G[localrow][localcol+1]+G[localrow][localcol-1])/3; Bnew[localrow][localcol] = (B[localrow+1][localcol]+B[localrow][localcol+1]+B[localrow][localcol-1])/3; } else if (row == (rowsize-1) && col != 0 && col != (colsize-1)){ Rnew[localrow][localcol] = (R[localrow-1][localcol]+R[localrow][localcol+1]+R[localrow][localcol-1])/3; Gnew[localrow][localcol] = (G[localrow-1][localcol]+G[localrow][localcol+1]+G[localrow][localcol-1])/3; Bnew[localrow][localcol] = (B[localrow-1][localcol]+B[localrow][localcol+1]+B[localrow][localcol-1])/3; } else if (col == 0 && row != 0 && row != (rowsize-1)){ Rnew[localrow][localcol] = (R[localrow+1][localcol]+R[localrow-1][localcol]+R[localrow][localcol+1])/3; Gnew[localrow][localcol] = (G[localrow+1][localcol]+G[localrow-1][localcol]+G[localrow][localcol+1])/3; Bnew[localrow][localcol] = (B[localrow+1][localcol]+B[localrow-1][localcol]+B[localrow][localcol+1])/3; } else if (col == (colsize-1) && row != 0 && row != (rowsize-1)){ Rnew[localrow][localcol] = (R[localrow+1][localcol]+R[localrow-1][localcol]+R[localrow][localcol-1])/3; Gnew[localrow][localcol] = (G[localrow+1][localcol]+G[localrow-1][localcol]+G[localrow][localcol-1])/3; Bnew[localrow][localcol] = (B[localrow+1][localcol]+B[localrow-1][localcol]+B[localrow][localcol-1])/3; } else if (row==0 &&col==0){ Rnew[localrow][localcol] = (R[localrow][localcol+1]+R[localrow+1][localcol])/2; Gnew[localrow][localcol] = (G[localrow][localcol+1]+G[localrow+1][localcol])/2; Bnew[localrow][localcol] = (B[localrow][localcol+1]+B[localrow+1][localcol])/2; } else if (row==0 &&col==(colsize-1)){ Rnew[localrow][localcol] = (R[localrow][localcol-1]+R[localrow+1][localcol])/2; Gnew[localrow][localcol] = (G[localrow][localcol-1]+G[localrow+1][localcol])/2; Bnew[localrow][localcol] = (B[localrow][localcol-1]+B[localrow+1][localcol])/2; } else if (row==(rowsize-1) &&col==0){ Rnew[localrow][localcol] = (R[localrow][localcol+1]+R[localrow-1][localcol])/2; Gnew[localrow][localcol] = (G[localrow][localcol+1]+G[localrow-1][localcol])/2; Bnew[localrow][localcol] = (B[localrow][localcol+1]+B[localrow-1][localcol])/2; } else if (row==(rowsize-1) &&col==(colsize-1)){ Rnew[localrow][localcol] = (R[localrow][localcol-1]+R[localrow-1][localcol])/2; Gnew[localrow][localcol] = (G[localrow][localcol-1]+G[localrow-1][localcol])/2; Bnew[localrow][localcol] = (B[localrow][localcol-1]+B[localrow-1][localcol])/2; } } } for(localrow=1;localrow<=myrowsize;localrow++){ for (localcol=1;localcol<=mycolsize;localcol++){ R[localrow][localcol] = Rnew[localrow][localcol]; G[localrow][localcol] = Gnew[localrow][localcol]; B[localrow][localcol] = 
Bnew[localrow][localcol]; } } } /* Output timing result */ MPI_Barrier(new_comm); if(rank==0){ gettimeofday(&tim, NULL); double t2=tim.tv_sec+(tim.tv_usec/1000000.0); MPI_Get_processor_name(name,&len); printf("Rank %d on %s: %.6lf seconds elapsed\n", rank,name,t2-t1); } /* Gather data from processes and output on process 0 */ if(rank==0){ localrow = 1; fout= fopen("DavidBlurMPI.ps", "w"); for (k=0;k<nlines;k++) fprintf(fout,"\n%s", lines[k]); fprintf(fout,"\n"); for(row=0;row<rowsize;row++){ coords[0] = row/prowsize; for (k=0;k<npcols;k++){ coords[1] = k; MPI_Cart_rank(new_comm, coords, &dest); nsend = (k<npcols-1 ? pcolsize : lastcolsize); coffset = k*pcolsize; if(dest!=0){ MPI_Recv(Rrow+coffset,nsend,MPI_INT,dest,tag,new_comm,&status); MPI_Recv(Grow+coffset,nsend,MPI_INT,dest,tag,new_comm,&status); MPI_Recv(Brow+coffset,nsend,MPI_INT,dest,tag,new_comm,&status); } else{ for(localcol=1;localcol<=mycolsize;localcol++){ Rrow[coffset+localcol-1] = R[localrow][localcol]; Grow[coffset+localcol-1] = G[localrow][localcol]; Brow[coffset+localcol-1] = B[localrow][localcol]; } localrow++; } } for(col=0;col<colsize;col++){ fprintf(fout,"%02x%02x%02x",Rrow[col],Grow[col],Brow[col]); lineno++; if (lineno==linelen){ fprintf(fout,"\n"); lineno = 0; } } } fclose(fout); } else{ for(localrow=1;localrow<=myrowsize;localrow++){ MPI_Send(&R[localrow][1],mycolsize,MPI_INT,0,tag,new_comm); MPI_Send(&G[localrow][1],mycolsize,MPI_INT,0,tag,new_comm); MPI_Send(&B[localrow][1],mycolsize,MPI_INT,0,tag,new_comm); } } /* Finalize and exit */ MPI_Finalize(); return 0; }
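/* Note on the blur program above: the Cartesian grid is created with
 * periods = {0,0}, so MPI_Cart_shift returns MPI_PROC_NULL for neighbours
 * that would fall outside the image.  Point-to-point calls addressed to
 * MPI_PROC_NULL complete immediately and transfer no data, which is why the
 * halo exchanges need no special casing at the edges.  A minimal sketch of
 * one such shift, reusing the variables above:
 */
MPI_Cart_shift(new_comm, 0, 1, &up, &down);          /* up/down may be MPI_PROC_NULL */
MPI_Sendrecv(&R[1][1],             mycolsize, MPI_INT, up,   tag,
             &R[myrowsize + 1][1], mycolsize, MPI_INT, down, tag,
             new_comm, MPI_STATUS_IGNORE);           /* harmless no-op toward MPI_PROC_NULL */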
int main( int argc , char * argv[ ] ) { char ** pcmd = argv ; int icmd = 1 ; char buffer[ MAXCHARINLINE ] ; int lenBuff ; char cache[ MAXCHARINLINE ] ; int itmp , itmp2 ; double dtmp ; char ctmp , tmp_char ; char stmp[ 300 ] , stmp2[ 300 ] , tmpString[ 300 ]; double dtmpArray[ 150 ] ; int info , signal , blank_signal ; register FILE * debug ; char * flag ; char * command , * set_env , * get_NID , * env_value ; int exe = NO ; int command_signal ; while( icmd < argc ) { pcmd ++ ; flag = * pcmd ; //printf("\nNo.%d argument , Currently @ flag = %s ...\n\n" , icmd , flag ); if( ( * flag == '-' ) && ( strlen( flag ) == 2 ) ) { switch ( *( flag + 1 ) ) { case 'e' : //strcpy( command , *( ++ pcmd ) ) ; info = asprintf( &command , "%s" , *( ++ pcmd ) ) ; if( info == -1 ) { fprintf( stderr , "\nOops ... Did not successfully 10-4 your command line ...\n\n" ) ; exit( 3 ) ; } else { exe = YES ; } icmd = icmd + 2 ; break ; case 'h' : printf("\nUsage: %s [ -e 'command line to be executed' ] \n" , * argv ); /* printf("\n Note : 1) when -m is specified as \"None\" or \"none\" or \"NONE\", no reference will be checked and orbital numbers provided in CI file will be directly used.\n\n"); printf("\n Note : 2) Default group selection : [ -p \"Donor-Block\"] [ -q \"Acceptor-Block\"] [ -r \"Bridge-Block\"] [ -x \"Irrelevant-Block\"] [ -z \"Irrelevant-Block-2\"]\n\n\n") ; printf("\n Note : 3) Default fragment names selection : [ -D \"Donor\"] [ -A \"Acceptor\"]\n\n\n") ; printf("\n Note : 4) If one of the partition is not necessary, \"None/none/None\" has to be specified ...\n\n\n") ; */ icmd = icmd + 1 ; exit( 1 ) ; default : printf("\n\nInvalid option ' %s ' ... Please refer to the usage by typing ' %s -h '\n\n" , flag , * argv ); icmd = argc ; exit( 1 ); } } else { printf("\n\nInvalid option ' %s ' ... Please refer to the usage by typing ' %s -h '\n\n" , flag , * argv ); exit(1); } } /* MPI Setup */ int myid , numprocs , namelen ; char processor_name[ MPI_MAX_PROCESSOR_NAME ] ; MPI_Init( &argc , &argv ) ; MPI_Comm_rank( MPI_COMM_WORLD , &myid ) ; MPI_Comm_size( MPI_COMM_WORLD , &numprocs ) ; MPI_Get_processor_name( processor_name , &namelen ) ; info = asprintf( &set_env , "ZM_MPI_RANK=%d" , myid ) ; info = putenv( set_env ) ; env_value = getenv( "ZM_MPI_RANK" ) ; printf( "\nRank is [ %s ] \n" , env_value ) ; info = asprintf( &get_NID , "ZM_NID=%s" , processor_name ); info = putenv( get_NID ) ; env_value = getenv( "ZM_NID" ) ; printf( "Node ID is [ %s ] \n" , env_value ) ; command_signal = system( command ) ; //fprintf( stderr , "Hello, World! Process %d of %d on %s\n" , myid , numprocs , processor_name ) ; MPI_Finalize() ; /* MPI Done */ return( 0 ) ; }
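/* Note on the program above: `command` is only assigned when the -e option
 * is parsed, yet system(command) is called unconditionally, so a run without
 * -e passes an uninitialized pointer to system().  A minimal guard, assuming
 * command is initialized to NULL where it is declared:
 */
char *command = NULL;                  /* initialize at declaration */
/* ... argument parsing and MPI setup as above ... */
if (exe == YES && command != NULL) {
    command_signal = system(command);
} else if (myid == 0) {
    fprintf(stderr, "No command supplied with -e; nothing to execute.\n");
}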
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; char processor_name[128]; int namelen = 128; int buf0[buf_size]; int buf1[buf_size]; MPI_Status status; MPI_Comm temp, intercomm; int trank, tnprocs; int drank, dnprocs, rleader, rnprocs; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); if (nprocs < 3) { printf ("not enough tasks\n"); } else { /* need to make split communicator temporarily... */ MPI_Comm_split (MPI_COMM_WORLD, rank % 2, nprocs - rank, &temp); if (temp != MPI_COMM_NULL) { MPI_Comm_size (temp, &tnprocs); MPI_Comm_rank (temp, &trank); /* create an intercommunicator temporarily so can merge it... */ rleader = ((rank + nprocs) % 2) ? nprocs - 2 : nprocs - 1; if ((trank == 0) && (rank % 2)) { MPI_Recv (buf0, buf_size, MPI_INT, rleader, 0, MPI_COMM_WORLD, &status); } MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD, rleader, INTERCOMM_CREATE_TAG, &intercomm); if ((trank == 0) && !(rank % 2)) { memset (buf0, 0, buf_size); MPI_Send (buf0, buf_size, MPI_INT, 1, 0, temp); } else { printf ("(%d) Split communicator too small\n", rank); } MPI_Comm_free (&temp); if (intercomm != MPI_COMM_NULL) { MPI_Comm_size (intercomm, &dnprocs); MPI_Comm_rank (intercomm, &drank); MPI_Comm_remote_size (intercomm, &rnprocs); if (rnprocs > drank) { if (rank % 2) { memset (buf1, 1, buf_size); MPI_Recv (buf0, buf_size, MPI_INT, drank, 0, intercomm, &status); MPI_Send (buf1, buf_size, MPI_INT, drank, 0, intercomm); } else { memset (buf0, 0, buf_size); MPI_Send (buf0, buf_size, MPI_INT, drank, 0, intercomm); MPI_Recv (buf1, buf_size, MPI_INT, drank, 0, intercomm, &status); } } else { printf ("(%d) Intercomm too small (lrank = %d; remote size = %d)\n", rank, drank, rnprocs); } MPI_Comm_free (&intercomm); } else { printf ("(%d) Got MPI_COMM_NULL\n", rank); } } else { printf ("(%d) MPI_Comm_split got MPI_COMM_NULL\n", rank); } } MPI_Barrier (MPI_COMM_WORLD); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
int main(int argc, char *argv[])
{
    int namelen, rank, size, i, error;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int buffer[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    MPI_Request request;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &namelen);
    fprintf(stderr, "Process %d of %d on %s\n", rank, size, processor_name);

    if (rank == 0) {
        for (i = 0; i < 10; i++) {
            buffer[i] = 42 + i;
        }
        for (i = 1; i < size; i++) {
            fprintf(stderr, "Send to %d\n", i);
            /* buffer is a C int array, so the matching datatype is MPI_INT
               (MPI_INTEGER is the Fortran INTEGER type) */
            error = MPI_Send(buffer, 10, MPI_INT, i, 0, MPI_COMM_WORLD);
            if (error != MPI_SUCCESS) {
                fprintf(stderr, "Send failed! %d\n", error);
            }
        }
    } else {
        fprintf(stderr, "Irecv from %d\n", 0);
        error = MPI_Irecv(buffer, 10, MPI_INT, 0, 0, MPI_COMM_WORLD, &request);
        if (error != MPI_SUCCESS) {
            fprintf(stderr, "Irecv failed! %d\n", error);
        } else {
            error = MPI_Wait(&request, &status);
            if (error != MPI_SUCCESS) {
                fprintf(stderr, "Wait failed! %d\n", error);
            } else {
                fprintf(stderr, "Received from 0: {");
                for (i = 0; i < 10; i++) {
                    fprintf(stderr, "%d ", buffer[i]);
                }
                fprintf(stderr, "}\n");
            }
        }
    }
    fprintf(stderr, "Done!\n");
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[]) { int rank, comm_size; int prev; char name[MPI_MAX_PROCESSOR_NAME]; int nlen; INT_T size = 100000; double time = 0; struct opt args; MPI_Init(&argc,&argv); // get rank and size from communicator MPI_Comm_size(MPI_COMM_WORLD,&comm_size); MPI_Comm_rank(MPI_COMM_WORLD,&rank); //printf("%d %d\n", rank, argc); parse_args(argc, argv, &args); if(args.size > 0 && args.parallel) { size = args.size/comm_size; } else if(args.size > 0) { size = args.size; } if(comm_size > 1 && (! args.parallel) && rank == 0) { printf("WARNING: sequential execution with more than 1 thread gives wrong data\n"); } MPI_Get_processor_name(name,&nlen); INT_T* A = gen_data(rank, size); arrayscan(A, size, MPI_COMM_WORLD, commscan_primitive); #ifdef DEBUG { INT_T a = check_asc(A, size); if(a == -1) { printf("ascending check ok\n"); } else { printf("ascending error on position %ld\n", a); } #ifdef DEBUGDEBUG for(INT_T i = 0; i < size; i++) { printf("%ld\n", A[i]); } #endif } #endif MPI_Barrier(MPI_COMM_WORLD); time = - MPI_Wtime(); /* if (rank==0) { printf("Rank %d initializing, total %d\n",rank,comm_size); } else { MPI_Recv(&prev,1,MPI_INT,rank-1,HELLO,MPI_COMM_WORLD,MPI_STATUS_IGNORE); printf("Rank %d on %s received from %d, passing on\n",rank,name,prev); } if (rank+1<size) MPI_Send(&rank,1,MPI_INT,rank+1,HELLO,MPI_COMM_WORLD); */ if(args.parallel) arrayscan(A, size, MPI_COMM_WORLD, my_commscan); else localscan(A, size); time += MPI_Wtime(); INT_T sum; double rtime; if(rank == 0 && comm_size > 1 && args.parallel) { MPI_Recv(&sum, 1, INT_MPI_T, comm_size-1, SCAN, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if(rank == comm_size-1 && comm_size > 1 && args.parallel) { MPI_Send(A + size - 1, 1, INT_MPI_T, 0, SCAN, MPI_COMM_WORLD); } else { sum = A[size - 1]; } MPI_Reduce(&time, &rtime, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if(rank == 0 /*&& sum == size*comm_size*(size*comm_size+1)/2*/) { printf("inclusive-scan np=%d s=%ld%s time=%lf%s\n", comm_size, comm_size*size, args.parallel ? "" : " local", rtime, sum == size*comm_size*(size*comm_size+1)/2 ? "" : " FAIL"); } /*else if (rank == 0) { printf("inclusive-scan np=%d s=%ld %s time=%lf FAIL\n", comm_size, comm_size*size, rtime); }*/ #ifndef EVAL printf("Rank %3d min: %20ld sum: %20ld time: %2lf\n", rank, A[0], A[size-1], time); if(rank == 0) { printf("Sum should be %ld\n", size*comm_size*(size*comm_size+1)/2); } #endif #ifdef DEBUGDEBUG for(INT_T i = 0; i < size; i++) { printf("%ld\n", A[i]); } #endif MPI_Finalize(); return 0; }
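/* Note on the benchmark above: it builds a distributed inclusive prefix sum
 * from a local scan plus a custom communicator-level scan.  For reference, a
 * small self-contained sketch of the same idea built on the standard
 * MPI_Exscan collective (the names here are illustrative, not from the
 * source above):
 */
#include <mpi.h>
#include <stdio.h>

static void local_inclusive_scan(long *a, long n)
{
    for (long i = 1; i < n; i++)
        a[i] += a[i - 1];
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    long a[4] = { 1, 1, 1, 1 };           /* each rank holds four ones */
    local_inclusive_scan(a, 4);           /* a = {1,2,3,4} locally */

    long local_total = a[3], offset = 0;
    /* exclusive prefix of the per-rank totals; the result on rank 0 is
       undefined, so force it to 0 */
    MPI_Exscan(&local_total, &offset, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
    if (rank == 0) offset = 0;

    for (int i = 0; i < 4; i++)
        a[i] += offset;                   /* global inclusive prefix sums */

    printf("rank %d: last prefix = %ld\n", rank, a[3]);
    MPI_Finalize();
    return 0;
}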
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; char processor_name[128]; int namelen = 128; int buf0[buf_size]; int buf1[buf_size]; int done; MPI_Status statuses[2]; MPI_Request reqs[2]; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); /* this code is very similar to no-error-waitall-any_src.c */ /* but deadlocks since task 2's send and recv are inverted... */ if (nprocs < 3) { printf ("not enough tasks\n"); } else if (rank == 0) { MPI_Irecv (buf0, buf_size, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]); MPI_Irecv (buf1, buf_size, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]); MPI_Waitall (2, reqs, statuses); MPI_Send (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD); } else if (rank == 1) { memset (buf0, 0, buf_size); MPI_Isend (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &reqs[0]); MPI_Isend (buf0, buf_size, MPI_INT, 2, 1, MPI_COMM_WORLD, &reqs[1]); MPI_Waitany (2, reqs, &done, statuses); MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD); MPI_Recv (buf1, buf_size, MPI_INT, 0, 1, MPI_COMM_WORLD, statuses); MPI_Wait (&reqs[(done + 1) % 2], statuses); } else if (rank == 2) { sleep (60); MPI_Recv (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD, statuses); } MPI_Barrier (MPI_COMM_WORLD); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; char processor_name[128]; int namelen = 128; int buf0[buf_size]; int buf1[buf_size]; MPI_Request aReq[2]; MPI_Status aStatus[2]; MPI_Status status; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); if (nprocs < 2) { printf ("not enough tasks\n"); } else { if (rank == 0) { memset (buf0, 0, buf_size*sizeof(int)); MPI_Send_init (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[0]); MPI_Recv_init (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[1]); MPI_Start (&aReq[0]); MPI_Start (&aReq[1]); MPI_Waitall (2, aReq, aStatus); memset (buf0, 1, buf_size*sizeof(int)); MPI_Startall (2, aReq); MPI_Waitall (2, aReq, aStatus); } else if (rank == 1) { memset (buf1, 1, buf_size*sizeof(int)); MPI_Recv_init (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[0]); MPI_Send_init (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[1]); MPI_Start (&aReq[0]); MPI_Start (&aReq[1]); MPI_Waitall (2, aReq, aStatus); memset (buf1, 0, buf_size*sizeof(int)); MPI_Startall (2, aReq); MPI_Waitall (2, aReq, aStatus); } } MPI_Barrier (MPI_COMM_WORLD); MPI_Request_free (&aReq[0]); MPI_Request_free (&aReq[1]); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
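/* Note on the test above: it exercises the persistent-request life cycle.
 * The general pattern (a sketch with illustrative names: sendbuf, recvbuf,
 * n, peer, niter; not the original code) is: create once with
 * MPI_Send_init/MPI_Recv_init, reuse with MPI_Startall + MPI_Waitall, and
 * release with MPI_Request_free:
 */
MPI_Request reqs[2];
MPI_Send_init(sendbuf, n, MPI_INT, peer, 0, MPI_COMM_WORLD, &reqs[0]);
MPI_Recv_init(recvbuf, n, MPI_INT, peer, 0, MPI_COMM_WORLD, &reqs[1]);
for (int iter = 0; iter < niter; iter++) {
    MPI_Startall(2, reqs);                       /* reactivate both requests */
    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);   /* requests become inactive again */
    /* the buffers may only be modified while the requests are inactive */
}
MPI_Request_free(&reqs[0]);
MPI_Request_free(&reqs[1]);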
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; MPI_Comm comm = MPI_COMM_WORLD; char processor_name[128]; int namelen = 128; int buf[BUF_SIZE * 2]; int i, j, k, index, outcount, flag; int indices[2]; MPI_Request aReq[2]; MPI_Status aStatus[2]; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (comm, &nprocs); MPI_Comm_rank (comm, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); if (rank == 0) { /* set up persistent sends... */ MPI_Send_init (&buf[0], BUF_SIZE, MPI_INT, 1, 0, comm, &aReq[0]); MPI_Send_init (&buf[BUF_SIZE], BUF_SIZE, MPI_INT, 1, 1, comm, &aReq[1]); /* initialize the send buffers */ for (i = 0; i < BUF_SIZE; i++) { buf[i] = i; buf[BUF_SIZE + i] = BUF_SIZE - 1 - i; } } for (k = 0; k < 4; k++) { if (rank == 1) { /* zero out the receive buffers */ bzero (buf, sizeof(int) * BUF_SIZE * 2); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* start the persistent sends... */ if (k % 2) { MPI_Startall (2, &aReq[0]); } else { for (j = 0; j < 2; j++) { MPI_Start (&aReq[j]); } } /* complete the sends */ if (k < 2) { /* use MPI_Testany */ for (j = 0; j < 2; j++) { flag = 0; while (!flag) { MPI_Testany (2, aReq, &index, &flag, aStatus); } } } else { /* use MPI_Testsome */ j = 0; while (j < 2) { outcount = 0; while (!outcount) { MPI_Testsome (2, aReq, &outcount, indices, aStatus); } j += outcount; } } } else if (rank == 1) { /* set up receives for all of the sends */ for (j = 0; j < 2; j++) { MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE, MPI_INT, 0, j, comm, &aReq[j]); } /* complete all of the receives... */ MPI_Waitall (2, aReq, aStatus); } } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { /* free the persistent requests */ for (i = 0 ; i < 2; i++) { MPI_Request_free (&aReq[i]); } } MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; char processor_name[128]; int namelen = 128; int buf0[buf_size]; int buf1[buf_size]; MPI_Status status; MPI_Request req; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); if (nprocs < 3) { printf ("not enough tasks\n"); } else if (rank == 0) { // sleep (60); MPI_Irecv (buf0, buf_size, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req); MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status); MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD); MPI_Recv (buf1, buf_size, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); MPI_Wait (&req, &status); } else if (rank == 2) { memset (buf0, 0, buf_size); // sleep (30); MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD); MPI_Recv (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); MPI_Send (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD); } else if (rank == 1) { memset (buf1, 1, buf_size); MPI_Send (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD); } MPI_Barrier (MPI_COMM_WORLD); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
/* * MAIN */ int main(int argc, char *argv[]) { int ret; char processor_name[MPI_MAX_PROCESSOR_NAME]; int namelen; double start_time; double used_time; double avg_time; double us_rate; int lenbuf; MPI_Status status; /* FILE *fparam ;*/ int i; int k; int j; struct thr t[N_THREADS+1]; double crear_threads[N_THREADS+1]; #if defined(HAVE_WINDOWS_H) HANDLE thid[N_THREADS+1]; DWORD thinfo ; #else pthread_attr_t attr ; pthread_t thid[N_THREADS+1]; #endif setbuf(stdout, NULL); #if (0) if (argc != 2) { printf("Use: client <max_len> \n"); exit(1); } max_len =atoi(argv[1]); #endif /* #if (1) #if defined(__LINUX__) fparam = fopen("ping.in","rt") ; #endif #if defined(__SUNOS__) fparam = fopen("ping.in","rt") ; #endif #if defined(__SP2__) fparam = fopen("/home/ssoo/gpmimd/FELIX/XMP/xmp/test/mpi/ping_r/ping.in","rt") ; #endif if (fparam == NULL) { printf("ERROR: can not open ping.in, sorry.\n"); exit(1); } ret = fscanf(fparam,"max_len=%i",&max_len) ; fclose(fparam) ; if (ret != 1) { printf("ERROR: can not read a valid 'max_len' value from ping.in, sorry.\n"); exit(1); } #endif */ max_len = 1024 * 1024 ; if ( (max_len <= 0) || (max_len >= 8*1024*1024) ) { printf("ERROR: max_len = %i\n",max_len); printf("ERROR: (max_len <= 0) || (max_len >= 4*1024*1024)\n"); exit(1) ; } ret = MPI_Init(&argc, &argv); if (ret < 0) { printf("Can't init\n"); exit(1); } MPI_Comm_rank(MPI_COMM_WORLD,&me); MPI_Get_processor_name(processor_name,&namelen); MPI_Comm_size(MPI_COMM_WORLD, &nproc); #if (0) printf("Process %d; total %d is alive on %s\n",me,nproc,processor_name); #endif MPI_Barrier(MPI_COMM_WORLD) ; #if (0) printf("PING LISTO \n"); if (me == 0) { ret=MPI_Send(buf,12,MPI_CHAR,1,1, MPI_COMM_WORLD); } else { ret=MPI_Recv(buf,12,MPI_CHAR,0,1, MPI_COMM_WORLD, &status); printf("count = %d\n", status.count); } #endif /* * tiempo en crear y esperar por N threads */ /* if (me == 0) { i = 1; while(i <= N_THREADS) { start_time = MPI_Wtime(); #if defined(HAVE_WINDOWS_H) for(j = 0; j < PRUEBAS_THREADS; j ++) { for (k = 0; k < i; k++) thid[k] = CreateThread (NULL, 0, (LPTHREAD_START_ROUTINE) nulo,NULL,0,&thinfo); WaitForMultipleObjects(k, thid, TRUE, INFINITE); } #else for(j = 0; j < PRUEBAS_THREADS; j ++) { for (k = 0; k < i; k++) pthread_create(&thid[k], &attr, (void *(*)(void *))nulo, NULL); for (k = 0; k < i; k++) pthread_join(thid[k], NULL); } #endif crear_threads[i]=((MPI_Wtime() - start_time)) / PRUEBAS_THREADS; printf("Tiempo para %d threads = %f\n", i, crear_threads[i]); i *= 2; } } */ /* * envios y recepciones... 
*/ i = 1 ; while(i <= N_THREADS) { if (me == 0) printf("N_THREADS = %d \n\n", i); MPI_Barrier(MPI_COMM_WORLD) ; lenbuf = 1; while (lenbuf <= max_len) { buf = (char *) malloc((unsigned) lenbuf * N_THREADS ); if (buf == NULL) { perror("Error en malloc"); exit(1); } start_time = MPI_Wtime(); #if (0) transfiere(&t[i]); #endif #if defined(HAVE_WINDOWS_H) for (k = 0; k < i; k++) { t[k].lenbuf = lenbuf; t[k].tag = k; t[k].buf = buf + (k * lenbuf); thid[k] = CreateThread (NULL, 0, (LPTHREAD_START_ROUTINE) transfiere,&(t[k]),0,&thinfo); } WaitForMultipleObjects(k, thid, TRUE, INFINITE); #else for (k = 0; k < i; k++) { t[k].lenbuf = lenbuf; t[k].tag = k; t[k].buf = buf + (k * lenbuf); pthread_create(&thid[k], &attr, (void *(*)(void *))transfiere, &t[k]); } for (k = 0; k < i; k++) pthread_join(thid[k], NULL); #endif used_time = (MPI_Wtime() - start_time); avg_time = used_time / (float) PRUEBAS; if (avg_time > 0) /* rate is megabytes per second */ us_rate = (double)((nproc * lenbuf * (i))/ (avg_time*(double)1000000)); else us_rate = 0.0; if (me == 0) printf("len_bytes=%d avg_time_sec=%f rate_Mbytes_sec=%f\n", lenbuf, (double)avg_time/(2.0*i), (double)us_rate); lenbuf *= 2; free(buf); } i *= 2; } if (me == 0) { char c; read(0, &c, 1); } #if (0) if (me != 0) printf("\nclock resolution in seconds: %10.8f\n", MPI_Wtick()); #endif MPI_Finalize(); free(buf); exit(0); }
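/* Note on the benchmark above: it starts several pthreads that, through
 * transfiere(), appear to issue MPI calls concurrently, yet the library is
 * initialized with plain MPI_Init.  When more than one thread makes MPI
 * calls, initialization should request MPI_THREAD_MULTIPLE and check what
 * was actually granted.  A minimal sketch:
 */
int provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
if (provided < MPI_THREAD_MULTIPLE) {
    fprintf(stderr, "MPI_THREAD_MULTIPLE not available (got %d)\n", provided);
    MPI_Abort(MPI_COMM_WORLD, 1);
}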
int main (int argc, char **argv) { struct arguments arguments; /* Parse our arguments; every option seen by parse_opt will be reflected in arguments. */ argp_parse (&argp, argc, argv, 0, 0, &arguments); int run_type; run_type = 0; //default is serial if (sscanf (arguments.args[0], "%i", &run_type)!=1) {} int iterations; iterations = 0; //default is serial if (sscanf (arguments.args[1], "%i", &iterations)!=1) {} int count_when; count_when = 1000; if (sscanf (arguments.args[2], "%i", &count_when)!=1) {} char print_list[200]; //used for input list if (sscanf (arguments.args[3], "%s", &print_list)!=1) {} // printf("Print list = %s\n", print_list); //Extract animation list from arguments char char_array[20][12] = { NULL }; //seperated input list int animation_list[20][2] = { NULL }; //integer input list start,range char *tok = strtok(print_list, ","); //counters int i,j,k,x,y,ii,jj; ii = 0; jj = 0; //Loop over tokens parsing our commas int tok_len = 0; while (tok != NULL) { //first loop parses out commas tok_len = strlen(tok); for (jj=0;jj<tok_len;jj++) { char_array[ii][jj] = tok[jj]; } // printf("Tok = %s\n", char_array[ii]); tok = strtok(NULL, ","); ii++; } //looking for a range input, convert to ints int stop; for (ii=0;ii<20;ii++) { //convert first number to int tok = strtok(char_array[ii], "-"); if (tok != NULL) { animation_list[ii][0] = atoi(tok); tok = strtok(NULL, ","); } //look for second number, add to range if (tok != NULL) { stop = atoi(tok); animation_list[ii][1] = stop - animation_list[ii][0]; } // if (rank == 0) // { // printf("Animation_list = %i, %i\n", // animation_list[ii][0], animation_list[ii][1]); // } } //should an animation be generated //prints a bunch of .pgm files, have to hand //make the gif... int animation; animation = arguments.animation; //verbose? 
int verbose; verbose = arguments.verbose; // printf("VERBOSE = %i",verbose); if (verbose>0 && verbose<=10) { verbose = 1; } // Initialize the MPI environment MPI_Init(NULL, NULL); // Get the number of processes int world_size; MPI_Comm_size(MPI_COMM_WORLD, &world_size); // Get the rank of the process int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Get the name of the processor char processor_name[MPI_MAX_PROCESSOR_NAME]; int name_len; MPI_Get_processor_name(processor_name, &name_len); //Print run information, exit on bad command line input if (rank == 0 && verbose == 1) { printf("Verbose=%i, RunType=%i, Iterations=%i, CountWhen=%i, Animation=%i\n", verbose,run_type,iterations,count_when, animation); } if (world_size>1 && run_type ==0) { printf("Runtype and processors count not consistant\n"); MPI_Finalize(); exit(0); } if (world_size==1 && run_type>0) { printf("Runtype and processors count not consistant\n"); MPI_Finalize(); exit(0); } if (count_when <= 0) { if (rank == 0) { printf("Invalid count interval, positive integers only\n"); } MPI_Finalize(); exit(0); } //serial if (world_size == 1 && run_type == 0) { ncols=1; nrows=1; } //Blocked else if (world_size>1 && run_type == 1) { ncols = 1; nrows = world_size; my_col = 0; my_row = rank; } //Checker else if (world_size>1 && run_type == 2) { ncols = (int)sqrt(world_size); nrows = (int)sqrt(world_size); my_row = rank/nrows; my_col = rank-my_row*nrows; if (ncols*nrows!=world_size) { if (rank == 0) { printf("Number of processors must be square, Exiting\n"); } MPI_Finalize(); exit(0); } } // if (verbose == 1) // { // printf("WR,row,col=%i,%i,%i\n",rank,my_row,my_col); // } //////////////////////READ IN INITIAL PGM//////////////////////////////// if(!readpgm("life.pgm")) { // printf("WR=%d,HERE2\n",rank); if( rank==0 ) { pprintf( "An error occured while reading the pgm file\n" ); } MPI_Finalize(); return 1; } // Count the life forms. Note that we count from [1,1] - [height+1,width+1]; // we need to ignore the ghost row! 
i = 0; for(y=1; y<local_height+1; y++ ) { for(x=1; x<local_width+1; x++ ) { if( field_a[ y * field_width + x ] ) { i++; } } } // pprintf( "%i local buggies\n", i ); int total; MPI_Allreduce( &i, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if( rank==0 && verbose == 1 ) { pprintf( "%i total buggies\n", total ); } // printf("WR=%d, Row=%d, Col=%d\n",rank,my_row,my_col); //Row and column size per processor int rsize, csize; rsize = local_width; csize = local_height; if (rank == 0 && verbose == 1) { printf("rsize,csize,NP = %d, %d, %d\n",rsize,csize,world_size); } //Create new derived datatype for writing to files MPI_Datatype submatrix; int array_of_gsizes[2]; int array_of_distribs[2]; int array_of_dargs[2]; int array_of_psize[2]; if (run_type == 1) { if (rank == 0) { printf("g0,g1 = %i,%i\n", local_height*ncols, local_width); printf("p0,p1 = %i,%i\n", nrows, ncols); } array_of_gsizes[0] = local_height*ncols; array_of_gsizes[1] = local_width; array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG; array_of_psize[0] = nrows; array_of_psize[1] = ncols; // int order = MPI_ORDER_C; //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes //order,oldtype,*newtype MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix); MPI_Type_commit(&submatrix); } else if (run_type == 2) { if (rank == 0) { printf("g0,g1 = %i,%i\n", local_height*ncols, local_width*nrows); printf("p0,p1 = %i,%i\n", nrows, ncols); } array_of_gsizes[0] = local_height*ncols; array_of_gsizes[1] = local_width*nrows; array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG; array_of_psize[0] = nrows; array_of_psize[1] = ncols; // int order = MPI_ORDER_C; //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes //order,oldtype,*newtype MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix); MPI_Type_commit(&submatrix); } MPI_Barrier(MPI_COMM_WORLD); //////////////////ALLOCATE ARRAYS, CREATE DATATYPES///////////////////// //Create new column derived datatype MPI_Datatype column; //count, blocklength, stride, oldtype, *newtype MPI_Type_hvector(csize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &column); MPI_Type_commit(&column); //Create new row derived datatype MPI_Datatype row; //count, blocklength, stride, oldtype, *newtype MPI_Type_hvector(rsize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &row); MPI_Type_commit(&row); //allocate arrays and corner storage unsigned char *section; unsigned char *neighbors; //to use unsigned char *top; unsigned char *bot; unsigned char *left; unsigned char *right; //to send unsigned char *ttop; unsigned char *tbot; unsigned char *tleft; unsigned char *tright; //MALLOC!! 
    section = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    neighbors = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    top = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    bot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    left = (unsigned char*)malloc(csize*sizeof(unsigned char));
    right = (unsigned char*)malloc(csize*sizeof(unsigned char));
    ttop = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tbot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tleft = (unsigned char*)malloc(csize*sizeof(unsigned char));
    tright = (unsigned char*)malloc(csize*sizeof(unsigned char));

    //corners
    unsigned char topleft,topright,botleft,botright;  //used in calculations
    unsigned char ttopleft,ttopright,tbotleft,tbotright;
    topleft = 255;
    topright = 255;
    botleft = 255;
    botright = 255;

    //used for animation: each process will put its own result in and then
    //each will send to process 0, which will add them up
    unsigned char* full_matrix;
    unsigned char* full_matrix_buffer;
    if (animation == 1)
    {
        int msize1 = rsize*ncols*csize*nrows;
        full_matrix = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        full_matrix_buffer = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        for (i=0; i<msize1; i++)
        {
            full_matrix[i] = 0;
            full_matrix_buffer[i] = 0;
        }
    }
    // printf("Rsize,Lsize,Fsize=%i %i %i,Csize,Lsize,Fsize=%i %i %i\n",rsize,local_width,field_width,csize,local_height,field_height);

    //Serial initialize vars
    int count = 0;
    if (world_size == 1 && run_type == 0)
    {
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                if (field_a[(i+1)*(2+rsize) + j + 1]) { section[i*rsize + j] = 0; count += 1; }
                else                                  { section[i*rsize + j] = 255; }
                top[j] = 255;  bot[j] = 255;
                ttop[j] = 255; tbot[j] = 255;
            }
            right[i] = 255;  left[i] = 255;
            tright[i] = 255; tleft[i] = 255;
        }
        // printf("COUNT 4 = %d\n", count);
    }
    //Blocked/Checkered initializing variables
    else if (world_size > 1 && (run_type == 1 || run_type == 2))
    {
        //initialize
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                if (field_a[(i+1)*(2+rsize) + j + 1]) { section[i*rsize + j] = 0; count += 1; }
                else                                  { section[i*rsize + j] = 255; }
                top[j] = 255;  bot[j] = 255;
                ttop[j] = 255; tbot[j] = 255;
            }
            right[i] = 255;  left[i] = 255;
            tright[i] = 255; tleft[i] = 255;
        }
        // MPI_Allreduce( &count, &total, 1, MPI_UNSIGNED_CHAR, MPI_SUM, MPI_COMM_WORLD );
        // if (rank == 0)
        // {
        //     printf("COUNT 4 = %d\n", total);
        // }
    }

    //header/footer for mpio writes; the header bytes spell "P5\n512 512\n255\n"
    char header1[15];
    header1[0] = 0x50;  header1[1] = 0x35;  header1[2] = 0x0a;
    header1[3] = 0x35;  header1[4] = 0x31;  header1[5] = 0x32;  header1[6] = 0x20;
    header1[7] = 0x35;  header1[8] = 0x31;  header1[9] = 0x32;  header1[10] = 0x0a;
    header1[11] = 0x32; header1[12] = 0x35; header1[13] = 0x35; header1[14] = 0x0a;
    char footer;
    footer = 0x0a;
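    /* The fifteen header bytes above are the ASCII PGM header for a 512x512,
       8-bit image, hard-coded.  A sketch of how the same header could be built
       for an arbitrary global size (the snprintf form and header_len variable
       are an assumption; the listing always writes 512x512):

           char header1[32];
           int header_len = snprintf(header1, sizeof(header1), "P5\n%d %d\n255\n",
                                     rsize*ncols, csize*nrows);
           // header_len would then replace the literal 15 used in the
           // MPI_File_set_view()/MPI_File_write() calls in the frame writer below.
    */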
    //make a frame or not?
    int create_frame = 0;

    //send to
    int send_to;
    int receive_from;
    int info[5];
    info[2] = rank;
    info[3] = rsize;
    info[4] = csize;
    unsigned char info2[4];
    info2[0] = topleft;
    info2[1] = topright;
    info2[2] = botleft;
    info2[3] = botright;
    int current_count;
    int location;

    //Gameplay
    for (k=0;k<iterations;k++)
    {
        //Count buggies
        if (k%count_when==0)
        {
            if (verbose == 1)
            {
                current_count = rsize*csize-count_buggies(rsize,csize,section);
                MPI_Allreduce( &current_count, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
                if (rank == 0)
                {
                    printf("Iteration=%5d, Count=%6d\n", k,total);
                }
                ////corner debug
                // printf("WR,tl,tr,bl,br = %d %d %d %d %d\n", rank, topleft, topright, botleft, botright);
            }
        }

        //Write to file serially for comparison
        //If animation is requested
        if (animation == 1 && run_type == 0)
        {
            //Put smaller matrix part into larger matrix
            for (i=0; i<csize; i++)
            {
                for (j=0; j<rsize; j++)
                {
                    location = (my_row*csize*rsize*ncols + my_col*rsize + i*rsize*ncols + j);
                    full_matrix_buffer[location] = section[i*rsize+j];
                }
                // if (rank == 0)
                // {
                //     printf("Location = %d\n", location);
                // }
            }
            //Gather matrix
            MPI_Reduce(full_matrix_buffer, full_matrix, rsize*ncols*csize*nrows,
                       MPI_UNSIGNED_CHAR, MPI_SUM, 0, MPI_COMM_WORLD);
            if (rank == 0 && run_type == 0)
            {
                write_matrix_to_pgm(k, rsize*ncols, csize*nrows, full_matrix);
            }
        }
        //mpio write pgm
        else if (animation == 1 && (run_type == 1 || run_type == 2))
        {
            //default is no frame
            create_frame = 0;
            for (ii=0;ii<20;ii++)
            {
                for (jj=0;jj<animation_list[ii][1]+1;jj++)
                {
                    // if (rank == 0)
                    // {
                    //     printf("a,ii,j,k= %i,%i,%i,%i, Frame? = %i\n",
                    //            animation_list[ii][0],ii,jj,k,(animation_list[ii][0]+jj-k)==0);
                    // }
                    if ((animation_list[ii][0] + jj - k) == 0)
                    {
                        create_frame = 1;
                        break;
                    }
                }
            }

            if (create_frame == 1)
            {
                //dynamic filename with leading zeroes for easy conversion to gif
                char buffer[128];
                snprintf(buffer, sizeof(char)*128, "Animation/frame%04d.pgm", k);

                /* open the file, and set the view */
                MPI_File file;
                MPI_File_open(MPI_COMM_WORLD, buffer, MPI_MODE_CREATE|MPI_MODE_WRONLY,
                              MPI_INFO_NULL, &file);
                MPI_File_set_view(file, 0, MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, "native", MPI_INFO_NULL);

                //write header
                MPI_File_write(file, header1, 15, MPI_CHAR, MPI_STATUS_IGNORE);

                //write matrix
                MPI_File_set_view(file, 15, MPI_UNSIGNED_CHAR, submatrix, "native", MPI_INFO_NULL);
                MPI_File_write_all(file, section, rsize*csize, MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);

                //write footer (trailing newline)
                MPI_File_set_view(file, 15+rsize*ncols*csize*nrows, MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR,
                                  "native", MPI_INFO_NULL);
                MPI_File_write(file, &footer, 1, MPI_CHAR, MPI_STATUS_IGNORE);

                //close the frame so it is flushed to disk
                MPI_File_close(&file);
            }
        }

        // BLOCKED COMMUNICATION //
        if (run_type == 1)
        {
            //change bot (send top) to account for middle area
            //alternate to avoid locking
            send_to = rank - 1;
            receive_from = rank + 1;

            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }
            //left and right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //send top, receive bot
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0) { MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD); }
                if (receive_from<world_size && receive_from>=0) { MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0) { MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
                if (send_to<world_size && send_to>=0) { MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD); }
            }
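            /* The even/odd split above (and in the exchanges that follow) is what
               prevents deadlock: neighbouring ranks never both block in MPI_Send at
               the same time.  A sketch of the same top/bottom exchange written with
               MPI_Sendrecv, which handles that ordering internally (illustrative
               only; MPI_PROC_NULL stands in for the out-of-range neighbour checks):

                   int up   = (rank-1 >= 0)         ? rank-1 : MPI_PROC_NULL;
                   int down = (rank+1 < world_size) ? rank+1 : MPI_PROC_NULL;
                   MPI_Sendrecv(ttop, 1, row, up,   0,
                                bot,  1, row, down, 0,
                                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            */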
            //change top to account for middle area
            //alternate to avoid locking
            send_to = rank + 1;
            receive_from = rank - 1;

            //send bot, receive top
            if (rank%2==0)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (send_to<world_size && send_to>=0) { MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD); }
                if (receive_from<world_size && receive_from>=0) { MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
            }
            else if (rank%2==1)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (receive_from<world_size && receive_from>=0)
                {
                    //*data,count,type,from,tag,comm,mpi_status
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    //*data,count,type,to,tag,comm
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }
        }
        // CHECKERED COMMUNICATION //
        else if (run_type == 2)
        {
            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }
            //left and right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }
            //corners
            ttopleft = tleft[0];
            tbotleft = tleft[csize-1];
            ttopright = tright[0];
            tbotright = tright[csize-1];

            //Send top, receive bot
            send_to = rank - nrows;
            receive_from = rank + nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0) { MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD); }
                if (receive_from<world_size && receive_from>=0) { MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0) { MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
                if (send_to<world_size && send_to>=0) { MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD); }
            }

            //Send bot, receive top
            send_to = rank + nrows;
            receive_from = rank - nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0) { MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD); }
                if (receive_from<world_size && receive_from>=0) { MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0) { MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
                if (send_to<world_size && send_to>=0) { MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD); }
            }

            //Send left, receive right
            send_to = rank - 1;
            receive_from = rank + 1;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD); }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row) { MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row) { MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD); }
            }

            //Send right, receive left
            //(note: the row type is used here; its extent is only correct because the
            // local tiles are square, i.e. rsize == csize, in this decomposition)
            send_to = rank + 1;
            receive_from = rank - 1;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tright, 1, row, send_to, 0, MPI_COMM_WORLD); }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row) { MPI_Recv(left, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, row, receive_from, 0,
MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tright, 1, row, send_to, 0, MPI_COMM_WORLD); } } //Send topright, receive botleft send_to = rank - ncols + 1; receive_from = rank + ncols - 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send topleft, receive botright send_to = rank - ncols - 1; receive_from = rank + ncols + 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send botleft, receive topright send_to = rank + ncols - 1; receive_from = rank - ncols + 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send botright, receive topleft send_to = rank + ncols + 1; receive_from = rank - ncols - 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } info2[0] = topleft; info2[1] = topright; info2[2] = botleft; info2[3] = botright; } // if (rank == 1){ // print_matrix(rsize, 1, top); // print_matrix(rsize, csize, section); // print_matrix(rsize, 1, bot); // printf("\n"); // } // printf("wr=%d,iteration=%d,maxval=%d, 11\n", rank, 
        //        k,(csize-1)*rsize-1+rsize);

        /////////// CELL UPDATES /////////////////
        //count neighbors
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                info[0] = i;
                info[1] = j;
                neighbors[i*rsize+j] = count_neighbors(info, info2, section, top, bot, left, right);
                // printf("%i",neighbors[i*rsize+j]);
            }
            // printf("\n");
        }

        //update cells
        current_count = 0;
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                //cell currently alive
                if (section[i*rsize+j] == 0)
                {
                    //2 or 3 neighbors lives on, else dies
                    if (neighbors[i*rsize+j] < 2 || neighbors[i*rsize+j] > 3)
                    {
                        section[i*rsize+j] = 255;
                    }
                }
                else
                {
                    //Exactly 3 neighbors spawns new life
                    if (neighbors[i*rsize+j] == 3)
                    {
                        section[i*rsize+j] = 0;
                    }
                }
            }
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    usleep(500000);  //sleep(0.5) truncates to sleep(0); usleep gives the intended half-second pause

    //free malloc stuff
    if( field_a != NULL ) free( field_a );
    if( field_b != NULL ) free( field_b );
    free(section);
    free(neighbors);
    free(top);
    free(bot);
    free(left);
    free(right);

    MPI_Finalize();
    exit (0);
}
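/* count_neighbors() is called above but not shown in this listing.  What follows
   is a minimal sketch of one plausible implementation, consistent with how it is
   called (info = {i, j, rank, rsize, csize}, info2 = {topleft, topright, botleft,
   botright}, halo arrays top/bot/left/right, 0 = alive, 255 = dead).  The real
   function is defined elsewhere and may differ.

   int count_neighbors(int *info, unsigned char *info2, unsigned char *section,
                       unsigned char *top, unsigned char *bot,
                       unsigned char *left, unsigned char *right)
   {
       int i = info[0], j = info[1], rsize = info[3], csize = info[4];
       int count = 0;
       int di, dj;

       for (di = -1; di <= 1; di++) {
           for (dj = -1; dj <= 1; dj++) {
               if (di == 0 && dj == 0) continue;          // skip the cell itself
               int ni = i + di, nj = j + dj;
               unsigned char v;

               if (ni >= 0 && ni < csize && nj >= 0 && nj < rsize)
                   v = section[ni*rsize + nj];               // interior neighbour
               else if (ni < 0 && nj < 0)           v = info2[0];  // topleft corner
               else if (ni < 0 && nj >= rsize)      v = info2[1];  // topright corner
               else if (ni >= csize && nj < 0)      v = info2[2];  // botleft corner
               else if (ni >= csize && nj >= rsize) v = info2[3];  // botright corner
               else if (ni < 0)                     v = top[nj];   // top halo row
               else if (ni >= csize)                v = bot[nj];   // bottom halo row
               else if (nj < 0)                     v = left[ni];  // left halo column
               else                                 v = right[ni]; // right halo column

               if (v == 0) count++;                          // 0 means alive
           }
       }
       return count;
   }
*/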