Example #1
int
main(int argc, char **argv)
{
  int status;
  char* env = NULL;

  env = getenv("GPAW_OFFLOAD");
  if (env) {
      errno = 0;
      gpaw_offload_enabled = strtol(env, NULL, 10);
      if (errno) {
        fprintf(stderr, 
                "Wrong value for for GPAW_OFFLOAD.\nShould be either 0 or 1, but was %s\n",
                env);
      }
  }
  fprintf(stderr, "GPAW info: GPAW_OFFLOAD=%d\n", gpaw_offload_enabled);
  
#ifdef CRAYPAT
  PAT_region_begin(1, "C-Initializations");
#endif

#ifndef GPAW_OMP
  MPI_Init(&argc, &argv);
#else
  int granted;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &granted);
  if(granted != MPI_THREAD_MULTIPLE) exit(1);
#endif // GPAW_OMP

// Get initial timing
  double t0 = MPI_Wtime();

#ifdef GPAW_PERFORMANCE_REPORT
  gpaw_perf_init();
#endif

#ifdef GPAW_MPI_MAP
  int tag = 99;
  int myid, numprocs, i, procnamesize;
  char procname[MPI_MAX_PROCESSOR_NAME];
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs );
  MPI_Comm_rank(MPI_COMM_WORLD, &myid );
  MPI_Get_processor_name(procname, &procnamesize);
  if (myid > 0) {
      MPI_Send(&procnamesize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
      MPI_Send(procname, procnamesize, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
  }
  else {
      printf("MPI_COMM_SIZE is %d \n", numprocs);
      printf("%s \n", procname);
      
      for (i = 1; i < numprocs; ++i) {
          MPI_Recv(&procnamesize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
          MPI_Recv(procname, procnamesize, MPI_CHAR, i, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
          printf("%s \n", procname);
      }
  }
#endif // GPAW_MPI_MAP

#ifdef GPAW_MPI_DEBUG
  // Default Errhandler is MPI_ERRORS_ARE_FATAL
  MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
#endif

  // Progname seems to be needed in some circumstances to resolve
  // correct default sys.path
  Py_SetProgramName(argv[0]);

  Py_Initialize();

#pragma offload target(mic) if(gpaw_offload_enabled)
    {
        init_openmp();
    }
  
  if (PyType_Ready(&MPIType) < 0)
    return -1;

  if (PyType_Ready(&LFCType) < 0)
    return -1;
  if (PyType_Ready(&LocalizedFunctionsType) < 0)
    return -1;
  if (PyType_Ready(&OperatorType) < 0)
    return -1;
  if (PyType_Ready(&SplineType) < 0)
    return -1;
  if (PyType_Ready(&TransformerType) < 0)
    return -1;
  if (PyType_Ready(&XCFunctionalType) < 0)
    return -1;
  if (PyType_Ready(&lxcXCFunctionalType) < 0)
    return -1;

  PyObject* m = Py_InitModule3("_gpaw", functions,
             "C-extension for GPAW\n\n...\n");
  if (m == NULL)
    return -1;

  Py_INCREF(&MPIType);
  PyModule_AddObject(m, "Communicator", (PyObject *)&MPIType);

  // Add initial time to _gpaw object
  PyModule_AddObject(m, "time0", PyFloat_FromDouble(t0));

  Py_INCREF(&LFCType);
  Py_INCREF(&LocalizedFunctionsType);
  Py_INCREF(&OperatorType);
  Py_INCREF(&SplineType);
  Py_INCREF(&TransformerType);
  Py_INCREF(&XCFunctionalType);
  Py_INCREF(&lxcXCFunctionalType);

#ifdef GPAW_WITH_HDF5
  init_gpaw_hdf5();
#endif
  import_array1(-1);
  MPI_Barrier(MPI_COMM_WORLD);
#ifdef CRAYPAT
  PAT_region_end(1);
  PAT_region_begin(2, "all other");
#endif
  status = Py_Main(argc, argv);
#ifdef CRAYPAT
  PAT_region_end(2);
#endif

#ifdef GPAW_PERFORMANCE_REPORT
  gpaw_perf_finalize();
#endif

  MPI_Finalize();
  return status;
}
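
Example 1 is GPAW's custom Python launcher: MPI is initialized before the embedded interpreter starts and finalized only after Py_Main() returns. Stripped of the GPAW-specific module setup, the ordering pattern reduces to roughly the sketch below (a sketch only, assuming the Python 2 C API that the example itself uses, e.g. Py_InitModule3):

#include <Python.h>
#include <mpi.h>

/* Minimal sketch, not GPAW's actual code: bring up MPI, run the embedded
 * Python interpreter, shut MPI down afterwards.  Assumes the Python 2
 * C API (char *argv), matching Example 1. */
int main(int argc, char **argv)
{
    int status;

    MPI_Init(&argc, &argv);

    Py_SetProgramName(argv[0]);    /* helps resolve the default sys.path */
    Py_Initialize();

    status = Py_Main(argc, argv);  /* hand control to the interpreter */

    MPI_Finalize();
    return status;
}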
Example #2
int main(int argc, char* argv[]){
  time_t time1 = time(0), time2;

  //-------MPI initialization-------------

  int numprocs, myid, namelen;
  char processor_name[MPI_MAX_PROCESSOR_NAME];

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myid);

  MPI_Get_processor_name(processor_name, &namelen);
  fprintf(stderr, "Process %d running on %s\n", myid, processor_name);

  string numbers = "0123456789";  // !!!!! np <= 10
  string myid_str(numbers, myid, 1);

  MPI_Status status;

  // define a new MPI data type for particles
  MPI_Datatype particletype;
  MPI_Type_contiguous(18, MPI_DOUBLE, &particletype);  // !!! 14->18 changed
  MPI_Type_commit(&particletype);

  //-------- end MPI init----------------

  // wait for gdb
  waitforgdb(myid);

  // read input file (e.g. patric.cfg):

  if(argv[1] == 0){
    printf("No input file name !\n");
    MPI_Abort(MPI_COMM_WORLD, 0);
  }
  input_from_file(argv[1], myid);
  double eps_x = rms_emittance_x0;  // handy abbreviation
  double eps_y = rms_emittance_y0;  // same

  // Synchronous particle:

  SynParticle SP;
  SP.Z = Z;
  SP.A = A;
  SP.gamma0 = 1.0 + (e_kin*1e6*qe)/(mp*clight*clight) ;
  SP.beta0 = sqrt((SP.gamma0*SP.gamma0-1.0)/(SP.gamma0*SP.gamma0)) ;
  SP.eta0 = 1.0/pow(gamma_t, 2)-1.0/pow(SP.gamma0, 2);

  //-------Init Lattice-------

  BeamLine lattice;
  double tunex, tuney; 
  SectorMap CF(CF_advance_h/NCF, CF_advance_v/NCF, CF_R, CF_length/NCF, SP.gamma0);
  BeamLine CF_cell;
  if(madx_input_file == 1){
    // read madx sectormap and twiss files 
	cout << "madx sectormap" << endl;
    string data_dir_in = input;
    lattice.init(data_dir_in+"/mad/", circum, tunex, tuney); 
  }
  else{
    // init constant focusing (CF) sectormap and cell:
	cout << "constsnt focusing" << endl;
    for(int j=0; j<NCF; j++)
      CF_cell.add_map(CF);
    lattice.init(CF_cell);
  }

  // Other variables:
  double dx = 2.0*piperadius/(NX-1.0);  // needed for Poisson solver and grids
  double dy = 2.0*piperadius/(NY-1.0);  // needed for Poisson solver and grids
  double dz = circum/NZ;
  double ds = 0.4;  // value needed here only for setting dxs, dys.
  double dxs = 4.0*(dx/ds)/(NX-1.0);  // only for plotting xs, not for tracking
  double dys = 4.0*(dx/ds)/(NX-1.0);  // only for plotting ys, not for tracking
  double charge = current*circum/(NPIC*SP.beta0*clight*qe);  // macro-particle charge Q/e
  double zm = 0.5*circum*bunchfactor;  // (initial) bunch length
  if(init_pic_z == 1 || init_pic_z == 3 || init_pic_z == 4 || init_pic_z == 6)
    zm = 1.5*0.5*circum*bunchfactor;  // for parabolic bunch
  double zm1 = -zm*1.0;  // left bunch boundary
  double zm2 = zm*1.0;  // right bunch boundary
  if(init_pic_z==7)
	zm=0.25;
  double rmsToFull;  // ratio of rms to full emittance for Bump; SP

  // open output file patric.dat:

  string data_dir = ausgabe;
  data_dir = data_dir + "/";
  string outfile = data_dir + "patric.dat";
  FILE *out = fopen(outfile.c_str(), "w"); 

  // init random number generator:
  long d = -11*(myid+1);  // was -1021  transverse distribution: each slice needs a different initialization !
  long dl = -103;  // was -103   longitudinal plane: same random set needed
  long dran = -101;  // for BTF noise excitation: same random sets needed


  // set some global lattice parameters

  double cell_length = lattice.get_L();
  int Nelements = lattice.get_size();
  if(myid == 0){
    cout << "Nelements:" << Nelements << endl;
    cout << "Cell length:" << cell_length << endl;
  }

  // define pointers to first/last element in beam line:

  const list<SectorMap>::iterator first_elem = lattice.get_first_element();
  const list<SectorMap>::iterator last_elem = --lattice.get_end_element();

  TwissP twiss0, twiss_TK;
  lattice.first_element();
  twiss0 = last_elem->get_twiss();
  twiss_TK = first_elem->get_twiss();
  double Ds0 = 0.0;  // Dispersion derivative

  if(madx_input_file == 0){
    // machine tunes from lattice
    lattice.phase_advance(tunex, tuney);
    tunex = circum/cell_length*tunex/(2.0*PI);
    tuney = circum/cell_length*tuney/(2.0*PI);
	bumpI=0;
    if(myid == 0){
      cout << "advancex: " << tunex*180.0/PI << endl;
      cout << "tunex0: " << tunex << endl;
      cout << "tuney0: " << tuney << endl;
    }
  }

  // Chromatic correction kick:
  Chrom Chrom0;


  // Octupole:
  Octupole Oct0(koct);

  // Amplitude detuning; works only for constant focusing; SA
  //if(madx_input_file == 0)
    //AmplitudeDetuning Amp0(tunex, tuney, dqx_detune/(1.0e-6*eps_x), dqy_detune/(1.0e-6*eps_y), circum/(2.0*PI), CF);

  //--------end lattice----------

  // set matched RF voltage:

  int linrf = 0;
  if (cavity == 3) linrf = 1;
  double Ym = circum/(2.0*PI)*(1.0-cos(2.0*PI*zm/circum));
  if (linrf == 1) Ym = circum/(2.0*PI)*0.5*pow(2.0*PI*zm/circum, 2);
  double velm = abs(SP.eta0)*SP.beta0*clight*sqrt(5.0)*momentum_spread*2.0*PI/(circum);
  double fsyn = 1.0/(2.0*PI)*velm*sqrt(circum/(2.0*PI))/sqrt(2.0*Ym);
  double V0rf = pow(2.0*PI*fsyn, 2)*pow(circum, 2)/(2.0*PI)*mp*SP.A*SP.gamma0/(qe*SP.Z*abs(SP.eta0));

  // Init particle distribution:

  Pic Pics(&SP, charge, NPIC/numprocs, data_dir + "pics_" + myid_str + ".dat");
  Pics.z1 = zm1+myid*(zm2-zm1)/numprocs;  // left boundary in z for this slice
  Pics.z2 = Pics.z1+(zm2-zm1)/numprocs;  // right boundary
  double slice_length = Pics.z2-Pics.z1;  // slice length

  Pic NewPics(&SP, charge, NPIC/numprocs);
  NewPics.z1 = Pics.z1;
  NewPics.z2 = Pics.z2;

  // Init 1D longitudinal grids

  Grid1D rho_z_tmp(NZ, dz, -0.5*circum);
  Grid1D rho_z(NZ, dz, -0.5*circum, data_dir + "rho_z.dat");
  Grid1D dipole_current_x_tmp(NZ, dz, -0.5*circum);
  Grid1D dipole_current_x(NZ, dz, -0.5*circum, data_dir + "dipole_x.dat");
  Grid1D dipole_current_xs_tmp(NZ, dz, -0.5*circum);
  Grid1D dipole_current_xs(NZ, dz, -0.5*circum);
  Grid1D dipole_kick_x(NZ, dz, -0.5*circum, data_dir + "dipole_kick_x.dat");
  Grid1D dipole_current_y_tmp(NZ, dz, -0.5*circum);
  Grid1D dipole_current_y(NZ, dz, -0.5*circum, data_dir + "dipole_y.dat");

  // Init 2D transverse grids:

  Grid2D rho_xy(NX, NY, dx, dy, data_dir + "rho_xy.dat");
  Grid2D rho_xy_tmp(NX, NY, dx, dy);
  Grid2D xxs(NX, NX, dx, dxs, data_dir + "xxs.dat");
  Grid2D xxs_tmp(NX, NX, dx, dxs);
  Grid2D yys(NY, NY, dy, dys, data_dir + "yys.dat");
  Grid2D yys_tmp(NY, NY, dy, dys);
  Grid2D xsys(NX, NY, dxs, dys, data_dir + "xsys.dat");
  Grid2D xsys_tmp(NX, NY, dxs, dys);
  Grid2D zx(NZ, NX, dz, dx, data_dir + "zx.dat");
  Grid2D zx_tmp(NZ, NX, dz, dx);

  Grid2D Ex(NX, NY, dx, dy, data_dir + "Ex.dat");
  Grid2D Ey(NX, NY, dx, dy, data_dir + "Ey.dat");

  // Init 3D sliced grids (for 3D space charge calculation)

  if( fmod((float)NZ_bunch, (float)numprocs) != 0.0 ){
    cout << "NZ_bunch kein Vielfaches von numprocs" << endl;
    MPI_Abort(MPI_COMM_WORLD, 0);
  }
  Grid3D rho_xyz(NZ_bunch/numprocs, Pics.z1, Pics.z2, rho_xy);
  Grid3D Ey3(NZ_bunch/numprocs, Pics.z1, Pics.z2, rho_xy);
  Grid3D Ex3(NZ_bunch/numprocs, Pics.z1, Pics.z2, rho_xy);

  // Init 2D Greens function for poisson solver

  Greenfb gf1(rho_xy, image_x, image_y);  // open boundary condition

  // for the beam radius calculation;  factor for rms equivalent 
  switch(init_pic_xy){
  case 0:  // Waterbag
	rmsToFull = 6;
    break;
  case 1:  // KV
	rmsToFull = 4;
    break;
  case 2:  // Semi-Gauss
    rmsToFull = 4;  // approximate
    break;
  case 3:  // Gauss
  	rmsToFull = 4;  // approximate
    break;
  default:
    printf("Invalid option for transverse particle distribution. Aborting.\n");
    MPI_Abort(MPI_COMM_WORLD, 0);
  }

  // injection bump initialize 
  Bump lob(tunex);     
  double a; // beam radius horizontal
 
  switch(bumpI){
	case 0:
	  cout << "no mti"  << endl;
	  max_inj = 1;
	  amp0=0;
	  break;
	case 1:
	  // The bump height is defined by user given offcenter parameter. The injection angle is equal to the septum tilt angle (as done in SIS18). 
	  cout << "mti version SP" << endl; 
	  a = sqrt(twiss_TK.betx*eps_x*rmsToFull)*0.001+twiss_TK.Dx*momentum_spread;  // half width of injected beam [m] with WB distribution, change to Main, SA 
	  offcenter_x=x_septum + d_septum + a; 
	  amp0=offcenter_x;
	  amp=amp0;
	  ampp0=inj_angle;
	  delAmp=(amp0-2*a)/double(max_inj);   //0.0041*3;//    
	  lob.BumpSp(&lattice,max_inj, myid, amp0, ampp0, delAmp); // local orbit bump for beam injection; SP
	  break;
	case 2:
	  amp=amp0;
	  cout << "mti flexibility version" << endl; 
	  lob.BumpModi(&lattice,amp);
	  break;
	case 3:
	  amp=amp0;
	  cout << "mti flexibility version exponential decrease" << "tau" << tau << endl; 
	  lob.BumpModi(&lattice,amp);
	  break;
	case 4:
	  amp=amp0;
	  cout << "mti flexibility version sin decrease" << "tau" << tau << endl; 
	  lob.BumpModi(&lattice,amp);
	  break;
	default:
	   printf("Invalid option for bump injection. Aborting.\n");
	   MPI_Abort(MPI_COMM_WORLD, 0);
  }
	 
  //if(myid == 0)
    //cout << "Expected single beamlett tune shifts: dQ_x="
	 //<< rp*SP.Z*current*circum / (rmsToFull*PI*clight*qe*SP.A*pow(SP.beta0*SP.gamma0, 3)*(eps_x+sqrt(eps_x*eps_y*tunex/tuney)))*1e6
	 //<< ", dQ_y="
	 //<< rp*SP.Z*current*circum / (rmsToFull*PI*clight*qe*SP.A*pow(SP.beta0*SP.gamma0, 3)*(eps_y+sqrt(eps_x*eps_y*tuney/tunex)))*1e6
	 //<< endl;            
		

  // print IDL parameter file idl.dat:       
  if(myid == 0){
    //cout << "Vrf [kV]: " << V0rf*1.0e-3 << "  fsyn [kHz]: " << fsyn*1.0e-3 << endl; 
    print_IDL(data_dir, numprocs, cell_length, Nelements, tunex, tuney, lattice, cells, max_inj); 
  }


  //----------------counters and other variables--------------------------

  int Nexchange = 1;  // exchange of particles between slices after every sector map.
  int Nprint = print_cell*Nelements;  // output of particles every cell*print_cell
  //int Nibs = 1;  // correct for IBS every Nibs steps
  double Ntot;  // total number of particles: for screen output
  int counter = 0;  // counts sector maps
  double s = 0.0;  // path length
  double Nslice;  // total number of slices
  double emitx;  // emittance: for screen output
  double dtheta = 0.0;  // btf dipole kick
  double pickup_h, pickup_v;  // horizontal/vertical pickup signals
  double rms_advancex = 0.0, rms_advancey = 0.0;  // rms phase advance: for output
  int inj_counter = 0;  // number of injected beamlets; SP
  long N_inj = 0;  // number of injected particles

  //---------parameters for exchange of particles between slices-------

  int destl;  //!< ID of left neighbour slice (-1: no neighbour).
  int destr;  //!< ID of right neighbour



  //---finite bunch: no exchange between ends---
  if(bc_end == 0){
    if(myid == 0){
      destl =-1;
      destr = myid+1;
    }else
      if(myid == numprocs-1){
	destl = myid-1;
	destr =-1;
      }else{
	destl = myid-1;
	destr = myid+1;
      }
  }
  //---periodic (in z) boundary condition---
  if(bc_end == 1){
    if(myid == 0){
      destl = numprocs-1;
      destr = myid+1;
    }else
      if(myid == numprocs-1){
	destl = myid-1;
	destr = 0;
      }
      else{
	destl = myid-1;
	destr = myid+1;
      }
  }



  //--------------------- end-parameters for particle exchange ---------------

  long *septLoss = new long;
  long *sl_slice = new long;
  double *momenta = new double[19];
  double *momenta_tot = new double[19];       
  double tmp=0;
  long size_old;	
  offcenter_y=0.0;  
  inj_phase_y=0.0e-3;
  
  //--------------------------------------------------------------------------
  //----------------------- start loop (do...while) --------------------------
  //--------------------------------------------------------------------------
   double z0;
   do{  // injection; SP
    if(!(counter%Nelements))
	{  // at beginning each turn...
	  	
	if(inj_counter < max_inj)
	{
 	  size_old=Pics.get_size();
	  // set longitudinal distribution:
	  switch(init_pic_z){
	  case 0:  //  coasting + Elliptic
	    Pics.parabolic_dc(bunchfactor, circum, momentum_spread, NPIC, &dl);
	    break;
	  case 1:  //  bunch + Elliptic  (1.5 correction factor for bunching)
	    Pics.parabolic(zm, 0, momentum_spread, NPIC, &dl);
	    break;
	  case 2:  //  coasting + Gauss
	    Pics.coast_gauss(bunchfactor, circum, momentum_spread, NPIC, &dl); 
	    break;
	  case 3:  //  bunch + Gauss
	    Pics.bunch_gauss(zm, circum, momentum_spread, NPIC, &dl);
	    break;
	  case 4:  //  const. bunch dist.		
	    Pics.bunch_const(zm, circum, momentum_spread, NPIC, &dl,linrf);
	    break;
	  case 5:  //  air bag dist.
	    Pics.barrier_air_bag(zm, momentum_spread, NPIC, &dl);
	    break;
	  case 6:  //  bunch air bag dist.
	    Pics.bunch_air_bag(zm, circum, momentum_spread, NPIC, &dl);
	    break;
	  case 7:  //  168 micro bunches, injection
		z0=-circum/2.;
		int l;
		for (l=0; l<168; l++){
	    	Pics.parabolic(zm, z0, momentum_spread, NPIC/168, &dl);
			z0+=1.286;
		}
	    break;
	  default:
	    printf("Invalid option for longitudinal particle distribution. Aborting.\n");
	    MPI_Abort(MPI_COMM_WORLD, 0);
	  }
		
	  // set transverse distribution:
	  switch(init_pic_xy){
	  case 0:  // Waterbag
		rmsToFull = 6;
	    Pics.waterbag_xy(1.e-6*eps_x, 1.0e-6*eps_y, twiss_TK.alpx, twiss_TK.alpy, pow(mismatch_x, 2)*twiss_TK.betx, pow(mismatch_y, 2)*twiss_TK.bety, 
				        twiss_TK.Dx, Ds0, offcenter_x, inj_angle, offcenter_y, inj_phase_y, size_old, &d);
	    break;
	  case 1:  // KV
		rmsToFull = 4;
	    Pics.KV_xy(1.e-6*eps_x, 1.0e-6*eps_y, twiss_TK.alpx, twiss_TK.alpy, pow(mismatch_x, 2)*twiss_TK.betx, pow(mismatch_y, 2)*twiss_TK.bety, 
				   twiss_TK.Dx, Ds0, offcenter_x, inj_angle, offcenter_y, inj_phase_y, size_old, &d);
	    break;
	  case 2:  // Semi-Gauss
	    rmsToFull = 4;  // approximate
		Pics.SG(1.e-6*eps_x, 1.0e-6*eps_y, twiss_TK.alpx, twiss_TK.alpy, pow(mismatch_x, 2)*twiss_TK.betx, pow(mismatch_y, 2)*twiss_TK.bety, 
				twiss_TK.Dx, Ds0, offcenter_x, inj_angle, offcenter_y, inj_phase_y, size_old, &d);
	    break;
	  case 3:  // Gauss
	  	rmsToFull = 4;  // approximate
	    Pics.Gauss_xy(1.e-6*eps_x, 1.0e-6*eps_y, twiss_TK.alpx, twiss_TK.alpy, pow(mismatch_x, 2)*twiss_TK.betx, pow(mismatch_y, 2)*twiss_TK.bety, 
					  twiss_TK.Dx, Ds0, offcenter_x, inj_angle, offcenter_y, inj_phase_y, size_old, &d);
	    break;
	  default:
	    printf("Invalid option for transverse particle distribution. Aborting.\n");
	    MPI_Abort(MPI_COMM_WORLD, 0);
	  }
	  
	  if (bumpI!=0)
	  {
	  	*sl_slice = NewPics.localLoss_x(x_septum, 100.);  // loss on septum  
      	loss+=*sl_slice; 
      	MPI_Reduce(sl_slice, septLoss, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
	  	if(myid == 0)
     		cout<<"The incoming beamlett number "<<inj_counter+1<< " lost "<<loss<< " macro particles on the septum.\n";
      }
	  N_inj += NPIC; 
	  inj_counter +=1;
	}
	

	// bump reduction
    if (amp > 0.001 )
	{    
	   if(bumpI==1)
	   {   
		amp-=delAmp;                                 
	   	ampp0-=delAmp*ampp0/amp0;     
	   	lob.decrement();     
	   }
	   if (bumpI==2)
		{
		 amp-=delAmp;                                 
		 lob.decrementModi(amp);
		}
	   if (bumpI==3)
	    {
		 amp=amp0*exp(-tau*counter/Nelements);
		 lob.decrementModi(amp);
		}
	   if (bumpI==4)
	    {
		 amp=amp0*(1+sin(-tau*counter/Nelements));
		 lob.decrementModi(amp);
		}
	}	
   }
	
	
    //------------ Start Output----------------------------------------
    // store rms momenta every time step in patric.dat:
    
    if(counter%1 == 0){
      Nslice = Pics.get_size();  // number of particles in this slice
      momenta[0] = Nslice*Pics.rms_emittance_x();
      momenta[1] = Nslice*Pics.rms_emittance_y();
      momenta[2] = Nslice*Pics.x_max();
      momenta[3] = Nslice*Pics.y_max();
      momenta[4] = Nslice*Pics.x_rms();
      momenta[5] = Nslice*Pics.y_rms();
      momenta[6] = Nslice*Pics.rms_momentum_spread();
      momenta[7] = Nslice*Pics.xzn(2.0, zm);
      momenta[8] = Nslice*Pics.xzn(1.0, zm);
      momenta[9] = Nslice;
      momenta[10] = Nslice*rms_advancex;  // rms phase advance in x
      momenta[11] = Nslice*rms_advancey;
      momenta[12] = Nslice*Pics.offset_x();
      momenta[13] = Nslice*Pics.offset_y();
      momenta[14] = Nslice*dtheta;  // btf noise signal
      momenta[15] = Nslice*pickup_h;
      momenta[16] = Nslice*pickup_v; 
	  momenta[17] = Nslice*loss;   
	  momenta[18] = Nslice*N_inj;  
      // mpi_reduce for summation of all 19 moments over all slices
      MPI_Reduce(momenta, momenta_tot, 19, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
	 		
      Ntot = momenta_tot[9];  // total number of particles over all slices
      emitx = momenta_tot[0]/Ntot;  // total rms emittance    

 
      // stop when loss tolerance level is exceeded                            (1-Ntot/(max_inj*NPIC))*100.
	  
      if(myid == 0 && Ntot/N_inj <= lossTol){  // test on numer of injected particles; SP
		cout<<"Loss tolerance exceeded within "<<counter/Nelements+1<<" turns ("<<
	    Ntot<<" of "<<N_inj<<" macro particles left). Exiting.\n";
	    cout.flush();
	MPI_Abort(MPI_COMM_WORLD, 0);
      }  
      //      cout<<counter<<' '<<lattice.get_element()->get_name()<<' '<<lattice.get_element()->get_K(1)<<endl;  //tmp
      // write momenta
      if(myid == 0){
	fprintf(out, "%g", s);
	for(int i=0; i<19; i++)
	  if(i != 9){
	    fprintf(out, "%15g", momenta_tot[i]/Ntot);}
	  else{
	    fprintf(out, "%15g", momenta_tot[i]);}
	fprintf(out, "\n");
	fflush(out);
      }
}


    //------output every Nprint*sectormap---------

    if(counter%Nprint == 0){
      if(myid == 0){
	// to screen
	//printf("saving at s=%g (m) eps_t=%g dp/p=%g zm2=%g Ntotal=%g\n", s, 1.0e6*emitx, Pics.rms_momentum_spread(), zm2, Ntot);
	cout.flush();
	
	// electric fields
	Ex.print();
	Ey.print();
      }
	
      // particle coordinates to pic.dat:
      Pics.print(pic_subset);
   	
      // collect densities for output only:

      Pics.gatherZ(charge*qe/dz, rho_z_tmp);
      Pics.gatherX(SP.beta0*clight*charge*qe/dz, dipole_current_x_tmp);
      Pics.gatherY(SP.beta0*clight*charge*qe/dz, dipole_current_y_tmp);
      Pics.gatherXY(charge*qe/circum, rho_xy_tmp);
      Pics.gatherXXs(charge*qe/circum, xxs_tmp);
      Pics.gatherYYs(charge*qe/circum, yys_tmp);
      Pics.gatherXsYs(charge*qe/circum, xsys_tmp);
      Pics.gatherZX(charge*qe/circum, zx_tmp);

      // summation over all slices:

      MPI_Allreduce(rho_z_tmp.get_grid(), rho_z.get_grid(),
		    NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(dipole_current_x_tmp.get_grid(), dipole_current_x.get_grid(),
		    NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(dipole_current_y_tmp.get_grid(), dipole_current_y.get_grid(),
		    NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(rho_xy_tmp.get_grid(), rho_xy.get_grid(),
		    NX*NY, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(xxs_tmp.get_grid(), xxs.get_grid(),
		    NX*NX, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(yys_tmp.get_grid(), yys.get_grid(),
		    NY*NY, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(xsys_tmp.get_grid(), xsys.get_grid(),
		    NX*NY, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(zx_tmp.get_grid(), zx.get_grid(),
		    NZ*NX, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
		  
		
      // output to density files:

      if(myid == 0){
	dipole_current_x.print();
	dipole_kick_x.print();
	dipole_current_y.print();
	rho_z.print();
	rho_xy.print();
	xxs.print();
	yys.print();
	xsys.print();
	zx.print();
      }
    } 
    //-----------------end output--------------------------------------------		
	
    // at beginning of a cell: calculate advance per (last) cell,
    // store old coordinates 
    if(lattice.get_element() == first_elem){
      rms_advancex = Pics.rms_phaseadvance_h();  // Pics.rms_wavelength_h();
      rms_advancey = Pics.rms_phaseadvance_v();  // Pics.rms_wavelength_v();
      if(footprint == 0)
	Pics.store_old_coordinates();
    }

	
    if(lattice.get_element()->get_name() == "\"SEPTUM\""){  // losses at septum; SP
      loss += Pics.localLoss_x(-piperadius, coll_halfgap);	  
	}  
  

   if(lattice.get_element()->get_name() == "\"ACCEPTANCE\""){  // losses at limiting acceptance; SA
	  double tmp = lattice.get_element()->get_betx();
      Pics.localLoss_x(-sqrt(180e-6*tmp), sqrt(180e-6*tmp));      
	}

	// Transport particles through sectormap, update slice position s: 
    ds = lattice.get_element()->get_L();

    s += ds;
    Pics.transport(lattice.get_element()->get_map(), piperadius);



    //-----exchange particles between slices------------------------

    if(counter != 0 && counter%Nexchange == 0 && numprocs > 1){
      int Npl;  //!< Number of particles to be exchanged with left neighbour
      int Npr;  //!< particles exchanged with right neighbour

      //! vector of particles to be exchanged
      vector<Particle> pl, pr;

      // send particle to neighbor slices:
	
      if(destl >= 0){
	pl = Pics.get_particles_left(circum);
	Npl = pl.size();
	MPI_Send(&Npl, 1, MPI_INT, destl, 1, MPI_COMM_WORLD);
	MPI_Send(&pl[0], Npl, particletype, destl, 1, MPI_COMM_WORLD);
      }
      if(destr >= 0){
	pr = Pics.get_particles_right(circum);
	Npr = pr.size();
	MPI_Send(&Npr, 1, MPI_INT, destr, 0, MPI_COMM_WORLD);
	MPI_Send(&pr[0], Npr, particletype, destr, 0, MPI_COMM_WORLD);
      }

      // receive from neighbour slices:
	
      Npl = 0; Npr = 0;
      vector<Particle> pl_in, pr_in;
      if( destl >= 0 ){
	MPI_Recv(&Npl, 1, MPI_INT, destl, 0, MPI_COMM_WORLD, &status);
	pl_in = vector<Particle>(Npl);
	MPI_Recv(&pl_in[0], Npl, particletype, destl, 0, MPI_COMM_WORLD, &status);
      }
      if(destr >= 0){
	MPI_Recv(&Npr, 1, MPI_INT, destr, 1, MPI_COMM_WORLD, &status);
	pr_in = vector<Particle>(Npr);
	MPI_Recv(&pr_in[0], Npr, particletype, destr, 1, MPI_COMM_WORLD, &status);
      }
      Pics.add_particles(pl_in);
      Pics.add_particles(pr_in);
    }

    //-----end exchange of particles-------------



    // periodic bc without exchange
    if(numprocs == 1)
      Pics.periodic_bc(circum);	

    // update wave lengths

    //if( footprint == 1){
    //Pics.update_wavelength_h(ds, 0.0);
    //Pics.update_wavelength_v(ds);}

    // nonlinear thin lens kick:
    if(octupole_kick == 1)
      Pics.kick(Oct0, lattice.get_element()->get_twiss(), ds);

    //if(ampdetun_kick == 1)  // works only for constant focusing
    //Pics.kick(Amp0, lattice.get_element()->get_twiss()ds);

    // correct for chromaticity
     if(chroma == 1)  
		Pics.kick(Chrom0,lattice.get_element()->get_twiss(), ds);
			
    // cavity kick every cell:

    if(cavity == 1 && counter%Nelements == 0.0)
      Pics.cavity_kick(V0rf*cell_length/circum, 1, circum/(2.0*PI));
    if(cavity == 2 && counter%Nelements == 0.0)
      Pics.barrier_kick(zm1, zm2);
    if(cavity == 3 && counter%Nelements == 0.0)
      Pics.cavity_kick_linear(V0rf*cell_length/circum, 1, circum/(2.0*PI));

    // Pickup signals

    Pics.gatherX(SP.beta0*clight*charge*qe/dz, dipole_current_x_tmp);
    Pics.gatherY(SP.beta0*clight*charge*qe/dz, dipole_current_y_tmp);
    MPI_Allreduce(dipole_current_x_tmp.get_grid(), dipole_current_x.get_grid(), NZ,
		  MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);	
    MPI_Allreduce(dipole_current_y_tmp.get_grid(), dipole_current_y.get_grid(), NZ,
		  MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    pickup_h = Pics.pickup_signal(dipole_current_x, circum,
				  s/(SP.beta0*clight))/current;
    pickup_v = Pics.pickup_signal(dipole_current_y, circum,
				  s/(SP.beta0*clight))/current;


    //---------------impedance kicks-----------------------

    komplex dqc_t(dqcr, dqci);  // for sliced == 0

    if(imp_kick == 1){
      if(sliced == 0)
	Pics.kick(ds/circum*InducedKick(Pics.offset_x(), ds, dqc_t, SP.beta0,
					tunex, circum), 0.0);
      else{
	dipole_kick_x.reset(); 	
	if(Rs > 0.0 || leit > 0.0){
	  Pics.gatherXs(SP.beta0*clight*charge*qe/dz, dipole_current_xs_tmp);
	  MPI_Allreduce(dipole_current_xs_tmp.get_grid(), dipole_current_xs.get_grid(),
			NZ, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);	
	  InducedWakeKick(dipole_kick_x, dipole_current_x, dipole_current_xs, tunex,
			  2.0*PI*SP.beta0*clight/circum, nres, Rs, Qs, piperadius,
			  leit, SP.beta0, SP.gamma0*mp*SP.A*pow(clight, 2), SP.Z*qe);
	}
	if(Zimage != 0.0)
	  InducedKick(dipole_kick_x, dipole_current_x, Zimage, SP.beta0,
		      SP.gamma0*mp*SP.A*pow(clight, 2), SP.Z*qe);
	Pics.impedance_kick(dipole_kick_x, circum, ds);
      }
    }


    //---------------end impedance kicks-----------------------

    //------------self-consistent space charge kicks after every sectormap----
    if(space_charge == 1){    
      // PIC -> charge density for Poisson solver:
      if (sliced == 0){
	Pics.gatherXY(charge*qe/circum, rho_xy_tmp);		
	MPI_Allreduce(rho_xy_tmp.get_grid(), rho_xy.get_grid(), NX*NY, MPI_DOUBLE,
		      MPI_SUM, MPI_COMM_WORLD);	
      }else{	
	Pics.gatherXYZ(charge*qe/rho_xyz.get_dz(), rho_xyz);

	// send and receive density ghost grids to neighbor slices:
	// what is exchanged here ???
	if(destl >= 0)
	  MPI_Send(rho_xyz.get_ghostl(), NX*NY, MPI_DOUBLE, destl, 2, MPI_COMM_WORLD);
	if(destr >= 0){
	  MPI_Recv(rho_xy_tmp.get_grid(), NX*NY, MPI_DOUBLE, destr, 2, MPI_COMM_WORLD,
		   &status);
	  rho_xyz[NZ_bunch/numprocs-1] += rho_xy_tmp;
	}
	if(destr >= 0)
	  MPI_Send(rho_xyz.get_ghostr(), NX*NY, MPI_DOUBLE, destr, 3, MPI_COMM_WORLD);
	if(destl >= 0){
	  MPI_Recv(rho_xy_tmp.get_grid(), NX*NY, MPI_DOUBLE, destl, 3, MPI_COMM_WORLD,
		   &status);
	  rho_xyz[0]+= rho_xy_tmp;
	}
      }
       // Poisson solver
      if(sliced == 0)
	poisson_xy(Ex, Ey, rho_xy, gf1);
      else{
	poisson_xyz(Ex3, Ey3, rho_xyz, gf1);
	
	// send and receive efield ghost grids to neighbor slices:
	if(destl >= 0){
	  MPI_Send(Ex3.get_ghostl(), NX*NY, MPI_DOUBLE, destl, 2,
		   MPI_COMM_WORLD);
	  MPI_Send(Ey3.get_ghostl(), NX*NY, MPI_DOUBLE, destl, 4,
		   MPI_COMM_WORLD);
	}
	if(destr >= 0){
	  MPI_Recv(Ex3[NZ_bunch/numprocs-1].get_grid(), NX*NY, MPI_DOUBLE,
		   destr, 2, MPI_COMM_WORLD, &status);
	  MPI_Recv(Ey3[NZ_bunch/numprocs-1].get_grid(), NX*NY, MPI_DOUBLE,
		   destr, 4, MPI_COMM_WORLD, &status);
	}
	if(destr >= 0){
	  MPI_Send(Ex3.get_ghostr(), NX*NY, MPI_DOUBLE, destr, 3, MPI_COMM_WORLD);
	  MPI_Send(Ey3.get_ghostr(), NX*NY, MPI_DOUBLE, destr, 5, MPI_COMM_WORLD);
	}
	if(destl >= 0){
	  MPI_Recv(Ex3[0].get_grid(), NX*NY, MPI_DOUBLE, destl, 3, MPI_COMM_WORLD, &status);
	  MPI_Recv(Ey3[0].get_grid(), NX*NY, MPI_DOUBLE, destl, 5, MPI_COMM_WORLD, &status);
	}
      }
    }
    
    // Shift xs and ys:
    
    if(space_charge == 1 && ds > 0.0){
      if(sliced == 0)
	Pics.kick(Ex, Ey, ds);
      else
	Pics.kick(Ex3, Ey3, ds);
    }
    
    //---------------end self-consistent space charge kicks---------------

    // linear sc kicks:
    
    if(space_charge == 2 && ds > 0.0)
      Pics.linear_SC_kick(dQxm, dQym, tunex, tuney, rho_z, current/(SP.beta0*clight),
			  dipole_current_x, dipole_current_y, circum, ds);
	
    // nonlinear sc kicks:

    if(space_charge == 3 && ds > 0.0)
      Pics.nonlinear_SC_kick(sqrt(1.0e-6*twiss0.betx*eps_x), sqrt(1.0e-6*twiss0.bety*eps_y),
			     dQxm, dQym, tunex, tuney, rho_z, current/(SP.beta0*clight),
			     circum, ds);

    // dipole noise modulation kick:
    double dnoiseamp = 1.0e-6;
    double nus = fsyn/(SP.beta0*clight/circum);
    if(btf == 1)
      dtheta = Pics.dipole_mod_kick(s/(SP.beta0*clight), ds, circum, dnoiseamp,
				    (tunex+nus)*SP.beta0*clight/circum, btf_harmonic);	
						
    // correct for ibs:

    /*if(counter != 0 && counter%Nibs == 0){
      double rate_ibs = 1.0e4;
      double Dz = rate_ibs*pow(Pics.rms_momentum_spread(), 2);
      double Dxy = rate_ibs*0.5*(Pics.rms_emittance_x()+Pics.rms_emittance_y());
      double betx = lattice.get_element()->get_betx();
      double bety = lattice.get_element()->get_bety();
      Pics.langevin(rate_ibs, rate_ibs*0.0, Dxy, Dz*0.0, Nibs*ds, betx, bety,
        &d);
      }*/

    // For bunch compression: Update slice boundaries z1 and z2 from
    // new bunch boundaries zm1, zm2:

    /*if(counter != 0 && counter%Nexchange == 0){
      if(myid == 0)
      zm1 = Pics.z_min();
      MPI_Bcast(&zm1, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      if(myid == numprocs-1)
      zm2 = Pics.z_max();
      MPI_Bcast(&zm2, 1, MPI_DOUBLE, numprocs-1, MPI_COMM_WORLD);

      Pics.z1 = zm1+myid*(zm2-zm1)/numprocs;
      Pics.z2 = Pics.z1+(zm2-zm1)/numprocs;
      slice_length = Pics.z2-Pics.z1;

      rho_xyz.get_zleft() = zm1;
      rho_xyz.get_zright() = zm2;
      Ex3.get_zleft() = zm1;
      Ex3.get_zright() = zm2;
      Ey3.get_zleft() = zm1;
      Ey3.get_zright() = zm2;
      }*/


    // advance in beam line, go to next element:

    lattice.next_element();
    ++counter;

  }while(counter != cells*Nelements);          //loop check, cells (turns) given by user  SA

  //------------------end of loop-------------------------------

  // close files, free heap:

  delete septLoss;
  delete sl_slice;
  delete[] momenta;
  delete[] momenta_tot;  // [] needed here!; SP
  fclose(out);

  // MPI end:

  MPI_Finalize();

  time2 = time(0);
  double sec = difftime(time2, time1);
  double h = floor(sec/3600);
  double min = floor(sec/60-60.*h);
  sec -= 3600.*h+60.*min;
    
  if(myid == 0)  
    {cout << "Total losses: " << (1-Ntot/(max_inj*NPIC))*100. << " \%\n" <<
      "Stored particles: " << current*circum*Ntot/(qe*Z*SP.beta0*clight*NPIC) << endl <<
      "Computation time: " << h << ":" << min << ":" << sec << endl;   
	}
   }
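
Example 2 commits a contiguous MPI datatype of 18 doubles per particle (the MPI_Type_contiguous call near the top) and later ships whole vectors of particles between neighbouring slices with that type. The sketch below shows the same datatype-plus-exchange pattern in isolation; the 18-double Particle layout and the ring exchange are illustrative assumptions, not PATRIC's actual code:

#include <stdio.h>
#include <mpi.h>

/* Hypothetical 18-double particle record, mirroring the
 * MPI_Type_contiguous(18, MPI_DOUBLE, ...) call in Example 2. */
typedef struct { double coords[18]; } Particle;

int main(int argc, char **argv)
{
    int rank, nprocs, i;
    MPI_Datatype particletype;
    Particle p[4];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    MPI_Type_contiguous(18, MPI_DOUBLE, &particletype);
    MPI_Type_commit(&particletype);

    /* ring exchange: send 4 particles to the right neighbour,
     * receive 4 particles from the left neighbour in place */
    for (i = 0; i < 4; i++)
        p[i].coords[0] = rank;
    MPI_Sendrecv_replace(p, 4, particletype, (rank + 1) % nprocs, 0,
                         (rank + nprocs - 1) % nprocs, 0,
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    printf("rank %d now holds particles from rank %g\n", rank, p[0].coords[0]);

    MPI_Type_free(&particletype);
    MPI_Finalize();
    return 0;
}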
Example #3
int
main(int argc, char *argv[]) {
    struct plat_opts_config_mpilogme config;
    SDF_boolean_t success = SDF_TRUE;
    uint32_t numprocs;
    int tmp, namelen, mpiv = 0, mpisubv = 0, i;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int msg_init_flags = SDF_MSG_MPI_INIT;
    config.inputarg = 0;
    config.msgtstnum = 500;

    /* We may not need to gather anything from here but what the heck */
    loadProperties("/opt/schooner/config/schooner-med.properties"); // TODO get filename from command line

    /* make sure this is first in order to get the MPI init args */
    success = plat_opts_parse_mpilogme(&config, argc, argv) ? SDF_FALSE : SDF_TRUE;

    printf("input arg %d msgnum %d success %d\n", config.inputarg, config.msgtstnum, success);
    fflush(stdout);
    myid = sdf_msg_init_mpi(argc, argv, &numprocs, &success, msg_init_flags);

    if ((!success) || (myid < 0)) {
        printf("Node %d: MPI Init failure... exiting - errornum %d\n", myid, success);
        fflush(stdout);
        MPI_Finalize();
        return (EXIT_FAILURE);
    }

    int debug = 0;
    while(debug);

    tmp = init_msgtest_sm((uint32_t)myid);

    /* Enable this process to run threads across 2 cpus, MPI will default to running all threads
     * on only one core which is not what we really want as it forces the msg thread to time slice
     * with the fth threads that send and receive messages
     * first arg is the number of the processor you want to start off on and arg #2 is the sequential
     * number of processors from there
     */
    lock_processor(0, 7);
    sleep(1);
    msg_init_flags =  msg_init_flags | SDF_MSG_RTF_DISABLE_MNGMT;

    /* Startup SDF Messaging Engine FIXME - dual node mode still - pnodeid is passed and determined
     * from the number of processes mpirun sees.
     */
    sdf_msg_init(myid, &pnodeid, msg_init_flags);

    MPI_Get_version(&mpiv, &mpisubv);
    MPI_Get_processor_name(processor_name, &namelen);

    printf("Node %d: MPI Version: %d.%d Name %s \n", myid, mpiv, mpisubv, processor_name);
    fflush(stdout);

    plat_log_msg(
            PLAT_LOG_ID_INITIAL,
            LOG_CAT,
            PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Completed Msg Init.. numprocs %d pnodeid %d Starting Test\n",
            myid, numprocs, pnodeid);

    for (i = 0; i < 2; i++) {
        sleep(2);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Number of sleeps %d\n", myid, i);
    }

    fthInit();
    sdf_msg_startmsg(myid, 0, NULL); 


    /* SAVE THIS may need to play with the priority later */
#if 0
    struct sched_param param;
    int newprio = 60;
    pthread_attr_t hi_prior_attr;

    pthread_attr_init(&hi_prior_attr);
    pthread_attr_setschedpolicy(&hi_prior_attr, SCHED_FIFO);
    pthread_attr_getschedparam(&hi_prior_attr, &param);
    param.sched_priority = newprio;
    pthread_attr_setschedparam(&hi_prior_attr, &param);
    pthread_create(&fthPthread, &hi_prior_attr, &fthPthreadRoutine, NULL);
#endif

    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_create(&fthPthread, &attr, &MultiNodeMultiPtlMstosrPthreadRoutine, &numprocs);

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: Created pthread for FTH %d\n", myid, i);

    pthread_join(fthPthread, NULL);

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                 "\nNode %d: SDF Messaging Test Complete - i %d\n", myid, i);

    /* Lets stop the messaging engine this will block until they complete */
    /* FIXME arg is the threadlvl */
#if 0
    if (numprocs > 1) {
        if (!myid) {
            for (int index = 1; index < numprocs; index ++)
                sdf_msg_nsync(myid, index);
        }
        else {
            sdf_msg_nsync(myid, 0);
        }

    }
#endif
    sdf_msg_stopmsg(myid, SYS_SHUTDOWN_SELF);

    plat_shmem_detach();

    if (myid == 0) {
        sched_yield();
        printf("Node %d: Exiting message test after yielding... Calling MPI_Finalize\n", myid);
        fflush(stdout);
        sched_yield();
        MPI_Finalize();
    }
    else {
        printf("Node %d: Exiting message test... Calling MPI_Finalize\n", myid);
        fflush(stdout);
        sched_yield();
        MPI_Finalize();
    }
    printf("Successfully ends\n");
    return (EXIT_SUCCESS);

}
Exemplo n.º 4
0
int OSPU_Comm_split_node(MPI_Comm oldcomm, MPI_Comm * newcomm)
{
    int rc = MPI_SUCCESS;

#if MPI_VERSION >= 3

    rc = MPI_Comm_split_type(oldcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, newcomm);
    if (rc!=MPI_SUCCESS) return rc;

#elif defined(MPICH2) && (MPICH2_NUMVERSION>10500000)

    rc = MPIX_Comm_split_type(oldcomm, MPIX_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, newcomm);
    if (rc!=MPI_SUCCESS) return rc;

#else

    /* This code was authored by Jim Dinan */

    char my_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Comm node_comm = MPI_COMM_NULL;
    MPI_Comm parent_comm;
    int len;

    /* Dup so we don't leak communicators */
    rc = MPI_Comm_dup(oldcomm, &parent_comm);
    if (rc!=MPI_SUCCESS) return rc;

    rc = MPI_Get_processor_name(my_name, &len);
    if (rc!=MPI_SUCCESS) return rc;

    while (node_comm == MPI_COMM_NULL)
    {
        char root_name[MPI_MAX_PROCESSOR_NAME];
        int  rank;
        MPI_Comm old_parent;

        rc = MPI_Comm_rank(parent_comm, &rank);
        if (rc!=MPI_SUCCESS) return rc;

        if (rank == 0)
        {
            rc = MPI_Bcast(my_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, parent_comm);
            if (rc!=MPI_SUCCESS) return rc;
            strncpy(root_name, my_name, MPI_MAX_PROCESSOR_NAME);
        } 
        else 
        {
            rc = MPI_Bcast(root_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 0, parent_comm);
            if (rc!=MPI_SUCCESS) return rc;
        }

        old_parent = parent_comm;

        if (strncmp(my_name, root_name, MPI_MAX_PROCESSOR_NAME) == 0)
        {
            /* My group splits off, I'm done after this */
            rc = MPI_Comm_split(parent_comm, 1, rank, &node_comm);
            if (rc!=MPI_SUCCESS) return rc;
        }
        else
        {
            /* My group keeps going, separate from the others */
            rc = MPI_Comm_split(parent_comm, 0, rank, &parent_comm);
            if (rc!=MPI_SUCCESS) return rc;
        }

        /* Old parent is no longer needed */
        rc = MPI_Comm_free(&old_parent);
        if (rc!=MPI_SUCCESS) return rc;
    }

    *newcomm = node_comm;

#endif

    return rc = MPI_SUCCESS;
}
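
A hypothetical caller for OSPU_Comm_split_node() (not part of the original source) could split MPI_COMM_WORLD into per-node communicators and report each rank's node-local rank alongside its processor name:

#include <stdio.h>
#include <mpi.h>

int OSPU_Comm_split_node(MPI_Comm oldcomm, MPI_Comm *newcomm);

int main(int argc, char **argv)
{
    char name[MPI_MAX_PROCESSOR_NAME];
    int len, world_rank, node_rank;
    MPI_Comm node_comm;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    MPI_Get_processor_name(name, &len);

    if (OSPU_Comm_split_node(MPI_COMM_WORLD, &node_comm) != MPI_SUCCESS)
        MPI_Abort(MPI_COMM_WORLD, 1);
    MPI_Comm_rank(node_comm, &node_rank);

    printf("world rank %d is node-local rank %d on %s\n",
           world_rank, node_rank, name);

    MPI_Comm_free(&node_comm);
    MPI_Finalize();
    return 0;
}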
Example #5
int main (int argc, char *argv[])
{
    int rank, nprocs, ilen;
    char processor[MPI_MAX_PROCESSOR_NAME];
    double tstart = 0.0, tend = 0.0;

    MPI_Status reqstat;
    MPI_Request send_request;
    MPI_Request recv_request;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor, &ilen);

    if (nprocs != 2)
    {
        if(rank == 0) printf("This test requires exactly two processes\n");

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    int other_proc = (rank == 1 ? 0 : 1);

    // Hard code GPU affinity since this example only works with 2 GPUs.
    int igpu = 0;

   // if(rank == 0 )
       /* printf("%s allocates %d MB pinned memory with regual mpi and "
               "bidirectional bandwidth\n", argv[0],
               MAX_MSG_SIZE / 1024 / 1024);
	*/
    /*printf("node=%d(%s): my other _proc = %d and using GPU=%d\n", rank,
            processor, other_proc, igpu);
	*/
    char *h_src, *h_rcv;
   // h_src=(char *)malloc(MYBUFSIZE*100*sizeof(char));
   // h_rcv=(char *)malloc(MYBUFSIZE*100*sizeof(char));
    CHECK(cudaSetDevice(igpu));
    CHECK(cudaMallocHost((void**)&h_src, MYBUFSIZE));
    CHECK(cudaMallocHost((void**)&h_rcv, MYBUFSIZE));

    char *d_src, *d_rcv;
    CHECK(cudaSetDevice(igpu));
    CHECK(cudaMalloc((void **)&d_src, MYBUFSIZE));
    CHECK(cudaMalloc((void **)&d_rcv, MYBUFSIZE));

    initalData(h_src, h_rcv, MYBUFSIZE);

    CHECK(cudaMemcpy(d_src, h_src, MYBUFSIZE, cudaMemcpyDefault));
    CHECK(cudaMemcpy(d_rcv, h_rcv, MYBUFSIZE, cudaMemcpyDefault));

    // latency test
    for(int size = 1; size <= MAX_MSG_SIZE; size = size * 2)
    {
        MPI_Barrier(MPI_COMM_WORLD);

        if(rank == 0)
        {
            tstart = MPI_Wtime();

            for(int i = 0; i < loop; i++)
            {
                /*
                 * Transfer data from the GPU to the host to be transmitted to
                 * the other MPI process.
                 */
                CHECK(cudaMemcpy(h_src, d_src, size, cudaMemcpyDeviceToHost));

                // bi-directional transmission
                MPI_Isend(h_src, size, MPI_CHAR, other_proc, 100,
                          MPI_COMM_WORLD, &send_request);
                MPI_Irecv(h_rcv, size, MPI_CHAR, other_proc, 10,
                          MPI_COMM_WORLD, &recv_request);


                MPI_Waitall(1, &recv_request, &reqstat);
                MPI_Waitall(1, &send_request, &reqstat);

                /*
                 * Transfer the data received from the other MPI process to
                 * the device.
                 */
                CHECK(cudaMemcpy(d_rcv, h_rcv, size, cudaMemcpyHostToDevice));
            }

            tend = MPI_Wtime();
        }
        else
        {
            for(int i = 0; i < loop; i++)
            {
                /*
                 * Transfer data from the GPU to the host to be transmitted to
                 * the other MPI process.
                 */
                CHECK(cudaMemcpy(h_src, d_src, size, cudaMemcpyDeviceToHost));

                // bi-directional transmission
                MPI_Isend(h_src, size, MPI_CHAR, other_proc, 10, MPI_COMM_WORLD,
                          &send_request);
                MPI_Irecv(h_rcv, size, MPI_CHAR, other_proc, 100,
                          MPI_COMM_WORLD, &recv_request);
                MPI_Waitall(1, &recv_request, &reqstat);
                MPI_Waitall(1, &send_request, &reqstat);

                /*
                 * Transfer the data received from the other MPI process to
                 * the device.
                 */
                CHECK(cudaMemcpy(d_rcv, h_rcv, size, cudaMemcpyHostToDevice));
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);

        if(rank == 0)
        {
            double latency = (tend - tstart) * 1e6 / (2.0 * loop);
            float performance = (float) size / (float) latency;
            printf("%6d , %10.2f , %10.2f \n",
                   (size >= 1024 * 1024) ? size  : size , latency, performance);

            fflush(stdout);
        }
    }

    CHECK(cudaFreeHost(h_src));
    CHECK(cudaFreeHost(h_rcv));

    CHECK(cudaSetDevice(igpu));
    CHECK(cudaFree(d_src));
    CHECK(cudaFree(d_rcv));

    MPI_Finalize();

    return EXIT_SUCCESS;
}
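
Example 5 stages every message through pinned host buffers, with a cudaMemcpy before the send and after the receive. With an MPI library built CUDA-aware (offered, for example, by recent Open MPI and MVAPICH2 builds), the device pointers can be handed to MPI directly; the sketch below shows that alternative exchange. It is not part of the original test and is not portable to every MPI installation:

#include <mpi.h>

/* Sketch only: requires a CUDA-aware MPI build.  d_src and d_rcv are
 * assumed to be device buffers allocated with cudaMalloc, as in Example 5. */
static void exchange_device_buffers(char *d_src, char *d_rcv, int size,
                                    int other_proc)
{
    MPI_Request reqs[2];

    MPI_Irecv(d_rcv, size, MPI_CHAR, other_proc, 0, MPI_COMM_WORLD, &reqs[0]);
    MPI_Isend(d_src, size, MPI_CHAR, other_proc, 0, MPI_COMM_WORLD, &reqs[1]);
    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
}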
Example #6
/* ADIOI_cb_gather_name_array() - gather a list of processor names from all processes
 *                          in a communicator and store them on rank 0.
 *
 * This is a collective call on the communicator(s) passed in.
 *
 * Obtains a rank-ordered list of processor names from the processes in
 * "dupcomm".
 *
 * Returns 0 on success, -1 on failure.
 *
 * NOTE: Needs some work to cleanly handle out of memory cases!  
 */
int ADIOI_cb_gather_name_array(MPI_Comm comm,
			       MPI_Comm dupcomm,
			       ADIO_cb_name_array *arrayp)
{
    char my_procname[MPI_MAX_PROCESSOR_NAME], **procname = 0;
    int *procname_len = NULL, my_procname_len, *disp = NULL, i;
    int commsize, commrank, found;
    ADIO_cb_name_array array = NULL;
    int alloc_size;

    if (ADIOI_cb_config_list_keyval == MPI_KEYVAL_INVALID) {
        /* cleaned up by ADIOI_End_call */
	MPI_Keyval_create((MPI_Copy_function *) ADIOI_cb_copy_name_array, 
			  (MPI_Delete_function *) ADIOI_cb_delete_name_array,
			  &ADIOI_cb_config_list_keyval, NULL);
    }
    else {
	MPI_Attr_get(comm, ADIOI_cb_config_list_keyval, (void *) &array, &found);
        if (found) {
            ADIOI_Assert(array != NULL);
	    *arrayp = array;
	    return 0;
	}
    }

    MPI_Comm_size(dupcomm, &commsize);
    MPI_Comm_rank(dupcomm, &commrank);

    MPI_Get_processor_name(my_procname, &my_procname_len);

    /* allocate space for everything */
    array = (ADIO_cb_name_array) ADIOI_Malloc(sizeof(*array));
    if (array == NULL) {
	return -1;
    }
    array->refct = 2; /* we're going to associate this with two comms */

    if (commrank == 0) {
	/* process 0 keeps the real list */
	array->namect = commsize;

	array->names = (char **) ADIOI_Malloc(sizeof(char *) * commsize);
	if (array->names == NULL) {
	    return -1;
	}
	procname = array->names; /* simpler to read */

	procname_len = (int *) ADIOI_Malloc(commsize * sizeof(int));
	if (procname_len == NULL) { 
	    return -1;
	}
    }
    else {
	/* everyone else just keeps an empty list as a placeholder */
	array->namect = 0;
	array->names = NULL;
    }
    /* gather lengths first */
    MPI_Gather(&my_procname_len, 1, MPI_INT, 
	       procname_len, 1, MPI_INT, 0, dupcomm);

    if (commrank == 0) {
#ifdef CB_CONFIG_LIST_DEBUG
	for (i=0; i < commsize; i++) {
	    FPRINTF(stderr, "len[%d] = %d\n", i, procname_len[i]);
	}
#endif

	alloc_size = 0;
	for (i=0; i < commsize; i++) {
	    /* add one to the lengths because we need to count the
	     * terminator, and we are going to use this list of lengths
	     * again in the gatherv.  
	     */
	    alloc_size += ++procname_len[i];
	}
	
	procname[0] = ADIOI_Malloc(alloc_size);
	if (procname[0] == NULL) {
	    return -1;
	}

	for (i=1; i < commsize; i++) {
	    procname[i] = procname[i-1] + procname_len[i-1];
	}
	
	/* create our list of displacements for the gatherv.  we're going
	 * to do everything relative to the start of the region allocated
	 * for procname[0]
	 */
	disp = ADIOI_Malloc(commsize * sizeof(int));
	disp[0] = 0;
	for (i=1; i < commsize; i++) {
	    disp[i] = (int) (procname[i] - procname[0]);
	}

    }

    /* now gather strings */
    if (commrank == 0) {
	MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, 
		    procname[0], procname_len, disp, MPI_CHAR,
		    0, dupcomm);
    }
    else {
	/* if we didn't do this, we would need to allocate procname[]
	 * on all processes...which seems a little silly.
	 */
	MPI_Gatherv(my_procname, my_procname_len + 1, MPI_CHAR, 
		    NULL, NULL, NULL, MPI_CHAR, 0, dupcomm);
    }

    if (commrank == 0) {
	/* no longer need the displacements or lengths */
	ADIOI_Free(disp);
	ADIOI_Free(procname_len);

#ifdef CB_CONFIG_LIST_DEBUG
	for (i=0; i < commsize; i++) {
	    FPRINTF(stderr, "name[%d] = %s\n", i, procname[i]);
	}
#endif
    }

    /* store the attribute; we want to store SOMETHING on all processes
     * so that they can all tell if we have gone through this procedure 
     * or not for the given communicator.
     *
     * specifically we put it on both the original comm, so we can find
     * it next time an open is performed on this same comm, and on the
     * dupcomm, so we can use it in I/O operations.
     */
    MPI_Attr_put(comm, ADIOI_cb_config_list_keyval, array);
    MPI_Attr_put(dupcomm, ADIOI_cb_config_list_keyval, array);
    *arrayp = array;
    return 0;
}
Example #7
int main (int argc, char **argv)
{
  char pname[MPI_MAX_PROCESSOR_NAME];

  int iter;
  int counter;
  int c;
  int tnum = 0;
  int resultlen;
  int ret;
  double value;
  extern char *optarg;

  while ((c = getopt (argc, argv, "p:")) != -1) {
    switch (c) {
    case 'p':
      if ((ret = GPTLevent_name_to_code (optarg, &counter)) != 0) {
	printf ("Failure from GPTLevent_name_to_code\n");
	return 1;
      }
      if (GPTLsetoption (counter, 1) < 0) {
	printf ("Failure from GPTLsetoption (%s,1)\n", optarg);
	return 1;
      }
      break;
    default:
      printf ("unknown option %c\n", c);
      printf ("Usage: %s [-p option_name]\n", argv[0]);
      return 2;
    }
  }
  
  ret = GPTLsetoption (GPTLabort_on_error, 1);
  ret = GPTLsetoption (GPTLoverhead, 1);
  ret = GPTLsetoption (GPTLnarrowprint, 1);

  if (MPI_Init (&argc, &argv) != MPI_SUCCESS) {
    printf ("Failure from MPI_Init\n");
    return 1;
  }

  ret = GPTLinitialize ();
  ret = GPTLstart ("total");
	 
  ret = MPI_Comm_rank (MPI_COMM_WORLD, &iam);
  ret = MPI_Comm_size (MPI_COMM_WORLD, &nproc);

  ret = MPI_Get_processor_name (pname, &resultlen);
  printf ("Rank %d is running on processor %s\n", iam, pname);

#ifdef THREADED_OMP
  nthreads = omp_get_max_threads ();
#pragma omp parallel for private (iter, ret, tnum)
#endif

  for (iter = 1; iter <= nthreads; iter++) {
#ifdef THREADED_OMP
    tnum = omp_get_thread_num ();
#endif
    printf ("Thread %d of rank %d on processor %s\n", tnum, iam, pname);
    value = sub (iter);
  }

  ret = GPTLstop ("total");
  ret = GPTLpr (iam);

  if (iam == 0) {
    printf ("summary: testing GPTLpr_summary...\n");
    printf ("Number of threads was %d\n", nthreads);
    printf ("Number of tasks was %d\n", nproc);
  }

  // NOTE: if ENABLE_PMPI is set, 2nd pr call below will show some extra send/recv calls
  // due to MPI calls from within GPTLpr_summary_file
  if (GPTLpr_summary (MPI_COMM_WORLD) != 0)
    return 1;

  if (GPTLpr_summary_file (MPI_COMM_WORLD, "timing.summary.duplicate") != 0)
    return 1;

  ret = MPI_Finalize ();

  if (GPTLfinalize () != 0)
    return 1;

  return 0;
}
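
The GPTL calls in Example 7 follow a simple initialize/start/stop/report lifecycle around the timed region. A minimal sketch of just that lifecycle, assuming the gptl.h header that ships with the same library, is:

#include <mpi.h>
#include "gptl.h"

/* Minimal sketch of the GPTL lifecycle used in Example 7: time one region
 * and write a per-rank timing report. */
int main(int argc, char **argv)
{
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    GPTLinitialize();
    GPTLstart("total");
    /* ... work to be timed ... */
    GPTLstop("total");
    GPTLpr(rank);        /* per-rank timing file, as in Example 7 */
    GPTLfinalize();

    MPI_Finalize();
    return 0;
}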
Example #8
int main(int argc, char *argv[]) {
	info = (struct test_info *)malloc(sizeof(struct test_info));
	test_info_init(info);
	info->test_type = 0;
    info->msg_count=50;
    
    struct plat_opts_config_mpilogme config;
    SDF_boolean_t success = SDF_TRUE;
    uint32_t numprocs;
    int tmp, namelen, mpiv = 0, mpisubv = 0;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int msg_init_flags = SDF_MSG_MPI_INIT;

    config.inputarg = 0;
    config.msgtstnum = 50;

    /* We may not need to gather anything from here but what the heck */
    loadProperties("/opt/schooner/config/schooner-med.properties"); // TODO get filename from command line

    /* make sure this is first in order to get the MPI init args */
    success = plat_opts_parse_mpilogme(&config, argc, argv) ? SDF_FALSE : SDF_TRUE;

    printf("input arg %d msgnum %d success %d\n", config.inputarg, config.msgtstnum, success);
    fflush(stdout);
    myid = sdf_msg_init_mpi(argc, argv, &numprocs, &success, msg_init_flags);
    info->myid = myid;
    if ((!success)||(myid < 0)) {
	printf("Node %d: MPI Init failure... exiting - errornum %d\n", myid, success);
	fflush(stdout);
        MPI_Finalize();
        return (EXIT_FAILURE);
    }

    tmp = init_msgtest_sm((uint32_t)myid);

    /* Enable this process to run threads across 2 cpus, MPI will default to running all threads
     * on only one core which is not what we really want as it forces the msg thread to time slice
     * with the fth threads that send and receive messages
     * first arg is the number of the processor you want to start off on and arg #2 is the sequential
     * number of processors from there
     */
    lock_processor(0, 2);
    info->lock_cpu = 2;
    /* Startup SDF Messaging Engine FIXME - dual node mode still - pnodeid is passed and determined
     * from the number of processes mpirun sees.
     */
    sleep(1);
    msg_init_flags =  msg_init_flags | SDF_MSG_RTF_DISABLE_MNGMT;
    sdf_msg_init(myid, &pnodeid, msg_init_flags);

    MPI_Get_version(&mpiv, &mpisubv);
    MPI_Get_processor_name(processor_name, &namelen);

    printf("Node %d: MPI Version: %d.%d Name %s \n", myid, mpiv, mpisubv,
            processor_name);
    fflush(stdout);

    plat_log_msg(
            PLAT_LOG_ID_INITIAL,
            LOG_CAT,
            PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Completed Msg Init.. numprocs %d pnodeid %d Starting Test\n",
            myid, numprocs, pnodeid);
    info->pnodeid = pnodeid;
    for (msgCount = 0; msgCount < 2; msgCount++) {
        sleep(2);
        plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
                "\nNode %d: Number of sleeps %d\n", myid, msgCount);
    }

    /* create the fth test threads */

    fthInit(); // Init

    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_create(&fthPthread, &attr, &SystemPthreadRoutine, &myid);

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: Created pthread for System protocol\n", myid);
    info->pthread_info = 1;
    info->fth_info = 2;
    pthread_join(fthPthread, NULL);

    plat_log_msg(PLAT_LOG_ID_INITIAL, LOG_CAT, PLAT_LOG_LEVEL_TRACE,
            "\nNode %d: SDF Messaging Test Complete\n", myid);

    /* Lets stop the messaging engine this will block until they complete */
    /* FIXME arg is the threadlvl */
    sdf_msg_stopmsg(myid, SYS_SHUTDOWN_SELF);

    plat_shmem_detach();
    info->success++;
    if (myid == 0) {
        sched_yield();
        printf("Node %d: Exiting message test after yielding... Calling MPI_Finalize\n", myid);
        fflush(stdout);
        sched_yield();
        MPI_Finalize();
        print_test_info(info);
        test_info_final(info);
    }
    else {
        printf("Node %d: Exiting message test... Calling MPI_Finalize\n", myid);
        fflush(stdout);
        sched_yield();
        MPI_Finalize();
    }
    printf("Successfully ends\n");
    return (EXIT_SUCCESS);
}
Example #9
int main(int argc,char* argv[])
{
  int numtasks, rank, rc;
  int micros=35;
  int minsec=42;
  const int buf_size = 60;		/* Size of the buffer for timestamp */
  /* initialize MPI and check for success*/
  rc = MPI_Init(&argc,&argv);
  if (rc != MPI_SUCCESS)
    {
      printf ("Error starting MPI programm. Termianting.\n");
      MPI_Abort(MPI_COMM_WORLD, rc);
    }
  /* get size of comm and rank in that comm */
  MPI_Comm_size(MPI_COMM_WORLD,&numtasks );
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  /* Make sure we have at least 2 processes (we need at least that many). */
  if (numtasks < 2) 
    {
      fprintf(stderr, "World size must be at least two for %s to run properly!\n", argv[0]);
      MPI_Abort(MPI_COMM_WORLD, 1); 
    }
  /* get hostname */
  char hostname[MPI_MAX_PROCESSOR_NAME];
  int resultlength=0;
  MPI_Get_processor_name(hostname,&resultlength);
  /* get current time */
  struct tm *Tm;
  struct timeval detail_time;
  time_t timer = time(NULL);
  Tm=localtime(&timer);
  gettimeofday(&detail_time,NULL);
  micros = detail_time.tv_usec;
  /* workernodes do */
  if (rank != 0)
    {
      /* make formatted string from time */
      char timestamp[buf_size];
      snprintf(timestamp,buf_size,"%s(%d):%d %d %d, %d:%d:%d and %dus\n",
	       //Tm->tm_wday, /* Mon - Sun */
	       hostname,
	       rank,
	       Tm->tm_mday,
	       Tm->tm_mon+1,
	       Tm->tm_year+1900,
	       Tm->tm_hour,
	       Tm->tm_min,
	       Tm->tm_sec,
	       (int) detail_time.tv_usec); /* /1000 for ms */
      
      /* send timestamp to Master */
      MPI_Send(timestamp, buf_size, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
    }else if (rank == 0)
    {
      /* print received messages */
      printf("The masternode received the following timestamps:\n");
      char buf[buf_size];
      for (int i = 1; i < numtasks; i++)
	{
	  MPI_Recv(buf, buf_size, MPI_CHAR, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	  fprintf(stdout, "%s", buf);
	}
    }
  /* collect the minimum from all processes */
  MPI_Reduce(&micros, &minsec, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
  if (rank == 0)
    {
      printf("Minimum of all microsecond counts was:%dns\n",minsec);
    }
  MPI_Barrier(MPI_COMM_WORLD);
  fprintf(stdout,"Rang %d beendet jetzt!\n",rank);
  /* finalize the MPI environment */
  MPI_Finalize();
  return 0;
}
Example #10
int initParallelEnv(){
    omp_set_num_threads(THREADS);

    /* Setup MPI programming environment */
	MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &threadSupport);

	comm = MPI_COMM_WORLD;
	MPI_Comm_size(comm, &numMPIprocs);
	MPI_Comm_rank(comm, &myMPIRank);

	/*Find the number of bytes for an int */
	sizeInteger = sizeof(int);

	/* Find the processor name of each MPI process */
    MPI_Get_processor_name(myProcName, &procNameLen);

	/* Use processor name to create a communicator
	 * across node boundaries.
	 */
	setupCommunicators();

	/* setup OpenMP programming environment */
    #pragma omp parallel shared(numThreads,globalIDarray,myMPIRank)
   {
	   numThreads = omp_get_num_threads();
	   myThreadID = omp_get_thread_num();

	   /* Allocate space for globalIDarray */
        #pragma omp single
       {
           globalIDarray = (int *)malloc(numThreads * sizeof(int));
       }

	   /*calculate the globalID for each thread */
	   globalIDarray[myThreadID] = (myMPIRank * numThreads) + myThreadID;
   }
    MPI_Barrier(comm);

    gaspi_config_t config;
    GASPI(config_get(&config));
    config.qp_count = THREADS;
    GASPI(config_set(config));
    /* GASPI setup */
    GASPI(proc_init(GASPI_BLOCK));

    gaspi_rank_t totalRanks;
    GASPI(proc_num(&totalRanks));

    gaspi_rank_t rank;
    GASPI(proc_rank(&rank));

    gaspi_number_t q_num;
    GASPI(queue_num(&q_num));
    assert (q_num == THREADS);

    GASPI(barrier (GASPI_GROUP_ALL, GASPI_BLOCK));
    // ok, we will continue to use the MPI ranks, just make sure GASPI and MPI ranks are identical
    // this is not guaranteed, so depending on the setup this may fail.
    assert (totalRanks == numMPIprocs);
    assert (rank == myMPIRank);

   /* set parallel info in benchmark report type */
   setParallelInfo(numMPIprocs,threadSupport,numThreads);

return 0;
}
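
setupCommunicators() is referenced above but not shown in this listing. A minimal sketch of one way to build a per-node communicator from the processor name, as the comment describes (the localComm global and the hashing scheme are assumptions, not the original implementation):

#include <mpi.h>

static MPI_Comm localComm;   /* hypothetical global holding the per-node communicator */

static void setupCommunicators(void)
{
    char name[MPI_MAX_PROCESSOR_NAME];
    int len, rank, i;
    unsigned colour = 0;

    MPI_Get_processor_name(name, &len);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Fold the host name into a colour so ranks on the same node agree.
     * (Hash collisions across different nodes are possible; a production
     * version would compare full names, or use MPI_Comm_split_type with
     * MPI_COMM_TYPE_SHARED on MPI-3 implementations.) */
    for (i = 0; i < len; i++)
        colour = colour * 31u + (unsigned char)name[i];

    MPI_Comm_split(MPI_COMM_WORLD, (int)(colour & 0x7fffffffu), rank, &localComm);
}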
Exemplo n.º 11
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  MPI_Comm comm = MPI_COMM_WORLD;
  char processor_name[128];
  int namelen = 128;
  int bbuf[(BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES];
  int buf[BUF_SIZE * 2 * NUM_SEND_TYPES];
  int i, j, k, at_size, send_t_number, index, outcount, total, flag;
  int num_errors, error_count, indices[2 * NUM_SEND_TYPES];
  MPI_Request aReq[2 * NUM_SEND_TYPES];
  MPI_Status aStatus[2 * NUM_SEND_TYPES];

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (comm, &nprocs);
  MPI_Comm_rank (comm, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Buffer_attach (bbuf, sizeof(int) * 
		     (BUF_SIZE + MPI_BSEND_OVERHEAD) * 2 * NUM_BSEND_TYPES);

  if (rank == 0) {
    /* set up persistent sends... */
    send_t_number = NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES;

    MPI_Send_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 
		    1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
    MPI_Send_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], 
		    BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, 
		    comm, &aReq[send_t_number * 2 + 1]);

    send_t_number++;

    MPI_Bsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 
		    1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
    MPI_Bsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], 
		    BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, 
		    comm, &aReq[send_t_number * 2 + 1]);


    send_t_number++;

    MPI_Rsend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 
		    1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
    MPI_Rsend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], 
		    BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, 
		    comm, &aReq[send_t_number * 2 + 1]);

    send_t_number++;

    MPI_Ssend_init (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT, 
		    1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
    MPI_Ssend_init (&buf[(send_t_number * 2 + 1) * BUF_SIZE], 
		    BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, 
		    comm, &aReq[send_t_number * 2 + 1]);
  }

  for (k = 0; k < (NUM_COMPLETION_MECHANISMS * 2); k++) {
    if (rank == 0) {
      /* initialize all of the send buffers */
      for (j = 0; j < NUM_SEND_TYPES; j++) {
	for (i = 0; i < BUF_SIZE; i++) {
	  buf[2 * j * BUF_SIZE + i] = i;
	  buf[((2 * j + 1) * BUF_SIZE) + i] = BUF_SIZE - 1 - i;
	}
      }
    }
    else if (rank == 1) {
      /* zero out all of the receive buffers */
      bzero (buf, sizeof(int) * BUF_SIZE * 2 * NUM_SEND_TYPES);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
      /* set up transient sends... */
      send_t_number = 0;
    
      MPI_Isend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
		 1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
      MPI_Isend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
		 BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, 
		 comm, &aReq[send_t_number * 2 + 1]);

      send_t_number++;
      
      MPI_Ibsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
		  1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
      MPI_Ibsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
		  BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, 
		  comm, &aReq[send_t_number * 2 + 1]);

      send_t_number++;

      /* Barrier to ensure receives are posted for rsends... */
      MPI_Barrier(MPI_COMM_WORLD);

      MPI_Irsend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
		  1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
      MPI_Irsend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
		  BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, 
		  comm, &aReq[send_t_number * 2 + 1]);

      send_t_number++;

      MPI_Issend (&buf[send_t_number * 2 * BUF_SIZE], BUF_SIZE, MPI_INT,
		  1, send_t_number * 2, comm, &aReq[send_t_number * 2]);
      MPI_Issend (&buf[(send_t_number * 2 + 1) * BUF_SIZE],
		  BUF_SIZE, MPI_INT, 1, send_t_number * 2 + 1, 
		  comm, &aReq[send_t_number * 2 + 1]);

      /* just to be paranoid */
      send_t_number++;
      assert (send_t_number == NUM_SEND_TYPES - NUM_PERSISTENT_SEND_TYPES);

      /* start the persistent sends... */
      if (k % 2) {
	MPI_Startall (NUM_PERSISTENT_SEND_TYPES * 2, &aReq[2 * send_t_number]);
      }
      else {
	for (j = 0; j < NUM_PERSISTENT_SEND_TYPES * 2; j++) {
	  MPI_Start (&aReq[2 * send_t_number + j]);
	}
      }
    
      /* NOTE: Changing the send buffer of a Bsend is NOT an error... */
      for (j = 0; j < NUM_SEND_TYPES; j++) {
	/* muck the buffers */
	buf[j * 2 * BUF_SIZE + (BUF_SIZE >> 1)] = BUF_SIZE;
      }

      printf ("USER MSG: 6 change send buffer errors in iteration #%d:\n", k);

      /* complete the sends */
      switch (k/2) {
      case 0:
	/* use MPI_Wait */
	for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
	  MPI_Wait (&aReq[j], &aStatus[j]);
	}
	break;
	
      case 1:
	/* use MPI_Waitall */
	MPI_Waitall (NUM_SEND_TYPES * 2, aReq, aStatus);
	break;

      case 2:
	/* use MPI_Waitany */
	for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
	  MPI_Waitany (NUM_SEND_TYPES * 2, aReq, &index, aStatus);
	}

	break;
	
      case 3:
	/* use MPI_Waitsome */
	total = 0;
	while (total < NUM_SEND_TYPES * 2) {
	  MPI_Waitsome (NUM_SEND_TYPES * 2, aReq, &outcount, indices, aStatus);

	  total += outcount;
	}

	break;

      case 4:
	/* use MPI_Test */
	for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
	  flag = 0;

	  while (!flag) {
	    MPI_Test (&aReq[j], &flag, &aStatus[j]);
	  }
	}

	break;
	
      case 5:
	/* use MPI_Testall */
	flag = 0;
	while (!flag) {
	  MPI_Testall (NUM_SEND_TYPES * 2, aReq, &flag, aStatus);
	}

	break;

      case 6:
	/* use MPI_Testany */
	for (j = 0; j < NUM_SEND_TYPES * 2; j++) {
	  flag = 0;
	  while (!flag) {
	    MPI_Testany (NUM_SEND_TYPES * 2, aReq, &index, &flag, aStatus);
	  }
	}

	break;
	
      case 7:
	/* use MPI_Testsome */
	total = 0;
	while (total < NUM_SEND_TYPES * 2) {
	  outcount = 0;

	  while (!outcount) {
	    MPI_Testsome (NUM_SEND_TYPES * 2, aReq, 
			  &outcount, indices, aStatus);
	  }

	  total += outcount;
	}

	break;

      default:
	assert (0);
	break;
      }
    }
    else if (rank == 1) {
Exemplo n.º 12
int main (int argc, char *argv[])
{
    int  numproc, rank, len,i;
    char hostname[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(hostname, &len);
    
    FP_PREC *yc, *dyc, *derr, *fullerr;
    FP_PREC *xc, dx, intg, davg_err, dstd_dev, intg_err;
    FP_PREC globalSum = 0.0;

    // MPI variables
    MPI_Request *requestList,request;
    MPI_Status  *status;

    //"real" grid indices
    int imin, imax;
    
    imin = 1 + (rank * (NGRID/numproc));
    
    if(rank == numproc - 1)
        imax = NGRID;
    else
        imax = (rank+1) * (NGRID/numproc);
       
    int range = imax - imin + 1;
    
    xc =  (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC));
    yc =  (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC));
    dyc =  (FP_PREC*) malloc((range + 2) * sizeof(FP_PREC));
    dx = (XF - XI)/(double)NGRID;
    for (i = 1; i <= range ; i++)
    {
        //xc[i] = imin + (XF - XI) * (FP_PREC)(i - 1)/(FP_PREC)(NGRID - 1);
        xc[i] = XI + dx * (imin + i - 2);
    }
    
    xc[0] = xc[1] - dx;
    xc[range + 1] = xc[range] + dx;
    
    for( i = 1; i <= range; i++ )
    {
        yc[i] = fn(xc[i]);
    }
    
    yc[0] = fn(xc[0]);
    yc[range + 1] = fn(xc[range + 1]);
    
    for (i = 1; i <= range; i++)
    {
        dyc[i] = (yc[i + 1] - yc[i - 1])/(2.0 * dx);
    }
    
    intg = 0.0;
    for (i = 1; i <= range; i++)
    {
        intg += 0.5 * (xc[i + 1] - xc[i]) * (yc[i + 1] + yc[i]);
    }
    
    MPI_Reduce(&intg, &globalSum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    
    
    //compute the error, average error of the derivatives
    derr = (FP_PREC*)malloc((range + 2) * sizeof(FP_PREC));
    
    //compute the errors
    for(i = 1; i <= range; i++)
    {
        derr[i] = fabs((dyc[i] - dfn(xc[i]))/dfn(xc[i]));
    }
    
    derr[0] = derr[range + 1] = 0.0;
    
    if(rank == 0)
    {
        fullerr = (FP_PREC *)malloc(sizeof(FP_PREC)*NGRID);
        requestList =(MPI_Request*)malloc((numproc-1)*sizeof(MPI_Request));
        for(i = 0;i<range;i++)
        {
            fullerr[i] = derr[i+1];
        }
        for(i = 1; i<numproc; i++)
        {
            int rmin, rmax, *indx;
            rmin = 1 + (i * (NGRID/numproc));
            if(i == numproc - 1)
                rmax = NGRID;
            else
                rmax = (i+1) * (NGRID/numproc);
            MPI_Irecv(fullerr+rmin-1, rmax-rmin+1, MPI_DOUBLE, i, 1, MPI_COMM_WORLD, &(requestList[i-1]));
        }
        /* make sure all remote error blocks have arrived before using fullerr */
        MPI_Waitall(numproc - 1, requestList, MPI_STATUSES_IGNORE);
        double sum = 0.0;
        for(i=0; i<NGRID; i++)
        {
            sum+=fullerr[i];
        }
        davg_err = sum/(FP_PREC)NGRID;
        dstd_dev = 0.0;
        for(i = 0; i< NGRID; i++)
        {
            dstd_dev += pow(fullerr[i] - davg_err, 2);
        }
        dstd_dev = sqrt(dstd_dev/(FP_PREC)NGRID);
        
        intg_err = fabs((ifn(XI, XF) - globalSum)/ifn(XI, XF));
        printf("%0.4e: %0.4e: %0.4e\n", davg_err, dstd_dev, intg_err);

    }
    else
    {
        MPI_Isend(derr+1, imax-imin+1, MPI_DOUBLE, 0, rank, MPI_COMM_WORLD, &request);
        /* complete the send before MPI_Finalize */
        MPI_Wait(&request, MPI_STATUS_IGNORE);
        fflush(stdout);
    }
    
    MPI_Finalize();
}
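
This example depends on the fn, dfn and ifn helpers and the FP_PREC, NGRID, XI and XF macros that live elsewhere in the source. A minimal sketch of those definitions under the assumption f(x) = sin(x), so that the analytic derivative and definite integral are available for the error checks (link with -lm):

#include <math.h>

#define FP_PREC double
#define NGRID   1000
#define XI      0.0
#define XF      M_PI

/* assumed test function with its analytic derivative and definite integral */
static FP_PREC fn(FP_PREC x)  { return sin(x); }
static FP_PREC dfn(FP_PREC x) { return cos(x); }
static FP_PREC ifn(FP_PREC a, FP_PREC b) { return cos(a) - cos(b); }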
Exemplo n.º 13
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Status status;
  MPI_Comm comm;
  int drank, dnprocs;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 3) {
    printf ("not enough tasks\n");
  }
  else {
    MPI_Comm_split (MPI_COMM_WORLD, rank % 2, nprocs - rank, &comm);
    
    if (comm != MPI_COMM_NULL) {
      MPI_Comm_size (comm, &dnprocs);
      MPI_Comm_rank (comm, &drank);

      if (dnprocs > 1) {
	if (drank == 0) {
	  memset (buf0, 0, buf_size);

	  MPI_Recv (buf1, buf_size, MPI_INT, 1, 0, comm, &status);
	
	  MPI_Send (buf0, buf_size, MPI_INT, 1, 0, comm);
	}
	else if (drank == 1) {
	  memset (buf1, 1, buf_size);

	  MPI_Recv (buf0, buf_size, MPI_INT, 0, 0, comm, &status);

	  MPI_Send (buf1, buf_size, MPI_INT, 0, 0, comm);
	}
      }
      else {
	printf ("(%d) Derived communicator too small (size = %d)\n",
		rank, dnprocs);
      }

      MPI_Comm_free (&comm);
    }
    else {
      printf ("(%d) Got MPI_COMM_NULL\n", rank);
    }
  }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
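
In the exchange above both ranks of the derived communicator post a blocking MPI_Recv before their MPI_Send, so each waits for a message the other has not yet sent and the exchange cannot complete. A sketch of the same exchange written with MPI_Sendrecv, which removes the ordering dependency (buf0, buf1, buf_size, comm and status refer to the variables of the example above):

      if (drank == 0) {
	memset (buf0, 0, buf_size * sizeof(int));
	MPI_Sendrecv (buf0, buf_size, MPI_INT, 1, 0,
		      buf1, buf_size, MPI_INT, 1, 0, comm, &status);
      }
      else if (drank == 1) {
	memset (buf1, 1, buf_size * sizeof(int));
	MPI_Sendrecv (buf1, buf_size, MPI_INT, 0, 0,
		      buf0, buf_size, MPI_INT, 0, 0, comm, &status);
      }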
Exemplo n.º 14
File: com.c Project: 8l/insieme
int
main ( int argc, char *argv[] )
{
  int *messList = NULL;
  int testIdx, doTestLoop;
  int i;

  executableName = "com";

  MPI_Init ( &argc, &argv );
  MPI_Get_processor_name ( hostName, &i );

  /* Set global wsize and rank values */
  MPI_Comm_size ( MPI_COMM_WORLD, &wsize );
  MPI_Comm_rank ( MPI_COMM_WORLD, &rank );

  if ( !initAllTestTypeParams ( &testParams ) )
  {
    MPI_Finalize (  );
    exit ( 1 );
  }

  argStruct.testList = "Bidirectional, BidirAsync";

  if ( !processArgs ( argc, argv ) )
  {
    if ( rank == 0 )
      printUse (  );

    MPI_Finalize (  );
    exit ( 1 );
  }

  /* If using a source directory of process rank target files,
   * get the next appropriate file.
   */
  if ( targetDirectory != NULL && getNextTargetFile (  ) == 0 )
  {
    prestaAbort ( "Failed to open target file in target directory %s\n",
                  targetDirectory );
  }

  doTestLoop = 1;
  while ( doTestLoop )
  {
    if ( !setupTestListParams (  ) || !initAllTestTypeParams ( &testParams ) )
    {
      if ( rank == 0 )
        printUse (  );

      MPI_Finalize (  );
      exit ( 1 );
    }

#ifdef PRINT_ENV
    if ( rank == 0 )
      printEnv();
#endif

    printReportHeader (  );

    for ( testIdx = 0; testIdx < TYPETOT; testIdx++ )
    {
      if ( argStruct.testList == NULL
           || ( argStruct.testList != NULL
                && strstr ( argStruct.testList,
                            testParams[testIdx].name ) != NULL ) )
      {
        prestaRankDebug ( 0, "running test index %d\n", testIdx );
        runTest ( &testParams[testIdx] );
      }
    }

    if ( presta_check_data == 1 )
    {
      MPI_Reduce ( &presta_data_err_total, &presta_global_data_err_total,
                   1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD );
    }

    if ( targetDirectory == NULL || getNextTargetFile (  ) == 0 )
    {
      doTestLoop = 0;
    }
  }

  printSeparator (  );

  freeBuffers ( &testParams );
  free ( messList );

  MPI_Finalize (  );

  exit ( 0 );
}
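
prestaAbort and the other presta* helpers belong to the surrounding benchmark and are not shown here. A minimal sketch of an abort helper with the shape the call site suggests, i.e. a printf-style message followed by MPI_Abort (the signature is an assumption, not the original code):

#include <mpi.h>
#include <stdarg.h>
#include <stdio.h>

/* Sketch: print a formatted error message, then abort the whole job. */
void prestaAbort ( const char *fmt, ... )
{
  va_list ap;
  va_start ( ap, fmt );
  vfprintf ( stderr, fmt, ap );
  va_end ( ap );
  fflush ( stderr );
  MPI_Abort ( MPI_COMM_WORLD, 1 );
}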
Exemplo n.º 15
int main(int argc, char *argv[])
{
    int sendbuf[COUNT], recvbuf[COUNT], i;
    int err = 0, rank, nprocs, errs = 0;
    MPI_Comm intercomm;
    int listenfd, connfd, port, namelen;
    struct sockaddr_in cliaddr, servaddr;
    struct hostent *h;
    char hostname[MPI_MAX_PROCESSOR_NAME];
    socklen_t len, clilen;

    MTest_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs != 2) {
        printf("Run this program with 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (rank == 1) {
        /* server */
        listenfd = socket(AF_INET, SOCK_STREAM, 0);
        if (listenfd < 0) {
            printf("server cannot open socket\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        memset(&servaddr, 0, sizeof(servaddr));
        servaddr.sin_family = AF_INET;
        servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
        servaddr.sin_port = 0;

        err = bind(listenfd, (struct sockaddr *) &servaddr, sizeof(servaddr));
        if (err < 0) {
            errs++;
            printf("bind failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        len = sizeof(servaddr);
        err = getsockname(listenfd, (struct sockaddr *) &servaddr, &len);
        if (err < 0) {
            errs++;
            printf("getsockname failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        port = ntohs(servaddr.sin_port);
        MPI_Get_processor_name(hostname, &namelen);

        err = listen(listenfd, 5);
        if (err < 0) {
            errs++;
            printf("listen failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        MPI_Send(hostname, namelen + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
        MPI_Send(&port, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);

        MPI_Barrier(MPI_COMM_WORLD);

        clilen = sizeof(cliaddr);

        connfd = accept(listenfd, (struct sockaddr *) &cliaddr, &clilen);
        if (connfd < 0) {
            printf("accept failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }
    else {
        /* client */

        MPI_Recv(hostname, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, 1, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(&port, 1, MPI_INT, 1, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

        MPI_Barrier(MPI_COMM_WORLD);

        h = gethostbyname(hostname);
        if (h == NULL) {
            printf("gethostbyname failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        servaddr.sin_family = h->h_addrtype;
        memcpy((char *) &servaddr.sin_addr.s_addr, h->h_addr_list[0], h->h_length);
        servaddr.sin_port = htons(port);

        /* create socket */
        connfd = socket(AF_INET, SOCK_STREAM, 0);
        if (connfd < 0) {
            printf("client cannot open socket\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* connect to server */
        err = connect(connfd, (struct sockaddr *) &servaddr, sizeof(servaddr));
        if (err < 0) {
            errs++;
            printf("client cannot connect\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* To improve reporting of problems about operations, we
     * change the error handler to errors return */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    err = MPI_Comm_join(connfd, &intercomm);
    if (err) {
        errs++;
        printf("Error in MPI_Comm_join %d\n", err);
    }

    /* To improve reporting of problems about operations, we
     * change the error handler to errors return */
    MPI_Comm_set_errhandler(intercomm, MPI_ERRORS_RETURN);


    for (i = 0; i < COUNT; i++) {
        recvbuf[i] = -1;
        sendbuf[i] = i + COUNT * rank;
    }

    err = MPI_Sendrecv(sendbuf, COUNT, MPI_INT, 0, 0, recvbuf, COUNT, MPI_INT,
                       0, 0, intercomm, MPI_STATUS_IGNORE);
    if (err != MPI_SUCCESS) {
        errs++;
        printf("Error in MPI_Sendrecv on new communicator\n");
    }

    for (i = 0; i < COUNT; i++) {
        if (recvbuf[i] != ((rank + 1) % 2) * COUNT + i)
            errs++;
    }

    MPI_Barrier(MPI_COMM_WORLD);
    err = MPI_Comm_disconnect(&intercomm);
    if (err != MPI_SUCCESS) {
        errs++;
        printf("Error in MPI_Comm_disconnect\n");
    }

    MTest_Finalize(errs);
    MPI_Finalize();

    return 0;
}
Exemplo n.º 16
int main(int argc, char *argv[])
{
	int             ret;
	char           *buf;
	char            processor_name[MPI_MAX_PROCESSOR_NAME];
	int             namelen;
	double          start_time;
        double          used_time;
        double          avg_time;
        double          barrier_time;
        double          us_rate;
        int             max_len, lenbuf;
        int             j;
	int             me, nproc;
        FILE           *fparam ;


        /*
         *  beginning ...
         */

	setbuf(stdout, NULL) ;

        /*
         *  max_len  ...
         */
/*
	if (argc != 2)
        {
                printf("Use: bcast <max_len> \n") ;
		exit(1) ;
        }
	max_len =atoi(argv[1]) ;
*/
/*
#if defined(__LINUX__)
        fparam = fopen("bcast.in","rt") ;
#endif
#if defined(__SUNOS__)
        fparam = fopen("bcast.in","rt") ;
#endif
#if defined(__SP2__)
        fparam = fopen("/u/fperez/XMP/MiMPI/test/mp/mpi/performance/bcast/bcast.in","rt") ;
#endif
        if (fparam == NULL)
        {
                printf("ERROR: can not open bcast.in, sorry.\n") ;
		exit(1) ;
        }
        ret = fscanf(fparam,"max_len=%i",&max_len) ;
        fclose(fparam) ;
        if (ret != 1)
        {
                printf("ERROR: can not read a valid 'max_len' value from bcast.in, sorry.\n") ;
		exit(1) ;
        }
*/
	max_len = 1024 * 1024;

        if ( (max_len <= 0) || (max_len >= 8*1024*1024) )
        {
                printf("ERROR: (max_len <= 0) || (max_len >= 8*1024*1024)\n") ;
                exit(1) ;
        }

        /*
         *  MPI init  ...
         */
	ret = MPI_Init(&argc, &argv);	
	if (ret < 0)
	{
		printf("Can't init\n") ;
		exit(1) ;
	}

	MPI_Comm_rank(MPI_COMM_WORLD,&me) ;
	MPI_Get_processor_name(processor_name,&namelen) ;
	MPI_Comm_size(MPI_COMM_WORLD, &nproc) ;

#if (0)
	printf("Process %d; total %d is alive on %s\n",me,nproc,processor_name) ;
#endif


        buf = (char *) malloc((unsigned) max_len) ;
        if (buf == NULL)
        {
                perror("Error en malloc") ;
                exit(1) ;
        }
	memset(buf,'x',max_len) ;

	printf("barrier\n") ;
	 MPI_Barrier(MPI_COMM_WORLD) ;

        /* ... Barrier ... */
	start_time = MPI_Wtime() ;
	for(j = 0; j < 10; j++)
        {
	  MPI_Barrier(MPI_COMM_WORLD) ;
        }
	barrier_time = (MPI_Wtime() - start_time) ;
	barrier_time = barrier_time / 2000.0;

	if (me == 0)
        	printf(">>>>>>>>> BARRERA1  =%e\n",  barrier_time);

	/*barrier_time = 0;*/

        /* ... test ... */
	lenbuf = 1;
        while (lenbuf <= max_len)
        {
	        MPI_Barrier(MPI_COMM_WORLD) ;

		avg_time = 0.0;
		if (me != 0)
		{
			for(j = 0; j < PRUEBAS; j++)
                        {

				ret = MPI_Bcast(buf,lenbuf,MPI_CHAR,0,
				          MPI_COMM_WORLD) ;
				if (ret != MPI_SUCCESS)
						printf("ERROR IN BCAST\n");

	                    
			}
		}
		else
		{
			start_time = MPI_Wtime() ;
			for(j = 0; j < PRUEBAS; j++)
                        {

				ret = MPI_Bcast(buf,lenbuf,MPI_CHAR,0,
                                      MPI_COMM_WORLD) ;
	                  if (ret != MPI_SUCCESS)
						printf("ERROR IN BCAST\n");


			}
			used_time = (MPI_Wtime() - start_time) ;

                	avg_time =  used_time  / (float)  PRUEBAS;
				
			if (avg_time > 0)    /* rate is megabytes per second */
                        	us_rate = (double)((nproc * lenbuf)/
					(avg_time*(double)1000000)) ;
                	else
                        	us_rate = 0.0;

                	printf("len_bytes=%e avg_time_sec=%e rate_Mbytes_sec=%e\n", 
			        (double)lenbuf, (double)avg_time, (double)us_rate) ;
                }

                lenbuf *= 2;
        }

		if (me == 0)
		{
				char c;
				read(0, &c, 1);
		}

#if (0)
	if (me == 0)
	    printf("\nclock resolution in seconds: %10.8f\n", MPI_Wtick()) ;
#endif

	MPI_Finalize() ;
        free(buf) ;
	exit(0) ;

}
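
Only the root times the broadcast in this benchmark, so the reported rate reflects rank 0's view. A sketch of a variant that times the operation on every rank and reduces with MPI_MAX, so the slowest participant determines the reported figure (buf, lenbuf, ret and me refer to the variables above; the extra t_local/t_max variables are added only for this sketch):

	double t_local, t_max;

	MPI_Barrier(MPI_COMM_WORLD) ;
	t_local = MPI_Wtime() ;
	ret = MPI_Bcast(buf, lenbuf, MPI_CHAR, 0, MPI_COMM_WORLD) ;
	t_local = MPI_Wtime() - t_local ;
	/* keep the worst-case time across all ranks */
	MPI_Reduce(&t_local, &t_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD) ;
	if (me == 0)
		printf("len_bytes=%d worst_case_time_sec=%e\n", lenbuf, t_max) ;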
Exemplo n.º 17
	std::string processor_name() const {
	  char name[MPI_MAX_PROCESSOR_NAME];
	  int len;
	  MPI_Get_processor_name(name, &len);
	  return std::string(name);
	}
// argc = cpu count, argv = file.cpp
int main(int argc, char *argv[])
{
	// create win object, this is used for locks
	MPI_Win win;
	// needed for MPI
	int namelen = 0;
	int myid, numprocs = 0;
	// processor name
	char processor_name[MPI_MAX_PROCESSOR_NAME];
	//initialize MPI execution environment
	MPI_Init(&argc, &argv);
	//each process get total # of processes
	//the total # of processes specified in mpirun -np n
	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
	//each process gets its own id
	MPI_Comm_rank(MPI_COMM_WORLD, &myid);
	// needed for times
	double program_start = 0;
	double program_end = 0;
	double process_start = 0;
	double process_end = 0;
	// take time
	if (myid == 0)
		// get start program time
		program_start = MPI_Wtime();
	// Gets the name of the processor
	MPI_Get_processor_name(processor_name, &namelen);
	// number of processes
	int n = 0;
	// display info
	fprintf(stderr, "process %d on %s\n", myid, processor_name);
	fflush(stderr);
	// create win object for locks
	MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
	// declare array to hold char from words plus \0
	char* arr;
	// list to keep track of length of each word
	short* list;
	// size of entire array
	int arr_size = 0;
	// size of the list
	int list_size = 0;
	// new list of words that are not palindromes
	char* new_words;
	// size of the new array of words each process will
	// have in order to send back to root after finding
	// all non-palindrome words
	int new_size = 0;
	// this will be the total size of non-palindrome words
	// which will be received from each process
	int total_size = 0;
	// temp vector to hold arrays in file
	std::vector<std::string>* words;
	// root does
	if (myid == 0)
	{
		// stream to open file
		std::fstream in;
		// vector to dynamically grow as we add strings to it
		// this makes it so we don't need to open file twice since 
		// we would normally open file and count number of words
		// then reopen it to get the actual words to put in an array
		// we just declare based on the size we got the first time
		words = new std::vector<std::string>();
		// open file as instream
		in.open("Palindromes.txt", std::ios::in);
		// if error opening file
		if (in.fail())
		{
			// display message and close
			std::cout << "Error Opening File" << std::endl;
			MPI_Abort(MPI_COMM_WORLD, 1);
		}
		// no error while opening file
		else
		{
			// temp string to hold each word
			std::string temp;
			// grab each word from each line
			while (getline(in, temp))
			{
				// put word into vector
				words->push_back(temp);
				// loop each string (word) and get it's length
				for (int i = 0; i < temp.size(); i++)
					//increment size
					arr_size++;
				// increment one last time since we will be adding a 
				// \0 for each word
				arr_size++;
			}
			// done, close file
			in.close();
		}
		// set size depending on word size
		list_size = words->size();
		// we add one because later on in the program
		// we use the next index to mark where the loop stops;
		// without an extra entry at the end there is no way to mark the end
		// and the last word never gets processed
		list_size++;
	}
	// take time
	if (myid == 0)
		// get start program time
		process_start = MPI_Wtime();
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// broadcast the size of char array and list to other processes
	// they will be used to allocate the needed space per process
	MPI_Bcast(&arr_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// broadcast list size (list_size is an int, so use MPI_INT)
	MPI_Bcast(&list_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// allocate list, list should be number of \0
	// since there is one per word, it should be the number of words
	list = new short[list_size];
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// allocate array
	arr = new char[arr_size];
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	// root does this
	if (myid == 0)
	{
		// put the values into array
		// using a counter
		int counter = 0;
		// loop over the entire array, while looping over each word
		// and put them sequentially into array
		// with null terminator ending each word
		// we do list_size-1 since list_size is increased by 1
		// to fix an earlier problem where we need to mark
		// last element in list to be able to end it
		// without it, it crashes, not sure why
		for (int i = 0; i < list_size - 1; i++)
		{
			//mark start of word
			arr[counter] = '\0';
			// put null terminator index into list
			list[i] = counter;
			// increment counter
			counter++;
			// loop over the next word and copy its characters
			for (int j = 0; j < words->at(i).size(); j++)
			{
				// get word from vector at i (string is returned)
				// get char at j from string
				arr[counter++] = words->at(i).at(j);
			}
		}
		// make last element to stop loops later in program
		list[list_size - 1] = counter;
		// free up memory, this object is no longer used
		delete words;
	}
	// broadcast array of char (basically all the words
	// in a char array where each word ends in \0)
	// also broadcast list of word indexes
	MPI_Barrier(MPI_COMM_WORLD);
	// send list of indexes to all processes
	MPI_Bcast(list, list_size, MPI_SHORT, 0, MPI_COMM_WORLD);
	// send array of words to all processes
	MPI_Bcast(arr, arr_size, MPI_CHAR, 0, MPI_COMM_WORLD);
	// run function for each process to create a new list of non-palindromes
	// this uses cyclic partitioning
	new_words = markParalindromes(myid, arr_size, list_size, arr, list, numprocs, new_size);
	// use lock
	MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
	// create out stream object
	std::fstream out;
	// open file
	out.open("Non-Palindromes.txt", std::ios::out | std::ios::app);
	// if there is an error creating/opening
	if (out.fail())
	{
		// display message and close
		std::cout << "Error Opening File" << std::endl;
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	else
	{
		// loop non-palindrome words
		for (int i = 0; i < new_size; i++)
		{
			// if char is null terminator
			// replace it with newline
			if (new_words[i] == '\0')
			{
				// if I write one newline, it does not work
				// but two does
				out << std::endl;
				out << std::endl;
			}
			// if not null terminator
			else
				// write char of current word
				out << new_words[i];
		}
		// needed for end of file
		out << std::endl;
		out << std::endl;
		// close file
		out.close();
	}
	// unlock 
	MPI_Win_unlock(1, win);
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	if (myid == 0)
		// get process end time
		process_end = MPI_Wtime();
	// clean up and display results
	if (myid == 0)
	{
		// clean up
		if(arr != NULL)
			delete[] arr;
		if (list != NULL)
			delete[] list;
		if (new_words != NULL)
			delete[] new_words;
	}
	// barrier
	MPI_Barrier(MPI_COMM_WORLD);
	if (myid == 0)
		// get program end time
		program_end = MPI_Wtime();
	if(myid == 0)
	{ 
		// get total time
		std::cout << "Program Time: " << (program_end - program_start) << "s" << std::endl;
		// get process time
		std::cout << "Process Time: " << (process_end - process_start) << "s" << std::endl;
	}
	// needed to clean up 
	MPI_Win_free(&win);
	MPI_Finalize();
}
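
markParalindromes is declared nowhere in this listing; the call above expects it to return a '\0'-prefixed packed array of the words this rank decided are not palindromes, filling new_size. A minimal sketch matching that call site, using the cyclic partitioning the comment mentions (rank myid takes words myid, myid+numprocs, ...). The interface and the case-insensitive comparison are assumptions, not the original function:

#include <cctype>
#include <cstring>
#include <string>

// Sketch: returns a newly allocated buffer holding every non-palindrome word
// assigned to this rank, each word prefixed by '\0' like the input layout.
static bool isPalindrome(const std::string& w)
{
	for (std::size_t i = 0, j = w.size(); i + 1 < j; ++i, --j)
		if (std::tolower((unsigned char)w[i]) != std::tolower((unsigned char)w[j - 1]))
			return false;
	return true;
}

char* markParalindromes(int myid, int arr_size, int list_size,
                        char* arr, short* list, int numprocs, int& new_size)
{
	(void)arr_size; // total length is recovered from list[list_size - 1]
	std::string keep;
	// list[i] is the index of the '\0' that starts word i; list[i + 1] marks its end
	for (int i = myid; i < list_size - 1; i += numprocs)
	{
		std::string word(arr + list[i] + 1, arr + list[i + 1]);
		if (!isPalindrome(word))
			keep += '\0' + word; // keep the same '\0'-prefixed packing
	}
	new_size = static_cast<int>(keep.size());
	char* out = new char[new_size > 0 ? new_size : 1];
	std::memcpy(out, keep.data(), new_size);
	return out;
}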
Exemplo n.º 19
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  int i, j;
  int *granks;
  char processor_name[128];
  int namelen = 128;
  int buf[buf_size];
  MPI_Status status;
  MPI_Comm temp;
  MPI_Comm intercomm = MPI_COMM_NULL;
  MPI_Comm dcomms[DCOMM_CALL_COUNT];
  MPI_Group world_group, dgroup;
  int intersize, dnprocs[DCOMM_CALL_COUNT], drank[DCOMM_CALL_COUNT];
  int dims[TWOD], periods[TWOD], remain_dims[TWOD];
  int graph_index[] = { 2, 3, 4, 6 };
  int graph_edges[] = { 1, 3, 0, 3, 0, 2 };

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  /* probably want number to be higher... */
  if (nprocs < 4) {
      printf ("not enough tasks\n");
  }
  else {
    if (DCOMM_CALL_COUNT > 0) {
#ifdef RUN_COMM_DUP
      /* create all of the derived communicators... */
      /* simplest is created by MPI_Comm_dup... */
      MPI_Comm_dup (MPI_COMM_WORLD, &dcomms[0]);
#else
      dcomms[0] = MPI_COMM_NULL;
#endif
    }

    if (DCOMM_CALL_COUNT > 1) {
#ifdef RUN_COMM_CREATE
      /* use subset of MPI_COMM_WORLD group for MPI_Comm_create... */
      MPI_Comm_group (MPI_COMM_WORLD, &world_group);
      granks = (int *) malloc (sizeof(int) * (nprocs/2));
      for (i = 0; i < nprocs/2; i++)
	granks [i] = 2 * i;
      MPI_Group_incl (world_group, nprocs/2, granks, &dgroup);
      MPI_Comm_create (MPI_COMM_WORLD, dgroup, &dcomms[1]);
      MPI_Group_free (&world_group);
      MPI_Group_free (&dgroup);
      free (granks);
#else
      dcomms[1] = MPI_COMM_NULL;
#endif
    }

    if (DCOMM_CALL_COUNT > 2) {
#ifdef RUN_COMM_SPLIT
      /* split into thirds with inverted ranks... */
      MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &dcomms[2]);
#else
      dcomms[2] = MPI_COMM_NULL;
#endif
    }

#ifdef RUN_INTERCOMM_CREATE
    if ((DCOMM_CALL_COUNT < 2) || (dcomms[2] == MPI_COMM_NULL)) {
      MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &temp);
    }
    else {
      temp = dcomms[2];
    }
    if (rank % 3) {
      MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD,
			    (((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
			    nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
			    INTERCOMM_CREATE_TAG, &intercomm);
    }
    if ((DCOMM_CALL_COUNT < 2) || (dcomms[2] == MPI_COMM_NULL)) {
      MPI_Comm_free (&temp);
    }
#endif

    if (DCOMM_CALL_COUNT > 3) {
#ifdef RUN_CART_CREATE
      /* create a 2 X nprocs/2 torus topology, allow reordering */
      dims[0] = 2;
      dims[1] = nprocs/2;
      periods[0] = periods[1] = 1;
      MPI_Cart_create (MPI_COMM_WORLD, TWOD, dims, periods, 1, &dcomms[3]);
#else
      dcomms[3] = MPI_COMM_NULL;
#endif
    }

    if (DCOMM_CALL_COUNT > 4) {
#ifdef RUN_GRAPH_CREATE
      /* create the graph on p.268 MPI: The Complete Reference... */
      MPI_Graph_create (MPI_COMM_WORLD, GRAPH_SZ,
			graph_index, graph_edges, 1, &dcomms[4]);
#else
      dcomms[4] = MPI_COMM_NULL;
#endif
    }

    if (DCOMM_CALL_COUNT > 5) {
#ifdef RUN_CART_SUB
#ifndef RUN_CART_CREATE
      /* need to make cartesian communicator temporarily... */
      /* create a 2 X nprocs/2 torus topology, allow reordering */
      dims[0] = 2;
      dims[1] = nprocs/2;
      periods[0] = periods[1] = 1;
      MPI_Cart_create (MPI_COMM_WORLD, TWOD, dims, periods, 1, &dcomms[3]);
#endif
      if (dcomms[3] != MPI_COMM_NULL) {
	/* create 2 1 X nprocs/2 topologies... */
	remain_dims[0] = 0;
	remain_dims[1] = 1;
	MPI_Cart_sub (dcomms[3], remain_dims, &dcomms[5]);
#ifndef RUN_CART_CREATE
	/* free up temporarily created cartesian communicator... */
	MPI_Comm_free (&dcomms[3]);
#endif
      }
      else {
	dcomms[5] = MPI_COMM_NULL;
      }
#else
      dcomms[5] = MPI_COMM_NULL;
#endif
    }

    if (DCOMM_CALL_COUNT > 6) {
#ifdef RUN_INTERCOMM_MERGE
#ifndef RUN_INTERCOMM_CREATE
#ifndef RUN_COMM_SPLIT
      /* need to make split communicator temporarily... */
      /* split into thirds with inverted ranks... */
      MPI_Comm_split (MPI_COMM_WORLD, rank % 3, nprocs - rank, &dcomms[2]);
#endif
#endif
      /* create an intercommunicator and merge it... */
      if (rank % 3) {
#ifndef RUN_INTERCOMM_CREATE
	MPI_Intercomm_create (dcomms[2], 0, MPI_COMM_WORLD,
			      (((nprocs % 3) == 2) && ((rank % 3) == 2)) ?
			      nprocs - 1 : nprocs - (rank % 3) - (nprocs % 3),
			      INTERCOMM_CREATE_TAG, &intercomm);
#endif

	MPI_Intercomm_merge (intercomm, ((rank % 3) == 1), &dcomms[6]);

#ifndef RUN_INTERCOMM_CREATE
	/* we are done with intercomm... */
	MPI_Comm_free (&intercomm);
#endif
      }
      else {
	dcomms[6] = MPI_COMM_NULL;
      }
#ifndef RUN_INTERCOMM_CREATE
#ifndef RUN_COMM_SPLIT
      if (dcomms[2] != MPI_COMM_NULL)
	/* free up temporarily created split communicator... */
	MPI_Comm_free (&dcomms[2]);
#endif
#endif
#else
      dcomms[6] = MPI_COMM_NULL;
#endif
    }

    /* get all of the sizes and ranks... */
    for (i = 0; i < DCOMM_CALL_COUNT; i++) {
      if (dcomms[i] != MPI_COMM_NULL) {
	MPI_Comm_size (dcomms[i], &dnprocs[i]);
	MPI_Comm_rank (dcomms[i], &drank[i]);
      }
      else {
	dnprocs[i] = 0;
	drank[i] = -1;
      }
    }

#ifdef RUN_INTERCOMM_CREATE
    /* get the intercomm remote size... */
    if (rank % 3) {
      MPI_Comm_remote_size (intercomm, &intersize);
    }
#endif

    /* do some point to point on all of the dcomms... */
    for (i = 0; i < DCOMM_CALL_COUNT; i++) {
      if (dnprocs[i] > 1) {
	if (drank[i] == 0) {
	  for (j = 1; j < dnprocs[i]; j++) {
	    MPI_Recv (buf, buf_size, MPI_INT, j, 0, dcomms[i], &status);
	  }
	}
	else {
	  memset (buf, 1, buf_size*sizeof(int));

	  MPI_Send (buf, buf_size, MPI_INT, 0, 0, dcomms[i]);
	}
      }
    }

#ifdef RUN_INTERCOMM_CREATE
    /* do some point to point on the intercomm... */
    if ((rank % 3) == 1) {
      for (j = 0; j < intersize; j++) {
	MPI_Recv (buf, buf_size, MPI_INT, j, 0, intercomm, &status);
      }
    }
    else if ((rank % 3) == 2) {
      for (j = 0; j < intersize; j++) {
	memset (buf, 1, buf_size*sizeof(int));

	MPI_Send (buf, buf_size, MPI_INT, j, 0, intercomm);
      }
    }
#endif

    /* do a bcast on all of the dcomms... */
    for (i = 0; i < DCOMM_CALL_COUNT; i++) {
      /* IBM's implementation gets an error with comm over MPI_COMM_NULL... */
      if (dnprocs[i] > 0)
	MPI_Bcast (buf, buf_size, MPI_INT, 0, dcomms[i]);
    }

    /* use any source receives... */
    for (i = 0; i < DCOMM_CALL_COUNT; i++) {
      if (dnprocs[i] > 1) {
	if (drank[i] == 0) {
	  for (j = 1; j < dnprocs[i]; j++) {
	    MPI_Recv (buf, buf_size, MPI_INT,
		      MPI_ANY_SOURCE, 0, dcomms[i], &status);
	  }
	}
	else {
	  memset (buf, 1, buf_size*sizeof(int));

	  MPI_Send (buf, buf_size, MPI_INT, 0, 0, dcomms[i]);
	}
      }
    }

#ifdef RUN_INTERCOMM_CREATE
    /* do any source receives on the intercomm... */
    if ((rank % 3) == 1) {
      for (j = 0; j < intersize; j++) {
	MPI_Recv (buf, buf_size, MPI_INT,
		  MPI_ANY_SOURCE, 0, intercomm, &status);
      }
    }
    else if ((rank % 3) == 2) {
      for (j = 0; j < intersize; j++) {
	memset (buf, 1, buf_size*sizeof(int));

	MPI_Send (buf, buf_size, MPI_INT, j, 0, intercomm);
      }
    }
#endif

    /* do a barrier on all of the dcomms... */
    for (i = 0; i < DCOMM_CALL_COUNT; i++) {
      /* IBM's implementation gets an error with communication over MPI_COMM_NULL... */
      if (dnprocs[i] > 0)
	MPI_Barrier (dcomms[i]);
    }

    /* free all of the derived communicators... */
    for (i = 0; i < DCOMM_CALL_COUNT; i++) {
      /* freeing MPI_COMM_NULL is explicitly defined as erroneous... */
      if (dnprocs[i] > 0)
	MPI_Comm_free (&dcomms[i]);
    }

#ifdef RUN_INTERCOMM_CREATE
    if (rank % 3)
      /* we are done with intercomm... */
      MPI_Comm_free (&intercomm);
#endif
  }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
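
The cleanup loop above guards MPI_Comm_free with a size check because freeing MPI_COMM_NULL (or a predefined communicator) is erroneous. A tiny sketch of the same guard wrapped as a helper (the helper name is an assumption):

#include <mpi.h>

/* Sketch: free a communicator only when it is a real, user-created handle. */
static void free_if_created (MPI_Comm *comm)
{
  if (*comm != MPI_COMM_NULL && *comm != MPI_COMM_WORLD && *comm != MPI_COMM_SELF)
    MPI_Comm_free (comm);
}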
Exemplo n.º 20
int main (int argc, char * argv[]) {
	static int const maxlen = 200, rowsize = 521, colsize = 428, linelen = 12;
	char str[maxlen], lines[5][maxlen];
	FILE *fp, *fout;
	int icheck, nlines = 0;
	unsigned int h1, h2, h3;
	char *sptr;
        int **R, **G, **B, **Rnew, **Gnew, **Bnew;
	int *Rrow, *Grow, *Brow, *sendbuf, *recvbuf;
	int row = 0, col = 0, nblurs, lineno=0, k;
	struct timeval tim;
        int bufsize, coords[2];
        int myrowsize, mycolsize, myrowstart, myrowend, mycolstart, mycolend;
        int len, tag = 99, dest, prowsize, pcolsize, lastcolsize, nsend, localrow, localcol, coffset;
        char name[MPI_MAX_PROCESSOR_NAME];
        int nprocs, rank, nprows, npcols, myrow, mycol, left, right, up, down;
        MPI_Comm new_comm;
	MPI_Status status;

/* Initialize MPI */
		MPI_Init (&argc, &argv);

		/* Initialising more variables. */
		int dimensions[2], periods[2];

		/* Determining process count and rank. */
		MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
		MPI_Comm_rank(MPI_COMM_WORLD, &rank);

		/* Creating the 2D topology. */
		dimensions[0] = dimensions[1] = 0;
		periods[0] = periods[1] = 0;
		MPI_Dims_create(nprocs, 2, dimensions);										// Determining processes in each dimension.
		nprows = dimensions[0];							
		npcols = dimensions[1];
		MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions, periods, 1,&new_comm);		// Creating new_comm.
		MPI_Cart_coords(new_comm, rank, 2, coords);									// Getting coordinates of this process in the grid. 
		myrow = coords[0];
		mycol = coords[1];

		/* Determining ranks of neighbours. */
		MPI_Cart_shift(new_comm, 0, 1, &up, &down);
		MPI_Cart_shift(new_comm, 1, 1, &left, &right);

/* Do data decomposition */
        prowsize = ((rowsize-1)/nprows) + 1;
        myrowstart = myrow*prowsize;
        myrowend   = (myrow+1)*prowsize - 1;
        if (myrowend >= rowsize) myrowend = rowsize - 1;
        pcolsize = ((colsize-1)/npcols) + 1;
        mycolstart = mycol*pcolsize;
        mycolend   = (mycol+1)*pcolsize - 1;
        if (mycolend >= colsize) mycolend = colsize - 1;
        myrowsize = myrowend - myrowstart + 1;
        mycolsize = mycolend - mycolstart + 1;
        printf("rank = %d: (myrow,mycol) = (%d,%d), (left,right,up,down) = (%d,%d,%d,%d), row(start,end) = (%d,%d), col(start,end) = (%d,%d)\n",rank,myrow,mycol,left,right,up,down,myrowstart,myrowend,mycolstart,mycolend);

/* Allocate arrays */
        R = (int **)malloc((sizeof(int*)*(myrowsize+2)));
        G = (int **)malloc((sizeof(int*)*(myrowsize+2)));
        B = (int **)malloc((sizeof(int*)*(myrowsize+2)));
        Rnew = (int **)malloc((sizeof(int*)*(myrowsize+2)));
        Gnew = (int **)malloc((sizeof(int*)*(myrowsize+2)));
        Bnew = (int **)malloc((sizeof(int*)*(myrowsize+2)));
        for (k=0;k<myrowsize+2;k++){
            R[k] = (int *)malloc(sizeof(int)*(mycolsize+2));
            G[k] = (int *)malloc(sizeof(int)*(mycolsize+2));
            B[k] = (int *)malloc(sizeof(int)*(mycolsize+2));
            Rnew[k] = (int *)malloc(sizeof(int)*(mycolsize+2));
            Gnew[k] = (int *)malloc(sizeof(int)*(mycolsize+2));
            Bnew[k] = (int *)malloc(sizeof(int)*(mycolsize+2));
	}
        bufsize = myrowsize > mycolsize ? myrowsize : mycolsize;
        sendbuf = (int *)malloc(sizeof(int)*(bufsize));
        recvbuf = (int *)malloc(sizeof(int)*(bufsize));
	
/* Read input on process 0 and distribute to processes */
	if (rank==0){
                localrow = 1;
                lastcolsize = colsize - pcolsize*(npcols-1);
        	Rrow = (int *)malloc(sizeof(int)*(colsize));
        	Grow = (int *)malloc(sizeof(int)*(colsize));
        	Brow = (int *)malloc(sizeof(int)*(colsize));

        	fp = fopen("David.ps", "r");
		while(! feof(fp))
		{
			icheck = fscanf(fp, "\n%[^\n]", str);
			if (nlines < 5) {strcpy((char *)lines[nlines++],(char *)str);}
			else if(icheck>0){
				for (sptr=&str[0];*sptr != '\0';sptr+=6){
					sscanf(sptr,"%2x",&h1);
					sscanf(sptr+2,"%2x",&h2);
					sscanf(sptr+4,"%2x",&h3);
					if (row < rowsize) {
						Rrow[col] = h1;
						Grow[col] = h2;
						Brow[col] = h3;
						col++;
					}
					if (col==colsize){
                                                coords[0] = row/prowsize;
                                                for(k=0;k<npcols;k++){
                                                        nsend = (k<npcols-1 ? pcolsize : lastcolsize); 
       							coffset = k*pcolsize;
							coords[1] = k;
  							MPI_Cart_rank(new_comm, coords, &dest);
							if(dest!=0){
								MPI_Send(Rrow+coffset,nsend,MPI_INT,dest,tag,new_comm);
								MPI_Send(Grow+coffset,nsend,MPI_INT,dest,tag,new_comm);
								MPI_Send(Brow+coffset,nsend,MPI_INT,dest,tag,new_comm);
							}
							else{
								for(localcol=1;localcol<=mycolsize;localcol++){
									R[localrow][localcol] = Rrow[coffset+localcol-1];
									G[localrow][localcol] = Grow[coffset+localcol-1];
									B[localrow][localcol] = Brow[coffset+localcol-1];
								}
								localrow++;
							}
						}
						col = 0;
						row++;
					}
				}
			}
		}
		fclose(fp);
        }
        else{
		for(localrow=1;localrow<=myrowsize;localrow++){
			MPI_Recv(&R[localrow][1],mycolsize,MPI_INT,0,tag,new_comm,&status);
			MPI_Recv(&G[localrow][1],mycolsize,MPI_INT,0,tag,new_comm,&status);
			MPI_Recv(&B[localrow][1],mycolsize,MPI_INT,0,tag,new_comm,&status);
		}
 	}

	nblurs = 10;
	MPI_Barrier(new_comm);
	double t1;
	if(rank==0){
		gettimeofday(&tim, NULL);
		t1=tim.tv_sec+(tim.tv_usec/1000000.0);
	}
        
	for(k=0;k<nblurs;k++){

		/* Shifting the data up */
		MPI_Sendrecv(&R[1][1], mycolsize, MPI_INT, up, tag,
                &R[myrowsize+1][1], mycolsize, MPI_INT, down, tag,
                new_comm, &status);
		MPI_Sendrecv(&G[1][1], mycolsize, MPI_INT, up, tag,
                &G[myrowsize+1][1], mycolsize, MPI_INT, down, tag,
                new_comm, &status);
		MPI_Sendrecv(&B[1][1], mycolsize, MPI_INT, up, tag,
                &B[myrowsize+1][1], mycolsize, MPI_INT, down, tag,
                new_comm, &status);


		/* Shifting the data down */
		MPI_Sendrecv(&R[myrowsize][1], mycolsize, MPI_INT, down, tag,
                &R[0][1], mycolsize, MPI_INT, up, tag,
                new_comm, &status);
		MPI_Sendrecv(&G[myrowsize][1], mycolsize, MPI_INT, down, tag,
                &G[0][1], mycolsize, MPI_INT, up, tag,
                new_comm, &status);
		MPI_Sendrecv(&B[myrowsize][1], mycolsize, MPI_INT, down, tag,
                &B[0][1], mycolsize, MPI_INT, up, tag,
                new_comm, &status);


		/* Shifting the data left */
		int i;
		for (i = 0; i < myrowsize; i++) sendbuf[i] = R[i+1][1];
		MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, left, tag,
                recvbuf, myrowsize, MPI_INT, right, tag,
                new_comm, &status);
		for (i = 0; i < myrowsize; i++) R[i+1][mycolsize+1] = recvbuf[i];
		for (i = 0; i < myrowsize; i++) sendbuf[i] = G[i+1][1];
		MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, left, tag,
                recvbuf, myrowsize, MPI_INT, right, tag,
                new_comm, &status);
		for (i = 0; i < myrowsize; i++) G[i+1][mycolsize+1] = recvbuf[i];
		for (i = 0; i < myrowsize; i++) sendbuf[i] = B[i+1][1];
		MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, left, tag,
                recvbuf, myrowsize, MPI_INT, right, tag,
                new_comm, &status);
		for (i = 0; i < myrowsize; i++) B[i+1][mycolsize+1] = recvbuf[i];


		/* Shifting the data right */
		for (i = 0; i < myrowsize; i++) sendbuf[i] = R[i+1][mycolsize];
		MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, right, tag,
                recvbuf, myrowsize, MPI_INT, left, tag,
                new_comm, &status);
		for (i = 0; i < myrowsize; i++) R[i+1][0] = recvbuf[i];
		for (i = 0; i < myrowsize; i++) sendbuf[i] = G[i+1][mycolsize];
		MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, right, tag,
                recvbuf, myrowsize, MPI_INT, left, tag,
                new_comm, &status);
		for (i = 0; i < myrowsize; i++) G[i+1][0] = recvbuf[i];
		for (i = 0; i < myrowsize; i++) sendbuf[i] = B[i+1][mycolsize];
		MPI_Sendrecv(sendbuf, myrowsize, MPI_INT, right, tag,
                recvbuf, myrowsize, MPI_INT, left, tag,
                new_comm, &status);
		for (i = 0; i < myrowsize; i++) B[i+1][0] = recvbuf[i];


		for(localrow=1;localrow<=myrowsize;localrow++){
                        row = prowsize*myrow + localrow - 1;
			for (localcol=1;localcol<=mycolsize;localcol++){	
                        	col = pcolsize*mycol + localcol - 1;
				if (row != 0 && row != (rowsize-1) && col != 0 && col != (colsize-1)){
					Rnew[localrow][localcol] = (R[localrow+1][localcol]+R[localrow-1][localcol]+R[localrow][localcol+1]+R[localrow][localcol-1])/4;
					Gnew[localrow][localcol] = (G[localrow+1][localcol]+G[localrow-1][localcol]+G[localrow][localcol+1]+G[localrow][localcol-1])/4;
					Bnew[localrow][localcol] = (B[localrow+1][localcol]+B[localrow-1][localcol]+B[localrow][localcol+1]+B[localrow][localcol-1])/4;
				}
				else if (row == 0 && col != 0 && col != (colsize-1)){
					Rnew[localrow][localcol] = (R[localrow+1][localcol]+R[localrow][localcol+1]+R[localrow][localcol-1])/3;
					Gnew[localrow][localcol] = (G[localrow+1][localcol]+G[localrow][localcol+1]+G[localrow][localcol-1])/3;
					Bnew[localrow][localcol] = (B[localrow+1][localcol]+B[localrow][localcol+1]+B[localrow][localcol-1])/3;
				}
				else if (row == (rowsize-1) && col != 0 && col != (colsize-1)){
					Rnew[localrow][localcol] = (R[localrow-1][localcol]+R[localrow][localcol+1]+R[localrow][localcol-1])/3;
					Gnew[localrow][localcol] = (G[localrow-1][localcol]+G[localrow][localcol+1]+G[localrow][localcol-1])/3;
					Bnew[localrow][localcol] = (B[localrow-1][localcol]+B[localrow][localcol+1]+B[localrow][localcol-1])/3;
				}
				else if (col == 0 && row != 0 && row != (rowsize-1)){
					Rnew[localrow][localcol] = (R[localrow+1][localcol]+R[localrow-1][localcol]+R[localrow][localcol+1])/3;
					Gnew[localrow][localcol] = (G[localrow+1][localcol]+G[localrow-1][localcol]+G[localrow][localcol+1])/3;
					Bnew[localrow][localcol] = (B[localrow+1][localcol]+B[localrow-1][localcol]+B[localrow][localcol+1])/3;
				}
				else if (col == (colsize-1) && row != 0 && row != (rowsize-1)){
					Rnew[localrow][localcol] = (R[localrow+1][localcol]+R[localrow-1][localcol]+R[localrow][localcol-1])/3;
					Gnew[localrow][localcol] = (G[localrow+1][localcol]+G[localrow-1][localcol]+G[localrow][localcol-1])/3;
					Bnew[localrow][localcol] = (B[localrow+1][localcol]+B[localrow-1][localcol]+B[localrow][localcol-1])/3;
				}
				else if (row==0 &&col==0){
					Rnew[localrow][localcol] = (R[localrow][localcol+1]+R[localrow+1][localcol])/2;
					Gnew[localrow][localcol] = (G[localrow][localcol+1]+G[localrow+1][localcol])/2;
					Bnew[localrow][localcol] = (B[localrow][localcol+1]+B[localrow+1][localcol])/2;
				}
				else if (row==0 &&col==(colsize-1)){
					Rnew[localrow][localcol] = (R[localrow][localcol-1]+R[localrow+1][localcol])/2;
					Gnew[localrow][localcol] = (G[localrow][localcol-1]+G[localrow+1][localcol])/2;
					Bnew[localrow][localcol] = (B[localrow][localcol-1]+B[localrow+1][localcol])/2;
				}
				else if (row==(rowsize-1) &&col==0){
					Rnew[localrow][localcol] = (R[localrow][localcol+1]+R[localrow-1][localcol])/2;
					Gnew[localrow][localcol] = (G[localrow][localcol+1]+G[localrow-1][localcol])/2;
					Bnew[localrow][localcol] = (B[localrow][localcol+1]+B[localrow-1][localcol])/2;
				}
				else if (row==(rowsize-1) &&col==(colsize-1)){
					Rnew[localrow][localcol] = (R[localrow][localcol-1]+R[localrow-1][localcol])/2;
					Gnew[localrow][localcol] = (G[localrow][localcol-1]+G[localrow-1][localcol])/2;
					Bnew[localrow][localcol] = (B[localrow][localcol-1]+B[localrow-1][localcol])/2;
				}		
			}
		}
		for(localrow=1;localrow<=myrowsize;localrow++){
			for (localcol=1;localcol<=mycolsize;localcol++){
			    R[localrow][localcol] = Rnew[localrow][localcol];
			    G[localrow][localcol] = Gnew[localrow][localcol];
			    B[localrow][localcol] = Bnew[localrow][localcol];
			}
		}
	}

/* Output timing result */
	MPI_Barrier(new_comm);
	if(rank==0){
		gettimeofday(&tim, NULL);
		double t2=tim.tv_sec+(tim.tv_usec/1000000.0);
		MPI_Get_processor_name(name,&len);
		printf("Rank %d on %s: %.6lf seconds elapsed\n", rank,name,t2-t1);
	}
	
/* Gather data from processes and output on process 0 */
	if(rank==0){
		localrow = 1;
		fout= fopen("DavidBlurMPI.ps", "w");
		for (k=0;k<nlines;k++) fprintf(fout,"\n%s", lines[k]);
		fprintf(fout,"\n");
		for(row=0;row<rowsize;row++){
                	coords[0] = row/prowsize;
			for (k=0;k<npcols;k++){
                        	coords[1] = k;
  				MPI_Cart_rank(new_comm, coords, &dest);
                                nsend = (k<npcols-1 ? pcolsize : lastcolsize); 
       				coffset = k*pcolsize;
				if(dest!=0){
					MPI_Recv(Rrow+coffset,nsend,MPI_INT,dest,tag,new_comm,&status);
					MPI_Recv(Grow+coffset,nsend,MPI_INT,dest,tag,new_comm,&status);
					MPI_Recv(Brow+coffset,nsend,MPI_INT,dest,tag,new_comm,&status);
				}
				else{
					for(localcol=1;localcol<=mycolsize;localcol++){
						Rrow[coffset+localcol-1] = R[localrow][localcol];
						Grow[coffset+localcol-1] = G[localrow][localcol];
						Brow[coffset+localcol-1] = B[localrow][localcol];
					}
					localrow++;
				}
			}
			for(col=0;col<colsize;col++){
				fprintf(fout,"%02x%02x%02x",Rrow[col],Grow[col],Brow[col]);
				lineno++;
				if (lineno==linelen){
					fprintf(fout,"\n");
					lineno = 0;
				}
			}
		}
		fclose(fout);
	}
	else{
		for(localrow=1;localrow<=myrowsize;localrow++){
			MPI_Send(&R[localrow][1],mycolsize,MPI_INT,0,tag,new_comm);
			MPI_Send(&G[localrow][1],mycolsize,MPI_INT,0,tag,new_comm);
			MPI_Send(&B[localrow][1],mycolsize,MPI_INT,0,tag,new_comm);
		}
	}
/* Finalize and exit */
        MPI_Finalize();
    	return 0;
}
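
The left/right halo exchanges above copy each boundary column into sendbuf by hand because the rows are stored as separate allocations. If the local block were held in one contiguous array (call it Rflat, of size (myrowsize+2)*(mycolsize+2)), a strided MPI_Type_vector would let MPI_Sendrecv walk the column directly; a sketch of that alternative (Rflat and the flat layout are assumptions, not the example's storage; myrowsize, mycolsize, left, right, tag, new_comm and status are the example's variables):

	MPI_Datatype col_t;
	/* one element per interior row, stride = padded row width */
	MPI_Type_vector(myrowsize, 1, mycolsize + 2, MPI_INT, &col_t);
	MPI_Type_commit(&col_t);

	/* send own first interior column to the left neighbour,
	   receive the right ghost column from the right neighbour */
	MPI_Sendrecv(&Rflat[1 * (mycolsize + 2) + 1],             1, col_t, left,  tag,
	             &Rflat[1 * (mycolsize + 2) + mycolsize + 1], 1, col_t, right, tag,
	             new_comm, &status);

	MPI_Type_free(&col_t);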
Exemplo n.º 21
int main( int argc , char * argv[ ] )
{

  char ** pcmd = argv ; 
  
  int icmd = 1 ;
  
  char buffer[ MAXCHARINLINE ] ;
  
  int lenBuff ;
  
  char cache[ MAXCHARINLINE ] ;

  
  int itmp , itmp2 ; double dtmp ; char ctmp , tmp_char ; 
  
  char stmp[ 300 ] , stmp2[ 300 ] , tmpString[ 300 ];
  
  double dtmpArray[ 150 ] ;
  
  int info , signal , blank_signal ;
  
  register FILE * debug ; 
  
  char * flag ;




  char * command , * set_env , * get_NID , * env_value ;
  
  int exe = NO ; int command_signal ;




  while( icmd < argc )
  {  
    pcmd ++ ; 

    flag = * pcmd ;

    //printf("\nNo.%d argument , Currently @ flag = %s ...\n\n" , icmd , flag );

    if( ( * flag == '-' ) && ( strlen( flag ) == 2 ) )
    {
      switch ( *( flag + 1 ) )
      {
	      
	      case 'e' : //strcpy( command , *( ++ pcmd ) ) ; 
                     
                     info = asprintf( &command , "%s" , *( ++ pcmd ) ) ;
                     
                     if( info == -1 )
                     {
                       fprintf( stderr , "\nOops ... Did not successfully 10-4 your command line ...\n\n" ) ;
                       
                       exit( 3 ) ;
                     }
                     else
                     {
                       exe = YES ;
                     }

	                 icmd = icmd + 2 ; 
	                 
	                 break ;
  
	    
	      case 'h' : printf("\nUsage:  %s [ -e 'command line to be executed' ] \n" , * argv ); 
	                 
	                 /*
                     printf("\n                Note : 1) when -m is specified as \"None\" or \"none\" or \"NONE\", no reference will be checked and orbital numbers provided in CI file will be directly used.\n\n");
                     printf("\n                Note : 2) Default group selection : [ -p \"Donor-Block\"] [ -q \"Acceptor-Block\"] [ -r \"Bridge-Block\"] [ -x \"Irrelevant-Block\"] [ -z \"Irrelevant-Block-2\"]\n\n\n") ; 
                     printf("\n                Note : 3) Default fragment names selection : [ -D \"Donor\"] [ -A \"Acceptor\"]\n\n\n") ; 
                     printf("\n                Note : 4) If one of the partition is not necessary, \"None/none/None\" has to be specified ...\n\n\n") ; 
	                 */

	                 icmd = icmd + 1 ; 
	                 
	                 exit( 1 ) ;
	                 
	      

	      default : printf("\n\nInvalid option ' %s ' ... Please refer to the usage by typing ' %s -h '\n\n" , flag , * argv ); 
	      
	                icmd = argc ; 
	                
	                exit( 1 );

      
      }
    
    }
    else
    {
        printf("\n\nInvalid option ' %s ' ... Please refer to the usage by typing ' %s -h '\n\n" , flag , * argv );

	    exit(1);
      
      
    }
    
 
  } 
  


  /* MPI Setup */
  
  
  int myid , numprocs , namelen ;
  
  char processor_name[ MPI_MAX_PROCESSOR_NAME ] ;
  
  
  MPI_Init( &argc , &argv ) ;
  
  MPI_Comm_rank( MPI_COMM_WORLD , &myid ) ;
  
  MPI_Comm_size( MPI_COMM_WORLD , &numprocs ) ;
  
  MPI_Get_processor_name( processor_name , &namelen ) ;
  
  
  info = asprintf( &set_env , "ZM_MPI_RANK=%d" , myid ) ;

  info = putenv( set_env ) ;
  
  env_value = getenv( "ZM_MPI_RANK" ) ;

  printf( "\nRank is [ %s ] \n" , env_value ) ;

  info = asprintf( &get_NID , "ZM_NID=%s" , processor_name );

  info = putenv( get_NID ) ; 

  env_value = getenv( "ZM_NID" ) ;

  printf( "Node ID is [ %s ] \n" , env_value ) ;
  



  command_signal = system( command ) ;


  
  
  //fprintf( stderr , "Hello, World! Process %d of %d on %s\n" , myid , numprocs , processor_name ) ;

  MPI_Finalize() ;

  
  
  /* MPI Done */


  return( 0 ) ;





}
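
As a usage illustration (hypothetical binary name): launching this wrapper with mpirun -np 4 ./mpi_exec -e 'hostname' runs the quoted command once per rank, each instance seeing its own ZM_MPI_RANK and ZM_NID values in the environment set above.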
Exemplo n.º 22
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Status status;
  MPI_Comm temp, intercomm;
  int trank, tnprocs;
  int drank, dnprocs, rleader, rnprocs;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 3) {
    printf ("not enough tasks\n");
  }
  else {
    /* need to make split communicator temporarily... */
    MPI_Comm_split (MPI_COMM_WORLD, rank % 2, nprocs - rank, &temp);

    if (temp != MPI_COMM_NULL) {
      MPI_Comm_size (temp, &tnprocs);
      MPI_Comm_rank (temp, &trank);

      /* create an intercommunicator temporarily so can merge it... */
      rleader = ((rank + nprocs) % 2) ?  nprocs - 2 : nprocs - 1;

      if ((trank == 0) && (rank % 2)) {
	MPI_Recv (buf0, buf_size, MPI_INT,
		  rleader, 0, MPI_COMM_WORLD, &status);
      }

      MPI_Intercomm_create (temp, 0, MPI_COMM_WORLD, rleader,
			    INTERCOMM_CREATE_TAG, &intercomm);

      if ((trank == 0) && !(rank % 2)) {
	memset (buf0, 0, buf_size * sizeof(int));
	
	MPI_Send (buf0, buf_size, MPI_INT, 1, 0, temp);
      }
      else {
	printf ("(%d) Split communicator too small\n", rank);
      }

      MPI_Comm_free (&temp);

      if (intercomm != MPI_COMM_NULL) {
	MPI_Comm_size (intercomm, &dnprocs);
	MPI_Comm_rank (intercomm, &drank);
 	MPI_Comm_remote_size (intercomm, &rnprocs);

	if (rnprocs > drank) {
	  if (rank % 2) {
	    memset (buf1, 1, buf_size * sizeof(int));

	    MPI_Recv (buf0, buf_size, MPI_INT, drank, 0, intercomm, &status);

	    MPI_Send (buf1, buf_size, MPI_INT, drank, 0, intercomm);
	  }
	  else {
	    memset (buf0, 0, buf_size * sizeof(int));
	
	    MPI_Send (buf0, buf_size, MPI_INT, drank, 0, intercomm);
	
	    MPI_Recv (buf1, buf_size, MPI_INT, drank, 0, intercomm, &status);
	  }
	}
	else {
	  printf ("(%d) Intercomm too small (lrank = %d; remote size = %d)\n",
		  rank, drank, rnprocs);
	}

	MPI_Comm_free (&intercomm);
      }
      else {
	printf ("(%d) Got MPI_COMM_NULL\n", rank);
      }
    }
    else {
      printf ("(%d) MPI_Comm_split got MPI_COMM_NULL\n", rank);
    }
  }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
Exemplo n.º 23
0
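/*
 * Rank 0 fills a 10-int buffer and sends it to every other rank with a
 * blocking MPI_Send; each receiver posts an MPI_Irecv, completes it with
 * MPI_Wait, and prints the received values.
 */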
int main(int argc, char *argv[])
{
    int  namelen, rank, size, i, error;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int buffer[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };

    MPI_Request request;
    MPI_Status status;

    MPI_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Get_processor_name(processor_name, &namelen);

    fprintf(stderr, "Process %d of %d on %s\n", rank, size, processor_name);

    if (rank == 0) {
       for (i=0;i<10;i++) {
          buffer[i] = 42+i;
       }

       for (i=1;i<size;i++) {
          fprintf(stderr, "Send to %d\n", i);

          error = MPI_Send(buffer, 10, MPI_INT, i, 0, MPI_COMM_WORLD);

          if (error != MPI_SUCCESS) {
             fprintf(stderr, "Send failed! %d\n", error);
          }
       }
    } else {
       fprintf(stderr, "IRecv from to %d\n", 0);

       error = MPI_Irecv(buffer, 10, MPI_INT, 0, 0, MPI_COMM_WORLD, &request);

       if (error != MPI_SUCCESS) {
          fprintf(stderr, "Irecv failed! %d\n", error);
       } else {

          error = MPI_Wait(&request, &status);

          if (error != MPI_SUCCESS) {
             fprintf(stderr, "Wait failed! %d\n", error);
          } else {
             fprintf(stderr, "Received from 0: {");

             for (i=0;i<10;i++) {
                fprintf(stderr, "%d ", buffer[i]);
             }

             fprintf(stderr, "}\n");
          }
       }
    }

    fprintf(stderr, "Done!\n");

    MPI_Finalize();

    return 0;
}
Exemplo n.º 24
0
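// Benchmark for an inclusive prefix sum (scan) over a distributed array:
// each rank generates its local block, runs either a local scan or the
// parallel arrayscan, and rank 0 checks the final sum and reports the
// maximum wall-clock time across ranks.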
int main(int argc, char *argv[])
{
  int rank, comm_size;
  int prev;
  char name[MPI_MAX_PROCESSOR_NAME];
  int nlen;
  INT_T size = 100000;
  double time = 0;
  struct opt args;

  MPI_Init(&argc,&argv);

  // get rank and size from communicator
  MPI_Comm_size(MPI_COMM_WORLD,&comm_size);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);

  //printf("%d %d\n", rank, argc);
  parse_args(argc, argv, &args);
  if(args.size > 0 && args.parallel) {
    size = args.size/comm_size;
  } else if(args.size > 0) {
    size = args.size;
  }

  if(comm_size > 1 && (! args.parallel) && rank == 0) {
    printf("WARNING: sequential execution with more than 1 thread gives wrong data\n");
  }

  MPI_Get_processor_name(name,&nlen);

  INT_T* A = gen_data(rank, size);
  arrayscan(A, size, MPI_COMM_WORLD, commscan_primitive);

#ifdef DEBUG
  {
    INT_T a = check_asc(A, size);
    if(a == -1) {
        printf("ascending check ok\n");
    } else {
        printf("ascending error on position %ld\n", a);
    }
    #ifdef DEBUGDEBUG
    for(INT_T i = 0; i < size; i++) {
        printf("%ld\n", A[i]);
    }
    #endif
  }
#endif
  MPI_Barrier(MPI_COMM_WORLD);
  time = - MPI_Wtime();
/*
  if (rank==0) {
    printf("Rank %d initializing, total %d\n",rank,comm_size);
  } else {
    MPI_Recv(&prev,1,MPI_INT,rank-1,HELLO,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
    printf("Rank %d on %s received from %d, passing on\n",rank,name,prev);
  }
  if (rank+1<size) MPI_Send(&rank,1,MPI_INT,rank+1,HELLO,MPI_COMM_WORLD);
*/
  
  if(args.parallel)
    arrayscan(A, size, MPI_COMM_WORLD, my_commscan);
  else
    localscan(A, size);
    
  time += MPI_Wtime();

  INT_T sum;
  double rtime;

  if(rank == 0 && comm_size > 1 && args.parallel) {
    MPI_Recv(&sum, 1, INT_MPI_T, comm_size-1, SCAN, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
  } else if(rank == comm_size-1 && comm_size > 1 && args.parallel) {
    MPI_Send(A + size - 1, 1, INT_MPI_T, 0, SCAN, MPI_COMM_WORLD);
  } else {
    sum = A[size - 1];
  }

  MPI_Reduce(&time, &rtime, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  if(rank == 0 /*&& sum == size*comm_size*(size*comm_size+1)/2*/) {
    printf("inclusive-scan np=%d s=%ld%s time=%lf%s\n", comm_size, comm_size*size,
      args.parallel ? "" : " local", rtime,
      sum == size*comm_size*(size*comm_size+1)/2 ? "" : " FAIL");
  } /*else if (rank == 0) {
    printf("inclusive-scan np=%d s=%ld %s time=%lf FAIL\n", comm_size, comm_size*size, rtime);
  }*/

#ifndef EVAL
  printf("Rank %3d min: %20ld sum: %20ld time: %2lf\n", rank, A[0], A[size-1], time);
  if(rank == 0) {
    printf("Sum should be %ld\n", size*comm_size*(size*comm_size+1)/2);
  }
#endif
#ifdef DEBUGDEBUG
    for(INT_T i = 0; i < size; i++) {
        printf("%ld\n", A[i]);
    }
#endif
      
  MPI_Finalize();
  return 0;
}
Exemplo n.º 25
0
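/*
 * Waitall / MPI_ANY_SOURCE test case that is meant to deadlock (see the
 * in-code comment): rank 0 waits on two wildcard receives, rank 1 issues
 * two non-blocking sends, and rank 2 sleeps for a minute before posting
 * its matching receive.
 */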
int
main (int argc, char **argv)
{
    int nprocs = -1;
    int rank = -1;
    char processor_name[128];
    int namelen = 128;
    int buf0[buf_size];
    int buf1[buf_size];
    int done;
    MPI_Status statuses[2];
    MPI_Request reqs[2];

    /* init */
    MPI_Init (&argc, &argv);
    MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name (processor_name, &namelen);
    printf ("(%d) is alive on %s\n", rank, processor_name);
    fflush (stdout);

    MPI_Barrier (MPI_COMM_WORLD);

    /* this code is very similar to no-error-waitall-any_src.c */
    /* but deadlocks since task 2's send and recv are inverted... */
    if (nprocs < 3)
    {
        printf ("not enough tasks\n");
    }
    else if (rank == 0)
    {
        MPI_Irecv (buf0, buf_size, MPI_INT,
                   MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[0]);

        MPI_Irecv (buf1, buf_size, MPI_INT,
                   MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &reqs[1]);

        MPI_Waitall (2, reqs, statuses);

        MPI_Send (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD);
    }
    else if (rank == 1)
    {
        memset (buf0, 0, buf_size * sizeof(int));

        MPI_Isend (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &reqs[0]);

        MPI_Isend (buf0, buf_size, MPI_INT, 2, 1, MPI_COMM_WORLD, &reqs[1]);

        MPI_Waitany (2, reqs, &done, statuses);

        MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);

        MPI_Recv (buf1, buf_size, MPI_INT, 0, 1, MPI_COMM_WORLD, statuses);

        MPI_Wait (&reqs[(done + 1) % 2], statuses);
    }
    else if (rank == 2)
    {
        sleep (60);

        MPI_Recv (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD, statuses);
    }

    MPI_Barrier (MPI_COMM_WORLD);

    MPI_Finalize ();
    printf ("(%d) Finished normally\n", rank);
}
Exemplo n.º 26
0
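/*
 * Persistent point-to-point communication between two ranks: each side
 * creates a matching MPI_Send_init/MPI_Recv_init pair, runs it twice via
 * MPI_Start/MPI_Startall followed by MPI_Waitall, and finally frees the
 * requests.
 */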
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Request aReq[2];
  MPI_Status aStatus[2];

  MPI_Status status;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 2) {
      printf ("not enough tasks\n");
  }
  else {
    if (rank == 0) {
      memset (buf0, 0, buf_size*sizeof(int));

      MPI_Send_init (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[0]);
      MPI_Recv_init (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[1]);

      MPI_Start (&aReq[0]);
      MPI_Start (&aReq[1]);

      MPI_Waitall (2, aReq, aStatus);

      memset (buf0, 1, buf_size*sizeof(int));

      MPI_Startall (2, aReq);

      MPI_Waitall (2, aReq, aStatus);
    }
    else if (rank == 1) {
      memset (buf1, 1, buf_size*sizeof(int));

      MPI_Recv_init (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[0]);
      MPI_Send_init (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[1]);

      MPI_Start (&aReq[0]);
      MPI_Start (&aReq[1]);

      MPI_Waitall (2, aReq, aStatus);

      memset (buf1, 0, buf_size*sizeof(int));

      MPI_Startall (2, aReq);

      MPI_Waitall (2, aReq, aStatus);
    }
  }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Request_free (&aReq[0]);
  MPI_Request_free (&aReq[1]);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
Exemplo n.º 27
0
int
main (int argc, char **argv)
{
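  /*
   * Persistent sends completed with MPI_Testany and MPI_Testsome: rank 0
   * starts two persistent sends in four rounds (via MPI_Start or
   * MPI_Startall) and polls them to completion, while rank 1 re-posts two
   * MPI_Irecv's and waits on them with MPI_Waitall in every round.
   */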
  int nprocs = -1;
  int rank = -1;
  MPI_Comm comm = MPI_COMM_WORLD;
  char processor_name[128];
  int namelen = 128;
  int buf[BUF_SIZE * 2];
  int i, j, k, index, outcount, flag;
  int indices[2];
  MPI_Request aReq[2];
  MPI_Status aStatus[2];

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (comm, &nprocs);
  MPI_Comm_rank (comm, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  if (rank == 0) {
    /* set up persistent sends... */
    MPI_Send_init (&buf[0], BUF_SIZE, MPI_INT, 1, 0, comm, &aReq[0]);
    MPI_Send_init (&buf[BUF_SIZE], BUF_SIZE, MPI_INT, 1, 1, comm, &aReq[1]);

    /* initialize the send buffers */
    for (i = 0; i < BUF_SIZE; i++) {
      buf[i] = i;
      buf[BUF_SIZE + i] = BUF_SIZE - 1 - i;
    }
  }

  for (k = 0; k < 4; k++) {
    if (rank == 1) {
      /* zero out the receive buffers */
      bzero (buf, sizeof(int) * BUF_SIZE * 2);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
      /* start the persistent sends... */
      if (k % 2) {
	MPI_Startall (2, &aReq[0]);
      }
      else {
	for (j = 0; j < 2; j++) {
	  MPI_Start (&aReq[j]);
	}
      }

      /* complete the sends */
      if (k < 2) {
	/* use MPI_Testany */
	for (j = 0; j < 2; j++) {
	  flag = 0;
	  while (!flag) {
	    MPI_Testany (2, aReq, &index, &flag, aStatus);
	  }
	}
      }
      else {
	/* use MPI_Testsome */
	j = 0;
	while (j < 2) {
	  outcount = 0;
	  while (!outcount) {
	    MPI_Testsome (2, aReq, &outcount, indices, aStatus);
	  }
	  j += outcount;
	}
      }
    }
    else if (rank == 1) {
      /* set up receives for all of the sends */
      for (j = 0; j < 2; j++) {
	MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE,
		   MPI_INT, 0, j, comm, &aReq[j]);
      }
      /* complete all of the receives... */
      MPI_Waitall (2, aReq, aStatus);
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  if (rank == 0) {
    /* free the persistent requests */
    for (i = 0 ; i < 2; i++) {
      MPI_Request_free (&aReq[i]);
    }
  }

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
Exemplo n.º 28
0
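/*
 * Message-ordering test with MPI_ANY_SOURCE: rank 0 posts a wildcard
 * MPI_Irecv, then mixes blocking receives from rank 2 and from any source
 * with sends back to rank 2, while ranks 1 and 2 feed it messages on the
 * same tag (requires at least 3 tasks).
 */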
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Status status;
  MPI_Request req;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 3)
    {
      printf ("not enough tasks\n");
    }
  else if (rank == 0)
    {
//      sleep (60);

      MPI_Irecv (buf0, buf_size, MPI_INT, 
		 MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &req);

      MPI_Recv (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD, &status);

      MPI_Send (buf1, buf_size, MPI_INT, 2, 0, MPI_COMM_WORLD);

      MPI_Recv (buf1, buf_size, MPI_INT, 
		MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);

      MPI_Wait (&req, &status);
    }
  else if (rank == 2)
    {
      memset (buf0, 0, buf_size * sizeof(int));

 //     sleep (30);

      MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);

      MPI_Recv (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);

      MPI_Send (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
  else if (rank == 1)
    {
      memset (buf1, 1, buf_size * sizeof(int));

      MPI_Send (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
Exemplo n.º 29
0
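/*
 * Multi-threaded ping benchmark: for 1..N_THREADS threads and message
 * lengths from 1 byte up to max_len, each thread runs the transfer
 * routine transfiere() on its own slice of a shared buffer, and rank 0
 * reports the averaged time and transfer rate per message length.
 */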
/*
 * MAIN
 */
int main(int argc, char *argv[])
{
	int             ret;
	char            processor_name[MPI_MAX_PROCESSOR_NAME];
	int             namelen;
	double          start_time;
        double          used_time;
        double          avg_time;
        double          us_rate;
        int             lenbuf;
	MPI_Status      status;
/*        FILE           *fparam ;*/
	int 		i;
	int 		k;
	int 		j;
	struct thr	t[N_THREADS+1];
	double		crear_threads[N_THREADS+1];
#if defined(HAVE_WINDOWS_H)
    HANDLE 	     thid[N_THREADS+1];
	DWORD        thinfo ;
#else
	pthread_attr_t   attr ;
    pthread_t 	     thid[N_THREADS+1];
#endif



	setbuf(stdout, NULL);

#if !defined(HAVE_WINDOWS_H)
	/* attr is passed to pthread_create() below, so it must be initialized first */
	pthread_attr_init(&attr);
#endif
#if (0)
	if (argc != 2)
        {
                printf("Use: client <max_len> \n");
		exit(1);
        }
	max_len =atoi(argv[1]);
#endif

	/*
#if (1)
#if defined(__LINUX__)
        fparam = fopen("ping.in","rt") ;
#endif
#if defined(__SUNOS__)
        fparam = fopen("ping.in","rt") ;
#endif
#if defined(__SP2__)
        fparam = fopen("/home/ssoo/gpmimd/FELIX/XMP/xmp/test/mpi/ping_r/ping.in","rt") ;
#endif
        if (fparam == NULL)
        {
                printf("ERROR: can not open ping.in, sorry.\n");
		exit(1);
        }
        ret = fscanf(fparam,"max_len=%i",&max_len) ;
        fclose(fparam) ;
        if (ret != 1)
        {
                printf("ERROR: can not read a valid 'max_len' value from ping.in, sorry.\n");
		exit(1);
        }
#endif
*/
	    max_len = 1024 * 1024 ;

        if ( (max_len <= 0) || (max_len >= 8*1024*1024) )
        {
                printf("ERROR: max_len = %i\n",max_len);
                printf("ERROR: (max_len <= 0) || (max_len >= 4*1024*1024)\n");
                exit(1) ;
        }


	ret = MPI_Init(&argc, &argv);	
	if (ret < 0)
	{
		printf("Can't init\n");
		exit(1);
	}

	MPI_Comm_rank(MPI_COMM_WORLD,&me);
	MPI_Get_processor_name(processor_name,&namelen);
	MPI_Comm_size(MPI_COMM_WORLD, &nproc);

#if (0)
	printf("Process %d; total %d is alive on %s\n",me,nproc,processor_name);
#endif


	MPI_Barrier(MPI_COMM_WORLD) ;

       

#if (0)
	printf("PING LISTO \n");
	if (me == 0)
	{
	  ret=MPI_Send(buf,12,MPI_CHAR,1,1, MPI_COMM_WORLD);
	}
	else
	{
	  ret=MPI_Recv(buf,12,MPI_CHAR,0,1, MPI_COMM_WORLD, &status);
	  printf("count = %d\n", status.count);
	}
#endif


/*
 *	time to create and wait for N threads
 */
	/*
if (me == 0)
{
	i = 1;
    while(i <= N_THREADS)
	{
		start_time = MPI_Wtime();

#if defined(HAVE_WINDOWS_H)
		for(j = 0; j < PRUEBAS_THREADS; j ++)
		{
		   for (k = 0; k < i; k++)
				thid[k] = CreateThread (NULL, 0, (LPTHREAD_START_ROUTINE) nulo,NULL,0,&thinfo);

		   WaitForMultipleObjects(k, thid, TRUE, INFINITE);
		}
#else
		for(j = 0; j < PRUEBAS_THREADS; j ++)
		{
			for (k = 0; k < i; k++)
              	 pthread_create(&thid[k], &attr, (void *(*)(void *))nulo, NULL);

			for (k = 0; k < i; k++)
                 pthread_join(thid[k], NULL);
		}
#endif

		crear_threads[i]=((MPI_Wtime() - start_time)) / PRUEBAS_THREADS;
		printf("Tiempo para %d threads = %f\n", i, crear_threads[i]);

		  i *= 2;

	}
}
*/

/*
 *	sends and receives...
 */
	i = 1 ;
	while(i <= N_THREADS)
	{

		if (me == 0)
			printf("N_THREADS = %d \n\n", i);
		MPI_Barrier(MPI_COMM_WORLD) ;

		lenbuf = 1;
		while (lenbuf <= max_len)
       	{

			 buf = (char *) malloc((unsigned) lenbuf * N_THREADS );
		     if (buf == NULL)
			 {
                perror("Error en malloc");
                exit(1);
			 }

		 	start_time = MPI_Wtime();
	
#if (0)
			transfiere(&t[i]);
#endif


#if defined(HAVE_WINDOWS_H)
			for (k = 0; k < i; k++)
			{
				t[k].lenbuf = lenbuf;
				t[k].tag = k;
				t[k].buf = buf + (k * lenbuf);

				thid[k] = CreateThread (NULL, 0, (LPTHREAD_START_ROUTINE) transfiere,&(t[k]),0,&thinfo);
			}

			WaitForMultipleObjects(k, thid, TRUE, INFINITE);
#else
			for (k = 0; k < i; k++)
			{
				t[k].lenbuf = lenbuf;
				t[k].tag = k;
				t[k].buf = buf + (k * lenbuf);

				pthread_create(&thid[k], &attr, (void *(*)(void *))transfiere, &t[k]);
			}

			for (k = 0; k < i; k++)
				pthread_join(thid[k], NULL);
#endif

			used_time = (MPI_Wtime() - start_time);
	
       		avg_time =  used_time / (float)  PRUEBAS;
	

			if (avg_time > 0)    /* rate is megabytes per second */
				us_rate = (double)((nproc * lenbuf * (i)) /
					(avg_time*(double)1000000));
			else
				us_rate = 0.0;


			if (me == 0)
       			printf("len_bytes=%d avg_time_sec=%f rate_Mbytes_sec=%f\n", lenbuf, (double)avg_time/(2.0*i), (double)us_rate);
	
			lenbuf *= 2;
			free(buf);
		}

		i *= 2;

	}

		if (me == 0)
		{
			char c;
			read(0, &c, 1);
		}


#if (0)
	if (me != 0)
	    printf("\nclock resolution in seconds: %10.8f\n", MPI_Wtick());
#endif

	MPI_Finalize();
	/* buf was already freed at the end of the last loop iteration, so it is not freed again here */
	exit(0);

}
Exemplo n.º 30
0
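// MPI Game of Life: parses the run type (serial, row-blocked or
// checkerboard decomposition), reads the initial board from life.pgm,
// exchanges halo rows/columns and corner cells with neighbouring ranks
// every generation, and can write animation frames as PGM files through
// MPI-IO using a darray-based subarray type.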
int main (int argc, char **argv)
{
    struct arguments arguments;

    /* Parse our arguments; every option seen by parse_opt will
       be reflected in arguments. */
    argp_parse (&argp, argc, argv, 0, 0, &arguments); 

    int run_type;
    run_type = 0; //default is serial
    if (sscanf (arguments.args[0], "%i", &run_type)!=1) {}

    int iterations;
    iterations = 0; //default is 0 iterations
    if (sscanf (arguments.args[1], "%i", &iterations)!=1) {}

    int count_when;
    count_when = 1000;
    if (sscanf (arguments.args[2], "%i", &count_when)!=1) {}

    char print_list[200]; //used for input list
    if (sscanf (arguments.args[3], "%s", print_list)!=1) {}

    // printf("Print list = %s\n", print_list);

    //Extract animation list from arguments
    char char_array[20][12] = { {0} };    //separated input list
    int animation_list[20][2] = { {0} };  //integer input list start,range
    char *tok = strtok(print_list, ",");

    //counters
    int i,j,k,x,y,ii,jj;
    ii = 0;
    jj = 0;

    //Loop over tokens parsing our commas
    int tok_len = 0;
    while (tok != NULL)
    {
        //first loop parses out commas
        tok_len = strlen(tok);
        for (jj=0;jj<tok_len;jj++)
        {
            char_array[ii][jj] = tok[jj];
        }

        // printf("Tok = %s\n", char_array[ii]);
        tok = strtok(NULL, ",");
        ii++;
    }

    //looking for a range input, convert to ints
    int stop;
    for (ii=0;ii<20;ii++)
    {
        //convert first number to int
        tok = strtok(char_array[ii], "-");
        if (tok != NULL)
        {
            animation_list[ii][0] = atoi(tok);
            tok = strtok(NULL, ",");
        }
        
        //look for second number, add to range
        if (tok != NULL)
        {
            stop = atoi(tok);
            animation_list[ii][1] = stop - animation_list[ii][0];
        }

        // if (rank == 0)
        // {
        //     printf("Animation_list = %i, %i\n", 
        //         animation_list[ii][0], animation_list[ii][1]);

        // }
    }
    
    
    

    //should an animation be generated
    //prints a bunch of .pgm files, have to hand
    //make the gif...
    int animation;
    animation = arguments.animation;

    //verbose?
    int verbose;
    verbose = arguments.verbose;
    // printf("VERBOSE = %i",verbose);
    if (verbose>0 && verbose<=10)
    {
        verbose = 1;
    }

    

    // Initialize the MPI environment
    MPI_Init(NULL, NULL);

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Get the rank of the process
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Get the name of the processor
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);

    //Print run information, exit on bad command line input
    if (rank == 0 && verbose == 1)
    {
        printf("Verbose=%i, RunType=%i, Iterations=%i, CountWhen=%i, Animation=%i\n",
            verbose,run_type,iterations,count_when, animation);
    }
    if (world_size>1 && run_type ==0)
    {
        printf("Runtype and processors count not consistant\n");
        MPI_Finalize();
        exit(0);
    }
    if (world_size==1 && run_type>0)
    {
        printf("Runtype and processors count not consistant\n");
        MPI_Finalize();
        exit(0);
    }
    if (count_when <= 0)
    {
        if (rank == 0)
        {
            printf("Invalid count interval, positive integers only\n");
        }
        MPI_Finalize();
        exit(0);
    }

     //serial
    if (world_size == 1 && run_type == 0)
    {

        ncols=1;
        nrows=1;
    }
    //Blocked
    else if (world_size>1 && run_type == 1)
    {
        ncols = 1;
        nrows = world_size;
        my_col = 0;
        my_row = rank;
    }
    //Checker
    else if (world_size>1 && run_type == 2)
    {
        ncols = (int)sqrt(world_size);
        nrows = (int)sqrt(world_size);

        my_row = rank/nrows;
        my_col = rank-my_row*nrows;

        if (ncols*nrows!=world_size)
        {
            if (rank == 0)
            {
                printf("Number of processors must be square, Exiting\n");
            }
            MPI_Finalize();
            exit(0);
        }
    }

    // if (verbose == 1)
    // {
    //     printf("WR,row,col=%i,%i,%i\n",rank,my_row,my_col);
    // }

    
    //////////////////////READ IN INITIAL PGM////////////////////////////////
    if(!readpgm("life.pgm"))
    {
        // printf("WR=%d,HERE2\n",rank);
        if( rank==0 )
        {
            pprintf( "An error occured while reading the pgm file\n" );
        }
        MPI_Finalize();
        return 1;
    }

    // Count the life forms. Note that we count from [1,1] - [height+1,width+1];
    // we need to ignore the ghost row!
    i = 0;
    for(y=1; y<local_height+1; y++ )
    {
        for(x=1; x<local_width+1; x++ )
        {
            if( field_a[ y * field_width + x ] )
            {
                i++;
            }
        }
    }
    // pprintf( "%i local buggies\n", i );

    int total;
    MPI_Allreduce( &i, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if( rank==0  && verbose == 1 )
    {
        pprintf( "%i total buggies\n", total );
    }
    

    
    // printf("WR=%d, Row=%d, Col=%d\n",rank,my_row,my_col);

    //Row and column size per processor
    int rsize, csize; 
    rsize = local_width;
    csize = local_height;


    if (rank == 0 && verbose == 1)
    {
        printf("rsize,csize,NP = %d, %d, %d\n",rsize,csize,world_size);
    }
    
    //Create new derived datatype for writing to files
    MPI_Datatype submatrix;

    int array_of_gsizes[2];
    int array_of_distribs[2];
    int array_of_dargs[2];
    int array_of_psize[2];

    if (run_type == 1)
    {
        if (rank == 0)
        {
            printf("g0,g1 = %i,%i\n", local_height*ncols, local_width);
            printf("p0,p1 = %i,%i\n", nrows, ncols);
        }
        array_of_gsizes[0] = local_height*ncols;
        array_of_gsizes[1] = local_width;
        array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
        array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_psize[0] = nrows;
        array_of_psize[1] = ncols;
        // int order = MPI_ORDER_C;

        //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes
        //order,oldtype,*newtype
        MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs,
                array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix);
        MPI_Type_commit(&submatrix);
    }
    else if (run_type == 2)
    {
        if (rank == 0)
        {
            printf("g0,g1 = %i,%i\n", local_height*ncols, local_width*nrows);
            printf("p0,p1 = %i,%i\n", nrows, ncols);
        }
        array_of_gsizes[0] = local_height*ncols;
        array_of_gsizes[1] = local_width*nrows;
        array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
        array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_psize[0] = nrows;
        array_of_psize[1] = ncols;
        // int order = MPI_ORDER_C;

        //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes
        //order,oldtype,*newtype
        MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs,
                array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix);
        MPI_Type_commit(&submatrix);
    }



    MPI_Barrier(MPI_COMM_WORLD);

    //////////////////ALLOCATE ARRAYS, CREATE DATATYPES/////////////////////
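    //Note: the halo data is packed into dedicated contiguous buffers
    //(top/bot/left/right and their t* send counterparts) before any
    //communication, so the two derived types created below describe
    //contiguous runs of unsigned chars (byte stride of 1), not strided
    //views into the grid itself.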

    //Create new column derived datatype
    MPI_Datatype column;
    //count, blocklength, stride, oldtype, *newtype
    MPI_Type_hvector(csize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &column);
    MPI_Type_commit(&column);

    //Create new row derived datatype
    MPI_Datatype row;
    //count, blocklength, stride, oldtype, *newtype
    MPI_Type_hvector(rsize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &row);
    MPI_Type_commit(&row);

    //allocate arrays and corner storage
    unsigned char *section;
    unsigned char *neighbors;
    //to use
    unsigned char *top;
    unsigned char *bot;
    unsigned char *left;
    unsigned char *right;
    //to send
    unsigned char *ttop;
    unsigned char *tbot;
    unsigned char *tleft;
    unsigned char *tright;
    //MALLOC!!
    section = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    neighbors = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    top = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    bot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    left = (unsigned char*)malloc(csize*sizeof(unsigned char));
    right = (unsigned char*)malloc(csize*sizeof(unsigned char));
    ttop = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tbot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tleft = (unsigned char*)malloc(csize*sizeof(unsigned char));
    tright = (unsigned char*)malloc(csize*sizeof(unsigned char));

    //corners
    unsigned char topleft,topright,botleft,botright; //used in calculations
    unsigned char ttopleft,ttopright,tbotleft,tbotright; 
    topleft = 255;
    topright = 255;
    botleft = 255;
    botright = 255;

    //used for animation, each process will put there own result in and then
    //each will send to process 1 which will add them up
    unsigned char* full_matrix;
    unsigned char* full_matrix_buffer;
    if (animation == 1)
    {
        int msize1 = rsize*ncols*csize*nrows;
        full_matrix = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        full_matrix_buffer = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        for (i=0; i<msize1; i++)
        {
            full_matrix[i] = 0;
            full_matrix_buffer[i] = 0;
        }
    }

    
    // printf("Rsize,Lsize,Fsize=%i %i %i,Csize,Lsize,Fsize=%i %i %i\n",rsize,local_width,field_width,csize,local_height,field_height);

    //Serial initialize vars
    int count = 0;
    if (world_size == 1 && run_type == 0)
    {
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                
                if (field_a[(i+1)*(2+rsize) + j + 1])
                {
                    section[i*rsize + j] = 0;
                    count += 1;
                }
                else
                {
                    section[i*rsize + j] = 255;
                }

                top[j] = 255;
                bot[j] = 255;
                ttop[j] = 255;
                tbot[j] = 255;
            }
            right[i] = 255;
            left[i] = 255;
            tright[i] = 255;
            tleft[i] = 255;
        }
        // printf("COUNT 4 = %d\n", count);
    }

    //Blocked/Checkered initializing variables
    else if (world_size > 1 && (run_type == 1 || run_type == 2))
    {
        //initialize
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                
                if (field_a[(i+1)*(2+rsize) + j + 1])
                {
                    section[i*rsize + j] = 0;
                    count += 1;
                }
                else
                {
                    section[i*rsize + j] = 255;
                }

                top[j] = 255;
                bot[j] = 255;
                ttop[j] = 255;
                tbot[j] = 255;
            }
            right[i] = 255;
            left[i] = 255;
            tright[i] = 255;
            tleft[i] = 255;
        }

        // MPI_Allreduce( &count, &total, 1, MPI_UNSIGNED_CHAR, MPI_SUM, MPI_COMM_WORLD );
        // if (rank == 0)
        // {
        //     printf("COUNT 4 = %d\n", total);
        // }
        
    }


    //header/footer for mpio writes
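    //the 15 bytes below spell the ASCII PGM header "P5\n512 512\n255\n"
    //(binary greyscale, 512x512 pixels, maximum grey value 255)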
    char header1[15];
    header1[0] = 0x50;
    header1[1] = 0x35;
    header1[2] = 0x0a;
    header1[3] = 0x35;
    header1[4] = 0x31;
    header1[5] = 0x32;
    header1[6] = 0x20;
    header1[7] = 0x35;
    header1[8] = 0x31;
    header1[9] = 0x32;
    header1[10] = 0x0a;
    header1[11] = 0x32;
    header1[12] = 0x35;
    header1[13] = 0x35;
    header1[14] = 0x0a;

    char footer;
    footer = 0x0a;

    //make a frame or not?
    int create_frame = 0;

    //send to 
    int send_to;
    int receive_from;
    int info[5];
    info[2] = rank;
    info[3] = rsize;
    info[4] = csize;
    unsigned char info2[4];
    info2[0] = topleft;
    info2[1] = topright;
    info2[2] = botleft;
    info2[3] = botright;

    int current_count;
    int location;

    //Gameplay
    for (k=0;k<iterations;k++)
    {
        //Count buggies
        if (k%count_when==0)
        {
            if (verbose == 1)
            {
                current_count = rsize*csize-count_buggies(rsize,csize,section);
                MPI_Allreduce( &current_count, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
                if (rank == 0)
                {
                    printf("Iteration=%5d,  Count=%6d\n", k,total);
                }
                ////corner debug
                // printf("WR,tl,tr,bl,br = %d %d %d %d %d\n", rank, topleft, topright, botleft, botright);
            }
        }

        
        //Write to file serially for comparison
        //If animation is requested
        if (animation == 1 && run_type == 0)
        {
            //Put smaller matrix part into larger matrix
            for (i=0; i<csize; i++)
            {
                for (j=0; j<rsize; j++)
                {
                    location = (my_row*csize*rsize*ncols + my_col*rsize + 
                                    i*rsize*ncols + j);

                    full_matrix_buffer[location] = section[i*rsize+j];
                }
                // if (rank == 0)
                // {
                //     printf("Location = %d\n", location);
                // }
            }

            //Gather matrix
            MPI_Reduce(full_matrix_buffer, full_matrix, rsize*ncols*csize*nrows, 
                MPI_UNSIGNED_CHAR, MPI_SUM, 0, MPI_COMM_WORLD);

            
            if (rank == 0 && run_type == 0)
            {
                write_matrix_to_pgm(k, rsize*ncols, csize*nrows, full_matrix);
            }
        }
        //mpio write pgm
        else if (animation == 1 && (run_type == 1 || run_type == 2))
        {
            //default is no frame
            create_frame = 0;
            for (ii=0;ii<20;ii++)
            {
                for (jj=0;jj<animation_list[ii][1]+1;jj++)
                {
                    // if (rank == 0)
                    // {
                    //     printf("a,ii,j,k= %i,%i,%i,%i, Frame? = %i\n",
                    //         animation_list[ii][0],ii,jj,k,(animation_list[ii][0]+jj-k)==0);
                    // }
                    if ((animation_list[ii][0] + jj - k) == 0)
                    {

                        create_frame = 1;
                        break;
                    }
                }
            }

            if (create_frame == 1)
            {
               //dynamic filename with leading zeroes for easy conversion to gif
                char buffer[128];
                snprintf(buffer, sizeof(char)*128, "Animation/frame%04d.pgm", k);

                /* open the file, and set the view */
                MPI_File file;
                MPI_File_open(MPI_COMM_WORLD, buffer, 
                              MPI_MODE_CREATE|MPI_MODE_WRONLY,
                              MPI_INFO_NULL, &file);

                MPI_File_set_view(file, 0,  MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, 
                                       "native", MPI_INFO_NULL);

                //write header
                MPI_File_write(file, &header1, 15, MPI_CHAR, MPI_STATUS_IGNORE);

                //write matrix
                MPI_File_set_view(file, 15,  MPI_UNSIGNED_CHAR, submatrix, 
                                       "native", MPI_INFO_NULL);

                MPI_File_write_all(file, section, rsize*csize, 
                        MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);

                //write footer (trailing newline)
                MPI_File_set_view(file, 15+rsize*ncols*csize*nrows,  
                        MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, 
                        "native", MPI_INFO_NULL);

                MPI_File_write(file, &footer, 1, MPI_CHAR, MPI_STATUS_IGNORE); 
            } 
        }


        // BLOCKED COMMUNICATION //
        if (run_type == 1)
        {
            //change bot (send top) to account for middle area
            //alternate to avoid locking
            send_to = rank - 1;
            receive_from = rank + 1;

            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }

            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //send top, receive bot
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //change top to account for middle area
            //alternate to avoid locking
            send_to = rank + 1;
            receive_from = rank - 1;

            //send bot, receive top
            if (rank%2==0)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (receive_from<world_size && receive_from >= 0)
                {
                    //*data,count,type,from,tag,comm,mpi_status
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }

                if (send_to<world_size && send_to>=0)
                {
                    //*data,count,type,to,tag,comm
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }
        }

        // CHECKERED COMMUNICATION //
        else if (run_type == 2)
        {
            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }

            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //corners
            ttopleft = tleft[0];
            tbotleft = tleft[csize-1];
            ttopright = tright[0];
            tbotright = tright[csize-1];

            //Send top, receive bot
            send_to = rank - nrows;
            receive_from = rank + nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send bot, receive top
            send_to = rank + nrows;
            receive_from = rank - nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send left, receive right
            send_to = rank - 1;
            receive_from = rank + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send right, receive left (these halos are columns, so use the column datatype)
            send_to = rank + 1;
            receive_from = rank - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send topright, receive botleft
            send_to = rank - ncols + 1;
            receive_from = rank + ncols - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send topleft, receive botright
            send_to = rank - ncols - 1;
            receive_from = rank + ncols + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send botleft, receive topright
            send_to = rank + ncols - 1;
            receive_from = rank - ncols + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send botright, receive topleft
            send_to = rank + ncols + 1;
            receive_from = rank - ncols - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }


            info2[0] = topleft;
            info2[1] = topright;
            info2[2] = botleft;
            info2[3] = botright;

        }
 
        // if (rank == 1){
        //     print_matrix(rsize, 1, top);
        //     print_matrix(rsize, csize, section);
        //     print_matrix(rsize, 1, bot);
        //     printf("\n");
        // }
        // printf("wr=%d,iteration=%d,maxval=%d, 11\n", rank, k,(csize-1)*rsize-1+rsize);
        


        /////////// CELL UPDATES /////////////////
        //count neighbor
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                info[0] = i;
                info[1] = j;
                neighbors[i*rsize+j] = count_neighbors(info, info2, section, 
                                    top, bot, left, right);
                // printf("%i",neighbors[i*rsize+j]);
            }
            // printf("\n");
        }

        //update cells
        current_count = 0;
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                //cell currently alive
                if (section[i*rsize+j] == 0)
                {
                    //2 or 3 neighbors lives, else die
                    if (neighbors[i*rsize+j] < 2 || 
                        neighbors[i*rsize+j] > 3)
                    {
                        section[i*rsize+j] = 255;
                    }
                }
                else
                {
                    //Exactly 3 neighbors spawns new life
                    if (neighbors[i*rsize+j] == 3)
                    {
                        section[i*rsize+j] = 0;
                    }
                }
            }
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    sleep(1);  //sleep() takes whole seconds; 0.5 would truncate to 0
    //free malloc stuff
    if( field_a != NULL ) free( field_a );
    if( field_b != NULL ) free( field_b );
    free(section);
    free(neighbors);
    free(top);
    free(bot);
    free(left);
    free(right);

    MPI_Finalize();
    exit (0);
}