Example no. 1
void slave_procedure(int my_rank, int comm_size, long long the_number) {
	long long from, to;
	long long to_send;
	int shit_happened;
	struct elem* head = NULL;

	from = ((((long long int) sqrt((double) the_number)) / (comm_size - 1))) * (my_rank - 1);
	to = ((((long long int) sqrt((double) the_number)) / (comm_size - 1)) + 1) * (my_rank); // TODO: better square root

	from = from == 0 ? 1 : from; // Because why not

	long long int i;

	#pragma omp parallel shared(from, to) private(i)
	{
		#pragma omp for schedule(auto)
		for(i = 0; i < (to - from); ++i) {
			if(the_number % (from + i) == 0) {
				#pragma omp critical
				{
					add(&head, from + i);
					add(&head, the_number / (from + i));
				}
			}
		}
	}
	do {
		to_send = pick(&head);
		shit_happened = MPI_Ssend(&to_send, 1, MPI_LONG_LONG, 0, 0, MPI_COMM_WORLD);

		if(shit_happened) {
			fprintf(stderr, "Send failed");
			MPI_Abort(MPI_COMM_WORLD, 1);
		}

	}while(to_send != 0);
}
Example no. 2
int main(int argc, char *argv[])		
{
	int ITAG_A = 100,ITAG_B = 200; 
	int irank, i, idest, isrc, istag, iretag;
	float rmsg1[MSGLEN];
	float rmsg2[MSGLEN];
	MPI_Status recv_status;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &irank);  

	for (i = 0; i < MSGLEN; i++)
	{
		rmsg1[i] = 100;
		rmsg2[i] = -100;
	}
	if ( irank == 0 )
	{ 	
		idest = 1;
		isrc = 1;
		istag = ITAG_A;	
		iretag = ITAG_B;
	}
	else if ( irank == 1 )
	{
		idest = 0;
		isrc = 0;
		istag = ITAG_B;
		iretag = ITAG_A;
	}

	/* NOTE: both ranks call MPI_Ssend before posting a receive; a synchronous
	   send cannot complete until the matching receive is started, so this
	   ordering deadlocks (a reordered sketch follows this example). */
	printf("Task %d has sent the message\n", irank);
	MPI_Ssend(&rmsg1, MSGLEN, MPI_FLOAT, idest, istag, MPI_COMM_WORLD);
	MPI_Recv(&rmsg2, MSGLEN, MPI_FLOAT, isrc, iretag, MPI_COMM_WORLD, &recv_status);
	printf("Task %d has received the message\n", irank);
	MPI_Finalize();
}
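The exchange above deadlocks because both ranks post their synchronous send first. A minimal sketch of one safe ordering for the same two-rank ping-pong; MSGLEN and the tag values here are illustrative placeholders, not taken from the original.

#include <mpi.h>

#define MSGLEN 2048   /* placeholder message length */

int main(int argc, char *argv[])
{
	float msg[MSGLEN];
	int rank, i;
	MPI_Status status;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);   /* run with two ranks */

	for (i = 0; i < MSGLEN; i++)
		msg[i] = (float) rank;

	if (rank == 0) {
		/* Rank 0 sends first; rank 1 posts the matching receive before
		   attempting its own synchronous send, so neither call can block
		   forever. */
		MPI_Ssend(msg, MSGLEN, MPI_FLOAT, 1, 100, MPI_COMM_WORLD);
		MPI_Recv(msg, MSGLEN, MPI_FLOAT, 1, 200, MPI_COMM_WORLD, &status);
	} else if (rank == 1) {
		MPI_Recv(msg, MSGLEN, MPI_FLOAT, 0, 100, MPI_COMM_WORLD, &status);
		MPI_Ssend(msg, MSGLEN, MPI_FLOAT, 0, 200, MPI_COMM_WORLD);
	}

	MPI_Finalize();
	return 0;
}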
Example no. 3
  void send( const vector_type & v )
  {
    // Input vector: [ owned-interior , owned-send , receive ]

    const std::pair<unsigned,unsigned> send_range( m_map.count_interior , m_map.count_interior + m_map.count_send );

    vector_type vsend = subview<vector_type>( v , send_range );

    Impl::DeepCopy<HostSpace,typename Device::memory_space>( m_host_send_buffer.ptr_on_device() ,
                                                             vsend.ptr_on_device() ,
                                                             m_map.count_send * m_chunk * sizeof(scalar_type) );

    for ( unsigned i = 0 , j = 0 ; i < m_map.host_send.dimension_0() ; ++i ) {
      const int proc  = m_map.host_send(i,0);
      const int count = m_map.host_send(i,1);

      // Gather send data to contiguous buffer:

      for ( int k = 0 , km = 0 ; k < count ; ++k , ++j ) {
        const int km_end = km + m_chunk ;
        for ( int ki = m_chunk * m_map.host_send_item(j) ; km < km_end ; ++km , ++ki ) {
          m_host_send_message[km] = m_host_send_buffer[ki];
        }
      }

      // MPI_Ssend blocks until
      // (1) a receive is matched for the message and
      // (2) the send buffer can be re-used.
      //
      // It is suggested that MPI_Ssend will have the best performance:
      // http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .

      MPI_Ssend( m_host_send_message.ptr_on_device(),
                 count * m_chunk * sizeof(scalar_type) , MPI_BYTE ,
                 proc , mpi_tag , m_map.machine.mpi_comm );
    }
  }
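The loop above first gathers the selected entries into a contiguous host buffer and only then hands that buffer to MPI_Ssend. A plain-C sketch of the same pack-then-send pattern; pack_and_ssend and its parameters are illustrative names, not part of the code above.

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

/* Copy the items selected by send_index into one contiguous buffer, then
   send the whole buffer synchronously; once MPI_Ssend returns, the buffer
   can be freed or reused.  All names here are illustrative. */
static void pack_and_ssend(const double *data, const int *send_index,
                           int nitems, int chunk, int dest, int tag,
                           MPI_Comm comm)
{
	double *buf = malloc((size_t) nitems * chunk * sizeof(double));
	int i;

	if (buf == NULL)
		MPI_Abort(comm, 1);

	for (i = 0; i < nitems; i++)
		memcpy(buf + (size_t) i * chunk,
		       data + (size_t) send_index[i] * chunk,
		       chunk * sizeof(double));

	MPI_Ssend(buf, nitems * chunk, MPI_DOUBLE, dest, tag, comm);
	free(buf);
}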
Example no. 4
void processor_A(void){
  
	double message[1];   /* the ping-pong payload is empty (count 0); one element avoids a zero-length array */
	double timer1;
	const int ping=101, pong=102;
	int len = 0;
	int i = 0;
  	int envios = 10000;

  	MPI_Status status;


	timer1=MPI_Wtime();

	for (i = 0; i < envios; i++) {
		MPI_Ssend(message, 0, MPI_DOUBLE, 1, ping, MPI_COMM_WORLD);
		MPI_Recv(message, 0, MPI_DOUBLE, 1, pong, MPI_COMM_WORLD, &status);
	}

	timer1=MPI_Wtime()-timer1;

	printf("## |%9d| %6d | %13g | %12g |%d|\n", 0, envios, timer1, timer1/envios, 0);

}
Example no. 5
/*! This function writes an actual snapshot file containing the data from
 *  processors 'writeTask' to 'lastTask'. 'writeTask' is the one that actually
 *  writes.  Each snapshot file contains a header first, then particle
 *  positions, velocities and ID's.  Particle masses are written only for
 *  those particle types with zero entry in MassTable.  After that, first the
 *  internal energies u, and then the density is written for the SPH
 *  particles.  If cooling is enabled, mean molecular weight and neutral
 *  hydrogen abundance are written for the gas particles. This is followed by
 *  the SPH smoothing length and further blocks of information, depending on
 *  included physics and compile-time flags.  If HDF5 is used, the header is
 *  stored in a group called "/Header", and the particle data is stored
 *  separately for each particle type in groups called "/PartType0",
 *  "/PartType1", etc. The sequence of the blocks is unimportant in this case.
 */
void write_file(char *fname, int writeTask, int lastTask)
{
  int type, bytes_per_blockelement, npart, nextblock, typelist[6];
  int n_for_this_task, ntask, n, p, pc, offset = 0, task;
  int blockmaxlen, ntot_type[6], nn[6];
  enum iofields blocknr;
  int blksize;
  MPI_Status status;
  FILE *fd = 0;

#ifdef HAVE_HDF5
  hid_t hdf5_file = 0, hdf5_grp[6], hdf5_headergrp = 0, hdf5_dataspace_memory;
  hid_t hdf5_datatype = 0, hdf5_dataspace_in_file = 0, hdf5_dataset = 0;
  herr_t hdf5_status;
  hsize_t dims[2], count[2], start[2];
  int rank, pcsum = 0;
  char buf[500];
#endif

#define SKIP  {my_fwrite(&blksize,sizeof(int),1,fd);}

  /* determine particle numbers of each type in file */

  if(ThisTask == writeTask)
    {
      for(n = 0; n < 6; n++)
	ntot_type[n] = n_type[n];

      for(task = writeTask + 1; task <= lastTask; task++)
	{
	  MPI_Recv(&nn[0], 6, MPI_INT, task, TAG_LOCALN, MPI_COMM_WORLD, &status);
	  for(n = 0; n < 6; n++)
	    ntot_type[n] += nn[n];
	}

      for(task = writeTask + 1; task <= lastTask; task++)
	MPI_Send(&ntot_type[0], 6, MPI_INT, task, TAG_N, MPI_COMM_WORLD);
    }
  else
    {
      MPI_Send(&n_type[0], 6, MPI_INT, writeTask, TAG_LOCALN, MPI_COMM_WORLD);
      MPI_Recv(&ntot_type[0], 6, MPI_INT, writeTask, TAG_N, MPI_COMM_WORLD, &status);
    }



  /* fill file header */

  for(n = 0; n < 6; n++)
    {
      header.npart[n] = ntot_type[n];
      header.npartTotal[n] = (unsigned int) ntot_type_all[n];
      header.npartTotalHighWord[n] = (unsigned int) (ntot_type_all[n] >> 32);
    }

  for(n = 0; n < 6; n++)
    header.mass[n] = All.MassTable[n];

  header.time = All.Time;

  if(All.ComovingIntegrationOn)
    header.redshift = 1.0 / All.Time - 1;
  else
    header.redshift = 0;

  header.flag_sfr = 0;
  header.flag_feedback = 0;
  header.flag_cooling = 0;
  header.flag_stellarage = 0;
  header.flag_metals = 0;

#ifdef COOLING
  header.flag_cooling = 1;
#endif
#ifdef SFR
  header.flag_sfr = 1;
  header.flag_feedback = 1;
#ifdef STELLARAGE
  header.flag_stellarage = 1;
#endif
#ifdef METALS
  header.flag_metals = 1;
#endif
#endif

  header.num_files = All.NumFilesPerSnapshot;
  header.BoxSize = All.BoxSize;
  header.Omega0 = All.Omega0;
  header.OmegaLambda = All.OmegaLambda;
  header.HubbleParam = All.HubbleParam;


  /* open file and write header */

  if(ThisTask == writeTask)
    {
      if(All.SnapFormat == 3)
	{
#ifdef HAVE_HDF5
	  sprintf(buf, "%s.hdf5", fname);
	  hdf5_file = H5Fcreate(buf, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

	  hdf5_headergrp = H5Gcreate(hdf5_file, "/Header", 0);

	  for(type = 0; type < 6; type++)
	    {
	      if(header.npart[type] > 0)
		{
		  sprintf(buf, "/PartType%d", type);
		  hdf5_grp[type] = H5Gcreate(hdf5_file, buf, 0);
		}
	    }

	  write_header_attributes_in_hdf5(hdf5_headergrp);
#endif
	}
      else
	{
	  if(!(fd = fopen(fname, "w")))
	    {
	      printf("can't open file `%s' for writing snapshot.\n", fname);
	      endrun(123);
	    }

	  if(All.SnapFormat == 2)
	    {
	      blksize = sizeof(int) + 4 * sizeof(char);
	      SKIP;
	      my_fwrite("HEAD", sizeof(char), 4, fd);
	      nextblock = sizeof(header) + 2 * sizeof(int);
	      my_fwrite(&nextblock, sizeof(int), 1, fd);
	      SKIP;
	    }

	  blksize = sizeof(header);
	  SKIP;
	  my_fwrite(&header, sizeof(header), 1, fd);
	  SKIP;
	}
    }

  ntask = lastTask - writeTask + 1;

  for(blocknr = 0; blocknr < IO_NBLOCKS; blocknr++)
    {
      if(blockpresent(blocknr))
	{
	  bytes_per_blockelement = get_bytes_per_blockelement(blocknr);

	  blockmaxlen = ((int) (All.BufferSize * 1024 * 1024)) / bytes_per_blockelement;

	  npart = get_particles_in_block(blocknr, &typelist[0]);

	  if(npart > 0)
	    {
	      if(ThisTask == writeTask)
		{

		  if(All.SnapFormat == 1 || All.SnapFormat == 2)
		    {
		      if(All.SnapFormat == 2)
			{
			  blksize = sizeof(int) + 4 * sizeof(char);
			  SKIP;
			  my_fwrite(Tab_IO_Labels[blocknr], sizeof(char), 4, fd);
			  nextblock = npart * bytes_per_blockelement + 2 * sizeof(int);
			  my_fwrite(&nextblock, sizeof(int), 1, fd);
			  SKIP;
			}

		      blksize = npart * bytes_per_blockelement;
		      SKIP;

		    }
		}

	      for(type = 0; type < 6; type++)
		{
		  if(typelist[type])
		    {
#ifdef HAVE_HDF5
		      if(ThisTask == writeTask && All.SnapFormat == 3 && header.npart[type] > 0)
			{
			  switch (get_datatype_in_block(blocknr))
			    {
			    case 0:
			      hdf5_datatype = H5Tcopy(H5T_NATIVE_UINT);
			      break;
			    case 1:
			      hdf5_datatype = H5Tcopy(H5T_NATIVE_FLOAT);
			      break;
			    case 2:
			      hdf5_datatype = H5Tcopy(H5T_NATIVE_UINT64);
			      break;
			    }

			  dims[0] = header.npart[type];
			  dims[1] = get_values_per_blockelement(blocknr);
			  if(dims[1] == 1)
			    rank = 1;
			  else
			    rank = 2;

			  get_dataset_name(blocknr, buf);

			  hdf5_dataspace_in_file = H5Screate_simple(rank, dims, NULL);
			  hdf5_dataset =
			    H5Dcreate(hdf5_grp[type], buf, hdf5_datatype, hdf5_dataspace_in_file,
				      H5P_DEFAULT);
			  pcsum = 0;
			}
#endif

		      for(task = writeTask, offset = 0; task <= lastTask; task++)
			{
			  if(task == ThisTask)
			    {
			      n_for_this_task = n_type[type];

			      for(p = writeTask; p <= lastTask; p++)
				if(p != ThisTask)
				  MPI_Send(&n_for_this_task, 1, MPI_INT, p, TAG_NFORTHISTASK, MPI_COMM_WORLD);
			    }
			  else
			    MPI_Recv(&n_for_this_task, 1, MPI_INT, task, TAG_NFORTHISTASK, MPI_COMM_WORLD,
				     &status);

			  while(n_for_this_task > 0)
			    {
			      pc = n_for_this_task;

			      if(pc > blockmaxlen)
				pc = blockmaxlen;

			      if(ThisTask == task)
				fill_write_buffer(blocknr, &offset, pc, type);

			      if(ThisTask == writeTask && task != writeTask)
				MPI_Recv(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, task,
					 TAG_PDATA, MPI_COMM_WORLD, &status);

			      if(ThisTask != writeTask && task == ThisTask)
				MPI_Ssend(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, writeTask,
					  TAG_PDATA, MPI_COMM_WORLD);

			      if(ThisTask == writeTask)
				{
				  if(All.SnapFormat == 3)
				    {
#ifdef HAVE_HDF5
				      start[0] = pcsum;
				      start[1] = 0;

				      count[0] = pc;
				      count[1] = get_values_per_blockelement(blocknr);
				      pcsum += pc;

				      H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET,
							  start, NULL, count, NULL);

				      dims[0] = pc;
				      dims[1] = get_values_per_blockelement(blocknr);
				      hdf5_dataspace_memory = H5Screate_simple(rank, dims, NULL);

				      hdf5_status =
					H5Dwrite(hdf5_dataset, hdf5_datatype, hdf5_dataspace_memory,
						 hdf5_dataspace_in_file, H5P_DEFAULT, CommBuffer);

				      H5Sclose(hdf5_dataspace_memory);
#endif
				    }
				  else
				    my_fwrite(CommBuffer, bytes_per_blockelement, pc, fd);
				}

			      n_for_this_task -= pc;
			    }
			}

#ifdef HAVE_HDF5
		      if(ThisTask == writeTask && All.SnapFormat == 3 && header.npart[type] > 0)
			{
			  if(All.SnapFormat == 3)
			    {
			      H5Dclose(hdf5_dataset);
			      H5Sclose(hdf5_dataspace_in_file);
			      H5Tclose(hdf5_datatype);
			    }
			}
#endif
		    }
		}

	      if(ThisTask == writeTask)
		{
		  if(All.SnapFormat == 1 || All.SnapFormat == 2)
		    SKIP;
		}
	    }
	}
    }

  if(ThisTask == writeTask)
    {
      if(All.SnapFormat == 3)
	{
#ifdef HAVE_HDF5
	  for(type = 5; type >= 0; type--)
	    if(header.npart[type] > 0)
	      H5Gclose(hdf5_grp[type]);
	  H5Gclose(hdf5_headergrp);
	  H5Fclose(hdf5_file);
#endif
	}
      else
	fclose(fd);
    }
}
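The SKIP macro above brackets every block with its byte count, producing the Fortran-unformatted-style records used by SnapFormat 1 and 2. A minimal sketch of that framing using plain fwrite; write_block is a hypothetical helper, not part of the original code.

#include <stdio.h>

/* Write one record as (4-byte length, payload, 4-byte length), which is
   what the SKIP macro does around each block above. */
static void write_block(const void *data, size_t nbytes, FILE *fd)
{
	int blksize = (int) nbytes;

	fwrite(&blksize, sizeof(int), 1, fd);
	fwrite(data, 1, nbytes, fd);
	fwrite(&blksize, sizeof(int), 1, fd);
}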
Example no. 6
int main( int argc, char *argv[] )
{
    int errs = 0, err;
    int rank, size, rsize, i;
    int np = 2;
    int errcodes[2];
    MPI_Comm      parentcomm, intercomm, intracomm, intracomm2, intracomm3;
    int           isChild = 0;
    MPI_Status    status;

    MTest_Init( &argc, &argv );

    MPI_Comm_get_parent( &parentcomm );

    if (parentcomm == MPI_COMM_NULL) {
	/* Create 2 more processes */
	MPI_Comm_spawn( (char*)"./spawnintra", MPI_ARGV_NULL, np,
			MPI_INFO_NULL, 0, MPI_COMM_WORLD,
			&intercomm, errcodes );
    }
    else 
	intercomm = parentcomm;

    /* We now have a valid intercomm */

    MPI_Comm_remote_size( intercomm, &rsize );
    MPI_Comm_size( intercomm, &size );
    MPI_Comm_rank( intercomm, &rank );

    if (parentcomm == MPI_COMM_NULL) {
	/* Master */
	if (rsize != np) {
	    errs++;
	    printf( "Did not create %d processes (got %d)\n", np, rsize );
	}
	if (rank == 0) {
	    for (i=0; i<rsize; i++) {
		MPI_Send( &i, 1, MPI_INT, i, 0, intercomm );
	    }
	}
    }
    else {
	/* Child */
	isChild = 1;
	if (size != np) {
	    errs++;
	    printf( "(Child) Did not create %d processes (got %d)\n", 
		    np, size );
	}
	MPI_Recv( &i, 1, MPI_INT, 0, 0, intercomm, &status );
	if (i != rank) {
	    errs++;
	    printf( "Unexpected rank on child %d (%d)\n", rank, i );
	}
    }

    /* At this point, try to form the intracommunicator */
    MPI_Intercomm_merge( intercomm, isChild, &intracomm );

    /* Check on the intra comm */
    {
	int icsize, icrank, wrank;

	MPI_Comm_size( intracomm, &icsize );
	MPI_Comm_rank( intracomm, &icrank );
	MPI_Comm_rank( MPI_COMM_WORLD, &wrank );

	if (icsize != rsize + size) {
	    errs++;
	    printf( "Intracomm rank %d thinks size is %d, not %d\n",
		    icrank, icsize, rsize + size );
	}
	/* Make sure that the processes are ordered correctly */
	if (isChild) {
	    int psize;
	    MPI_Comm_remote_size( parentcomm, &psize );
	    if (icrank != psize + wrank ) {
		errs++;
		printf( "Intracomm rank %d (from child) should have rank %d\n",
			icrank, psize + wrank );
	    }
	}
	else {
	    if (icrank != wrank) {
		errs++;
		printf( "Intracomm rank %d (from parent) should have rank %d\n",
			icrank, wrank );
	    }
	}
    }

    /* At this point, try to form the intracommunicator, with the other 
     processes first */
    MPI_Intercomm_merge( intercomm, !isChild, &intracomm2 );

    /* Check on the intra comm */
    {
	int icsize, icrank, wrank;

	MPI_Comm_size( intracomm2, &icsize );
	MPI_Comm_rank( intracomm2, &icrank );
	MPI_Comm_rank( MPI_COMM_WORLD, &wrank );

	if (icsize != rsize + size) {
	    errs++;
	    printf( "(2)Intracomm rank %d thinks size is %d, not %d\n",
		    icrank, icsize, rsize + size );
	}
	/* Make sure that the processes are ordered correctly */
	if (isChild) {
	    if (icrank != wrank ) {
		errs++;
		printf( "(2)Intracomm rank %d (from child) should have rank %d\n",
			icrank, wrank );
	    }
	}
	else {
	    int csize;
	    MPI_Comm_remote_size( intercomm, &csize );
	    if (icrank != wrank + csize) {
		errs++;
		printf( "(2)Intracomm rank %d (from parent) should have rank %d\n",
			icrank, wrank + csize );
	    }
	}
    }

    /* At this point, try to form the intracommunicator, with an 
       arbitrary choice for the first group of processes */
    MPI_Intercomm_merge( intercomm, 0, &intracomm3 );
    /* Check on the intra comm */
    {
	int icsize, icrank, wrank;

	MPI_Comm_size( intracomm3, &icsize );
	MPI_Comm_rank( intracomm3, &icrank );
	MPI_Comm_rank( MPI_COMM_WORLD, &wrank );

	if (icsize != rsize + size) {
	    errs++;
	    printf( "(3)Intracomm rank %d thinks size is %d, not %d\n",
		    icrank, icsize, rsize + size );
	}
	/* Eventually, we should test that the processes are ordered 
	   correctly, by groups (must be one of the two cases above) */
    }

    /* Update error count */
    if (isChild) {
	/* Send the errs back to the master process */
	MPI_Ssend( &errs, 1, MPI_INT, 0, 1, intercomm );
    }
    else {
	if (rank == 0) {
	    /* We could use intercomm reduce to get the errors from the 
	       children, but we'll use a simpler loop to make sure that
	       we get valid data */
	    for (i=0; i<rsize; i++) {
		MPI_Recv( &err, 1, MPI_INT, i, 1, intercomm, MPI_STATUS_IGNORE );
		errs += err;
	    }
	}
    }

    /* It isn't necessary to free the intracomms, but it should not hurt */
    MPI_Comm_free( &intracomm );
    MPI_Comm_free( &intracomm2 );
    MPI_Comm_free( &intracomm3 );

    /* It isn't necessary to free the intercomm, but it should not hurt */
    MPI_Comm_free( &intercomm );

    /* Note that the MTest_Finalize get errs only over COMM_WORLD */
    /* Note also that both the parent and child will generate "No Errors"
       if both call MTest_Finalize */
    if (parentcomm == MPI_COMM_NULL) {
	MTest_Finalize( errs );
    }

    MPI_Finalize();
    return 0;
}
Example no. 7
void lucMeshCrossSection_Sample( void* drawingObject, Bool reverse)
{
   lucMeshCrossSection* self          = (lucMeshCrossSection*)drawingObject;
   FeVariable*          fieldVariable = (FeVariable*) self->fieldVariable;
   Mesh*                mesh          = (Mesh*) fieldVariable->feMesh;
   Grid*                vertGrid;
   Node_LocalIndex      crossSection_I;
   IJK                  node_ijk;
   Node_GlobalIndex     node_gI;
   Node_DomainIndex     node_dI;
   int                  i,j, d, sizes[3] = {1,1,1};
   Coord                globalMin, globalMax, min, max;

   int localcount = 0;

   vertGrid = *(Grid**)ExtensionManager_Get( mesh->info, mesh, self->vertexGridHandle );
   for (d=0; d<fieldVariable->dim; d++) sizes[d] = vertGrid->sizes[d];
   self->dim[0] = sizes[ self->axis ];
   self->dim[1] = sizes[ self->axis1 ];
   self->dim[2] = sizes[ self->axis2 ];

   crossSection_I = lucCrossSection_GetValue(self, 0, self->dim[0]-1);

   FieldVariable_GetMinAndMaxLocalCoords( fieldVariable, min, max );
   FieldVariable_GetMinAndMaxGlobalCoords( fieldVariable, globalMin, globalMax );

   Journal_Printf( lucDebug, "%s called on field %s, with axis of cross section as %d, crossSection_I as %d (dims %d,%d,%d) field dim %d\n",
                    __func__, fieldVariable->name, self->axis, crossSection_I, self->dim[0], self->dim[1], self->dim[2], self->fieldDim);

   /* Get mesh cross section self->vertices and values */
   self->resolutionA = self->dim[1];
   self->resolutionB = self->dim[2];
   lucCrossSection_AllocateSampleData(self, self->fieldDim);
   int lSize = Mesh_GetLocalSize( mesh, MT_VERTEX );
   double time = MPI_Wtime();
   Journal_Printf(lucInfo, "Sampling mesh (%s) %d x %d...  0%%", self->name, self->dim[1], self->dim[2]);
   node_ijk[ self->axis ] = crossSection_I;
   for ( i = 0 ; i < self->dim[1]; i++ )
   {
      int percent = 100 * (i + 1) / self->dim[1];
      Journal_Printf(lucInfo, "\b\b\b\b%3d%%", percent);
      fflush(stdout);

      /* Reverse order if requested */
      int i0 = i;
      if (reverse) i0 = self->dim[1] - i - 1;

      node_ijk[ self->axis1 ] = i0;

      for ( j = 0 ; j < self->dim[2]; j++ )
      {
         self->vertices[i][j][0] = HUGE_VAL;
         self->vertices[i][j][2] = 0;
         node_ijk[ self->axis2 ] = j;
         node_gI = Grid_Project( vertGrid, node_ijk );
         /* Get coord and value if node is local... */
         if (Mesh_GlobalToDomain( mesh, MT_VERTEX, node_gI, &node_dI ) && node_dI < lSize)
         {  
            /* Found on this processor */
            double value[self->fieldDim];
            FeVariable_GetValueAtNode( fieldVariable, node_dI, value );
            double* pos = Mesh_GetVertex( mesh, node_dI );
            /*fprintf(stderr, "[%d] (%d,%d) Node %d %f,%f,%f value %f\n", self->context->rank, i, j, node_gI, pos[0], pos[1], pos[2], value);*/
         
            for (d=0; d<fieldVariable->dim; d++)
               self->vertices[i][j][d] = pos[d];

            for (d=0; d<self->fieldDim; d++)
               self->values[i][j][d] = (float)value[d];

            localcount++;
         }
      }
   }
   Journal_Printf(lucInfo, " %f sec. ", MPI_Wtime() - time);

   /* Collate */
   time = MPI_Wtime();
   for ( i=0 ; i < self->dim[1]; i++ )
   {
      for ( j=0 ; j < self->dim[2]; j++ )
      {
         /* Receive values at root */
         if (self->context->rank == 0)
         {
            /* Already have value? */
            if (self->vertices[i][j][0] != HUGE_VAL) {localcount--; continue; }

            /* Recv (pos and value together = (3 + fevar dims)*float) */
            float data[3 + self->fieldDim];
            (void)MPI_Recv(data, 3+self->fieldDim, MPI_FLOAT, MPI_ANY_SOURCE, i*self->dim[2]+j, self->context->communicator, MPI_STATUS_IGNORE);
            /* Copy */
            memcpy(self->vertices[i][j], data, 3 * sizeof(float));
            memcpy(self->values[i][j], &data[3], self->fieldDim * sizeof(float));
         }
         else
         {
            /* Found on this proc? */
            if (self->vertices[i][j][0] == HUGE_VAL) continue;

            /* Copy */
            float data[3 + self->fieldDim];
            memcpy(data, self->vertices[i][j], 3 * sizeof(float));
            memcpy(&data[3], self->values[i][j], self->fieldDim * sizeof(float));

            /* Send values to root (pos and value together = (3 + fevar dims)*float) */
            MPI_Ssend(data, 3+self->fieldDim, MPI_FLOAT, 0, i*self->dim[2]+j, self->context->communicator);
            localcount--;
         }
      }
   }
   MPI_Barrier(self->context->communicator);    /* Barrier required, prevent subsequent MPI calls from interfering with transfer */
   Journal_Printf(lucInfo, " Gather in %f sec.\n", MPI_Wtime() - time);
   Journal_Firewall(localcount == 0, lucError,
                     "Error - in %s: count of values sampled compared to sent/received by mpi on proc %d does not match (balance = %d)\n",
                     __func__, self->context->rank, localcount);
}
Example no. 8
void mpi_ssend (void *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest,
		MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *__ierr)
{
  *__ierr = MPI_Ssend (buf, *count, MPI_Type_f2c (*datatype), *dest, *tag, 
		       MPI_Comm_f2c (*comm));
}
Example no. 9
/* does the actual write of the file */
static void file_write_rays2bin(long fileNum, long firstTask, long lastTask, MPI_Comm fileComm)
{
  char name[MAX_FILENAME];
  long NumRaysInFile,i,j;
  long *NumRaysInPeanoCell,*StartRaysInPeanoCell,peano,rpeano;
  size_t buffSizeMB = 10;
  MPI_Status status;
  
  char *chunkRays;
  long k,chunkInd,firstInd,lastInd,NumRaysInChunkBase,NumRaysInChunk,NumChunks;
  double ra,dec;
  long nw=0,nwg=0,nwc=0,NtotToRecv;
  
  struct IOheader {
    long NumFiles;
    long PeanoCellHEALPixOrder;
    long RayHEALPixOrder;
    long flag_defl;
    long flag_phi;
    char pad[216]; //pad to 256 bytes
  } header;
  
  int dummy;
  FILE *fp = NULL;
  double t0 = 0.0;
  
  size_t rays = 0;
  rays += sizeof(long);
  rays += 2*sizeof(double);
  rays += 4*sizeof(double);
#ifdef OUTPUTRAYDEFLECTIONS
  rays += 2*sizeof(double);
#endif
#ifdef OUTPUTPHI
  rays += sizeof(double);
#endif
  
  sprintf(name,"%s/%s%04ld.%04ld",rayTraceData.OutputPath,rayTraceData.RayOutputName,rayTraceData.CurrentPlaneNum,fileNum);
  
  NumRaysInChunkBase = buffSizeMB*1024l*1024l/rays;
  chunkRays = (char*)malloc(rays*NumRaysInChunkBase);
  assert(chunkRays != NULL);
  
  /* build file layout*/
  NumRaysInPeanoCell = (long*)malloc(sizeof(long)*NbundleCells);
  assert(NumRaysInPeanoCell != NULL);
  StartRaysInPeanoCell = (long*)malloc(sizeof(long)*NbundleCells);
  assert(StartRaysInPeanoCell != NULL);
  for(i=0;i<NbundleCells;++i)
    StartRaysInPeanoCell[i] = 0;
  for(i=0;i<NbundleCells;++i)
    {
      if(ISSETBITFLAG(bundleCells[i].active,PRIMARY_BUNDLECELL))
	{
	  peano = nest2peano(bundleCells[i].nest,rayTraceData.bundleOrder);
	  StartRaysInPeanoCell[peano] = bundleCells[i].Nrays;
	  nwc += bundleCells[i].Nrays;
	}
    }
  MPI_Allreduce(StartRaysInPeanoCell,NumRaysInPeanoCell,(int) NbundleCells,MPI_LONG,MPI_SUM,fileComm);
  j = 0;
  for(i=0;i<NbundleCells;++i)
    {
      StartRaysInPeanoCell[i] = j;
      j += NumRaysInPeanoCell[i];
    }
  NumRaysInFile = j;
  
  //set header
  header.NumFiles = rayTraceData.NumRayOutputFiles;
  header.PeanoCellHEALPixOrder = rayTraceData.bundleOrder;
  header.RayHEALPixOrder = rayTraceData.rayOrder;
  
#ifdef OUTPUTRAYDEFLECTIONS
  header.flag_defl = 1;
#else
  header.flag_defl = 0;
#endif
#ifdef OUTPUTPHI
  header.flag_phi = 1;
#else
  header.flag_phi = 0;
#endif
  
  /* make the file and write header info */
  if(ThisTask == firstTask)
    {
      t0 = -MPI_Wtime();
      
      fp = fopen(name,"w");      
      if(fp == NULL)
	{
	  fprintf(stderr,"%d: could not open file '%s' for header!\n",ThisTask,name);
	  MPI_Abort(MPI_COMM_WORLD,666);
	}
      
      dummy = sizeof(struct IOheader);
      fwrite_errcheck(&dummy,(size_t) 1,sizeof(int),fp);
      fwrite_errcheck(&header,(size_t) 1,sizeof(struct IOheader),fp);
      fwrite_errcheck(&dummy,(size_t) 1,sizeof(int),fp);
      
      dummy = NbundleCells*sizeof(long);
      fwrite_errcheck(&dummy,(size_t) 1,sizeof(int),fp);
      fwrite_errcheck(NumRaysInPeanoCell,(size_t) NbundleCells,sizeof(long),fp);
      fwrite_errcheck(&dummy,(size_t) 1,sizeof(int),fp);
      
      dummy = NbundleCells*sizeof(long);
      fwrite_errcheck(&dummy,(size_t) 1,sizeof(int),fp);
      fwrite_errcheck(StartRaysInPeanoCell,(size_t) NbundleCells,sizeof(long),fp);
      fwrite_errcheck(&dummy,(size_t) 1,sizeof(int),fp);
      
      dummy = NumRaysInFile*rays;
      fwrite_errcheck(&dummy,(size_t) 1,sizeof(int),fp);
    }
  
  for(i=firstTask;i<=lastTask;++i)
    {
      if(ThisTask == i)
	{
#ifdef DEBUG
#if DEBUG_LEVEL > 0
	  fprintf(stderr,"%d: fileNum = %ld, first,last = %ld|%ld\n",ThisTask,fileNum,firstTask,lastTask);
#endif
#endif
	  if(ThisTask != firstTask)
	    {
	      MPI_Send(&nwc,1,MPI_LONG,(int) firstTask,TAG_RAYIO_TOTNUM,MPI_COMM_WORLD);
	    }
	  
	  for(rpeano=0;rpeano<NrestrictedPeanoInd;++rpeano)
	    {
	      j = bundleCellsRestrictedPeanoInd2Nest[rpeano];
	      
	      if(ISSETBITFLAG(bundleCells[j].active,PRIMARY_BUNDLECELL))
		{
		  peano = nest2peano(bundleCells[j].nest,rayTraceData.bundleOrder);
		  
		  assert(NumRaysInPeanoCell[peano] == ((1l) << (2*(rayTraceData.rayOrder-rayTraceData.bundleOrder))));
		  assert((StartRaysInPeanoCell[peano] - 
			  ((StartRaysInPeanoCell[peano])/(((1l) << (2*(rayTraceData.rayOrder-rayTraceData.bundleOrder)))))
			  *(((1l) << (2*(rayTraceData.rayOrder-rayTraceData.bundleOrder))))) == 0);
		  
		  NumChunks = NumRaysInPeanoCell[peano]/NumRaysInChunkBase;
		  if(NumChunks*NumRaysInChunkBase < NumRaysInPeanoCell[peano])
		    NumChunks += 1;
		  
		  for(chunkInd=0;chunkInd<NumChunks;++chunkInd)
		    {
		      firstInd = chunkInd*NumRaysInChunkBase;
		      lastInd = (chunkInd+1)*NumRaysInChunkBase-1;
		      if(lastInd >= NumRaysInPeanoCell[peano]-1)
			lastInd = NumRaysInPeanoCell[peano]-1;
		      NumRaysInChunk = lastInd - firstInd + 1;
		      
		      for(k=firstInd;k<=lastInd;++k)
			{
			  ++nw;
			  vec2radec(bundleCells[j].rays[k].n,&ra,&dec);
			  
			  *((long*) (&(chunkRays[(k-firstInd)*rays]))) = bundleCells[j].rays[k].nest;
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long)]))) = ra;
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long) + sizeof(double)]))) = dec;
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long) + 2*sizeof(double)]))) = bundleCells[j].rays[k].A[0];
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long) + 2*sizeof(double) + sizeof(double)]))) = bundleCells[j].rays[k].A[1];
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long) + 2*sizeof(double) + 2*sizeof(double)]))) = bundleCells[j].rays[k].A[2];
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long) + 2*sizeof(double) + 3*sizeof(double)]))) = bundleCells[j].rays[k].A[3];
			  
#ifdef OUTPUTRAYDEFLECTIONS
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long) + 2*sizeof(double) + 4*sizeof(double)]))) = bundleCells[j].rays[k].alpha[0];
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long) + 2*sizeof(double) + 4*sizeof(double) + sizeof(double)]))) = bundleCells[j].rays[k].alpha[1];
#endif
#ifdef OUTPUTPHI
			  *((double*) (&(chunkRays[(k-firstInd)*rays + sizeof(long) + 2*sizeof(double) + 4*sizeof(double) + 2*sizeof(double)]))) = bundleCells[j].rays[k].phi;
#endif
			}
		      
		      if(ThisTask != firstTask)
			{
			  MPI_Send(&NumRaysInChunk,1,MPI_LONG,(int) firstTask,TAG_RAYIO_NUMCHUNK,MPI_COMM_WORLD);
			  MPI_Ssend(chunkRays,(int) (rays*NumRaysInChunk),MPI_BYTE,(int) firstTask,TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD);
			}
		      else
			fwrite_errcheck(chunkRays,(size_t) NumRaysInChunk,rays,fp);
		      
		    }// for(chunkInd=0;chunkInd<NumChunks;++chunkInd)
		} //if(ISSETBITFLAG(bundleCells[j].active,PRIMARY_BUNDLECELL)).
	    } //for(j=0;j<NbundleCells;++j)
	} //if(ThisTask == i)
      
      if(i != firstTask && ThisTask == firstTask)
	{
	  MPI_Recv(&NtotToRecv,1,MPI_LONG,(int) i,TAG_RAYIO_TOTNUM,MPI_COMM_WORLD,&status);
	  
	  while(NtotToRecv > 0)
	    {
	      MPI_Recv(&NumRaysInChunk,1,MPI_LONG,(int) i,TAG_RAYIO_NUMCHUNK,MPI_COMM_WORLD,&status);
	      MPI_Recv(chunkRays,(int) (rays*NumRaysInChunk),MPI_BYTE,(int) i,TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&status);
	      fwrite_errcheck(chunkRays,(size_t) NumRaysInChunk,rays,fp);
	      nwg += NumRaysInChunk;
	      NtotToRecv -= NumRaysInChunk;
	    }
	}
      
      //////////////////////////////
      MPI_Barrier(fileComm);
      //////////////////////////////
    }
  
  if(ThisTask == firstTask)
    {
      dummy = NumRaysInFile*rays;
      fwrite_errcheck(&dummy,(size_t) 1,sizeof(int),fp);
      fclose(fp);
      t0 += MPI_Wtime();
      
      fprintf(stderr,"writing %ld rays to file '%s' took %g seconds.\n",NumRaysInFile,name,t0);
      
      assert(nwg == NumRaysInFile-nw); //error check # of rays recvd
    }

  //error check # of rays written
  MPI_Allreduce(&nw,&nwg,1,MPI_LONG,MPI_SUM,fileComm);
  assert(nw == nwc);
  assert(nwg == NumRaysInFile);
  
  free(StartRaysInPeanoCell);
  free(NumRaysInPeanoCell);
  free(chunkRays);
}
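The writer above follows a small protocol: every non-writing task first sends its total ray count (TAG_RAYIO_TOTNUM), then repeats pairs of a chunk length (TAG_RAYIO_NUMCHUNK) and the raw chunk bytes (TAG_RAYIO_CHUNKDATA) until the total is exhausted. A stripped-down sketch of the receiving side; the tag values and the write_bytes sink below are placeholders, not the definitions used in the original source.

#include <mpi.h>
#include <stddef.h>

/* Placeholder tags; the real TAG_RAYIO_* constants are defined elsewhere. */
#define TAG_RAYIO_TOTNUM    90
#define TAG_RAYIO_NUMCHUNK  91
#define TAG_RAYIO_CHUNKDATA 92

/* Receive (total, then chunk-by-chunk) from one sender and hand each chunk
   to a caller-supplied sink.  buf must hold the largest possible chunk. */
static void recv_chunks(int src, size_t bytes_per_ray, char *buf,
                        void (*write_bytes)(const char *, size_t))
{
	long total, nchunk;
	MPI_Status status;

	MPI_Recv(&total, 1, MPI_LONG, src, TAG_RAYIO_TOTNUM, MPI_COMM_WORLD, &status);
	while (total > 0) {
		MPI_Recv(&nchunk, 1, MPI_LONG, src, TAG_RAYIO_NUMCHUNK, MPI_COMM_WORLD, &status);
		MPI_Recv(buf, (int) (bytes_per_ray * (size_t) nchunk), MPI_BYTE, src,
		         TAG_RAYIO_CHUNKDATA, MPI_COMM_WORLD, &status);
		write_bytes(buf, bytes_per_ray * (size_t) nchunk);
		total -= nchunk;
	}
}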
Example no. 10
void find_particles_and_save_them(int num)
{
  int n, k, count_local, *countlist, counttot, rep;
  double a3inv;
  MyFloat dx, dy, r2;
  char fname[1000];
  MPI_Status status;
  FILE *fd = 0;

  countlist = mymalloc(sizeof(int) * NTask);
  particles = mymalloc(sizeof(struct line_of_sight_particles) * N_gas);

  a3inv = 1.0 / (All.Time * All.Time * All.Time);

  for(n = 0, count_local = 0; n < N_gas; n++)
    {
      if(P[n].Type == 0)
	{
	  dx = los_periodic(P[n].Pos[Los->xaxis] - Los->Xpos);
	  dy = los_periodic(P[n].Pos[Los->yaxis] - Los->Ypos);

	  r2 = dx * dx + dy * dy;

	  if(r2 < PPP[n].Hsml * PPP[n].Hsml)
	    {
	      for(k = 0; k < 3; k++)
		particles[count_local].Pos[k] = P[n].Pos[k];

	      particles[count_local].Hsml = PPP[n].Hsml;
	      particles[count_local].Vz = P[n].Vel[Los->zaxis];
	      particles[count_local].Utherm = SphP[n].Entropy / GAMMA_MINUS1 * pow(SphP[n].d.Density *
										   a3inv, GAMMA_MINUS1);
	      particles[count_local].Mass = P[n].Mass;
	      particles[count_local].Metallicity = P[n].Metallicity;

	      count_local++;
	    }
	}
    }

  MPI_Gather(&count_local, 1, MPI_INT, countlist, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if(ThisTask == 0)
    {
      sprintf(fname, "%s/los/part_los_z%05.3f_%03d.dat", All.OutputDir, 1 / All.Time - 1, num);

      if(!(fd = fopen(fname, "w")))
	{
	  printf("can't open file `%s`\n", fname);
	  endrun(112);
	}

      fwrite(&count_local, sizeof(int), 1, fd);	/* will be overwritten later */
      fwrite(&LosGlobal->xaxis, sizeof(int), 1, fd);
      fwrite(&LosGlobal->yaxis, sizeof(int), 1, fd);
      fwrite(&LosGlobal->zaxis, sizeof(int), 1, fd);
      fwrite(&LosGlobal->Xpos, sizeof(double), 1, fd);
      fwrite(&LosGlobal->Ypos, sizeof(double), 1, fd);
      fwrite(&LosGlobal->BoxSize, sizeof(double), 1, fd);
      fwrite(&LosGlobal->Wmax, sizeof(double), 1, fd);
      fwrite(&LosGlobal->Time, sizeof(double), 1, fd);
    }


  for(rep = 0, counttot = 0; rep < NTask; rep++)
    {
      if(ThisTask != 0 && rep == ThisTask && count_local > 0)
	MPI_Ssend(particles, sizeof(struct line_of_sight_particles) * count_local, MPI_BYTE, 0,
		  TAG_PDATA, MPI_COMM_WORLD);

      if(ThisTask == 0)
	{
	  if(rep > 0 && countlist[rep] > 0)
	    MPI_Recv(particles, sizeof(struct line_of_sight_particles) * countlist[rep],
		     MPI_BYTE, rep, TAG_PDATA, MPI_COMM_WORLD, &status);

	  fwrite(particles, sizeof(struct line_of_sight_particles), countlist[rep], fd);

	  counttot += countlist[rep];
	}
    }

  if(ThisTask == 0)
    {
      fclose(fd);
      if(!(fd = fopen(fname, "r+")))
	{
	  printf("can't open file `%s'\n", fname);
	  endrun(113);
	}

      fseek(fd, 0, SEEK_SET);	/* rewind so the placeholder count written first is overwritten */
      fwrite(&counttot, sizeof(int), 1, fd);
      fclose(fd);
    }

  myfree(particles);
  myfree(countlist);
}
Example no. 11
int *__ierr;
int		buflen;
va_list		ap;

va_start(ap, unknown);
buf = unknown;
if (_numargs() == NUMPARAMS+1) {
	buflen = va_arg(ap, int) / 8;		/* The length is in bits. */
}
count =		va_arg(ap, int *);
datatype =	va_arg(ap, MPI_Datatype*);
dest =		va_arg(ap, int *);
tag =		va_arg(ap, int *);
comm =		va_arg(ap, MPI_Comm*);
__ierr =	va_arg(ap, int *);
*__ierr = MPI_Ssend(MPIR_F_PTR(buf),*count,*datatype,*dest,*tag,*comm);
}
#else
 void mpi_ssend_( buf, count, datatype, dest, tag, comm, __ierr )
void             *buf;
int*count,*dest,*tag;
MPI_Datatype     *datatype;
MPI_Comm         *comm;
int *__ierr;
{
_fcd temp;
if (_isfcd(buf)) {
	temp = _fcdtocp(buf);
	buf = (void *)temp;
}
*__ierr = MPI_Ssend(MPIR_F_PTR(buf),*count,*datatype,*dest,*tag,*comm);
}
#endif
Example no. 12
int Stg_MPI_Ssend( char* file, int line, void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm ) {
	Stream* stream = Journal_Register( Info_Type, "mpi" );
	Journal_Printf( stream, "%s %d, rank %d MPI_Ssend: tag = %d, count = %d, datatype = %d, dest = %d\n", file, line, Stg_Messaging_GetRank( comm ), tag, count, datatype, dest );
	return MPI_Ssend( buf, count, datatype, dest, tag, comm );
}
Example no. 13
int main(int argc, char * argv[]) {
	int world_size;
	int error;
	int n;
	int T_todo;
	int i,j,k,l;
	int dim;
	int type;
	int my_rank;
	double ratio;
	int *spin_out, *spin_in;
	int dest, recv;
	int swap;
	double r;
	int flips;
	int swap_attempts, swap_success;
	double *Es ,*Ts, *E_out, *T_out;
	double *Rs, *R_out;
	double e_recv, e_mine;
	double T_max, T_min, T_steps, T_curr, t_recv;
	spintype *s;
	MPI_Status status_info;
	double coupl[3] = {-1,-1,-1};
	coupling = coupl;
	
	/* Get in Command line Args */
	if (argc != 8) {
		printf("Usage: a.out N dim type T_Min T_step T_max Flips\n");
		exit(EXIT_FAILURE);
	}
	n = atoi(argv[1]);
	dim = atoi(argv[2]);
	type = atoi(argv[3]);
	T_min = atof(argv[4]);
	T_steps = atof(argv[5]);
	T_max = atof(argv[6]);
	flips = atoi(argv[7]);
	
	DEBUGLINE printf("Passed: %d %d %d %lf %lf %lf\n", n, dim, type, T_min, T_steps, T_max);
	
	/* Allocate S */
	s = malloc(sizeof(spintype)*pow(n,dim));
	spin_out = malloc(sizeof(int)* pow(n,dim));
	spin_in = malloc(sizeof(int) * pow(n,dim));
	if (s == NULL || spin_out == NULL || spin_in == NULL) {
		printf("Didn't get the memory for S :( \n");
		exit(EXIT_FAILURE);
	}
	/* Setup */
	switch (type) {
		case 1:
			setupSqrSystem(s,n, dim);
			break;
		case 2:
			setupTriSystem(s,n, dim);
			break;
		default:
			setupTriSystem(s,n, dim);
	}

	initSpins(s, n, dim);
	MPI_Init(&argc, &argv);
	//Find out how big the world is
	swap_attempts =0;
	swap_success =0;
	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
	T_todo = (int)ceil(((T_max - T_min)/T_steps)/world_size);
	DEBUGLINE printf("EACH HAS %d to do\n", T_todo);
	if (world_size == 1) {
		printf("Only got 1 processor... Bailing out\n");
		//exit(EXIT_FAILURE);
	}
	
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	if(my_rank ==0) 
		printf("#Got %d processors \n", world_size);
	
	Es = malloc(sizeof(double)*T_todo);
	Ts = malloc(sizeof(double)*T_todo);
	Rs = malloc(sizeof(double)*T_todo);
	
	if (!Es || !Ts) {
		printf("Didn't get enough memory");
		exit(EXIT_FAILURE);
	}
	
	if (my_rank ==0) {
		T_out =  malloc(sizeof(double) *(world_size*T_todo+GOOD_MEASURE));
		for(j=0; j<world_size;j++) {
			for (i = 0; i < T_todo; i++) {
				T_out[i+(j*T_todo)] = T_min + (world_size*i +j)* T_steps;
			//	printf("#%d = %lf\n",i+j*T_todo, T_out[i+j*T_todo]);
			}
		}
	}	
//	DEBUGLINE printf("Starting scatter\n", my_rank, j,T_curr);	
	MPI_Scatter(T_out, T_todo, MPI_DOUBLE, Ts, T_todo, MPI_DOUBLE, 0, MPI_COMM_WORLD);
//	DEBUGLINE printf("%d, Finished Scatter\n has %d to do", my_rank ,T_todo);
//	for(i=0; i < T_todo; i++)
//		printf("%d: %lf\n", my_rank, Ts[i]);
//	//DEBUGLINE printf("%d entering main loop\n", my_rank);
	for (l = 0; l < T_todo; l++) {
		T_curr = Ts[l];
	//	printf("#%d: Running Metropolis at %lf\n", my_rank, T_curr);
		swap_success=0;
		swap_attempts=0;
		for ( j =0; j < SWAPS; j ++) {
			//DEBUGLINE printf("#%d: Running Metropolis run %d at %lf\n", my_rank, j,T_curr);
			metropolis(s, n, dim, flips, T_curr, &ratio, 0);
			for (k = 0; k < 2; k++) {
				
				/* Prepare spins for transport */
				for (i =0; i <pow(n,dim); i++) 
						spin_out[i] = s[i].s;
				dest = my_rank-1;
				recv =  my_rank +1;
		
				e_mine = energy_calc(s,n,dim,0.0);
				
				if ((my_rank+1) %2 == k) {
					
					if(dest >=0 ) {
						swap_attempts++;
					//	DEBUGLINE printf("%d: has energy %lf, Partner: %d\n", my_rank, e_mine,dest);
						
						
						MPI_Recv(&t_recv, 1, MPI_DOUBLE, dest, TEMP, MPI_COMM_WORLD, &status_info);
						MPI_Recv(&e_recv, 1, MPI_DOUBLE, dest, ENERGY, MPI_COMM_WORLD, &status_info);
						
						
					//	DEBUGLINE printf("%d: ....Partner answered\n", my_rank);
						r = rand();
						r = (double)r/RAND_MAX;
						if (r < exp((1/(kb*T_curr) - 1/(kb*t_recv))*(e_mine - e_recv))) {
							swap_success++;
							swap = 1;
							
							MPI_Ssend(&swap, 1, MPI_INT, dest, SWAP, MPI_COMM_WORLD);
						//	DEBUGLINE printf("%d: sending to %d\n", my_rank, dest);
							
							MPI_Ssend(spin_out, pow(n,dim), MPI_INT, dest, HIGH_T, MPI_COMM_WORLD);
						//	DEBUGLINE printf("%d: Sent\n", my_rank);
							
							MPI_Recv(spin_in, pow(n,dim), MPI_INT, dest, LOW_T, MPI_COMM_WORLD, &status_info);
						} else { 
							swap = 0;
							//DEBUGLINE printf("%d is at %lf has just rejected %d  at %lf\n", my_rank, T_curr, dest, t_recv);
							MPI_Ssend(&swap, 1, MPI_INT, dest, SWAP, MPI_COMM_WORLD);
						}
						
					}
					
					
				} else {
					swap=0;
					if (recv <world_size) {
					  swap_attempts++;
						MPI_Ssend(&T_curr, 1, MPI_DOUBLE, recv, TEMP, MPI_COMM_WORLD);
			//			DEBUGLINE printf("%d: has energy %lf, Partner %d\n", my_rank, e_mine,recv);
						MPI_Ssend(&e_mine, 1, MPI_DOUBLE, recv, ENERGY, MPI_COMM_WORLD);
			//			DEBUGLINE printf("%d: ....Partner answered\n", my_rank);
			//			DEBUGLINE("%d: Waiting for Swap confirmation......\n", my_rank);
						MPI_Recv(&swap, 1, MPI_INT, recv, SWAP, MPI_COMM_WORLD, &status_info);
			//			DEBUGLINE printf("%d: ....Swap details received\n", my_rank);
						if(swap == 1) {
							swap_success++;
							
				//			DEBUGLINE printf("%d: Waiting for data from %d\n", my_rank, recv);
							MPI_Recv(spin_in, pow(n,dim), MPI_INT, recv, HIGH_T, MPI_COMM_WORLD, &status_info);
				//			DEBUGLINE printf("%d: Swapping\n", my_rank);
				//			DEBUGLINE printf("%d: Received\n", my_rank);
							MPI_Ssend(spin_out, pow(n,dim), MPI_INT, recv, LOW_T, MPI_COMM_WORLD);
						}
					}
						
				}
				/* Put new spins into our system */
				if (swap==1) 
				  {
				  for (i =0; i < pow(n,dim); i++)
				    s[i].s = spin_in[i];
				  }
				}
				metropolis(s, n, dim, flips, T_curr, &ratio, 0);
				
			
		}
		metropolis(s, n, dim, flips*10, T_curr, &ratio, 0);
		Es[l] = energy_calc(s, n, dim, 0.0);
		Rs[l] = (double) swap_success;
		//Rs[l] = (double) (my_rank==1 || my_rank==2)? (swap_success/2.0):swap_success;
		Rs[l] = (double) Rs[l]/swap_attempts;
		if (Rs[l] ==1 ) {
			printf("#%d: has swapped every time at %lf\n", my_rank, T_curr);
		}
	//	if (Es[l] == 0)
		//	printf("%d: Zero energy\n", my_rank);
		Ts[l] = T_curr;
	}	
	//if (my_rank ==0) {
			k = world_size*T_todo;
			E_out = malloc(sizeof(double) *(world_size*T_todo+GOOD_MEASURE));
			R_out = malloc(sizeof(double) *(world_size*T_todo+GOOD_MEASURE));
			
		//}
		
		DEBUGLINE printf("#GATHERING Es\n");
		MPI_Gather(Es, T_todo, MPI_DOUBLE, E_out, T_todo, MPI_DOUBLE, 0, MPI_COMM_WORLD);
		DEBUGLINE printf("#GATHERING Ts\n");
		MPI_Gather(Ts, T_todo, MPI_DOUBLE, T_out, T_todo, MPI_DOUBLE, 0, MPI_COMM_WORLD);
		MPI_Gather(Rs, T_todo, MPI_DOUBLE, R_out, T_todo, MPI_DOUBLE, 0, MPI_COMM_WORLD);
		if (my_rank == 0) {
			for(i =0; i < T_todo*world_size; i++)
			printf("%lf\t%lf\t%lf\n", T_out[i], E_out[i], R_out[i]);
		}
	
		printf("#%d: My ratio was: %lf\n", my_rank, (double)swap_success/swap_attempts);
	
	
	
	MPI_Finalize();
	return(0);
}
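The swap test above accepts a replica exchange when a uniform random number falls below exp[(1/(kb*Ti) - 1/(kb*Tj)) * (Ei - Ej)], where Ti, Ei belong to the replica making the decision and Tj, Ej to its partner. A small self-contained restatement of that criterion; accept_swap is an illustrative helper and kb is passed in rather than taken from the example's macro.

#include <math.h>
#include <stdlib.h>

/* Parallel-tempering acceptance test: returns 1 to swap configurations,
   0 to keep them, using the same criterion as the example above. */
static int accept_swap(double kb, double Ti, double Ei, double Tj, double Ej)
{
	double p = exp((1.0 / (kb * Ti) - 1.0 / (kb * Tj)) * (Ei - Ej));
	double r = (double) rand() / RAND_MAX;

	return r < p;
}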
Example no. 14
int main( int argc, char **argv )
{
    MPI_Comm comm;
    MPI_Request r[MAX_REQ];
    MPI_Status  s[MAX_REQ];
    int msgsize, maxmsg, root, i, j, size, rank, err = 0, msgcnt, toterr;
    int *sbuf, *rbuf;

    MPI_Init( &argc, &argv );
    
    comm = MPI_COMM_WORLD;

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );

    if (size < 2) {
	printf( "This test requires at least 2 processors\n" );
	MPI_Abort( comm, 1 );
    }

    /* First, try large blocking sends to root */
    root = 0;
    
    maxmsg =  MAX_MSG;
    msgsize = 128;
    msgcnt  = MAX_MSG_CNT;
    if (rank == root && verbose) printf( "Blocking sends: " );
    while (msgsize <= maxmsg) {
	if (rank == root) {
	    if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); }
	    rbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!rbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    for (i=0; i<size; i++) {
		if (i == rank) continue;
		for (j=0; j<msgcnt; j++) {
		    SetupRdata( rbuf, msgsize );
		    MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s );
		    err += CheckData( rbuf, msgsize, 2*i, s );
		}
	    }
	    free( rbuf );
	}
	else {
	    sbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!sbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    SetupData( sbuf, msgsize, 2*rank );
	    for (j=0; j<msgcnt; j++) 
		MPI_Send( sbuf, msgsize, MPI_INT, root, 2*rank, comm );
	    free( sbuf );
	}
	msgsize *= 4;
    }
    if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); }

    /* Next, try unexpected messages with Isends */
    msgsize = 128;
    maxmsg  = MAX_MSG;
    msgcnt  = MAX_REQ;
    if (rank == root && verbose) printf( "Unexpected recvs: " );
    while (msgsize <= maxmsg) {
	if (rank == root) {
	    if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); }
	    rbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!rbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    MPI_Barrier( comm );
	    for (i=0; i<size; i++) {
		if (i == rank) continue;
		for (j=0; j<msgcnt; j++) {
		    SetupRdata( rbuf, msgsize );
		    MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s );
		    err += CheckData( rbuf, msgsize, 2*i, s );
		}
	    }
	    free( rbuf );
	}
	else {
	    sbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!sbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    SetupData( sbuf, msgsize, 2*rank );
	    for (j=0; j<msgcnt; j++) {
		MPI_Isend( sbuf, msgsize, MPI_INT, root, 2*rank, comm, &r[j] );
	    }
	    MPI_Barrier( comm );
	    MPI_Waitall( msgcnt, r, s );
	    free( sbuf );
	}
	msgsize *= 4;
    }
    if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); }

    /* Try large synchronous blocking sends to root */
    root = 0;
    
    msgsize = 128;
    maxmsg  = MAX_MSG;
    if (rank == root && verbose) printf( "Synchronous sends: " );
    while (msgsize <= maxmsg) {
	if (rank == root) {
	    if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); }
	    rbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!rbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    for (i=0; i<size; i++) {
		if (i == rank) continue;
		for (j=0; j<msgcnt; j++) {
		    SetupRdata( rbuf, msgsize );
		    MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s );
		    err += CheckData( rbuf, msgsize, 2*i, s );
		}
	    }
	    free( rbuf );
	}
	else {
	    sbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!sbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    SetupData( sbuf, msgsize, 2*rank );
	    for (j=0; j<msgcnt; j++) 
		MPI_Ssend( sbuf, msgsize, MPI_INT, root, 2*rank, comm );
	    free( sbuf );
	}
	msgsize *= 4;
    }
    if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); }

    MPI_Allreduce( &err, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    
    if (rank == 0) {
	if (toterr == 0) printf( "No errors\n" );
	else printf( "!! found %d errors\n", toterr );
    }
    if (toterr) {
	printf( "!! found %d errors on processor %d\n", err, rank );
    }

    MPI_Finalize( );
    return 0;
}
Example no. 15
int main( int argc, char **argv)
{
    int rank; /* My Rank (0 or 1) */
    int act_size = 0;
    int flag, np, rval, i;
    int buffer[SIZE];
    double t0;
    char *Current_Test = NULL;
    MPI_Status status, status1, status2;
    int count1, count2;
    int sizes[4];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size( MPI_COMM_WORLD, &np );
    /*if (np != 2) {
        fprintf(stderr, "*** This program uses exactly 2 processes! ***\n");
        MPI_Abort( MPI_COMM_WORLD, 1 );
        }*/

    sizes[0] = 0;
    sizes[1] = 1;
    sizes[2] = 1000;
    sizes[3] = SIZE;
/*    for (i = 0; i < 4; i++ ) { */
    for (i = 1; i < 2; i++ ) {
	act_size = sizes[i];
        if (rank == src) { 
            Generate_Data(buffer, SIZE);
            MPI_Recv( buffer, 0, MPI_INT, dest, 0, MPI_COMM_WORLD, &status );
            MPI_Send( buffer, 0, MPI_INT, dest, 0, MPI_COMM_WORLD );
            MPI_Ssend( buffer, act_size, MPI_INT, dest, 1, MPI_COMM_WORLD );
            MPI_Ssend( buffer, act_size, MPI_INT, dest, 2, MPI_COMM_WORLD );
            
        } else if (rank == dest) {
            Test_Init("ssendtest", rank);
            /* Test 1 */
            Current_Test = "Ssend Test (Synchronous Send -> Normal Receive)";
            MPI_Send( buffer, 0, MPI_INT, src, 0, MPI_COMM_WORLD );
            MPI_Recv( buffer, 0, MPI_INT, src, 0, MPI_COMM_WORLD, &status );
            t0 = MPI_Wtime();
            flag = 0;
	    /* This test depends on a working wtime.  Make a simple check */
	    if (t0 == 0 && MPI_Wtime() == 0) {
		fprintf( stderr, 
		 "MPI_WTIME is returning 0; a working value is needed\n\
for this test.\n" );
		Test_Failed(Current_Test);
		fprintf( stderr, "[%i] Aborting\n",rank );fflush(stderr);
		MPI_Abort( MPI_COMM_WORLD, 1 );
	    }
            while (MPI_Wtime() - t0 < MAX_TIME) {
                MPI_Iprobe( src, 2, MPI_COMM_WORLD, &flag, &status );
                if (flag) {
                    Test_Failed(Current_Test);
                    break;
                    }
                }
            if (!flag) 
                Test_Passed(Current_Test);
            MPI_Recv( buffer, act_size, MPI_INT, src, 1, MPI_COMM_WORLD, 
                     &status1 );
            MPI_Recv( buffer, act_size, MPI_INT, src, 2, MPI_COMM_WORLD, 
                     &status2 );
            
            MPI_Get_count( &status1, MPI_INT, &count1 );
            MPI_Get_count( &status2, MPI_INT, &count2 );
            if (count1 != act_size) {
                fprintf( stdout, 
                        "(1) Wrong count from recv of ssend: got %d (%d)\n", 
                        count1, act_size );
                }
            if (status1.MPI_TAG != 1) {
                fprintf( stdout, "(1) Wrong tag from recv of ssend: got %d\n", 
                        status1.MPI_TAG );
                }
            if (count2 != act_size) {
                fprintf( stdout, 
                        "(2) Wrong count from recv of ssend: got %d (%d)\n", 
                        count1, act_size );
                }
            if (status2.MPI_TAG != 2) {
                fprintf( stdout, "(2) Wrong tag from recv of ssend: got %d\n", 
                        status2.MPI_TAG );
                }

            }
Example no. 16
/* Main */
int main(int argc, char **argv)
{
	/* Initialize MPI and get basic info. */
    int comm_sz;
    int my_rank;   
	MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    
    /* Reject non-powers-of-2 number of processors. */
    if (comm_sz == 0) {
    	MPI_Finalize();
    	exit(0);
    }
    int n = comm_sz;
  	while (n != 1) {
    	if (n%2 != 0) {
    		if (my_rank == 0) {
      			printf("This program only accepts numbers of processors that are powers of 2.\n");
      		}
      		MPI_Finalize();
      		exit(0);
      	}
    	n = n/2;
  	}

    /* Get array_size from command line (must be a power of 2 and greater than or equal to num_processors). */
    if (argc < 2) {
    	if (my_rank == 0) {
        	printf("Usage: mpiexec -n num_processors ./pmerge array_size\n");
    	}
    	MPI_Finalize();
        exit(0);
    }
    if (isdigit(argv[1][0]) == 0) {
    	if (my_rank == 0) {
        	printf("Usage: mpiexec -n num_processors ./pmerge array_size\n");
        }
        MPI_Finalize();
        exit(0);
    }
    int array_size = strtol(argv[1], NULL, 10);
    if (array_size < comm_sz) {
    	if (my_rank == 0) {
        	printf("array_size must be larger than num_processors\n");
    	}
    	MPI_Finalize();
        exit(0);
    }
    n = array_size;
  	while (n != 1) {
    	if (n%2 != 0) {
    		if (my_rank == 0) {
      			printf("This program only accepts array sizes that are powers of 2.\n");
      		}
      		MPI_Finalize();
      		exit(0);
      	}
    	n = n/2;
  	}
    
    /* Seed random number generator. */
    unsigned int seed = my_rank;
    
    /* Synchronize processes and get start time. */
    double my_start, my_end, my_elapsed, global_elapsed;
    MPI_Barrier(MPI_COMM_WORLD);
    my_start = MPI_Wtime();
    
    /* Broadcast array_size to all processors. */
    if (comm_sz > 1) {
    	MPI_Bcast(&array_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }
    
    /* Generate array_size/comm_sz numbers to fill individual arrays. */
    int merge_array[array_size], my_array[array_size], recv_array[array_size];
    int i;
    for (i=0; i<(array_size/comm_sz); i++) {
    	my_array[i] = (rand_r(&seed) % 99) + 1;
    }
    
    if (comm_sz == 1) {
    	/* Do serial mergesort. */
    	mergesort(my_array, array_size);
    	/* Print results. */
    	printf("Final sorted array:\n");
    	for (i=0; i<array_size; i++) {
    		printf(" %d", my_array[i]);
   		}
   		printf("\n");
    }
    else {
    	/* Sort individual arrays and transfer to merge_array. */
    	qsort(my_array, array_size/comm_sz, sizeof(int), cmpfunc);
    	for (i=0; i<(array_size/comm_sz); i++) {
   			merge_array[i] = my_array[i];
   		}
    
    	/* Gather individual arrays to process 0 and print. */
    	MPI_Gather(&merge_array, array_size/comm_sz, MPI_INT, &recv_array, array_size/comm_sz, MPI_INT, 0, MPI_COMM_WORLD);
   		if (my_rank == 0) {
    		printf("Individual sorted arrays:\n");
    		for (i=0; i<array_size; i++) {
    			if ((i%(array_size/comm_sz)) == 0 && i != 0) {
   					printf("\n");
   				}
    			printf(" %d", recv_array[i]);
   			}
   			printf("\n");
   		}
   	
   		/* Merge individual arrays, two at a time, into a single sorted array. */
   		double time;
   		int divide, my, recv;
   		for (time=0; time<ceil(log2((double)comm_sz)); time+=1.0) {
    		divide = (int)pow(2.0, (double)(time+1));
        	if (my_rank % divide == 0) {
        		/* Receive from process my_rank+(divide/2). */
            	MPI_Recv(&recv_array, (array_size/comm_sz)*pow(2,time), MPI_INT, my_rank+(divide/2), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            	/* Merge arrays. */
            	my = recv = 0;
            	for (i=0; i<(array_size/comm_sz)*pow(2,(time+1)); i++) {
                	if ((my >= (array_size/comm_sz)*pow(2,time) || recv_array[recv] < my_array[my]) && recv < (array_size/comm_sz)*pow(2,time)) {
                		merge_array[i] = recv_array[recv];
                		recv++;
                	}
                	else {
                		merge_array[i] = my_array[my];
                		my++;
                	}
            	}
            	/* Copy merge_array to my_array. */
            	for (i=0; i<(array_size/comm_sz)*pow(2,(time+1)); i++) {
                	my_array[i] = merge_array[i];
            	}
    		}
    		else if (my_rank % (divide/2) == 0) {
    			/* Send to process my_rank-(divide/2). */
        		MPI_Ssend(&merge_array, (array_size/comm_sz)*pow(2,time), MPI_INT, my_rank-(divide/2), 0, MPI_COMM_WORLD);
    		}
    	}
    
    	/* Print results. */
    	if (my_rank == 0) {
    		printf("Final sorted array:\n");
    		for (i=0; i<array_size; i++) {
    			printf(" %d", merge_array[i]);
   			}
   			printf("\n");
   		}
   	}
    
    /* Get end time. */
    my_end = MPI_Wtime();
    
    /* Calculate elapsed time. */
    my_elapsed = my_end - my_start;
    MPI_Reduce(&my_elapsed, &global_elapsed, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (my_rank == 0) {
    	printf("Elapsed time: %.3f milliseconds\n", global_elapsed*1000);
    }
    
    /* Finalize MPI and exit. */
    MPI_Finalize();
    exit(0);
}
Example no. 17
  inline
  void operator()( const VectorType & v ) const
  {
    typedef typename VectorType::value_type  scalar_type ;

    const Teuchos::MpiComm<int> & teuchos_mpi_comm = dynamic_cast< const Teuchos::MpiComm<int> & >( *comm );

    MPI_Comm mpi_comm = * teuchos_mpi_comm.getRawMpiComm();

    const int mpi_tag = 42 ;
    const unsigned vchunk = v.dimension_1();

    // Subvector for receives
    const std::pair<unsigned,unsigned> recv_range( count_owned , count_owned + count_receive );
    const VectorType recv_vector = Kokkos::subview< VectorType >( v , recv_range );

    std::vector< MPI_Request > recv_request( recv_msg.dimension_0() , MPI_REQUEST_NULL );

    { // Post receives
      scalar_type * ptr =
        ReceiveInPlace ? recv_vector.ptr_on_device() : host_recv_buffer.ptr_on_device();

      for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) {
        const int proc  = recv_msg(i,0);
        const int count = recv_msg(i,1) * vchunk ;

        MPI_Irecv( ptr , count * sizeof(scalar_type) , MPI_BYTE ,
                   proc , mpi_tag , mpi_comm , & recv_request[i] );

        ptr += count ;
      }
    }

    MPI_Barrier( mpi_comm );

    { // Pack and send 
      const Pack pack( send_nodeid , v , send_buffer );

      Kokkos::deep_copy( host_send_buffer , send_buffer );

      scalar_type * ptr = host_send_buffer.ptr_on_device();

      for ( size_t i = 0 ; i < send_msg.dimension_0() ; ++i ) {
        const int proc  = send_msg(i,0);
        const int count = send_msg(i,1) * vchunk ;

        // MPI_Ssend blocks until
        // (1) a receive is matched for the message and
        // (2) the send buffer can be re-used.
        //
        // It is suggested that MPI_Ssend will have the best performance:
        // http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .

        MPI_Ssend( ptr ,
                   count * sizeof(scalar_type) , MPI_BYTE ,
                   proc , mpi_tag , mpi_comm );

        ptr += count ;
      }
    }

    // Wait for receives and verify:

    for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) {
      MPI_Status recv_status ;
      int recv_which = 0 ;
      int recv_size  = 0 ;

      MPI_Waitany( recv_msg.dimension_0() , & recv_request[0] , & recv_which , & recv_status );

      const int recv_proc = recv_status.MPI_SOURCE ;

      MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );

      // Verify message properly received:

      const int  expected_proc = recv_msg(recv_which,0);
      const int  expected_size = recv_msg(recv_which,1) * vchunk * sizeof(scalar_type);

      if ( ( expected_proc != recv_proc ) ||
           ( expected_size != recv_size ) ) {

        int local_rank  = 0 ;

        MPI_Comm_rank( mpi_comm , & local_rank );

        std::ostringstream msg ;
        msg << "VectorImport error:"
            << " P" << local_rank
            << " received from P" << recv_proc
            << " size "     << recv_size
            << " expected " << expected_size
            << " from P"    << expected_proc ;
        throw std::runtime_error( msg.str() );
      }
    }

    // Copy received data to device memory.

    if ( ! ReceiveInPlace ) { Kokkos::deep_copy( recv_vector , host_recv_buffer ); }
  }
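The comment block in the loop above spells out the protocol: post every MPI_Irecv up front, synchronize, push the packed data out with MPI_Ssend, then drain the receives with MPI_Waitany and check each message's source and byte count against what was expected. Below is a minimal plain-MPI sketch of that protocol, without the Kokkos views and Teuchos communicator; the ring neighbours, tag, and message size are invented for illustration only.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch only: post the receive first, synchronize, send with MPI_Ssend,
 * then verify the source and byte count of what arrived. */
int main(int argc, char *argv[])
{
    const int tag = 42, count = 1000;
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const int right  = (rank + 1) % size;
    const int left   = (rank + size - 1) % size;
    const int nbytes = (int)(count * sizeof(double));

    double *send_buf = malloc(count * sizeof(double));
    double *recv_buf = malloc(count * sizeof(double));
    for (int i = 0; i < count; ++i) send_buf[i] = rank + i;

    /* 1) post the receive before any send is issued */
    MPI_Request recv_req;
    MPI_Irecv(recv_buf, nbytes, MPI_BYTE, left, tag, MPI_COMM_WORLD, &recv_req);

    /* 2) make sure every rank has its receive posted */
    MPI_Barrier(MPI_COMM_WORLD);

    /* 3) synchronous send: blocks until the matching receive is found */
    MPI_Ssend(send_buf, nbytes, MPI_BYTE, right, tag, MPI_COMM_WORLD);

    /* 4) complete the receive and verify who sent it and how much arrived */
    MPI_Status status;
    int recv_bytes = 0;
    MPI_Wait(&recv_req, &status);
    MPI_Get_count(&status, MPI_BYTE, &recv_bytes);
    if (status.MPI_SOURCE != left || recv_bytes != nbytes)
        fprintf(stderr, "P%d: unexpected message (source %d, %d bytes)\n",
                rank, status.MPI_SOURCE, recv_bytes);

    free(send_buf);
    free(recv_buf);
    MPI_Finalize();
    return 0;
}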
Esempio n. 18
0
int main( int argc, char **argv )
{
  int size, rank, flag, i;
  int *buf1, *buf2, cnt;
  double t0;
  MPI_Status statuses[2];
  MPI_Request req[2];

  MPI_Init( &argc, &argv );
  MPI_Comm_size( MPI_COMM_WORLD, &size );
  MPI_Comm_rank( MPI_COMM_WORLD, &rank );

  if (size < 2) {
    printf( "This test requires at least 2 processors\n" );
    MPI_Abort( MPI_COMM_WORLD, 1 );
    return 1;
  }
  
  /* Large enough that almost certainly a rendezvous algorithm will be used
     by Issend.  buflimit.c will give you a more reliable value */
  cnt = 35000;

  /* Test:
     process 0                        process 1
                                      Irecv1
                                      Irecv2
     Sendrecv                         Sendrecv
     pause(2 sec)                     pause(2 sec)
     Issend2                          Waitall
     test(2) for 5 secs
     Ssend1
     Wait(2) if necessary

     If the test for Issend2 never succeeds, then the waitall appears to be
     waiting for req1 first.  By using Issend, we can keep the program from
     hanging.
  */
  buf1 = (int *)malloc( cnt * sizeof(int) );
  buf2 = (int *)malloc( cnt * sizeof(int) );
  if (!buf1 || !buf2) {
    printf( "Could not allocate buffers of size %d\n", cnt );
    MPI_Abort( MPI_COMM_WORLD, 1 );
    return 1;
  }

  for (i=0; i<cnt; i++) {
    buf1[i] = i;
    buf2[i] = i;
  }

  if (rank == 0) {
    MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, size - 1, 3, 
		  MPI_BOTTOM, 0, MPI_BYTE, size - 1, 3, 
		  MPI_COMM_WORLD, &statuses[0] );
    Pause( 2.0 );
    MPI_Issend( buf2, cnt, MPI_INT, size-1, 2, MPI_COMM_WORLD, &req[0] );
    t0 = MPI_Wtime();
    flag = 0;
    while (t0 + 5.0 > MPI_Wtime() && !flag) 
      MPI_Test( &req[0], &flag, &statuses[0] );
    MPI_Ssend( buf1, cnt, MPI_INT, size-1, 1, MPI_COMM_WORLD );
    if (!flag) {
      printf( 
    "*ERROR: MPI_Waitall appears to be waiting for requests in the order\n\
they appear in the request list\n" );
      MPI_Wait( &req[0], &statuses[0] );
    }
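The trick this test relies on is that MPI_Issend cannot complete until the matching receive has been posted, so polling the request with MPI_Test doubles as a probe for whether the other side has reached its receive. A stripped-down sketch of just that probe, with an arbitrary message size and timeout rather than the original harness:

#include <mpi.h>
#include <stdio.h>

/* Sketch: MPI_Issend only completes once the matching receive exists, so
 * testing its request tells us whether the peer has posted that receive.
 * Message size and the 5-second timeout are arbitrary choices here. */
int main(int argc, char *argv[])
{
    enum { CNT = 35000 };
    static int buf[CNT];
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) MPI_Abort(MPI_COMM_WORLD, 1);

    if (rank == 0) {
        MPI_Request req;
        int flag = 0;
        MPI_Issend(buf, CNT, MPI_INT, 1, 0, MPI_COMM_WORLD, &req);
        double t0 = MPI_Wtime();
        while (!flag && MPI_Wtime() - t0 < 5.0)
            MPI_Test(&req, &flag, MPI_STATUS_IGNORE);
        printf(flag ? "receiver was ready\n"
                    : "receiver not ready within 5 seconds\n");
        MPI_Wait(&req, MPI_STATUS_IGNORE);   /* complete the send either way */
    }
    else if (rank == 1) {
        MPI_Recv(buf, CNT, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }

    MPI_Finalize();
    return 0;
}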
Esempio n. 19
0
/* combine sub domains in one -- checks & output */
void gather_check_output ( int myid, int numprocs, int *begin_slice,
                              int *end_slice, int *size_slice,
                                  int Nx, int iter, double h )
{
    int         i, ip;
    double      diff, difference, totaldiff;
    double      *final_phi;
    MPI_Status  recv_status;

    if( myid == 0 )
       fprintf( stderr,"\n Converged in %d iterations \n", iter );

                                        /* exchange the Ghost values */
    exchange ( myid, numprocs, begin_slice[myid], end_slice[myid] );

                                        /* check solution */
    diff = 0.0;
    for ( i=begin_slice[myid] ; i<=end_slice[myid] ; i++ )
      {
        if ( ibound[i] == 0 )
          {
            difference = 0.5 * ( phi[i-1] - 2*phi[i] + phi[i+1]
                                   - h*h*source[i] );
            if ( fabs( difference ) > diff )
                           diff = fabs( difference );
          }
      }
                                        /* find max amongst processes */
    MPI_Allreduce ( &diff, &totaldiff, 1, MPI_DOUBLE,
                                            MPI_MAX, MPI_COMM_WORLD );
    if ( myid == 0 )
        fprintf( stderr," Largest error: %e \n\n", totaldiff );


                                        /* gather pieces of phi */
    if ( myid == 0 )
      {
                                        /* space to gather final phi */
         final_phi = (double *)malloc( Nx*sizeof(double) );

                                        /* transfer own sub-domain */
                                        /* piece of phi            */
         for ( i=begin_slice[myid]; i<=end_slice[myid]; i++ )
           {
             final_phi[i] = phi[i];
           }
                                        /* receives pieces of    */
                                        /* phi from sub-domains  */
         for ( ip=1 ; ip<numprocs ; ip++ )
           {
              MPI_Recv( &final_phi[begin_slice[ip]], 
                      size_slice[ip], MPI_DOUBLE, ip, 229,
                                  MPI_COMM_WORLD, &recv_status); 
           }
                                        /* output results */
         for ( i=0; i<Nx ; i++ )
                   printf( " %f %f \n", i*h, final_phi[i] );

	                                /* free up memory space */
         free( (char *) final_phi );
      }
                                        /* nodes send phi to node 0 */
    else
      {
         MPI_Ssend( &phi[begin_slice[myid]], size_slice[myid], 
                       MPI_DOUBLE, 0, 229, MPI_COMM_WORLD );
      }

}
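The gather at the end of this routine, where rank 0 receives each slice with MPI_Recv while every other rank MPI_Ssends its own, is a hand-rolled MPI_Gatherv. A minimal sketch of the collective form, assuming each rank knows its own slice size and rank 0 knows all sizes and starting offsets; the slice sizes and values below are made up for illustration:

#include <mpi.h>
#include <stdlib.h>

/* Sketch: gather variable-sized slices of a distributed array onto rank 0
 * with a single MPI_Gatherv instead of a Recv/Ssend loop.  size_slice[] and
 * begin_slice[] mirror the arrays used by gather_check_output, but are
 * filled with placeholder values here. */
int main(int argc, char *argv[])
{
    int rank, nprocs;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    int *size_slice  = malloc(nprocs * sizeof(int));
    int *begin_slice = malloc(nprocs * sizeof(int));
    int Nx = 0;
    for (int ip = 0; ip < nprocs; ip++) {     /* each rank owns 10 points here */
        size_slice[ip]  = 10;
        begin_slice[ip] = Nx;
        Nx += size_slice[ip];
    }

    double *phi = malloc(size_slice[rank] * sizeof(double));
    for (int i = 0; i < size_slice[rank]; i++)
        phi[i] = rank;                        /* stand-in for the local solution */

    double *final_phi = (rank == 0) ? malloc(Nx * sizeof(double)) : NULL;

    /* counts and displacements are only significant on the root */
    MPI_Gatherv(phi, size_slice[rank], MPI_DOUBLE,
                final_phi, size_slice, begin_slice, MPI_DOUBLE,
                0, MPI_COMM_WORLD);

    free(phi); free(size_slice); free(begin_slice); free(final_phi);
    MPI_Finalize();
    return 0;
}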
Esempio n. 20
0
int do_file(const char *key, size_t keylen, const FileInfo *fi)
{
    struct timespec tv1;
    clock_gettime(CLOCK_MONOTONIC, &tv1);

    int my_st = rank2st[mpi_rank];
    int P = GET_P(fi->locations);
    if (P == NO_P
            || P == rebuild_target
            || TEST_BIT(fi->locations, rebuild_target) == 0)
        return 0;

    hs.storage_target = my_st;
    hs.sample = &pr_sample;

    if (helper == my_st) {
        /* Send fi and key to rebuild_target so it knows what's up */
        MPI_Ssend((void*)fi, sizeof(FileInfo), MPI_BYTE, st2rank[rebuild_target], 0, MPI_COMM_WORLD);
        MPI_Ssend((void*)key, keylen, MPI_BYTE, st2rank[rebuild_target], 0, MPI_COMM_WORLD);
    }
    /* The rank that holds the P block needs to read from parity instead of
     * chunks, and the rebuild target has to write to chunks rather than
     * parity. If the rebuild target had the original P block, we don't want to
     * switch - we just want to recalculate it. */
    const char *load_pat = "         chunks";
    const char *save_pat = "         parity";
    if ((P != rebuild_target) && (P == my_st || rebuild_target == my_st)) {
        const char *tmp = load_pat;
        load_pat = save_pat;
        save_pat = tmp;
    }
    FileInfo mod_fi = *fi;
    if (P != rebuild_target) {
        mod_fi.locations |= (1 << P);
        mod_fi.locations &= ~(1 << rebuild_target);
        mod_fi.locations = WITH_P(mod_fi.locations, (uint64_t)rebuild_target);
    }
    TaskInfo ti = { load_pat, save_pat, (P != rebuild_target), P };
    int report = process_task(&hs, key, &mod_fi, ti);
#if 0
#define FIRST_8_BITS(x)     ((x) & 0x80 ? 1 : 0), ((x) & 0x40 ? 1 : 0), \
      ((x) & 0x20 ? 1 : 0), ((x) & 0x10 ? 1 : 0), ((x) & 0x08 ? 1 : 0), \
      ((x) & 0x04 ? 1 : 0), ((x) & 0x02 ? 1 : 0), ((x) & 0x01 ? 1 : 0) 
    int locs = mod_fi.locations & 0xFF;
    int cP = GET_P(mod_fi.locations);
    if (rank2st[mpi_rank] == rebuild_target)
    printf("process_task(%d, '%s', %d%d%d%d%d%d%d%d, op=%d, np=%d, '%s', '%s')\n", my_st, key, FIRST_8_BITS(locs), P, cP, load_pat, save_pat);
#endif

    struct timespec tv2;
    clock_gettime(CLOCK_MONOTONIC, &tv2);
    double dt = (tv2.tv_sec - tv1.tv_sec) * 1.0
        + (tv2.tv_nsec - tv1.tv_nsec) * 1e-9;
    if (report) {
        pr_sample.dt += dt;
        pr_sample.nfiles += 1;
    }
    if (pr_sample.dt >= 1.0) {
        pr_add_tmp_to_total(&pr_sample);
        pr_report_progress(&pr_sender, pr_sample);
        pr_clear_tmp(&pr_sample);
    }
    return 0;
}
Esempio n. 21
0
int main(int argc, char *argv[])
{
    int rank, size;
    int provided;
    int buffer[1];
    MPI_Comm comm1, comm2, comm4;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Check that we're multi-threaded */
    if (provided != MPI_THREAD_MULTIPLE) {
        if (rank == 0) {
            printf
                ("MPI_Init_thread must return MPI_THREAD_MULTIPLE in order for this test to run.\n");
            fflush(stdout);
        }
        MPI_Finalize();
        return 1;
    }

    /* The test is this:
     * The main thread on ODD processors tells the other thread to start
     * a comm dup(on comm2), then starts a comm dup(on comm1) after a delay.
     * The main thread on even processors starts a comm dup(on comm1)
     *
     * The second thread on ODD processors waits until it gets a message
     * (from the same process) before starting the comm dup on comm2.
     */

    /* Create two communicators */
    MPI_Comm_dup(MPI_COMM_WORLD, &comm1);
    MPI_Comm_dup(MPI_COMM_WORLD, &comm2);

    /* Start a thread that will perform a dup comm2 */
    MTest_Start_thread(dup_thread, (void *) &comm2);

    /* If we're odd, send to our new thread and then delay */
    if (rank & 0x1) {
        MPI_Ssend(buffer, 0, MPI_INT, rank, 0, MPI_COMM_WORLD);
        MTestSleep(1);
    }
    MPI_Comm_dup(comm1, &comm4);

    /* Tell the threads to exit after we've created our new comm */
    MPI_Barrier(comm4);
    MPI_Ssend(buffer, 0, MPI_INT, rank, 1, MPI_COMM_WORLD);
    MPI_Recv(buffer, 0, MPI_INT, rank, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    MTest_Join_threads();

    MPI_Comm_free(&comm4);
    MPI_Comm_free(&comm1);
    MPI_Comm_free(&comm2);

    MTest_Finalize(0);
    MPI_Finalize();
    return 0;
}
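The point being exercised above is that, with MPI_THREAD_MULTIPLE, two threads of one process may issue collective calls concurrently as long as they use different communicators. A bare-bones sketch of that idea using plain pthreads; it is not the MTest harness, and it drops the odd/even ordering and self-messages the original test uses to control the race:

#include <mpi.h>
#include <pthread.h>
#include <stdio.h>

/* Sketch: the second thread dups comm2 while the main thread dups comm1.
 * Both dups are collective, but on different communicators, so this is
 * legal under MPI_THREAD_MULTIPLE. */
static MPI_Comm comm2;

static void *dup_thread(void *arg)
{
    MPI_Comm mydup;
    MPI_Comm_dup(comm2, &mydup);      /* second thread dups comm2 ... */
    MPI_Comm_free(&mydup);
    return NULL;
}

int main(int argc, char *argv[])
{
    int provided, rank;
    MPI_Comm comm1, mydup;
    pthread_t tid;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (provided != MPI_THREAD_MULTIPLE) {
        if (rank == 0) printf("MPI_THREAD_MULTIPLE not available\n");
        MPI_Finalize();
        return 0;
    }

    MPI_Comm_dup(MPI_COMM_WORLD, &comm1);
    MPI_Comm_dup(MPI_COMM_WORLD, &comm2);

    pthread_create(&tid, NULL, dup_thread, NULL);
    MPI_Comm_dup(comm1, &mydup);      /* ... while the main thread dups comm1 */
    pthread_join(tid, NULL);

    MPI_Comm_free(&mydup);
    MPI_Comm_free(&comm1);
    MPI_Comm_free(&comm2);
    MPI_Finalize();
    return 0;
}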
Esempio n. 22
0
File: red.c Project: arkuzmin/ppp
int main (int argc,char **argv)
{
	MPI_Status status;

	int rank, size;
	struct
	{
		int value;
		int rank;
	} num, max, rcvd;

	MPI_Init(&argc,&argv);
	MPI_Comm_rank (MPI_COMM_WORLD,&rank);
	MPI_Comm_size (MPI_COMM_WORLD,&size);
	
	char *tracefile = getenv("TVTRACE");

	if( tracefile != NULL ){
		printf( "tv tracefile=%s\n", tracefile );
		MPI_Pcontrol(TRACEFILES, NULL, tracefile, 0);      
	}
	else{
		MPI_Pcontrol(TRACEFILES, NULL, "trace", 0);
	}
	MPI_Pcontrol(TRACELEVEL, 1, 1, 1);
	MPI_Pcontrol(TRACENODE, 1000000, 1, 1);

	num.value = my_random(rank);
	num.rank = rank;
	printf("Node %d: value = %d\n", num.rank, num.value);

	double sTime, eTime;
	sTime = MPI_Wtime();
	MPI_Pcontrol(TRACEEVENT, "entry", 2, 0, "");
	
	MPI_Reduce(&num, &max, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD);
	
	MPI_Pcontrol(TRACEEVENT, "exit", 2, 0, "");
	eTime = MPI_Wtime();
	
	MPI_Barrier( MPI_COMM_WORLD );
	
	MPI_Pcontrol(TRACEEVENT, "entry", 1, 0, "");
	if (rank == 0)
	{
		print_result("MPI_Reduce", max.rank, max.value, eTime - sTime);
		sTime = MPI_Wtime();
		max.value = num.value;
		max.rank = num.rank;
		int i;
		for(i = 1; i < size; i++)
		{
			MPI_Recv(&rcvd, 1, MPI_2INT, i, TAG, MPI_COMM_WORLD, &status);
			if (rcvd.value > max.value)
			{
				max.value = rcvd.value;
				max.rank = rcvd.rank;
			} 
		}
		eTime = MPI_Wtime();
		print_result("Send-receive", max.rank, max.value, eTime - sTime);
	}
	else
	{
		MPI_Ssend(&num, 1, MPI_2INT, 0, TAG, MPI_COMM_WORLD);
	}
	MPI_Pcontrol(TRACEEVENT, "exit", 1, 0, "");

#if 0


	if( !rank ){
		double *a,*b,*c, *c0;
		int i,i1,j,k;
		int ann;
		MPI_Status *st;
		MPI_Request *rq,rq1;
		rq = (MPI_Request*) malloc( (size-1)*sizeof(MPI_Request) );
		st = (MPI_Status*) malloc( (size-1)*sizeof(MPI_Status) );


		ann=an/size+((an%size)?1:0);
		//      printf("[%d]ann=%d\n", rank, ann );

		a=(double*) malloc(am*an*sizeof(double));
		b=(double*) malloc(am*bm*sizeof(double));
		c=(double*) malloc(an*bm*sizeof(double));
		for(i=0;i<am*an;i++)
		a[i]=rand()%301;
		for(i=0;i<am*bm;i++)
		b[i]=rand()%251;
		printf( "Data ready [%d]\n", rank );
		
		c0 = (double*)malloc(an*bm*sizeof(double));

		
		time = MPI_Wtime();  
		for (i=0; i<an; i++)
		for (j=0; j<bm; j++)
		{
			double s = 0.0;
			for (k=0; k<am; k++)
			s+= a[i*am+k]*b[k*bm+j];
			c0[i*bm+j] = s;
		} 
		time = MPI_Wtime() - time;
		printf("Time seq[%d] = %lf\n", rank, time );
		time_seq = time;

		MPI_Barrier( MPI_COMM_WORLD );
		time=MPI_Wtime();

		MPI_Bcast( b, am*bm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
		printf( "Data Bcast [%d]\n", rank );

		for( i1=0, j=1; j<size; j++, i1+=ann*am ){
			printf( "Data to Send [%d] %016x[%4d] =>> %d\n", rank, a+i1, i1, j );
			MPI_Isend( a+i1, ann*am, MPI_DOUBLE, j, 101, MPI_COMM_WORLD, &rq1 );
			MPI_Request_free( &rq1 ); 
			printf( "Data Send [%d] =>> %d\n", rank, j );
		}
		printf( "Data Send [%d]\n", rank );
		
		MPI_Isend( a+i1, 1, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &rq1 );
		MPI_Request_free( &rq1 ); 
		
		printf( "Data Send [%d] =>> %d\n", rank, j );


		for(i=(i1/am);i<an;i++)
		for(j=0;j<bm;j++){
			double s=0.0;
			for(k=0;k<am;k++)
			s+=a[i*am+k]*b[k*bm+j];
			c[i*bm+j]=s;
		}

		printf( "Job done  [%d]\n", rank );
		for( i1=0, j=1; j<size; j++, i1+=(ann*bm) ){
			printf( "Data to Recv [%d] %016x[%4d] =>> %d\n", rank, c+i1, i1/bm, j );
			MPI_Irecv( c+i1, ann*am, MPI_DOUBLE, j, 102, MPI_COMM_WORLD, rq+(j-1) );
		}         
		MPI_Waitall( size-1, rq, st );
		
		time=MPI_Wtime()-time;
		printf("time [%d]=%12.8lf\n",rank,time);
		time_par = time;

		printf( "Data collected [%d]\n", rank );
		
		time=MPI_Wtime();
		int ok = 1;
		for(i=0;i<an*bm;i++)
		if( c[i] != c0[i] ){
			ok = 0;
			printf( "Fail [%d %d] %lf != %lf\n", i/bm, i%bm, c[i], c0[i] );
			break;
		}
		time=MPI_Wtime()-time;
		if( ok ){
			printf( "Data verified [%d] time = %lf\n", rank, time );
			printf( "Speedup S(%d) = %14.10lf\n", size, time_seq/time_par );
			printf( "Efficiency E(%d) = %14.10lf\n", size, time_seq/(time_par*size) );
		}
		
	}
	else
	{
		int ann;
		double *a,*b,*c;
		MPI_Status st;
		int i,j,k;

		MPI_Pcontrol(TRACEEVENT, "entry", 0, 0, "");

		ann= an/size + ((an%size)?1:0);
		//      if(rank==1)
		//        printf("[%d]ann=%d = %d / %d \n", rank, ann, an, size );
		
		a=(double*)malloc(ann*am*sizeof(double));
		b=(double*)malloc(bm*am*sizeof(double));
		c=(double*)malloc(ann*bm*sizeof(double));
		printf( "Mem allocated [%d]\n", rank );

		
		MPI_Barrier( MPI_COMM_WORLD );
		MPI_Pcontrol(TRACEEVENT, "exit", 0, 0, "");
		time = MPI_Wtime();


		MPI_Pcontrol(TRACEEVENT, "entry", 1, 0, "");
		
		MPI_Bcast(b,am*bm,MPI_DOUBLE,0,MPI_COMM_WORLD);
		printf( "Data Bcast [%d]\n", rank );
		
		MPI_Recv( a, ann*am, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &st);
		printf( "Data Recv [%d]\n", rank );
		
		MPI_Pcontrol(TRACEEVENT, "exit", 1, 0, "");
		
		MPI_Pcontrol(TRACEEVENT, "entry", 2, 0, "");
		for( i=0; i<ann; i++ )
		for(j=0;j<bm;j++){
			double s=0.0;
			
			for( k=0; k<am; k++ ){
				s+=a[i*am+k]*b[k*bm+j];
			}
			/*    
			if(1==rank){
			if(0==j){
				printf( "c[%d<%d %d] = %lf\n", i,ann,j, s );
			}
			}
*/
			c[i*bm+j]=s;
		}
		printf( "Job done  [%d]\n", rank );
		MPI_Pcontrol(TRACEEVENT, "exit", 2, 0, "");

		MPI_Pcontrol(TRACEEVENT, "entry", 3, 0, "");
		MPI_Send( c, ann*bm,  MPI_DOUBLE, 0, 102, MPI_COMM_WORLD);
		printf( "Data returned [%d]\n", rank );
		MPI_Pcontrol(TRACEEVENT, "exit", 3, 0, "");

		time=MPI_Wtime()-time;
		printf("time [%d]=%12.8lf\n",rank,time);
	}

#endif

	MPI_Finalize();
	return 0;
}
Esempio n. 23
0
void _ImportersToolbox_SurfaceProcessCoupler_tracer_output( ImportersToolbox_SurfaceProcessCoupler* self ) {
   /**
     Go through the passive tracer swarm and output the global id and the change in height
     */

   MaterialPointsSwarm* ms=NULL;
   MaterialPoint *mp=NULL;
   
   int ierr, rank, nprocs;
   double globalID;
   int ii, lParticleCount, vertAxis;

   FILE* fPtr=NULL;

   MPI_Status status;
   const int FINISHED_WRITING_TAG = 100;
   int canExecute = 0;

   ms = self->pts;
   lParticleCount = ms->particleLocalCount;
   assert(ms);

   vertAxis = self->vertAxis;
   rank = self->context->rank;
   nprocs = self->context->nproc;

   /* check to see if existing uw_ascii output is there. If so delete */
   if( self->context->rank == 0 ) {
      fPtr = fopen( self->ascii_path, "r" );
      if(fPtr) {
         fclose(fPtr);
         Journal_Firewall( remove(self->ascii_path)==0, global_error_stream,
               "Error in %s: tried to delete file at '%s' but couldn't\n", __func__, self->ascii_path);
      }
   }

   /* wait for go-ahead from process ranked lower than me, to avoid competition writing to file */
   if ( rank != 0  && canExecute == 0 ) {
      ierr=MPI_Recv( &canExecute, 1, MPI_INT, rank - 1, FINISHED_WRITING_TAG, ms->comm, &status );
   }

   /* open in append mode */
   fPtr = fopen( self->ascii_path, "a" );

   for( ii = 0 ; ii < lParticleCount; ii++ ) {
      mp = (MaterialPoint*)Swarm_ParticleAt( ms, ii );

      /* note we pass globalID as a (double*) because SwarmVariable_ValueAt can only work with that type */
      SwarmVariable_ValueAt( self->SP_globalid_var, ii, (&globalID) );

      fprintf( fPtr, "%g %.15g\n", globalID, (mp->coord[vertAxis] - self->SP_tracer_height) );
   }

   /* close file */
   fclose(fPtr);

   /* confirm this processor is finished */
   canExecute = 1;
   /* send the go-ahead to the process ranked one above me, so writes to the file stay serialized */
   if ( rank != nprocs - 1 ) {
          MPI_Ssend( &canExecute, 1, MPI_INT, rank + 1, FINISHED_WRITING_TAG, ms->comm );
   }
}
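The Recv/Ssend pair above implements a simple token scheme: rank r waits for a go-ahead from rank r-1, appends its block to the shared file, then hands the token to rank r+1, so only one rank writes at a time. A stripped-down sketch of that pattern on its own; the tag value, file name, and payload are placeholders:

#include <mpi.h>
#include <stdio.h>

/* Sketch: serialize appends to one shared file by passing a "go" token
 * from rank 0 up to rank nprocs-1. */
int main(int argc, char *argv[])
{
    const int GO_TAG = 100;
    int rank, nprocs, token = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* everyone except rank 0 waits for the rank below to finish writing */
    if (rank != 0)
        MPI_Recv(&token, 1, MPI_INT, rank - 1, GO_TAG, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);

    FILE *fp = fopen("tracer_output.txt", "a");
    if (fp) {
        fprintf(fp, "rank %d writes its block here\n", rank);
        fclose(fp);
    }

    /* pass the token up; the last rank has nobody left to notify */
    token = 1;
    if (rank != nprocs - 1)
        MPI_Ssend(&token, 1, MPI_INT, rank + 1, GO_TAG, MPI_COMM_WORLD);

    MPI_Finalize();
    return 0;
}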
Esempio n. 24
0
int main( int argc, char *argv[] )
{
    int errs = 0;
    MPI_Comm comm;
    MPI_Request r[2];
    MPI_Status  s[2];
    int         indices[2], outcount;
    int errval, errclass;
    int b1[20], b2[20], rank, size, src, dest, i, j;

    MTest_Init( &argc, &argv );

    /* Create some receive requests.  tags 0-9 will succeed, tags 10-19 
       will be used for ERR_TRUNCATE (fewer than 20 messages will be used) */
    comm = MPI_COMM_WORLD;

    MPI_Comm_rank( comm, &rank );
    MPI_Comm_size( comm, &size );

    src  = 1;
    dest = 0;
    if (rank == dest) {
	MPI_Errhandler_set( comm, MPI_ERRORS_RETURN );
	errval = MPI_Irecv( b1, 10, MPI_INT, src, 0, comm, &r[0] );
	if (errval) {
	    errs++;
	    MTestPrintError( errval );
	    printf( "Error returned from Irecv\n" );
	}
	errval = MPI_Irecv( b2, 10, MPI_INT, src, 10, comm, &r[1] );
	if (errval) {
	    errs++;
	    MTestPrintError( errval );
	    printf( "Error returned from Irecv\n" );
	}

	/* synchronize */
	errval = MPI_Recv(NULL, 0, MPI_INT, src, 10, comm, MPI_STATUS_IGNORE);
	if (errval) {
	    errs++;
	    MTestPrintError( errval );
	    printf( "Error returned from Recv\n" );
	}
	for (i=0; i<2; i++) {
	    s[i].MPI_ERROR = -1;
	}
	errval = MPI_Waitsome( 2, r, &outcount, indices, s );
	MPI_Error_class( errval, &errclass );
	if (errclass != MPI_ERR_IN_STATUS) {
	    errs++;
	    printf( "Did not get ERR_IN_STATUS in Waitsome.  Got %d.\n", errval );
	}
	else if (outcount != 2) {
	    errs++;
	    printf( "Wait returned outcount = %d\n", outcount );
	}
	else {
	    /* Check for success */
	    for (i=0; i<outcount; i++) {
		j = i;
		/* Indices is the request index */
		if (s[j].MPI_TAG < 10 && s[j].MPI_ERROR != MPI_SUCCESS) {
		    errs++;
		    printf( "correct msg had error class %d\n", 
			    s[j].MPI_ERROR );
		}
		else if (s[j].MPI_TAG >= 10 && s[j].MPI_ERROR == MPI_SUCCESS) {
		    errs++;
		    printf( "truncated msg had MPI_SUCCESS\n" );
		}
	    }
	}

    }
    else if (rank == src) {
	/* Send test messages, then send another message so that the test does
	   not start until we are sure that the sends have begun */
	MPI_Send( b1, 10, MPI_INT, dest, 0, comm );
	MPI_Send( b2, 11, MPI_INT, dest, 10, comm );

	/* synchronize */
	MPI_Ssend( NULL, 0, MPI_INT, dest, 10, comm );
    }

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
  
}
Esempio n. 25
0
int main(int argc, char **argv)
{
    if (argc != 4)
    {
        fputs("We need 3 arguments\n", stdout);
        return 1;
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_world_size);

    rebuild_target = atoi(argv[1]);
    const char *store_dir = argv[2];
    const char *data_file = argv[3];

    int ntargets = mpi_world_size - 1;
    if (ntargets > MAX_STORAGE_TARGETS)
        return 1;

    if (rebuild_target < 0 || rebuild_target > ntargets)
        return 1;
    helper = 1;
    while (helper == rebuild_target)
        helper += 1;
    if (helper == rebuild_target)
        return 1;

    PROF_START(total);
    PROF_START(init);

    int last_run_fd = -1;
    RunData last_run;
    memset(&last_run, 0, sizeof(RunData));
    if (mpi_rank == 0) {
        last_run_fd = open(data_file, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
        read(last_run_fd, &last_run, sizeof(RunData));
    }

    /* Create mapping from storage targets to ranks, and vice versa */
    Target targetIDs[MAX_STORAGE_TARGETS] = {{0,0}};
    Target targetID = {0,0};
    if (mpi_rank != 0)
    {
        int store_fd = open(store_dir, O_DIRECTORY | O_RDONLY);
        int target_ID_fd = openat(store_fd, "targetNumID", O_RDONLY);
        char targetID_s[20] = {0};
        read(target_ID_fd, targetID_s, sizeof(targetID_s));
        close(target_ID_fd);
        close(store_fd);
        targetID.id = atoi(targetID_s);
        targetID.rank = mpi_rank;
    }
    MPI_Gather(
            &targetID, sizeof(Target), MPI_BYTE,
            targetIDs, sizeof(Target), MPI_BYTE,
            0,
            MPI_COMM_WORLD);
    if (mpi_rank == 0) {
        if (last_run.ntargets != ntargets) {
            /* ERROR - new number of targets */
            assert(0);
        }
        for (int i = 0; i < ntargets; i++)
            targetIDs[i] = targetIDs[i+1];
        for (int i = 0; i < ntargets; i++)
            last_run.targetIDs[i].rank = -1;
        for (int i = 0; i < ntargets; i++) {
            Target target = targetIDs[i];
            int j = 0;
            int found = 0;
            for (; j < ntargets; j++)
                if (last_run.targetIDs[j].id == target.id) {
                    last_run.targetIDs[j] = target;
                    found = 1;
                }
            if (!found) {
                /* ERROR - new target introduced */
                printf(" > %d, %d\n", target.id, target.rank);
                assert(0);
            }
        }
        rank2st[0] = -1;
        for (int i = 0; i < ntargets; i++)
        {
            st2rank[i] = last_run.targetIDs[i].rank;
            rank2st[st2rank[i]] = i;
        }
    }
    MPI_Bcast(st2rank, sizeof(st2rank), MPI_BYTE, 0, MPI_COMM_WORLD);
    MPI_Bcast(rank2st, sizeof(rank2st), MPI_BYTE, 0, MPI_COMM_WORLD);

    PROF_END(init);

    if (mpi_rank == 0)
        printf("%d(rank=%d), %d(rank=%d)\n", rebuild_target, st2rank[rebuild_target], helper, st2rank[helper]);

    PROF_START(main_work);

    memset(&pr_sender, 0, sizeof(pr_sender));

    if (mpi_rank != 0 && rank2st[mpi_rank] != rebuild_target)
    {
        PersistentDB *pdb = pdb_init();
        pdb_iterate(pdb, do_file);
        pdb_term(pdb);

        if (rank2st[mpi_rank] == helper) {
            int dummy;
            MPI_Ssend((void*)&dummy, sizeof(dummy), MPI_BYTE, st2rank[rebuild_target], 0, MPI_COMM_WORLD);
        }
        pr_add_tmp_to_total(&pr_sample);
        pr_report_progress(&pr_sender, pr_sample);
        pr_report_done(&pr_sender);
    }
    else if (rank2st[mpi_rank] == rebuild_target)
    {
        int helper_rank = st2rank[helper];
        MPI_Status stat;
        int count;
        FileInfo fi;
        MPI_Recv(&fi, sizeof(FileInfo), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat);
        MPI_Get_count(&stat, MPI_BYTE, &count);
        while (count == sizeof(FileInfo)) {
            char key[200];
            MPI_Recv(key, sizeof(key), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat);
            int keylen;
            MPI_Get_count(&stat, MPI_BYTE, &keylen);
            key[keylen] = '\0';
            do_file(key, keylen, &fi);

            MPI_Recv(&fi, sizeof(FileInfo), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat);
            MPI_Get_count(&stat, MPI_BYTE, &count);
        }
        pr_add_tmp_to_total(&pr_sample);
        pr_report_progress(&pr_sender, pr_sample);
        pr_report_done(&pr_sender);
    }
    else if (mpi_rank == 0)
    {
        printf("st - total files   | data read     | data written  | disk I/O\n");
        pr_receive_loop(ntargets-1);
    }

    PROF_END(main_work);
    PROF_END(total);

    if (mpi_rank == 0) {
        printf("Overall timings: \n");
        printf("init       | %9.2f ms\n", 1e3*PROF_VAL(init));
        printf("main_work  | %9.2f ms\n", 1e3*PROF_VAL(main_work));
        printf("total      | %9.2f ms\n", 1e3*PROF_VAL(total));
    }

    MPI_Barrier(MPI_COMM_WORLD);
    char *iter = hs.corrupt;
    for (size_t i = 0; i < hs.corrupt_count; i++)
    {
        printf("Potentially corrupt chunk: '%s'\n", iter);
        iter += strlen(iter);
    }

    MPI_Finalize();
}
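The helper-to-rebuild-target handshake above streams (FileInfo, key) pairs with MPI_Ssend and ends the stream with a deliberately short dummy message, which the receiver tells apart from a real record by checking the byte count with MPI_Get_count. A minimal sketch of that termination idiom, with an invented record type in place of FileInfo:

#include <mpi.h>
#include <stdio.h>
#include <string.h>

/* Sketch: stream fixed-size records from rank 1 to rank 0 and end the
 * stream with a shorter "dummy" message; the receiver detects the end by
 * comparing the received size against sizeof(Record).  The Record type and
 * its contents are invented for this illustration. */
typedef struct { long id; char name[64]; } Record;

int main(int argc, char *argv[])
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) MPI_Abort(MPI_COMM_WORLD, 1);

    if (rank == 1) {
        Record r;
        for (long i = 0; i < 3; i++) {
            r.id = i;
            snprintf(r.name, sizeof(r.name), "file-%ld", i);
            MPI_Ssend(&r, sizeof(Record), MPI_BYTE, 0, 0, MPI_COMM_WORLD);
        }
        int dummy = 0;                    /* shorter than a Record: end of stream */
        MPI_Ssend(&dummy, sizeof(dummy), MPI_BYTE, 0, 0, MPI_COMM_WORLD);
    }
    else if (rank == 0) {
        Record r;
        MPI_Status stat;
        int count;
        for (;;) {
            MPI_Recv(&r, sizeof(Record), MPI_BYTE, 1, 0, MPI_COMM_WORLD, &stat);
            MPI_Get_count(&stat, MPI_BYTE, &count);
            if (count != (int) sizeof(Record))
                break;                    /* the short message is the sentinel */
            printf("got record %ld: %s\n", r.id, r.name);
        }
    }

    MPI_Finalize();
    return 0;
}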
Esempio n. 26
0
//main program
int main(int argc, char *argv[])
{
	MPI_Init(&argc, &argv);
	int size, rank;
	long t1, t2;
	static int ranks[1] = { 0 };
	MPI_Request request1, request2, request3, request4;
	MPI_Status status, status1, status2, status3, status4;
	MPI_Group MPI_GROUP_WORLD, grprem;
	MPI_Comm commslave, newcomm;
	MPI_Comm commsp;
	MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);
	MPI_Group_excl(MPI_GROUP_WORLD, 1, ranks, &grprem);
	MPI_Comm_create(MPI_COMM_WORLD, grprem, &commslave);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	printf("Node %d in %d is ready\n", rank, size);
	//Initialize
	double *grid0 = creat_grid(x_size, y_size, z_size, size, rank, GRID0);
	double *grid1 = creat_grid(x_size, y_size, z_size, size, rank, GRID1);
	MPI_Barrier(MPI_COMM_WORLD);
	if (size != 1)
	{
		if (rank == 0)
		{
			for (int i = 1; i < size; i++)
			{
				int len = (i == size - 1) ? (x_size / (size - 1)) : (x_size / (size - 1) + x_size % (size - 1));
				MPI_Ssend(grid0 + (i - 1)*x_size / (size - 1), len*y_size*z_size, MPI_DOUBLE, i, i, MPI_COMM_WORLD);
			}
		}
		else
		{
			for (int i = 1; i < size; i++)
			{
				if (rank == i)
				{
					int len = (i == size - 1) ? (x_size / (size - 1)) : (x_size / (size - 1) + x_size % (size - 1));
					MPI_Recv(grid0 + y_size*z_size, len*y_size*z_size, MPI_DOUBLE, 0, i, MPI_COMM_WORLD, &status);
				}
			}

		}
	}
	//Compute
	if (rank == 0) printf("Start computing...\n");
	if (rank != 0 && size > 1)
	{
		for (int t = 0; t < stepnum; t++)
		{
			compute(grid0, grid1, rank, size);
			//send right slice of data to the next node, then receive the right slice of data from the next node
			if (rank < size - 1)
			{
				MPI_Ssend(grid1 + (1 + x_size / (size - 1))*y_size*z_size,
					y_size*z_size, MPI_DOUBLE, rank + 1, rank, MPI_COMM_WORLD);
				MPI_Recv(grid1 + (1 + x_size / (size - 1))*y_size*z_size,
					y_size*z_size, MPI_DOUBLE, rank + 1, rank + 1, MPI_COMM_WORLD, &status1);
				//MPI_Wait(&request1, &status1);
				//MPI_Wait(&request2, &status2);
			}
			//receive the left slice of data from the previous node, then send the left slice of data to the previous node
			if (rank > 1)
			{
				MPI_Recv(grid1, y_size*z_size, MPI_DOUBLE, rank - 1, rank - 1, MPI_COMM_WORLD, &status2);
				MPI_Ssend(grid1, y_size*z_size, MPI_DOUBLE, rank - 1, rank, MPI_COMM_WORLD);
				//MPI_Wait(&request3, &status3);
				//MPI_Wait(&request4, &status4);
			}
			double *temp;
			temp = grid0;
			grid0 = grid1;
			grid1 = temp;
			MPI_Barrier(commslave);
		}
	}
	else if (size == 1)
	{
		for (int t = 0; t < stepnum; t++)
		{
			compute(grid0, grid1, rank, size);
			double *temp;
			temp = grid0;
			grid0 = grid1;
			grid1 = temp;
		}
	}
	else { ; }
	MPI_Barrier(MPI_COMM_WORLD);
	printf("Rank %d finished computing!\n", rank);
	//Gather data from nodes to host
	if (size != 1)
	{
		if (stepnum % 2)
		{
			double *temp;
			temp = grid0;
			grid0 = grid1;
			grid1 = temp;
		}
		for (int i = 1; i < size; i++)
		{
			if (rank == i)
			{
				int len = (i == size - 1) ? (x_size / (size - 1)) : (x_size / (size - 1) + x_size % (size - 1));
				MPI_Ssend(grid0 + y_size*z_size, len*y_size*z_size, MPI_DOUBLE, 0, i, MPI_COMM_WORLD);
				//MPI_Wait(&request2, &status2);
			}
		}
		if (rank == 0)
		{
			for (int i = 1; i < size; i++)
			{
				int len = (i == size - 1) ? (x_size / (size - 1)) : (x_size / (size - 1) + x_size % (size - 1));
				MPI_Recv(grid1, len*y_size*z_size, MPI_DOUBLE, i, i, MPI_COMM_WORLD, &status3);
				//MPI_Wait(&request1, &status1);
			}
		}
	}
	MPI_Barrier(MPI_COMM_WORLD);
	if (rank == 0) printf("All work complete\n");
	MPI_Finalize();
	return 0;
}
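Inside the time loop, the blocking Ssend/Recv pairs are ordered by rank (send right before receiving from the right, receive from the left before sending left) so the chain resolves from the last rank backwards instead of deadlocking. The commented-out MPI_Wait lines hint at the usual alternative: post MPI_Irecv/MPI_Isend for both halo slices and finish them with MPI_Waitall, which needs no rank-dependent ordering. A minimal sketch of that variant for a 1-D decomposition; the slice size and buffers are placeholders:

#include <mpi.h>
#include <stdlib.h>

/* Sketch: non-blocking halo exchange for a 1-D decomposition.  Each rank
 * swaps one slice of N doubles with its left and right neighbours;
 * interior ranks have two neighbours, the ends have one. */
int main(int argc, char *argv[])
{
    enum { N = 1024 };
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    double *send_left  = calloc(N, sizeof(double));
    double *send_right = calloc(N, sizeof(double));
    double *recv_left  = calloc(N, sizeof(double));
    double *recv_right = calloc(N, sizeof(double));

    MPI_Request req[4];
    int nreq = 0;

    if (rank > 0) {              /* exchange with the left neighbour */
        MPI_Irecv(recv_left,  N, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD, &req[nreq++]);
        MPI_Isend(send_left,  N, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD, &req[nreq++]);
    }
    if (rank < size - 1) {       /* exchange with the right neighbour */
        MPI_Irecv(recv_right, N, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD, &req[nreq++]);
        MPI_Isend(send_right, N, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD, &req[nreq++]);
    }

    /* ...interior computation could overlap the exchange here... */

    MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);

    free(send_left); free(send_right); free(recv_left); free(recv_right);
    MPI_Finalize();
    return 0;
}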
Esempio n. 27
0
/*! This function reads a snapshot file and distributes the data it contains
 *  to tasks 'readTask' to 'lastTask'.
 */
void read_file(char *fname, int readTask, int lastTask)
{
  int blockmaxlen;
  int i, n_in_file, n_for_this_task, ntask, pc, offset = 0, task;
  int blksize1, blksize2;
  MPI_Status status;
  FILE *fd = 0;
  int nall;
  int type;
  char label[4];
  int nstart, bytes_per_blockelement, npart, nextblock, typelist[6];
  enum iofields blocknr;

#ifdef HAVE_HDF5
  char buf[500];
  int rank, pcsum;
  hid_t hdf5_file, hdf5_grp[6], hdf5_dataspace_in_file;
  hid_t hdf5_datatype, hdf5_dataspace_in_memory, hdf5_dataset;
  hsize_t dims[2], count[2], start[2];
#endif

#define SKIP  {my_fread(&blksize1,sizeof(int),1,fd);}
#define SKIP2  {my_fread(&blksize2,sizeof(int),1,fd);}

  if(ThisTask == readTask)
    {
      if(All.ICFormat == 1 || All.ICFormat == 2)
	{
	  if(!(fd = fopen(fname, "r")))
	    {
	      printf("can't open file `%s' for reading initial conditions.\n", fname);
	      endrun(123);
	    }

	  if(All.ICFormat == 2)
	    {
	      SKIP;
	      my_fread(&label, sizeof(char), 4, fd);
	      my_fread(&nextblock, sizeof(int), 1, fd);
	      printf("Reading header => '%c%c%c%c' (%d byte)\n", label[0], label[1], label[2], label[3],
		     nextblock);
	      SKIP2;
	    }

	  SKIP;
	  my_fread(&header, sizeof(header), 1, fd);
	  SKIP2;

	  if(blksize1 != 256 || blksize2 != 256)
	    {
	      printf("incorrect header format\n");
	      fflush(stdout);
	      endrun(890);
	    }
	}


#ifdef HAVE_HDF5
      if(All.ICFormat == 3)
	{
	  read_header_attributes_in_hdf5(fname);

	  hdf5_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);

	  for(type = 0; type < 6; type++)
	    {
	      if(header.npart[type] > 0)
		{
		  sprintf(buf, "/PartType%d", type);
		  hdf5_grp[type] = H5Gopen(hdf5_file, buf);
		}
	    }
	}
#endif

      for(task = readTask + 1; task <= lastTask; task++)
	MPI_Ssend(&header, sizeof(header), MPI_BYTE, task, TAG_HEADER, MPI_COMM_WORLD);
    }
  else
    MPI_Recv(&header, sizeof(header), MPI_BYTE, readTask, TAG_HEADER, MPI_COMM_WORLD, &status);


  if(All.TotNumPart == 0)
    {
      if(header.num_files <= 1)
	for(i = 0; i < 6; i++)
	  header.npartTotal[i] = header.npart[i];

      All.TotN_gas = header.npartTotal[0] + (((long long) header.npartTotalHighWord[0]) << 32);

      for(i = 0, All.TotNumPart = 0; i < 6; i++)
	{
	  All.TotNumPart += header.npartTotal[i];
	  All.TotNumPart += (((long long) header.npartTotalHighWord[i]) << 32);
	}


      for(i = 0; i < 6; i++)
	All.MassTable[i] = header.mass[i];

      All.MaxPart = All.PartAllocFactor * (All.TotNumPart / NTask);	/* sets the maximum number of particles that may reside on a processor */
      All.MaxPartSph = All.PartAllocFactor * (All.TotN_gas / NTask);	/* sets the maximum number of SPH particles that may reside on a processor */
      allocate_memory();

      if(RestartFlag == 2)
	All.Time = All.TimeBegin = header.time;
    }

  if(ThisTask == readTask)
    {
      for(i = 0, n_in_file = 0; i < 6; i++)
	n_in_file += header.npart[i];

      printf("\nreading file `%s' on task=%d (contains %d particles.)\n"
	     "distributing this file to tasks %d-%d\n"
	     "Type 0 (gas):   %8d  (tot=%6d%09d) masstab=%g\n"
	     "Type 1 (halo):  %8d  (tot=%6d%09d) masstab=%g\n"
	     "Type 2 (disk):  %8d  (tot=%6d%09d) masstab=%g\n"
	     "Type 3 (bulge): %8d  (tot=%6d%09d) masstab=%g\n"
	     "Type 4 (stars): %8d  (tot=%6d%09d) masstab=%g\n"
	     "Type 5 (bndry): %8d  (tot=%6d%09d) masstab=%g\n\n", fname, ThisTask, n_in_file, readTask,
	     lastTask, header.npart[0], (int) (header.npartTotal[0] / 1000000000),
	     (int) (header.npartTotal[0] % 1000000000), All.MassTable[0], header.npart[1],
	     (int) (header.npartTotal[1] / 1000000000), (int) (header.npartTotal[1] % 1000000000),
	     All.MassTable[1], header.npart[2], (int) (header.npartTotal[2] / 1000000000),
	     (int) (header.npartTotal[2] % 1000000000), All.MassTable[2], header.npart[3],
	     (int) (header.npartTotal[3] / 1000000000), (int) (header.npartTotal[3] % 1000000000),
	     All.MassTable[3], header.npart[4], (int) (header.npartTotal[4] / 1000000000),
	     (int) (header.npartTotal[4] % 1000000000), All.MassTable[4], header.npart[5],
	     (int) (header.npartTotal[5] / 1000000000), (int) (header.npartTotal[5] % 1000000000),
	     All.MassTable[5]);
      fflush(stdout);
    }


  ntask = lastTask - readTask + 1;


  /* to collect the gas particles all at the beginning (in case several
     snapshot files are read on the current CPU) we move the collisionless
     particles such that a gap of the right size is created */

  for(type = 0, nall = 0; type < 6; type++)
    {
      n_in_file = header.npart[type];

      n_for_this_task = n_in_file / ntask;
      if((ThisTask - readTask) < (n_in_file % ntask))
	n_for_this_task++;

      nall += n_for_this_task;
    }

  memmove(&P[N_gas + nall], &P[N_gas], (NumPart - N_gas) * sizeof(struct particle_data));
  nstart = N_gas;



  for(blocknr = 0; blocknr < IO_NBLOCKS; blocknr++)
    {
      if(blockpresent(blocknr))
	{
	  if(RestartFlag == 0 && blocknr > IO_U)
	    continue;		/* ignore all other blocks in initial conditions */

	  bytes_per_blockelement = get_bytes_per_blockelement(blocknr);

	  blockmaxlen = ((int) (All.BufferSize * 1024 * 1024)) / bytes_per_blockelement;

	  npart = get_particles_in_block(blocknr, &typelist[0]);

	  if(npart > 0)
	    {
	      if(ThisTask == readTask)
		{
		  if(All.ICFormat == 2)
		    {
		      SKIP;
		      my_fread(&label, sizeof(char), 4, fd);
		      my_fread(&nextblock, sizeof(int), 1, fd);
		      printf("Reading header => '%c%c%c%c' (%d byte)\n", label[0], label[1], label[2],
			     label[3], nextblock);
		      SKIP2;

		      if(strncmp(label, Tab_IO_Labels[blocknr], 4) != 0)
			{
			  printf("incorrect block-structure!\n");
			  printf("expected '%c%c%c%c' but found '%c%c%c%c'\n",
				 label[0], label[1], label[2], label[3],
				 Tab_IO_Labels[blocknr][0], Tab_IO_Labels[blocknr][1],
				 Tab_IO_Labels[blocknr][2], Tab_IO_Labels[blocknr][3]);
			  fflush(stdout);
			  endrun(1890);
			}
		    }

		  if(All.ICFormat == 1 || All.ICFormat == 2)
		    SKIP;
		}

	      for(type = 0, offset = 0; type < 6; type++)
		{
		  n_in_file = header.npart[type];
#ifdef HAVE_HDF5
		  pcsum = 0;
#endif
		  if(typelist[type] == 0)
		    {
		      n_for_this_task = n_in_file / ntask;
		      if((ThisTask - readTask) < (n_in_file % ntask))
			n_for_this_task++;

		      offset += n_for_this_task;
		    }
		  else
		    {
		      for(task = readTask; task <= lastTask; task++)
			{
			  n_for_this_task = n_in_file / ntask;
			  if((task - readTask) < (n_in_file % ntask))
			    n_for_this_task++;

			  if(task == ThisTask)
			    if(NumPart + n_for_this_task > All.MaxPart)
			      {
				printf("too many particles\n");
				endrun(1313);
			      }


			  do
			    {
			      pc = n_for_this_task;

			      if(pc > blockmaxlen)
				pc = blockmaxlen;

			      if(ThisTask == readTask)
				{
				  if(All.ICFormat == 1 || All.ICFormat == 2)
				    my_fread(CommBuffer, bytes_per_blockelement, pc, fd);
#ifdef HAVE_HDF5
				  if(All.ICFormat == 3)
				    {
				      get_dataset_name(blocknr, buf);
				      hdf5_dataset = H5Dopen(hdf5_grp[type], buf);

				      dims[0] = header.npart[type];
				      dims[1] = get_values_per_blockelement(blocknr);
				      if(dims[1] == 1)
					rank = 1;
				      else
					rank = 2;

				      hdf5_dataspace_in_file = H5Screate_simple(rank, dims, NULL);

				      dims[0] = pc;
				      hdf5_dataspace_in_memory = H5Screate_simple(rank, dims, NULL);

				      start[0] = pcsum;
				      start[1] = 0;

				      count[0] = pc;
				      count[1] = get_values_per_blockelement(blocknr);
				      pcsum += pc;

				      H5Sselect_hyperslab(hdf5_dataspace_in_file, H5S_SELECT_SET,
							  start, NULL, count, NULL);

				      switch (get_datatype_in_block(blocknr))
					{
					case 0:
					  hdf5_datatype = H5Tcopy(H5T_NATIVE_UINT);
					  break;
					case 1:
					  hdf5_datatype = H5Tcopy(H5T_NATIVE_FLOAT);
					  break;
					case 2:
					  hdf5_datatype = H5Tcopy(H5T_NATIVE_UINT64);
					  break;
					}

				      H5Dread(hdf5_dataset, hdf5_datatype, hdf5_dataspace_in_memory,
					      hdf5_dataspace_in_file, H5P_DEFAULT, CommBuffer);

				      H5Tclose(hdf5_datatype);
				      H5Sclose(hdf5_dataspace_in_memory);
				      H5Sclose(hdf5_dataspace_in_file);
				      H5Dclose(hdf5_dataset);
				    }
#endif
				}

			      if(ThisTask == readTask && task != readTask)
				MPI_Ssend(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, task, TAG_PDATA,
					  MPI_COMM_WORLD);

			      if(ThisTask != readTask && task == ThisTask)
				MPI_Recv(CommBuffer, bytes_per_blockelement * pc, MPI_BYTE, readTask,
					 TAG_PDATA, MPI_COMM_WORLD, &status);

			      if(ThisTask == task)
				{
				  empty_read_buffer(blocknr, nstart + offset, pc, type);

				  offset += pc;
				}

			      n_for_this_task -= pc;
			    }
			  while(n_for_this_task > 0);
			}
		    }
		}
	      if(ThisTask == readTask)
		{
		  if(All.ICFormat == 1 || All.ICFormat == 2)
		    {
		      SKIP2;
		      if(blksize1 != blksize2)
			{
			  printf("incorrect block-sizes detected!\n");
			  printf("Task=%d   blocknr=%d  blksize1=%d  blksize2=%d\n", ThisTask, blocknr,
				 blksize1, blksize2);
			  fflush(stdout);
			  endrun(1889);
			}
		    }
		}
	    }
	}
    }


  for(type = 0; type < 6; type++)
    {
      n_in_file = header.npart[type];

      n_for_this_task = n_in_file / ntask;
      if((ThisTask - readTask) < (n_in_file % ntask))
	n_for_this_task++;

      NumPart += n_for_this_task;

      if(type == 0)
	N_gas += n_for_this_task;
    }

  if(ThisTask == readTask)
    {
      if(All.ICFormat == 1 || All.ICFormat == 2)
	fclose(fd);
#ifdef HAVE_HDF5
      if(All.ICFormat == 3)
	{
	  for(type = 5; type >= 0; type--)
	    if(header.npart[type] > 0)
	      H5Gclose(hdf5_grp[type]);
	  H5Fclose(hdf5_file);
	}
#endif
    }
}
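Stripped of the block bookkeeping and the HDF5 branch, the I/O pattern in read_file is: the designated reader task reads one chunk at a time into a communication buffer and MPI_Ssends it to the task the particles belong to, while every other task posts the matching MPI_Recv. A minimal sketch of that reader-distributes-chunks pattern; the chunk size and fabricated data stand in for my_fread and CommBuffer:

#include <mpi.h>
#include <stdio.h>

/* Sketch: one "reader" task produces data chunk by chunk (here it just
 * fabricates values; in read_file it would come from the snapshot file)
 * and hands each chunk to its destination task with MPI_Ssend. */
int main(int argc, char *argv[])
{
    enum { CHUNK = 4096 };
    double buffer[CHUNK];
    const int readTask = 0, TAG_PDATA = 10;
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (rank == readTask) {
        for (int task = 0; task < size; task++) {
            if (task == readTask)
                continue;                          /* reader keeps its own chunk */
            for (int i = 0; i < CHUNK; i++)
                buffer[i] = task + i;              /* stand-in for reading from disk */
            MPI_Ssend(buffer, CHUNK, MPI_DOUBLE, task, TAG_PDATA, MPI_COMM_WORLD);
        }
        for (int i = 0; i < CHUNK; i++)
            buffer[i] = readTask + i;              /* finally, the reader's own chunk */
    } else {
        MPI_Recv(buffer, CHUNK, MPI_DOUBLE, readTask, TAG_PDATA, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
    }

    printf("task %d has its chunk (first value %g)\n", rank, buffer[0]);
    MPI_Finalize();
    return 0;
}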
Esempio n. 28
0
int main( int argc, char *argv[] )
{
    int msglen, i;
    int msglen_min = MIN_MESSAGE_LENGTH;
    int msglen_max = MAX_MESSAGE_LENGTH;
    int rank,poolsize,Master;
    char *sendbuf,*recvbuf;
    char ival;
    MPI_Request request;
    MPI_Status status;
	
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&poolsize);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    if(poolsize != 2) {
	printf("Expected exactly 2 MPI processes\n");
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }

/* 
   The following check allows this test to run on small-memory systems
   that support the sysconf call interface.  It keeps the test from
   becoming swap-bound.  For example, on an old Linux system or a
   Sony Playstation 2 (really!) 
 */
#if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
    { 
	long n_pages, pagesize;
	int  actmsglen_max;
	n_pages  = sysconf( _SC_PHYS_PAGES );
	pagesize = sysconf( _SC_PAGESIZE );
	/* We want to avoid integer overflow in the size calculation.
	   The best way is to avoid computing any products (such
	   as total memory = n_pages * pagesize) and instead
	   compute a msglen_max that fits within 1/4 of the available 
	   pages */
	if (n_pages > 0 && pagesize > 0) {
	    /* Recompute msglen_max */
	    int msgpages = 4 * ((msglen_max + pagesize - 1)/ pagesize);
	    while (n_pages < msgpages) { msglen_max /= 2; msgpages /= 2; }
	}
	/* printf ( "before = %d\n", msglen_max ); */
	MPI_Allreduce( &msglen_max, &actmsglen_max, 1, MPI_INT, 
		       MPI_MIN, MPI_COMM_WORLD );
	msglen_max = actmsglen_max;
	/* printf ( "after = %d\n", msglen_max ); */
    }
#endif

    Master = (rank == 0);	

    if(Master && verbose)
	printf("Size (bytes)\n------------\n");
    for(msglen = msglen_min; msglen <= msglen_max; msglen *= 2) {

	sendbuf = malloc(msglen);
	recvbuf = malloc(msglen);
	if(sendbuf == NULL || recvbuf == NULL) {
	    printf("Can't allocate %d bytes\n",msglen);
	    MPI_Abort( MPI_COMM_WORLD, 1 );
	}

	ival = 0;
	for (i=0; i<msglen; i++) {
	    sendbuf[i] = ival++;
	    recvbuf[i] = 0;
	}


	if(Master && verbose) 
	    printf("%d\n",msglen);
	fflush(stdout);

	MPI_Barrier(MPI_COMM_WORLD);
		
	/* Send/Recv */
	if(Master) 
	    MPI_Send(sendbuf,msglen,MPI_CHAR,1,TAG1,MPI_COMM_WORLD);
	else {
	    Resetbuf( recvbuf, msglen );
	    MPI_Recv(recvbuf,msglen,MPI_CHAR,0,TAG1,MPI_COMM_WORLD,&status);
	    Checkbuf( recvbuf, msglen, &status );
	}

	MPI_Barrier(MPI_COMM_WORLD);

	/* Ssend/Recv */
	if(Master) 
	    MPI_Ssend(sendbuf,msglen,MPI_CHAR,1,TAG2,MPI_COMM_WORLD);
	else {
	    Resetbuf( recvbuf, msglen );
	    MPI_Recv(recvbuf,msglen,MPI_CHAR,0,TAG2,MPI_COMM_WORLD,&status);
	    Checkbuf( recvbuf, msglen, &status );
	}

	MPI_Barrier(MPI_COMM_WORLD);
		
	/* Rsend/Recv */
	if (Master) {
	    MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, 1, TAGSR,
			  MPI_BOTTOM, 0, MPI_INT, 1, TAGSR,
			  MPI_COMM_WORLD, &status );
	    MPI_Rsend( sendbuf,msglen,MPI_CHAR,1,TAG3,MPI_COMM_WORLD );
	}
	else {
	    Resetbuf( recvbuf, msglen );
	    MPI_Irecv( recvbuf,msglen,MPI_CHAR,0,TAG3,MPI_COMM_WORLD,&request);
	    MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, 0, TAGSR,
			  MPI_BOTTOM, 0, MPI_INT, 0, TAGSR,
			  MPI_COMM_WORLD, &status );
	    MPI_Wait( &request, &status );
	    Checkbuf( recvbuf, msglen, &status );
	}
	    
	MPI_Barrier(MPI_COMM_WORLD);

	/* Isend/Recv - receive not ready */
	if(Master) {
	    MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, 1, TAGSR,
			  MPI_BOTTOM, 0, MPI_INT, 1, TAGSR,
			  MPI_COMM_WORLD, &status );
	    MPI_Isend(sendbuf,msglen,MPI_CHAR,1,TAG4,MPI_COMM_WORLD, &request);
	    MPI_Wait( &request, &status );
	}
	else {
	    Resetbuf( recvbuf, msglen );
	    MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, 0, TAGSR,
			  MPI_BOTTOM, 0, MPI_INT, 0, TAGSR,
			  MPI_COMM_WORLD, &status );
	    MPI_Recv(recvbuf,msglen,MPI_CHAR,0,TAG4,MPI_COMM_WORLD,&status);
	    Checkbuf( recvbuf, msglen, &status );
	}

	MPI_Barrier(MPI_COMM_WORLD);

	free(sendbuf);
	free(recvbuf);
    }

    if (rank == 0) {
	/* If we do not abort, we saw no errors */
	printf( " No Errors\n" );
    }

    MPI_Finalize();
    return 0;
}
Esempio n. 29
0
int main(int argc, char* argv[])
{
  int myrank;            /* the rank of this process */
  int left;              /* the rank of the process to the left */
  int right;             /* the rank of the process to the right */
  int size;              /* number of processes in the communicator */
  int tag = 0;           /* scope for adding extra information to a message */
  MPI_Status status;     /* struct used by MPI_Recv */
  char sendbuf[BUFSIZ];
  char recvbuf[BUFSIZ];

  /* MPI_Init returns once it has started up processes */
  MPI_Init( &argc, &argv );

  /* size and rank will become ubiquitous */ 
  MPI_Comm_size( MPI_COMM_WORLD, &size );
  MPI_Comm_rank( MPI_COMM_WORLD, &myrank );

  /* 
  ** Example constrained to a maximum of 4 processes
  ** (will work with fewer).
  ** However, the communication pattern will work for
  ** an arbitrary number of processes.
  */
  if (size > 4) {
    fprintf(stderr,"Error: this example must be run on <= 4 processes\n");
    MPI_Abort(MPI_COMM_WORLD,1);
  }

  /* 
  ** determine process ranks to the left and right of myrank
  ** respecting periodic boundary conditions
  */
  right = (myrank + 1) % size;
  left = (myrank == 0) ? (myrank + size - 1) : (myrank - 1);

  /* compose messages */
  switch (myrank) {
  case 0:
    sprintf(sendbuf, "Message from Crosby (process %d)", myrank);
    break;
  case 1:
    sprintf(sendbuf, "Message from Stills (process %d)", myrank);
    break;
  case 2:
    sprintf(sendbuf, "Message from Nash (process %d)", myrank);
    break;
  case 3:
    sprintf(sendbuf, "Message from Young (process %d)", myrank);
    break;
  default:
    sprintf(sendbuf, "Program should never reach here");
  }

  /*
  ** Communication pattern:
  ** Even processes send first, then receive.
  ** Odds do the converse.
  ** Note that I am using MPI_Ssend() to emphasise that this pattern
  ** will not deadlock due to blocking calls.
  */
  if ((myrank % 2) == 0) {
    /* first wave */
    MPI_Ssend(sendbuf,strlen(sendbuf)+1, MPI_CHAR, right, tag, MPI_COMM_WORLD);
    MPI_Recv(recvbuf, BUFSIZ, MPI_CHAR, left, tag, MPI_COMM_WORLD, &status);
    printf("rank %d: %s\n", myrank, recvbuf);
    /* second wave */
    MPI_Ssend(sendbuf,strlen(sendbuf)+1, MPI_CHAR, left, tag, MPI_COMM_WORLD);
    MPI_Recv(recvbuf, BUFSIZ, MPI_CHAR, right, tag, MPI_COMM_WORLD, &status);
    printf("rank %d: %s\n", myrank, recvbuf);
  }
  else {
    /* first wave */
    MPI_Recv(recvbuf, BUFSIZ, MPI_CHAR, left, tag, MPI_COMM_WORLD, &status);
    printf("rank %d: %s\n", myrank, recvbuf);
    MPI_Ssend(sendbuf,strlen(sendbuf)+1, MPI_CHAR, right, tag, MPI_COMM_WORLD);
    /* second wave */
    MPI_Recv(recvbuf, BUFSIZ, MPI_CHAR, right, tag, MPI_COMM_WORLD, &status);
    printf("rank %d: %s\n", myrank, recvbuf);
    MPI_Ssend(sendbuf,strlen(sendbuf)+1, MPI_CHAR, left, tag, MPI_COMM_WORLD);
  }

  /* don't forget to tidy up when we're done */
  MPI_Finalize();

  /* and exit the program */
  return EXIT_SUCCESS;
}
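The even/odd ordering above is one way to keep a ring of blocking MPI_Ssend calls from deadlocking; the other standard answer is MPI_Sendrecv, which pairs the send and receive in a single call and leaves the ordering to the library. A minimal sketch of the same two-wave ring exchange written with MPI_Sendrecv; the message text is shortened for illustration:

#include <mpi.h>
#include <stdio.h>
#include <string.h>

/* Sketch: the same periodic ring exchange, but using MPI_Sendrecv so no
 * even/odd ordering of blocking calls is needed. */
int main(int argc, char *argv[])
{
    char sendbuf[BUFSIZ], recvbuf[BUFSIZ];
    int myrank, size, tag = 0;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    int right = (myrank + 1) % size;
    int left  = (myrank + size - 1) % size;

    sprintf(sendbuf, "Message from process %d", myrank);

    /* first wave: send right, receive from the left */
    MPI_Sendrecv(sendbuf, (int)strlen(sendbuf) + 1, MPI_CHAR, right, tag,
                 recvbuf, BUFSIZ, MPI_CHAR, left, tag,
                 MPI_COMM_WORLD, &status);
    printf("rank %d: %s\n", myrank, recvbuf);

    /* second wave: send left, receive from the right */
    MPI_Sendrecv(sendbuf, (int)strlen(sendbuf) + 1, MPI_CHAR, left, tag,
                 recvbuf, BUFSIZ, MPI_CHAR, right, tag,
                 MPI_COMM_WORLD, &status);
    printf("rank %d: %s\n", myrank, recvbuf);

    MPI_Finalize();
    return 0;
}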
Esempio n. 30
0
/* does the actual write of the file */
static void file_write_rays2fits(long fileNum, long firstTask, long lastTask, MPI_Comm fileComm)
{
  const char *ttype[] = 
    { "nest", "ra", "dec", "A00", "A01", "A10", "A11"
#ifdef OUTPUTRAYDEFLECTIONS
      , "alpha0", "alpha1"
#endif
#ifdef OUTPUTPHI
      , "phi"
#endif
    };
  
  const char *tform[] = 
    { "K", "D", "D", "D", "D", "D", "D"
#ifdef OUTPUTRAYDEFLECTIONS
      , "D", "D"
#endif
#ifdef OUTPUTPHI
      , "D"
#endif
    };
  
  char name[MAX_FILENAME];
  char bangname[MAX_FILENAME];
  long NumRaysInFile,i,j;
  long *NumRaysInPeanoCell,*StartRaysInPeanoCell,peano;
  
  fitsfile *fptr;
  int status = 0;
  int naxis = 1;
  long naxes[1],fpixel[1];
  LONGLONG nrows;
  int tfields,colnum;
  long k,chunkInd,firstInd,lastInd,NumRaysInChunkBase,NumRaysInChunk,NumChunks;
  LONGLONG firstrow,firstelem,nelements;
  double *darr;
  long *larr;
  char *buff;
  double ra,dec;
  
  long nwc=0,NtotToRecv,nw=0,nwg=0,rpeano,rowloc;
  MPI_Status mpistatus;
  double t0 = 0.0;
  
  sprintf(name,"%s/%s%04ld.%04ld",rayTraceData.OutputPath,rayTraceData.RayOutputName,rayTraceData.CurrentPlaneNum,fileNum);
  sprintf(bangname,"!%s",name);
  
  /* build fits table layout*/
  tfields = 7;
#ifdef OUTPUTRAYDEFLECTIONS
  tfields += 2;
#endif
#ifdef OUTPUTPHI
  tfields += 1;
#endif
  
  /* build file layout*/
  NumRaysInPeanoCell = (long*)malloc(sizeof(long)*NbundleCells);
  assert(NumRaysInPeanoCell != NULL);
  StartRaysInPeanoCell = (long*)malloc(sizeof(long)*NbundleCells);
  assert(StartRaysInPeanoCell != NULL);
  for(i=0;i<NbundleCells;++i)
    StartRaysInPeanoCell[i] = 0;
  for(i=0;i<NbundleCells;++i)
    {
      if(ISSETBITFLAG(bundleCells[i].active,PRIMARY_BUNDLECELL))
	{
	  peano = nest2peano(bundleCells[i].nest,rayTraceData.bundleOrder);
	  StartRaysInPeanoCell[peano] = bundleCells[i].Nrays;
	  nwc += bundleCells[i].Nrays;
	}
    }
  MPI_Allreduce(StartRaysInPeanoCell,NumRaysInPeanoCell,(int) NbundleCells,MPI_LONG,MPI_SUM,fileComm);
  j = 0;
  for(i=0;i<NbundleCells;++i)
    {
      StartRaysInPeanoCell[i] = j;
      j += NumRaysInPeanoCell[i];
    }
  NumRaysInFile = j;
  
  /* make the file and write header info */
  if(ThisTask == firstTask)
    {
      t0 = -MPI_Wtime();
      
      remove(name);
      
      fits_create_file(&fptr,bangname,&status);
      if(status)
        fits_report_error(stderr,status);
      
      naxes[0] = 2l*NbundleCells;
      fits_create_img(fptr,LONGLONG_IMG,naxis,naxes,&status);
      if(status)
        fits_report_error(stderr,status);
      
      fpixel[0] = 0+1;
      fits_write_pix(fptr,TLONG,fpixel,(LONGLONG) (NbundleCells),NumRaysInPeanoCell,&status);
      if(status)
        fits_report_error(stderr,status);
      
      fpixel[0] = NbundleCells+1;
      fits_write_pix(fptr,TLONG,fpixel,(LONGLONG) (NbundleCells),StartRaysInPeanoCell,&status);
      if(status)
        fits_report_error(stderr,status);
      
      fits_write_key(fptr,TLONG,"NumFiles",&(rayTraceData.NumRayOutputFiles),"number of files that rays are split into",&status);
      if(status)
        fits_report_error(stderr,status);
      
      fits_write_key(fptr,TLONG,"PeanoCellHEALPixOrder",&(rayTraceData.bundleOrder),"HEALPix order of peano indexed cells rays are organized into",&status);
      if(status)
        fits_report_error(stderr,status);
      
      fits_write_key(fptr,TLONG,"RayHEALPixOrder",&(rayTraceData.rayOrder),"HEALPix order of ray grid",&status);
      if(status)
        fits_report_error(stderr,status);
      
      nrows = (LONGLONG) (NumRaysInFile);
      fits_create_tbl(fptr,BINARY_TBL,nrows,tfields,ttype,tform,NULL,"Rays",&status);
      if(status)
        fits_report_error(stderr,status);
      
      fits_get_rowsize(fptr,&NumRaysInChunkBase,&status);
      if(status)
	fits_report_error(stderr,status);
    }
  
  MPI_Bcast(&NumRaysInChunkBase,1,MPI_LONG,0,fileComm);
  if(sizeof(long) > sizeof(double))
    buff = (char*)malloc(sizeof(long)*NumRaysInChunkBase);
  else
    buff = (char*)malloc(sizeof(double)*NumRaysInChunkBase);
  assert(buff != NULL);
  darr = (double*) buff;
  larr = (long*) buff;
  
  for(i=firstTask;i<=lastTask;++i)
    {
      if(ThisTask == i)
	{
#ifdef DEBUG
#if DEBUG_LEVEL > 0
	  fprintf(stderr,"%d: fileNum = %ld, first,last = %ld|%ld\n",ThisTask,fileNum,firstTask,lastTask);
#endif
#endif
	  if(ThisTask != firstTask)
	    MPI_Send(&nwc,1,MPI_LONG,(int) firstTask,TAG_RAYIO_TOTNUM,MPI_COMM_WORLD);
	  
	  for(rpeano=0;rpeano<NrestrictedPeanoInd;++rpeano)
            {
              j = bundleCellsRestrictedPeanoInd2Nest[rpeano];
              
	      if(ISSETBITFLAG(bundleCells[j].active,PRIMARY_BUNDLECELL))
		{
		  peano = nest2peano(bundleCells[j].nest,rayTraceData.bundleOrder);
		  
		  assert(NumRaysInPeanoCell[peano] == ((1l) << (2*(rayTraceData.rayOrder-rayTraceData.bundleOrder))));
		  assert((StartRaysInPeanoCell[peano] - 
			  ((StartRaysInPeanoCell[peano])/(((1l) << (2*(rayTraceData.rayOrder-rayTraceData.bundleOrder)))))
			  *(((1l) << (2*(rayTraceData.rayOrder-rayTraceData.bundleOrder))))) == 0);
		  
		  NumChunks = NumRaysInPeanoCell[peano]/NumRaysInChunkBase;
		  if(NumChunks*NumRaysInChunkBase < NumRaysInPeanoCell[peano])
		    NumChunks += 1;
		  
		  firstrow = (LONGLONG) (StartRaysInPeanoCell[peano]) + (LONGLONG) 1;
		  firstelem = 1;
		  for(chunkInd=0;chunkInd<NumChunks;++chunkInd)
		    {
		      firstInd = chunkInd*NumRaysInChunkBase;
		      lastInd = (chunkInd+1)*NumRaysInChunkBase-1;
		      if(lastInd >= NumRaysInPeanoCell[peano]-1)
			lastInd = NumRaysInPeanoCell[peano]-1;
		      NumRaysInChunk = lastInd - firstInd + 1;
		      
		      nelements = (LONGLONG) NumRaysInChunk;
		      nw += NumRaysInChunk;
		      
		      if(ThisTask != firstTask)
			{
			  rowloc = firstrow;
			  MPI_Send(&rowloc,1,MPI_LONG,(int) firstTask,TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD);
			  MPI_Send(&NumRaysInChunk,1,MPI_LONG,(int) firstTask,TAG_RAYIO_NUMCHUNK,MPI_COMM_WORLD);
			  colnum = TAG_RAYIO_CHUNKDATA+1;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    larr[k-firstInd] = bundleCells[j].rays[k].nest;
			  MPI_Ssend(larr,(int) NumRaysInChunk,MPI_LONG,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    {
			      vec2radec(bundleCells[j].rays[k].n,&ra,&dec);
			      darr[k-firstInd] = ra;
			    }
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    {
			      vec2radec(bundleCells[j].rays[k].n,&ra,&dec);
			      darr[k-firstInd] = dec;
			    }
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].A[2*0+0];
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
                          ++colnum;
			  			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].A[2*0+1];
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].A[2*1+0];
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].A[2*1+1];
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
			  
#ifdef OUTPUTRAYDEFLECTIONS
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].alpha[0];
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].alpha[1];
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
#endif
#ifdef OUTPUTPHI
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].phi;
			  MPI_Ssend(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) firstTask,colnum,MPI_COMM_WORLD);
			  ++colnum;
#endif
			  firstrow += nelements;
			}
		      else
			{
			  colnum = 1;
			  for(k=firstInd;k<=lastInd;++k)
			    larr[k-firstInd] = bundleCells[j].rays[k].nest;
			  fits_write_col(fptr,TLONG,colnum,firstrow,firstelem,nelements,larr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    {
			      vec2radec(bundleCells[j].rays[k].n,&ra,&dec);
			      darr[k-firstInd] = ra;
			    }
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    {
			      vec2radec(bundleCells[j].rays[k].n,&ra,&dec);
			      darr[k-firstInd] = dec;
			    }
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].A[2*0+0];
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].A[2*0+1];
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].A[2*1+0];
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].A[2*1+1];
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
			  
#ifdef OUTPUTRAYDEFLECTIONS
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].alpha[0];
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
			  
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].alpha[1];
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
#endif
#ifdef OUTPUTPHI
			  for(k=firstInd;k<=lastInd;++k)
			    darr[k-firstInd] = bundleCells[j].rays[k].phi;
			  fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
			  if(status)
			    fits_report_error(stderr,status);
			  ++colnum;
#endif
			  firstrow += nelements;
			}
		    }// for(chunkInd=0;chunkInd<NumChunks;++chunkInd)
		} //if(ISSETBITFLAG(bundleCells[j].active,PRIMARY_BUNDLECELL)).
	    } //for(j=0;j<NbundleCells;++j)
	} //if(ThisTask == i)
      
      if(i != firstTask && ThisTask == firstTask)
	{
	  MPI_Recv(&NtotToRecv,1,MPI_LONG,(int) i,TAG_RAYIO_TOTNUM,MPI_COMM_WORLD,&mpistatus);
	  
	  firstelem = 1;
	  while(NtotToRecv > 0)
	    {
	      MPI_Recv(&rowloc,1,MPI_LONG,(int) i,TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      MPI_Recv(&NumRaysInChunk,1,MPI_LONG,(int) i,TAG_RAYIO_NUMCHUNK,MPI_COMM_WORLD,&mpistatus);
	      firstrow = (LONGLONG) (rowloc);
	      nelements = (LONGLONG) NumRaysInChunk;
	      colnum = 1;
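	      /* receive each column buffer for this chunk; the tag colnum+TAG_RAYIO_CHUNKDATA
	         identifies which column the buffer belongs to */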
	      
	      MPI_Recv(larr,(int) NumRaysInChunk,MPI_LONG,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TLONG,colnum,firstrow,firstelem,nelements,larr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
	      
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
	      
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
	      
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
	      
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
	      
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
	      
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
	      
#ifdef OUTPUTRAYDEFLECTIONS
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
	      
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
#endif
#ifdef OUTPUTPHI
	      MPI_Recv(darr,(int) NumRaysInChunk,MPI_DOUBLE,(int) i,colnum+TAG_RAYIO_CHUNKDATA,MPI_COMM_WORLD,&mpistatus);
	      fits_write_col(fptr,TDOUBLE,colnum,firstrow,firstelem,nelements,darr,&status);
	      if(status)
		fits_report_error(stderr,status);
	      ++colnum;
#endif
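
	      /* tally the rays received from task i and decrement the count still outstanding */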
	      nwg += NumRaysInChunk;
	      NtotToRecv -= NumRaysInChunk;
	    }
	}
      
      //////////////////////////////
      /* keep all tasks in this file's communicator in sync before moving on to the next task's rays */
      MPI_Barrier(fileComm);
      //////////////////////////////
    }
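
  /* only the writer task closes the FITS file, logs the elapsed write time, and
     checks that it received the expected number of rays from the other tasks */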
  
  if(ThisTask == firstTask)
    {
      fits_close_file(fptr,&status);
      if(status)
	fits_report_error(stderr,status);
      
      t0 += MPI_Wtime();

#ifdef DEBUG      
      fprintf(stderr,"writing %ld rays to file '%s' took %g seconds.\n",NumRaysInFile,name,t0);
#endif
      
      assert(nwg == NumRaysInFile-nw); //error check # of rays recvd
    }
  
  //error check # of rays written
  MPI_Allreduce(&nw,&nwg,1,MPI_LONG,MPI_SUM,fileComm);
  assert(nw == nwc);
  assert(nwg == NumRaysInFile);
  
  //clean up and close files for this task
  free(buff);
  free(StartRaysInPeanoCell);
  free(NumRaysInPeanoCell);
}