Example #1
0
void main( void )
{

	typedef enum
	{	CONFIGURE,
		INITIALISE1,
		RUN
	} MAINLOOP_STATE;

	static MAINLOOP_STATE MainLoopState = CONFIGURE;

	while( 1 ) // Main Super Loop
	{
		switch ( MainLoopState )
		{
		case CONFIGURE :
			
			if ( Configure() == Command_Complete )
			{
				MainLoopState = INITIALISE1;
			}
		break;

		case INITIALISE1 :
			if ( Initialise() == Command_Complete )
			{
				MainLoopState = RUN;
			}
		break;

		case RUN :
		
			Manage_Comms();
			Manage_Movements();
		break;
		}
	}

}
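
The state machine above only works because each step function is non-blocking and reports its own completion. The fragment below is a minimal sketch of that convention, not the original code (the COMMAND_STATUS enum, the Command_Busy value and the progress counter are assumptions): each call does one small slice of work and returns Command_Complete once it has finished, so the super loop never stalls.

typedef enum { Command_Busy, Command_Complete } COMMAND_STATUS;

static COMMAND_STATUS Configure( void )
{
	static int configured_channels = 0;	/* hypothetical progress counter */

	configured_channels++;			/* configure one channel per pass */
	return ( configured_channels >= 4 ) ? Command_Complete : Command_Busy;
}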
Example #2
0
int main ( int argc, char *argv[] ) {

  // Solution arrays
  real *h_u; /* to be allocated in ROOT only */ 
  real *t_u;
  real *t_un;

  // Auxiliary variables
  int rank;
  int size;
  int step;
  dmn domain;
  double wtime;
  int nbrs[6];
  int i, j, k;

  // Initialize MPI
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // if the number of processes is not SX*SY*SZ, terminate.
  if (size != SX*SY*SZ){
    if (rank==ROOT)
      fprintf(stderr,"%s: needs exactly %d processors.\n", argv[0], SX*SY*SZ);
    MPI_Finalize();
    return 1;
  }

  // verify subsizes
  if (NX%SX!=0 || NY%SY!=0 || NZ%SZ!=0) {
    if (rank==ROOT) 
      fprintf(stderr,"%s: Subdomain sizes not an integer value.\n", argv[0]);
    MPI_Finalize();
    return 1;
  }

  // Build a 3D Cartesian communicator
  MPI_Comm Comm3d;
  int ndim=3;
  int dim[3]={SZ,SY,SX}; // domain decomposition subdomains
  int period[3]={false,false,false}; // periodicity flags (non-periodic here)
  int reorder=true; // allow rank reordering if necessary
  int coord[3];
  MPI_Cart_create(MPI_COMM_WORLD,ndim,dim,period,reorder,&Comm3d);
  MPI_Comm_rank(Comm3d,&rank); // rank w.r.t. Comm3d
  MPI_Cart_coords(Comm3d,rank,3,coord); // rank coordinates
  
  // Map the neighbours ranks
  MPI_Cart_shift(Comm3d,0,1,&nbrs[TOP],&nbrs[BOTTOM]);
  MPI_Cart_shift(Comm3d,1,1,&nbrs[NORTH],&nbrs[SOUTH]);
  MPI_Cart_shift(Comm3d,2,1,&nbrs[WEST],&nbrs[EAST]);

  // Manage Domain sizes
  domain = Manage_Domain(rank,size,coord,nbrs); 

  // Allocate Memory
  Manage_Memory(0,domain,&h_u,&t_u,&t_un);

  // Root mode: Build Initial Condition 
  if (domain.rank==ROOT) Call_IC(2,h_u);

  // Build MPI data types
  MPI_Datatype myGlobal;
  MPI_Datatype myLocal;
  MPI_Datatype xySlice;
  MPI_Datatype yzSlice;
  MPI_Datatype xzSlice;
  //Manage_DataTypes(0,domain,&xySlice,&yzSlice,&xzSlice,&myLocal,&myGlobal);

  // Build an MPI data type for a subarray in the root processor
  MPI_Datatype global;
  int nx = domain.nx;
  int ny = domain.ny;
  int nz = domain.nz;
  int bigsizes[3] = {NZ,NY,NX};
  int subsizes[3] = {nz,ny,nx};
  int starts[3] = {0,0,0};
  MPI_Type_create_subarray(3, bigsizes, subsizes, starts, MPI_ORDER_C, MPI_CUSTOM_REAL, &global);
  MPI_Type_create_resized(global, 0, nx*sizeof(real), &myGlobal); // resize the extent so displacements count whole x-blocks
  MPI_Type_commit(&myGlobal);
    
  // Build an MPI data type for a subarray in the workers
  int bigsizes2[3] = {R+nz+R,R+ny+R,R+nx+R};
  int subsizes2[3] = {nz,ny,nx};
  int starts2[3] = {R,R,R};
  MPI_Type_create_subarray(3, bigsizes2, subsizes2, starts2, MPI_ORDER_C, MPI_CUSTOM_REAL, &myLocal);
  MPI_Type_commit(&myLocal); // now we can use this custom MPI data type

  // halo data types
  MPI_Datatype yVector;
  MPI_Type_vector( ny, nx, nx+2*R, MPI_CUSTOM_REAL, &xySlice); MPI_Type_commit(&xySlice);
  MPI_Type_vector( ny,  1, nx+2*R, MPI_CUSTOM_REAL, &yVector); 
  MPI_Type_create_hvector(nz, 1, (nx+2*R)*(ny+2*R)*sizeof(real), yVector, &yzSlice); MPI_Type_commit(&yzSlice);
  MPI_Type_vector( nz, nx, (nx+2*R)*(ny+2*R), MPI_CUSTOM_REAL, &xzSlice); MPI_Type_commit(&xzSlice);
  
  // build sendcounts and displacements in the root processor
  int sendcounts[size], displs[size];
  if (rank==ROOT) {
    for (i=0; i<size; i++) sendcounts[i]=1;
    int disp = 0; // displacement counter
    for (k=0; k<SZ; k++) {
      for (j=0; j<SY; j++) {
        for (i=0; i<SX; i++) {
          displs[i+SX*j+SX*SY*k]=disp; disp+=1; // x-displacements
        }
        disp += SX*(ny-1); // y-displacements
      }
      disp += SX*NY*(nz-1); // z-displacements
    }
  }

  // Scatter global array data and exchange halo regions
  MPI_Scatterv(h_u, sendcounts, displs, myGlobal, t_u, 1, myLocal, ROOT, Comm3d);
  Manage_Comms(domain,Comm3d,xySlice,yzSlice,xzSlice,t_u); MPI_Barrier(Comm3d);
   
  // ROOT mode: Record the starting time.
  if (rank==ROOT) wtime=MPI_Wtime();

  // Asynchronous MPI Solver
  for (step = 0; step < NO_STEPS; step+=2) {
    // print iteration in ROOT mode
    if (rank==ROOT && step%10000==0) printf("  Step %d of %d\n",step,(int)NO_STEPS);
    
    // Exchange boundaries and compute the stencil
    Call_Laplace(domain,&t_u,&t_un); Manage_Comms(domain,Comm3d,xySlice,yzSlice,xzSlice,t_un); // 1st iteration
    Call_Laplace(domain,&t_un,&t_u); Manage_Comms(domain,Comm3d,xySlice,yzSlice,xzSlice,t_u ); // 2nd iteration
  }
  
  // ROOT mode: Record the final time.
  if (rank==ROOT) {
    wtime = MPI_Wtime()-wtime; printf ("\n Wall clock elapsed = %f seconds\n\n", wtime );    
  }
  /*
  // CAREFUL: uncomment only for debugging. Print subroutine
  for (int p=0; p<size; p++) {
    if (rank == p) {
      printf("Local process on rank %d is:\n", rank);
      for (k=0; k<nz+2*R; k++) {
        printf("-- layer %d --\n",k);
        for (j=0; j<ny+2*R; j++) {
          putchar('|');
          for (i=0; i<nx+2*R; i++) printf("%3.0f ",t_u[i+(nx+2*R)*j+(nx+2*R)*(ny+2*R)*k]);
          printf("|\n");
        }
        printf("\n");
      }
    }
    MPI_Barrier(Comm3d);
  }
  */

  // gather all pieces into the big data array
  MPI_Gatherv(t_u, 1, myLocal, h_u, sendcounts, displs, myGlobal, ROOT, Comm3d);
 
  // save results to file
  //if (rank==0) Print(h_u,NX,NY,NZ);
  if (rank==ROOT) Save_Results(h_u); 

  // Free MPI types
  Manage_DataTypes(1,domain,&xySlice,&yzSlice,&xzSlice,&myLocal,&myGlobal);
  
  // Free Memory
  Manage_Memory(1,domain,&h_u,&t_u,&t_un); 
    
  // finalize MPI
  MPI_Finalize();

  // ROOT mode: Terminate.
  if (rank==ROOT) {
    printf ("HEAT_MPI:\n" );
    printf ("  Normal end of execution.\n\n" );
  }

  return 0;
}
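
The least obvious part of the listing above is pairing MPI_Type_create_subarray with MPI_Type_create_resized so that the displacements passed to MPI_Scatterv count whole blocks rather than single elements. The stand-alone 2D sketch below illustrates only that trick under assumed toy sizes (NYg, NXg and SXg are invented here, not the project's macros): rank 0 holds a row-major NYg x NXg array and each of SXg ranks receives one column block of width nxg.

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
  const int NYg = 4, NXg = 8, SXg = 4, nxg = NXg/SXg;
  int rank, size;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  if (size != SXg) { MPI_Finalize(); return 1; } // this sketch expects -np 4

  double *global = NULL;
  if (rank == 0) {
    global = (double*)malloc(NYg*NXg*sizeof(double));
    for (int i = 0; i < NYg*NXg; i++) global[i] = i;
  }
  double *local = (double*)malloc(NYg*nxg*sizeof(double));

  // subarray describing one NYg x nxg column block of the global array
  MPI_Datatype block, blockResized;
  int bigsizes[2] = {NYg,NXg}, subsizes[2] = {NYg,nxg}, starts[2] = {0,0};
  MPI_Type_create_subarray(2, bigsizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &block);
  // shrink the extent to nxg doubles so displs[] below counts whole column blocks
  MPI_Type_create_resized(block, 0, nxg*sizeof(double), &blockResized);
  MPI_Type_commit(&blockResized);

  int sendcounts[SXg], displs[SXg];
  for (int i = 0; i < SXg; i++) { sendcounts[i] = 1; displs[i] = i; }

  // each rank receives its block as NYg*nxg contiguous doubles
  MPI_Scatterv(global, sendcounts, displs, blockResized, local, NYg*nxg, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  MPI_Type_free(&block); MPI_Type_free(&blockResized);
  free(local); if (rank == 0) free(global);
  MPI_Finalize();
  return 0;
}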
Example #3
0
int main ( int argc, char *argv[] ) {

  // Auxiliary variables
  int rank;
  int size;
  int step;
  dmn domain;
  double wtime;

  // Solution arrays
  real *h_u; /* will be allocated in ROOT only */ 
  real *t_u; /* processor's sub-domain */
  real *d_u;
  real *d_un;

  // Initialize devices
  //Manage_Devices();

  // Initialize MPI
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // if the number of processes is not SZ, terminate.
  if (size != SZ){
    if (rank==ROOT) fprintf(stderr,"%s: needs exactly %d processors.\n", argv[0], SZ);
    MPI_Finalize();
    return 1;
  }

  // verify subsizes
  if (NZ%SZ!=0) {
    if (rank==ROOT) fprintf(stderr,"%s: Subdomain sizes are not an integer value.\n", argv[0]);
    MPI_Finalize();
    return 1;
  }

  // Associate each rank with a GPU: expect one GPU id per rank on the command line
  if (argc < size+1) {
    if (rank==ROOT) printf("Usage : mpirun -np# %s <GPU list per rank>\n", argv[0]);
    MPI_Finalize();
    return 1;
  }

  // Manage Domain sizes
  domain = Manage_Domain(rank,size,atoi(argv[rank+1])); MPI_Barrier(MPI_COMM_WORLD); 

  // Allocate Memory
  Manage_Memory(0,domain,&h_u,&t_u,&d_u,&d_un);

  // Root mode: Build Initial Condition and scatter it to the rest of processors
  if (domain.rank==ROOT) Call_IC(2,h_u);
  MPI_Scatter(h_u, domain.size, MPI_CUSTOM_REAL, t_u+R*NX*NY, domain.size, MPI_CUSTOM_REAL, ROOT, MPI_COMM_WORLD);

  // Send local domain to devices
  Manage_Comms(0,domain,&t_u,&d_u); 

  // Exchange halo data
  Manage_Comms(1,domain,&t_u,&d_u); MPI_Barrier(MPI_COMM_WORLD); 

  // ROOT mode: Record the starting time.
  if (rank==ROOT) wtime=MPI_Wtime();

  // Asynchronous MPI Solver
  for (step = 0; step < NO_STEPS; step+=2) {
    // print iteration in ROOT mode
    if (rank==ROOT && step%10000==0) printf("  Step %d of %d\n",step,(int)NO_STEPS);
    
    // Exchange Boundaries and compute stencil
    Call_Laplace(domain,&d_u,&d_un); Manage_Comms(1,domain,&t_u,&d_un); // 1st iter
    Call_Laplace(domain,&d_un,&d_u); Manage_Comms(1,domain,&t_u,&d_u ); // 2nd iter
  }
  MPI_Barrier(MPI_COMM_WORLD);

  // Gather local domain back from devices
  Manage_Comms(2,domain,&t_u,&d_u);

  // ! Uncomment for debugging
  // if (rank==0) Print_SubDomain(domain,t_u); MPI_Barrier(MPI_COMM_WORLD);
  // if (rank==1) Print_SubDomain(domain,t_u); MPI_Barrier(MPI_COMM_WORLD);
  // if (rank==0) Print_Domain(domain,h_u); MPI_Barrier(MPI_COMM_WORLD);

  // ROOT mode: Record the final time.
  if (rank==ROOT) {
    wtime = MPI_Wtime()-wtime; printf("\n Wall clock elapsed seconds = %f\n\n", wtime );
  }
  
  // Gather solutions to ROOT and write solution in ROOT mode
  MPI_Gather(t_u+R*NX*NY, domain.size, MPI_CUSTOM_REAL, h_u, domain.size, MPI_CUSTOM_REAL, ROOT, MPI_COMM_WORLD);
  if (rank==ROOT) Save_Results(h_u);

  // Free Memory
  Manage_Memory(2,domain,&h_u,&t_u,&d_u,&d_un); MPI_Barrier(MPI_COMM_WORLD);

  // Terminate MPI.
  MPI_Finalize();

  // ROOT mode: Terminate.
  if (rank==ROOT) {
    printf("HEAT_MPI:\n" );
    printf("  Normal end of execution.\n\n" );
  }

  return 0;
}
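
The listing above relies on Manage_Domain(rank,size,atoi(argv[rank+1])) to bind each MPI rank to the GPU id supplied on the command line. The helper below is only a sketch of what that binding could look like with the CUDA runtime API (the name Select_Device and its error handling are assumptions, not the project's code).

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <cuda_runtime.h>

// Hypothetical helper: make the GPU listed for this rank current for all later CUDA calls.
static void Select_Device(int rank, char *argv[]) {
  int gpu = atoi(argv[rank+1]);          // one GPU id per rank, as in the example above
  cudaError_t err = cudaSetDevice(gpu);
  if (err != cudaSuccess) {
    fprintf(stderr, "rank %d: cudaSetDevice(%d) failed: %s\n", rank, gpu, cudaGetErrorString(err));
    MPI_Abort(MPI_COMM_WORLD, 1);
  }
}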
Example #4
0
int main() {
	clock_t t;
	int tid;
	int step;
	// CPU (Host) variables
	float *h_p;			// Primitives Vector - entire domain
	float *h_pl;			// Primitives Vector - local (in-thread) domain
	// GPU (Device) variables
	float *d_p;			// Primitives on GPU
	float *d_u;			// Conserved quantity on GPU
	float *d_Fp;			// Forward Fluxes
	float *d_Fm;			// Backward Fluxes
	size_t size;

	// First, perform 1st phase memory management tasks
	Manage_Memory(0,0, &h_p, &h_pl, &d_p, &d_u, &d_Fp, &d_Fm);          

	// Set the number of threads
	omp_set_num_threads(2);
	#pragma omp parallel shared(h_p) private(tid, h_pl, d_p, d_u, d_Fp, d_Fm, step)
	{
		// Now living in multiple-thread land. Get the thread ID.
		tid = omp_get_thread_num();

		// Allocate memory on GPU for each thread
		Manage_Memory(1, tid, &h_p, &h_pl, &d_p, &d_u, &d_Fp, &d_Fm); 	

		// Compute the Initial Conditions on the GPU
		Call_GPU_Init(&d_p, &d_u, tid); 
		#pragma omp barrier

		// Record the starting processor time
		t = clock();	

		// Solver Loop
		for (step = 0; step < NO_STEPS; step++) {
			if (step % 1000 == 0) printf("Step %d of %d\n", step, NO_STEPS);

			// Synchronization work 
			// 1) Copy h_pl (on host) from d_p on the device
			Manage_Comms(3, tid, &h_p, &h_pl, &d_p, &d_u, &d_Fp, &d_Fm); 		
			#pragma omp barrier
			// 2) Update h_p from h_pl prior to bounds adjustment
			Manage_Bounds(-1, tid, h_p, h_pl);
			// 3) Update h_pl from h_p after bounds adjustment
			Manage_Bounds(0, tid, h_p, h_pl);
			// 4) Send h_pl from the host back to the GPU (d_p), ready for flux calculation
			Manage_Comms(2, tid, &h_p, &h_pl, &d_p, &d_u, &d_Fp, &d_Fm); 		

			// Calculate flux now
			Call_GPU_Calc_Flux(&d_p, &d_Fp, &d_Fm, tid);
			// Update state now
			Call_GPU_Calc_State(&d_p, &d_u, &d_Fp, &d_Fm, tid);
			#pragma omp barrier
		}

		// Measure computation time
		t = clock()-t;

		// Grab all results from the GPU devices and store in h_pl
		Manage_Comms(1, tid, &h_p, &h_pl, &d_p, &d_u, &d_Fp, &d_Fm); 	
		#pragma omp barrier

		// Save h_pl to h_p
		Manage_Bounds(1, tid, h_p, h_pl);
		
		// Free GPU memory on each thread
		Manage_Memory(2, tid, &h_p, &h_pl, &d_p, &d_u, &d_Fp, &d_Fm);		
		#pragma omp barrier
	}

	// Save results
	Save_Results(h_p);

	// Report time
	printf("CPU time (%f seconds).\n",((float)t)/CLOCKS_PER_SEC);

	// Free last memory on host
	Manage_Memory(3,0, &h_p, &h_pl, &d_p, &d_u, &d_Fp, &d_Fm);			

	return 0;
}
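
One caveat in the timing above: clock() measures processor time, which on many platforms accumulates over all OpenMP threads, so the printed figure can exceed the real elapsed time. Below is a minimal sketch of timing the same kind of region with omp_get_wtime(), which returns wall-clock seconds (the body of the parallel region is just a placeholder).

#include <omp.h>
#include <stdio.h>

int main( void ) {
	double t0 = omp_get_wtime();		// wall-clock start
	#pragma omp parallel num_threads(2)
	{
		/* ... per-thread solver work would go here ... */
	}
	double t1 = omp_get_wtime();		// wall-clock end
	printf("Wall clock time (%f seconds).\n", t1 - t0);
	return 0;
}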
Example #5
-1
int main ( int argc, char *argv[] ) {

  // Auxiliary variables
  int rank;
  int npcs;
  int step;
  dmn domain;
  double wtime;

  // Solution arrays
  double *g_u; /* will be allocated in ROOT only */ 
  double *t_u;
  double *t_un;

  // Initialize MPI
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &npcs);

  // Manage Domain sizes
  domain = Manage_Domain(rank,npcs); 

  // Allocate Memory
  Manage_Memory(0,domain,&g_u,&t_u,&t_un);

  // Root mode: Build Initial Condition and scatter it to the rest of processors
  if (domain.rank==ROOT) Call_IC(2,g_u);
  MPI_Scatter(g_u, domain.size, MPI_DOUBLE, t_u+NX*NY, domain.size, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);

  // Exchange halo regions
  Manage_Comms(domain,&t_u); MPI_Barrier(MPI_COMM_WORLD);

  // ROOT mode: Record the starting time.
  if (rank==ROOT) wtime=MPI_Wtime();

  // Asynchronous MPI Solver
  for (step = 0; step < NO_STEPS; step+=2) {
    // print iteration in ROOT mode
    if (rank==ROOT && step%10000==0) printf("  Step %d of %d\n",step,(int)NO_STEPS);
    
    // Exchange Boundaries and compute stencil
    Call_Laplace(domain,&t_u,&t_un); Manage_Comms(domain,&t_un); // 1st iter
    Call_Laplace(domain,&t_un,&t_u); Manage_Comms(domain,&t_u ); // 2nd iter
  }
  MPI_Barrier(MPI_COMM_WORLD);

  // ROOT mode: Record the final time.
  if (rank==ROOT) {
    wtime = MPI_Wtime()-wtime;
    printf ("\n Wall clock elapsed seconds = %f\n\n", wtime );
  }
  
  // Gather solutions to ROOT and write solution in ROOT mode
  MPI_Gather(t_u+NX*NY, domain.size, MPI_DOUBLE, g_u, domain.size, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);
  if (rank==ROOT) Save_Results(g_u);

  // Free Memory
  Manage_Memory(1,domain,&g_u,&t_u,&t_un); MPI_Barrier(MPI_COMM_WORLD);

  // Terminate MPI.
  MPI_Finalize();

  // ROOT mode: Terminate.
  if (rank==ROOT) {
    printf ("HEAT_MPI:\n" );
    printf ("  Normal end of execution.\n\n" );
  }

  return 0;
}
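
For reference, here is a minimal sketch of the halo exchange that Manage_Comms(domain,&t_u) presumably performs for this 1D slab decomposition. It is illustrative only: it assumes one ghost plane of NX*NY doubles on either side of the sub-domain (consistent with the t_u+NX*NY offset used in the scatter above) and hypothetical dmn fields rank, npcs and nz coming from the project's headers.

#include <mpi.h>

void Exchange_Halos(dmn domain, double *t_u) {
  MPI_Status status;
  int nz   = domain.nz;                                              // interior z-planes per rank (assumed field)
  int prev = (domain.rank == 0)               ? MPI_PROC_NULL : domain.rank - 1;
  int next = (domain.rank == domain.npcs - 1) ? MPI_PROC_NULL : domain.rank + 1;

  // send our first interior plane to the previous rank; receive the trailing ghost plane from the next rank
  MPI_Sendrecv(t_u + NX*NY,        NX*NY, MPI_DOUBLE, prev, 0,
               t_u + (nz+1)*NX*NY, NX*NY, MPI_DOUBLE, next, 0, MPI_COMM_WORLD, &status);
  // send our last interior plane to the next rank; receive the leading ghost plane from the previous rank
  MPI_Sendrecv(t_u + nz*NX*NY,     NX*NY, MPI_DOUBLE, next, 1,
               t_u,                NX*NY, MPI_DOUBLE, prev, 1, MPI_COMM_WORLD, &status);
}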