Example #1
int main(int argc, char **argv) {
  /* Timing variables */
  struct timeval etstart, etstop;  /* Elapsed times using gettimeofday() */
  struct timezone tzdummy;
  clock_t etstart2, etstop2;  /* Elapsed times using times() */
  unsigned long long usecstart, usecstop;
  struct tms cputstart, cputstop;  /* CPU times for my processes */

  /* Process program parameters */
  parameters(argc, argv);

  /* Initialize A and B */
  initialize_inputs();

  /* Print input matrices */
  print_inputs();

  /* Start Clock */
  printf("\nStarting clock.\n");
  gettimeofday(&etstart, &tzdummy);
  etstart2 = times(&cputstart);

  /* Gaussian Elimination */
  gauss();

  /* Stop Clock */
  gettimeofday(&etstop, &tzdummy);
  etstop2 = times(&cputstop);
  printf("Stopped clock.\n");
  usecstart = (unsigned long long)etstart.tv_sec * 1000000 + etstart.tv_usec;
  usecstop = (unsigned long long)etstop.tv_sec * 1000000 + etstop.tv_usec;

  /* Display output */
  print_X();

  /* Display timing results */
  printf("\nElapsed time = %g ms.\n",
         (float)(usecstop - usecstart)/(float)1000);

  printf("(CPU times are accurate to the nearest %g ms)\n",
         1.0/(float)CLOCKS_PER_SEC * 1000.0);
  printf("My total CPU time for parent = %g ms.\n",
         (float)( (cputstop.tms_utime + cputstop.tms_stime) -
         (cputstart.tms_utime + cputstart.tms_stime) ) /
         (float)CLOCKS_PER_SEC * 1000);
  printf("My system CPU time for parent = %g ms.\n",
         (float)(cputstop.tms_stime - cputstart.tms_stime) /
         (float)CLOCKS_PER_SEC * 1000);
  printf("My total CPU time for child processes = %g ms.\n",
         (float)( (cputstop.tms_cutime + cputstop.tms_cstime) -
         (cputstart.tms_cutime + cputstart.tms_cstime) ) /
         (float)CLOCKS_PER_SEC * 1000);
      /* Contrary to the man pages, this appears not to include the parent */
  printf("--------------------------------------------\n");

  exit(0);
}
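A note on the two clocks used above: gettimeofday() yields wall-clock microseconds, while times() reports CPU time in clock ticks whose rate is sysconf(_SC_CLK_TCK); CLOCKS_PER_SEC is the unit of clock(), not of times(). A minimal self-contained sketch of the corrected scaling pattern, added here as an illustration (not part of the original listing):

/* Standalone sketch: wall and CPU time via times(), scaled by _SC_CLK_TCK. */
#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void) {
  struct tms t0, t1;
  clock_t w0 = times(&t0);
  volatile double s = 0;                 /* busy work to measure */
  for (long i = 1; i < 50000000; i++) s += 1.0 / i;
  clock_t w1 = times(&t1);
  long tck = sysconf(_SC_CLK_TCK);       /* ticks per second, often 100 */
  printf("wall = %g ms, cpu = %g ms\n",
         (double)(w1 - w0) / tck * 1000.0,
         (double)((t1.tms_utime + t1.tms_stime) -
                  (t0.tms_utime + t0.tms_stime)) / tck * 1000.0);
  return 0;
}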
Example #2
int main(int argc, char **argv) {

  argc--;  /* drop the trailing command-line argument before MPI_Init (cf. Examples #7 and #8) */

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);
  MPI_Comm_size(MPI_COMM_WORLD, &procs);
  /* Process program parameters */
  parameters(argc, argv);


  if(id == 0) {
  	/* Initialize A and B */
    initialize_inputs();
    /* Print input matrices */
    print_inputs();
  }

  /* Gaussian Elimination */
  gauss();
   
  // if(id == 0) {
  //   /* Display output */
  //   print_X();
  //
  //   /* Optional check: compare X against a serial reference X1 */
  //   int right = 1, j;
  //   for(j = 0; j < N; j++) {
  //     float dif = X[j] - X1[j];
  //     if (dif < 0)  dif = -dif;
  //     if (dif > 0.0001) {
  //       printf("X: %f\n", X[j]);
  //       printf("X1: %f\n", X1[j]);
  //       right = 0;
  //       break;
  //     }
  //   }
  //
  //   if(right == 1)  printf("\nRight!\n");
  //   else  printf("\nWrong!\n");
  // }

  MPI_Finalize();
  return 0;
}
Example #3
int main(int argc, char **argv) {

  parameters(argc, argv);
  initialize_inputs();
  print_inputs();

  gauss();

  print_X();
  
  exit(0);
}
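The drivers in Examples #1-#3 call a serial gauss() that is not listed on this page. A minimal sketch of what it plausibly looks like, assuming the globals described in Example #4's header comment (MAXN, N, A[][], B[], X[], with X[] initialized to zeros); this is a reconstruction, not the original code:

/* Sketch (assumption): serial Gaussian elimination with back substitution
 * over globals N, A[MAXN][MAXN], B[MAXN], X[MAXN], matching the drivers above. */
void gauss() {
  int norm, row, col;
  float multiplier;

  /* Forward elimination: zero out column `norm` below the diagonal */
  for (norm = 0; norm < N - 1; norm++) {
    for (row = norm + 1; row < N; row++) {
      multiplier = A[row][norm] / A[norm][norm];
      for (col = norm; col < N; col++)
        A[row][col] -= A[norm][col] * multiplier;
      B[row] -= B[norm] * multiplier;
    }
  }

  /* Back substitution (diagonal elements are not normalized to 1) */
  for (row = N - 1; row >= 0; row--) {
    X[row] = B[row];
    for (col = N - 1; col > row; col--)
      X[row] -= A[row][col] * X[col];
    X[row] /= A[row][row];
  }
}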
Example #4
/* Provided global variables are MAXN, N, A[][], B[], and X[],
 * defined in the beginning of this code.  X[] is initialized to zeros.
 */
void gauss( float A[],  float B[],  float X[], int my_rank, int p) {
  int norm, row, col, i, j;
  float multiplier, b_norm;
  int end_row;
  MPI_Status status;
  
  /*Slice up the rows */
  int row_workload = (N-1)/p;
  int excess_work = (N-1)%p;
  end_row = row_workload + excess_work;
  for(i = 1; i < p; i++) {
	MPI_Ssend(&A[(N*row_workload*i)+(N*excess_work)+N], row_workload*N, MPI_FLOAT, i, 0, MPI_COMM_WORLD);
	MPI_Ssend(&B[row_workload*i+excess_work+1], row_workload, MPI_FLOAT, i, 0, MPI_COMM_WORLD);
  }
  
  for(norm = 0; norm <= end_row; norm++) {
	/* Rank 0 owns rows 0..end_row, so it always has the latest norm row:
	   send it to every rank after this one */
	for(i = my_rank+1; i < p; i++) {
		b_norm = B[norm];
		MPI_Ssend(&A[(N*norm)+norm], N-norm, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
		MPI_Ssend(&b_norm, 1, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
	}

	/* Eliminate column `norm` from the locally owned rows below it */
	for (row = norm + 1; row <= end_row; row++) {
		multiplier = A[N*row+norm] / A[N*norm+norm];
		for (col = norm; col < N; col++) {
			A[N*row+col] -= A[N*norm+col] * multiplier;
		}
		B[row] -= B[norm] * multiplier;
	}
  }
  
  for(i = 1; i < p; i++) {
	MPI_Recv(&A[(N*row_workload*i)+(N*excess_work)+N], row_workload*N, MPI_FLOAT, i, 3, MPI_COMM_WORLD, &status);
	MPI_Recv(&B[(row_workload*i)+excess_work+1], row_workload, MPI_FLOAT, i, 3, MPI_COMM_WORLD, &status);
  }
  print_inputs(A,B);

  for (row = N - 1; row >= 0; row--) {
	X[row] = B[row];
	for (col = N-1; col > row; col--) {
		X[row] -= A[(N*row)+col] * X[col];
	}
	X[row] /= A[(N*row)+row];
  }
}
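Example #5 below calls a workerGauss(my_rank, p) that is not listed. The following is a reconstruction, not original code: a sketch of the worker side matching the row slicing and message tags used by the gauss() above (it assumes the global N plus the usual <mpi.h> and <stdlib.h> includes):

/* Sketch (assumption): worker counterpart to the gauss() above.  Worker i
 * owns the row_workload rows starting at row_workload*i + excess_work + 1. */
void workerGauss(int my_rank, int p) {
  int row_workload = (N-1)/p;
  int excess_work = (N-1)%p;
  int first = row_workload*my_rank + excess_work + 1;
  int i, norm, row, col, owner, local;
  float multiplier, b_norm;
  float *myA = malloc(row_workload * N * sizeof(float));
  float *myB = malloc(row_workload * sizeof(float));
  float *pivot = malloc(N * sizeof(float));
  MPI_Status status;

  /* Receive this rank's slice of A and B (tag 0) */
  MPI_Recv(myA, row_workload*N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &status);
  MPI_Recv(myB, row_workload, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &status);

  /* For every norm row owned by a lower rank: receive it (tag 1), then
   * eliminate that column from all local rows (they all lie below it) */
  for (norm = 0; norm < first; norm++) {
    owner = (norm <= row_workload + excess_work) ? 0
            : (norm - excess_work - 1) / row_workload;
    MPI_Recv(pivot + norm, N - norm, MPI_FLOAT, owner, 1, MPI_COMM_WORLD, &status);
    MPI_Recv(&b_norm, 1, MPI_FLOAT, owner, 1, MPI_COMM_WORLD, &status);
    for (row = 0; row < row_workload; row++) {
      multiplier = myA[N*row + norm] / pivot[norm];
      for (col = norm; col < N; col++)
        myA[N*row + col] -= pivot[col] * multiplier;
      myB[row] -= b_norm * multiplier;
    }
  }

  /* This rank's own norm rows: forward each to the higher ranks (tag 1),
   * then eliminate the remaining local rows below it */
  for (local = 0; local < row_workload; local++) {
    norm = first + local;
    for (i = my_rank+1; i < p; i++) {
      MPI_Ssend(&myA[N*local + norm], N - norm, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
      MPI_Ssend(&myB[local], 1, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
    }
    for (row = local + 1; row < row_workload; row++) {
      multiplier = myA[N*row + norm] / myA[N*local + norm];
      for (col = norm; col < N; col++)
        myA[N*row + col] -= myA[N*local + col] * multiplier;
      myB[row] -= myB[local] * multiplier;
    }
  }

  /* Return the eliminated slice to rank 0 (tag 3) */
  MPI_Ssend(myA, row_workload*N, MPI_FLOAT, 0, 3, MPI_COMM_WORLD);
  MPI_Ssend(myB, row_workload, MPI_FLOAT, 0, 3, MPI_COMM_WORLD);
  free(myA); free(myB); free(pivot);
}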
Example #5
int main(int argc, char **argv) {
  
  /* Timing variables */
  double start_t;
  double end_t;
  
  /* MPI Variables */
  int my_rank;
  int p;
  int dest = 0;
  
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &p);
  
  if(my_rank == 0) {
	parameters(argc, argv);
  }
  MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
  
  if(my_rank == 0) {
	/* Note: stack-allocated variable-length arrays; a large N would need heap allocation */
	float A[N*N], B[N], X[N];
	/* Initialize A and B */
	initialize_inputs(A, B, X);

	/* Print input matrices */
	print_inputs(A, B);

	/* Start Clock */
	printf("\nStarting clock.\n");
	start_t = MPI_Wtime();
	
    gauss(A, B, X, my_rank, p);
	
	/* Stop Clock */
	end_t = MPI_Wtime();
	printf("Stopped clock.\n");
  
	/* Display output */
	print_X(X);

	/* Display timing results */
	printf("\nElapsed time = %g s\n", end_t - start_t);
	printf("--------------------------------------------\n");
  } else {
	workerGauss(my_rank, p);
  }
  
  MPI_Finalize();
}
Example #6
int main(int argc, char **argv) {

    /* Prototype functions*/
    void gauss();

    MPI_Init(&argc, &argv);

    /* Get my process rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    /* Find out how many processes are being used */
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    printf("\nProcess number %d of %d says hi\n",
            my_rank+1, p);

    /* Every process reads the parameters to prepare dimension */
    parameters(argc, argv);

    /* Every process must allocate memory for the arrays */
    allocate_memory();

    if ( my_rank == SOURCE ) {
        /* Initialize A and B */
        initialize_inputs();

        /* Print input matrices */
        print_inputs();
    }


    gauss();

    if ( my_rank == SOURCE ) {

        /* Print input matrices */
        print_A();
        print_B();
        print_X();
    }

    /* The barrier prevents any process from reaching MPI_Finalize before the others have finished their communications */
    MPI_Barrier(MPI_COMM_WORLD);

    /* Free memory used for the arrays that we allocated previously */
    free_memory();

    MPI_Finalize();
}
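Example #6 calls allocate_memory() and free_memory(), which are not listed. A minimal sketch under the assumption that A, B, and X are global float pointers sized from the global N (an illustration, not the original code):

/* Sketch (assumption): A, B, X are global float* and N is the global size. */
void allocate_memory() {
  A = (float *)malloc(N * N * sizeof(float));
  B = (float *)malloc(N * sizeof(float));
  X = (float *)calloc(N, sizeof(float));  /* X[] starts at zero */
}

void free_memory() {
  free(A);
  free(B);
  free(X);
}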
Example #7
int main(int argc, char **argv) {
  /* Timing variables */
  struct timeval etstart, etstop;  /* Elapsed times using gettimeofday() */
  struct timezone tzdummy;
  clock_t etstart2, etstop2;  /* Elapsed times using times() */
  unsigned long long usecstart, usecstop;
  struct tms cputstart, cputstop;  /* CPU times for my processes */

  ID = argv[argc-1];  /* the trailing argument is taken as a run ID and hidden from parameters() below */
  argc--;

  /* Process program parameters */
  parameters(argc, argv);

  /* Initialize A and B */
  initialize_inputs();

  /* Print input matrices */
  print_inputs();

  /* Start Clock */
  printf("\nStarting clock.\n");
  gettimeofday(&etstart, &tzdummy);
  etstart2 = times(&cputstart);

  /* Gaussian Elimination */
  gauss();

  /* Stop Clock */
  gettimeofday(&etstop, &tzdummy);
  etstop2 = times(&cputstop);
  printf("Stopped clock.\n");
  usecstart = (unsigned long long)etstart.tv_sec * 1000000 + etstart.tv_usec;
  usecstop = (unsigned long long)etstop.tv_sec * 1000000 + etstop.tv_usec;

  /* Display output */
  print_X();

  /* Display timing results */
  printf("\nElapsed time = %g ms.\n",
	 (float)(usecstop - usecstart)/(float)1000);


}
Example #8
int main(int argc, char **argv) {
    ID = argv[argc-1];
    argc--;
    
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &procs);
    printf("\nProcess number %d", myid);
    /* Process program parameters */
    parameters(argc, argv);
    
    /* Allocate memory */
    A = (float*)malloc(N*N*sizeof(float));
    B = (float*)malloc(N*sizeof(float));
    X = (float*)malloc(N*sizeof(float));
    
    /* Initialize A and B */
    if (myid == 0) {
        initialize_inputs();
        
        /* Print input matrices */
        print_inputs();
    }
    /* Gaussian Elimination */
    gauss();
    /* Back substitution */
    if (myid == 0) {
        int row, col;
        for (row = N - 1; row >= 0; row--) {
            X[row] = B[row];
            for (col = N-1; col > row; col--) {
                X[row] -= A[row*N + col] * X[col];
            }
            X[row] /= A[row * N + row];
        }
        /* Display output */
        print_X();
    }
    free(A);
    free(B);
    free(X);
    MPI_Finalize();
    return 0;
}
Example #9
int main(int argc, char **argv) {

    /* Gaussian Elimination */
    int         my_rank;   /* My process rank              */
    int         p;         /* The number of processes      */
    int         norm;      /* Current normalization row    */

    /* Prototype functions */
    void Send_data(int my_rank, int p);
    void Compute(int norm, int my_rank, int p);

    MPI_Init(&argc, &argv);

    /* Get my process rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* Find out how many processes are being used */
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    /* Timing variables */
    double starttime = 0.0;
    double endtime = 0.0;

    printf("Computing Parallel via MPI.\n");

    if (my_rank == 0) {
        /* Start Clock */
        printf("\nStarting clock.\n");
        starttime = MPI_Wtime();

        /* Broadcast the value of N to all nodes */
        MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

        /* Initialize A and B */
        initialize_inputs();

        /* Print input matrices */
        print_inputs();
    } else {
        /* Receive the broadcast N value */
        MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }

    /* Update the needed data in the corresponding rows of A and B for each process */
    Send_data(my_rank, p);

    /* Gauss elimination */
    for (norm = 0; norm < N - 1; norm++) {
        int i;
        for (i = norm; i < N; i++) {
            MPI_Bcast(A[i], N, MPI_FLOAT, i%p, MPI_COMM_WORLD);
            MPI_Bcast(&B[i], 1, MPI_FLOAT, i%p, MPI_COMM_WORLD);
        }

        Compute(norm, my_rank, p);

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Bcast(A[N-1], N, MPI_FLOAT, (N-1)%p, MPI_COMM_WORLD);
    MPI_Bcast(&B[N-1], 1, MPI_FLOAT, (N-1)%p, MPI_COMM_WORLD);

    if (my_rank == 0) {
        int row, col;

        /* Back substitution */
        for (row = N - 1; row >= 0; row--) {
            X[row] = B[row];
            for (col = N-1; col > row; col--) {
                X[row] -= A[row][col] * X[col];
            }
            X[row] /= A[row][row];
        }

        /* Stop Clock */
        endtime = MPI_Wtime();

        /* Display timing results */
        printf("That took %f seconds.\n", endtime-starttime);

        /* Display output */
        print_X();
    }

    /* Shut down MPI */
    MPI_Finalize();

    exit(0);
}
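Example #9 calls Send_data() and Compute(), which are not listed. A sketch of Compute() under the assumption of a cyclic row distribution (row i belongs to rank i % p, matching the MPI_Bcast roots above); this is an illustration, not the original code:

/* Sketch (assumption): globals A[][], B[], N as in the other examples;
 * row i is owned by rank i % p, so each rank eliminates only its own rows. */
void Compute(int norm, int my_rank, int p) {
  int row, col;
  float multiplier;
  for (row = norm + 1; row < N; row++) {
    if (row % p != my_rank)
      continue;  /* skip rows owned by other ranks */
    multiplier = A[row][norm] / A[norm][norm];
    for (col = norm; col < N; col++)
      A[row][col] -= A[norm][col] * multiplier;
    B[row] -= B[norm] * multiplier;
  }
}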
Example #10
int main(int argc, char **argv) {

  /* Declare the required data structures */

  int N = 32;  /* Matrix size */

  /* Matrices and vectors */
  float *A = malloc(MAXN*MAXN*sizeof(float));
  int i, j;
   
   /* Code commented out; was used for testing. */
  /*
  float temp[64] = {1,2,3,4,5,6,7,8,
		  2,3,4,1,7,4,5,6,
		  2,3,2,1,2,2,1,1,
		  4,5,4,5,5,3,4,2,
		  1,4,8,4,3,7,6,6,
		  9,7,7,3,2,8,5,4,
		  8,6,4,1,1,5,3,3,
		  8,3,2,6,4,6,9,7};

 for(i=0;i<N;i++){
	for(j=0;j<N;j++)
		{
		*(A+((N*i)+j))=temp[i*N+j];
		//printf(" %f",*(A+((8*i)+j)));
		}
	//printf("\n");	
	}

*/
float B[MAXN];// = {5,6,7,3,5,2,9,5};
float X[MAXN];// = {0,0,0,0,0,0,0,0};


  int my_rank=0;   /* My process rank           */
  int p;         /* The number of processes   */
  
  //clock time recording variables
  double      start_time,end_time=0.0; 

///////////////////MPI code starts////////////////////


  //status variable used to check status of communication operation.	 
  MPI_Status  status;

  /* Let the system do what it needs to start up MPI */
   MPI_Init(&argc, &argv);

  /* Get my process rank */
   MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  /* Find out how many processes are being used */
   MPI_Comm_size(MPI_COMM_WORLD, &p);
     
  if(my_rank==0)
  {
  /* Process program parameters */
  N = parameters(argc, argv);
	
  /* Initialize A and B */

  initialize_inputs(A, B, X,N);

  /* Print input matrices */
  print_inputs(A, B,N);


  //Start clock and record the start time.
  start_time = MPI_Wtime();

} 

  //broadcast the size of the matrix, read by process 0, to all processes.
  MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

  //we need all processes to wait here until all others arrive, to make sure
  //that the input matrix has been initialized by process 0 and the matrix
  //size has been propagated to all processes.
  MPI_Barrier(MPI_COMM_WORLD);


  //declare the local variables
   int local_no_of_rows;       //number of rows to be processed by each process
   int local_matrix_size;      //size of the matrix
   float local_norm_row[N];    //the current normalization row
   float local_matrix_A[N][N]; //the part of matrix A on which each process will work
   float local_matrix_B[N];    //the part of matrix B on which each process will work
   int rows_per_process[p];    //the number of rows distributed to each process
   float local_norm_B;         //the element on which B will be normalized
   int displ[p];               //displacement vector
   int norm=0;                 //the index of the current normalizing row

 //The outermost loop of the Gaussian elimination operation.
 for (norm = 0; norm < N - 1; norm++) {

   //scatter the data across all processes. This call scatters the matrix A,
   //and broadcasts the current normalizing row and the number of rows each
   //process will work on.
   scatter_data(norm,
		my_rank,
		p,
		A,
		N,
		&local_no_of_rows,
		&local_matrix_size,
		local_norm_row,
		&(local_matrix_A[0][0]),
		&rows_per_process[0]);
   
   //calculate the send counts and the displacement vector for the scatter of matrix B.
   if(my_rank==0)
   {
	//printf(" %d", *(rows_per_process));
     *(displ)=0;
     for(j=1;j<p;j++)
		{
		 *(displ+j) = rows_per_process[j-1]+ *(displ+j-1);
		 //printf(" %d", *(rows_per_process+j));	
		}
   }
   
   //This call scatters the matrix B. Different processes may have different
   //numbers of elements to work on when the matrix size is not evenly divisible
   //by the number of processes, hence MPI_Scatterv() instead of MPI_Scatter().
   MPI_Scatterv(B+norm+1, rows_per_process, displ, MPI_FLOAT,local_matrix_B,local_no_of_rows, MPI_FLOAT, 
                                                              0, MPI_COMM_WORLD); 

   //broadcast the element against which matrix B will be normalized.
   local_norm_B = B[norm];
   MPI_Bcast(&local_norm_B, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
   
   //each process performs the following elimination operation on its
   //share of the matrices A and B.
   eliminate(local_matrix_size,
		local_no_of_rows, 
		&local_norm_row[0], 
		&(local_matrix_A[0][0]),
		norm,
		&(local_matrix_B[0]),
		local_norm_B);   

   //we need to calculate the counts and displacements for the gather operation
   //of the processed matrix A after each iteration.
   int counts_for_gather[p];
   int displacements_for_gather[p];
   if(my_rank==0)
   {
     
     *(displacements_for_gather)=0;
     counts_for_gather[0] = rows_per_process[0]*local_matrix_size;
  	
     for(j=1;j<p;j++)
		{
		counts_for_gather[j] = rows_per_process[j]*local_matrix_size;
		 *(displacements_for_gather+j) = counts_for_gather[j-1]+ *(displacements_for_gather+j-1);
		}
   }


   //here we gather the processed matrix A from all processes and store it locally
   MPI_Gatherv(local_matrix_A,
		local_no_of_rows*local_matrix_size, 
		MPI_FLOAT,
                A+(N*(norm+1)),
		counts_for_gather, 
		displacements_for_gather,
                MPI_FLOAT, 
		0, 
		MPI_COMM_WORLD);

   //similarly we gather the processed matrix B.
   MPI_Gatherv(local_matrix_B,
		local_no_of_rows, 
		MPI_FLOAT,
                B+norm+1,
		rows_per_process, 
		displ,
                MPI_FLOAT, 
		0, 
		MPI_COMM_WORLD);



 }

  //We need to wait for all processes to complete before we go ahead with
  //back substitution.
  MPI_Barrier(MPI_COMM_WORLD);

  //back substitution is performed only by process 0.
  int row, col;
  if(my_rank==0){
  /* Back substitution */
  for (row = N - 1; row >= 0; row--) {
    X[row] = B[row];
    for (col = N-1; col > row; col--) {
      X[row] -= *(A+(N*row)+col) * X[col];
    }
    X[row] /= *(A+(N*row)+row);
  }


  //Stop clock as operation is finished.
  end_time = MPI_Wtime();  
	
  //display X if the matrix size is small.
  if (N < 100) {
    printf("\nX = [");
    for (row = 0; row < N; row++) {
      printf("%5.2f%s", X[row], (row < N-1) ? "; " : "]\n");
    }
  }

  //print the execution time for performance analysis purposes.
  printf("\n\nThe total execution time as recorded on process 0 = %f seconds!\n", end_time-start_time);
  
}
  MPI_Finalize();  

}
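The scatter_data() and eliminate() helpers used in Example #10 are not listed. A sketch of eliminate(), matching the argument order at its call site above (matrix size, local row count, the broadcast normalizing row, the local slice of A, the pivot index, the local slice of B, and the B pivot element); an illustration, not the original code:

/* Sketch (assumption): each process zeroes column `norm` in its local rows
 * using the broadcast normalizing row; layout matches the flat A used above. */
void eliminate(int local_matrix_size, int local_no_of_rows,
               float *norm_row, float *local_A,
               int norm, float *local_B, float norm_B) {
  int row, col;
  float multiplier;
  for (row = 0; row < local_no_of_rows; row++) {
    multiplier = local_A[row*local_matrix_size + norm] / norm_row[norm];
    for (col = norm; col < local_matrix_size; col++)
      local_A[row*local_matrix_size + col] -= norm_row[col] * multiplier;
    local_B[row] -= norm_B * multiplier;
  }
}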
Example #11
int main(int argc, char **argv)
{
/*               Elapsed times using <gettimeofday()>.                */
 struct timeval etstart, etstop;
 struct timezone tzdummy;
 clock_t etstartt, etstoptt;

/*        Elapsed times in microseconds (from <gettimeofday()>).      */
 unsigned long long usecstart, usecstop;

/*                  CPU times for the threads.                        */
 struct tms cputstart, cputstop;

 int row, col;


 parameters(argc, argv);
 initialise_inputs();
 print_inputs();

 CurrentRow = Norm+1;
 Count = NumThreads-1;

 printf("Starting clock ...\n");
 gettimeofday(&etstart, &tzdummy);
 etstartt = times(&cputstart);

 create_threads();

 wait_for_threads();

/*
 * Diagonal elements are not normalised to 1.
 * This is treated in back substitution.
 */

/*                         Back substitution.                         */
 for (row = N-1; row >= 0; row--)
 {
  X[row] = B[row];
  for (col = N-1; col > row; col--)
   X[row] -= A[row][col]*X[col];
  X[row] /= A[row][row];
 }

 gettimeofday(&etstop, &tzdummy);
 etstoptt = times(&cputstop);
 printf("Stopped clock.\n");
 
 usecstart = (unsigned long long)etstart.tv_sec*1000000+etstart.tv_usec;
 usecstop = (unsigned long long)etstop.tv_sec*1000000+etstop.tv_usec;

 print_X();

 printf("Elapsed time = %g ms.\n", 
(float)(usecstop-usecstart)/(float)1000);

 printf("Elapsed time according to <times()> = %g ms.\n", 
(etstoptt-etstartt)/(float)CLK_TCK*1000);

 printf("CPU times are accurate to the nearest %g ms.\n", 
1.0/(float)CLK_TCK*1000.0);

 printf("The total CPU time for parent = %g ms.\n", 
(float)((cputstop.tms_utime+cputstop.tms_stime)-
(cputstart.tms_utime+cputstart.tms_stime))/(float)CLK_TCK*1000);

 printf("The system CPU time for parent = %g ms.\n", 
(float)(cputstop.tms_stime-cputstart.tms_stime)/(float)CLK_TCK*1000);
 }
Example #12
int main( int argc, char* argv[] )
{
	// =====================================================================
	// Initialization & Command Line Read-In
	// =====================================================================
	int version = 13;
	int mype = 0;
	int max_procs = omp_get_num_procs();
	int i, thread, mat;
	unsigned long seed;
	double omp_start, omp_end, p_energy;
	unsigned long long vhash = 0;
	int nprocs;

	#ifdef MPI
	MPI_Status stat;
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &mype);
	#endif

	// rand() is only used in the serial initialization stages.
	// A custom RNG is used in parallel portions.
	#ifdef VERIFICATION
	srand(26);
	#else
	srand(time(NULL));
	#endif

	// Process CLI Fields -- store in "Inputs" structure
	Inputs in = read_CLI( argc, argv );

	// Set number of OpenMP Threads
	omp_set_num_threads(in.nthreads);

	// Print-out of Input Summary
	if( mype == 0 )
		print_inputs( in, nprocs, version );

	// =====================================================================
	// Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data
	// =====================================================================

	// Allocate & fill energy grids
	#ifndef BINARY_READ
	if( mype == 0) printf("Generating Nuclide Energy Grids...\n");
	#endif

	NuclideGridPoint ** nuclide_grids = gpmatrix(in.n_isotopes,in.n_gridpoints);

	#ifdef VERIFICATION
	generate_grids_v( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#else
	generate_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	// Sort grids by energy
	#ifndef BINARY_READ
	if( mype == 0) printf("Sorting Nuclide Energy Grids...\n");
	sort_nuclide_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	// Prepare Unionized Energy Grid Framework
	#ifndef BINARY_READ
	GridPoint * energy_grid = generate_energy_grid( in.n_isotopes,
	                          in.n_gridpoints, nuclide_grids );
	#else
	GridPoint * energy_grid = (GridPoint *)malloc( in.n_isotopes *
	                           in.n_gridpoints * sizeof( GridPoint ) );
	int * index_data = (int *) malloc( in.n_isotopes * in.n_gridpoints
	                   * in.n_isotopes * sizeof(int));
	for( i = 0; i < in.n_isotopes*in.n_gridpoints; i++ )
		energy_grid[i].xs_ptrs = &index_data[i*in.n_isotopes];
	#endif

	// Double Indexing. Filling in energy_grid with pointers to the
	// nuclide_energy_grids.
	#ifndef BINARY_READ
	set_grid_ptrs( energy_grid, nuclide_grids, in.n_isotopes, in.n_gridpoints );
	#endif

	#ifdef BINARY_READ
	if( mype == 0 ) printf("Reading data from \"XS_data.dat\" file...\n");
	binary_read(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	#endif

	// Get material data
	if( mype == 0 )
		printf("Loading Mats...\n");
	int *num_nucs  = load_num_nucs(in.n_isotopes);
	int **mats     = load_mats(num_nucs, in.n_isotopes);

	#ifdef VERIFICATION
	double **concs = load_concs_v(num_nucs);
	#else
	double **concs = load_concs(num_nucs);
	#endif

	#ifdef BINARY_DUMP
	if( mype == 0 ) printf("Dumping data to binary file...\n");
	binary_dump(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
	if( mype == 0 ) printf("Binary file \"XS_data.dat\" written! Exiting...\n");
	return 0;
	#endif

	// =====================================================================
	// Cross Section (XS) Parallel Lookup Simulation Begins
	// =====================================================================

	// Outer benchmark loop can loop through all possible # of threads
	#ifdef BENCHMARK
	for( int bench_n = 1; bench_n <=omp_get_num_procs(); bench_n++ )
	{
		in.nthreads = bench_n;
		omp_set_num_threads(in.nthreads);
 	#endif

	if( mype == 0 )
	{
		printf("\n");
		border_print();
		center_print("SIMULATION", 79);
		border_print();
	}

	omp_start = omp_get_wtime();

	//initialize papi with one thread (master) here
	#ifdef PAPI
	if ( PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT){
		fprintf(stderr, "PAPI library init error!\n");
		exit(1);
	}
	#endif

	// OpenMP compiler directives - declaring variables as shared or private
	#pragma omp parallel default(none) \
	private(i, thread, p_energy, mat, seed) \
	shared( max_procs, in, energy_grid, nuclide_grids, \
	        mats, concs, num_nucs, mype, vhash)
	{
		// Initialize parallel PAPI counters
		#ifdef PAPI
		int eventset = PAPI_NULL;
		int num_papi_events;
		#pragma omp critical
		{
			counter_init(&eventset, &num_papi_events);
		}
		#endif

		double macro_xs_vector[5];
		double * xs = (double *) calloc(5, sizeof(double));

		// Initialize RNG seeds for threads
		thread = omp_get_thread_num();
		seed   = (thread+1)*19+17;

		// XS Lookup Loop
		#pragma omp for schedule(dynamic)
		for( i = 0; i < in.lookups; i++ )
		{
			// Status text
			if( INFO && mype == 0 && thread == 0 && i % 1000 == 0 )
				printf("\rCalculating XS's... (%.0lf%% completed)",
						(i / ( (double)in.lookups / (double) in.nthreads ))
						/ (double) in.nthreads * 100.0);

			// Randomly pick an energy and material for the particle
			#ifdef VERIFICATION
			#pragma omp critical
			{
				p_energy = rn_v();
				mat      = pick_mat(&seed);
			}
			#else
			p_energy = rn(&seed);
			mat      = pick_mat(&seed);
			#endif

			// debugging
			//printf("E = %lf mat = %d\n", p_energy, mat);

			// This returns the macro_xs_vector, but we're not going
			// to do anything with it in this program, so return value
			// is written over.
			calculate_macro_xs( p_energy, mat, in.n_isotopes,
			                    in.n_gridpoints, num_nucs, concs,
			                    energy_grid, nuclide_grids, mats,
                                macro_xs_vector );

			// Copy results from above function call onto heap
			// so that compiler cannot optimize function out
			// (only occurs if -flto flag is used)
			memcpy(xs, macro_xs_vector, 5*sizeof(double));

			// Verification hash calculation
			// This method provides a consistent hash across
			// architectures and compilers.
			#ifdef VERIFICATION
			char line[256];
			sprintf(line, "%.5lf %d %.5lf %.5lf %.5lf %.5lf %.5lf",
			       p_energy, mat,
				   macro_xs_vector[0],
				   macro_xs_vector[1],
				   macro_xs_vector[2],
				   macro_xs_vector[3],
				   macro_xs_vector[4]);
			unsigned long long vhash_local = hash(line, 10000);
			#pragma omp atomic
			vhash += vhash_local;
			#endif
		}

		// Prints out thread local PAPI counters
		#ifdef PAPI
		if( mype == 0 && thread == 0 )
		{
			printf("\n");
			border_print();
			center_print("PAPI COUNTER RESULTS", 79);
			border_print();
			printf("Count          \tSmybol      \tDescription\n");
		}
		{
		#pragma omp barrier
		}
		counter_stop(&eventset, num_papi_events);
		#endif

	}

	#ifndef PAPI
	if( mype == 0)
	{
		printf("\n" );
		printf("Simulation complete.\n" );
	}
	#endif

	omp_end = omp_get_wtime();

	// Print / Save Results and Exit
	print_results( in, mype, omp_end-omp_start, nprocs, vhash );

	#ifdef BENCHMARK
	}
	#endif

	#ifdef MPI
	MPI_Finalize();
	#endif

	return 0;
}
Example #13
int main(int argc, char** argv)
{
    int my_rank, size, i;


    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    printf("Started Process %d of %d\n", my_rank, (size - 1));

    if (my_rank == 0)
    {
        printf("Computing in Parallel on %d Processes\n", size);

        /* Process program parameters */
        parameters(argc, argv);

        /* Initialize A and B */
        initialize_inputs();

        /* Print input matrices */
        print_inputs();

        /* Gaussian elimination */
        for (norm = 0; norm < N - 1; norm++) /*Proceeding sequentially on each norm row because of
                          *Read-After-Write dependence between each norm variable iteration.
                          */
        {
            i = 0;
            for (row = norm + 1; row < N; row += blockSize) /*Putting values in the 'indices' dynamic array described above.
                                 *Note that this loop increments with a step size equal to the blockSize value
                                 *which is the number of rows each thread will be handling.
                                 */
            {
                indices[3 * i] = row; /*First value storing the starting row index.*/

                if ((row + blockSize - 1) < N) /*Second value stores the ending row index.*/
                    indices[3 * i + 1] = row + blockSize - 1;
                else
                    indices[3 * i + 1] = N - 1;

                indices[3 * i + 2] = norm; /*Third value stores value of current normalization row index.*/
                i++;
            }

            numCPU = i; /*Ensures that the number of threads launched equals the number of processing blocks made.*/

            for (i = 0; i < numCPU; i++)
            {
                pthread_create(rowThreads + i, NULL, processRows, (indices + 3 * i)); /*Launching each thread to operate on different parts of the array*/
            }

            for (i = 0; i < numCPU; i++)
            {
                pthread_join(*(rowThreads + i), NULL); /*Consolidating all threads*/
            }
        }
        /* (Diagonal elements are not normalized to 1.  This is treated in back
         * substitution.)
         */

        for (i = 1; i < size; i++)
        {
            MPI_Send(A, (MAXN*MAXN), MPI_FLOAT, i, 0, MPI_COMM_WORLD);
            MPI_Send(B, MAXN, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
            MPI_Send(&N, 1, MPI_INT, i, 2, MPI_COMM_WORLD);
            printf("Data sent to processor %d!\n", i);
        }
    }
    else
    {
        MPI_Recv(A, (MAXN*MAXN), MPI_FLOAT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(B, MAXN, MPI_FLOAT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(&N, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("Received size of data (Value of N) = %d\n", N);
        printf("Received data with tag 0 & 1\n");
        print_inputs();
    }

    MPI_Finalize();

    return 0;
}
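Example #13 launches processRows() threads over (start, end, norm) index triples, but the thread body is not listed. A minimal sketch consistent with the comments above, assuming the globals A[][], B[], and N used throughout these examples (a reconstruction, not the original code):

/* Sketch (assumption): eliminates rows [start, end] against pivot row `norm`. */
void *processRows(void *arg) {
  int *idx = (int *)arg;
  int start = idx[0], end = idx[1], norm = idx[2];
  int row, col;
  float multiplier;

  for (row = start; row <= end; row++) {
    multiplier = A[row][norm] / A[norm][norm];
    for (col = norm; col < N; col++)
      A[row][col] -= A[norm][col] * multiplier;
    B[row] -= B[norm] * multiplier;
  }
  return NULL;
}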