int main(int argc, char **argv) {
  /* Timing variables */
  struct timeval etstart, etstop;   /* Elapsed times using gettimeofday() */
  struct timezone tzdummy;
  clock_t etstart2, etstop2;        /* Elapsed times using times() */
  unsigned long long usecstart, usecstop;
  struct tms cputstart, cputstop;   /* CPU times for my processes */
  long ticks_per_sec = sysconf(_SC_CLK_TCK);  /* times() reports in clock ticks, not CLOCKS_PER_SEC */

  /* Process program parameters */
  parameters(argc, argv);

  /* Initialize A and B */
  initialize_inputs();

  /* Print input matrices */
  print_inputs();

  /* Start Clock */
  printf("\nStarting clock.\n");
  gettimeofday(&etstart, &tzdummy);
  etstart2 = times(&cputstart);

  /* Gaussian Elimination */
  gauss();

  /* Stop Clock */
  gettimeofday(&etstop, &tzdummy);
  etstop2 = times(&cputstop);
  printf("Stopped clock.\n");
  usecstart = (unsigned long long)etstart.tv_sec * 1000000 + etstart.tv_usec;
  usecstop  = (unsigned long long)etstop.tv_sec * 1000000 + etstop.tv_usec;

  /* Display output */
  print_X();

  /* Display timing results */
  printf("\nElapsed time = %g ms.\n",
         (float)(usecstop - usecstart) / (float)1000);
  printf("(CPU times are accurate to the nearest %g ms)\n",
         1.0 / (float)ticks_per_sec * 1000.0);
  printf("My total CPU time for parent = %g ms.\n",
         (float)((cputstop.tms_utime + cputstop.tms_stime) -
                 (cputstart.tms_utime + cputstart.tms_stime)) /
         (float)ticks_per_sec * 1000);
  printf("My system CPU time for parent = %g ms.\n",
         (float)(cputstop.tms_stime - cputstart.tms_stime) /
         (float)ticks_per_sec * 1000);
  printf("My total CPU time for child processes = %g ms.\n",
         (float)((cputstop.tms_cutime + cputstop.tms_cstime) -
                 (cputstart.tms_cutime + cputstart.tms_cstime)) /
         (float)ticks_per_sec * 1000);
  /* Contrary to the man pages, this appears not to include the parent. */
  printf("--------------------------------------------\n");
  exit(0);
}
int main(int argc, char **argv) {
  /* Timing variables (unused in this MPI version):
  struct timeval etstart, etstop;     Elapsed times using gettimeofday()
  struct timezone tzdummy;
  clock_t etstart2, etstop2;          Elapsed times using times()
  unsigned long long usecstart, usecstop;
  struct tms cputstart, cputstop;     CPU times for my processes
  */

  argc--;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);
  MPI_Comm_size(MPI_COMM_WORLD, &procs);

  /* Process program parameters */
  parameters(argc, argv);

  if (id == 0) {
    /* Initialize A and B */
    initialize_inputs();

    /* Print input matrices */
    print_inputs();
  }

  /* Gaussian Elimination */
  gauss();

  /* Verification (disabled): rank 0 prints X, runs the serial reference
     gauss_test(), and compares X against X1 element by element:
  if (id == 0) {
    print_X();
    gauss_test();
    int right = 1;
    int j;
    for (j = 0; j < N; j++) {
      float dif = X[j] - X1[j];
      if (dif < 0) dif = -dif;
      if (dif > 0.0001) {
        printf("X: %f\n", X[j]);
        printf("X1: %f\n", X1[j]);
        right = 0;
        break;
      }
    }
    printf("right: %d\n", right);
    if (right == 1) printf("\nRight!\n");
    else printf("\nWrong!\n");
  }
  */

  MPI_Finalize();
  return 0;
}
int main(int argc, char **argv) {
  parameters(argc, argv);
  initialize_inputs();
  print_inputs();
  gauss();
  print_X();
  exit(0);
}
/* Gaussian elimination, rank 0 side.  MAXN and N are globals defined at the
 * top of this file; A, B, and X are passed in as flat row-major arrays of
 * size N*N, N, and N.  X[] is initialized to zeros. */
void gauss(float A[], float B[], float X[], int my_rank, int p) {
  int norm, row, col, i;
  float multiplier, b_norm;
  int end_row;
  MPI_Status status;

  /* Slice up the rows: rank 0 keeps the excess, the rest get equal blocks */
  int row_workload = (N - 1) / p;
  int excess_work  = (N - 1) % p;
  end_row = row_workload + excess_work;

  for (i = 1; i < p; i++) {
    MPI_Ssend(&A[(N * row_workload * i) + (N * excess_work) + N], row_workload * N,
              MPI_FLOAT, i, 0, MPI_COMM_WORLD);
    MPI_Ssend(&B[row_workload * i + excess_work + 1], row_workload,
              MPI_FLOAT, i, 0, MPI_COMM_WORLD);
  }

  for (norm = 0; norm <= end_row; norm++) {
    /* Rank 0 owns this norm row: send it to all higher ranks */
    for (i = my_rank + 1; i < p; i++) {
      b_norm = B[norm];
      MPI_Ssend(&A[(N * norm) + norm], N - norm, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
      MPI_Ssend(&b_norm, 1, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
    }
    /* Eliminate the rows rank 0 keeps locally */
    for (row = norm + 1; row <= end_row; row++) {
      multiplier = A[N * row + norm] / A[N * norm + norm];
      for (col = norm; col < N; col++) {
        A[N * row + col] -= A[N * norm + col] * multiplier;
      }
      B[row] -= B[norm] * multiplier;
    }
  }

  /* Collect the eliminated rows back from the workers */
  for (i = 1; i < p; i++) {
    MPI_Recv(&A[(N * row_workload * i) + (N * excess_work) + N], row_workload * N,
             MPI_FLOAT, i, 3, MPI_COMM_WORLD, &status);
    MPI_Recv(&B[(row_workload * i) + excess_work + 1], row_workload,
             MPI_FLOAT, i, 3, MPI_COMM_WORLD, &status);
  }
  print_inputs(A, B);

  /* Back substitution */
  for (row = N - 1; row >= 0; row--) {
    X[row] = B[row];
    for (col = N - 1; col > row; col--) {
      X[row] -= A[(N * row) + col] * X[col];
    }
    X[row] /= A[(N * row) + row];
  }
}
int main(int argc, char **argv) {
  /* Timing variables */
  double start_t;
  double end_t;

  /* MPI variables */
  int my_rank;
  int p;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &p);

  if (my_rank == 0) {
    parameters(argc, argv);
  }
  MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (my_rank == 0) {
    float A[N * N], B[N], X[N];  /* VLAs: a large N may overflow the stack */

    /* Initialize A and B */
    initialize_inputs(A, B, X);

    /* Print input matrices */
    print_inputs(A, B);

    /* Start Clock */
    printf("\nStarting clock.\n");
    start_t = MPI_Wtime();

    /* Gaussian Elimination */
    gauss(A, B, X, my_rank, p);

    /* Stop Clock */
    end_t = MPI_Wtime();
    printf("Stopped clock.\n");

    /* Display output */
    print_X(X);

    /* Display timing results */
    printf("\nElapsed time = %g s\n", end_t - start_t);
    printf("--------------------------------------------\n");
  } else {
    workerGauss(my_rank, p);
  }

  MPI_Finalize();
  return 0;
}
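/* workerGauss() is not shown in this file.  The sketch below is only a guess
 * at the worker side, reconstructed from the sends, receives, and tags used
 * by gauss() above; the local buffer layout, the owner computation, and all
 * variable names here are assumptions, not the original code.  It assumes
 * the same headers and the global N used elsewhere in this file. */
void workerGauss(int my_rank, int p) {
  int row_workload = (N - 1) / p;
  int excess_work  = (N - 1) % p;
  int first_row = row_workload * my_rank + excess_work + 1;  /* first global row held here */
  int last_row  = first_row + row_workload - 1;              /* last global row held here */
  float *locA  = malloc((size_t)row_workload * N * sizeof(float));
  float *locB  = malloc((size_t)row_workload * sizeof(float));
  float *pivot = malloc((size_t)N * sizeof(float));
  float b_norm, multiplier;
  int norm, r, col, i;
  MPI_Status status;

  /* Matching receives for the tag-0 block distribution done in gauss() */
  MPI_Recv(locA, row_workload * N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &status);
  MPI_Recv(locB, row_workload, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &status);

  for (norm = 0; norm <= last_row; norm++) {
    if (norm < first_row) {
      /* Pivot row owned by a lower rank: receive the tag-1 pair from its owner */
      int owner = (norm <= row_workload + excess_work)
                    ? 0
                    : (norm - excess_work - 1) / row_workload;
      MPI_Recv(&pivot[norm], N - norm, MPI_FLOAT, owner, 1, MPI_COMM_WORLD, &status);
      MPI_Recv(&b_norm, 1, MPI_FLOAT, owner, 1, MPI_COMM_WORLD, &status);
    } else {
      /* Pivot row owned locally: copy it out and forward it to higher ranks */
      for (col = norm; col < N; col++)
        pivot[col] = locA[(norm - first_row) * N + col];
      b_norm = locB[norm - first_row];
      for (i = my_rank + 1; i < p; i++) {
        MPI_Ssend(&pivot[norm], N - norm, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
        MPI_Ssend(&b_norm, 1, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
      }
    }
    /* Eliminate the local rows that sit below this norm row */
    for (r = 0; r < row_workload; r++) {
      if (first_row + r > norm) {
        multiplier = locA[r * N + norm] / pivot[norm];
        for (col = norm; col < N; col++)
          locA[r * N + col] -= pivot[col] * multiplier;
        locB[r] -= b_norm * multiplier;
      }
    }
  }

  /* Return the eliminated block to rank 0 (tag 3, matching the recvs in gauss()) */
  MPI_Ssend(locA, row_workload * N, MPI_FLOAT, 0, 3, MPI_COMM_WORLD);
  MPI_Ssend(locB, row_workload, MPI_FLOAT, 0, 3, MPI_COMM_WORLD);
  free(locA);
  free(locB);
  free(pivot);
}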
int main(int argc, char **argv) {
  /* Prototype functions */
  void gauss();

  MPI_Init(&argc, &argv);

  /* Get my process rank */
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  /* Find out how many processes are being used */
  MPI_Comm_size(MPI_COMM_WORLD, &p);

  printf("\nProcess number %d of %d says hi\n", my_rank + 1, p);

  /* Every process reads the parameters to prepare the dimension */
  parameters(argc, argv);

  /* Every process must allocate memory for the arrays */
  allocate_memory();

  if (my_rank == SOURCE) {
    /* Initialize A and B */
    initialize_inputs();

    /* Print input matrices */
    print_inputs();
  }

  gauss();

  if (my_rank == SOURCE) {
    /* Print the output matrices */
    print_A();
    print_B();
    print_X();
  }

  /* The barrier keeps every process from reaching MPI_Finalize before the
   * others have finished their communications. */
  MPI_Barrier(MPI_COMM_WORLD);

  /* Free the memory used for the arrays that we allocated previously */
  free_memory();

  MPI_Finalize();
  return 0;
}
int main(int argc, char **argv) {
  /* Timing variables */
  struct timeval etstart, etstop;   /* Elapsed times using gettimeofday() */
  struct timezone tzdummy;
  clock_t etstart2, etstop2;        /* Elapsed times using times() */
  unsigned long long usecstart, usecstop;
  struct tms cputstart, cputstop;   /* CPU times for my processes */

  ID = argv[argc - 1];
  argc--;

  /* Process program parameters */
  parameters(argc, argv);

  /* Initialize A and B */
  initialize_inputs();

  /* Print input matrices */
  print_inputs();

  /* Start Clock */
  printf("\nStarting clock.\n");
  gettimeofday(&etstart, &tzdummy);
  etstart2 = times(&cputstart);

  /* Gaussian Elimination */
  gauss();

  /* Stop Clock */
  gettimeofday(&etstop, &tzdummy);
  etstop2 = times(&cputstop);
  printf("Stopped clock.\n");
  usecstart = (unsigned long long)etstart.tv_sec * 1000000 + etstart.tv_usec;
  usecstop  = (unsigned long long)etstop.tv_sec * 1000000 + etstop.tv_usec;

  /* Display output */
  print_X();

  /* Display timing results */
  printf("\nElapsed time = %g ms.\n",
         (float)(usecstop - usecstart) / (float)1000);
  return 0;
}
int main(int argc, char **argv) {
  ID = argv[argc - 1];
  argc--;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &myid);
  MPI_Comm_size(MPI_COMM_WORLD, &procs);
  printf("\nProcess number %d\n", myid);

  /* Process program parameters */
  parameters(argc, argv);

  /* Allocate memory */
  A = (float *)malloc(N * N * sizeof(float));
  B = (float *)malloc(N * sizeof(float));
  X = (float *)malloc(N * sizeof(float));

  /* Initialize A and B */
  if (myid == 0) {
    initialize_inputs();

    /* Print input matrices */
    print_inputs();
  }

  /* Gaussian Elimination */
  gauss();

  /* Back substitution */
  if (myid == 0) {
    int row, col;
    for (row = N - 1; row >= 0; row--) {
      X[row] = B[row];
      for (col = N - 1; col > row; col--) {
        X[row] -= A[row * N + col] * X[col];
      }
      X[row] /= A[row * N + row];
    }

    /* Display output */
    print_X();
  }

  free(A);
  free(B);
  free(X);
  MPI_Finalize();
  return 0;
}
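/* gauss() for this program is defined elsewhere in the file.  The sketch
 * below is NOT that code; it is one minimal way to fill the gap, assuming
 * the flat row-major A/B allocated in main(), row-cyclic ownership
 * (row % procs), and the globals N, myid, and procs.  The message tags and
 * every name not appearing above are assumptions. */
void gauss(void) {
  int norm, row, col;
  float multiplier;

  /* Everyone needs the initial data that rank 0 built */
  MPI_Bcast(A, N * N, MPI_FLOAT, 0, MPI_COMM_WORLD);
  MPI_Bcast(B, N, MPI_FLOAT, 0, MPI_COMM_WORLD);

  for (norm = 0; norm < N - 1; norm++) {
    /* The owner of the pivot row shares its current contents */
    MPI_Bcast(&A[norm * N], N, MPI_FLOAT, norm % procs, MPI_COMM_WORLD);
    MPI_Bcast(&B[norm], 1, MPI_FLOAT, norm % procs, MPI_COMM_WORLD);

    /* Each rank eliminates only the rows it owns below the pivot */
    for (row = norm + 1; row < N; row++) {
      if (row % procs != myid)
        continue;
      multiplier = A[row * N + norm] / A[norm * N + norm];
      for (col = norm; col < N; col++)
        A[row * N + col] -= A[norm * N + col] * multiplier;
      B[row] -= B[norm] * multiplier;
    }
  }

  /* Rank 0 needs every eliminated row for back substitution */
  for (row = 1; row < N; row++) {
    int owner = row % procs;
    if (owner == 0) continue;   /* rank 0 already holds its own rows */
    if (myid == owner) {
      MPI_Send(&A[row * N], N, MPI_FLOAT, 0, 4, MPI_COMM_WORLD);
      MPI_Send(&B[row], 1, MPI_FLOAT, 0, 5, MPI_COMM_WORLD);
    } else if (myid == 0) {
      MPI_Recv(&A[row * N], N, MPI_FLOAT, owner, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      MPI_Recv(&B[row], 1, MPI_FLOAT, owner, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
  }
}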
int main(int argc, char **argv) {
  /* Gaussian Elimination */
  int my_rank;            /* My process rank */
  int p;                  /* The number of processes */
  int norm;               /* The row currently being normalized */

  void Get_data(int my_rank, int p);
  void Compute(int norm, int my_rank, int p);

  MPI_Init(&argc, &argv);

  /* Get my process rank */
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  /* Find out how many processes are being used */
  MPI_Comm_size(MPI_COMM_WORLD, &p);

  /* Timing variables */
  double starttime = 0.0;
  double endtime = 0.0;

  printf("Computing Parallel via MPI.\n");

  if (my_rank == 0) {
    /* Start Clock */
    printf("\nStarting clock.\n");
    starttime = MPI_Wtime();

    /* Broadcast the value of N to all nodes */
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* Initialize A and B */
    initialize_inputs();

    /* Print input matrices */
    print_inputs();
  } else {
    /* Receive the broadcast N value */
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
  }

  /* Update the needed data in the corresponding rows of A and B for each process */
  Send_data(my_rank, p);

  /* Gauss elimination */
  for (norm = 0; norm < N - 1; norm++) {
    int i;
    for (i = norm; i < N; i++) {
      MPI_Bcast(A[i], N, MPI_FLOAT, i % p, MPI_COMM_WORLD);
      MPI_Bcast(&B[i], 1, MPI_FLOAT, i % p, MPI_COMM_WORLD);
    }
    Compute(norm, my_rank, p);
    MPI_Barrier(MPI_COMM_WORLD);
  }
  MPI_Bcast(A[N - 1], N, MPI_FLOAT, (N - 1) % p, MPI_COMM_WORLD);
  MPI_Bcast(&B[N - 1], 1, MPI_FLOAT, (N - 1) % p, MPI_COMM_WORLD);

  if (my_rank == 0) {
    int row, col;

    /* Back substitution */
    for (row = N - 1; row >= 0; row--) {
      X[row] = B[row];
      for (col = N - 1; col > row; col--) {
        X[row] -= A[row][col] * X[col];
      }
      X[row] /= A[row][row];
    }

    /* Stop Clock */
    endtime = MPI_Wtime();

    /* Display timing results */
    printf("That took %f seconds.\n", endtime - starttime);

    /* Display output */
    print_X();
  }

  /* Shut down MPI */
  MPI_Finalize();
  exit(0);
}
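/* Send_data() and Compute() are defined elsewhere in this file.  The sketch
 * below shows what Compute() must do, inferred only from the i % p broadcast
 * roots above (row i is owned by rank i % p) and the globals A, B, and N;
 * the body is an assumption, not the original code. */
void Compute(int norm, int my_rank, int p) {
  int row, col;
  float multiplier;

  /* Eliminate the rows this rank owns that lie below the pivot row */
  for (row = norm + 1; row < N; row++) {
    if (row % p != my_rank)
      continue;
    multiplier = A[row][norm] / A[norm][norm];
    for (col = norm; col < N; col++)
      A[row][col] -= A[norm][col] * multiplier;
    B[row] -= B[norm] * multiplier;
  }
}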
int main(int argc, char **argv) {
  /* Declare the required data structures */
  int N = 32;                                 /* Matrix size */

  /* Matrices and vectors */
  float *A = malloc(MAXN * MAXN * sizeof(float));
  int i, j;

  /* Code commented out; was used for testing.
  float temp[64] = {1,2,3,4,5,6,7,8,
                    2,3,4,1,7,4,5,6,
                    2,3,2,1,2,2,1,1,
                    4,5,4,5,5,3,4,2,
                    1,4,8,4,3,7,6,6,
                    9,7,7,3,2,8,5,4,
                    8,6,4,1,1,5,3,3,
                    8,3,2,6,4,6,9,7};
  for (i = 0; i < N; i++) {
    for (j = 0; j < N; j++) {
      *(A + ((N * i) + j)) = temp[i * N + j];
    }
  }
  */
  float B[MAXN];  /* = {5,6,7,3,5,2,9,5}; */
  float X[MAXN];  /* = {0,0,0,0,0,0,0,0}; */

  int my_rank = 0;                            /* My process rank */
  int p;                                      /* The number of processes */

  /* Clock time recording variables */
  double start_time, end_time = 0.0;

  /*------------------------- MPI code starts -------------------------*/

  /* Let the system do what it needs to start up MPI */
  MPI_Init(&argc, &argv);

  /* Get my process rank */
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  /* Find out how many processes are being used */
  MPI_Comm_size(MPI_COMM_WORLD, &p);

  if (my_rank == 0) {
    /* Process program parameters */
    N = parameters(argc, argv);

    /* Initialize A and B */
    initialize_inputs(A, B, X, N);

    /* Print input matrices */
    print_inputs(A, B, N);

    /* Start the clock and record the start time */
    start_time = MPI_Wtime();
  }

  /* Broadcast the matrix size read by process 0 to all processes */
  MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* All processes wait here until the input matrix has been initialized by
   * process 0 and the matrix size has been propagated to every process. */
  MPI_Barrier(MPI_COMM_WORLD);

  /* Declare the local variables */
  int local_no_of_rows;         /* number of rows to be processed by each process */
  int local_matrix_size;        /* size of the matrix */
  float local_norm_row[N];      /* the current normalization row */
  float local_matrix_A[N][N];   /* the part of matrix A each process will work on */
  float local_matrix_B[N];      /* the part of matrix B each process will work on */
  int rows_per_process[p];      /* the number of rows distributed to each process */
  float local_norm_B;           /* the element on which B will be normalized */
  int displ[p];                 /* displacement vector */
  int norm = 0;                 /* the index of the current normalizing row */

  /* Outermost loop of the Gaussian elimination operation */
  for (norm = 0; norm < N - 1; norm++) {
    /* Scatter the data across all processes.  This call scatters matrix A
     * and broadcasts the current normalizing row and the number of rows
     * each process will work on. */
    scatter_data(norm, my_rank, p, A, N, &local_no_of_rows, &local_matrix_size,
                 local_norm_row, &(local_matrix_A[0][0]), &rows_per_process[0]);

    /* Calculate the send counts and displacement vector for the scatter of B */
    if (my_rank == 0) {
      displ[0] = 0;
      for (j = 1; j < p; j++) {
        displ[j] = rows_per_process[j - 1] + displ[j - 1];
      }
    }

    /* Scatter matrix B.  Different processes may have different numbers of
     * elements to work on when the matrix size is not evenly divisible by
     * the number of processes, hence MPI_Scatterv() instead of MPI_Scatter(). */
    MPI_Scatterv(B + norm + 1, rows_per_process, displ, MPI_FLOAT,
                 local_matrix_B, local_no_of_rows, MPI_FLOAT, 0, MPI_COMM_WORLD);

    /* Broadcast the element against which matrix B will be normalized */
    local_norm_B = B[norm];
    MPI_Bcast(&local_norm_B, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);

    /* Each process performs the elimination operation on its share of A and B */
    eliminate(local_matrix_size, local_no_of_rows, &local_norm_row[0],
              &(local_matrix_A[0][0]), norm, &(local_matrix_B[0]), local_norm_B);

    /* Calculate the counts and displacements for the gather of the processed
     * matrix A after each iteration */
    int counts_for_gather[p];
    int displacements_for_gather[p];
    if (my_rank == 0) {
      displacements_for_gather[0] = 0;
      counts_for_gather[0] = rows_per_process[0] * local_matrix_size;
      for (j = 1; j < p; j++) {
        counts_for_gather[j] = rows_per_process[j] * local_matrix_size;
        displacements_for_gather[j] = counts_for_gather[j - 1] + displacements_for_gather[j - 1];
      }
    }

    /* Gather the processed matrix A from all processes onto process 0 */
    MPI_Gatherv(local_matrix_A, local_no_of_rows * local_matrix_size, MPI_FLOAT,
                A + (N * (norm + 1)), counts_for_gather, displacements_for_gather,
                MPI_FLOAT, 0, MPI_COMM_WORLD);

    /* Similarly, gather the processed matrix B */
    MPI_Gatherv(local_matrix_B, local_no_of_rows, MPI_FLOAT, B + norm + 1,
                rows_per_process, displ, MPI_FLOAT, 0, MPI_COMM_WORLD);
  }

  /* Wait for all processes to complete before going ahead with back substitution */
  MPI_Barrier(MPI_COMM_WORLD);

  /* Back substitution, performed only by process 0 */
  int row, col;
  if (my_rank == 0) {
    for (row = N - 1; row >= 0; row--) {
      X[row] = B[row];
      for (col = N - 1; col > row; col--) {
        X[row] -= *(A + (N * row) + col) * X[col];
      }
      X[row] /= *(A + (N * row) + row);
    }

    /* Stop the clock: the operation is finished */
    end_time = MPI_Wtime();

    /* Display X if the matrix size is small */
    if (N < 100) {
      printf("\nX = [");
      for (row = 0; row < N; row++) {
        printf("%5.2f%s", X[row], (row < N - 1) ? "; " : "]\n");
      }
    }

    /* Print the execution time for performance analysis */
    printf("\n\nThe total execution time as recorded on process 0 = %f seconds!!\n",
           end_time - start_time);
  }

  MPI_Finalize();
  return 0;
}
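/* scatter_data() and eliminate() are defined elsewhere in this file.  The
 * sketch below shows what eliminate() has to compute, inferred only from the
 * arguments passed above; the body and parameter names are assumptions, not
 * the original code. */
void eliminate(int matrix_size, int no_of_rows, float *norm_row,
               float *local_A, int norm, float *local_B, float norm_B) {
  int r, col;
  float multiplier;

  /* Reduce each local row against the broadcast normalization row */
  for (r = 0; r < no_of_rows; r++) {
    multiplier = local_A[r * matrix_size + norm] / norm_row[norm];
    for (col = norm; col < matrix_size; col++)
      local_A[r * matrix_size + col] -= norm_row[col] * multiplier;
    local_B[r] -= norm_B * multiplier;
  }
}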
int main(int argc, char **argv) {
  /* Elapsed times using <gettimeofday()>. */
  struct timeval etstart, etstop;
  struct timezone tzdummy;
  clock_t etstartt, etstoptt;   /* Elapsed times using <times()>. */
  unsigned long usecstart, usecstop;
  /* CPU times for the threads. */
  struct tms cputstart, cputstop;
  int row, col;

  parameters(argc, argv);
  initialise_inputs();
  print_inputs();

  CurrentRow = Norm + 1;
  Count = NumThreads - 1;

  printf("Starting clock ...\n");
  gettimeofday(&etstart, &tzdummy);
  etstartt = times(&cputstart);

  create_threads();
  wait_for_threads();

  /*
   * Diagonal elements are not normalised to 1.
   * This is treated in back substitution.
   */

  /* Back substitution. */
  for (row = N - 1; row >= 0; row--) {
    X[row] = B[row];
    for (col = N - 1; col > row; col--)
      X[row] -= A[row][col] * X[col];
    X[row] /= A[row][row];
  }

  gettimeofday(&etstop, &tzdummy);
  etstoptt = times(&cputstop);
  printf("Stopped clock.\n");
  usecstart = (unsigned long)etstart.tv_sec * 1000000 + etstart.tv_usec;
  usecstop  = (unsigned long)etstop.tv_sec * 1000000 + etstop.tv_usec;

  print_X();

  printf("Elapsed time = %g ms.\n",
         (float)(usecstop - usecstart) / (float)1000);
  printf("Elapsed time according to <times()> = %g ms.\n",
         (etstoptt - etstartt) / (float)CLK_TCK * 1000);
  printf("CPU times are accurate to the nearest %g ms.\n",
         1.0 / (float)CLK_TCK * 1000.0);
  printf("The total CPU time for parent = %g ms.\n",
         (float)((cputstop.tms_utime + cputstop.tms_stime) -
                 (cputstart.tms_utime + cputstart.tms_stime)) /
         (float)CLK_TCK * 1000);
  printf("The system CPU time for parent = %g ms.\n",
         (float)(cputstop.tms_stime - cputstart.tms_stime) /
         (float)CLK_TCK * 1000);
  return 0;
}
int main( int argc, char* argv[] )
{
  // =====================================================================
  // Initialization & Command Line Read-In
  // =====================================================================
  int version = 13;
  int mype = 0;
  int max_procs = omp_get_num_procs();
  int i, thread, mat;
  unsigned long seed;
  double omp_start, omp_end, p_energy;
  unsigned long long vhash = 0;
  int nprocs;

  #ifdef MPI
  MPI_Status stat;
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &mype);
  #endif

  // rand() is only used in the serial initialization stages.
  // A custom RNG is used in parallel portions.
  #ifdef VERIFICATION
  srand(26);
  #else
  srand(time(NULL));
  #endif

  // Process CLI Fields -- store in "Inputs" structure
  Inputs in = read_CLI( argc, argv );

  // Set number of OpenMP Threads
  omp_set_num_threads(in.nthreads);

  // Print-out of Input Summary
  if( mype == 0 )
    print_inputs( in, nprocs, version );

  // =====================================================================
  // Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data
  // =====================================================================

  // Allocate & fill energy grids
  #ifndef BINARY_READ
  if( mype == 0 ) printf("Generating Nuclide Energy Grids...\n");
  #endif

  NuclideGridPoint ** nuclide_grids = gpmatrix(in.n_isotopes, in.n_gridpoints);

  #ifdef VERIFICATION
  generate_grids_v( nuclide_grids, in.n_isotopes, in.n_gridpoints );
  #else
  generate_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
  #endif

  // Sort grids by energy
  #ifndef BINARY_READ
  if( mype == 0 ) printf("Sorting Nuclide Energy Grids...\n");
  sort_nuclide_grids( nuclide_grids, in.n_isotopes, in.n_gridpoints );
  #endif

  // Prepare Unionized Energy Grid Framework
  #ifndef BINARY_READ
  GridPoint * energy_grid = generate_energy_grid( in.n_isotopes, in.n_gridpoints, nuclide_grids );
  #else
  GridPoint * energy_grid = (GridPoint *)malloc( in.n_isotopes * in.n_gridpoints * sizeof( GridPoint ) );
  int * index_data = (int *)malloc( in.n_isotopes * in.n_gridpoints * in.n_isotopes * sizeof(int) );
  for( i = 0; i < in.n_isotopes * in.n_gridpoints; i++ )
    energy_grid[i].xs_ptrs = &index_data[i * in.n_isotopes];
  #endif

  // Double Indexing. Filling in energy_grid with pointers to the
  // nuclide_energy_grids.
  #ifndef BINARY_READ
  set_grid_ptrs( energy_grid, nuclide_grids, in.n_isotopes, in.n_gridpoints );
  #endif

  #ifdef BINARY_READ
  if( mype == 0 ) printf("Reading data from \"XS_data.dat\" file...\n");
  binary_read(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
  #endif

  // Get material data
  if( mype == 0 )
    printf("Loading Mats...\n");
  int *num_nucs  = load_num_nucs(in.n_isotopes);
  int **mats     = load_mats(num_nucs, in.n_isotopes);

  #ifdef VERIFICATION
  double **concs = load_concs_v(num_nucs);
  #else
  double **concs = load_concs(num_nucs);
  #endif

  #ifdef BINARY_DUMP
  if( mype == 0 ) printf("Dumping data to binary file...\n");
  binary_dump(in.n_isotopes, in.n_gridpoints, nuclide_grids, energy_grid);
  if( mype == 0 ) printf("Binary file \"XS_data.dat\" written! Exiting...\n");
  return 0;
  #endif

  // =====================================================================
  // Cross Section (XS) Parallel Lookup Simulation Begins
  // =====================================================================

  // Outer benchmark loop can loop through all possible # of threads
  #ifdef BENCHMARK
  for( int bench_n = 1; bench_n <= omp_get_num_procs(); bench_n++ )
  {
    in.nthreads = bench_n;
    omp_set_num_threads(in.nthreads);
  #endif

  if( mype == 0 )
  {
    printf("\n");
    border_print();
    center_print("SIMULATION", 79);
    border_print();
  }

  omp_start = omp_get_wtime();

  // Initialize PAPI with one thread (master) here
  #ifdef PAPI
  if( PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT ){
    fprintf(stderr, "PAPI library init error!\n");
    exit(1);
  }
  #endif

  // OpenMP compiler directives - declaring variables as shared or private
  #pragma omp parallel default(none) \
    private(i, thread, p_energy, mat, seed) \
    shared( max_procs, in, energy_grid, nuclide_grids, \
            mats, concs, num_nucs, mype, vhash)
  {
    // Initialize parallel PAPI counters
    #ifdef PAPI
    int eventset = PAPI_NULL;
    int num_papi_events;
    #pragma omp critical
    {
      counter_init(&eventset, &num_papi_events);
    }
    #endif

    double macro_xs_vector[5];
    double * xs = (double *)calloc(5, sizeof(double));

    // Initialize RNG seeds for threads
    thread = omp_get_thread_num();
    seed   = (thread + 1) * 19 + 17;

    // XS Lookup Loop
    #pragma omp for schedule(dynamic)
    for( i = 0; i < in.lookups; i++ )
    {
      // Status text
      if( INFO && mype == 0 && thread == 0 && i % 1000 == 0 )
        printf("\rCalculating XS's... (%.0lf%% completed)",
               (i / ( (double)in.lookups / (double)in.nthreads ))
               / (double)in.nthreads * 100.0);

      // Randomly pick an energy and material for the particle
      #ifdef VERIFICATION
      #pragma omp critical
      {
        p_energy = rn_v();
        mat = pick_mat(&seed);
      }
      #else
      p_energy = rn(&seed);
      mat = pick_mat(&seed);
      #endif

      // debugging
      //printf("E = %lf mat = %d\n", p_energy, mat);

      // This returns the macro_xs_vector, but we're not going to do anything
      // with it in this program, so the return value is written over.
      calculate_macro_xs( p_energy, mat, in.n_isotopes,
                          in.n_gridpoints, num_nucs, concs,
                          energy_grid, nuclide_grids, mats,
                          macro_xs_vector );

      // Copy results from above function call onto heap so that the compiler
      // cannot optimize the function out (only occurs if -flto flag is used)
      memcpy(xs, macro_xs_vector, 5 * sizeof(double));

      // Verification hash calculation
      // This method provides a consistent hash across architectures and compilers.
      #ifdef VERIFICATION
      char line[256];
      sprintf(line, "%.5lf %d %.5lf %.5lf %.5lf %.5lf %.5lf",
              p_energy, mat,
              macro_xs_vector[0],
              macro_xs_vector[1],
              macro_xs_vector[2],
              macro_xs_vector[3],
              macro_xs_vector[4]);
      unsigned long long vhash_local = hash(line, 10000);
      #pragma omp atomic
      vhash += vhash_local;
      #endif
    }

    // Prints out thread-local PAPI counters
    #ifdef PAPI
    if( mype == 0 && thread == 0 )
    {
      printf("\n");
      border_print();
      center_print("PAPI COUNTER RESULTS", 79);
      border_print();
      printf("Count \tSymbol \tDescription\n");
    }
    {
      #pragma omp barrier
    }
    counter_stop(&eventset, num_papi_events);
    #endif
  }

  #ifndef PAPI
  if( mype == 0 )
  {
    printf("\n");
    printf("Simulation complete.\n");
  }
  #endif

  omp_end = omp_get_wtime();

  // Print / Save Results and Exit
  print_results( in, mype, omp_end - omp_start, nprocs, vhash );

  #ifdef BENCHMARK
  }
  #endif

  #ifdef MPI
  MPI_Finalize();
  #endif

  return 0;
}
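/* hash() is defined elsewhere in the benchmark.  The sketch below is only a
 * placeholder illustrating the shape of such a verification hash (a formatted
 * lookup string folded into a bounded integer); it is an assumption, not the
 * benchmark's actual hash function, so it will not reproduce official vhash
 * values. */
unsigned long long hash(char *line, int nbins) {
  unsigned long long h = 5381;               /* djb2-style accumulator */
  while (*line)
    h = h * 33 + (unsigned char)*line++;
  return h % nbins;                          /* keep each per-lookup hash small */
}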
int main(int argc, char** argv) {
  int my_rank, size, i;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  printf("Started Process %d of %d\n", my_rank, (size - 1));

  if (my_rank == 0) {
    printf("Computing in Parallel on %d Processes\n", size);

    /* Process program parameters */
    parameters(argc, argv);

    /* Initialize A and B */
    initialize_inputs();

    /* Print input matrices */
    print_inputs();

    /* Gaussian elimination.  Each norm row is handled sequentially because of
     * the read-after-write dependence between iterations of the norm loop. */
    for (norm = 0; norm < N - 1; norm++) {
      /* Fill the 'indices' array described above.  The loop steps by
       * blockSize, the number of rows each thread will handle. */
      i = 0;
      for (row = norm + 1; row < N; row += blockSize) {
        indices[3 * i] = row;                  /* First value: starting row index */
        if ((row + blockSize - 1) < N)         /* Second value: ending row index */
          indices[3 * i + 1] = row + blockSize - 1;
        else
          indices[3 * i + 1] = N - 1;
        indices[3 * i + 2] = norm;             /* Third value: current norm row index */
        i++;
      }
      /* Launch exactly as many threads as processing blocks were made */
      numCPU = i;
      for (i = 0; i < numCPU; i++) {
        /* Each thread operates on a different block of rows */
        pthread_create(rowThreads + i, NULL, processRows, (indices + 3 * i));
      }
      for (i = 0; i < numCPU; i++) {
        /* Consolidate all threads */
        pthread_join(*(rowThreads + i), NULL);
      }
    }
    /* (Diagonal elements are not normalized to 1.  This is treated in back
     * substitution.) */

    /* Ship the eliminated system to the other ranks */
    for (i = 1; i < size; i++) {
      MPI_Send(A, (MAXN * MAXN), MPI_FLOAT, i, 0, MPI_COMM_WORLD);
      MPI_Send(B, MAXN, MPI_FLOAT, i, 1, MPI_COMM_WORLD);
      MPI_Send(&N, 1, MPI_INT, i, 2, MPI_COMM_WORLD);
      printf("Data sent to processor %d!\n", i);
    }
  } else {
    MPI_Recv(A, (MAXN * MAXN), MPI_FLOAT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Recv(B, MAXN, MPI_FLOAT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Recv(&N, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    printf("Received size of data (Value of N) = %d\n", N);
    printf("Received data with tag 0 & 1\n");
    print_inputs();
  }

  MPI_Finalize();
  return 0;
}
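/* processRows() (the pthread worker) is defined elsewhere in this file.  The
 * sketch below is inferred from the comments above describing the indices
 * triplets (starting row, ending row, norm row); the body is an assumption,
 * not the original code, and it assumes the usual A[MAXN][MAXN], B, and N
 * globals. */
void *processRows(void *arg) {
  int *idx = (int *)arg;
  int start = idx[0], end = idx[1], norm = idx[2];
  int row, col;
  float multiplier;

  /* Eliminate the block of rows [start, end] against the norm row */
  for (row = start; row <= end; row++) {
    multiplier = A[row][norm] / A[norm][norm];
    for (col = norm; col < N; col++)
      A[row][col] -= A[norm][col] * multiplier;
    B[row] -= B[norm] * multiplier;
  }
  return NULL;
}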