int main(int argc, char* argv[]) { int i, j, loop, num_alive, maxloop; int ldboard, ldnbngb; double t1, t2; double temps; int *board; int *nbngb; if (argc < 2) { maxloop = 10; } else if (argc >= 2){ maxloop = atoi(argv[1]); if(argc > 2) BS = atoi(argv[2]); if(argc > 3){ num_threads = atoi(argv[3]); } } omp_set_num_threads(num_threads); num_alive = 0; /* Leading dimension of the board array */ ldboard = BS + 2; /* Leading dimension of the neigbour counters array */ ldnbngb = BS; board = malloc( ldboard * ldboard * sizeof(int) ); nbngb = malloc( ldnbngb * ldnbngb * sizeof(int) ); num_alive = generate_initial_board( BS, &(cell(1, 1)), ldboard ); #ifdef OUTPUT_BOARD output_board( BS, &(cell(1, 1)), ldboard, 0 ); #endif printf("Starting number of living cells = %d\n", num_alive); t1 = mytimer(); for (loop = 1; loop <= maxloop; loop++) { cell( 0, 0 ) = cell(BS, BS); cell( 0, BS+1) = cell(BS, 1); cell(BS+1, 0 ) = cell( 1, BS); cell(BS+1, BS+1) = cell( 1, 1); #pragma omp parallel for for (i = 1; i <= BS; i++) { cell( i, 0) = cell( i, BS); cell( i, BS+1) = cell( i, 1); cell( 0, i) = cell(BS, i); cell(BS+1, i) = cell( 1, i); } #pragma omp parallel for private(i) for (j = 1; j <= BS; j++) { for (i = 1; i <= BS; i++) { ngb( i, j ) = cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) + cell( i-1, j ) + cell( i+1, j ) + cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 ); } } num_alive = 0; #pragma omp parallel for private (i) reduction(+:num_alive) for (j = 1; j <= BS; j++) { for (i = 1; i <= BS; i++) { if ( (ngb( i, j ) < 2) || (ngb( i, j ) > 3) ) { cell(i, j) = 0; } else { if ((ngb( i, j )) == 3) cell(i, j) = 1; } if (cell(i, j) == 1) { num_alive ++; } } } #ifdef OUTPUT_BOARD output_board( BS, &(cell(1, 1)), ldboard, loop); #endif #ifdef PRINT_ALIVE printf("%d \n", num_alive); #endif } t2 = mytimer(); temps = t2 - t1; printf("Final number of living cells = %d\n", num_alive); printf("time=%.2lf ms\n",(double)temps * 1.e3); #ifdef BENCH char fname [40]; sprintf(fname, "time_omp_%d.dat", num_threads); FILE* f=fopen(fname, "w"); if (f != NULL) fprintf(f,"%.2lf", temps*1.e3); fclose(f); #endif #ifdef OUTPUT_BOARD output_board( BS, &(cell(1, 1)), ldboard, maxloop); #endif free(board); free(nbngb); return EXIT_SUCCESS; }
void * thread_compute(void *arg){ int tid = *(int *)arg; int subBSi=BS/nb_threads; int subBSj=BS; int ldboard = BS+2; int ldnbngb = BS; int *board = _board + tid*(subBSi); int *nbngb = _nbngb + tid*subBSi; int *num_alive = malloc(sizeof(*num_alive)); if(tid==nb_threads-1) subBSi+= BS%nb_threads; for (int loop = 1; loop <= maxloop; loop++) { for(int j=1; j<=subBSj; j++){ ngb(1, j) = cell(0, j-1) + cell(1, j-1) + cell(2, j-1) + cell(0, j) + cell(2, j) + cell(0, j+1) + cell(1, j+1) + cell(2, j+1); ngb(subBSi, j) = cell(subBSi-1, j-1) + cell(subBSi, j-1) + cell(subBSi+1, j-1) + cell(subBSi-1, j) + cell(subBSi+1, j) + cell(subBSi-1, j+1) + cell(subBSi, j+1) + cell(subBSi+1, j+1); } sem_post(nbdone+MOD((tid-1),nb_threads)); sem_post(nbdone+MOD((tid+1),nb_threads)); for (int j = 1; j <= subBSj; j++) { for (int i = 2; i <= subBSi-1; i++) { ngb( i, j ) = cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) + cell( i-1, j ) + cell( i+1, j ) + cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 ); } } *num_alive = 0; sem_wait(nbdone+tid); sem_wait(nbdone+tid); for(int j=1; j<=subBSj; j++){ switch (ngb(1, j)){ case 3: cell(1, j) = 1; case 2: break; default: cell(1, j) = 0; } if(cell(1,j)==1) (*num_alive)++; switch (ngb(subBSi, j)){ case 3: cell(subBSi, j) = 1; case 2: break; default: cell(subBSi, j) = 0; } if(cell(subBSi,j)==1) (*num_alive)++; } for(int i=1; i<=subBSi; i++){ cell(i, 0) = cell(i, subBSj); cell(i, subBSj+1) = cell(i, 1); } tbarrier(); if(tid == 0){ cell( 0, 0 ) = cell(BS, BS); cell( 0, BS+1) = cell(BS, 1); cell(BS+1, 0 ) = cell( 1, BS); cell(BS+1, BS+1) = cell( 1, 1); for(int j=1; j<=subBSj; j++){ cell( 0, j) = cell(BS, j); cell(BS+1, j) = cell(1, j); } output_board(BS, board, ldboard, loop); } tbarrier(); } free((int*)arg); return (void *)num_alive; }
int main(int argc, char* argv[]){ MPI_Init(NULL, NULL); int rank, size; int loop, num_alive, loop_iterations; int ldboard, ldnbngb, ldglobalboard; double t1, time, final_time; int periods[2] = {1, 1}; int *globboard= NULL; int *globboard2= NULL; int *board; int *nbngb; /* Initialization of MPI */ MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size); if(argc >= 2){ if(!strcmp("-h",argv[1])){ if(!rank) helper(); MPI_Finalize(); return EXIT_SUCCESS; } } int i, j; int process_per_row = sqrt(size); int process_per_column = sqrt(size); int dims[2] = {process_per_row, process_per_column}; // It only works if the number of process in the input is a perfect square if(size != process_per_column*process_per_row){ fprintf(stderr, "Square Perfect needed as input size.\nExiting Program."); MPI_Finalize(); return EXIT_FAILURE; } MPI_Comm grid; // Initialize cartesian grid MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods,0, &grid); MPI_Comm_rank(grid, &rank); /* User input */ if (argc < 2) { loop_iterations = 10; BS = 30; } else if (argc >= 2){ loop_iterations = atoi(argv[1]); if(argc > 2) BS = atoi(argv[2]); else BS = 30; } num_alive = 0; /*Leading dimension of global board array*/ ldglobalboard = BS + 2; // +2 because of upper and above added (+ X +) /* Leading dimension of board array */ ldboard = BS/process_per_row + 2; // +2 because of upper and above added (+ X +) /* Leading dimension of neigbour array */ ldnbngb = BS/sqrt(size); // Same number of element in each process which is equal to this formula // Initialization of cells board board = (int *)malloc( ldboard * ldboard * sizeof(int) ); nbngb = (int *)malloc( ldnbngb * ldnbngb * sizeof(int) ); // Initialization of global cell board (which is common between all processes) if(!rank){ globboard = (int *)malloc(ldglobalboard*ldglobalboard * sizeof(int)); globboard2 = (int *)malloc(ldglobalboard*ldglobalboard * sizeof(int)); num_alive = generate_initial_board( BS, &globboard[1+ldglobalboard] , ldglobalboard ); output_board( BS, &globboard[1+ldglobalboard], ldglobalboard, 0 ); fprintf(stderr, "Starting number of living cells = %d\n", num_alive); } // Matrix block type used by each processes MPI_Datatype block2, block; MPI_Type_vector(ldboard-2, ldboard-2, ldglobalboard, MPI_INT, &block2); MPI_Type_create_resized(block2, 0, sizeof(int), &block); MPI_Type_commit(&block); // Matrix sub block type used by each processes MPI_Datatype sub_block2, sub_block; MPI_Type_vector(ldboard-2, ldboard-2, ldboard, MPI_INT, &sub_block2); MPI_Type_create_resized(sub_block2, 0, sizeof(int), &sub_block); MPI_Type_commit(&sub_block); int *process_count = (int*)malloc(size*sizeof(int)); // number of cells per processes int *cell_per_processes = (int*)malloc(size*sizeof(int)); // Prototyping moves for each processes (preparing matrix's scatter) for (i = 0; i < process_per_row; ++i){ for (j = 0; j < process_per_column; ++j){ process_count[i+j*process_per_column]= 1; cell_per_processes[i+j*process_per_column]= i*ldglobalboard*(ldboard-2)+j*(ldboard-2); } } /* Explodes matrix into sub_blocks elements */ MPI_Scatterv(&globboard[1+ldglobalboard], process_count, cell_per_processes, block, &board[ldboard+1], 1, sub_block,0, grid); // Initialize for each processes, a table of the neighbours. int neighbours[8]; neighbour_table(neighbours, grid, rank); /* Time to begin */ t1 = mytimer(); int blocksize = ldboard-2; MPI_Datatype row_blocks; MPI_Type_vector(blocksize, 1, ldboard, MPI_INT, &row_blocks); MPI_Type_commit(&row_blocks); // status for waiting time... MPI_Status mpi_status; // Create as much MPI request as number of neighbours possible (in the worst case 8) MPI_Request cart_request[8]; for (loop = 1; loop <= loop_iterations; ++loop) { /* Start communications to send and recv informations from neighbours */ inter_proc_communications(cart_request, neighbours, grid, blocksize, board, ldboard, row_blocks); /* Compute inside process cells */ for (j = 2; j <= blocksize-1; ++j) { for (i = 2; i <= blocksize-1; ++i) { ngb( i, j ) = cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) + cell( i-1, j ) + cell( i+1, j ) + cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 ); } } /* Computes cells on the border */ // Cell neighbour's composition // 4 2 5 4 4 2 5 4 2 5 4 2 5 // // 0 X 1 --> 0 --> 0 --> 0 1 --> 0 1 // // 6 3 7 6 6 6 7 6 3 7 // /* Column on the left needs data from the left process --> 4, 0, 6*/ MPI_Wait(&cart_request[0], &mpi_status); MPI_Wait(&cart_request[4], &mpi_status); MPI_Wait(&cart_request[6], &mpi_status); process_frontier(1, blocksize, board, COLUMN, ldboard, nbngb, ldnbngb); /* Line above needs data from the above process --> 2, 5 */ MPI_Wait(&cart_request[2], &mpi_status); MPI_Wait(&cart_request[5], &mpi_status); process_frontier(1, blocksize, board, ROW, ldboard, nbngb, ldnbngb); /* Column on the right needs data from the right process --> 1, 7 */ MPI_Wait(&cart_request[1], &mpi_status); MPI_Wait(&cart_request[7], &mpi_status); process_frontier(blocksize, blocksize, board, COLUMN, ldboard, nbngb, ldnbngb); /* Line under needs data from under process --> 3 */ MPI_Wait(&cart_request[3], &mpi_status); process_frontier(blocksize, blocksize, board, ROW, ldboard, nbngb, ldnbngb); /* Update the cell */ num_alive = 0; for (j = 1; j <= blocksize; ++j) { for (i = 1; i <= blocksize; ++i) { if ( (ngb( i, j ) < 2) || (ngb( i, j ) > 3) ) { cell(i, j) = 0; } else { if ((ngb( i, j )) == 3) cell(i, j) = 1; } if (cell(i, j) == 1) { num_alive+=1; } } } printf("%d \n", num_alive); } /* Reassembles matrix into one from the sub blocks in the block */ MPI_Gatherv(&board[ldboard+1], 1, sub_block, &globboard2[1+ldglobalboard], process_count, cell_per_processes, block, 0, grid); /* Reduction to determine max time execution */ time = mytimer() - t1; MPI_Allreduce(&time, &final_time, 1,MPI_DOUBLE, MPI_MAX, grid); /* Reduction to determine number of cells still alive in all processes */ MPI_Allreduce(MPI_IN_PLACE, &num_alive, 1, MPI_INT, MPI_SUM, grid); /* The END */ if(!rank){ // Combien de cellules sont en PLS à la fin de la soirée ? printf("Final number of living cells = %d\n", num_alive); printf("time=%.2lf ms\n",(double)time * 1.e3); char str [100]; // create debug file sprintf(str, "mpi_debug_%d.dat", size); FILE *fd = NULL; fd=fopen(str, "w"); // JUST TELL ME IF IT WORKS !! if (fd != NULL) fprintf(fd,"%.2lf", time*1.e3); else exit(EXIT_FAILURE); fclose(fd); output_board( BS, &globboard2[1+ldglobalboard], ldglobalboard, loop_iterations); } // FREE ALL free(process_count); free(cell_per_processes); free(board); free(nbngb); MPI_Finalize(); // The final end return EXIT_SUCCESS; }