Ejemplo n.º 1
/* This function processes a nbngb block --> processes frontier blocks */
/*
 * process_frontier - fill the neighbour counters along one frontier line of a block.
 *
 * k         : index of the frontier line; used as the fixed i when frontier == ROW
 *             and as the fixed j when frontier == COLUMN.
 * blocksize : the other index sweeps 1..blocksize inclusive.
 * frontier  : ROW or COLUMN (constants defined outside this listing).
 * board, ldboard, nbngb, ldnbngb : not referenced by name in the visible body;
 *             presumably consumed by the cell() and ngb() macros, which are
 *             defined elsewhere (TODO confirm).
 *
 * NOTE(review): this listing shows no closing braces for the loops, the two
 * branches or the function, and whatever guards the final error message
 * (an else branch, presumably) is not visible either. Confirm against the
 * complete file before relying on the control flow as printed here.
 */
inline void process_frontier(int k, int blocksize, int* board, int frontier, int ldboard, int* nbngb, int ldnbngb){
  /* Handling differs depending on whether the frontier is a row or a column */
  if (frontier == ROW){
    /* i stays fixed at k while j walks the whole line */
    int i = k;
    int j;
    for (j = 1; j <= blocksize; ++j) {
      /* sum of the eight cells surrounding (i, j); the centre cell is left out */
      ngb( i, j ) =
	cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) +
	cell( i-1, j   ) +                  cell( i+1, j   ) +
	cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 );
  if (frontier==COLUMN){
    /* j stays fixed at k while l (playing the role of i) walks the whole line */
    int j = k;
    int l;
    for (l = 1; l <= blocksize; ++l) {
      /* same eight-neighbour sum, centred on (l, j) */
      ngb( l, j ) =
	cell( l-1, j-1 ) + cell( l, j-1 ) + cell( l+1, j-1 ) +
	cell( l-1, j   ) +                  cell( l+1, j   ) +
	cell( l-1, j+1 ) + cell( l, j+1 ) + cell( l+1, j+1 );
  /* NOTE(review): presumably reached only for an unknown frontier value; the guard is not visible here */
  printf("Error, not compatible frontier type in process frontier.\nExiting program\n");
Ejemplo n.º 2
/*
 * thread_compute - thread entry point working on one slice of the board.
 *
 * arg points to an int holding the thread id (tid).
 * Returns a malloc-ed int (num_alive) cast to void *; nothing in the visible
 * code frees it, so the caller presumably owns it (TODO confirm).
 *
 * BS, nb_threads, maxloop, _board and _nbngb are not declared in this listing
 * and are presumably file-scope globals; cell() and ngb() are macros defined
 * elsewhere that presumably index board/nbngb with ldboard/ldnbngb.
 *
 * NOTE(review): this listing shows no closing braces, no break or default
 * lines inside the switch statements and no visible synchronisation between
 * threads. Those lines may simply be missing from the listing; check the
 * complete file before drawing conclusions.
 */
void * thread_compute(void *arg){

  int tid = *(int *)arg;

  /* extent of this thread slice along i (integer division) and along j (full width) */
  int subBSi=BS/nb_threads;
  int subBSj=BS;

  int ldboard = BS+2;
  int ldnbngb = BS;
  /* Shift the base pointers by tid*subBSi elements.
   * NOTE(review): there is no ldboard factor, which is only right if i is the
   * fastest-varying index inside cell()/ngb() - TODO confirm. */
  int *board = _board + tid*(subBSi);
  int *nbngb = _nbngb + tid*subBSi;

  /* heap-allocated so the value can be returned through the void * result */
  int *num_alive = malloc(sizeof(*num_alive));

    /* Adds the remainder of BS/nb_threads to the slice extent.
     * NOTE(review): the deeper indentation suggests this line sat under a
     * condition that is not visible; as printed it applies to every thread. */
    subBSi+= BS%nb_threads;

    for (int loop = 1; loop <= maxloop; loop++) {
	/* neighbour counts for the first (i == 1) and last (i == subBSi) lines of the slice */
	for(int j=1; j<=subBSj; j++){
	  ngb(1, j) = cell(0, j-1) + cell(1, j-1) + cell(2, j-1) +
	              cell(0,   j) +                cell(2,   j) +
	              cell(0, j+1) + cell(1, j+1) + cell(2, j+1);
	  ngb(subBSi, j) = cell(subBSi-1, j-1) + cell(subBSi, j-1) + cell(subBSi+1, j-1) +
	                   cell(subBSi-1,   j)                     + cell(subBSi+1,   j) +
	                   cell(subBSi-1, j+1) + cell(subBSi, j+1) + cell(subBSi+1, j+1);


	/* neighbour counts for the remaining lines, i in 2..subBSi-1 */
	for (int j = 1; j <= subBSj; j++) {
	  for (int i = 2; i <= subBSi-1; i++) {
		ngb( i, j ) =
		    cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) +
		    cell( i-1, j   ) +                  cell( i+1, j   ) +
		    cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 );

	/* reset the counter for this iteration; no visible line increments it afterwards */
	*num_alive = 0;


	/* Update the first and last lines from their neighbour counts.
	 * NOTE(review): as printed, case 3 runs straight into case 2, so the 1
	 * would immediately be overwritten by 0; break lines are presumably
	 * among those missing from this listing. */
	for(int j=1; j<=subBSj; j++){
	  switch (ngb(1, j)){
	  case 3:
	    cell(1, j) = 1;
	  case 2:
	    cell(1, j) = 0;

	  switch (ngb(subBSi, j)){
	  case 3:
	    cell(subBSi, j) = 1;
	  case 2:
	    cell(subBSi, j) = 0;

	/* refresh the ghost entries at j == 0 and j == subBSj+1 from the opposite side */
	for(int i=1; i<=subBSi; i++){
	  cell(i, 0) = cell(i, subBSj);
	  cell(i, subBSj+1) = cell(i, 1);


	/* thread 0 additionally refreshes the four ghost corners and the ghost
	 * lines at i == 0 and i == BS+1, then calls output_board */
	if(tid == 0){
	  cell(   0, 0   ) = cell(BS, BS);
	  cell(   0, BS+1) = cell(BS,  1);
	  cell(BS+1, 0   ) = cell( 1, BS);
	  cell(BS+1, BS+1) = cell( 1,  1);	  
	  for(int j=1; j<=subBSj; j++){
	    cell(   0, j) = cell(BS, j);
	    cell(BS+1, j) = cell(1, j);
	  output_board(BS, board, ldboard, loop);
    /* hand the heap-allocated counter back to whoever collects the thread result */
    return (void *)num_alive;
Ejemplo n.º 3
/*
 * main - OpenMP variant of the Game of Life driver.
 *
 * Command line as read below: argv[1] = number of iterations (10 when absent),
 * argv[2] = BS, argv[3] = num_threads. BS and num_threads are not declared in
 * this listing and are presumably file-scope globals; cell() and ngb() are
 * macros defined elsewhere.
 *
 * NOTE(review): the opening brace of the function, all closing braces and the
 * matching #endif lines are absent from this listing, so the nesting described
 * in the comments below follows the indentation only.
 */
int main(int argc, char* argv[])
    int i, j, loop, num_alive, maxloop;
    int ldboard, ldnbngb;
    double t1, t2;
    double temps;
    int *board;
    int *nbngb;

    /* argument handling; atoi gives no way to detect malformed input */
    if (argc < 2) {
		maxloop = 10;
    else if (argc >= 2){
		maxloop = atoi(argv[1]);
		if(argc > 2)
			BS = atoi(argv[2]);
		if(argc > 3){
			num_threads = atoi(argv[3]); 
    num_alive = 0;

    /* Leading dimension of the board array */
    ldboard = BS + 2;
    /* Leading dimension of the neighbour counters array */
    ldnbngb = BS;

    /* board has room for BS+2 entries per side, nbngb for BS; results are not checked for NULL */
    board = malloc( ldboard * ldboard * sizeof(int) );
    nbngb = malloc( ldnbngb * ldnbngb * sizeof(int) );

    /* generate_initial_board receives the address of cell(1, 1) and its return value is kept as the alive count */
    num_alive = generate_initial_board( BS, &(cell(1, 1)), ldboard );
    /* NOTE(review): the extra indentation suggests a guard (perhaps an #ifdef) that is not visible here */
    	output_board( BS, &(cell(1, 1)), ldboard, 0 );

    printf("Starting number of living cells = %d\n", num_alive);
    t1 = mytimer();

    for (loop = 1; loop <= maxloop; loop++) {

		/* ghost corners take the value of the diagonally opposite interior corner */
		cell(   0, 0   ) = cell(BS, BS);
		cell(   0, BS+1) = cell(BS,  1);
		cell(BS+1, 0   ) = cell( 1, BS);
		cell(BS+1, BS+1) = cell( 1,  1);

		/* ghost edges take the value of the opposite interior edge */
		#pragma omp parallel for
		for (i = 1; i <= BS; i++) {
		    cell(   i,    0) = cell( i, BS);
		    cell(   i, BS+1) = cell( i,  1);
		    cell(   0,    i) = cell(BS,  i);
		    cell(BS+1,    i) = cell( 1,  i);

		/* neighbour counts: sum of the eight cells around (i, j); i is made private because it is declared outside the loop */
		#pragma omp parallel for private(i)
		for (j = 1; j <= BS; j++) {
			for (i = 1; i <= BS; i++) {
			ngb( i, j ) =
			    cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) +
			    cell( i-1, j   ) +                  cell( i+1, j   ) +
			    cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 );

		/* update pass: a count below 2 or above 3 clears the cell, a count of
		 * exactly 3 sets it, a count of 2 leaves it unchanged; cells equal to 1
		 * are tallied through the reduction */
		num_alive = 0;
		#pragma omp parallel for private (i) reduction(+:num_alive)
		for (j = 1; j <= BS; j++) {
			for (i = 1; i <= BS; i++) {
				if ( (ngb( i, j ) < 2) || 
				     (ngb( i, j ) > 3) ) {
				    cell(i, j) = 0;
				else {
				    if ((ngb( i, j )) == 3)
					cell(i, j) = 1;
				if (cell(i, j) == 1) {
				    num_alive ++;
		output_board( BS, &(cell(1, 1)), ldboard, loop);
		printf("%d \n", num_alive);

    /* elapsed time is printed multiplied by 1.e3 and labelled ms, so mytimer presumably returns seconds (TODO confirm) */
    t2 = mytimer();
    temps = t2 - t1;
    printf("Final number of living cells = %d\n", num_alive);
    printf("time=%.2lf ms\n",(double)temps * 1.e3);
    /* with BENCH defined the timing is also written to time_omp_<num_threads>.dat;
     * NOTE(review): no fclose and no #endif are visible in this listing */
    #ifdef BENCH
		char fname [40];
		sprintf(fname, "time_omp_%d.dat", num_threads);
    	FILE* f=fopen(fname, "w");
    	if (f != NULL)
    		fprintf(f,"%.2lf", temps*1.e3);
    #ifdef OUTPUT_BOARD
    output_board( BS, &(cell(1, 1)), ldboard, maxloop);

    /* NOTE(review): board and nbngb are not freed in the visible code */
    return EXIT_SUCCESS;
Ejemplo n.º 4
/*
 * main - MPI variant: the global board is scattered as square sub-blocks over
 * a 2D cartesian process grid, neighbour data is exchanged with non-blocking
 * requests, and the sub-blocks are gathered back at the end.
 *
 * Command line as read below: argv[1] = number of iterations (10 when absent),
 * argv[2] = BS (30 when absent). BS is presumably a file-scope global; cell()
 * and ngb() are macros defined elsewhere, as are neighbour_table,
 * inter_proc_communications, process_frontier, generate_initial_board,
 * output_board and mytimer.
 *
 * NOTE(review): neither MPI_Init, MPI_Type_commit nor MPI_Finalize appears in
 * this listing, and no closing braces are shown. Several statements are
 * indented as if guarded by a condition (probably a rank test) that is not
 * visible. Check the complete file before relying on the nesting.
 */
int main(int argc, char* argv[]){
  int rank, size;
  int loop, num_alive, loop_iterations;
  int ldboard, ldnbngb, ldglobalboard;
  double t1, time, final_time;
  /* both grid dimensions are periodic (passed to MPI_Cart_create below) */
  int periods[2] = {1, 1};
  int *globboard= NULL;
  int *globboard2= NULL;
  int *board;
  int *nbngb;

  /* Initialization of MPI */
  MPI_Comm_rank( MPI_COMM_WORLD, &rank );
  MPI_Comm_size( MPI_COMM_WORLD, &size);
  /* NOTE(review): the body of this test is clearly incomplete in the listing */
  if(argc >= 2){
      return EXIT_SUCCESS;
  int i, j;
  /* sqrt returns a double; the conversion to int truncates */
  int process_per_row = sqrt(size);
  int process_per_column = sqrt(size);
  int dims[2] = {process_per_row, process_per_column};
  // It only works if the number of processes is a perfect square
  if(size != process_per_column*process_per_row){
    fprintf(stderr, "Square Perfect needed as input size.\nExiting Program.");
    return EXIT_FAILURE;

  MPI_Comm grid;

  // Initialize cartesian grid (reorder argument is 0) and re-read the rank inside it
  MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods,0, &grid);
  MPI_Comm_rank(grid, &rank);

  /* User input */
  if (argc < 2) {
    loop_iterations = 10;
    BS = 30;
  } else if (argc >= 2){
    loop_iterations = atoi(argv[1]);
    if(argc > 2)
      BS = atoi(argv[2]);
      /* NOTE(review): an else line is presumably missing here; as printed BS would always end up 30 */
      BS = 30;
  num_alive = 0;

  /*Leading dimension of global board array*/
  ldglobalboard = BS + 2; // +2 for the extra entry on each side (+ X +)
  /* Leading dimension of board array */
  ldboard = BS/process_per_row + 2; // +2 for the extra entry on each side (+ X +)
  /* Leading dimension of neighbour array */
  ldnbngb = BS/sqrt(size); // floating-point division, truncated on assignment to int

  // Allocation of the local cell board and neighbour counters (results not checked)
  board = (int *)malloc( ldboard * ldboard * sizeof(int) );
  nbngb = (int *)malloc( ldnbngb * ldnbngb * sizeof(int) );

  // Allocation and generation of the global cell board
  // NOTE(review): indentation suggests a guard that is not visible in this listing
    globboard = (int *)malloc(ldglobalboard*ldglobalboard * sizeof(int));
    globboard2 = (int *)malloc(ldglobalboard*ldglobalboard * sizeof(int));
    num_alive = generate_initial_board( BS, &globboard[1+ldglobalboard] , ldglobalboard );
    output_board( BS, &globboard[1+ldglobalboard], ldglobalboard, 0 );
    fprintf(stderr, "Starting number of living cells = %d\n", num_alive);

  // Block type inside the global board: ldboard-2 runs of ldboard-2 ints,
  // consecutive runs ldglobalboard ints apart; extent resized to one int so
  // that the displacements given to Scatterv/Gatherv count ints
  MPI_Datatype block2, block;
  MPI_Type_vector(ldboard-2, ldboard-2, ldglobalboard, MPI_INT, &block2);
  MPI_Type_create_resized(block2, 0, sizeof(int), &block);

  // Same shape inside the local board, where consecutive runs are ldboard ints apart
  MPI_Datatype sub_block2, sub_block;
  MPI_Type_vector(ldboard-2, ldboard-2, ldboard, MPI_INT, &sub_block2);
  MPI_Type_create_resized(sub_block2, 0, sizeof(int), &sub_block);

  // number of blocks sent to each process (set to 1 below)
  int *process_count = (int*)malloc(size*sizeof(int));  
  // displacement of each process block inside the global board, in ints
  int *cell_per_processes = (int*)malloc(size*sizeof(int));

  // Fill counts and displacements for every grid position (preparing the scatter)
  for (i = 0; i < process_per_row; ++i){
    for (j = 0; j < process_per_column; ++j){
      process_count[i+j*process_per_column]= 1;
      cell_per_processes[i+j*process_per_column]= i*ldglobalboard*(ldboard-2)+j*(ldboard-2);

  /* Root 0 sends one block per process, starting from globboard[1+ldglobalboard];
   * each process receives one sub_block at board[ldboard+1] */
  MPI_Scatterv(&globboard[1+ldglobalboard], process_count, cell_per_processes, block, &board[ldboard+1], 1, sub_block,0, grid);

  // Table of the eight neighbours, filled by neighbour_table (defined elsewhere)
  int neighbours[8];
  neighbour_table(neighbours, grid, rank);

  /* Time to begin */
  t1 = mytimer();
  int blocksize = ldboard-2;
  /* blocksize single ints, ldboard ints apart */
  MPI_Datatype row_blocks;
  MPI_Type_vector(blocksize, 1, ldboard, MPI_INT, &row_blocks);

  // status filled by each MPI_Wait; its content is not read in the visible code
  MPI_Status mpi_status;

  // One MPI request per possible neighbour (8 in the worst case) 
  MPI_Request cart_request[8];
  for (loop = 1; loop <= loop_iterations; ++loop) {
    /* Start communications to send and recv informations from neighbours */
    inter_proc_communications(cart_request, neighbours, grid, blocksize, board, ldboard, row_blocks);

    /* Neighbour counts for i, j in 2..blocksize-1, done before any MPI_Wait
     * on the requests started above */
    for (j = 2; j <= blocksize-1; ++j) {
      for (i = 2; i <= blocksize-1; ++i) {
	ngb( i, j ) =
	  cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) +
	  cell( i-1, j   ) +                  cell( i+1, j   ) +
	  cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 );

    /* Computes cells on the border */

    // Cell neighbour's composition
    // 4 2 5       4           4 2 5       4 2 5       4 2 5 //
    // 0 X 1  -->  0      -->  0      -->  0   1  -->  0   1 //
    // 6 3 7       6           6           6   7       6 3 7 //
    /* Column on the left needs data from the left process --> 4, 0, 6*/ 
    MPI_Wait(&cart_request[0], &mpi_status);
    MPI_Wait(&cart_request[4], &mpi_status);
    MPI_Wait(&cart_request[6], &mpi_status);
    process_frontier(1, blocksize, board, COLUMN, ldboard, nbngb, ldnbngb);

    /* Line above needs data from the above process --> 2, 5 */
    MPI_Wait(&cart_request[2], &mpi_status);
    MPI_Wait(&cart_request[5], &mpi_status);
    process_frontier(1, blocksize, board, ROW, ldboard, nbngb, ldnbngb);

    /* Column on the right needs data from the right process --> 1, 7 */
    MPI_Wait(&cart_request[1], &mpi_status);
    MPI_Wait(&cart_request[7], &mpi_status);
    process_frontier(blocksize, blocksize, board, COLUMN, ldboard, nbngb, ldnbngb);

    /* Line under needs data from under process --> 3 */
    MPI_Wait(&cart_request[3], &mpi_status);
    process_frontier(blocksize, blocksize, board, ROW, ldboard, nbngb, ldnbngb);

    /* Update the cells: a count below 2 or above 3 clears the cell, a count
     * of exactly 3 sets it, a count of 2 leaves it unchanged.
     * NOTE(review): the statement that increments num_alive is not visible
     * in this listing. */
    num_alive = 0;
    for (j = 1; j <= blocksize; ++j) {
      for (i = 1; i <= blocksize; ++i) {
	if ( (ngb( i, j ) < 2) ||
	     (ngb( i, j ) > 3) ) {
	  cell(i, j) = 0;
	else {
	  if ((ngb( i, j )) == 3)
	    cell(i, j) = 1;
	if (cell(i, j) == 1) {
    printf("%d \n", num_alive);

  /* Reassembles matrix into one from the sub blocks in the block */
  MPI_Gatherv(&board[ldboard+1], 1, sub_block, &globboard2[1+ldglobalboard], process_count, cell_per_processes, block, 0, grid);

  /* Reduction to determine max time execution (result stored in final_time) */
  time = mytimer() - t1;
  MPI_Allreduce(&time, &final_time, 1,MPI_DOUBLE, MPI_MAX, grid);
  /* Reduction to determine number of cells still alive in all processes */
  MPI_Allreduce(MPI_IN_PLACE, &num_alive, 1, MPI_INT, MPI_SUM, grid);
  /* The END */
    // How many cells are passed out (in the recovery position) at the end of the party?
    printf("Final number of living cells = %d\n", num_alive);
    // NOTE(review): this prints the local time, not the final_time maximum computed above
    printf("time=%.2lf ms\n",(double)time * 1.e3);
    char str [100];
    // create debug file named after the number of processes; no fclose is visible in this listing
    sprintf(str, "mpi_debug_%d.dat", size);
    FILE *fd = NULL;
    fd=fopen(str, "w");
    if (fd != NULL)
      fprintf(fd,"%.2lf", time*1.e3);
    output_board( BS, &globboard2[1+ldglobalboard], ldglobalboard, loop_iterations);
  // The final end
  return EXIT_SUCCESS;
Ejemplo n.º 5
/*
 * main - MPI variant using blocking MPI_Sendrecv exchanges for the ghost
 * lines of each local block.
 *
 * Command line as read below: argv[1] = number of iterations (10 when absent),
 * argv[2] = BS. BS is presumably a file-scope global; cell(), global_cell()
 * and ngb() are macros defined elsewhere, as are struct grid,
 * compute_communicator, matrix_placement_proc, mpi_grid_init,
 * generate_initial_board and mytimer.
 *
 * NOTE(review): the opening brace of the function, all closing braces, the
 * trailing arguments of every MPI_Sendrecv call, and any MPI_Init or
 * MPI_Finalize call are absent from this listing.
 */
int main(int argc, char* argv[])
  int i, j, loop, num_alive, maxloop;
  int ldboard, ldnbngb, ldlboard;
  double t1, t2;
  double temps;
  int *board;
  int *nbngb;

  int local_alive;
  int *global_board;

  struct grid grid;
  MPI_Comm comm;
  int nb_proc_row;
  int nb_proc_tot;
  int rank;
  int nb_in_block;

  MPI_Comm_size(MPI_COMM_WORLD, &nb_proc_tot);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // initialization of the grid communicator
  // NOTE(review): the failure path returns EXIT_SUCCESS as printed here
  if (EXIT_FAILURE == compute_communicator(nb_proc_tot,&nb_proc_row,&comm,&rank)){
    return EXIT_SUCCESS;

  /* argument handling; BS is only assigned when a second argument is given */
  if (argc < 2) {
    maxloop = 10;
  } else if (argc > 2){
    maxloop = atoi(argv[1]);
    BS = atoi(argv[2]);
  } else
    maxloop = atoi(argv[1]);
  num_alive = 0;
  local_alive = 0;

  /* Leading dimension of the global board array (no ghost entries here) */
  ldboard = BS;
  /* BS must be a multiple of nb_proc_row */
  if (ldboard % nb_proc_row != 0){
    if (rank == 0)
      printf("Wrong BS (or wrong number of procs) ... exiting now.\n");
    return EXIT_FAILURE;

  /* Leading dimensions of the neighbour counters array and of the local
   * board, the latter with one extra entry on each side */
  nb_in_block = ldboard / nb_proc_row;
  ldnbngb = nb_in_block;
  ldlboard = nb_in_block + 2;

  board = malloc( ldlboard * ldlboard * sizeof(int) );
  nbngb = malloc( ldnbngb * ldnbngb * sizeof(int) );

  /* rank 0 allocates and generates the global board; by indentation the
   * timer start also belongs to this rank-0 branch */
  if (rank == 0){
    global_board = malloc( ldboard * ldboard * sizeof(int) );
    num_alive = generate_initial_board( &global_cell( 1, 1), ldboard );
    printf("Starting number of living cells = %d\n", num_alive);
    t1 = mytimer();

  /* called with SCATTER: global board given as source, local board as destination */
  matrix_placement_proc(nb_proc_row, nb_in_block, &comm, &(global_cell( 1, 1)), &(cell( 1, 1)), SCATTER, ldlboard);

  mpi_grid_init(&comm, &grid, rank);
  //printf("rank #%d: %d %d\n", rank, grid.rank_I, grid.rank_J);

  //output_lboard( nb_in_block, board, ldlboard, 0, rank );

  for (loop = 1; loop <= maxloop; loop++) {

    /* nb_in_block single ints, ldlboard ints apart.
     * NOTE(review): the type is rebuilt on every iteration and no
     * MPI_Type_commit or MPI_Type_free is visible in this listing. */
    MPI_Datatype blocktype; // we need a specific type for row exchange
    MPI_Type_vector(nb_in_block, 1, ldlboard, MPI_INT, &blocktype);
    // for upper/lower ghost row
    // (the calls below are cut short in this listing: trailing arguments are missing)
    MPI_Sendrecv(&(cell( 1, 1)), 1, blocktype, grid.proc_above, 99, 
		 &(cell( nb_in_block+1, 1)), 1, blocktype, grid.proc_under, 99,
    MPI_Sendrecv(&(cell( nb_in_block, 1)), 1, blocktype, grid.proc_under, 99,
		 &(cell( 0, 1)), 1, blocktype, grid.proc_above, 99, 

    // for left/right ghost col: ldlboard contiguous ints per message
    MPI_Sendrecv(&(cell( 0, 1)), ldlboard, MPI_INT, grid.proc_left, 98, 
		 &(cell( 0, nb_in_block+1)), ldlboard, MPI_INT, grid.proc_right, 98,
    MPI_Sendrecv(&(cell( 0, nb_in_block)), ldlboard, MPI_INT, grid.proc_right, 98,
		 &(cell( 0, 0)), ldlboard, MPI_INT, grid.proc_left, 98, 

    /* if (loop == 1) */
    /*   output_lboard( nb_in_block, board, ldlboard, 0, rank ); */

    // compute the number of neighbours: sum of the eight cells around (i, j)
    for (j = 1; j <= nb_in_block; j++) {
      for (i = 1; i <= nb_in_block; i++) {
  	ngb( i, j ) =
  	  cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) +
  	  cell( i-1, j   ) +                  cell( i+1, j   ) +
  	  cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 );

    // update the matrix: a count below 2 or above 3 clears the cell, a count
    // of exactly 3 sets it, a count of 2 leaves it unchanged
    local_alive = 0;
    for (j = 1; j <= nb_in_block; j++) {
      for (i = 1; i <= nb_in_block; i++) {
  	if ( (ngb( i, j ) < 2) ||
  	     (ngb( i, j ) > 3) ) {
  	  cell(i, j) = 0;
  	else {
  	  if ((ngb( i, j )) == 3)
  	    cell(i, j) = 1;
  	if (cell(i, j) == 1) {
  	  local_alive ++;

    //output_lboard( nb_in_block, board, ldlboard, loop, rank );
    /* sum the per-process counts onto rank 0, which prints the total */
    MPI_Reduce(&local_alive, &num_alive, 1, MPI_INT, MPI_SUM, 0, comm);
    if (rank == 0)
      printf("%d \n", num_alive);

  /* called with GATHER: local board given as source, global board as destination */
  matrix_placement_proc(nb_proc_row, nb_in_block, &comm, &(cell( 1, 1)), &(global_cell( 1, 1)), GATHER, ldlboard);
  MPI_Reduce(&local_alive, &num_alive, 1, MPI_INT, MPI_SUM, 0, comm);

  /* rank 0 reports the final count and the elapsed time (scaled by 1.e3, labelled ms) */
  if (rank == 0){
    t2 = mytimer();
    temps = t2 - t1;
    printf("Final number of living cells = %d\n", num_alive);
    printf("time=%.2lf ms\n",(double)temps * 1.e3);
    //output_board( BS, &(global_cell(1, 1)), ldboard, maxloop);


  /* NOTE(review): board, nbngb and global_board are not freed in the visible code */
  return EXIT_SUCCESS;
Ejemplo n.º 6
/*
 * main - MPI variant on a periodic 2D cartesian grid: the global board is
 * scattered as square sub-blocks, neighbour data is exchanged through eight
 * requests per iteration, and the sub-blocks are gathered back on rank 0.
 *
 * Command line as read below: argv[1] = number of iterations, argv[2] = BS;
 * with fewer than two arguments a usage line is printed. BS is presumably a
 * file-scope global; cell() and ngb() are macros defined elsewhere, as are
 * make_neighbours_table, make_communications, generate_initial_board and
 * mytimer.
 *
 * NOTE(review): the opening brace of the function, all closing braces, and
 * any MPI_Init, MPI_Type_commit or MPI_Finalize call are absent from this
 * listing. A block comment opened inside the main loop has no visible
 * terminator before the update pass, so the nesting described below follows
 * the indentation only.
 */
int main(int argc, char* argv[])
    int i, j, loop, num_alive, maxloop;
    int ldgboard,ldboard, ldnbngb;
    double t1, t2;
    double temps;
    int *gboard;
    int *board;
    int *nbngb;

    int size;
    int coord[2], id;
    int procs_per_lines_col;

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    /* sqrt returns a double; the conversion to int truncates */
    procs_per_lines_col = sqrt(size);
    /* the process count must be a perfect square; no exit statement is visible after the message */
    if(procs_per_lines_col * procs_per_lines_col != size) {
      fprintf(stderr, "Renseignez un nombre carré de processeurs siouplait !\n");

    /* square grid, periodic in both dimensions, reorder argument 0 */
    int dims[2]; dims[0] = procs_per_lines_col; dims[1] = procs_per_lines_col;
    int periods[2]; periods[0] = 1; periods[1] = 1;
    MPI_Comm comm_cart;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &comm_cart);
    MPI_Comm_rank(comm_cart, &id);
    MPI_Cart_coords(comm_cart, id, 2, coord);

    /* NOTE(review): no exit is visible after the usage message, which would leave maxloop unset */
    if (argc < 3) {
	printf("Usage: %s nb_iterations size\n", argv[0]);
    } else {
	maxloop = atoi(argv[1]);
	BS = atoi(argv[2]);
	//printf("Running sequential version, grid of size %d, %d iterations\n", BS, maxloop);
    num_alive = 0;

    //Generate the neighbours table
    /* Leading dimension of the global board array */
    ldgboard = BS + 2;
    /* Leading dimension of the board array */
    ldboard = BS/procs_per_lines_col + 2;
    /* Leading dimension of the neighbour counters array */
    ldnbngb = BS/procs_per_lines_col;

    board = malloc( ldboard * ldboard * sizeof(int) );
    nbngb = malloc( ldnbngb * ldnbngb * sizeof(int) );
    /* only process 0 of the cartesian communicator allocates and generates the global board */
    if(id == 0) {
      gboard = malloc(ldgboard * ldgboard * sizeof(int));
      num_alive = generate_initial_board( BS, &gboard[1+ldgboard], ldgboard );
      //fprintf(stderr,"Starting number of living cells = %d\n", num_alive);

    /* Block type inside the global board: ldboard-2 runs of ldboard-2 ints,
     * consecutive runs ldgboard ints apart, extent resized to one int.
     * NOTE(review): the resized type is written back into the same handle
     * that is passed as the old type. */
    MPI_Datatype block;
    MPI_Type_vector(ldboard-2, ldboard-2, ldgboard, MPI_INT, &block);
    MPI_Type_create_resized(block, 0, sizeof(int), &block);

    /* Same shape inside the local board, where consecutive runs are ldboard ints apart */
    MPI_Datatype subblock;
    MPI_Type_vector(ldboard-2, ldboard-2, ldboard, MPI_INT, &subblock);
    MPI_Type_create_resized(subblock, 0, sizeof(int), &subblock);
    int * counts = (int*) malloc(size*sizeof(int));
    int * displs = (int*) malloc(size*sizeof(int));
    // Define the displacements for each process
    // NOTE(review): no braces are visible on these loops; as printed only the
    // counts assignment would belong to the inner loop body
    for (int i = 0; i < procs_per_lines_col; ++i)
	for (int j = 0; j < procs_per_lines_col; ++j)
	    counts[i+j*procs_per_lines_col]= 1;
	    displs[i+j*procs_per_lines_col]= i*ldgboard*(ldboard-2)+j*(ldboard-2);
    /* root 0 sends one block per process; each process receives one subblock at board[ldboard+1] */
    MPI_Scatterv(&gboard[1+ldgboard], counts, displs, block, &board[ldboard+1], 1,
				subblock,0, comm_cart);

    int neighbours[8];
    make_neighbours_table(neighbours, comm_cart);    
    MPI_Request req[8];

    int block_size = ldboard - 2;
    /* block_size+2 single ints, ldboard ints apart */
    MPI_Datatype block_line;
    MPI_Type_vector(block_size+2, 1, ldboard,MPI_INT, &block_line);

    t1 = mytimer();

    for (loop = 1; loop <= maxloop; loop++) {
      make_communications(req, comm_cart, neighbours, block_size, board, ldboard, block_line);
      // NOTE(review): the block comment opened on the next line wraps the old
      // in-process ghost copy; its terminator is not visible in this listing
	  /*	cell(   0, 0   ) = cell(BS, BS);
	cell(   0, BS+1) = cell(BS,  1);
	cell(BS+1, 0   ) = cell( 1, BS);
	cell(BS+1, BS+1) = cell( 1,  1);

	for (i = 1; i <= BS; i++) {
	    cell(   i,    0) = cell( i, BS);
	    cell(   i, BS+1) = cell( i,  1);
	    cell(   0,    i) = cell(BS,  i);
	    cell(BS+1,    i) = cell( 1,  i);

      //Inner cells 
      // NOTE(review): the bounds printed here run up to block_size inclusive
	for (j = 2; j <= block_size; j++) {
	    for (i = 2; i <= block_size; i++) {
		ngb( i, j ) =
		    cell( i-1, j-1 ) + cell( i, j-1 ) + cell( i+1, j-1 ) +
		    cell( i-1, j   ) +                  cell( i+1, j   ) +
		    cell( i-1, j+1 ) + cell( i, j+1 ) + cell( i+1, j+1 );

	//On LEFT
	// waits on requests 0, 4 and 6, then recomputes the line i == 1
	MPI_Wait(&req[0], MPI_STATUS_IGNORE);
	MPI_Wait(&req[4], MPI_STATUS_IGNORE);
	MPI_Wait(&req[6], MPI_STATUS_IGNORE);
	for(j = 1; j <= block_size; j++) {
	  ngb( 1, j ) =
	    cell( 0, j-1 ) + cell( 1, j-1 ) + cell( 2, j-1 ) +
	    cell( 0, j   ) +                  cell( 2, j   ) +
	    cell( 0, j+1 ) + cell( 1, j+1 ) + cell( 2, j+1 );
	//On TOP
	// waits on requests 1 and 5, then recomputes the line j == 1
	MPI_Wait(&req[1], MPI_STATUS_IGNORE);
	MPI_Wait(&req[5], MPI_STATUS_IGNORE);
	for(i = 1; i <= block_size; i++) {
	  ngb( i, 1 ) =
	    cell( i - 1, 0) + cell( i, 0 ) + cell( i + 1, 0 ) +
	    cell( i - 1, 1) +                cell( i + 1, 1 ) +
	    cell( i - 1, 2) + cell( i, 2 ) + cell( i + 1, 2 );

	// waits on requests 2 and 7, then recomputes the line i == block_size
	MPI_Wait(&req[2], MPI_STATUS_IGNORE);
	MPI_Wait(&req[7], MPI_STATUS_IGNORE);
	for(j = 1; j <= block_size; j++) {
	  ngb( block_size, j ) =
	    cell( block_size - 1, j-1 ) + cell( block_size , j-1 ) + cell( block_size + 1, j-1 ) +
	    cell( block_size - 1, j   ) +                            cell( block_size + 1, j   ) +
	    cell( block_size - 1, j+1 ) + cell( block_size, j+1 ) + cell(  block_size + 1, j+1 );

	// waits on request 3, then recomputes the line j == block_size
	MPI_Wait(&req[3], MPI_STATUS_IGNORE);
	for(i = 1; i <= block_size; i++) {
	  ngb( i, block_size ) =
	    cell( i - 1, block_size - 1) + cell( i, block_size - 1 ) + cell( i + 1, block_size - 1 ) +
	    cell( i - 1, block_size ) +                cell( i + 1, block_size ) +
	    cell( i - 1, block_size + 1 ) + cell( i, block_size + 1 ) + cell( i + 1, block_size + 1 );

	// update pass: a count below 2 or above 3 clears the cell, a count of
	// exactly 3 sets it, a count of 2 leaves it unchanged; cells equal to 1 are tallied
	num_alive = 0;
	for (j = 1; j <= block_size; j++) {
	    for (i = 1; i <= block_size; i++) {
		if ( (ngb( i, j ) < 2) ||
		     (ngb( i, j ) > 3) ) {
		    cell(i, j) = 0;
		else {
		    if ((ngb( i, j )) == 3)
			cell(i, j) = 1;
		if (cell(i, j) == 1) {
		    num_alive ++;

        /* Including the border cells (useful to check the MPI communications) */
        /* output_board( BS+2, &(cell(0, 0)), ldboard, loop ); */

        /* With only the "real" cells: start at element (1,1) */
	//output_board( BS, &(cell(1, 1)), ldboard, loop);

	//printf("%d cells are alive\n", num_alive);
    /* every process contributes one subblock; root 0 places them into gboard using the same counts and displacements */
    MPI_Gatherv(&board[ldboard+1], 1, subblock,&gboard[ldgboard+1], counts,displs, block, 0, comm_cart);

    t2 = mytimer();

    temps = t2 - t1;
    /* in-place reductions: maximum elapsed time and total number of live cells across all processes */
    MPI_Allreduce(MPI_IN_PLACE,&temps, 1, MPI_DOUBLE, MPI_MAX, comm_cart);
    MPI_Allreduce(MPI_IN_PLACE,&num_alive, 1, MPI_INT, MPI_SUM, comm_cart);
    /* process 0 prints the reduced time multiplied by 1.e3 */
    if(id == 0) {
      //printf("Final number of living cells = %d\n", num_alive);
      printf("%.2lf\n",(double)temps * 1.e3);
    return EXIT_SUCCESS;