Example #1
int main (int argc, char **argv) {

	FILE *fp;
	double **A = NULL, **B = NULL, **C = NULL, *A_array = NULL, *B_array = NULL, *C_array = NULL;
	double *A_local_block = NULL, *B_local_block = NULL, *C_local_block = NULL;
	double *A_shared_block_1 = NULL, *A_shared_block_2 = NULL, *B_shared_block_1 = NULL, *B_shared_block_2 = NULL;
	int A_rows, A_columns, A_local_block_rows, A_local_block_columns, A_local_block_size;
	int B_rows, B_columns, B_local_block_rows, B_local_block_columns, B_local_block_size;
	int rank, size, sqrt_size, matrices_a_b_dimensions[4];
	MPI_Comm cartesian_grid_communicator, row_communicator, column_communicator;
	MPI_Status status;

	// used to manage the cartesian grid
	int dimensions[2], periods[2], coordinates[2], remain_dims[2];

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);

	/* For square mesh */
	sqrt_size = (int)sqrt((double) size);
	if(sqrt_size * sqrt_size != size){
		if( rank == 0 ) fprintf(stderr, "need to run mpiexec with a perfect square number of processes\n");
		MPI_Abort(MPI_COMM_WORLD, -1);
	}

	// create a 2D cartesian grid
	dimensions[0] = dimensions[1] = sqrt_size;
	periods[0] = periods[1] = 1;
	MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions, periods, 1, &cartesian_grid_communicator);
	MPI_Cart_coords(cartesian_grid_communicator, rank, 2, coordinates);

	// create a row communicator
	remain_dims[0] = 0;
	remain_dims[1] = 1;
	MPI_Cart_sub(cartesian_grid_communicator, remain_dims, &row_communicator);

	// create a column communicator
	remain_dims[0] = 1;
	remain_dims[1] = 0;
	MPI_Cart_sub(cartesian_grid_communicator, remain_dims, &column_communicator);
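
	// The row communicator links the processes of one grid row (same coordinates[0]) and is
	// used to shift blocks of A to the left; the column communicator links the processes of
	// one grid column (same coordinates[1]) and is used to shift blocks of B upwards.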

	// getting matrices from files at rank 0 only
	// example: mpiexec -n 64 ./cannon matrix1 matrix2 [test]
	if (rank == 0){
		int row, column;
		if ((fp = fopen (argv[1], "r")) != NULL){
			fscanf(fp, "%d %d\n", &matrices_a_b_dimensions[0], &matrices_a_b_dimensions[1]);
			A = (double **) malloc (matrices_a_b_dimensions[0] * sizeof(double *));
			for (row = 0; row < matrices_a_b_dimensions[0]; row++){
				A[row] = (double *) malloc(matrices_a_b_dimensions[1] * sizeof(double));
				for (column = 0; column < matrices_a_b_dimensions[1]; column++)
					fscanf(fp, "%lf", &A[row][column]);
			}
			fclose(fp);
		} else {
			if(rank == 0) fprintf(stderr, "error opening file for matrix A (%s)\n", argv[1]);
			MPI_Abort(MPI_COMM_WORLD, -1);
		}
		if((fp = fopen (argv[2], "r")) != NULL){
			fscanf(fp, "%d %d\n", &matrices_a_b_dimensions[2], &matrices_a_b_dimensions[3]);
			B = (double **) malloc (matrices_a_b_dimensions[2] * sizeof(double *));
			for(row = 0; row < matrices_a_b_dimensions[2]; row++){
				B[row] = (double *) malloc(matrices_a_b_dimensions[3] * sizeof(double));
				for(column = 0; column < matrices_a_b_dimensions[3]; column++)
					fscanf(fp, "%lf", &B[row][column]);
			}
			fclose(fp);
		} else {
			if(rank == 0) fprintf(stderr, "error opening file for matrix B (%s)\n", argv[2]);
			MPI_Abort(MPI_COMM_WORLD, -1);
		}

		// need to check that the multiplication is possible given dimensions
		// matrices_a_b_dimensions[0] = row size of A
		// matrices_a_b_dimensions[1] = column size of A
		// matrices_a_b_dimensions[2] = row size of B
		// matrices_a_b_dimensions[3] = column size of B
		if(matrices_a_b_dimensions[1] != matrices_a_b_dimensions[2]){
			if(rank == 0) fprintf(stderr, "A's column size (%d) must match B's row size (%d)\n",
					matrices_a_b_dimensions[1], matrices_a_b_dimensions[2]);
			MPI_Abort(MPI_COMM_WORLD, -1);
		}

		// this implementation is limited to cases where the matrices can be partitioned perfectly
		if( matrices_a_b_dimensions[0] % sqrt_size != 0
				|| matrices_a_b_dimensions[1] % sqrt_size != 0
				|| matrices_a_b_dimensions[2] % sqrt_size != 0
				|| matrices_a_b_dimensions[3] % sqrt_size != 0 ){
			if(rank == 0) fprintf(stderr, "cannot distribute work evenly among processe\n"
					"all dimensions (A: r:%d c:%d; B: r:%d c:%d) need to be divisible by %d\n",
					matrices_a_b_dimensions[0],matrices_a_b_dimensions[1],
					matrices_a_b_dimensions[2],matrices_a_b_dimensions[3], sqrt_size );
			MPI_Abort(MPI_COMM_WORLD, -1);
		}
	}

	// send dimensions to all peers
	if(rank == 0) {
		int i;
		for(i = 1; i < size; i++){

			MPI_Send(matrices_a_b_dimensions, 4, MPI_INT, i, 0, cartesian_grid_communicator);
		}
	} else {

		MPI_Recv(matrices_a_b_dimensions, 4, MPI_INT, 0, 0, cartesian_grid_communicator, &status);
	}

	A_rows = matrices_a_b_dimensions[0];
	A_columns = matrices_a_b_dimensions[1];
	B_rows = matrices_a_b_dimensions[2];
	B_columns = matrices_a_b_dimensions[3];

	// local metadata for A
	A_local_block_rows = A_rows / sqrt_size;
	A_local_block_columns = A_columns / sqrt_size;
	A_local_block_size = A_local_block_rows * A_local_block_columns;
	A_local_block = (double *) malloc (A_local_block_size * sizeof(double));
	A_shared_block_1 = (double *) malloc (A_local_block_size * sizeof(double));
	A_shared_block_2 = (double *) malloc (A_local_block_size * sizeof(double));

	// local metadata for B
	B_local_block_rows = B_rows / sqrt_size;
	B_local_block_columns = B_columns / sqrt_size;
	B_local_block_size = B_local_block_rows * B_local_block_columns;
	B_local_block = (double *) malloc (B_local_block_size * sizeof(double));
	B_shared_block_1 = (double *) malloc (B_local_block_size * sizeof(double));
	B_shared_block_2 = (double *) malloc (B_local_block_size * sizeof(double));

	MPI_Win win_A1, win_A2;
	MPI_Win win_B1, win_B2;
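
	// Each window exposes one receive buffer: a process puts its current block into the
	// appropriate neighbour's buffer, and MPI_Win_fence calls delimit the access/exposure
	// epochs. Two windows per matrix act as a double buffer so that a transfer started in
	// one cycle can complete while the previously received block is being used.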

	MPI_Win_create(A_shared_block_1, A_local_block_size*sizeof(double), sizeof(double), MPI_INFO_NULL, row_communicator, &win_A1);
	MPI_Win_create(A_shared_block_2, A_local_block_size*sizeof(double), sizeof(double), MPI_INFO_NULL, row_communicator, &win_A2);
	MPI_Win_create(B_shared_block_1, B_local_block_size*sizeof(double), sizeof(double), MPI_INFO_NULL, column_communicator, &win_B1);
	MPI_Win_create(B_shared_block_2, B_local_block_size*sizeof(double), sizeof(double), MPI_INFO_NULL, column_communicator, &win_B2);

	// local metadata for C
	C_local_block = (double *) malloc (A_local_block_rows * B_local_block_columns * sizeof(double));
	// C needs to be initialized at 0 (accumulates partial dot-products)
	int i;
	for(i=0; i < A_local_block_rows * B_local_block_columns; i++){
		C_local_block[i] = 0;
	}

	// full arrays only needed at root
	if(rank == 0){
		A_array = (double *) malloc(sizeof(double) * A_rows * A_columns);
		B_array = (double *) malloc(sizeof(double) * B_rows * B_columns);
		C_array = (double *) malloc(sizeof(double) * A_rows * B_columns);
		// generate the 1D arrays of the matrices at root
		int row, column, i, j;
		for (i = 0; i < sqrt_size; i++){
			for (j = 0; j < sqrt_size; j++){
				for (row = 0; row < A_local_block_rows; row++){
					for (column = 0; column < A_local_block_columns; column++){
						A_array[((i * sqrt_size + j) * A_local_block_size) + (row * A_local_block_columns) + column]
							= A[i * A_local_block_rows + row][j * A_local_block_columns + column];
					}
				}
				for (row = 0; row < B_local_block_rows; row++){
					for (column = 0; column < B_local_block_columns; column++){
						B_array[((i * sqrt_size + j) * B_local_block_size) + (row * B_local_block_columns) + column]
							= B[i * B_local_block_rows + row][j * B_local_block_columns + column];
					}
				}
			}
		}
		// allocate output matrix C
		C = (double **) malloc(A_rows * sizeof(double *));
		for(i=0; i<A_rows ;i++){
			C[i] = (double *) malloc(B_columns * sizeof(double));
		}
	}

	// send a block to each process
	if(rank == 0) {
		int i;
		for(i = 1; i < size; i++){

			MPI_Send((A_array + (i * A_local_block_size)), A_local_block_size, MPI_DOUBLE, i, 0, cartesian_grid_communicator);
			MPI_Send((B_array + (i * B_local_block_size)), B_local_block_size, MPI_DOUBLE, i, 0, cartesian_grid_communicator);
		}
		for(i = 0; i < A_local_block_size; i++){
			A_local_block[i] = A_array[i];
		}
		for(i = 0; i < B_local_block_size; i++){
			B_local_block[i] = B_array[i];
		}
	} else {

		MPI_Recv(A_local_block, A_local_block_size, MPI_DOUBLE, 0, 0, cartesian_grid_communicator, &status);
		MPI_Recv(B_local_block, B_local_block_size, MPI_DOUBLE, 0, 0, cartesian_grid_communicator, &status);
	}

	// cannon's algorithm

	int cannon_block_cycle;
	double compute_time = 0, mpi_time = 0, start;
	int C_index, A_row, A_column, B_column;

	// The main loop is unrolled into {cycle 0, cycle 1, middle loop, cycle sqrt_size - 2, cycle sqrt_size - 1}.
	// Slightly different things happen in each cycle, and unrolling lets us eliminate
	// if-statements as much as possible.
	// We use two windows and "buffers" for each matrix in an attempt to increase the
	// communication/computation overlap.

	// Cycle 0:
	//   - Synchronize the windows.
	//   - Start forwarding A,B to A1,B1.
	//   - Compute.
	start = MPI_Wtime();
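		// The fence opens the exposure/access epoch on win_A1. The put target
		// (coordinates[1] - 1) mod sqrt_size is the left neighbour within the row
		// communicator; B below is put to the upper neighbour in the column communicator.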
		MPI_Win_fence(0, win_A1);
		MPI_Put(A_local_block, A_local_block_size, MPI_DOUBLE, (coordinates[1] + sqrt_size - 1) % sqrt_size, 0,
			A_local_block_size, MPI_DOUBLE, win_A1);

		MPI_Win_fence(0, win_B1);
		MPI_Put(B_local_block, B_local_block_size, MPI_DOUBLE, (coordinates[0] + sqrt_size - 1) % sqrt_size, 0,
			B_local_block_size, MPI_DOUBLE, win_B1);
	mpi_time += MPI_Wtime() - start;

	// compute partial result for cycle 0
	start = MPI_Wtime();
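		// naive dense block multiply: C_local_block += A_local_block * B_local_block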
		for(C_index = 0, A_row = 0; A_row < A_local_block_rows; A_row++){
			for(B_column = 0; B_column < B_local_block_columns; B_column++, C_index++){
				for(A_column = 0; A_column < A_local_block_columns; A_column++){
					C_local_block[C_index] += A_local_block[A_row * A_local_block_columns + A_column] *
						B_local_block[A_column * B_local_block_columns + B_column];
				}
			}
		}
	compute_time += MPI_Wtime() - start;

	// Cycle 1:
	//   - Complete transferring A,B to A1,B1.
	//   - Start forwarding A1,B1 to A2,B2 to create a second data stream that is one step ahead.
	//   - Copy A1,B1 to the local blocks.
	//   - Make sure every process has completed copying.
	//   - Start cycling A1,B1.
	//   - Compute.
	start = MPI_Wtime();
		MPI_Win_fence(0, win_A1);
		for(i = 0; i < A_local_block_size; i++) A_local_block[i] = A_shared_block_1[i]; /* copy the whole block; '*A_local_block = *A_shared_block_1' copied only one element */
		MPI_Win_fence(0, win_A1);
		MPI_Win_fence(0, win_A2);
		MPI_Put(A_local_block, A_local_block_size, MPI_DOUBLE, (coordinates[1] + sqrt_size - 1) % sqrt_size, 0,
			A_local_block_size, MPI_DOUBLE, win_A2);

		MPI_Win_fence(0, win_B1);
		for(i = 0; i < B_local_block_size; i++) B_local_block[i] = B_shared_block_1[i]; /* copy the whole block, not just the first element */
		MPI_Win_fence(0, win_B1);
		MPI_Win_fence(0, win_B2);
		MPI_Put(B_local_block, B_local_block_size, MPI_DOUBLE, (coordinates[0] + sqrt_size - 1) % sqrt_size, 0,
			B_local_block_size, MPI_DOUBLE, win_B2);

		MPI_Win_fence(0, win_A1);
		MPI_Put(A_local_block, A_local_block_size, MPI_DOUBLE, (coordinates[1] + sqrt_size - 1) % sqrt_size, 0,
			A_local_block_size, MPI_DOUBLE, win_A1);

		MPI_Win_fence(0, win_B1);
		MPI_Put(B_local_block, B_local_block_size, MPI_DOUBLE, (coordinates[0] + sqrt_size - 1) % sqrt_size, 0,
			B_local_block_size, MPI_DOUBLE, win_B1);
	mpi_time += MPI_Wtime() - start;

	// compute partial result for cycle 1
	start = MPI_Wtime();
		for(C_index = 0, A_row = 0; A_row < A_local_block_rows; A_row++){
			for(B_column = 0; B_column < B_local_block_columns; B_column++, C_index++){
				for(A_column = 0; A_column < A_local_block_columns; A_column++){
					C_local_block[C_index] += A_local_block[A_row * A_local_block_columns + A_column] *
						B_local_block[A_column * B_local_block_columns + B_column];
				}
			}
		}
	compute_time += MPI_Wtime() - start;

	// Middle loop:
	//   Alternate between windows 1 and 2 to allow more time for communication to complete.
	//   For even values of the loop index work with A2/B2; for odd values work with A1/B1.
	//   - Complete the cycling of the blocks (started two iterations earlier), or the cycle 1 forwarding.
	//   - Copy the shared blocks to the local blocks.
	//   - Make sure every process has completed copying.
	//   - Start cycling the blocks again.
	//   - Compute.
	for(cannon_block_cycle = 2; cannon_block_cycle < sqrt_size - 2; cannon_block_cycle++){

		start = MPI_Wtime();
			if (cannon_block_cycle % 2 == 0) {
				MPI_Win_fence(0, win_A2);
				for(i = 0; i < A_local_block_size; i++) A_local_block[i] = A_shared_block_2[i];
				MPI_Win_fence(0, win_A2);
				MPI_Put(A_local_block, A_local_block_size, MPI_DOUBLE, (coordinates[1] + sqrt_size - 1) % sqrt_size, 0,
					A_local_block_size, MPI_DOUBLE, win_A2);

				MPI_Win_fence(0, win_B2);
				for(i = 0; i < B_local_block_size; i++) B_local_block[i] = B_shared_block_2[i];
				MPI_Win_fence(0, win_B2);
				MPI_Put(B_local_block, B_local_block_size, MPI_DOUBLE, (coordinates[0] + sqrt_size - 1) % sqrt_size, 0,
					B_local_block_size, MPI_DOUBLE, win_B2);
			} else {
				MPI_Win_fence(0, win_A1);
				for(i = 0; i < A_local_block_size; i++) A_local_block[i] = A_shared_block_1[i];
				MPI_Win_fence(0, win_A1);
				MPI_Put(A_local_block, A_local_block_size, MPI_DOUBLE, (coordinates[1] + sqrt_size - 1) % sqrt_size, 0,
					A_local_block_size, MPI_DOUBLE, win_A1);

				MPI_Win_fence(0, win_B1);
				for(i = 0; i < B_local_block_size; i++) B_local_block[i] = B_shared_block_1[i];
				MPI_Win_fence(0, win_B1);
				MPI_Put(B_local_block, B_local_block_size, MPI_DOUBLE, (coordinates[0] + sqrt_size - 1) % sqrt_size, 0,
					B_local_block_size, MPI_DOUBLE, win_B1);
			}
		mpi_time += MPI_Wtime() - start;

		// compute partial result for this block cycle
		start = MPI_Wtime();
			for(C_index = 0, A_row = 0; A_row < A_local_block_rows; A_row++){
				for(B_column = 0; B_column < B_local_block_columns; B_column++, C_index++){
					for(A_column = 0; A_column < A_local_block_columns; A_column++){
						C_local_block[C_index] += A_local_block[A_row * A_local_block_columns + A_column] *
							B_local_block[A_column * B_local_block_columns + B_column];
					}
				}
			}
		compute_time += MPI_Wtime() - start;

	}

	// Cycle sqrt_size - 2:
	//   - Complete the previous communications on win_A2 and win_B2.
	//   - Copy the shared blocks to the local blocks.
	//   - Compute.
	//   - No new transfers are started.
	start = MPI_Wtime();
		MPI_Win_fence(0, win_A2);
		for(i = 0; i < A_local_block_size; i++) A_local_block[i] = A_shared_block_2[i];

		MPI_Win_fence(0, win_B2);
		for(i = 0; i < B_local_block_size; i++) B_local_block[i] = B_shared_block_2[i];
	mpi_time += MPI_Wtime() - start;

	// compute partial result for cycle sqrt_size - 2
	start = MPI_Wtime();
		for(C_index = 0, A_row = 0; A_row < A_local_block_rows; A_row++){
			for(B_column = 0; B_column < B_local_block_columns; B_column++, C_index++){
				for(A_column = 0; A_column < A_local_block_columns; A_column++){
					C_local_block[C_index] += A_local_block[A_row * A_local_block_columns + A_column] *
						B_local_block[A_column * B_local_block_columns + B_column];
				}
			}
		}
	compute_time += MPI_Wtime() - start;

	// Cycle sqrt_size - 1:
	//   - Complete the previous communications on win_A1 and win_B1.
	//   - Copy the shared blocks to the local blocks.
	//   - Compute.
	//   - No new transfers are started.
	start = MPI_Wtime();
		MPI_Win_fence(0, win_A1);
		for(i = 0; i < A_local_block_size; i++) A_local_block[i] = A_shared_block_1[i];

		MPI_Win_fence(0, win_B1);
		for(i = 0; i < B_local_block_size; i++) B_local_block[i] = B_shared_block_1[i];
	mpi_time += MPI_Wtime() - start;

	// compute partial result for cycle sqrt_size - 1
	start = MPI_Wtime();
		for(C_index = 0, A_row = 0; A_row < A_local_block_rows; A_row++){
			for(B_column = 0; B_column < B_local_block_columns; B_column++, C_index++){
				for(A_column = 0; A_column < A_local_block_columns; A_column++){
					C_local_block[C_index] += A_local_block[A_row * A_local_block_columns + A_column] *
						B_local_block[A_column * B_local_block_columns + B_column];
				}
			}
		}
	compute_time += MPI_Wtime() - start;

	// get C parts from other processes at rank 0
	if(rank == 0) {
		int i;
		for(i = 0; i < A_local_block_rows * B_local_block_columns; i++){
			C_array[i] = C_local_block[i];
		}
		for(i = 1; i < size; i++){
			MPI_Recv(C_array + (i * A_local_block_rows * B_local_block_columns), A_local_block_rows * B_local_block_columns,
				MPI_DOUBLE, i, 0, cartesian_grid_communicator, &status);
		}
	} else {
		MPI_Send(C_local_block, A_local_block_rows * B_local_block_columns, MPI_DOUBLE, 0, 0, cartesian_grid_communicator);
	}

	// generating output at rank 0
	if (rank == 0) {
		// convert the 1D array into the actual C matrix
		int i, j, k, row, column;
		for (i = 0; i < sqrt_size; i++){  // block row index
			for (j = 0; j < sqrt_size; j++){ // block column index
				for (row = 0; row < A_local_block_rows; row++){
					for (column = 0; column < B_local_block_columns; column++){
						C[i * A_local_block_rows + row] [j * B_local_block_columns + column] =
							C_array[((i * sqrt_size + j) * A_local_block_rows * B_local_block_columns)
							+ (row * B_local_block_columns) + column];
					}
				}
			}
		}

		printf("(%d,%d)x(%d,%d)=(%d,%d)\n", A_rows, A_columns, B_rows, B_columns, A_rows, B_columns);
		printf("Computation time: %lf\n", compute_time);
		printf("MPI time:         %lf\n", mpi_time);

		if (argc == 4){
			// present results on the screen
			printf("\nA( %d x %d ):\n", A_rows, A_columns);
			for(row = 0; row < A_rows; row++) {
				for(column = 0; column < A_columns; column++)
					printf ("%7.3f ", A[row][column]);
				printf ("\n");
			}
			printf("\nB( %d x %d ):\n", B_rows, B_columns);
			for(row = 0; row < B_rows; row++){
				for(column = 0; column < B_columns; column++)
					printf("%7.3f ", B[row][column]);
				printf("\n");
			}
			printf("\nC( %d x %d ) = AxB:\n", A_rows, B_columns);
			for(row = 0; row < A_rows; row++){
				for(column = 0; column < B_columns; column++)
					printf("%7.3f ",C[row][column]);
				printf("\n");
			}


			printf("\nPerforming serial consistency check. Be patient...\n");
			fflush(stdout);
			int pass = 1;
			double temp;
			for(i=0; i<A_rows; i++){
				for(j=0; j<B_columns; j++){
					temp = 0;
					for(k=0; k<B_rows; k++){
						temp += A[i][k] * B[k][j];
					}
					printf("%7.3f ", temp);
					if(temp != C[i][j]){
						pass = 0;
					}
				}
				printf("\n");
			}
			if (pass) printf("Consistency check: PASS\n");
			else printf("Consistency check: FAIL\n");
		}
	}

	// free all memory
	if(rank == 0){
		int i;
		for(i = 0; i < A_rows; i++){
			free(A[i]);
		}
		for(i = 0; i < B_rows; i++){
			free(B[i]);
		}
		for(i = 0; i < A_rows; i++){
			free(C[i]);
		}
		free(A);
		free(B);
		free(C);
		free(A_array);
		free(B_array);
		free(C_array);
	}

	free(A_local_block);
	free(B_local_block);
	free(C_local_block);
	MPI_Win_free(&win_A1);
	MPI_Win_free(&win_A2);
	MPI_Win_free(&win_B1);
	MPI_Win_free(&win_B2);
	// finalize MPI
	MPI_Finalize();
}
Example #2
void begin_scatter_constant(scatter_constant * sc)
{
	assert(!sc->valid);
	sc->valid = 1;
	MPI_Win_fence(MPI_MODE_NOPRECEDE, sc->win);
}
Example #3
void begin_gather(gather * g)
{
	assert(!g->valid);
	g->valid = 1;
	MPI_Win_fence(MPI_MODE_NOPRECEDE | MPI_MODE_NOPUT, g->win);
}
Example #4
void IMB_window(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                MODES RUN_MODE, double* time)
/*

                      
                      MPI-2 benchmark kernel
                      MPI_Win_create + MPI_Win_fence + MPI_Win_free
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule)                      
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      Mode (aggregate/non aggregate; blocking/nonblocking);
                      see "IMB_benchmark.h" for definition


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
  double t1, t2;
  int    i, dum;

  ierr = 0;

  if(c_info->rank!=-1)
    {
      for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

      t1 = MPI_Wtime();
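      /* each timed sample covers window creation, one access epoch delimited by two
         fences containing a single MPI_Put to the process' own rank, and the window
         destruction */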
      for(i=0;i< ITERATIONS->n_sample;i++)
	{
          ierr = MPI_Win_create(c_info->r_buffer,size,1,MPI_INFO_NULL,
                                c_info->communicator, &c_info->WIN);
          MPI_ERRHAND(ierr);
          ierr = MPI_Win_fence(0, c_info->WIN);
          MPI_ERRHAND(ierr);
/* July 2002 fix V2.2.1, empty window case */
          if(size>0)
          {
          ierr = MPI_Put(c_info->s_buffer, 1, c_info->s_data_type,
                         c_info->rank, 0, 1, c_info->r_data_type, c_info->WIN);
          MPI_ERRHAND(ierr);
          }

          ierr = MPI_Win_fence(0, c_info->WIN);
          MPI_ERRHAND(ierr);
	  
          ierr = MPI_Win_free(&c_info->WIN);
          MPI_ERRHAND(ierr);
	}
      t2 = MPI_Wtime();
      *time=(t2 - t1)/(ITERATIONS->n_sample);
    }
  else
    { 
      *time = 0.; 
    }
}
int main(int argc, char *argv[])
{
	int rank, nprocs, i;
	int *A, *B;

	MPI_Win win;

	MPI_Init(&argc,&argv);
	Test_Init_No_File();
	MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD,&rank);

	if (nprocs != 2) {
		printf("Run this program with 2 processes\n");
		MPI_Abort(MPI_COMM_WORLD,1);
	}

	i = MPI_Alloc_mem(SIZE * sizeof(int), MPI_INFO_NULL, &A);
	if (i) {
		printf("Can't allocate memory in test program\n");
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	i = MPI_Alloc_mem(SIZE * sizeof(int), MPI_INFO_NULL, &B);
	if (i) {
		printf("Can't allocate memory in test program\n");
		MPI_Abort(MPI_COMM_WORLD, 1);
	}

	if (rank == 0) {
		for (i=0; i<SIZE; i++)
			A[i] = B[i] = i;
	}
	else {
		for (i=0; i<SIZE; i++) {
			A[i] = (-3)*i;
			B[i] = (-4)*i;
		}
	}

	MPI_Win_create(B, SIZE*sizeof(int), sizeof(int), MPI_INFO_NULL,
			MPI_COMM_WORLD, &win);

	MPI_Win_fence(0, win);

	if (rank == 0) {
		for (i=0; i<SIZE-1; i++)
			MPI_Put(A+i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
	}
	else {
		for (i=0; i<SIZE-1; i++)
			MPI_Get(A+i, 1, MPI_INT, 0, i, 1, MPI_INT, win);

		MPI_Accumulate(A+i, 1, MPI_INT, 0, i, 1, MPI_INT, MPI_SUM, win);
	}
	MPI_Win_fence(0, win);

	if (rank == 1) {
		for (i=0; i<SIZE-1; i++) {
			if (A[i] != B[i]) {
				printf("Put/Get Error: A[i]=%d, B[i]=%d\n", A[i], B[i]);
				Test_Failed(NULL);
			}
		}
	}
	else {
		if (B[SIZE-1] != SIZE - 1 - 3*(SIZE-1)) {
			printf("Accumulate Error: B[SIZE-1] is %d, should be %d\n", B[SIZE-1], SIZE - 1 - 3*(SIZE-1));
			Test_Failed(NULL);
		}
	}

	MPI_Win_free(&win);

	MPI_Free_mem(A);
	MPI_Free_mem(B);

	Test_Waitforall();
	Test_Global_Summary();

	MPI_Finalize();
	return 0;
}
Example #6
int main(int argc, char **argv)
{
    FILE    *fp, *fp2;
    char    testName[32] = "MPI_Get_Fence", file1[64], file2[64];
    int     dblSize, proc, nprocs, npairs, partner;
    unsigned int i, j, k, size, localSize, NLOOP = NLOOP_MAX;
    unsigned int smin = MIN_P2P_SIZE, smed = MED_P2P_SIZE, smax = MAX_P2P_SIZE;
    double  tScale = USEC, bwScale = MB_8;
    double  tStart, timeMin, timeMinGlobal, overhead, threshold_lo, threshold_hi;
    double  msgBytes, sizeBytes, localMax, UsedMem;
    double  tElapsed[NREPS], tElapsedGlobal[NREPS];
    double  *A, *B;
    MPI_Win   win;

    // Initialize parallel environment
    MPI_Init(&argc, &argv);
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &proc );

    // Test input parameters
    if( nprocs%2 != 0 && proc == 0 )
        fatalError( "P2P test requires an even number of processors" );

    // Check for user defined limits
    checkEnvP2P( proc, &NLOOP, &smin, &smed, &smax );

    // Initialize local variables
    localMax = 0.0;
    npairs   = nprocs/2;
    if( proc < npairs  ) partner = proc + npairs;
    if( proc >= npairs ) partner = proc - npairs;
    UsedMem = (double)smax*(double)sizeof(double)*2.0;

    // Allocate and initialize arrays
    srand( SEED );
    A = doubleVector( smax );
    B = doubleVector( smax );

    // Open output file and write header
    if( proc == 0 ){
        // Check timer overhead in seconds
        timerTest( &overhead, &threshold_lo, &threshold_hi );
        // Open output files and write headers
        sprintf( file1, "getfence_time-np_%.4d.dat", nprocs );
        sprintf( file2, "getfence_bw-np_%.4d.dat",   nprocs );
        fp  = fopen( file1, "a" );
        fp2 = fopen( file2, "a" );
        printHeaders( fp, fp2, testName, UsedMem, overhead, threshold_lo );
    }

    // Get type size
    MPI_Type_size( MPI_DOUBLE, &dblSize );
    // Set up a window for RMA
    MPI_Win_create( A, smax*dblSize, dblSize, MPI_INFO_NULL, MPI_COMM_WORLD, &win );
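    // MPI_Win_fence is collective over the window's communicator, so the upper half of the
    // ranks must issue matching fence calls even though they perform no MPI_Get themselves.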

    //================================================================
    // Single loop with minimum size to verify that inner loop length  
    // is long enough for the timings to be accurate                     
    //================================================================
    // Warmup with a medium size message
    if( proc < npairs ){
        MPI_Win_fence( 0, win );
        MPI_Get( B, smed, MPI_DOUBLE, partner, 0, smed, MPI_DOUBLE, win );
        MPI_Win_fence( 0, win );
    }else{
        MPI_Win_fence( 0, win );
        MPI_Win_fence( 0, win );
    }
    // Test if current NLOOP is enough to capture fastest test cases
    MPI_Barrier( MPI_COMM_WORLD );
    tStart = benchTimer();
    if( proc < npairs ){
        for(j = 0; j < NLOOP; j++){
            MPI_Win_fence( 0, win );
            MPI_Get( B, smin, MPI_DOUBLE, partner, 0, smin, MPI_DOUBLE, win );
            MPI_Win_fence( 0, win );
        }
    }else{
        for(j = 0; j < NLOOP; j++){
            MPI_Win_fence( 0, win );
            MPI_Win_fence( 0, win );
        }
    }
    timeMin = benchTimer() - tStart;
    MPI_Reduce( &timeMin, &timeMinGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD );
    if( proc == 0 ) resetInnerLoop( timeMinGlobal, threshold_lo, &NLOOP );
    MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );


    //================================================================
    // Execute test for each requested size                  
    //================================================================
    for( size = smin; size <= smax; size = size*2 ){

        // Warmup with a medium size message
        if( proc < npairs ){
            MPI_Win_fence( 0, win );
            MPI_Get( B, smed, MPI_DOUBLE, partner, 0, smed, MPI_DOUBLE, win );
            MPI_Win_fence( 0, win );
        }else{
            MPI_Win_fence( 0, win );
            MPI_Win_fence( 0, win );
        }

        // Repeat NREPS to collect statistics
        for(i = 0; i < NREPS; i++){
            MPI_Barrier( MPI_COMM_WORLD );
            tStart = benchTimer();
            if( proc < npairs ){
                for(j = 0; j < NLOOP; j++){
                    MPI_Win_fence( 0, win );
                    MPI_Get( B, size, MPI_DOUBLE, partner, 0, size, MPI_DOUBLE, win );
                    MPI_Win_fence( 0, win );
                }
            }else{
                for(j = 0; j < NLOOP; j++){
                    MPI_Win_fence( 0, win );
                    MPI_Win_fence( 0, win );
                }
            }
            tElapsed[i] = benchTimer() - tStart;
        }
        MPI_Reduce( tElapsed, tElapsedGlobal, NREPS, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );
        // Only task 0 needs to do the analysis of the collected data
        if( proc == 0 ){
            // sizeBytes is size to write to file
            // msgBytes is actual data exchanged on the wire
            msgBytes  = (double)size*(double)npairs*(double)dblSize;
            sizeBytes = (double)size*(double)dblSize;
            post_process( fp, fp2, threshold_hi, tElapsedGlobal, tScale, 
                          bwScale, size*dblSize, sizeBytes, msgBytes, &NLOOP, 
                          &localMax, &localSize );
        }
        MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );

    }
    MPI_Win_free( &win );
    MPI_Barrier( MPI_COMM_WORLD );
    free( A );
    free( B );

    //================================================================
    // Print completion message, free memory and exit                  
    //================================================================
    if( proc == 0 ){
        printSummary( fp2, testName, localMax, localSize );
        fclose( fp2 ); 
        fclose( fp );
    }

    MPI_Finalize();
    return 0;
}
Example #7
void relax_surf(){
	double dQ[3][6];
	int i, j, n, nb;
	double third = 1 / 3.0;
	int degen, inf;
	double Wstr;
	int ref = length * myid;

	//for hel
	double Qelas[6] = {0};
	int xm, xp, ym, yp, zm, zp;

	//for hch
	double Qch[6] ={0};
	for (i = 0; i < 3; i ++){
		for(j = 0; j < 6; j ++){
			dQ[i][j] = 0;
		}
	}

	//for degenerate
	double Qdiff[6] = {0};
	double Qin[6] = {0};
	double loc_nu[3] = {0};

	nb = 0;
	for(i = 0; i < length; i ++){
		if(sign[i] >= 2 && sign[i] <= 8){
			//for channel boundary
			inf = 1;
			if(sign[i] == 3 || sign[i] == 2){
				degen = degenerate;
				inf = infinite;
				Wstr = W;
			}
			//for nanoparticle boundary
			else if(sign[i] == 4 || sign[i] == 5){
				degen = 0;
				inf = 0;
				Wstr = Wp;
			}
			else if(sign[i] == 6 || sign[i] == 7){
				degen = 1;
				inf = 0;
				Wstr = Wp;
			}
			if(inf == 0){
				for(n = 0; n < 3; n ++)	loc_nu[n] = nu_p[nb * 3 + n];
				for(n = 0; n < 6; n ++)	Qin[n] = q[i * 6 + n];
				if((sign[i] % 2) == 0){
					xm = neigb[i * 6 + 0];
					xp = neigb[i * 6 + 1];
					ym = neigb[i * 6 + 2];
					yp = neigb[i * 6 + 3];
					zm = neigb[i * 6 + 4];
					zp = neigb[i * 6 + 5];
					for (n = 0; n < 6; n++) {
						dQ[0][n] = (-(double)q[xp * 6 + n]+4*(double)q[xm * 6 + n]-3*Qin[n])*0.5*idx;
						dQ[1][n] = (-(double)q[yp * 6 + n]+4*(double)q[ym * 6 + n]-3*Qin[n])*0.5*idy;
						dQ[2][n] = (-(double)q[zp * 6 + n]+4*(double)q[zm * 6 + n]-3*Qin[n])*0.5*idz;
						Qelas[n] = dQ[0][n] * fabs(loc_nu[0]) + dQ[1][n] * fabs(loc_nu[1]) + dQ[2][n] * fabs(loc_nu[2]);
					}
				}
				else if((sign[i] % 2) == 1){
					xm = neigb[i * 6 + 0];
					xp = neigb[i * 6 + 1];
					ym = neigb[i * 6 + 2];
					yp = neigb[i * 6 + 3];
					zm = neigb[i * 6 + 4];
					zp = neigb[i * 6 + 5];
					for (n = 0; n < 6; n++) {
						if((xm + ref) == -1){
							dQ[0][n] = 0;
						}
						else if((xp + ref) == -1){
							dQ[0][n] = ((double)q[xm * 6 + n]-Qin[n])*idx;
						}
						else{
							dQ[0][n] = (-(double)q[xp * 6 + n]+4*(double)q[xm * 6 + n]-3*Qin[n])*0.5*idx;
						}
						if((ym + ref) == -1){
							dQ[1][n] = 0;
						}
						else if((yp + ref) == -1){
							dQ[1][n] = ((double)q[ym * 6 + n]-Qin[n])*idy;
						}
						else{
							dQ[1][n] = (-(double)q[yp * 6 + n]+4*(double)q[ym * 6 + n]-3*Qin[n])*0.5*idy;
						}
						if((zm + ref) == -1){
							dQ[2][n] = 0;
						}
						else if((zp + ref) == -1){
							dQ[2][n] = ((double)q[zm * 6 + n]-Qin[n])*idz;
						}
						else{
							dQ[2][n] = (-(double)q[zp * 6 + n]+4*(double)q[zm * 6 + n]-3*Qin[n])*0.5*idz;
						}
						Qelas[n] = dQ[0][n] * fabs(loc_nu[0]) + dQ[1][n] * fabs(loc_nu[1]) + dQ[2][n] * fabs(loc_nu[2]);
					}
				}
				else{
					printf("Problems in defining share or sign array.\n");
				}
				if(chiral == 1){
					Qch[0] = loc_nu[2] * Qin[1] - loc_nu[1] * Qin[2];
					Qch[3] = loc_nu[0] * Qin[4] - loc_nu[2] * Qin[1];
					Qch[5] = loc_nu[1] * Qin[2] - loc_nu[0] * Qin[4];
					Qch[1] = 0.5 * (loc_nu[2] * Qin[3] - loc_nu[1] * Qin[4] + loc_nu[0] * Qin[2] - loc_nu[2] * Qin[0]);
					Qch[2] = 0.5 * (loc_nu[2] * Qin[4] - loc_nu[1] * Qin[5] + loc_nu[1] * Qin[0] - loc_nu[0] * Qin[1]);
					Qch[4] = 0.5 * (loc_nu[0] * Qin[5] - loc_nu[2] * Qin[2] + loc_nu[1] * Qin[1] - loc_nu[0] * Qin[3]);
				}
			}
			if(degen == 1){
				for(n = 0; n<6; n++){
					relax_degen(Qin, loc_nu, Qdiff);
					qn[i * 6 + n] = Qin[n] + dt*(L1 * Qelas[n] + L1 * chiral * 2 * qch * Qch[n] - 2 * Wstr * Qdiff[n]);
				}
			}
			else if(degen == 0 && inf == 0){
				for(n = 0; n < 6; n++){
					qn[i * 6 + n] = Qin[n] + dt*(L1 * Qelas[n] + L1 * chiral * 2 * qch * Qch[n] - Wstr* (Qin[n]-qo_p[nb * 6 + n]));
				}
			}
			nb ++;
		}
	}
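	// win is an RMA window created elsewhere (not shown in this snippet), presumably exposing
	// the q/qn arrays so that updates from neighbouring ranks become visible here; the barrier
	// and fence synchronize all ranks before the relaxed surface values are copied back into q.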
	MPI_Barrier(MPI_COMM_WORLD);	
	MPI_Win_fence(0, win);
	for(i = 0; i < length; i ++){	
		if((sign[i] >= 4 && sign[i] < 8) || ((sign[i] == 2 || sign[i] == 3) && infinite == 0)){
			for (n = 0; n < 6; n++) {
				q[i * 6 + n] = qn[i * 6 + n];
			}
		}
	}
}
Example #8
int main(int argc, char *argv[])
{
    int errs = 0, err;
    int i, rank, size, source, dest;
    int blksize, totsize;
    int *recvBuf = 0, *srcBuf = 0;
    MPI_Comm comm;
    MPI_Win win;
    MPI_Aint extent;
    MPI_Datatype originType;
    int counts[2];
    int displs[2];

    MTest_Init(&argc, &argv);

    /* Select the communicator and datatypes */
    comm = MPI_COMM_WORLD;

    /* Create the datatype */
    /* One MPI Implementation fails this test with sufficiently large
     * values of blksize - it appears to convert this type to an
     * incorrect contiguous move */
    blksize = 2048;
    counts[0] = blksize;
    counts[1] = blksize;
    displs[0] = 0;
    displs[1] = blksize + 1;
    MPI_Type_indexed(2, counts, displs, MPI_INT, &originType);
    MPI_Type_commit(&originType);

    totsize = 2 * blksize;

    /* Determine the sender and receiver */
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    source = 0;
    dest = size - 1;

    recvBuf = (int *) malloc(totsize * sizeof(int));
    srcBuf = (int *) malloc((totsize + 1) * sizeof(int));

    if (!recvBuf || !srcBuf) {
        fprintf(stderr, "Could not allocate buffers\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Initialize the send and recv buffers */
    for (i = 0; i < totsize; i++) {
        recvBuf[i] = -1;
    }
    for (i = 0; i < blksize; i++) {
        srcBuf[i] = i;
        srcBuf[blksize + 1 + i] = blksize + i;
    }
    srcBuf[blksize] = -1;

    MPI_Type_extent(MPI_INT, &extent);
    MPI_Win_create(recvBuf, totsize * extent, extent, MPI_INFO_NULL, comm, &win);
    MPI_Win_fence(0, win);
    if (rank == source) {
        /* To improve reporting of problems about operations, we
         * change the error handler to errors return */
        MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);

        err = MPI_Put(srcBuf, 1, originType, dest, 0, totsize, MPI_INT, win);
        errs += CheckMPIErr(err);
        err = MPI_Win_fence(0, win);
        errs += CheckMPIErr(err);
    }
    else if (rank == dest) {
        MPI_Win_fence(0, win);
        for (i = 0; i < totsize; i++) {
            if (recvBuf[i] != i) {
                errs++;
                if (errs < 10) {
                    printf("recvBuf[%d] = %d should = %d\n", i, recvBuf[i], i);
                }
            }
        }
    }
    else {
        MPI_Win_fence(0, win);
    }

    MPI_Type_free(&originType);
    MPI_Win_free(&win);
    free(recvBuf);
    free(srcBuf);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
int main(int argc, char **argv) {
    int rank, nranks, rank_world, nranks_world;
    int i, j, peer, bufsize, errors;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;
    MPI_Comm shr_comm;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks_world);

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank_world, MPI_INFO_NULL, &shr_comm); /* key: 'rank' is not set until after the split, so use the world rank */

    MPI_Comm_rank(shr_comm, &rank);
    MPI_Comm_size(shr_comm, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    MPI_Win_allocate_shared(bufsize, 1, MPI_INFO_NULL, shr_comm, &win_buf, &buf_win);
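    /* win_buf is this rank's slice of the shared window allocated over shr_comm; the two
       fences below bracket its initialization before the passive-target (lock/unlock)
       epochs that follow. */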

    MPI_Win_fence(0, buf_win);

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf + i) = -1.0;
        *(src_buf + i) =  1.0 + rank;
    }

    MPI_Win_fence(0, buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided accumulate operations */

    for (i = 0; i < ITERATIONS; i++) {
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        for (j = 0; j < SUB_YDIM; j++) {
            idx_rem[j] = j*XDIM;
            blk_len[j] = SUB_XDIM;
        }

        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &src_type);
        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &dst_type);

        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        /* PUT */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Get_accumulate(src_buf, 1, src_type, dst_buf, 1, src_type, peer, 0,
                           1, dst_type, MPI_REPLACE, buf_win);
        MPI_Win_unlock(peer, buf_win);

        /* GET */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Get_accumulate(src_buf, 1, src_type, dst_buf, 1, src_type, peer, 0,
                           1, dst_type, MPI_NO_OP, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual   = *(win_buf + i + j*XDIM);
            const double expected = (1.0 + ((rank+nranks-1)%nranks));
            if (fabs(actual - expected) > 1.0e-10) {
                SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                                rank, j, i, expected, actual); );
                errors++;
                fflush(stdout);
            }
        }
Example #10
int main(int argc, char ** argv)
{
  long Block_order;        /* number of columns owned by rank       */
  long Block_size;         /* size of a single block                */
  long Colblock_size;      /* size of column block                  */
  int Tile_order=32;       /* default Tile order                    */
  int tiling;              /* boolean: true if tiling is used       */
  int Num_procs;           /* number of ranks                       */
  long order;              /* order of overall matrix               */
  int send_to, recv_from;  /* ranks with which to communicate       */
  long bytes;              /* combined size of matrices             */
  int my_ID;               /* rank                                  */
  int root=0;              /* rank of root                          */
  int iterations;          /* number of times to do the transpose   */
  int i, j, it, jt, istart;/* dummies                               */
  int iter;                /* index of iteration                    */
  int phase;               /* phase inside staged communication     */
  int colstart;            /* starting column for owning rank       */
  int error;               /* error flag                            */
  double RESTRICT *A_p;    /* original matrix column block          */
  double RESTRICT *B_p;    /* transposed matrix column block        */
  double RESTRICT *Work_in_p;/* workspace for transpose function    */
  double RESTRICT *Work_out_p;/* workspace for transpose function   */
  double abserr,           /* absolute error                        */
         abserr_tot;       /* aggregate absolute error              */
  double epsilon = 1.e-8;  /* error tolerance                       */
  double local_trans_time, /* timing parameters                     */
         trans_time,
         avgtime;
  MPI_Win  rma_win = MPI_WIN_NULL;
  MPI_Info rma_winfo = MPI_INFO_NULL;
  int passive_target = 0;  /* use passive target RMA sync           */
#if MPI_VERSION >= 3
  int  flush_local  = 1;   /* flush local (or remote) after put     */
  int  flush_bundle = 1;   /* flush every <bundle> put calls        */
#endif

/*********************************************************************
** Initialize the MPI environment
*********************************************************************/
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_ID);
  MPI_Comm_size(MPI_COMM_WORLD, &Num_procs);

/*********************************************************************
** process, test and broadcast input parameters
*********************************************************************/
  error = 0;
  if (my_ID == root) {
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("MPIRMA matrix transpose: B = A^T\n");

    if (argc <= 3){
      printf("Usage: %s <# iterations> <matrix order> [Tile size]"
             "[sync (0=fence, 1=flush)] [flush local?] [flush bundle]\n",
             *argv);
      error = 1; goto ENDOFTESTS;
    }

    iterations  = atoi(*++argv);
    if(iterations < 1){
      printf("ERROR: iterations must be >= 1 : %d \n",iterations);
      error = 1; goto ENDOFTESTS;
    }

    order = atol(*++argv);
    if (order < Num_procs) {
      printf("ERROR: matrix order %ld should at least # procs %d\n",
             order, Num_procs);
      error = 1; goto ENDOFTESTS;
    }
    if (order%Num_procs) {
      printf("ERROR: matrix order %ld should be divisible by # procs %d\n",
             order, Num_procs);
      error = 1; goto ENDOFTESTS;
    }

    if (argc >= 4) Tile_order     = atoi(*++argv);
    if (argc >= 5) passive_target = atoi(*++argv);
#if MPI_VERSION >= 3
    if (argc >= 6) flush_local    = atoi(*++argv);
    if (argc >= 7) flush_bundle   = atoi(*++argv);
#endif

    ENDOFTESTS:;
  }
  bail_out(error);

  if (my_ID == root) {
    printf("Number of ranks      = %d\n", Num_procs);
    printf("Matrix order         = %ld\n", order);
    printf("Number of iterations = %d\n", iterations);
    if ((Tile_order > 0) && (Tile_order < order))
          printf("Tile size            = %d\n", Tile_order);
    else  printf("Untiled\n");
    if (passive_target) {
#if MPI_VERSION < 3
        printf("Synchronization      = MPI_Win_(un)lock\n");
#else
        printf("Synchronization      = MPI_Win_flush%s (bundle=%d)\n", flush_local ? "_local" : "", flush_bundle);
#endif
    } else {
        printf("Synchronization      = MPI_Win_fence\n");
    }
  }

  /*  Broadcast input data to all ranks */
  MPI_Bcast (&order,          1, MPI_LONG, root, MPI_COMM_WORLD);
  MPI_Bcast (&iterations,     1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&Tile_order,     1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&passive_target, 1, MPI_INT,  root, MPI_COMM_WORLD);
#if MPI_VERSION >= 3
  MPI_Bcast (&flush_local,    1, MPI_INT,  root, MPI_COMM_WORLD);
  MPI_Bcast (&flush_bundle,   1, MPI_INT,  root, MPI_COMM_WORLD);
#endif

  /* a non-positive tile size means no tiling of the local transpose */
  tiling = (Tile_order > 0) && (Tile_order < order);
  bytes = 2 * sizeof(double) * order * order;

/*********************************************************************
** The matrix is broken up into column blocks that are mapped one to a
** rank.  Each column block is made up of Num_procs smaller square
** blocks of order block_order.
*********************************************************************/

  Block_order    = order/Num_procs;
  colstart       = Block_order * my_ID;
  Colblock_size  = order * Block_order;
  Block_size     = Block_order * Block_order;

  /* debug message size effects */
  if (my_ID == root) {
    printf("Block_size           = %ld\n", Block_size);
  }

/*********************************************************************
** Create the column block of the test matrix, the row block of the
** transposed matrix, and workspace (workspace only if #procs>1)
*********************************************************************/
  A_p = (double *)prk_malloc(Colblock_size*sizeof(double));
  if (A_p == NULL){
    printf(" Error allocating space for original matrix on node %d\n",my_ID);
    error = 1;
  }
  bail_out(error);

  MPI_Info_create (&rma_winfo);
  MPI_Info_set (rma_winfo, "no locks", "true");
  B_p = (double *)prk_malloc(Colblock_size*sizeof(double));
  if (B_p == NULL){
    printf(" Error allocating space for transpose matrix on node %d\n",my_ID);
    error = 1;
  }
  bail_out(error);

  if (Num_procs>1) {
    Work_out_p = (double *) prk_malloc(Block_size*(Num_procs-1)*sizeof(double));
    if (Work_out_p == NULL){
      printf(" Error allocating space for work_out on node %d\n",my_ID);
      error = 1;
    }
    bail_out(error);

    PRK_Win_allocate(Block_size*(Num_procs-1)*sizeof(double), sizeof(double),
                     rma_winfo, MPI_COMM_WORLD, &Work_in_p, &rma_win);
    if (Work_in_p == NULL){
      printf(" Error allocating space for work on node %d\n",my_ID);
      error = 1;
    }
    bail_out(error);
  }

#if MPI_VERSION >= 3
  if (passive_target && Num_procs>1) {
    MPI_Win_lock_all(MPI_MODE_NOCHECK,rma_win);
  }
#endif

  /* Fill the original column matrix                                                */
  istart = 0;
  for (j=0;j<Block_order;j++) {
    for (i=0;i<order; i++) {
      A(i,j) = (double) (order*(j+colstart) + i);
      B(i,j) = 0.0;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  for (iter = 0; iter<=iterations; iter++) {

    /* start timer after a warmup iteration                                        */
    if (iter == 1) {
      MPI_Barrier(MPI_COMM_WORLD);
      local_trans_time = wtime();
    }

    /* do the local transpose                                                     */
    istart = colstart;
    if (!tiling) {
      for (i=0; i<Block_order; i++) {
        for (j=0; j<Block_order; j++) {
          B(j,i) += A(i,j);
          A(i,j) += 1.0;
        }
      }
    } else {
      for (i=0; i<Block_order; i+=Tile_order) {
        for (j=0; j<Block_order; j+=Tile_order) {
          for (it=i; it<MIN(Block_order,i+Tile_order); it++) {
            for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) {
              B(jt,it) += A(it,jt);
              A(it,jt) += 1.0;
            }
          }
        }
      }
    }

    if (!passive_target && Num_procs>1) {
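      /* MPI_MODE_NOSTORE asserts the local window was not updated by local stores since the
         last synchronization; MPI_MODE_NOPRECEDE asserts this fence completes no locally
         issued RMA calls. */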
      MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPRECEDE, rma_win);
    }

    for (phase=1; phase<Num_procs; phase++){
      send_to = (my_ID - phase + Num_procs)%Num_procs;

      istart = send_to*Block_order;
      if (!tiling) {
        for (i=0; i<Block_order; i++) {
          for (j=0; j<Block_order; j++) {
            Work_out(phase-1,j,i) = A(i,j);
            A(i,j) += 1.0;
          }
        }
      } else {
        for (i=0; i<Block_order; i+=Tile_order) {
          for (j=0; j<Block_order; j+=Tile_order) {
            for (it=i; it<MIN(Block_order,i+Tile_order); it++) {
              for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) {
                Work_out(phase-1,jt,it) = A(it,jt);
                A(it,jt) += 1.0;
              }
            }
          }
        }
      }

#if MPI_VERSION < 3
      if (passive_target) {
          MPI_Win_lock(MPI_LOCK_SHARED, send_to, MPI_MODE_NOCHECK, rma_win);
      }
#endif
      MPI_Put(Work_out_p+Block_size*(phase-1), Block_size, MPI_DOUBLE, send_to,
              Block_size*(phase-1), Block_size, MPI_DOUBLE, rma_win);

      if (passive_target) {
#if MPI_VERSION < 3
        MPI_Win_unlock(send_to, rma_win);
#else
        if (flush_bundle==1) {
          if (flush_local==1) {
              MPI_Win_flush_local(send_to, rma_win);
          } else {
              MPI_Win_flush(send_to, rma_win);
          }
        } else if ( (phase%flush_bundle) == 0) {
          /* Too lazy to record all targets, so let MPI do it internally (hopefully) */
          if (flush_local==1) {
              MPI_Win_flush_local_all(rma_win);
          } else {
              MPI_Win_flush_all(rma_win);
          }
        }
#endif
      }
    }  /* end of phase loop for puts  */
    if (Num_procs>1) {
      if (passive_target) {
#if MPI_VERSION >= 3
          MPI_Win_flush_all(rma_win);
#endif
          MPI_Barrier(MPI_COMM_WORLD);
      } else {
          MPI_Win_fence(MPI_MODE_NOSTORE, rma_win);
      }
    }

    for (phase=1; phase<Num_procs; phase++) {
      recv_from = (my_ID + phase)%Num_procs;
      istart = recv_from*Block_order;
      /* scatter received block to transposed matrix; no need to tile */
      for (j=0; j<Block_order; j++) {
        for (i=0; i<Block_order; i++) {
          B(i,j) += Work_in(phase-1,i,j);
        }
      }
    } /* end of phase loop for scatters */

    /* for the flush case we need to make sure we have consumed Work_in
       before overwriting it in the next iteration                    */
    if (Num_procs>1 && passive_target) {
      MPI_Barrier(MPI_COMM_WORLD);
    }

  } /* end of iterations */

  local_trans_time = wtime() - local_trans_time;
  MPI_Reduce(&local_trans_time, &trans_time, 1, MPI_DOUBLE, MPI_MAX, root,
             MPI_COMM_WORLD);

  abserr = 0.0;
  istart = 0;
  double addit = ((double)(iterations+1) * (double) (iterations))/2.0;
  for (j=0;j<Block_order;j++) {
    for (i=0;i<order; i++) {
      abserr += ABS(B(i,j) - ((double)(order*i + j+colstart)*(iterations+1)+addit));
    }
  }

  MPI_Reduce(&abserr, &abserr_tot, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD);

  if (my_ID == root) {
    if (abserr_tot < epsilon) {
      printf("Solution validates\n");
      avgtime = trans_time/(double)iterations;
      printf("Rate (MB/s): %lf Avg time (s): %lf\n",1.0E-06*bytes/avgtime, avgtime);
    }
    else {
      printf("ERROR: Aggregate absolute error %lf exceeds threshold %e\n", abserr_tot, epsilon);
      error = 1;
    }
  }

  bail_out(error);

  if (rma_win!=MPI_WIN_NULL) {
#if MPI_VERSION >=3
    if (passive_target) {
      MPI_Win_unlock_all(rma_win);
    }
#endif
    PRK_Win_free(&rma_win);
  }

  MPI_Finalize();
  exit(EXIT_SUCCESS);

}  /* end of main */
Example #11
int main(int argc, char ** argv)
{
  MPI_Aint win_size = WIN_SIZE;
  MPI_Win win;
  MPI_Group group;
  char* base;
  int disp_unit = 1;
  int rank, size, target_rank, target_disp = 1;
  int r, flag;
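
  /* WIN_SIZE and TEST_OP are presumably defined elsewhere in the original test; TEST_OP
     appears to be a comparison-operator macro (e.g. !=) used to check each return code
     against MPI_SUCCESS. */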

  /*************************************************************/
  /* Init and set values */
  /*************************************************************/
  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  target_rank = (rank + 1) % size;
  MPI_Alloc_mem(WIN_SIZE, MPI_INFO_NULL, &base);
  if ( NULL == base )
  {
    printf("failed to alloc %d\n", WIN_SIZE);
    exit(16);
  }


  /*************************************************************/
  /* Win_create */
  /*************************************************************/
  /* MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info,
     MPI_Comm comm, MPI_Win *win); */
  r = MPI_Win_create(base, win_size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); 
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_create\n", rank);

  /*************************************************************/
  /* First epoch: Tests Put, Get, Get_group, Post, Start,      */
  /*              Complete, Wait, Lock, Unlock                 */
  /*************************************************************/
  r = MPI_Win_get_group(win, &group);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_get_group\n", rank);

  r = MPI_Win_post(group, 0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_post\n", rank);

  r = MPI_Win_start(group, 0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_start\n", rank);

  r = MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_lock\n", rank);

  /* MPI_Put(void *origin_addr, int origin_count, MPI_Datatype
     origin_datatype, int target_rank, MPI_Aint target_disp,
     int target_count, MPI_Datatype target_datatype, MPI_Win win) */
  r = MPI_Put(base, WIN_SIZE, MPI_BYTE, target_rank, target_disp,
     WIN_SIZE, MPI_BYTE, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Put\n", rank);

  r = MPI_Win_unlock(target_rank, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_unlock\n", rank);

  /* MPI_Get(void *origin_addr, int origin_count, MPI_Datatype
     origin_datatype, int target_rank, MPI_Aint target_disp,
     int target_count, MPI_Datatype target_datatype, MPI_Win win); */
  r = MPI_Get(base, WIN_SIZE, MPI_BYTE, target_rank, target_disp,
      WIN_SIZE, MPI_BYTE, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Get\n", rank);

  r = MPI_Win_complete(win);
  if ( MPI_SUCCESS TEST_OP r ) 
    printf("Rank %d failed MPI_Win_complete\n", rank);

  r = MPI_Win_test(win, &flag);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_test\n", rank);

  r = MPI_Win_wait(win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_wait\n", rank);

  /*************************************************************************/
  /* Second epoch: Tests Accumulate and Fence */
  /*************************************************************************/
  r = MPI_Win_fence(0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_fence\n", rank);

  if ( rank == 0 )
  {
    /* MPI_Accumulate(void *origin_addr, int origin_count, MPI_Datatype
       origin_datatype, int target_rank, MPI_Aint target_disp, 
       int target_count, MPI_Datatype target_datatype, 
       MPI_Op op, MPI_Win win) */
    r = MPI_Accumulate(base, WIN_SIZE, MPI_BYTE, 0,
        target_disp, WIN_SIZE, MPI_BYTE, MPI_SUM, win);
    if ( MPI_SUCCESS TEST_OP r ) 
      printf("Rank %d failed MPI_Accumulate\n", rank);
  }
  r = MPI_Win_fence(0, win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_fence\n", rank);


  /*************************************************************/
  /* Win_free and Finalize */
  /*************************************************************/
  r = MPI_Win_free(&win);
  if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_free\n", rank);

  MPI_Free_mem(base);  /* memory obtained with MPI_Alloc_mem must be released with MPI_Free_mem */

  MPI_Finalize();
}
Example #12
int main(int argc, char *argv[]){
        MPI_Init(&argc, &argv);
        MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
        MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
        MPI_Comm_rank(MPI_COMM_WORLD, &myid);

	int i, j, k, n, l, indx, signal;
	bool flag;
	double deltat;

	double time_spend;
	double begin, end;

	FILE* energy;
	FILE* grid;

	begin = MPI_Wtime();
	//read in parameters
	if(!read_param()){
		MPI_Comm_free(&shmcomm);
		MPI_Finalize();
		return 1;
	}
	
	flag = true;
	dE = 1;
	el_old = 1;
	cycle = 0;
	deltat = (0.1 - dt) / increment;

	S = 0.25 * (1 + 3 * sqrt(1 - 8 / (3 * U)));
	//	printf("Theoretical value is %lf.\n", third * (1 - third * U) * S * S - 2 * third * third * third * S * S * S * U + U / 9 * S * S * S * S );

	if(myid == root){
		
		printf("S is %lf.\n\n", S);
	
		//define droplet and boundary, introduce particles, initialize qtensor;
		if(!initial()){
			flag = false;
		}		
		energy = fopen("energy.out", "w");
		fprintf(energy,"cycle\tEnergy_diff\tEnergy_ldg\tEnergy_el\tEnergy_ch\tEnergy_surf\tEnergy_tot\n");
		fclose(energy);

		grid = fopen("grid.bin", "wb");
		l = 0;
		for(l = 0; l < tot; l++){
			if(boundary[l] || nboundary[l])	signal = 1;
			else if(drop[l]) 	signal = 0;			
			else signal = -1;
			fwrite(&signal, sizeof(int), 1, grid);
		}
		fclose(grid);	
		
	}
	
	//Scatter all information initialized on the root processor to all the other processors.
	if(!scatter()){
		flag = false;
		return 1;
	}		

	//Evolution
	while(flag){
		//Every 1000 steps calculate energies.
		if(cycle % 1000 == 0){
			free_energy();
			if(fabs(dE) < accuracy){
				flag = false;
			}
		}

		if(cycle % 10000 == 0){ 
			//Every 10000 steps check the trace of Qtensor
			if(myid == root){	
				for(i = 0; i < droplet; i++){
					if(!checktr(&q[i * 6])){
						flag = false;
						printf("Error in the trace of q.\n");
					}
					if(flag == false)	break;
				}
				//print output file
				output();
			}	
			MPI_Bcast(&flag, 1, MPI_BYTE, root, MPI_COMM_WORLD);	
		}

		//Wait until all the processors are ready and relax the system, first bulk and then boundary.
		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Win_fence(0, win);
		if(flag) relax_bulk();

		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Win_fence(0, win);
		if(flag)	relax_surf();

		if(dt < 0.1){
			dt += deltat;
			if(dt >= 0.1)	dt = 0.1;
		}

		cycle ++;
		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Win_fence(0, win);
		
	}

	free_energy();

	end = MPI_Wtime();

	if(myid == root){
		output();

		//Calculate time used.
		time_spend = (double)(end - begin) / 60.0;

		energy = fopen("energy.out", "a");
		if(time_spend < 60){
			fprintf(energy, "\nTime used:	%lf min.\n", time_spend);
			printf("\nTime used:	%lf min.\n", time_spend);
		}
		else{
			fprintf(energy, "\nTime used:	%lf h.\n", time_spend / 60.0);
			printf("\nTime used:	%lf h.\n", time_spend / 60.0);
		}
		fclose(energy);	
	}

	//deallocate dynamic arrays
	free_q();
        MPI_Win_free(&win);
        MPI_Comm_free(&shmcomm);
        MPI_Finalize();

	return 0;
}
Example #13
int main(int argc, char *argv[])
{
  int rank, nprocs, i, j;
  int *A;
  MPI_Win win;
  MPI_Datatype column, xpose;
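
  /* NROWS is presumably a compile-time constant defined elsewhere in the original test;
     the datatypes 'column' and 'xpose' are declared but never used in this excerpt. */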
  
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if (nprocs !=2) {
      printf("Run this program with 2 processes\n");
      fflush(stdout);
      MPI_Abort(MPI_COMM_WORLD, 1);
  }

  MPI_Alloc_mem(NROWS * sizeof(int), MPI_INFO_NULL, &A);
  for (i=0; i<NROWS; i++) {
    A[i] = rank;
  }
  if (rank == 0) {
    printf("MPI_Win_create start\n");
    fflush(stdout);
  }

  MPI_Win_create(A, NROWS*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
  MPI_Win_fence(0, win);
  if (rank == 0) {
    printf("MPI_Win_create end\n");
    fflush(stdout);
  }

#if 1
  if (rank == 0)  {
    int target_rank = 1;
    MPI_Aint target_disp = 0;
    int target_count = NROWS;
    printf("MPI_Get start\n");
    fflush(stdout);

    MPI_Get(A, NROWS, MPI_INT, target_rank, target_disp, target_count, MPI_INT, win);

    printf("MPI_Get end\n");
    fflush(stdout);

    printf("MPI_Fence start\n");
    fflush(stdout);

    MPI_Win_fence(0, win);

    printf("MPI_Fence end \n");
    fflush(stdout);

    /* check data transferred correctly */
    for (i=0; i<NROWS; i++){
      printf("Rank %d: A[%d]=%d \n", rank, i, A[i]);
      fflush(stdout);
    }
  } else { /* rank = 1 */
    MPI_Win_fence(0, win);
  }

  MPI_Win_free(&win);
#endif

  MPI_Finalize();
  return 0;
}
Example #14
0
int main( int argc, char *argv[] )
{
    int           errs = 0, err;
    int           rank, size;
    int           *buf, bufsize;
    int           *result;
    int           *rmabuf, rsize, rcount;
    MPI_Comm      comm;
    MPI_Win       win;
    MPI_Request   req;
    MPI_Datatype  derived_dtp;

    MTest_Init( &argc, &argv );

    bufsize = 256 * sizeof(int);
    buf     = (int *)malloc( bufsize );
    if (!buf) {
        fprintf( stderr, "Unable to allocated %d bytes\n", bufsize );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    result  = (int *)malloc( bufsize );
    if (!result) {
        fprintf( stderr, "Unable to allocated %d bytes\n", bufsize );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    rcount   = 16;
    rsize    = rcount * sizeof(int);
    rmabuf   = (int *)malloc( rsize );
    if (!rmabuf) {
        fprintf( stderr, "Unable to allocated %d bytes\n", rsize );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }

    MPI_Type_contiguous(2, MPI_INT, &derived_dtp);
    MPI_Type_commit(&derived_dtp);

    /* The following loop is used to run through a series of communicators
     * that are subsets of MPI_COMM_WORLD, of size 1 or greater. */
    while (MTestGetIntracommGeneral( &comm, 1, 1 )) {
        int count = 0;

        if (comm == MPI_COMM_NULL) continue;
        /* Determine the sender and receiver */
        MPI_Comm_rank( comm, &rank );
        MPI_Comm_size( comm, &size );

        MPI_Win_create( buf, bufsize, 2*sizeof(int), MPI_INFO_NULL, comm, &win );
        /* To improve reporting of problems about operations, we
           change the error handler to errors return */
        MPI_Win_set_errhandler( win, MPI_ERRORS_RETURN );

        /** TEST OPERATIONS USING ACTIVE TARGET (FENCE) SYNCHRONIZATION **/
        MPI_Win_fence( 0, win );

        TEST_FENCE_OP("Put",
                      MPI_Put( rmabuf, count, MPI_INT, TARGET, 0,
                               count, MPI_INT, win );
                     );

        TEST_FENCE_OP("Get",
                      MPI_Get( rmabuf, count, MPI_INT, TARGET, 0,
                               count, MPI_INT, win );
                     );
Example #15
0
void Accumulate (struct comm_info* c_info,
                 int size,int n_sample,MODES RUN_MODE,double* time)
/*************************************************************************/

/*------------------------------------------------------------
             VARIABLE |       TYPE        |   MEANING
------------------------------------------------------------
Input      : c_info   | struct comm_info* | see comm_info.h
             size     | int               | message length in bytes
             n_sample | int               | repetition count
             RUN_MODE | MODES (typedef,   | distinction aggregate /
                      | see Benchmark.h)  | non-aggregate, see docs
                      |                   |
Output     : time     | double*           | *time: time/sample in usec
                      |                   |
In/Out     :  -       | -                 | -
                      |                   |  
------------------------------------------------------------
------------------------------------------------------------
Description: see the accompanying document
-------------------------------------------------------------*/
{
  double t1, t2;
  
  Type_Size s_size,r_size;
  int s_num, r_num;
  int s_tag, r_tag;
  int dest, source, root;
  int i;
  MPI_Status stat;


#ifdef CHECK 
  defect=0;
#endif
  ierr = 0;

  /*  GET SIZE OF DATA TYPE */
  MPI_Type_size(c_info->red_data_type, &s_size);
  if (s_size != 0) s_num = size / s_size;

  root = (c_info->rank == 0);

  if (c_info->rank < 0)
    *time = 0.;
  else
  {
    if (!RUN_MODE->AGGREGATE)
    {
      *time = MPI_Wtime();

      for (i = 0; i < n_sample; i++)
      {
        ierr = MPI_Accumulate(c_info->s_buffer, s_num, c_info->red_data_type,
                              0, i*s_num, s_num, c_info->red_data_type,
                              c_info->op_type, c_info->WIN);
        MPI_ERRHAND(ierr);

        ierr = MPI_Win_fence(0, c_info->WIN);
        MPI_ERRHAND(ierr);

#ifdef CHECK
        if (root)
        {
          CHK_DIFF("Accumulate", c_info, (void*)(c_info->r_data + i*s_num), 0,
                   size, size, asize,
                   put, 0, n_sample, i,
                   -1, &defect);
          ass_buf(c_info->r_buffer, 0, 0, size-1, 0);
        }
        MPI_Barrier(c_info->communicator);
#endif
      }
      *time = (MPI_Wtime() - *time) / n_sample;
    }

    if (RUN_MODE->AGGREGATE)
    {
      for (i = 0; i < N_BARR; i++) MPI_Barrier(c_info->communicator);

      *time = MPI_Wtime();

      for (i = 0; i < n_sample; i++)
      {
        ierr = MPI_Accumulate((void*)(c_info->s_data + i*s_num), s_num,
                              c_info->red_data_type, 0, i*s_num, s_num,
                              c_info->red_data_type, c_info->op_type,
                              c_info->WIN);
        MPI_ERRHAND(ierr);
      }

      ierr = MPI_Win_fence(0, c_info->WIN);
      MPI_ERRHAND(ierr);

      *time = (MPI_Wtime() - *time) / n_sample;

#ifdef CHECK
      if (root)
      {
        CHK_DIFF("Accumulate", c_info, c_info->r_buffer, 0,
                 n_sample*size, n_sample*size, asize,
                 put, 0, n_sample, -1,
                 -1, &defect);
      }
#endif
    }
  }
}
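
The RUN_MODE->AGGREGATE switch above only changes where the MPI_Win_fence is placed: the non-aggregate branch fences after every MPI_Accumulate, while the aggregate branch issues all accumulates and closes them with a single fence. A minimal self-contained sketch of the two timing patterns, with assumed buffer sizes and MPI_SUM standing in for c_info->op_type, might look like this:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define N_SAMPLE 100
#define COUNT    256

int main(int argc, char *argv[])
{
  int rank, i;
  double *src, *target, t_non_aggr, t_aggr;
  MPI_Win win;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  src = (double *)malloc(COUNT * sizeof(double));
  for (i = 0; i < COUNT; i++) src[i] = 1.0;

  /* Every rank accumulates into rank 0's window at displacement 0. */
  MPI_Alloc_mem(COUNT * sizeof(double), MPI_INFO_NULL, &target);
  for (i = 0; i < COUNT; i++) target[i] = 0.0;
  MPI_Win_create(target, COUNT * sizeof(double), sizeof(double),
                 MPI_INFO_NULL, MPI_COMM_WORLD, &win);

  /* Non-aggregate: one fence per accumulate, each call completes before the next is timed. */
  MPI_Win_fence(0, win);
  t_non_aggr = MPI_Wtime();
  for (i = 0; i < N_SAMPLE; i++) {
    MPI_Accumulate(src, COUNT, MPI_DOUBLE, 0, 0, COUNT, MPI_DOUBLE, MPI_SUM, win);
    MPI_Win_fence(0, win);
  }
  t_non_aggr = (MPI_Wtime() - t_non_aggr) / N_SAMPLE;

  /* Aggregate: all accumulates are issued first, then a single closing fence. */
  MPI_Win_fence(0, win);
  t_aggr = MPI_Wtime();
  for (i = 0; i < N_SAMPLE; i++)
    MPI_Accumulate(src, COUNT, MPI_DOUBLE, 0, 0, COUNT, MPI_DOUBLE, MPI_SUM, win);
  MPI_Win_fence(0, win);
  t_aggr = (MPI_Wtime() - t_aggr) / N_SAMPLE;

  if (rank == 0)
    printf("per-op fence: %g s/op, single fence: %g s/op\n", t_non_aggr, t_aggr);

  MPI_Win_free(&win);
  MPI_Free_mem(target);
  free(src);
  MPI_Finalize();
  return 0;
}

The difference between the two averages is essentially the cost of completing each accumulate individually versus letting the library batch the operations until the closing fence.
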
Example #16
0
int main(int argc, char *argv[])
{
    complex_t coord_point, julia_constant;
    double x_max, x_min, y_max, y_min, x_resolution, y_resolution;
    double divergent_limit;
    char file_message[160];
    char filename[100];
    int icount, imax_iterations;
    int ipixels_across, ipixels_down;
    int i, j, k, julia, alternate_equation;
    int imin, imax, jmin, jmax;
    int *work;
    /* make an integer array of size [N x M] to hold answers. */
    int *grid_array = NULL;
    int numprocs;
    int  namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int num_colors;
    color_t *colors = NULL;
    int listener;
    int save_image = 0;
    int optval;
    int big_size;
    MPI_Win win;
    int error;
    int done;
    int use_datatypes = 1;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Get_processor_name(processor_name, &namelen);

    if (numprocs == 1)
    {
	PrintUsage();
	MPI_Finalize();
	exit(0);
    }

    if (myid == 0)
    {
	printf("Welcome to the Mandelbrot/Julia set explorer.\n");

	/* Get inputs -- the region to view (must lie within x/y min and x/y max, with
	xmax > xmin and ymax > ymin) and the resolution (number of pixels along an edge,
	N x M, e.g. 256x256)
	*/

	read_mand_args(argc, argv, &imax_iterations, &ipixels_across, &ipixels_down,
	    &x_min, &x_max, &y_min, &y_max, &julia, &julia_constant.real,
	    &julia_constant.imaginary, &divergent_limit,
	    &alternate_equation, filename, &num_colors, &use_stdin, &save_image, &use_datatypes);
	check_mand_params(&imax_iterations, &ipixels_across, &ipixels_down,
	    &x_min, &x_max, &y_min, &y_max, &divergent_limit);

	if (julia == 1) /* we're doing a julia figure */
	    check_julia_params(&julia_constant.real, &julia_constant.imaginary);

	MPI_Bcast(&num_colors, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&imax_iterations, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&ipixels_across, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&ipixels_down, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&divergent_limit, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Bcast(&julia, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&julia_constant.real, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Bcast(&julia_constant.imaginary, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Bcast(&alternate_equation, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&use_datatypes, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }
    else
    {
	MPI_Bcast(&num_colors, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&imax_iterations, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&ipixels_across, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&ipixels_down, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&divergent_limit, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Bcast(&julia, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&julia_constant.real, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Bcast(&julia_constant.imaginary, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Bcast(&alternate_equation, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(&use_datatypes, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }

    if (myid == 0)
    {
	colors = malloc((num_colors+1)* sizeof(color_t));
	if (colors == NULL)
	{
	    MPI_Abort(MPI_COMM_WORLD, -1);
	    exit(-1);
	}
	Make_color_array(num_colors, colors);
	colors[num_colors] = 0; /* add one on the top to avoid edge errors */
    }

    /* allocate memory */
    big_size = ipixels_across * ipixels_down * sizeof(int);
    if (myid == 0)
    {
	/* window memory should be allocated by MPI in case the provider requires it */
	error = MPI_Alloc_mem(big_size, MPI_INFO_NULL, &grid_array);
	if (error != MPI_SUCCESS)
	{
	    printf("Memory allocation failed for data array, aborting.\n");
	    MPI_Abort(MPI_COMM_WORLD, -1);
	    exit(-1);
	}
	/* allocate an array to put the workers' tasks in */
	work = (int*)malloc(numprocs * sizeof(int) * 5);
	if (work == NULL)
	{
	    printf("Memory allocation failed for work array, aborting.\n");
	    MPI_Abort(MPI_COMM_WORLD, -1);
	    exit(-1);
	}
    }
    else
    {
	/* the non-root processes just need scratch space to store data in */
	if ( (grid_array = (int *)calloc(big_size, 1)) == NULL)
	{
	    printf("Memory allocation failed for data array, aborting.\n");
	    MPI_Abort(MPI_COMM_WORLD, -1);
	    exit(-1);
	}
	/* window memory should be allocated by MPI in case the provider requires it */
	error = MPI_Alloc_mem(5 * sizeof(int), MPI_INFO_NULL, &work);
	if (error != MPI_SUCCESS)
	{
	    printf("Memory allocation failed for work array, aborting.\n");
	    MPI_Abort(MPI_COMM_WORLD, -1);
	    exit(-1);
	}
    }

    if (myid == 0)
    {
	int istep, jstep;
	int i1[400], i2[400], j1[400], j2[400];
	int ii, jj;
	struct sockaddr_in addr;
	int len;
	char line[1024], *token;

	srand(getpid());

	if (!use_stdin)
	{
	    addr.sin_family = AF_INET;
	    addr.sin_addr.s_addr = INADDR_ANY;
	    addr.sin_port = htons(DEFAULT_PORT);

	    listener = socket(AF_INET, SOCK_STREAM, 0);
	    if (listener == -1)
	    {
		printf("unable to create a listener socket.\n");
		MPI_Abort(MPI_COMM_WORLD, -1);
		exit(-1);
	    }
	    if (bind(listener, &addr, sizeof(addr)) == -1)
	    {
		addr.sin_port = 0;
		if (bind(listener, &addr, sizeof(addr)) == -1)
		{
		    printf("unable to create a listener socket.\n");
		    MPI_Abort(MPI_COMM_WORLD, -1);
		    exit(-1);
		}
	    }
	    if (listen(listener, 1) == -1)
	    {
		printf("unable to listen.\n");
		MPI_Abort(MPI_COMM_WORLD, -1);
		exit(-1);
	    }
	    len = sizeof(addr);
	    getsockname(listener, &addr, &len);
	    
	    printf("%s listening on port %d\n", processor_name, ntohs(addr.sin_port));
	    fflush(stdout);

	    sock = accept(listener, NULL, NULL);
	    if (sock == -1)
	    {
		printf("unable to accept a socket connection.\n");
		MPI_Abort(MPI_COMM_WORLD, -1);
		exit(-1);
	    }
	    printf("accepted connection from visualization program.\n");
	    fflush(stdout);

#ifdef HAVE_WINDOWS_H
	    optval = 1;
	    setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval));
#endif

	    printf("sending image size to visualizer.\n");
	    sock_write(sock, &ipixels_across, sizeof(int));
	    sock_write(sock, &ipixels_down, sizeof(int));
	    sock_write(sock, &num_colors, sizeof(int));
	    sock_write(sock, &imax_iterations, sizeof(int));
	}

	error = MPI_Win_create(grid_array, big_size, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
	if (error != MPI_SUCCESS)
	{
	    printf("MPI_Win_create failed, error 0x%x\n", error);
	    MPI_Abort(MPI_COMM_WORLD, -1);
	}

	for (;;)
	{
	    /* get x_min, x_max, y_min, and y_max */
	    if (use_stdin)
	    {
		printf("input xmin ymin xmax ymax max_iter, (0 0 0 0 0 to quit):\n");fflush(stdout);
		fgets(line, 1024, stdin);
		printf("read <%s> from stdin\n", line);fflush(stdout);
		token = strtok(line, " \n");
		x_min = atof(token);
		token = strtok(NULL, " \n");
		y_min = atof(token);
		token = strtok(NULL, " \n");
		x_max = atof(token);
		token = strtok(NULL, " \n");
		y_max = atof(token);
		token = strtok(NULL, " \n");
		imax_iterations = atoi(token);
	    }
	    else
	    {
		printf("reading xmin,ymin,xmax,ymax.\n");fflush(stdout);
		sock_read(sock, &x_min, sizeof(double));
		sock_read(sock, &y_min, sizeof(double));
		sock_read(sock, &x_max, sizeof(double));
		sock_read(sock, &y_max, sizeof(double));
		sock_read(sock, &imax_iterations, sizeof(int));
	    }
	    printf("x0,y0 = (%f, %f) x1,y1 = (%f,%f) max_iter = %d\n", x_min, y_min, x_max, y_max, imax_iterations);fflush(stdout);

	    /* break the work up into 400 pieces */
	    istep = ipixels_across / 20;
	    jstep = ipixels_down / 20;
	    if (istep < 1)
		istep = 1;
	    if (jstep < 1)
		jstep = 1;
	    k = 0;
	    for (i=0; i<20; i++)
	    {
		for (j=0; j<20; j++)
		{
		    i1[k] = MIN(istep * i, ipixels_across - 1);
		    i2[k] = MIN((istep * (i+1)) - 1, ipixels_across - 1);
		    j1[k] = MIN(jstep * j, ipixels_down - 1);
		    j2[k] = MIN((jstep * (j+1)) - 1, ipixels_down - 1);
		    k++;
		}
	    }

	    /* shuffle the work */
	    for (i=0; i<500; i++)
	    {
		ii = rand() % 400;
		jj = rand() % 400;
		swap(&i1[ii], &i1[jj]);
		swap(&i2[ii], &i2[jj]);
		swap(&j1[ii], &j1[jj]);
		swap(&j2[ii], &j2[jj]);
	    }

	    /*printf("bcasting the limits: (%f,%f)(%f,%f)\n", x_min, y_min, x_max, y_max);fflush(stdout);*/
	    /* let everyone know the limits */
	    MPI_Bcast(&x_min, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	    MPI_Bcast(&x_max, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	    MPI_Bcast(&y_min, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	    MPI_Bcast(&y_max, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	    MPI_Bcast(&imax_iterations, 1, MPI_INT, 0, MPI_COMM_WORLD);

	    /* check for the end condition */
	    if (x_min == x_max && y_min == y_max)
	    {
		break;
	    }

	    /* put one piece of work to each worker for each epoch until the work is exhausted */
	    k = 0;
	    done = 0;
	    while (!done)
	    {
		error = MPI_Win_fence(0, win);
		if (error != MPI_SUCCESS)
		{
		    printf("'handout work' fence failed, error 0x%x\n", error);
		    MPI_Abort(MPI_COMM_WORLD, -1);
		}
		/* hand out work */
		for (i=1; i<numprocs; i++)
		{
		    if (!done)
		    {
			work[(i*5)+0] = k+1;
			work[(i*5)+1] = i1[k]; /* imin */
			work[(i*5)+2] = i2[k]; /* imax */
			work[(i*5)+3] = j1[k]; /* jmin */
			work[(i*5)+4] = j2[k]; /* jmax */
		    }
		    else
		    {
			work[(i*5)+0] = -1;
			work[(i*5)+1] = -1;
			work[(i*5)+2] = -1;
			work[(i*5)+3] = -1;
			work[(i*5)+4] = -1;
		    }
		    /*printf("sending work(%d) to %d\n", k+1, cur_proc);fflush(stdout);*/
		    error = MPI_Put(&work[i*5], 5, MPI_INT, i, 0, 5, MPI_INT, win);
		    if (error != MPI_SUCCESS)
		    {
			printf("put failed, error 0x%x\n", error);
			MPI_Abort(MPI_COMM_WORLD, -1);
		    }
		    if (k<399)
			k++;
		    else
			done = 1;
		}
		error = MPI_Win_fence(0, win);
		if (error != MPI_SUCCESS)
		{
		    printf("'handout work' -> 'do work' fence failed, error 0x%x\n", error);
		    MPI_Abort(MPI_COMM_WORLD, -1);
		}
		/* do work */
		error = MPI_Win_fence(0, win);
		if (error != MPI_SUCCESS)
		{
		    printf("'do work' -> 'collect results' fence failed, error 0x%x\n", error);
		    MPI_Abort(MPI_COMM_WORLD, -1);
		}
		/* send the results to the visualizer */
		for (i=1; i<numprocs; i++)
		{
		    if (work[i*5] != -1)
		    {
			sock_write(sock, &work[i*5 + 1], 4*sizeof(int));
			for (j=work[i*5+3]; j<=work[i*5+4]; j++)
			{
			    sock_write(sock,
				&grid_array[(j*ipixels_across)+work[i*5+1]],
				(work[i*5+2]+1-work[i*5+1])*sizeof(int));
			}
		    }
		}
	    }
	    error = MPI_Win_fence(0, win);
	    if (error != MPI_SUCCESS)
	    {
		printf("'collect results' -> 'done work' fence failed, error 0x%x\n", error);
		MPI_Abort(MPI_COMM_WORLD, -1);
	    }
	    /* hand out "done" work */
	    for (i=1; i<numprocs; i++)
	    {
		work[(i*5)+0] = 0;
		work[(i*5)+1] = 0;
		work[(i*5)+2] = 0;
		work[(i*5)+3] = 0;
		work[(i*5)+4] = 0;

		error = MPI_Put(&work[i*5], 5, MPI_INT, i, 0, 5, MPI_INT, win);
		if (error != MPI_SUCCESS)
		{
		    printf("put failed, error 0x%x\n", error);
		    MPI_Abort(MPI_COMM_WORLD, -1);
		}
	    }
	    error = MPI_Win_fence(0, win);
	    if (error != MPI_SUCCESS)
	    {
		printf("'done work' -> 'done' fence failed, error 0x%x\n", error);
		MPI_Abort(MPI_COMM_WORLD, -1);
	    }

	    /* tell the visualizer the image is done */
	    if (!use_stdin)
	    {
		work[0] = 0;
		work[1] = 0;
		work[2] = 0;
		work[3] = 0;
		sock_write(sock, work, 4 * sizeof(int));
	    }
	}
    }
    else
    {
	MPI_Datatype dtype;

	error = MPI_Win_create(work, 5*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
	if (error != MPI_SUCCESS)
	{
	    printf("MPI_Win_create failed, error 0x%x\n", error);
	    MPI_Abort(MPI_COMM_WORLD, -1);
	}
	for (;;)
	{
	    MPI_Bcast(&x_min, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	    MPI_Bcast(&x_max, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	    MPI_Bcast(&y_min, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	    MPI_Bcast(&y_max, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	    MPI_Bcast(&imax_iterations, 1, MPI_INT, 0, MPI_COMM_WORLD);

	    /* check for the end condition */
	    if (x_min == x_max && y_min == y_max)
	    {
		break;
	    }

	    x_resolution = (x_max-x_min)/ ((double)ipixels_across);
	    y_resolution = (y_max-y_min)/ ((double)ipixels_down);

	    error = MPI_Win_fence(0, win);
	    if (error != MPI_SUCCESS)
	    {
		printf("'receive work' fence failed, error 0x%x\n", error);
		MPI_Abort(MPI_COMM_WORLD, -1);
	    }
	    /* receive work from the root */
	    error = MPI_Win_fence(0, win);
	    if (error != MPI_SUCCESS)
	    {
		printf("'receive work' -> 'do work' fence failed, error 0x%x\n", error);
		MPI_Abort(MPI_COMM_WORLD, -1);
	    }
	    while (work[0] != 0)
	    {
		imin = work[1];
		imax = work[2];
		jmin = work[3];
		jmax = work[4];

		if (use_datatypes)
		{
		    MPI_Type_vector(jmax - jmin + 1, /* rows */
			imax - imin + 1, /* column width */
			ipixels_across, /* stride, distance between rows */
			MPI_INT,
			&dtype);
		    MPI_Type_commit(&dtype);
		    k = 0;
		}

		for (j=jmin; j<=jmax; ++j)
		{
		    coord_point.imaginary = y_max - j*y_resolution; /* go top to bottom */

		    for (i=imin; i<=imax; ++i)
		    {
			/* Call the Mandelbrot routine for each grid point and fill the array with the number of iterations. */

			coord_point.real = x_min + i*x_resolution; /* go left to right */
			if (julia == 1)
			{
			    /* doing Julia set */
			    /* julia eq:  z = z^2 + c, z_0 = grid coordinate, c = constant */
			    icount = single_mandelbrot_point(coord_point, julia_constant, imax_iterations, divergent_limit);
			}
			else if (alternate_equation == 1)
			{
			    /* doing experimental form 1 */
			    icount = subtractive_mandelbrot_point(coord_point, julia_constant, imax_iterations, divergent_limit);
			}
			else if (alternate_equation == 2)
			{
			    /* doing experimental form 2 */
			    icount = additive_mandelbrot_point(coord_point, julia_constant, imax_iterations, divergent_limit);
			}
			else
			{
			    /* default to doing Mandelbrot set */
			    /* mandelbrot eq: z = z^2 + c, z_0 = c, c = grid coordinate */
			    icount = single_mandelbrot_point(coord_point, coord_point, imax_iterations, divergent_limit);
			}
			if (use_datatypes)
			{
			    grid_array[k++] = icount;
			}
			else
			{
			    grid_array[(j*ipixels_across) + i] = icount;
			    error = MPI_Put(&grid_array[(j*ipixels_across) + i], 1, MPI_INT, 0, (j * ipixels_across) + i, 1, MPI_INT, win);
			    if (error != MPI_SUCCESS)
			    {
				printf("put failed, error 0x%x\n", error);
				MPI_Abort(MPI_COMM_WORLD, -1);
			    }
			}
		    }
		}
		if (use_datatypes)
		{
		    MPI_Put(grid_array, k, MPI_INT, 0, (jmin * ipixels_across) + imin, 1, dtype, win);
		}
		/* synch with the root */
		error = MPI_Win_fence(0, win);
		if (error != MPI_SUCCESS)
		{
		    printf("'do work' -> 'wait for work to be collected' fence failed, error 0x%x\n", error);
		    MPI_Abort(MPI_COMM_WORLD, -1);
		}
		if (use_datatypes)
		{
		    MPI_Type_free(&dtype);
		}
		/* fence while the root writes to the visualizer. */
		error = MPI_Win_fence(0, win);
		if (error != MPI_SUCCESS)
		{
		    printf("'wait for work to be collected' -> 'receive work' fence failed, error 0x%x\n", error);
		    MPI_Abort(MPI_COMM_WORLD, -1);
		}
		/* fence to allow the root to put the next piece of work */
		error = MPI_Win_fence(0, win);
		if (error != MPI_SUCCESS)
		{
		    printf("'receive work' -> 'do work' fence failed, error 0x%x\n", error);
		    MPI_Abort(MPI_COMM_WORLD, -1);
		}
	    }
	}
    }

    if (myid == 0 && save_image)
    {
	imax_iterations = 0;
	for (i=0; i<ipixels_across * ipixels_down; ++i)
	{
	    /* look for "brightest" pixel value, for image use */
	    if (grid_array[i] > imax_iterations)
		imax_iterations = grid_array[i];
	}

	if (julia == 0)
	    printf("Done calculating mandelbrot, now creating file\n");
	else
	    printf("Done calculating julia, now creating file\n");
	fflush(stdout);

	/* Print out the array in some appropriate form. */
	if (julia == 0)
	{
	    /* it's a mandelbrot */
	    sprintf(file_message, "Mandelbrot over (%lf-%lf,%lf-%lf), size %d x %d",
		x_min, x_max, y_min, y_max, ipixels_across, ipixels_down);
	}
	else
	{
	    /* it's a julia */
	    sprintf(file_message, "Julia over (%lf-%lf,%lf-%lf), size %d x %d, center (%lf, %lf)",
		x_min, x_max, y_min, y_max, ipixels_across, ipixels_down,
		julia_constant.real, julia_constant.imaginary);
	}

	dumpimage(filename, grid_array, ipixels_across, ipixels_down, imax_iterations, file_message, num_colors, colors);
    }

    MPI_Finalize();
    if (colors)
	free(colors);
    return 0;
} /* end of main */
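
Stripped of the sockets and the fractal math, the work distribution above is a fence-phased master/worker protocol: the root puts a work descriptor into each worker's window, a fence makes it visible, the workers compute and put results back into the root's window, and another fence closes the collection phase. A reduced sketch of that skeleton (window sizes and the one-int "work item" format are assumptions made for this sketch) could be:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    int rank, nprocs, i, task = -1, result = 0;
    int *tasks = NULL, *results = NULL;
    MPI_Win work_win, result_win;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* Each worker exposes a one-int work descriptor; the root exposes a results array. */
    MPI_Win_create(&task, (rank != 0) ? sizeof(int) : 0, sizeof(int),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &work_win);
    if (rank == 0) {
        tasks = (int *)malloc(nprocs * sizeof(int));
        MPI_Alloc_mem(nprocs * sizeof(int), MPI_INFO_NULL, &results);
    }
    MPI_Win_create(results, (rank == 0) ? (MPI_Aint)(nprocs * sizeof(int)) : 0,
                   sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &result_win);

    /* Phase 1: the root puts one work item into every worker's window. */
    MPI_Win_fence(0, work_win);
    if (rank == 0) {
        for (i = 1; i < nprocs; i++) {
            tasks[i] = i;                      /* the "work item": here just an id */
            MPI_Put(&tasks[i], 1, MPI_INT, i, 0, 1, MPI_INT, work_win);
        }
    }
    MPI_Win_fence(0, work_win);                /* work is now visible to the workers */

    /* Phase 2: workers compute and put their result into the root's window. */
    MPI_Win_fence(0, result_win);
    if (rank != 0) {
        result = task * task;                  /* stand-in for the real computation */
        MPI_Put(&result, 1, MPI_INT, 0, rank, 1, MPI_INT, result_win);
    }
    MPI_Win_fence(0, result_win);              /* results are now visible to the root */

    if (rank == 0)
        for (i = 1; i < nprocs; i++)
            printf("result from worker %d: %d\n", i, results[i]);

    MPI_Win_free(&work_win);
    MPI_Win_free(&result_win);
    if (rank == 0) { MPI_Free_mem(results); free(tasks); }
    MPI_Finalize();
    return 0;
}
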
int main(int argc, char *argv[])
{
  int i, j, length, my_rank, left, right, size, test_value, mid;    
  double start, finish, transfer_time; 
  float snd_buf_left[max_length], snd_buf_right[max_length];
  float rcv_buf_left[max_length], rcv_buf_right[max_length];

  MPI_Win win_rcv_buf_left, win_rcv_buf_right;

/* Naming conventions                                                                */
/* Processes:                                                                        */
/*     my_rank-1                        my_rank                         my_rank+1    */
/* "left neighbor"                     "myself"                     "right neighbor" */
/*   ...    rcv_buf_right <--- snd_buf_left snd_buf_right ---> rcv_buf_left    ...   */
/*   ... snd_buf_right ---> rcv_buf_left       rcv_buf_right <--- snd_buf_left ...   */
/*                        |                                  |                       */
/*              halo-communication                 halo-communication                */

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  right = (my_rank+1)      % size;
  left  = (my_rank-1+size) % size;

  MPI_Win_create(rcv_buf_left,  (MPI_Aint)(max_length*sizeof(float)), sizeof(float), MPI_INFO_NULL, MPI_COMM_WORLD, &win_rcv_buf_left );
  MPI_Win_create(rcv_buf_right, (MPI_Aint)(max_length*sizeof(float)), sizeof(float), MPI_INFO_NULL, MPI_COMM_WORLD, &win_rcv_buf_right);

  if (my_rank == 0) printf("    message size      transfer time  duplex bandwidth per process and neighbor\n");

  length = start_length;

  for (j = 1; j <= number_package_sizes; j++)
  { 
    
    for (i = 0; i <= number_of_messages; i++)
    {
      if(i==1) start = MPI_Wtime();

      test_value = j*1000000 + i*10000 + my_rank*10 ; mid = (length-1)/number_of_messages*i;

      snd_buf_left[0]=test_value+1  ; snd_buf_left[mid]=test_value+2  ; snd_buf_left[length-1]=test_value+3;
      snd_buf_right[0]=test_value+6 ; snd_buf_right[mid]=test_value+7 ; snd_buf_right[length-1]=test_value+8;

      MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPRECEDE, win_rcv_buf_left );
      MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPRECEDE, win_rcv_buf_right);

      MPI_Put(snd_buf_left,  length, MPI_FLOAT, left,  (MPI_Aint)0, length, MPI_FLOAT, win_rcv_buf_right);
      MPI_Put(snd_buf_right, length, MPI_FLOAT, right, (MPI_Aint)0, length, MPI_FLOAT, win_rcv_buf_left );

      MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPUT + MPI_MODE_NOSUCCEED, win_rcv_buf_left );
      MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPUT + MPI_MODE_NOSUCCEED, win_rcv_buf_right);

/*    snd_buf_... is now reused to hold the values that the neighbor process stored in its own snd_buf_..., for the verification below */
      test_value = j*1000000 + i*10000 + left*10  ; mid = (length-1)/number_of_messages*i;
      snd_buf_right[0]=test_value+6 ; snd_buf_right[mid]=test_value+7 ; snd_buf_right[length-1]=test_value+8;
      test_value = j*1000000 + i*10000 + right*10 ; mid = (length-1)/number_of_messages*i;
      snd_buf_left[0]=test_value+1  ; snd_buf_left[mid]=test_value+2  ; snd_buf_left[length-1]=test_value+3;
      if ((rcv_buf_left[0] != snd_buf_right[0]) || (rcv_buf_left[mid] != snd_buf_right[mid]) || 
                                                   (rcv_buf_left[length-1] != snd_buf_right[length-1])) {
         printf("%d: j=%d, i=%d --> snd_buf_right[0,%d,%d]=(%f,%f,%f)\n",
                    my_rank, j, i, mid, length-1, snd_buf_right[0], snd_buf_right[mid], snd_buf_right[length-1]);
         printf("%d:     is not identical to rcv_buf_left[0,%d,%d]=(%f,%f,%f)\n",
                    my_rank,       mid, length-1, rcv_buf_left[0],  rcv_buf_left[mid],  rcv_buf_left[length-1]);
      }
      if ((rcv_buf_right[0] != snd_buf_left[0]) || (rcv_buf_right[mid] != snd_buf_left[mid]) ||
                                                   (rcv_buf_right[length-1] != snd_buf_left[length-1])) {
         printf("%d: j=%d, i=%d --> snd_buf_left[0,%d,%d]=(%f,%f,%f)\n",
                    my_rank, j, i, mid, length-1, snd_buf_left[0],  snd_buf_left[mid],  snd_buf_left[length-1]);
         printf("%d:     is not identical to rcv_buf_right[0,%d,%d]=(%f,%f,%f)\n",
                    my_rank,       mid, length-1, rcv_buf_right[0], rcv_buf_right[mid], rcv_buf_right[length-1]);
      }

    }
    finish = MPI_Wtime();

    if (my_rank == 0) 
    {
      transfer_time = (finish - start) / number_of_messages;
      printf("%10i bytes %12.3f usec %13.3f MB/s\n", 
             length*(int)sizeof(float), transfer_time*1e6, 1.0e-6*2*length*sizeof(float) / transfer_time);
    }

    length = length * length_factor;
  }
  MPI_Win_free(&win_rcv_buf_left );
  MPI_Win_free(&win_rcv_buf_right);

  MPI_Finalize();
}
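
The fence assertions used above are optimisation hints: MPI_MODE_NOSTORE and MPI_MODE_NOPRECEDE on the opening fence promise that the local window has not been updated by local stores and that no RMA epoch precedes the fence, while MPI_MODE_NOPUT and MPI_MODE_NOSUCCEED on the closing fence promise that no further puts or epochs follow. A minimal put-based ring exchange using the same assertion pairs, reduced from the benchmark above, might look like this:

#include <mpi.h>
#include <stdio.h>

#define N 4

int main(int argc, char *argv[])
{
  int rank, size, right, i;
  int snd[N], rcv[N];
  MPI_Win win;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  right = (rank + 1) % size;

  for (i = 0; i < N; i++) snd[i] = rank;
  MPI_Win_create(rcv, N * sizeof(int), sizeof(int),
                 MPI_INFO_NULL, MPI_COMM_WORLD, &win);

  /* Opening fence: no local stores into the window so far (MPI_MODE_NOSTORE)
     and no RMA epoch precedes this fence (MPI_MODE_NOPRECEDE). */
  MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPRECEDE, win);

  MPI_Put(snd, N, MPI_INT, right, 0, N, MPI_INT, win);

  /* Closing fence: no local stores during the epoch, no local puts follow
     (MPI_MODE_NOPUT) and no RMA epoch follows (MPI_MODE_NOSUCCEED). */
  MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPUT + MPI_MODE_NOSUCCEED, win);

  printf("rank %d received halo value %d from rank %d\n",
         rank, rcv[0], (rank - 1 + size) % size);

  MPI_Win_free(&win);
  MPI_Finalize();
  return 0;
}
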
Example #18
0
int main( int argc, char *argv[] )
{
    int rank, nproc, i;
    int errors = 0, all_errors = 0;
    int *buf;
    MPI_Win window;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc < 2) {
        if (rank == 0) printf("Error: must be run with two or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /** Create using MPI_Win_create() **/

    if (rank == 0) {
      MPI_Alloc_mem(4*sizeof(int), MPI_INFO_NULL, &buf);
      *buf = nproc-1;
    } else
      buf = NULL;

    MPI_Win_create(buf, 4*sizeof(int)*(rank == 0), 1, MPI_INFO_NULL, MPI_COMM_WORLD, &window);

    /* PROC_NULL Communication */
    {
        MPI_Request pn_req[4];
        int val[4], res;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, MPI_PROC_NULL, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, MPI_PROC_NULL, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, MPI_PROC_NULL, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, MPI_PROC_NULL, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, window);

    /* GET-ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request gacc_req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget_accumulate(&rank, 1, MPI_INT, &val, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &gacc_req);
        assert(gacc_req != MPI_REQUEST_NULL);
        MPI_Wait(&gacc_req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) *buf = nproc-1;
    MPI_Win_sync(window);

    /* GET+PUT: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Rput(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) *buf = nproc-1;
    MPI_Win_sync(window);

    /* GET+ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Raccumulate(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }
    MPI_Win_unlock(0, window);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait inside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);

        MPI_Win_unlock_all(window);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait outside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    /* Wait in a different epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_lock_all(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_unlock_all(window);
    }

    /* Wait in a fence epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_fence(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_fence(0, window);
    }

    MPI_Win_free(&window);
    if (buf) MPI_Free_mem(buf);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();

    return 0;
}
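
A distilled sketch of the request-based calls exercised above: inside an MPI_Win_lock_all epoch, MPI_Rget returns an MPI_Request, so a rank can wait for just its own transfer instead of flushing or closing the whole epoch. The window contents and sizes below are assumptions made for this sketch.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank, nproc, value = -1, *buf = NULL;
    MPI_Win win;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    /* Rank 0 exposes a single int; everyone else exposes an empty window. */
    if (rank == 0) {
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &buf);
        *buf = 42;
    }
    MPI_Win_create(buf, (rank == 0) ? sizeof(int) : 0, sizeof(int),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    /* Passive-target epoch: wait only for this one get to complete locally. */
    MPI_Win_lock_all(0, win);
    MPI_Rget(&value, 1, MPI_INT, 0, 0, 1, MPI_INT, win, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);
    MPI_Win_unlock_all(win);

    printf("rank %d read %d from rank 0\n", rank, value);

    MPI_Win_free(&win);
    if (buf) MPI_Free_mem(buf);
    MPI_Finalize();
    return 0;
}
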
Example #19
0
void mpi_win_fence_( int* assert,  int* win, int* ierr){
  *ierr =  MPI_Win_fence(* assert, *(MPI_Win*)win);
}
int main(int argc, char* argv[])
{
  int myrank;              /* the rank of this process */
  int left;                /* the rank of the process to the left */
  int right;               /* the rank of the process to the right */
  int size;                /* number of processes in the communicator */
  char sendbuf[BUFSIZ];
  char recvbuf[BUFSIZ];
  MPI_Win send_win;        /* RMA window for one-sided MPI calls */

  /* MPI_Init returns once it has started up processes */
  MPI_Init( &argc, &argv );

  /* size and rank will become ubiquitous */ 
  MPI_Comm_size( MPI_COMM_WORLD, &size );
  MPI_Comm_rank( MPI_COMM_WORLD, &myrank );

  /* example constrained to 4 processes, but code designed to work in more general cases */
  if (size != 4) {
    fprintf(stderr,"Error: this examples must be run over 4 processes\n");
    MPI_Abort(MPI_COMM_WORLD,1);
  }

  /*
  ** All ranks collectively declare an RMA window
  */
  MPI_Win_create(&sendbuf, BUFSIZ, 1, MPI_INFO_NULL,
		 MPI_COMM_WORLD, &send_win);

  /* 
  ** determine process ranks to the left and right of myrank
  ** respecting periodic boundary conditions
  */
  right = (myrank + 1) % size;
  left = (myrank == 0) ? (myrank + size - 1) : (myrank - 1);
  
  /* compose messages */
  switch (myrank) {
  case 0:
    sprintf(sendbuf, "Message from Crosby (process %d)", myrank);
    break;
  case 1:
    sprintf(sendbuf, "Message from Stills (process %d)", myrank);
    break;
  case 2:
    sprintf(sendbuf, "Message from Nash (process %d)", myrank);
    break;
  case 3:
    sprintf(sendbuf, "Message from Young (process %d)", myrank);
    break;
  default:
    sprintf(sendbuf, "Program should never reach here");
  }
  
  /*
  ** communication pattern:
  ** In this example, we'll use RMA, i.e. one-sided MPI calls.
  ** Again, this can provide a relaxed pattern.
  ** We must, however, ensure memory synchronisation at appropriate
  ** times.  We do this with a memory barrier, or 'fence'.
  ** Sometimes (i.e. on the right hardware) RMA calls can be
  ** very efficient, as the CPU may not need to be interrupted
  ** from other calculations it is doing in order for data
  ** to be transferred.
  ** MPI_Get() and MPI_Put() are both available to us.  However,
  ** this pattern logically needs only one.  We will arbitrarily
  ** choose to use MPI_Get():
  ** i) get from the right
  ** ii) then get from the left
  */

  MPI_Win_fence(0,send_win);
  MPI_Get(recvbuf, BUFSIZ, MPI_CHAR, right, 0, BUFSIZ, MPI_CHAR, send_win);
  MPI_Win_fence(0,send_win);
  printf("rank %d: %s\n", myrank, recvbuf);
  MPI_Get(recvbuf, BUFSIZ, MPI_CHAR, left, 0, BUFSIZ, MPI_CHAR, send_win);
  MPI_Win_fence(0,send_win);
  printf("rank %d: %s\n", myrank, recvbuf);
  
  /* destroy the RMA window when we've finished with it*/
  MPI_Win_free(&send_win);

  /* don't forget to tidy up when we're done */
  MPI_Finalize();

  /* and exit the program */
  return EXIT_SUCCESS;
}
Example #21
0
void relax_bulk(){
	int i, j, n;
	double third = 1 / 3.0;

	//for hldg
	double QQ[6] = {0};
	double Qin[6] = {0};
	double Qldg[6] = {0};
	double delta[6] = {1, 0, 0, 1, 0, 1};
	double trQQ = 0;

	//for hel
	double ddQmn = 0;
	double Qelas[6] = {0};
	double Qelas2[6] = {0};
	double ddQ[6][6];
	int xm, xp, ym, yp, zm, zp;
	for (i = 0; i < 6; i ++){
		for(j = 0; j < 6; j ++){
			ddQ[i][j] = 0;
		}
	}

	//for hch
	double Qch[6] ={0};
	double dQ[3][6];
	for (i = 0; i < 3; i ++){
		for(j = 0; j < 6; j ++){
			dQ[i][j] = 0;
		}
	}

	for (i = 0; i < length; i++){
		if(sign[i] == 0){
			//hldg
			for (n = 0; n < 6; n++) Qin[n] = q[i * 6 + n];
			QQ[0] = Qin[0]*Qin[0]+Qin[1]*Qin[1]+Qin[2]*Qin[2];
			QQ[1] = Qin[0]*Qin[1]+Qin[1]*Qin[3]+Qin[2]*Qin[4];
			QQ[2] = Qin[0]*Qin[2]+Qin[1]*Qin[4]+Qin[2]*Qin[5];
			QQ[3] = Qin[1]*Qin[1]+Qin[3]*Qin[3]+Qin[4]*Qin[4];
			QQ[4] = Qin[1]*Qin[2]+Qin[3]*Qin[4]+Qin[4]*Qin[5];
			QQ[5] = Qin[2]*Qin[2]+Qin[4]*Qin[4]+Qin[5]*Qin[5];
			trQQ = trqq(Qin);
			for (n = 0; n < 6; n++) {
				Qldg[n] = (1-U*third)*Qin[n]-U*(QQ[n]-trQQ*(Qin[n]+delta[n]*third));
			}
			xm = neigb[i * 6 + 0];
			xp = neigb[i * 6 + 1];
			ym = neigb[i * 6 + 2];
			yp = neigb[i * 6 + 3];
			zm = neigb[i * 6 + 4];
			zp = neigb[i * 6 + 5];
			for (n = 0; n < 6; n++) {
				//ddQ is the second derivative of the Q tensor:
				//first index, 0:xx; 1:xy; 2:xz; 3:yy; 4:yz; 5:zz;
				//second index: Q-tensor component index;
				ddQ[0][n] = ((double)q[xp * 6 + n]+(double)q[xm * 6 + n]-2*Qin[n])*iddx;
				ddQ[3][n] = ((double)q[yp * 6 + n]+(double)q[ym * 6 + n]-2*Qin[n])*iddy;
				ddQ[5][n] = ((double)q[zp * 6 + n]+(double)q[zm * 6 + n]-2*Qin[n])*iddz;
				Qelas[n] = ddQ[0][n] + ddQ[3][n] + ddQ[5][n];
			}
			/*
			   if(L2 != 0){
			   for (n = 0; n < 6; n++) {
			   ddQ[1][n] = (q[xp][yp][k][n]+q[xm][ym][k][n]-q[xp][ym][k][n]-q[xm][yp][k][n])*idx*idy * 0.25;
			   ddQ[2][n] = (q[xp][j][zp][n]+q[xm][j][zm][n]-q[xp][j][zm][n]-q[xm][j][zp][n])*idx*idz * 0.25;
			   ddQ[4][n] = (q[i][yp][zp][n]+q[i][ym][zm][n]-q[i][ym][zp][n]-q[i][yp][zm][n])*idy*idz * 0.25;
			   }
			   ddQmn = ddQ[0][0] + ddQ[3][3] + ddQ[5][5] + 2 * (ddQ[4][4] + ddQ[1][1] + ddQ[2][2]);
			   Qelas2[0] = ddQ[0][0] + ddQ[1][1] + ddQ[2][2] - third * delta[0] * ddQmn;
			   Qelas2[1] = 0.5 * (ddQ[1][0] + ddQ[0][1] + ddQ[1][3] + ddQ[3][1] + ddQ[2][4] + ddQ[4][2]) - third * delta[1] * ddQmn;
			   Qelas2[2] = 0.5 * (ddQ[2][0] + ddQ[0][2] + ddQ[1][4] + ddQ[4][1] + ddQ[2][5] + ddQ[5][2]) - third * delta[2] * ddQmn;
			   Qelas2[3] = ddQ[1][1] + ddQ[3][3] + ddQ[4][4] - third * delta[3] * ddQmn;
			   Qelas2[4] = 0.5 * (ddQ[1][2] + ddQ[2][1] + ddQ[5][4] + ddQ[4][5] + ddQ[3][4] + ddQ[4][3]) - third * delta[4] * ddQmn;
			   Qelas2[5] = ddQ[2][2] + ddQ[4][4] + ddQ[5][5] - third * delta[5] * ddQmn;
			   if(!checktr(Qelas2)){
			   printf("Error in the helas22.\n");
			   return false;
			   }
			   }
			   */
			if(chiral == 1){
				//dQ is the first derivative; for details see energy.c
				for (n = 0; n < 6; n ++) {
					dQ[0][n] = ((double)q[xp * 6 + n] - (double)q[xm * 6 + n]) * 0.5 * idx;
					dQ[1][n] = ((double)q[yp * 6 + n] - (double)q[ym * 6 + n]) * 0.5 * idy;
					dQ[2][n] = ((double)q[zp * 6 + n] - (double)q[zm * 6 + n]) * 0.5 * idz;
				}
				Qch[0] = 2 * (dQ[1][2] - dQ[2][1]);
				Qch[3] = 2 * (dQ[2][1] - dQ[0][4]);
				Qch[5] = 2 * (dQ[0][4] - dQ[1][2]);
				Qch[1] = dQ[1][4] - dQ[2][3] + dQ[2][0] - dQ[0][2];
				Qch[2] = dQ[1][5] - dQ[2][4] + dQ[0][1] - dQ[1][0];
				Qch[4] = dQ[2][2] - dQ[0][5] + dQ[0][3] - dQ[1][1];
				for (n = 0; n < 6; n++) {
					qn[i * 6 + n] = Qin[n] + dt*(- Qldg[n] + L1 * Qelas[n] - 2 * qch * L1 * Qch[n]);
				}
			}
			else{
				for (n = 0; n < 6; n++) {
					qn[i * 6 + n] = Qin[n] + dt*(- Qldg[n] + L1 * Qelas[n]);
				}
			}
		}
	}

	//Wait until all nodes are updated, then populate q from qn
	MPI_Barrier(MPI_COMM_WORLD);	
	MPI_Win_fence(0, win);
	for(i = 0; i < length; i ++){	
		if(sign[i] == 0){
			for (n = 0; n < 6; n++) {
				q[i * 6 + n] = qn[i * 6 + n];
			}
		}
	}
}
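
relax_bulk() stores the symmetric Q tensor and its derivatives as 6-component arrays in the order xx, xy, xz, yy, yz, zz. The following stand-alone snippet only illustrates that packing; the helper name sym6_index is invented for this sketch and does not appear in the original code.

#include <stdio.h>

/* Map (row, col) of a symmetric 3x3 tensor to the packed order
   0:xx 1:xy 2:xz 3:yy 4:yz 5:zz used by q[], ddQ[][] and dQ[][] above. */
static int sym6_index(int row, int col)
{
	static const int idx[3][3] = {
		{0, 1, 2},
		{1, 3, 4},
		{2, 4, 5}
	};
	return idx[row][col];
}

int main(void)
{
	const char *name[6] = {"xx", "xy", "xz", "yy", "yz", "zz"};
	int row, col;

	for (row = 0; row < 3; row++)
		for (col = 0; col < 3; col++)
			printf("Q[%d][%d] -> component %d (%s)\n",
			       row, col, sym6_index(row, col), name[sym6_index(row, col)]);
	return 0;
}
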
Example #22
0
int main( int argc, char **argv )
{
    int errs = 0, err;
    int rank, size, source, dest;
    int minsize = 2, count; 
    MPI_Comm      comm;
    MPI_Win       win;
    MPI_Aint      extent;
    MTestDatatype sendtype, recvtype;
    int           onlyInt = 0;

    MTest_Init( &argc, &argv );
    /* Check for a simple choice of communicator and datatypes */
    if (getenv( "MTEST_SIMPLE" )) onlyInt = 1;

    while (MTestGetIntracommGeneral( &comm, minsize, 1 )) {
	if (comm == MPI_COMM_NULL) continue;
	/* Determine the sender and receiver */
	MPI_Comm_rank( comm, &rank );
	MPI_Comm_size( comm, &size );
	source = 0;
	dest   = size - 1;
	
	for (count = 1; count < 65000; count = count * 2) {
	    while (MTestGetDatatypes( &sendtype, &recvtype, count )) {

		MTestPrintfMsg( 1, 
		       "Putting count = %d of sendtype %s receive type %s\n", 
				count, MTestGetDatatypeName( &sendtype ),
				MTestGetDatatypeName( &recvtype ) );

		/* Make sure that everyone has a recv buffer */
		recvtype.InitBuf( &recvtype );

		MPI_Type_extent( recvtype.datatype, &extent );
		MPI_Win_create( recvtype.buf, recvtype.count * extent, 
				extent, MPI_INFO_NULL, comm, &win );
		/* To improve reporting of problems about operations, we
		   change the error handler to errors return */
		MPI_Win_set_errhandler( win, MPI_ERRORS_RETURN );

		/* At this point, we have all of the elements that we 
		   need to begin the multiple fence and put tests */
		/* Fence 1 */
		err = MPI_Win_fence( MPI_MODE_NOPRECEDE, win ); 
		if (err) { if (errs++ < MAX_PERR) MTestPrintError(err); }
		/* Source puts */
		if (rank == source) {
		    sendtype.InitBuf( &sendtype );
		    
		    err = MPI_Put( sendtype.buf, sendtype.count, 
				   sendtype.datatype, dest, 0, 
				   recvtype.count, recvtype.datatype, win );
		    if (err) { if (errs++ < MAX_PERR) MTestPrintError(err); }
		}

		/* Fence 2 */
		err = MPI_Win_fence( 0, win );
		if (err) { if (errs++ < MAX_PERR) MTestPrintError(err); }
		/* dest checks data, then Dest puts */
		if (rank == dest) {
		    err = MTestCheckRecv( 0, &recvtype );
		    if (err) { if (errs++ < MAX_PERR) { 
			    PrintRecvedError( "fence 2", &sendtype, &recvtype );
			}
		    }
		    sendtype.InitBuf( &sendtype );
		    
		    err = MPI_Put( sendtype.buf, sendtype.count, 
				   sendtype.datatype, source, 0, 
				   recvtype.count, recvtype.datatype, win );
		    if (err) { if (errs++ < MAX_PERR) MTestPrintError(err); }
		}

		/* Fence 3 */
		err = MPI_Win_fence( 0, win );
		if (err) { if (errs++ < MAX_PERR) MTestPrintError(err); }
		/* src checks data, then Src and dest puts*/
		if (rank == source) {
		    err = MTestCheckRecv( 0, &recvtype );
		    if (err) { if (errs++ < MAX_PERR) { 
			    PrintRecvedError( "fence 3", &sendtype, &recvtype );
			}
		    }
		    sendtype.InitBuf( &sendtype );
		    
		    err = MPI_Put( sendtype.buf, sendtype.count, 
				   sendtype.datatype, dest, 0, 
				   recvtype.count, recvtype.datatype, win );
		    if (err) { if (errs++ < MAX_PERR) MTestPrintError(err); }
		}
		if (rank == dest) {
		    sendtype.InitBuf( &sendtype );
		    
		    err = MPI_Put( sendtype.buf, sendtype.count, 
				   sendtype.datatype, source, 0, 
				   recvtype.count, recvtype.datatype, win );
		    if (err) { if (errs++ < MAX_PERR) MTestPrintError(err); }
		}

		/* Fence 4 */
		err = MPI_Win_fence( MPI_MODE_NOSUCCEED, win );
		if (err) { if (errs++ < MAX_PERR) MTestPrintError(err); }
		/* src and dest checks data */
		if (rank == source) {
		    err = MTestCheckRecv( 0, &recvtype );
		    if (err) { if (errs++ < MAX_PERR) { 
			    PrintRecvedError( "src fence4", &sendtype, &recvtype );
			}
		    }
		}
		if (rank == dest) {
		    err = MTestCheckRecv( 0, &recvtype );
		    if (err) { if (errs++ < MAX_PERR) { 
			    PrintRecvedError( "dest fence4", &sendtype, &recvtype );
			}
		    }
		}

		MPI_Win_free( &win );
		MTestFreeDatatype( &sendtype );
		MTestFreeDatatype( &recvtype );

		/* Only do one datatype in the simple case */
		if (onlyInt) break;
	    }
	    /* Only do one count in the simple case */
	    if (onlyInt) break;
	}
        MTestFreeComm(&comm);
	/* Only do one communicator in the simple case */
	if (onlyInt) break;
    }

    MTest_Finalize( errs );

    
    
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
  int i, j, k, length, my_rank, left, right, size, test_value, mid;    
  double start, finish, transfer_time; 
  float snd_buf_left[max_length], snd_buf_right[max_length];
  float *rcv_buf_left, *rcv_buf_right;
  float *rcv_buf_left_neighbor, *rcv_buf_right_neighbor;

  MPI_Win win_rcv_buf_left, win_rcv_buf_right;
  MPI_Info info_noncontig;
  MPI_Aint buf_size;
  int disp_unit;
  

/* Naming conventions                                                                */
/* Processes:                                                                        */
/*     my_rank-1                        my_rank                         my_rank+1    */
/* "left neighbor"                     "myself"                     "right neighbor" */
/*   ...    rcv_buf_right <--- snd_buf_left snd_buf_right ---> rcv_buf_left    ...   */
/*   ... snd_buf_right ---> rcv_buf_left       rcv_buf_right <--- snd_buf_left ...   */
/*                        |                                  |                       */
/*              halo-communication                 halo-communication                */

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  right = (my_rank+1)      % size;
  left  = (my_rank-1+size) % size;

  MPI_Info_create(&info_noncontig);
  MPI_Info_set(info_noncontig, "alloc_shared_noncontig", "true");
  MPI_Win_allocate_shared((MPI_Aint)(max_length*sizeof(float)), sizeof(float), info_noncontig, MPI_COMM_WORLD, &rcv_buf_left,  &win_rcv_buf_left );
  MPI_Win_allocate_shared((MPI_Aint)(max_length*sizeof(float)), sizeof(float), info_noncontig, MPI_COMM_WORLD, &rcv_buf_right, &win_rcv_buf_right);

/*... shared memory access to the rcv_buf_left of the RIGHT neighbor process */
  MPI_Win_shared_query(win_rcv_buf_left,  right, &buf_size, &disp_unit, &rcv_buf_left_neighbor );

/*... shared memory access to the rcv_buf_right of the LEFT neighbor process */
  MPI_Win_shared_query(win_rcv_buf_right, left,  &buf_size, &disp_unit, &rcv_buf_right_neighbor);


  if (my_rank == 0) printf("    message size      transfer time  duplex bandwidth per process and neighbor\n");

  length = start_length;

  for (j = 1; j <= number_package_sizes; j++)
  { 
    
    for (i = 0; i <= number_of_messages; i++)
    {
      if(i==1) start = MPI_Wtime();

      test_value = j*1000000 + i*10000 + my_rank*10 ; mid = (length-1)/number_of_messages*i;

      snd_buf_left[0]=test_value+1  ; snd_buf_left[mid]=test_value+2  ; snd_buf_left[length-1]=test_value+3;
      snd_buf_right[0]=test_value+6 ; snd_buf_right[mid]=test_value+7 ; snd_buf_right[length-1]=test_value+8;

/*    MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPRECEDE, win_rcv_buf_left ); */
/*    MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPRECEDE, win_rcv_buf_right); */
/*      ... instead of above, work-around for a bug with shared memory windows in some libraries: */
      MPI_Win_fence(MPI_MODE_NOSTORE, win_rcv_buf_left );
      MPI_Win_fence(MPI_MODE_NOSTORE, win_rcv_buf_right);

/*    MPI_Put(snd_buf_left,  length, MPI_FLOAT, left,  (MPI_Aint)0, length, MPI_FLOAT, win_rcv_buf_right); */
/*    MPI_Put(snd_buf_right, length, MPI_FLOAT, right, (MPI_Aint)0, length, MPI_FLOAT, win_rcv_buf_left ); */
/*      ... is substituted by: */
      for(k=0; k<length; k++) rcv_buf_right_neighbor[k]  = snd_buf_left [k];
      for(k=0; k<length; k++) rcv_buf_left_neighbor [k] = snd_buf_right[k];

/*    MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPUT + MPI_MODE_NOSUCCEED, win_rcv_buf_left ); */
/*    MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPUT + MPI_MODE_NOSUCCEED, win_rcv_buf_right); */
/*      ... instead of above, work-around for a bug with shared memory windows in some libraries: */
      MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPUT, win_rcv_buf_left );
      MPI_Win_fence(MPI_MODE_NOSTORE + MPI_MODE_NOPUT, win_rcv_buf_right);

/*    snd_buf_... is now reused to hold the values that the neighbor process stored in its own snd_buf_..., for the verification below */
      test_value = j*1000000 + i*10000 + left*10  ; mid = (length-1)/number_of_messages*i;
      snd_buf_right[0]=test_value+6 ; snd_buf_right[mid]=test_value+7 ; snd_buf_right[length-1]=test_value+8;
      test_value = j*1000000 + i*10000 + right*10 ; mid = (length-1)/number_of_messages*i;
      snd_buf_left[0]=test_value+1  ; snd_buf_left[mid]=test_value+2  ; snd_buf_left[length-1]=test_value+3;
      if ((rcv_buf_left[0] != snd_buf_right[0]) || (rcv_buf_left[mid] != snd_buf_right[mid]) || 
                                                   (rcv_buf_left[length-1] != snd_buf_right[length-1])) {
         printf("%d: j=%d, i=%d --> snd_buf_right[0,%d,%d]=(%f,%f,%f)\n",
                    my_rank, j, i, mid, length-1, snd_buf_right[0], snd_buf_right[mid], snd_buf_right[length-1]);
         printf("%d:     is not identical to rcv_buf_left[0,%d,%d]=(%f,%f,%f)\n",
                    my_rank,       mid, length-1, rcv_buf_left[0],  rcv_buf_left[mid],  rcv_buf_left[length-1]);
      }
      if ((rcv_buf_right[0] != snd_buf_left[0]) || (rcv_buf_right[mid] != snd_buf_left[mid]) ||
                                                   (rcv_buf_right[length-1] != snd_buf_left[length-1])) {
         printf("%d: j=%d, i=%d --> snd_buf_left[0,%d,%d]=(%f,%f,%f)\n",
                    my_rank, j, i, mid, length-1, snd_buf_left[0],  snd_buf_left[mid],  snd_buf_left[length-1]);
         printf("%d:     is not identical to rcv_buf_right[0,%d,%d]=(%f,%f,%f)\n",
                    my_rank,       mid, length-1, rcv_buf_right[0], rcv_buf_right[mid], rcv_buf_right[length-1]);
      }

    }
    finish = MPI_Wtime();

    if (my_rank == 0) 
    {
      transfer_time = (finish - start) / number_of_messages;
      printf("%10i bytes %12.3f usec %13.3f MB/s\n", 
             length*(int)sizeof(float), transfer_time*1e6, 1.0e-6*2*length*sizeof(float) / transfer_time);
    }

    length = length * length_factor;
  }
  MPI_Win_free(&win_rcv_buf_left );
  MPI_Win_free(&win_rcv_buf_right);

  MPI_Finalize();
}
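
The commented-out MPI_Put/MPI_Win_fence lines above show what the shared-memory version replaces: once MPI_Win_shared_query has returned a pointer into a neighbor's portion of the window, the halo transfer becomes an ordinary assignment bracketed by fences. A reduced sketch of that pattern, assuming all ranks run on a single node and using one float per rank, could be:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
  int rank, size, right;
  float *rcv_buf, *rcv_buf_right;      /* my slice and my right neighbor's slice */
  MPI_Aint buf_size;
  int disp_unit;
  MPI_Win win;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  right = (rank + 1) % size;

  /* One float per rank in a node-wide shared window
     (assumes every rank runs on the same node, e.g. mpiexec -n 4 on one host). */
  MPI_Win_allocate_shared(sizeof(float), sizeof(float), MPI_INFO_NULL,
                          MPI_COMM_WORLD, &rcv_buf, &win);

  /* Query the base address of the right neighbor's portion of the window. */
  MPI_Win_shared_query(win, right, &buf_size, &disp_unit, &rcv_buf_right);

  MPI_Win_fence(0, win);
  rcv_buf_right[0] = (float)rank;      /* direct store replaces MPI_Put */
  MPI_Win_fence(0, win);

  printf("rank %d received %0.0f from its left neighbor\n", rank, rcv_buf[0]);

  MPI_Win_free(&win);
  MPI_Finalize();
  return 0;
}
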
Example #24
0
int main(int argc, char *argv[]) 
{ 
    int rank, nprocs, **A, *A_data, i, j;
    MPI_Win win;
    MPI_Datatype column, xpose;
    int errs = 0;
 
    MTest_Init(&argc,&argv); 
    MPI_Comm_size(MPI_COMM_WORLD,&nprocs); 
    MPI_Comm_rank(MPI_COMM_WORLD,&rank); 

    if (nprocs != 2)
    {
        printf("Run this program with 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD,1);
    }

    A_data = (int *) malloc(NROWS * NCOLS * sizeof(int));
    A = (int **) malloc(NROWS * sizeof(int *));

    A[0] = A_data;
    for (i=1; i<NROWS; i++)
	A[i] = A[i-1] + NCOLS;

    if (rank == 0)
    {
        for (i=0; i<NROWS; i++)
            for (j=0; j<NCOLS; j++)
                A[i][j] = i*NCOLS + j;

        /* create datatype for one column */
        MPI_Type_vector(NROWS, 1, NCOLS, MPI_INT, &column);
        /* create datatype for matrix in column-major order */
        MPI_Type_hvector(NCOLS, 1, sizeof(int), column, &xpose);
        MPI_Type_commit(&xpose);
 
        MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); 

        MPI_Win_fence(0, win); 

        MPI_Accumulate(&A[0][0], NROWS*NCOLS, MPI_INT, 1, 0, 1, xpose, MPI_SUM, win);
    
        MPI_Type_free(&column);
        MPI_Type_free(&xpose);

        MPI_Win_fence(0, win); 
    }        
    else
    {  /* rank = 1 */
        for (i=0; i<NROWS; i++) 
            for (j=0; j<NCOLS; j++)
                A[i][j] = i*NCOLS + j;
        MPI_Win_create(&A[0][0], NROWS*NCOLS*sizeof(int), sizeof(int), MPI_INFO_NULL, 
                       MPI_COMM_WORLD, &win); 
        MPI_Win_fence(0, win); 

        MPI_Win_fence(0, win); 

        for (j=0; j<NCOLS; j++)
	{
            for (i=0; i<NROWS; i++)
	    {
                if (A[j][i] != i*NCOLS + j + j*NCOLS + i)
		{
		    if (errs < 50)
		    {
			printf("Error: A[%d][%d]=%d should be %d\n", j, i,
			    A[j][i], i*NCOLS + j + j*NCOLS + i);
		    }
                    errs++;
                }
	    }
	}
	if (errs >= 50)
	{
	    printf("Total number of errors: %d\n", errs);
	}
    }

    MPI_Win_free(&win);

    free(A_data);
    free(A);

    MTest_Finalize(errs);
    MPI_Finalize(); 
    return 0; 
} 
Example #25
0
/*
 * Class:     mpi_Win
 * Method:    fence
 * Signature: (JI)V
 */
JNIEXPORT void JNICALL Java_mpi_Win_fence(
        JNIEnv *env, jobject jthis, jlong win, jint assertion)
{
    int rc = MPI_Win_fence(assertion, (MPI_Win)win);
    ompi_java_exceptionCheck(env, rc);
}
void begin_scatter(scatter* sc) {
  assert (!sc->valid);
  sc->valid = 1;
  sc->request_count = 0;
  MPI_Win_fence(MPI_MODE_NOPRECEDE, sc->win);
}
Example #27
0
void end_scatter(scatter * sc)
{
	assert(sc->valid);
	MPI_Win_fence(MPI_MODE_NOSUCCEED | MPI_MODE_NOSTORE, sc->win);
	sc->valid = 0;
}
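begin_scatter() and end_scatter() above open and close a fence epoch on the window carried in a scatter handle, but the handle type itself does not appear in these snippets. The sketch below assumes a minimal definition with only the three fields the two functions touch (win, valid, request_count) and adds a hypothetical driver that issues MPI_Put calls inside the epoch; the struct layout, the driver, and its displacement convention are illustrative assumptions, not code from the original project.

#include <mpi.h>

/* Assumed minimal handle: just the fields referenced by begin_scatter() and
   end_scatter() above; the real project may carry more state. */
typedef struct scatter {
    MPI_Win win;        /* window over the remote destination buffers     */
    int valid;          /* nonzero between begin_scatter and end_scatter  */
    int request_count;  /* operations issued in the current epoch         */
} scatter;

void begin_scatter(scatter *sc);   /* as defined in the examples above */
void end_scatter(scatter *sc);

/* Hypothetical driver: write one double into slot my_rank of every rank's
   window, assuming each window spans at least nprocs doubles and was created
   with disp_unit == sizeof(double). */
void scatter_value(scatter *sc, int my_rank, int nprocs, double value)
{
    begin_scatter(sc);
    for (int dst = 0; dst < nprocs; dst++) {
        MPI_Put(&value, 1, MPI_DOUBLE, dst, my_rank, 1, MPI_DOUBLE, sc->win);
        sc->request_count++;
    }
    end_scatter(sc);
}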
Example #28
void p_iden()
{
	int ipar, xlo, xhi, ylo, yhi, zlo, zhi, x1, y1, z1, x2, y2, z2, i1, j1, k1, i2, j2, k2, i3, j3, k3;
	int i, j, k, ii, id, id1, id2, id3, di, p1, p2, nlink=0;
	int si, sj, sk, sid, sii, sjj;
	double x0, y0, z0, r, x, y, z, ubx, uby, ubz, omegax, omegay, omegaz, ub_dot_e, q[6], as;

	for (ipar=0; ipar<npar; ipar++) {
		x0  = p_pos[ipar*3];	// C.O.M of ipar'th particle
		y0  = p_pos[ipar*3+1];
		z0  = p_pos[ipar*3+2];
		r   = p_rad[ipar];
		if (r<0) {
			r=-r;
			as=-1.;
		} else {
			as=1.;
		}
//		boundaries of search box for particle # ipar
		xlo = (int)(x0-r-1.01);
		xhi = (int)(x0+r+1.01);
		ylo = (int)(y0-r-1.01);
		yhi = (int)(y0+r+1.01);
		zlo = (int)(z0-r-1.01);
		zhi = (int)(z0+r+1.01);
		if (wall_on!=0) {
			if(zlo <  0)  zlo = 0;
			if(zhi >= Nz) zhi = Nz-1;
		}
		if (as==-1) {
			xlo=0;
			xhi=Nx-1;
			ylo=0;
			yhi=Ny-1;
			zlo=0;
			zhi=Nz-1;
		}

		for (z1=zlo; z1<=zhi; z1++) {
			for (y1=ylo; y1<=yhi; y1++) {
				for (x1=xlo; x1<=xhi; x1++) {
					p1=onsurface(x1, y1, z1, ipar);
					i1 = x1;
					if(x1<0)   i1 += Nx;
					if(x1>=Nx) i1 -= Nx;
					j1 = y1;
					if(y1<0)   j1 += Ny;
					if(y1>=Ny) j1 -= Ny;
					k1 = z1;
					if (wall_on==0) {
						if(k1<0)   k1 += Nz;
						if(k1>=Nz) k1 -= Nz;
					}
					id1 = i1 + (j1+k1*Ny)*Nx;
					//	inside particle
					if (p1>0 && Q_on!=0 && id1/point==myid) info[id1%point]=-ipar-2;
					
					for (j=2; j<=10; j++) {
						z2 = z1 + e[j][2];
						if ( j!=3 && j!=5 && ( wall_on==0 || (z2>=0 && z2<Nz) ) ) {
							x2 = x1 + e[j][0];
							y2 = y1 + e[j][1];
							p2=onsurface(x2, y2, z2, ipar);

							if (p1 * p2 < 0) {
								i2 = x2;
								if(i2<0)   i2 += Nx;
								if(i2>=Nx) i2 -= Nx;
								j2 = y2;
								if(j2<0)   j2 += Ny;
								if(j2>=Ny) j2 -= Ny;
								k2 = z2;
								if (wall_on==0) {
									if(k2<0)   k2 += Nz;
									if(k2>=Nz) k2 -= Nz;
								}
								k   = bounce(j);
								id2 = i2 + (j2+k2*Ny)*Nx;
								
								x   = (double)x1 + 0.5*e[j][0];
								y   = (double)y1 + 0.5*e[j][1];
								z   = (double)z1 + 0.5*e[j][2];
								x  -= x0;
								y  -= y0;
								z  -= z0;
								x  *= as;
								y  *= as;
								z  *= as;
								
								if (flow_on!=0) {			//	modify streaming
									omegax = p_angv[ipar*3];
									omegay = p_angv[ipar*3+1];
									omegaz = p_angv[ipar*3+2];
									ubx    = p_vel[ipar*3]   + omegay*z - omegaz*y;
									uby    = p_vel[ipar*3+1] + omegaz*x - omegax*z;
									ubz    = p_vel[ipar*3+2] + omegax*y - omegay*x;
									ub_dot_e = ubx*e[j][0] + uby*e[j][1] + ubz*e[j][2];
									if(id1/point==myid) {
										nextf[(id1%point)*15+j]=(id1%point)*15+k;
										ubounce[(id1%point)*15+j] = ub_dot_e;
									}
									if(id2/point==myid) {
										nextf[(id2%point)*15+k]=(id2%point)*15+j;
										ubounce[(id2%point)*15+k] =-ub_dot_e;
									}
									nlink++;
									if (j<7) {
//										vsurf[nsurf*3]  = ubx;
//										vsurf[nsurf*3+1]= uby;
//										vsurf[nsurf*3+2]= ubz;
									}
								}
								
								if (j<7 && Q_on!=0) {			//	create surface point
									if (nsurf/node==myid) {
										id        = 10*(nsurf%node);
										surf[id]  = p_Wtype[ipar];
										surf[id+1]= p_W[ipar];
										normalize(&x,&y,&z);
										surf[id+2]= x;
										surf[id+3]= y;
										surf[id+4]= z;
										ntoq(x, y, z, &q[0], &q[1], &q[2], &q[3], &q[4], &q[5]);
										for (ii=0; ii<5; ii++) {
											surf[id+5+ii]  = q[ii];
										}
									}
									
									//	look for the nearest bulk point for the surface point
									if (p1<0) {
										si = i1;
										sj = j1;
										sk = k1;
										sid= id1;
										sii= j;
										sjj= k;
									} else {
										si = i2;
										sj = j2;
										sk = k2;
										sid= id2;
										sii= k;
										sjj= j;
									}
									if(sid/point==myid) neighb[(sid%point)*6+sii-1]=(nsurf-myid*node)*5-1;

									//	do the first pair neighbors
									i3 = si + e[sjj][0];
									j3 = sj + e[sjj][1];
									k3 = sk + e[sjj][2];
									if(i3<0)   i3 += Nx;
									if(i3>=Nx) i3 -= Nx;
									if(j3<0)   j3 += Ny;
									if(j3>=Ny) j3 -= Ny;
									if (wall_on==0) {
										if(k3<0)   k3 += Nz;
										if(k3>=Nz) k3 -= Nz;
									}
									id3 = i3 +(j3+k3*Ny)*Nx;
									if(nsurf/node==myid) {
										if (sii<sjj) {
											neighbsurf[(nsurf%node)*6+sii-1]=(sid-myid*point)*5;
											neighbsurf[(nsurf%node)*6+sjj-1]=(id3-myid*point)*5;
										} else {
											neighbsurf[(nsurf%node)*6+sjj-1]=(sid-myid*point)*5;
											neighbsurf[(nsurf%node)*6+sii-1]=(id3-myid*point)*5;
										}
									
										// find neighbors for surface point
										if (sii!=1 && sjj!=1) {
											if (x>0) di= 1;
											if (x<0) di=-1;
											i3 = si - di;
											j3 = sj;
											k3 = sk;
											if(i3<0)   i3 += Nx;
											if(i3>=Nx) i3 -= Nx;
											
											if (onsurface(i3,j3,k3,ipar)==-1) {
												neighbsurf[(nsurf%node)*6]  = 5*(i3 +(j3+k3*Ny)*Nx - myid*point)-1;
												i3 = si + di;
												if(i3<0)   i3 += Nx;
												if(i3>=Nx) i3 -= Nx;
												neighbsurf[(nsurf%node)*6+1]= 5*(i3 +(j3+k3*Ny)*Nx - myid*point);
											} else {
												i3 = si + di;
												if(i3<0)   i3 += Nx;
												if(i3>=Nx) i3 -= Nx;
												neighbsurf[(nsurf%node)*6]=   5*(i3 +(j3+k3*Ny)*Nx - myid*point)-2;
												i3 = si + 2*di;
												if(i3<0)   i3 += Nx;
												if(i3>=Nx) i3 -= Nx;
												neighbsurf[(nsurf%node)*6+1]= 5*(i3 +(j3+k3*Ny)*Nx - myid*point);
											}
										}
										
										if (sii!=3 && sjj!=3) {
											if (y>0) di= 1;
											if (y<0) di=-1;
											i3 = si;
											j3 = sj - di;
											k3 = sk;
											if(j3<0)   j3 += Ny;
											if(j3>=Ny) j3 -= Ny;
											
											if (onsurface(i3,j3,k3,ipar)==-1) {
												neighbsurf[(nsurf%node)*6+2]= 5*(i3 +(j3+k3*Ny)*Nx -myid*point)-1;
												j3 = sj + di;
												if(j3<0)   j3 += Ny;
												if(j3>=Ny) j3 -= Ny;
												neighbsurf[(nsurf%node)*6+3]= 5*(i3 +(j3+k3*Ny)*Nx - myid*point);
											} else {
												j3 = sj + di;
												if(j3<0)   j3 += Ny;
												if(j3>=Ny) j3 -= Ny;
												neighbsurf[(nsurf%node)*6+2]= 5*(i3 +(j3+k3*Ny)*Nx - myid*point)-2;
												j3 = sj + 2*di;
												if(j3<0)   j3 += Ny;
												if(j3>=Ny) j3 -= Ny;
												neighbsurf[(nsurf%node)*6+3]= 5*(i3 +(j3+k3*Ny)*Nx - myid*point);
											}
										}

										if (sii!=5 && sjj!=5) {
											if (z>0) di= 1;
											if (z<0) di=-1;
											i3 = si;
											j3 = sj;
											k3 = sk - di;
											if (wall_on==0) {
												if(k3<0)   k3 += Nz;
												if(k3>=Nz) k3 -= Nz;
											}
											
											if (onsurface(i3,j3,k3,ipar)==-1) {
												neighbsurf[(nsurf%node)*6+4]= 5*(i3 +(j3+k3*Ny)*Nx - myid*point)-1;
												k3 = sk + di;
												if (wall_on==0) {
													if(k3<0)   k3 += Nz;
													if(k3>=Nz) k3 -= Nz;
												}
												neighbsurf[(nsurf%node)*6+5]= 5*(i3 +(j3+k3*Ny)*Nx -myid*point);
											} else {
												k3 = sk + di;
												if (wall_on==0) {
													if(k3<0)   k3 += Nz;
													if(k3>=Nz) k3 -= Nz;
												}
												neighbsurf[(nsurf%node)*6+4]= 5*(i3 +(j3+k3*Ny)*Nx - myid*point)-2;
												k3 = sk + 2*di;
												if (wall_on==0) {
													if(k3<0)   k3 += Nz;
													if(k3>=Nz) k3 -= Nz;
												}
												neighbsurf[(nsurf%node)*6+5]= 5*(i3 +(j3+k3*Ny)*Nx - myid*point);
											}
										}
									}
									nsurf++;
								}
							}
						}
					}
				}
			}
		}
	}
	if (Q_on!=0 && (npar>0 || wall_on!=0) ) {
		MPI_Win_fence(0, winQsurf);
		MPI_Win_fence(0, winsurf);
		MPI_Win_fence(0, winneighbsurf);
	}
	if(flow_on!=0) MPI_Win_fence(0, winnf);
	MPI_Barrier(MPI_COMM_WORLD);
}
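Throughout p_iden() the distributed arrays are addressed with a block-ownership rule: a global lattice index id belongs to rank id / point and is stored at local slot id % point (and likewise nsurf / node and nsurf % node for surface points), where point and node are evidently the per-rank block sizes. The tiny sketch below illustrates only that mapping, with invented sizes; it is not code from the example.

/* Block-ownership arithmetic as used in p_iden(), with made-up sizes. */
#include <stdio.h>

int main(void)
{
    const int nprocs = 4;
    const int total  = 20;             /* hypothetical number of lattice sites     */
    const int point  = total / nprocs; /* sites per rank, assumed to divide evenly */
    int id;

    for (id = 0; id < total; id += 7) {
        int owner = id / point;        /* rank that holds this site            */
        int local = id % point;        /* its index inside that rank's array   */
        printf("global %2d -> rank %d, local slot %d\n", id, owner, local);
    }
    return 0;
}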
Example #29
void end_gather(gather * g)
{
	assert(g->valid);
	MPI_Win_fence(MPI_MODE_NOSUCCEED, g->win);
	g->valid = 0;
}
Example #30
static int run_test(int time)
{
    int i, x, errs = 0, errs_total = 0;
    MPI_Status stat;
    int dst;
    int winbuf_offset = 0;
    double t0, avg_total_time = 0.0, t_total = 0.0;
    double sum = 0.0;

    if (nprocs < NPROCS_M) {
        ITER = ITER_S;
    }
    else if (nprocs >= NPROCS_M && nprocs < NPROCS_M * 2) {
        ITER = ITER_M;
    }
    else {
        ITER = ITER_L;
    }

    t0 = MPI_Wtime();
    if (rank == 0) {
        for (x = 0; x < ITER; x++) {
            MPI_Win_fence(MPI_MODE_NOPRECEDE, win);

            for (dst = 0; dst < nprocs; dst++) {
                for (i = 1; i < NOP; i++) {
                    MPI_Accumulate(&locbuf[i], 1, MPI_DOUBLE, dst, rank, 1, MPI_DOUBLE, MPI_SUM,
                                   win);
                }
            }

            MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
        }
    }
    else {
        for (x = 0; x < ITER; x++) {
            MPI_Win_fence(MPI_MODE_NOPRECEDE, win);

            if (time > 0)
                usleep_by_count(time);

            MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
        }
    }
    t_total = MPI_Wtime() - t0;
    t_total /= ITER;

//    MPI_Reduce(&t_total, &avg_total_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
//    MPI_Allreduce(&errs, &errs_total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    if (rank == 0) {
        avg_total_time = t_total / nprocs * 1000 * 1000;
#ifdef MTCORE
        fprintf(stdout,
                "mtcore: iter %d comp_size %d num_op %d nprocs %d nh %d total_time %.2lf\n",
                ITER, time, NOP, nprocs, MTCORE_NUM_H, avg_total_time);
#else
        fprintf(stdout,
                "orig: iter %d comp_size %d num_op %d nprocs %d total_time %.2lf\n",
                ITER, time, NOP, nprocs, avg_total_time);
#endif
    }

    return errs_total;
}
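run_test() above reports only rank 0's per-iteration time divided by nprocs, and errs_total is returned without ever being reduced; the MPI_Reduce and MPI_Allreduce calls that would aggregate the measurements across ranks are commented out. If a cross-rank average were wanted, it could be gathered along the lines of that commented code. The self-contained sketch below shows the pattern for a bare fence-only epoch; the window setup, iteration count, and output format are illustrative assumptions.

/* Sketch: time an empty fence epoch and average the per-iteration cost over
   all ranks with MPI_Reduce (the aggregation step the benchmark above leaves
   commented out). */
#include <mpi.h>
#include <stdio.h>

#define ITER 1000

int main(int argc, char **argv)
{
    int rank, nprocs, x;
    double buf = 0.0, t0, t_local, t_sum = 0.0;
    MPI_Win win;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    MPI_Win_create(&buf, sizeof(double), sizeof(double), MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);

    t0 = MPI_Wtime();
    for (x = 0; x < ITER; x++) {
        MPI_Win_fence(MPI_MODE_NOPRECEDE, win);   /* open epoch (no RMA issued) */
        MPI_Win_fence(MPI_MODE_NOSUCCEED, win);   /* close epoch                */
    }
    t_local = (MPI_Wtime() - t0) / ITER;

    /* aggregate across ranks, as the commented-out MPI_Reduce above would do */
    MPI_Reduce(&t_local, &t_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0)
        printf("avg fence pair: %.3f usec\n", 1.0e6 * t_sum / nprocs);

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}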