int main(int argc, char* argv[]) { double *A, *B, *b, *y; int n; int my_rank, p; int i; /* Obtain number of rows and columns. We do not check for eroneous input. */ n = atoi(argv[1]); MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &p); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* Find how many rows per process. */ int *rows; rows = (int*)Malloc(p, sizeof(int), MPI_COMM_WORLD, my_rank); calcNumsPerProcess(n, p, rows); /* Allocate memory. */ b = Malloc(n, sizeof(double), MPI_COMM_WORLD, my_rank); if (my_rank == 0) { A = (double*)Malloc(n*n, sizeof(double), MPI_COMM_WORLD, my_rank); y = (double*)Malloc(n, sizeof(double), MPI_COMM_WORLD, my_rank); } B = (double*)Malloc(rows[my_rank]*n, sizeof(double), MPI_COMM_WORLD, my_rank); /* Generate matrix and vector */ if (my_rank == 0) { genMatrix(n, n, A); genVector(n, b); } /* Distribute A */ int *displs; int *sendcounts; if (my_rank == 0) { displs = malloc(sizeof(int)*p); sendcounts = malloc(sizeof(int)*p); for (i=0; i<p; i++) sendcounts[i] = rows[i]*n; displs[0] = 0; for (i=1; i<p; i++) displs[i] = displs[i-1] + sendcounts[i-1]; } MPI_Scatterv(A, sendcounts, displs, MPI_DOUBLE, B, rows[my_rank]*n, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* Distribute b */ MPI_Bcast(b, n, MPI_DOUBLE, 0, MPI_COMM_WORLD); double time = MPI_Wtime(); parallelMatrixTimesVector(rows[my_rank], n, B, b, y, 0, my_rank, p, MPI_COMM_WORLD); time = MPI_Wtime()-time; /* Collect the max time from all processes. */ double timerecv; MPI_Reduce(&time,&timerecv, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if (my_rank==0) { printf("%d %d % .2e\n", p, n, timerecv); } if (my_rank==0){ free(sendcounts); free(displs); free(y); } free(A); free(b); free(rows); MPI_Finalize(); return 0; }
int main(int argc, char* argv[]) { double *A, *B, *b, *y; int num_rows, num_cols; int my_rank, p; int i; /* Obtain number of rows and columns. We do not check for eroneous input. */ num_rows = atoi(argv[1]); num_cols= atoi(argv[2]); MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &p); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* number of rows on my_rank */ int local_num_rows = NUM_ROWS(my_rank, p, num_rows); /* Allocate memory */ b = Malloc(num_cols, sizeof(double), MPI_COMM_WORLD, my_rank); if (my_rank == 0) { A = (double*)Malloc(num_rows*num_cols, sizeof(double), MPI_COMM_WORLD, my_rank); y = (double*)Malloc(num_rows, sizeof(double), MPI_COMM_WORLD, my_rank); } B = (double*)Malloc(local_num_rows*num_cols, sizeof(double), MPI_COMM_WORLD, my_rank); /* Generate matrix and vector */ if (my_rank == 0) { genMatrix(num_rows, num_cols, A); genVector(num_cols, b); } /* Distribute A */ int *displs; int *sendcounts; if (my_rank == 0) { displs = malloc(sizeof(int)*p); sendcounts = malloc(sizeof(int)*p); sendcounts[0] = NUM_ROWS(0,p,num_rows)*num_cols; displs[0] = 0; for (i=1; i<p; i++) { displs[i] = displs[i-1] + sendcounts[i-1]; sendcounts[i] = NUM_ROWS(i,p,num_rows)*num_cols; } } MPI_Scatterv(A, sendcounts, displs, MPI_DOUBLE, B, local_num_rows*num_cols, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* Distribute b */ MPI_Bcast(b, num_cols, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* Multiply */ double time = MPI_Wtime(); parallelMatrixTimesVector(local_num_rows, num_cols, B, b, y, 0, my_rank, p, MPI_COMM_WORLD); time = MPI_Wtime()-time; /* Collect the max time from all processes. */ double timerecv; MPI_Reduce(&time,&timerecv, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if (my_rank==0) printf("Computed with p = %d, m = %d, n = %d in % .2e seconds\n", p, num_rows, num_cols, timerecv); if (my_rank == 0) getResult(num_rows, num_cols, A, b, y); if (my_rank==0){ free(sendcounts); free(displs); free(y); free(A); free(b); } free(B); MPI_Finalize(); return 0; }