void parallelMatrixTimesVector(int local_rows, int cols,
                               double *local_A, double *b, double *y,
                               int root, int my_rank, int p, MPI_Comm comm)
{
    /*
     * Parallel matrix-vector multiplication y = A * b, with A distributed
     * by rows.
     *
     * Each process holds a (local_rows) x (cols) slice of A in local_A,
     * stored as a one-dimensional array, and a full copy of b. Every
     * process multiplies its slice by b; the partial results are then
     * gathered, in rank order, into y on process root.
     *
     *   local_rows  number of rows held by my_rank (may differ per rank)
     *   cols        number of columns (length of b)
     *   local_A     the rows of A held by my_rank
     *   b           the vector b, of size cols, present on every process
     *   y           result vector; significant only on root, where it must
     *               have room for the sum of local_rows over all processes
     *   root        rank that collects the result
     *   my_rank     rank of the calling process in comm
     *   p           number of processes in comm
     *   comm        communicator; every process in it must call this
     *               function, because it performs collective operations
     *
     * NOTE: malloc results are not checked; the function has no way to
     * report an allocation failure to its caller.
     */

    /* Scratch buffer for this process's part of the result. */
    double *local_y = malloc(sizeof(double) * local_rows);

    /* Compute the local matrix times vector. */
    compMatrixTimesVector(local_rows, cols, local_A, b, local_y);

    int sendcount = local_rows; /* number of doubles sent by my_rank */

    /*
     * Receive-side arguments are significant only on root. They are
     * initialised to NULL so that non-root ranks pass a well-defined value
     * instead of reading an uninitialised pointer.
     */
    int *reccounts = NULL; /* reccounts[i]: doubles received from rank i */
    int *displs = NULL;    /* displs[i]: offset in y of rank i's data */

    if (my_rank != root) {
        /* Report our count to root; the receive arguments are ignored. */
        MPI_Gather(&sendcount, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);

        /* Send our part of the result; y, reccounts, displs are ignored. */
        MPI_Gatherv(local_y, sendcount, MPI_DOUBLE,
                    y, reccounts, displs, MPI_DOUBLE, root, comm);
    } else {
        /*
         * Gather the receive counts from every process. The root argument
         * must be the same on all ranks, so pass root here and not a
         * literal 0.
         */
        reccounts = malloc(sizeof(int) * p);
        MPI_Gather(&sendcount, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);

        /* Displacements for MPI_Gatherv: exclusive prefix sum of counts. */
        displs = malloc(sizeof(int) * p);
        displs[0] = 0;
        for (int i = 1; i < p; i++) {
            displs[i] = displs[i - 1] + reccounts[i - 1];
        }

        /* Gather the partial results into y on root. */
        MPI_Gatherv(local_y, sendcount, MPI_DOUBLE,
                    y, reccounts, displs, MPI_DOUBLE, root, comm);

        free(displs);
        free(reccounts);
    }

    free(local_y);
}
void parallelMatrixTimesVector(int num_local_rows, int cols,
                               double *local_A, double *b, double *y,
                               int root, int my_rank, int p, MPI_Comm comm)
{
    /*
     * Parallel matrix-vector multiplication y = A * b, with A distributed
     * by rows.
     *
     * Each process holds a (num_local_rows) x (cols) slice of A in local_A,
     * stored as a one-dimensional array, and a full copy of b. Every
     * process multiplies its slice by b; the partial results are then
     * gathered, in rank order, into y on process root.
     *
     *   num_local_rows  number of rows held by my_rank (may differ per rank)
     *   cols            number of columns (length of b)
     *   local_A         the rows of A held by my_rank
     *   b               the vector b, of size cols, present on every process
     *   y               result vector; significant only on root, where it
     *                   must have room for the sum of num_local_rows over
     *                   all processes
     *   root            rank that collects the result
     *   my_rank         rank of the calling process in comm
     *   p               number of processes in comm
     *   comm            communicator; every process in it must call this
     *                   function, because it performs collective operations
     *
     * NOTE: malloc results are not checked; the function has no way to
     * report an allocation failure to its caller.
     */

    /* Allocate memory for the local result on my_rank. */
    double *local_y = malloc(sizeof(double) * num_local_rows);

    /* Compute the local matrix times vector. */
    compMatrixTimesVector(num_local_rows, cols, local_A, b, local_y);

    /*
     * Receive-side arguments are significant only on root. They start as
     * NULL so that non-root ranks pass a well-defined value to the
     * collectives instead of reading an uninitialised pointer.
     */
    int *reccounts = NULL; /* reccounts[i]: doubles received from rank i */
    int *displs = NULL;    /* displs[i]: offset in y of rank i's data */

    if (my_rank == root) {
        reccounts = malloc(sizeof(int) * p);
    }

    /* Gather num_local_rows from each process onto root. */
    MPI_Gather(&num_local_rows, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);

    if (my_rank == root) {
        /* Displacements for MPI_Gatherv: exclusive prefix sum of counts. */
        displs = malloc(sizeof(int) * p);
        displs[0] = 0;
        for (int i = 1; i < p; i++) {
            displs[i] = displs[i - 1] + reccounts[i - 1];
        }
    }

    /* Gather the partial results into y on root. */
    MPI_Gatherv(local_y, num_local_rows, MPI_DOUBLE,
                y, reccounts, displs, MPI_DOUBLE, root, comm);

    /* Both pointers are NULL on non-root ranks, where free() is a no-op. */
    free(displs);
    free(reccounts);
    free(local_y);
}