// OpenMP driver: times the sequential APSP reference against the OpenMP kernel.
//#define Debug
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <omp.h>

/* Helpers defined elsewhere in the assignment; signatures inferred from the
   call sites below. */
void GenMatrix(int *mat, size_t N);
void ST_APSP(int *mat, size_t N);
void OMP_APSP(int *mat, size_t N);
int  CmpArray(int *a, int *b, size_t n);

int main(int argc, char **argv)
{
    if (argc != 3) {
        printf("Usage: test {N} {num_threads}\n");
        exit(-1);
    }
    size_t N = atoi(argv[1]);
    int npro = atoi(argv[2]);
    omp_set_num_threads(npro);

    int iam = 0, np = 1;
    #pragma omp parallel private(iam, np)
    {
        np = omp_get_num_threads();
        iam = omp_get_thread_num();
#ifdef Debug
        printf("Hello from thread %d out of %d\n", iam, np);
#endif
    }

    struct timeval tv1, tv2;

    // Generate a random adjacency matrix.
    printf("Size is %zu, numP is %d\n", N, npro);
    int *mat = (int*)malloc(sizeof(int)*N*N);
    GenMatrix(mat, N);

    // Compute the reference result sequentially.
    int *ref = (int*)malloc(sizeof(int)*N*N);
    memcpy(ref, mat, sizeof(int)*N*N);
    gettimeofday(&tv1, NULL);
    ST_APSP(ref, N);
    gettimeofday(&tv2, NULL);
    printf("Sequential time = %ld usecs\n",
           (tv2.tv_sec-tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);

    // Compute the OpenMP result.
    int *result = (int*)malloc(sizeof(int)*N*N);
    memcpy(result, mat, sizeof(int)*N*N);
    gettimeofday(&tv1, NULL);
    OMP_APSP(result, N);
    gettimeofday(&tv2, NULL);
    printf("OpenMP time = %ld usecs\n",
           (tv2.tv_sec-tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);

    // Compare the OpenMP result with the reference result.
    if (CmpArray(result, ref, N*N))
        printf("Your result is correct.\n");
    else
        printf("Your result is wrong.\n");

    free(mat);
    free(ref);
    free(result);
    return 0;
}
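/* The driver above calls OMP_APSP, but the kernel is defined elsewhere in the
   assignment. What follows is a minimal hypothetical sketch, not the
   assignment's actual code, assuming the same -1 = "no path" convention the
   drivers use. At step k of Floyd-Warshall, row k is a fixed point of the
   relaxation and every other row only reads it, so the i-loop can be split
   across threads safely. ST_APSP is presumably the same triple loop without
   the pragma. */
void OMP_APSP(int *mat, size_t N)
{
    for (size_t k = 0; k < N; k++) {
        // Parallelize over rows; each thread updates disjoint rows i.
        #pragma omp parallel for
        for (size_t i = 0; i < N; i++) {
            for (size_t j = 0; j < N; j++) {
                int vik = mat[i*N + k];
                int vkj = mat[k*N + j];
                if (vik != -1 && vkj != -1) {
                    int sum = vik + vkj;
                    if (mat[i*N + j] == -1 || mat[i*N + j] > sum)
                        mat[i*N + j] = sum;
                }
            }
        }
    }
}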
// MPI driver: row-distributed Floyd-Warshall written inline.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <mpi.h>

/* Helpers defined elsewhere in the assignment; signatures inferred from the
   call sites below. */
void GenMatrix(int *mat, size_t N);
void ST_APSP(int *mat, size_t N);
int  CmpArray(int *a, int *b, size_t n);

int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("Usage: test {N}\n");
        exit(-1);
    }
    size_t N = atoi(argv[1]);

    // matrix-related variables
    int *mat = NULL, *ref = NULL, *result = NULL, *part, *k_row;
    int rows, k, root;
    int i, j, vij, vik, vkj;
    int npes, rank;
    struct timeval tv1, tv2;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        // Generate the matrix and compute the sequential reference.
        mat    = (int*)malloc(sizeof(int)*N*N);
        ref    = (int*)malloc(sizeof(int)*N*N);
        result = (int*)malloc(sizeof(int)*N*N);
        GenMatrix(mat, N);
        memcpy(ref, mat, sizeof(int)*N*N);
        gettimeofday(&tv1, NULL);
        ST_APSP(ref, N);
        gettimeofday(&tv2, NULL);
        printf("Sequential: %ld usecs\n",
               (tv2.tv_sec-tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);
        gettimeofday(&tv1, NULL);
    }

    // Scatter the matrix: each process receives N/npes contiguous rows.
    // (N is assumed to be divisible by npes.)
    rows = N/npes;
    part = (int*)malloc(sizeof(int)*N*rows);
    MPI_Scatter(mat, N*rows, MPI_INT, part, N*rows, MPI_INT, 0, MPI_COMM_WORLD);

    // Parallel Floyd-Warshall: at step k, the owner of row k broadcasts it.
    k_row = (int*)malloc(sizeof(int)*N);
    for (k = 0; k < N; k++) {
        root = k/rows;
        if (rank == root) {
            for (i = 0; i < N; i++)
                k_row[i] = part[N*(k - rows*root) + i];
        }
        MPI_Bcast(k_row, N, MPI_INT, root, MPI_COMM_WORLD);

        // Relax every local entry through vertex k (-1 means "no path").
        for (i = 0; i < rows; i++) {
            for (j = 0; j < N; j++) {
                vij = part[N*i + j];
                vik = part[N*i + k];
                vkj = k_row[j];
                if (vik != -1 && vkj != -1) {
                    if (vij == -1 || vij > vik + vkj)
                        part[N*i + j] = vik + vkj;
                }
            }
        }
    }

    // Gather the distributed rows back into result on rank 0.
    MPI_Gather(part, N*rows, MPI_INT, result, N*rows, MPI_INT, 0, MPI_COMM_WORLD);

    // Compare the parallel result with the reference result.
    if (rank == 0) {
        gettimeofday(&tv2, NULL);
        printf("Parallel: %ld usecs\n",
               (tv2.tv_sec-tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);
        if (CmpArray(result, ref, N*N))
            printf("Your result is correct.\n");
        else
            printf("Your result is wrong.\n");
    }

    // Free memory.
    if (rank == 0) {
        free(mat);
        free(ref);
        free(result);
    }
    free(part);
    free(k_row);

    MPI_Finalize();
    return 0;
}
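/* GenMatrix and CmpArray are shared helpers every driver relies on; their real
   definitions live elsewhere in the assignment. Minimal hypothetical sketches,
   assuming small non-negative edge weights with -1 marking a missing edge: */
void GenMatrix(int *mat, size_t N)
{
    srand(12345);                               // fixed seed => reproducible runs
    for (size_t i = 0; i < N; i++)
        for (size_t j = 0; j < N; j++)
            // roughly 1 edge in 8 is absent (-1); the rest get weight 1..32
            mat[i*N + j] = (rand() % 8 == 0) ? -1 : rand() % 32 + 1;
    for (size_t i = 0; i < N; i++)
        mat[i*N + i] = 0;                       // zero-cost self loops
}

int CmpArray(int *a, int *b, size_t n)
{
    for (size_t i = 0; i < n; i++)
        if (a[i] != b[i])
            return 0;                           // mismatch found
    return 1;                                   // arrays identical
}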
// MPI driver: row-distributed Floyd-Warshall with separate communication timing.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <mpi.h>

/* Globals and helpers this driver assumes; the helper definitions live
   elsewhere in the assignment. prepare_data is assumed to allocate the three
   matrices, run the sequential reference, and set time_used_sequential. */
int my_rank, total_rank;
struct timeval timer_parallel, timer_comm;
long time_comm = 0, time_used_parallel, time_used_sequential;

void prepare_data(int **mat, int **ref, int **result, size_t mat_size);
long get_time_and_replace(struct timeval *timer);
int  CmpArray(int *a, int *b, size_t n);

int main(int argc, char **argv)
{
    MPI_Init(NULL, NULL);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &total_rank);

    if (argc != 2) {
        printf("Usage: test {mat_size}\n");
        exit(-1);
    }
    size_t mat_size = atoi(argv[1]);

    // MPI_Scatter/MPI_Gather with equal counts require the vertex count to be
    // divisible by the process count.
    if (mat_size % total_rank) {
        for (int i = 0; i < 20; i++) printf("~");
        printf("\n");
        printf("Using MPI_*, only a vertex count divisible by the process count is supported...\n");
        for (int i = 0; i < 20; i++) printf("~");
        printf("\n");
        exit(-1);
    }

    int *mat = NULL;
    int *ref = NULL;
    int *result = NULL;
    if (my_rank == 0) {
        for (int i = 0; i < 20; i++) printf("~");
        printf("\n");
        printf("Using MPI_*, only a vertex count divisible by the process count is supported...\n");
        printf("Input size: %zu\n", mat_size);
        printf("Total processes: %d\n", total_rank);
        prepare_data(&mat, &ref, &result, mat_size);
        // Start the overall timer.
        gettimeofday(&timer_parallel, NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    // Determine the block of rows this rank is in charge of.
    int rows_in_charge = mat_size / total_rank;   // should this be included in the timer?
    int *my_rows = (int*)malloc(sizeof(int) * mat_size * rows_in_charge); // rows owned by this process
    int *k_to_j  = (int*)malloc(sizeof(int) * mat_size);                  // broadcast buffer for row k

    if (my_rank == 0) gettimeofday(&timer_comm, NULL);
    // Divide the matrix among the processes with a scatter:
    // sendbuf is *result (significant on rank 0 only), recvbuf is *my_rows.
    int sendrecvcount = mat_size * rows_in_charge;
    MPI_Scatter(result, sendrecvcount, MPI_INT,
                my_rows, sendrecvcount, MPI_INT, 0, MPI_COMM_WORLD);
    if (my_rank == 0) time_comm += get_time_and_replace(&timer_comm);

    for (int k = 0; k < mat_size; k++) {
        if (my_rank == 0) gettimeofday(&timer_comm, NULL);
        // Broadcast the k-th row from its owner to every process.
        int owner_of_k_row = k / rows_in_charge;
        if (my_rank == owner_of_k_row)
            memcpy(k_to_j, my_rows + mat_size * (k % rows_in_charge),
                   sizeof(int) * mat_size);
        MPI_Bcast(k_to_j, mat_size, MPI_INT, owner_of_k_row, MPI_COMM_WORLD);
        if (my_rank == 0) time_comm += get_time_and_replace(&timer_comm);

        // Relax every local entry through vertex k (-1 means "no path").
        for (int i = 0; i < rows_in_charge; i++) {
            for (int j = 0; j < mat_size; j++) {
                int ij = i * mat_size + j;
                int ik = i * mat_size + k;
                if (my_rows[ik] != -1 && k_to_j[j] != -1) {
                    int ikj = my_rows[ik] + k_to_j[j];
                    if (my_rows[ij] == -1 || my_rows[ij] > ikj)
                        my_rows[ij] = ikj;
                }
            }
        }
    }

    if (my_rank == 0) gettimeofday(&timer_comm, NULL);
    // Collect the result on process 0.
    MPI_Gather(my_rows, sendrecvcount, MPI_INT,
               result, sendrecvcount, MPI_INT, 0, MPI_COMM_WORLD);
    if (my_rank == 0) time_comm += get_time_and_replace(&timer_comm);

    if (my_rank == 0) {
        // Stop the timer and report total time, communication share, and speedup.
        time_used_parallel = get_time_and_replace(&timer_parallel);
        printf("Time used (parallel ): %8ld usecs\n", time_used_parallel);
        printf("Time used (parallel ) comm : %6ld usecs (%2.3lf%%)\n",
               time_comm, time_comm / (double)time_used_parallel * 100);
        printf("Speed up (sequential / parallel): %.3lf\n",
               time_used_sequential / (double)time_used_parallel);

        // Compare the parallel result with the reference result.
        if (CmpArray(result, ref, mat_size * mat_size))
            printf("Your result is correct.\n");
        else
            printf("Your result is wrong.\n");
        for (int i = 0; i < 20; i++) printf("~");
        printf("\n");

        free(mat);
        free(ref);
        free(result);
    }
    free(my_rows);
    free(k_to_j);

    // Finalize the MPI environment.
    MPI_Finalize();
    return 0;
}
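/* get_time_and_replace is used above but defined elsewhere. From its use
   (time_comm += get_time_and_replace(&timer_comm);) it plausibly returns the
   microseconds elapsed since *timer and resets *timer to "now". A minimal
   sketch under that assumption: */
long get_time_and_replace(struct timeval *timer)
{
    struct timeval now;
    gettimeofday(&now, NULL);
    long elapsed = (now.tv_sec - timer->tv_sec) * 1000000L
                 + (now.tv_usec - timer->tv_usec);
    *timer = now;   // replace the stored timestamp with the current one
    return elapsed;
}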
// MPI driver: delegates the parallel kernel to MT_APSP.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <mpi.h>

/* Helpers defined elsewhere in the assignment; signatures inferred from the
   call sites below. */
void GenMatrix(int *mat, size_t N);
void ST_APSP(int *mat, size_t N);
void MT_APSP(int *part, MPI_Comm comm, int myrank, size_t N, int P);
int  CmpArray(int *a, int *b, size_t n);

int main(int argc, char *argv[])
{
    if (argc != 2) {
        printf("Usage: test {N}\n");
        exit(-1);
    }
    int *mat = NULL, *ref = NULL;
    int P, myrank;
    size_t N = atoi(argv[1]);   // matrix size
    struct timeval tv1, tv2;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &P);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    if (myrank == 0) {
        // Generate a random matrix.
        printf("Input size is %zu; number of processes is %d\n", N, P);
        mat = (int*)malloc(sizeof(int)*N*N);
        GenMatrix(mat, N);

        // Compute the reference result.
        ref = (int*)malloc(sizeof(int)*N*N);
        memcpy(ref, mat, sizeof(int)*N*N);
        gettimeofday(&tv1, NULL);
        ST_APSP(ref, N);
        gettimeofday(&tv2, NULL);
        printf("Sequential time = %ld usecs\n",
               (tv2.tv_sec-tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);
    }

    int strip = N/P;
    int *part = (int*)malloc(sizeof(int)*N*strip); /* rows handled by this process */

    // Scatter data to all processes.
    MPI_Barrier(MPI_COMM_WORLD);
    if (myrank == 0)
        gettimeofday(&tv1, NULL);
    MPI_Scatter(mat, N*strip, MPI_INT, part, N*strip, MPI_INT, 0, MPI_COMM_WORLD);

    // Compute the matrix in parallel.
    MT_APSP(part, MPI_COMM_WORLD, myrank, N, P);

    // Gather the results.
    MPI_Gather(part, N*strip, MPI_INT, mat, N*strip, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == 0) {
        gettimeofday(&tv2, NULL);
        printf("Parallel time = %ld usecs\n\n",
               (tv2.tv_sec-tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);
#ifdef test
        // Compare the parallel result with the reference result.
        if (CmpArray(mat, ref, N*N))
            printf("Your result is correct.\n");
        else
            printf("Your result is wrong.\n");
#endif
        free(mat);
        free(ref);
    }
    free(part);

    MPI_Finalize();
    return 0;
}
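/* MT_APSP is the MPI kernel the last driver delegates to; its real definition
   is elsewhere in the assignment. A hypothetical sketch, assuming the same
   row-strip layout and k-row broadcast pattern the inline versions above use: */
void MT_APSP(int *part, MPI_Comm comm, int myrank, size_t N, int P)
{
    int strip = N / P;                          // rows per process
    int *k_row = (int*)malloc(sizeof(int) * N); // broadcast buffer for row k
    for (size_t k = 0; k < N; k++) {
        int owner = k / strip;                  // rank that owns row k
        if (myrank == owner)
            memcpy(k_row, part + N * (k % strip), sizeof(int) * N);
        MPI_Bcast(k_row, N, MPI_INT, owner, comm);
        // Relax every local entry through vertex k (-1 means "no path").
        for (int i = 0; i < strip; i++) {
            for (size_t j = 0; j < N; j++) {
                int vik = part[i*N + k];
                int vkj = k_row[j];
                if (vik != -1 && vkj != -1) {
                    int sum = vik + vkj;
                    if (part[i*N + j] == -1 || part[i*N + j] > sum)
                        part[i*N + j] = sum;
                }
            }
        }
    }
    free(k_row);
}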