int main(int, char **) { srand(42); std::cout << "-- Generating matrix --" << std::endl; std::size_t dof_per_dim = 64; //number of grid points per coordinate direction std::size_t n = dof_per_dim * dof_per_dim * dof_per_dim; //total number of unknowns std::vector< std::map<int, double> > matrix = gen_3d_mesh_matrix(dof_per_dim, dof_per_dim, dof_per_dim, false); //If last parameter is 'true', a tetrahedral grid instead of a hexahedral grid is used. /** * Shuffle the generated matrix **/ std::vector<int> r = generate_random_reordering(n); std::vector< std::map<int, double> > matrix2 = reorder_matrix(matrix, r); /** * Print some statistics about the generated matrix: **/ std::cout << " * Unknowns: " << n << std::endl; std::cout << " * Initial bandwidth: " << calc_bw(matrix) << std::endl; std::cout << " * Randomly reordered bandwidth: " << calc_bw(matrix2) << std::endl; /** * Reorder using Cuthill-McKee algorithm and print new bandwidth: **/ std::cout << "-- Cuthill-McKee algorithm --" << std::endl; r = viennacl::reorder(matrix2, viennacl::cuthill_mckee_tag()); r = viennacl::reorder(matrix2, viennacl::cuthill_mckee_tag()); std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl; /** * Reorder using advanced Cuthill-McKee algorithm and print new bandwidth: **/ std::cout << "-- Advanced Cuthill-McKee algorithm --" << std::endl; double a = 0.0; std::size_t gmax = 1; r = viennacl::reorder(matrix2, viennacl::advanced_cuthill_mckee_tag(a, gmax)); std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl; /** * Reorder using Gibbs-Poole-Stockmeyer algorithm and print new bandwidth: **/ std::cout << "-- Gibbs-Poole-Stockmeyer algorithm --" << std::endl; r = viennacl::reorder(matrix2, viennacl::gibbs_poole_stockmeyer_tag()); std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl; /** * That's it. **/ std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { int op, ret; struct iovec s_iov[IOV_CNT], r_iov[IOV_CNT]; char *s_buf, *r_buf; int align_size; int pairs, print_rate; int window_varied; int c, j; int curr_size; enum send_recv_type_e type; ctpm_Init(&argc, &argv); ctpm_Rank(&myid); ctpm_Job_size(&numprocs); /* default values */ pairs = numprocs / 2; window_size = DEFAULT_WINDOW; window_varied = 0; print_rate = 1; hints = fi_allocinfo(); if (!hints) return -1; while ((op = getopt(argc, argv, "hp:w:vr:" CT_STD_OPTS)) != -1) { switch (op) { default: ct_parse_std_opts(op, optarg, hints); break; case 'p': pairs = atoi(optarg); if (pairs > (numprocs / 2)) { print_usage(); return EXIT_FAILURE; } break; case 'w': window_size = atoi(optarg); break; case 'v': window_varied = 1; break; case 'r': print_rate = atoi(optarg); if (0 != print_rate && 1 != print_rate) { print_usage(); return EXIT_FAILURE; } break; case '?': case 'h': print_usage(); return EXIT_FAILURE; } } hints->ep_attr->type = FI_EP_RDM; hints->caps = FI_MSG | FI_DIRECTED_RECV; hints->mode = FI_CONTEXT | FI_LOCAL_MR; if (numprocs < 2) { if (!myid) { fprintf(stderr, "This test requires at least two processes\n"); } ctpm_Finalize(); return -1; } /* Fabric initialization */ ret = init_fabric(); if (ret) { fprintf(stderr, "Problem in fabric initialization\n"); return ret; } ret = init_av(); if (ret) { fprintf(stderr, "Problem in AV initialization\n"); return ret; } /* Data initialization */ align_size = getpagesize(); assert(align_size <= MAX_ALIGNMENT); /* Allocate page aligned buffers */ for (c = 0; c < IOV_CNT; c++) { assert(!posix_memalign(&s_iov[c].iov_base, align_size, MAX_MSG_SIZE)); assert(!posix_memalign(&r_iov[c].iov_base, align_size, MAX_MSG_SIZE)); } assert(!posix_memalign((void **)&s_buf, align_size, MAX_MSG_SIZE * IOV_CNT)); assert(!posix_memalign((void **)&r_buf, align_size, MAX_MSG_SIZE * IOV_CNT)); for (type = 0; type < FIN; type++) { if (!myid) { fprintf(stdout, HEADER); switch (type) { case SEND_RECV: fprintf(stdout, SEND_RECV_DESC); break; case SENDV_RECVV: fprintf(stdout, SENDV_RECVV_DESC); break; case SEND_RECVV: fprintf(stdout, SEND_RECVV_DESC); break; case SENDV_RECV: fprintf(stdout, SENDV_RECV_DESC); break; default: abort(); } if (window_varied) { fprintf(stdout, "# [ pairs: %d ] [ window size: varied ]\n", pairs); fprintf(stdout, "\n# Uni-directional Bandwidth (MB/sec)\n"); } else { fprintf(stdout, "# [ pairs: %d ] [ window size: %d ]\n", pairs, window_size); if (print_rate) { fprintf(stdout, "%-*s%*s%*s%*s\n", 10, "# Size", FIELD_WIDTH, "Iov count", FIELD_WIDTH, "MB/s", FIELD_WIDTH, "Messages/s"); } else { fprintf(stdout, "%-*s%*s%*s\n", 10, "# Size", FIELD_WIDTH, "Iov count", FIELD_WIDTH, "MB/s"); } } fflush(stdout); } if (window_varied) { int window_array[] = WINDOW_SIZES; double **bandwidth_results; int log_val = 1, tmp_message_size = MAX_MSG_SIZE; int i, j; for (i = 0; i < WINDOW_SIZES_COUNT; i++) { if (window_array[i] > window_size) { window_size = window_array[i]; } } while (tmp_message_size >>= 1) { log_val++; } bandwidth_results = (double **)malloc(sizeof(double *) * log_val); for (i = 0; i < log_val; i++) { bandwidth_results[i] = (double *)malloc(sizeof(double) * WINDOW_SIZES_COUNT); } if (!myid) { fprintf(stdout, "# "); for (i = 0; i < WINDOW_SIZES_COUNT; i++) { fprintf(stdout, " %10d", window_array[i]); } fprintf(stdout, "\n"); fflush(stdout); } for (j = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2, j++) { if (!myid) { fprintf(stdout, "%-7d", curr_size); } for (i = 0; i < WINDOW_SIZES_COUNT; i++) { for (c = 0; c < IOV_CNT; c++) { r_iov[c].iov_len = s_iov[c].iov_len = curr_size; bandwidth_results[j][i] = calc_bw(myid, pairs, window_array[i], s_iov, r_iov, c + 1, s_buf, (c + 1) * curr_size, r_buf, (c + 1) * curr_size, type); if (!myid) { fprintf(stdout, "%*d %10.*f", FIELD_WIDTH, c + 1, FLOAT_PRECISION, bandwidth_results[j][i]); } fprintf(stdout, c == IOV_CNT - 1 ? "\n" : ""); } } if (!myid) { fprintf(stdout, "\n"); fflush(stdout); } } if (!myid && print_rate) { fprintf(stdout, "\n# Message Rate Profile\n"); fprintf(stdout, "# "); for (i = 0; i < WINDOW_SIZES_COUNT; i++) { fprintf(stdout, " %10d", window_array[i]); } fprintf(stdout, "\n"); fflush(stdout); for (c = 0; c < IOV_CNT; c++) { for (j = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2) { fprintf(stdout, "%-7d,%*d", curr_size * (c + 1), FIELD_WIDTH, c + 1); for (i = 0; i < WINDOW_SIZES_COUNT; i++) { double rate = 1e6 * bandwidth_results[j][i] / (curr_size * (c + 1)); fprintf(stdout, " %10.2f", rate); } fprintf(stdout, "\n"); fflush(stdout); j++; } } } } else { /* Just one window size */ for (curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2) { double bw, rate; for (c = 0; c < IOV_CNT; c++) { r_iov[c].iov_len = s_iov[c].iov_len = curr_size; bw = calc_bw(myid, pairs, window_size, s_iov, r_iov, c + 1, s_buf, (c + 1) * curr_size, r_buf, (c + 1) * curr_size, type); if (!myid) { rate = 1e6 * bw / (curr_size * (c + 1)); if (print_rate) { fprintf(stdout, "%-*d%*d%*.*f%*.*f\n", 10, curr_size * (c + 1), FIELD_WIDTH, c + 1, FIELD_WIDTH, FLOAT_PRECISION, bw, FIELD_WIDTH, FLOAT_PRECISION, rate); fflush(stdout); } else { fprintf(stdout, "%-*d%*d%*.*f\n", 10, curr_size * (c + 1), FIELD_WIDTH, FIELD_WIDTH, c + 1, FLOAT_PRECISION, bw); fflush(stdout); } } fprintf(stdout, c == IOV_CNT - 1 ? "\n" : ""); } } } }
int main(int argc, char *argv[]) { char *s_buf, *r_buf; int numprocs, rank, align_size; int pairs, print_rate; int window_size, window_varied; int c, curr_size; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* default values */ pairs = numprocs / 2; window_size = DEFAULT_WINDOW; window_varied = 0; print_rate = 1; while((c = getopt(argc, argv, "p:w:r:vh")) != -1) { switch (c) { case 'p': pairs = atoi(optarg); if(pairs > (numprocs / 2)) { if(0 == rank) { usage(); } goto error; } break; case 'w': window_size = atoi(optarg); break; case 'v': window_varied = 1; break; case 'r': print_rate = atoi(optarg); if(0 != print_rate && 1 != print_rate) { if(0 == rank) { usage(); } goto error; } break; default: if(0 == rank) { usage(); } goto error; } } align_size = getpagesize(); assert(align_size <= MAX_ALIGNMENT); s_buf = (char *) (((unsigned long) s_buf1 + (align_size - 1)) / align_size * align_size); r_buf = (char *) (((unsigned long) r_buf1 + (align_size - 1)) / align_size * align_size); if(numprocs < 2) { if(rank == 0) { fprintf(stderr, "This test requires at least two processes\n"); } MPI_Finalize(); return EXIT_FAILURE; } if(rank == 0) { fprintf(stdout, "# %s v%s\n", BENCHMARK, PACKAGE_VERSION); if(window_varied) { fprintf(stdout, "# [ pairs: %d ] [ window size: varied ]\n", pairs); fprintf(stdout, "\n# Uni-directional Bandwidth (MB/sec)\n"); } else { fprintf(stdout, "# [ pairs: %d ] [ window size: %d ]\n", pairs, window_size); if(print_rate) { fprintf(stdout, "%-*s%*s%*s\n", 10, "# Size", FIELD_WIDTH, "MB/s", FIELD_WIDTH, "Messages/s"); } else { fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "MB/s"); } } fflush(stdout); } /* More than one window size */ if(window_varied) { int window_array[] = WINDOW_SIZES; double ** bandwidth_results; int log_val = 1, tmp_message_size = MAX_MSG_SIZE; int i, j; for(i = 0; i < WINDOW_SIZES_COUNT; i++) { if(window_array[i] > window_size) { window_size = window_array[i]; } } request = (MPI_Request *) malloc(sizeof(MPI_Request) * window_size); reqstat = (MPI_Status *) malloc(sizeof(MPI_Status) * window_size); while(tmp_message_size >>= 1) { log_val++; } bandwidth_results = (double **) malloc(sizeof(double *) * log_val); for(i = 0; i < log_val; i++) { bandwidth_results[i] = (double *)malloc(sizeof(double) * WINDOW_SIZES_COUNT); } if(rank == 0) { fprintf(stdout, "# "); for(i = 0; i < WINDOW_SIZES_COUNT; i++) { fprintf(stdout, " %10d", window_array[i]); } fprintf(stdout, "\n"); fflush(stdout); } for(j = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2, j++) { if(rank == 0) { fprintf(stdout, "%-7d", curr_size); } for(i = 0; i < WINDOW_SIZES_COUNT; i++) { bandwidth_results[j][i] = calc_bw(rank, curr_size, pairs, window_array[i], s_buf, r_buf); if(rank == 0) { fprintf(stdout, " %10.*f", FLOAT_PRECISION, bandwidth_results[j][i]); } } if(rank == 0) { fprintf(stdout, "\n"); fflush(stdout); } } if(rank == 0 && print_rate) { fprintf(stdout, "\n# Message Rate Profile\n"); fprintf(stdout, "# "); for(i = 0; i < WINDOW_SIZES_COUNT; i++) { fprintf(stdout, " %10d", window_array[i]); } fprintf(stdout, "\n"); fflush(stdout); for(c = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2) { fprintf(stdout, "%-7d", curr_size); for(i = 0; i < WINDOW_SIZES_COUNT; i++) { double rate = 1e6 * bandwidth_results[c][i] / curr_size; fprintf(stdout, " %10.2f", rate); } fprintf(stdout, "\n"); fflush(stdout); c++; } } } else {