예제 #1
0
/**
* Tutorial driver: generates a sparse matrix on a 3D grid, shuffles its ordering, and
* prints the bandwidth obtained with each of the reordering algorithms below.
*
* Command line arguments are ignored. Returns EXIT_SUCCESS.
**/
int main(int, char **)
{
  // Fixed seed so that repeated runs use the same rand() sequence
  // (presumably consumed by generate_random_reordering() - confirm against its definition).
  srand(42);
  std::cout << "-- Generating matrix --" << std::endl;
  std::size_t dof_per_dim = 64;   //number of grid points per coordinate direction
  std::size_t n = dof_per_dim * dof_per_dim * dof_per_dim; //total number of unknowns
  std::vector< std::map<int, double> > matrix = gen_3d_mesh_matrix(dof_per_dim, dof_per_dim, dof_per_dim, false);  //If last parameter is 'true', a tetrahedral grid instead of a hexahedral grid is used.

  /**
  * Shuffle the generated matrix
  **/
  std::vector<int> r = generate_random_reordering(n);
  std::vector< std::map<int, double> > matrix2 = reorder_matrix(matrix, r);


  /**
  * Print some statistics about the generated matrix:
  **/
  std::cout << " * Unknowns: " << n << std::endl;
  std::cout << " * Initial bandwidth: " << calc_bw(matrix) << std::endl;
  std::cout << " * Randomly reordered bandwidth: " << calc_bw(matrix2) << std::endl;

  /**
  * Reorder using Cuthill-McKee algorithm and print new bandwidth.
  * The reordering is computed exactly once; 'r' is reused for the result of each algorithm.
  **/
  std::cout << "-- Cuthill-McKee algorithm --" << std::endl;
  r = viennacl::reorder(matrix2, viennacl::cuthill_mckee_tag());
  std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;

  /**
  * Reorder using advanced Cuthill-McKee algorithm and print new bandwidth:
  **/
  std::cout << "-- Advanced Cuthill-McKee algorithm --" << std::endl;
  double a = 0.0;          // first parameter passed to advanced_cuthill_mckee_tag
  std::size_t gmax = 1;    // second parameter passed to advanced_cuthill_mckee_tag
  r = viennacl::reorder(matrix2, viennacl::advanced_cuthill_mckee_tag(a, gmax));
  std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;

  /**
  * Reorder using Gibbs-Poole-Stockmeyer algorithm and print new bandwidth:
  **/
  std::cout << "-- Gibbs-Poole-Stockmeyer algorithm --" << std::endl;
  r = viennacl::reorder(matrix2, viennacl::gibbs_poole_stockmeyer_tag());
  std::cout << " * Reordered bandwidth: " << calc_reordered_bw(matrix2, r) << std::endl;

  /**
  *  That's it.
  **/
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}
예제 #2
0
/*
 * Multi-pair bandwidth / message rate benchmark driver.
 *
 * Options handled here:
 *   -p <pairs>   number of pairs; must not exceed numprocs / 2
 *   -w <window>  window size
 *   -v           run over every entry of WINDOW_SIZES instead of one window
 *   -r <0|1>     whether the message rate is printed
 *   -h / -?      print usage and exit with EXIT_FAILURE
 * Any other option is forwarded to ct_parse_std_opts().
 *
 * For every send/recv type below FIN, calc_bw() is run for message sizes
 * 1, 2, 4, ... MAX_MSG_SIZE and for iov counts 1 .. IOV_CNT; rank 0 prints
 * the results.
 *
 * myid, numprocs, window_size and hints are declared outside this function.
 */
int main(int argc, char *argv[])
{
	int op, ret;

	struct iovec s_iov[IOV_CNT], r_iov[IOV_CNT];
	char *s_buf, *r_buf;
	int align_size;
	int pairs, print_rate;
	int window_varied;
	int c, j;
	int curr_size;
	enum send_recv_type_e type;

	ctpm_Init(&argc, &argv);
	ctpm_Rank(&myid);
	ctpm_Job_size(&numprocs);

	/* default values */
	pairs            = numprocs / 2;
	window_size      = DEFAULT_WINDOW;
	window_varied    = 0;
	print_rate       = 1;

	hints = fi_allocinfo();
	if (!hints)
		return -1;

	while ((op = getopt(argc, argv, "hp:w:vr:" CT_STD_OPTS)) != -1) {
		switch (op) {
		default:
			/* Not one of ours: let the common option parser handle it */
			ct_parse_std_opts(op, optarg, hints);
			break;
		case 'p':
			pairs = atoi(optarg);
			if (pairs > (numprocs / 2)) {
				print_usage();
				return EXIT_FAILURE;
			}
			break;
		case 'w':
			window_size = atoi(optarg);
			break;
		case 'v':
			window_varied = 1;
			break;
		case 'r':
			print_rate = atoi(optarg);
			if (0 != print_rate && 1 != print_rate) {
				print_usage();
				return EXIT_FAILURE;
			}
			break;
		case '?':
		case 'h':
			print_usage();
			return EXIT_FAILURE;
		}
	}

	hints->ep_attr->type	= FI_EP_RDM;
	hints->caps		= FI_MSG | FI_DIRECTED_RECV;
	hints->mode		= FI_CONTEXT | FI_LOCAL_MR;

	if (numprocs < 2) {
		if (!myid) {
			fprintf(stderr, "This test requires at least two processes\n");
		}
		ctpm_Finalize();
		return -1;
	}

	/* Fabric initialization */
	ret = init_fabric();
	if (ret) {
		fprintf(stderr, "Problem in fabric initialization\n");
		return ret;
	}

	ret = init_av();
	if (ret) {
		fprintf(stderr, "Problem in AV initialization\n");
		return ret;
	}

	/* Data initialization */
	align_size = getpagesize();
	assert(align_size <= MAX_ALIGNMENT);

	/*
	 * Allocate page aligned buffers.
	 * The allocations are deliberately NOT wrapped in assert(): with NDEBUG
	 * defined the assert expression is not evaluated and nothing would be
	 * allocated at all.
	 */
	for (c = 0; c < IOV_CNT; c++) {
		ret = posix_memalign(&s_iov[c].iov_base, align_size, MAX_MSG_SIZE);
		if (!ret)
			ret = posix_memalign(&r_iov[c].iov_base, align_size, MAX_MSG_SIZE);
		if (ret) {
			fprintf(stderr, "Problem in buffer allocation\n");
			return ret;
		}
	}

	ret = posix_memalign((void **)&s_buf, align_size, MAX_MSG_SIZE * IOV_CNT);
	if (!ret)
		ret = posix_memalign((void **)&r_buf, align_size, MAX_MSG_SIZE * IOV_CNT);
	if (ret) {
		fprintf(stderr, "Problem in buffer allocation\n");
		return ret;
	}

	for (type = 0; type < FIN; type++) {
		/* Rank 0 prints the banner and the column headings for this type */
		if (!myid) {
			fprintf(stdout, HEADER);
			switch (type) {
			case SEND_RECV:
				fprintf(stdout, SEND_RECV_DESC);
				break;
			case SENDV_RECVV:
				fprintf(stdout, SENDV_RECVV_DESC);
				break;
			case SEND_RECVV:
				fprintf(stdout, SEND_RECVV_DESC);
				break;
			case SENDV_RECV:
				fprintf(stdout, SENDV_RECV_DESC);
				break;
			default:
				abort();
			}

			if (window_varied) {
				fprintf(stdout, "# [ pairs: %d ] [ window size: varied ]\n", pairs);
				fprintf(stdout, "\n# Uni-directional Bandwidth (MB/sec)\n");
			} else {
				fprintf(stdout, "# [ pairs: %d ] [ window size: %d ]\n", pairs,
					window_size);
				if (print_rate) {
					fprintf(stdout, "%-*s%*s%*s%*s\n", 10, "# Size", FIELD_WIDTH,
						"Iov count", FIELD_WIDTH, "MB/s", FIELD_WIDTH, "Messages/s");
				} else {
					fprintf(stdout, "%-*s%*s%*s\n", 10, "# Size", FIELD_WIDTH,
						"Iov count", FIELD_WIDTH, "MB/s");
				}
			}
			fflush(stdout);
		}

		if (window_varied) {
			int window_array[] = WINDOW_SIZES;
			double **bandwidth_results;
			int log_val = 1, tmp_message_size = MAX_MSG_SIZE;
			int i, j;

			/* Raise window_size to the largest entry of WINDOW_SIZES */
			for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
				if (window_array[i] > window_size) {
					window_size = window_array[i];
				}
			}

			/* log_val = number of power-of-two sizes from 1 up to MAX_MSG_SIZE */
			while (tmp_message_size >>= 1) {
				log_val++;
			}

			/* One row per message size, one column per window size */
			bandwidth_results = (double **)malloc(sizeof(double *) * log_val);

			for (i = 0; i < log_val; i++) {
				bandwidth_results[i] = (double *)malloc(sizeof(double) *
									WINDOW_SIZES_COUNT);
			}

			if (!myid) {
				fprintf(stdout, "#      ");

				for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
					fprintf(stdout, "  %10d", window_array[i]);
				}

				fprintf(stdout, "\n");
				fflush(stdout);
			}

			for (j = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2, j++) {
				if (!myid) {
					fprintf(stdout, "%-7d", curr_size);
				}

				for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
					for (c = 0; c < IOV_CNT; c++) {
						r_iov[c].iov_len = s_iov[c].iov_len = curr_size;
						bandwidth_results[j][i] = calc_bw(myid, pairs,
										  window_array[i], s_iov, r_iov, c + 1,
										  s_buf, (c + 1) * curr_size, r_buf,
										  (c + 1) * curr_size, type);

						if (!myid) {
							fprintf(stdout, "%*d  %10.*f", FIELD_WIDTH, c + 1,
								FLOAT_PRECISION,
								bandwidth_results[j][i]);
						}

						fprintf(stdout, c == IOV_CNT - 1 ? "\n" : "");
					}
				}

				if (!myid) {
					fprintf(stdout, "\n");
					fflush(stdout);
				}
			}

			if (!myid && print_rate) {
				fprintf(stdout, "\n# Message Rate Profile\n");
				fprintf(stdout, "#      ");

				for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
					fprintf(stdout, "  %10d", window_array[i]);
				}

				fprintf(stdout, "\n");
				fflush(stdout);

				/*
				 * NOTE(review): bandwidth_results[j][i] was overwritten for
				 * every iov count in the measurement loop above, so it holds
				 * the value measured with c == IOV_CNT - 1. The rates printed
				 * here for smaller iov counts are derived from that value.
				 */
				for (c = 0; c < IOV_CNT; c++) {
					for (j = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2) {
						fprintf(stdout, "%-7d,%*d", curr_size * (c + 1), FIELD_WIDTH, c + 1);

						for (i = 0; i < WINDOW_SIZES_COUNT; i++) {
							double rate = 1e6 * bandwidth_results[j][i] / (curr_size * (c + 1));

							fprintf(stdout, "  %10.2f", rate);
						}

						fprintf(stdout, "\n");
						fflush(stdout);
						j++;
					}
				}
			}

			/*
			 * The table is allocated anew for every send/recv type, so it
			 * has to be released before the next iteration.
			 */
			for (i = 0; i < log_val; i++) {
				free(bandwidth_results[i]);
			}
			free(bandwidth_results);
		} else {
			/* Just one window size */
			for (curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2) {
				double bw, rate;

				for (c = 0; c < IOV_CNT; c++) {
					r_iov[c].iov_len = s_iov[c].iov_len = curr_size;
					bw = calc_bw(myid, pairs, window_size, s_iov, r_iov, c + 1,
						     s_buf, (c + 1) * curr_size, r_buf,
						     (c + 1) * curr_size, type);

					if (!myid) {
						rate = 1e6 * bw / (curr_size * (c + 1));

						if (print_rate) {
							fprintf(stdout, "%-*d%*d%*.*f%*.*f\n", 10, curr_size * (c + 1),
								FIELD_WIDTH, c + 1, FIELD_WIDTH,
								FLOAT_PRECISION, bw, FIELD_WIDTH,
								FLOAT_PRECISION, rate);
							fflush(stdout);
						} else {
							/*
							 * Each '*' consumes one int argument before its
							 * value: (width, size) (width, iov count)
							 * (width, precision, bw).
							 */
							fprintf(stdout, "%-*d%*d%*.*f\n", 10, curr_size * (c + 1),
								FIELD_WIDTH, c + 1, FIELD_WIDTH,
								FLOAT_PRECISION, bw);
							fflush(stdout);
						}
					}
					fprintf(stdout, c == IOV_CNT - 1 ? "\n" : "");
				}
			}
		}
	}
예제 #3
0
int main(int argc, char *argv[])
{
    char *s_buf, *r_buf;

    int numprocs, rank, align_size;
    int pairs, print_rate;
    int window_size, window_varied;
    int c, curr_size;

    MPI_Init(&argc, &argv);

    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* default values */
    pairs            = numprocs / 2;
    window_size      = DEFAULT_WINDOW;
    window_varied    = 0;
    print_rate       = 1;

    while((c = getopt(argc, argv, "p:w:r:vh")) != -1) {
        switch (c) {
            case 'p':
                pairs = atoi(optarg);

                if(pairs > (numprocs / 2)) {
                    if(0 == rank) {
                        usage();
                    }

                    goto error;
                }

                break;

            case 'w':
                window_size = atoi(optarg);
                break;

            case 'v':
                window_varied = 1;
                break;

            case 'r':
                print_rate = atoi(optarg);

                if(0 != print_rate && 1 != print_rate) {
                    if(0 == rank) {
                        usage();
                    }

                    goto error;
                }

                break;

            default:
                if(0 == rank) {
                    usage();
                }

                goto error;
        }
    }

    align_size = getpagesize();
    assert(align_size <= MAX_ALIGNMENT);

    s_buf =
        (char *) (((unsigned long) s_buf1 + (align_size - 1)) /
                  align_size * align_size);
    r_buf =
        (char *) (((unsigned long) r_buf1 + (align_size - 1)) /
                  align_size * align_size);

    if(numprocs < 2) {
        if(rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();

        return EXIT_FAILURE;
    }

    if(rank == 0) {
        fprintf(stdout, "# %s v%s\n", BENCHMARK, PACKAGE_VERSION);

        if(window_varied) {
            fprintf(stdout, "# [ pairs: %d ] [ window size: varied ]\n", pairs);
            fprintf(stdout, "\n# Uni-directional Bandwidth (MB/sec)\n");
        }

        else {
            fprintf(stdout, "# [ pairs: %d ] [ window size: %d ]\n", pairs,
                    window_size);

            if(print_rate) {
                fprintf(stdout, "%-*s%*s%*s\n", 10, "# Size", FIELD_WIDTH,
                        "MB/s", FIELD_WIDTH, "Messages/s");
            }

            else {
                fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "MB/s");
            }
        }

        fflush(stdout);
    }

   /* More than one window size */

   if(window_varied) {
       int window_array[] = WINDOW_SIZES;
       double ** bandwidth_results;
       int log_val = 1, tmp_message_size = MAX_MSG_SIZE;
       int i, j;

       for(i = 0; i < WINDOW_SIZES_COUNT; i++) {
           if(window_array[i] > window_size) {
               window_size = window_array[i];
           }
       }

       request = (MPI_Request *) malloc(sizeof(MPI_Request) * window_size);
       reqstat = (MPI_Status *) malloc(sizeof(MPI_Status) * window_size);

       while(tmp_message_size >>= 1) {
           log_val++;
       }

       bandwidth_results = (double **) malloc(sizeof(double *) * log_val);

       for(i = 0; i < log_val; i++) {
           bandwidth_results[i] = (double *)malloc(sizeof(double) *
                   WINDOW_SIZES_COUNT);
       }

       if(rank == 0) {
           fprintf(stdout, "#      ");

           for(i = 0; i < WINDOW_SIZES_COUNT; i++) {
               fprintf(stdout, "  %10d", window_array[i]);
           }

           fprintf(stdout, "\n");
           fflush(stdout);
       }
    
       for(j = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2, j++) {
           if(rank == 0) {
               fprintf(stdout, "%-7d", curr_size);
           }

           for(i = 0; i < WINDOW_SIZES_COUNT; i++) {
               bandwidth_results[j][i] = calc_bw(rank, curr_size, pairs,
                       window_array[i], s_buf, r_buf);

               if(rank == 0) {
                   fprintf(stdout, "  %10.*f", FLOAT_PRECISION,
                           bandwidth_results[j][i]);
               }
           }

           if(rank == 0) {
               fprintf(stdout, "\n");
               fflush(stdout);
           }
       }

       if(rank == 0 && print_rate) {
            fprintf(stdout, "\n# Message Rate Profile\n");
            fprintf(stdout, "#      ");

            for(i = 0; i < WINDOW_SIZES_COUNT; i++) {
                fprintf(stdout, "  %10d", window_array[i]);
            }       

            fprintf(stdout, "\n");
            fflush(stdout);

            for(c = 0, curr_size = 1; curr_size <= MAX_MSG_SIZE; curr_size *= 2) { 
                fprintf(stdout, "%-7d", curr_size); 

                for(i = 0; i < WINDOW_SIZES_COUNT; i++) {
                    double rate = 1e6 * bandwidth_results[c][i] / curr_size;

                    fprintf(stdout, "  %10.2f", rate);
                }       

                fprintf(stdout, "\n");
                fflush(stdout);
                c++;    
            }
       }
   }

   else {