int main() { std::ifstream input_file("../input.txt"); unsigned N; input_file >> N; std::vector<float> input(N); std::vector<float> output(N); for (int i = 0; i < N; ++i) { input_file >> input[i]; } try { cl_environment cl_env = cl_environment("../inclusive_scan.cpp"); output = prefix_sum(input, cl_env, 256); std::ofstream output_file("../output.txt"); for (int j = 0; j < output.size(); ++j) { output_file << output[j] << " "; } } catch (const cl::Error& e) { std::cout << e.what() << " " << e.err() << std::endl; } return 0; }
/*
 * Loads an array from the file named by the first command-line argument,
 * prints the value returned by prefix_sum() for it, and frees the array.
 *
 * Returns 0 on success, 1 when no input file argument was supplied.
 */
int main(int argc, char** argv) {
    // Declarations
    struct array * array;

    // Without an argument argv[1] is a null pointer; refuse to hand that to
    // array_read() and tell the user what is expected instead.
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <input-file>\n",
                argc > 0 ? argv[0] : "prefix_sum");
        return 1;
    }

    // Load input data from file
    array = array_read(argv[1]);

    // Compute and output result
    printf("Computing summation... %i\n", prefix_sum(array));

    // Free memory out
    array_free(array);

    // Everything went well
    return 0;
}
bool run () { bool passed = true; /* create vector with random numbers */ const size_t N = 100; std::vector<size_t> array(N); std::vector<atomic<size_t>> prefix_sum(N); for (size_t j=0; j<N; j++) array[j] = rand() % 10; /* dry run only counts */ ParallelPrefixSumState<size_t> state; size_t S0 = parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t sum) -> size_t { size_t s = 0; for (size_t i=r.begin(); i<r.end(); i++) s += array[i]; return s; }, [](size_t v0, size_t v1) { return v0+v1; }); /* final run calculates prefix sum */ size_t S1 = parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r, const size_t sum) -> size_t { size_t s = 0; for (size_t i=r.begin(); i<r.end(); i++) { prefix_sum[i].store(sum+s); s += array[i]; } return s; }, [](size_t v0, size_t v1) { return v0+v1; }); /* check calculated prefix sum */ size_t sum=0; for (size_t i=0; i<N; sum+=array[i++]) { passed &= (prefix_sum[i] == sum); } passed &= (S0 == sum); passed &= (S1 == sum); return passed; }
int main(int argc, char** argv) { if (argc != 2) { fprintf(stderr, "Usage: bin numbers_per_proc\n"); exit(1); } // Get the amount of random numbers to create per process int numbers_per_proc = atoi(argv[1]); MPI_Init(NULL, NULL); int world_rank; MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); int world_size; MPI_Comm_size(MPI_COMM_WORLD, &world_size); // Seed the random number generator to get different results each time srand(time(NULL) * world_rank); // Create the random numbers on this process. Note that all numbers // will be between 0 and 1 float *rand_nums = create_random_numbers(numbers_per_proc); // Given the array of random numbers, determine how many will be sent // to each process (based on the which process owns the number). // The return value from this function is an array of counts // for each rank in the communicator. // The count represents how many numbers each process will receive // when they are binned from this process. int *send_amounts_per_proc = get_send_amounts_per_proc(rand_nums, numbers_per_proc, world_size); // Determine how many numbers you will receive from each process. This // information is needed to set up the binning call. int *recv_amounts_per_proc = get_recv_amounts_per_proc(send_amounts_per_proc, world_size); // Do a prefix sum for the send/recv amounts to get the send/recv offsets for // the MPI_Alltoallv call (the binning call). int *send_offsets_per_proc = prefix_sum(send_amounts_per_proc, world_size); int *recv_offsets_per_proc = prefix_sum(recv_amounts_per_proc, world_size); // Allocate an array to hold the binned numbers for this process based on the total // amount of numbers this process will receive from others. int total_recv_amount = sum(recv_amounts_per_proc, world_size); float *binned_nums = (float *)malloc(sizeof(float) * total_recv_amount); // The final step before binning - arrange all of the random numbers so that they // are ordered by bin. 
For simplicity, we are simply going to sort the random // numbers, however, this could be optimized since the numbers don't need to be // fully sorted. qsort(rand_nums, numbers_per_proc, sizeof(float), &compare_float); // Perform the binning step with MPI_Alltoallv. This will send all of the numbers in // the rand_nums array to their proper bin. Each process will only contain numbers // belonging to its bin after this step. For example, if there are 4 processes, process // 0 will contain numbers in the [0, .25) range. MPI_Alltoallv(rand_nums, send_amounts_per_proc, send_offsets_per_proc, MPI_FLOAT, binned_nums, recv_amounts_per_proc, recv_offsets_per_proc, MPI_FLOAT, MPI_COMM_WORLD); // Print results printf("Process %d received %d numbers in bin [%f - %f)\n", world_rank, total_recv_amount, get_bin_start(world_rank, world_size), get_bin_end(world_rank, world_size)); // Check that the bin numbers are correct verify_bin_nums(binned_nums, total_recv_amount, world_rank, world_size); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); // Clean up free(rand_nums); free(send_amounts_per_proc); free(recv_amounts_per_proc); free(send_offsets_per_proc); free(recv_offsets_per_proc); free(binned_nums); }
// Test driver for RadixTree<int>: grows one tree step by step and, after each
// change, checks all_sum(), prefix_sum() and suggest() against expected
// values. Every step depends on the tree state left by the previous ones, so
// the statement order matters. Returns exit_status().
int main(int argc, char **argv) {
  // Number of checks announced up front. NOTE(review): 72 ok1() calls are
  // visible here; the remainder presumably comes from the two
  // check_ascending_keys() calls -- confirm when adding or removing checks.
  plan_tests(82);

  // 'buffer' is the scratch space handed to suggest(); 'suggest' holds its result.
  TCHAR buffer[64], *suggest;

  RadixTree<int> irt;

  // Single entry "foo"=42: the empty prefix and every prefix of "foo" are
  // expected to sum to 42, a longer string to 0.
  irt.add(_T("foo"), 42);
  ok1(all_sum(irt) == 42);
  ok1(prefix_sum(irt, _T("")) == 42);
  ok1(prefix_sum(irt, _T("f")) == 42);
  ok1(prefix_sum(irt, _T("fo")) == 42);
  ok1(prefix_sum(irt, _T("foo")) == 42);
  ok1(prefix_sum(irt, _T("foobar")) == 0);

  // A sibling key with value 0 must leave the total unchanged.
  irt.add(_T("foa"), 0);
  ok1(all_sum(irt) == 42);

  check_ascending_keys(irt);

  // suggest(): expected NULL for a prefix no key starts with; otherwise a
  // string of the characters that may follow the prefix ("" when there are none).
  suggest = irt.suggest(_T("xyz"), buffer, 64);
  ok1(suggest == NULL);

  suggest = irt.suggest(_T(""), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("f")) == 0);

  suggest = irt.suggest(_T("f"), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("o")) == 0);

  suggest = irt.suggest(_T("foo"), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("")) == 0);

  // Second top-level branch "bar"=1.
  irt.add(_T("bar"), 1);
  ok1(all_sum(irt) == 43);
  ok1(prefix_sum(irt, _T("")) == 43);
  ok1(prefix_sum(irt, _T("f")) == 42);

  suggest = irt.suggest(_T(""), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("bf")) == 0);

  suggest = irt.suggest(_T("ba"), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("r")) == 0);

  // A second value under the existing key "foo": both values are counted (42 + 2).
  irt.add(_T("foo"), 2);
  ok1(all_sum(irt) == 45);
  ok1(prefix_sum(irt, _T("")) == 45);
  ok1(prefix_sum(irt, _T("f")) == 44);
  ok1(prefix_sum(irt, _T("fo")) == 44);
  ok1(prefix_sum(irt, _T("foo")) == 44);
  ok1(prefix_sum(irt, _T("foobar")) == 0);

  suggest = irt.suggest(_T("foo"), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("")) == 0);

  // "baz"=3 shares the prefix "ba" with "bar".
  irt.add(_T("baz"), 3);
  ok1(all_sum(irt) == 48);
  ok1(prefix_sum(irt, _T("b")) == 4);
  ok1(prefix_sum(irt, _T("ba")) == 4);
  ok1(prefix_sum(irt, _T("bar")) == 1);
  ok1(prefix_sum(irt, _T("baz")) == 3);

  suggest = irt.suggest(_T(""), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("bf")) == 0);

  suggest = irt.suggest(_T("ba"), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("rz")) == 0);

  // "foobar"=4 extends the existing key "foo".
  irt.add(_T("foobar"), 4);
  ok1(all_sum(irt) == 52);
  ok1(prefix_sum(irt, _T("f")) == 48);
  ok1(prefix_sum(irt, _T("fo")) == 48);
  ok1(prefix_sum(irt, _T("foo")) == 48);
  ok1(prefix_sum(irt, _T("foobar")) == 4);

  suggest = irt.suggest(_T("foo"), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("b")) == 0);

  // "fo"=5 is a proper prefix of existing keys; it counts for "f" and "fo"
  // but not for "foo".
  irt.add(_T("fo"), 5);
  ok1(all_sum(irt) == 57);
  ok1(prefix_sum(irt, _T("f")) == 53);
  ok1(prefix_sum(irt, _T("fo")) == 53);
  ok1(prefix_sum(irt, _T("foo")) == 48);
  ok1(prefix_sum(irt, _T("foobar")) == 4);

  // More continuations of "foo"; the suggestion strings are expected in
  // ascending character order ("bz", then "byz").
  irt.add(_T("fooz"), 6);
  ok1(all_sum(irt) == 63);
  ok1(prefix_sum(irt, _T("f")) == 59);
  ok1(prefix_sum(irt, _T("fo")) == 59);
  ok1(prefix_sum(irt, _T("foo")) == 54);

  suggest = irt.suggest(_T("foo"), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("bz")) == 0);

  irt.add(_T("fooy"), 7);
  ok1(all_sum(irt) == 70);
  ok1(prefix_sum(irt, _T("f")) == 66);
  ok1(prefix_sum(irt, _T("fo")) == 66);
  ok1(prefix_sum(irt, _T("foo")) == 61);

  suggest = irt.suggest(_T("foo"), buffer, 64);
  ok1(suggest != NULL);
  ok1(_tcscmp(suggest, _T("byz")) == 0);

  // Third value under "foo".
  irt.add(_T("foo"), 8);
  ok1(all_sum(irt) == 78);
  ok1(prefix_sum(irt, _T("foo")) == 69);

  // remove(key, value): only the 42 stored under "foo" is expected to go.
  irt.remove(_T("foo"), 42);
  ok1(all_sum(irt) == 36);
  ok1(prefix_sum(irt, _T("foo")) == 27);

  // remove(key): the remaining values under "foo" (2 and 8) are expected to
  // go, while longer keys such as "foobar" stay.
  irt.remove(_T("foo"));
  ok1(all_sum(irt) == 26);
  ok1(prefix_sum(irt, _T("")) == 26);
  ok1(prefix_sum(irt, _T("foo")) == 17);

  // The empty string is accepted as a key; it counts only for the empty prefix.
  irt.add(_T(""), 9);
  ok1(all_sum(irt) == 35);
  ok1(prefix_sum(irt, _T("")) == 35);
  ok1(prefix_sum(irt, _T("foo")) == 17);

  check_ascending_keys(irt);

  return exit_status();
}