Code example #1
File: bitonicsort.hpp Project: eipi10ydz/mxx
template <typename _Iterator, typename _Compare>
void bitonic_merge(_Iterator begin, _Iterator end, _Compare comp, const mxx::comm& comm, int pbeg, int pend, int dir) {
    MXX_ASSERT(pbeg <= comm.rank() && comm.rank() < pend);

    // number of processes in [pbeg,pend); recursion bottoms out at a single process
    int size = pend - pbeg;
    if (size <= 1)
        return;

    // round up to the smallest power of two >= size (float math is exact for realistic p)
    int p2 = pow(2, ceil(log(size)/log(2)));
    // merge with splits as is done in the power of 2 case
    int pmid = pbeg + p2/2;

    if (comm.rank() < pmid && comm.rank() + p2/2 < pend) {
        // this processor has a partner in the second half
        int partner_rank = comm.rank() + p2/2;
        bitonic_split(begin, end, comp, comm, partner_rank, dir);
        bitonic_merge(begin, end, comp, comm, pbeg, pmid, dir);
    } else if (comm.rank() < pmid) {
        // this process doesn't have a partner but has to recursively
        // participate in the next merge
        bitonic_merge(begin, end, comp, comm, pbeg, pmid, dir);
    } else { // if (comm.rank() >= pmid)
        int partner_rank = comm.rank() - p2/2;
        bitonic_split(begin, end, comp, comm, partner_rank, dir);
        bitonic_merge(begin, end, comp, comm, pmid, pend, dir);
    }
}
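
For context, bitonic_merge is the merge phase of a distributed bitonic sort. The driver below is a minimal sketch, not part of the mxx source: it assumes equal-sized local blocks, a power-of-two process count, that a nonzero dir means ascending, and <algorithm> for std::sort/std::reverse; the excerpt above additionally handles non-power-of-two process ranges.

template <typename _Iterator, typename _Compare>
void bitonic_sort_rec(_Iterator begin, _Iterator end, _Compare comp,
                      const mxx::comm& comm, int pbeg, int pend, int dir) {
    if (pend - pbeg <= 1) {
        // base case: a single process sorts its local block in direction dir
        std::sort(begin, end, comp);
        if (!dir)
            std::reverse(begin, end);
        return;
    }
    int pmid = pbeg + (pend - pbeg) / 2;
    // sort the two halves in opposite directions, forming one bitonic sequence
    if (comm.rank() < pmid)
        bitonic_sort_rec(begin, end, comp, comm, pbeg, pmid, dir);
    else
        bitonic_sort_rec(begin, end, comp, comm, pmid, pend, !dir);
    // merge the bitonic sequence across [pbeg,pend) into direction dir
    bitonic_merge(begin, end, comp, comm, pbeg, pend, dir);
}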
Code example #2
File: new_datatype.hpp Project: patflick/mxx
    static void unpack_envelope(MPI_Datatype type, flat_repr& f) {
        int num_ints, num_addr, num_dt, comb;
        MPI_Type_get_envelope(type, &num_ints, &num_addr, &num_dt, &comb);

        if (comb == MPI_COMBINER_NAMED) {
            //std::cout << "Type: " << builtin_typename_map::get_typeid_name(type) << std::endl;
            f.m.emplace(f.cur_offset, type);
            return;
        }

        // allocate the output buffers for MPI_Type_get_contents
        std::vector<int> ints(num_ints);
        std::vector<MPI_Aint> addrs(num_addr);
        std::vector<MPI_Datatype> types(num_dt);

        // .data() remains valid even when a count is zero (e.g. MPI_COMBINER_DUP has no ints)
        MPI_Type_get_contents(type, num_ints, num_addr, num_dt,
                              ints.data(), addrs.data(), types.data());

        switch(comb) {
          case MPI_COMBINER_DUP:
            MXX_ASSERT(num_ints == 0 && num_addr == 0 && num_dt == 1);
            unpack_envelope(types[0], f);
            break;
          case MPI_COMBINER_CONTIGUOUS:
            std::cout << "Contiguous: " << ints[0] << " x ";
            unpack_envelope(types[0], f);
            break;
          case MPI_COMBINER_VECTOR:
          case MPI_COMBINER_HVECTOR:
          case MPI_COMBINER_INDEXED:
          case MPI_COMBINER_HINDEXED:
          case MPI_COMBINER_INDEXED_BLOCK:
          case MPI_COMBINER_HINDEXED_BLOCK:
            std::cout << "NOT YET SUPPORTED vector/indexed/indexed_block" << std::endl;
            break;
          case MPI_COMBINER_STRUCT:
            {
                int count = ints[0]; // number of struct members
                std::vector<int> blen(&ints[1], &ints[1] + count); // blocklengths ints[1..count]
                std::vector<MPI_Aint> displ = addrs; // byte displacement of each member
                std::cout << "Struct: " << std::endl;
                MPI_Aint offset = f.cur_offset;
                for (int i = 0; i < count; ++i) {
                    f.cur_offset = offset + displ[i];
                    unpack_envelope(types[i], f);
                }
                f.cur_offset = offset;
            }
            break;
          case MPI_COMBINER_RESIZED:
            // TODO
            std::cout << "resized to [" << addrs[0] << "," << addrs[1] << "): " << std::endl;
            unpack_envelope(types[0], f);
            break;
          case MPI_COMBINER_SUBARRAY:
          case MPI_COMBINER_DARRAY:
            std::cout << "NOT YET SUPPORTED subarray/darray" << std::endl;
            break;
        }
    }
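
As a usage sketch, the following builds an MPI struct type and flattens it; unpack_envelope is a static member in this header and is called unqualified here for brevity. It assumes an initialized MPI environment and <cstddef> for offsetof.

struct point { int id; double x; };

MPI_Datatype ptype;
int blens[2] = {1, 1};
MPI_Aint displs[2] = {offsetof(point, id), offsetof(point, x)};
MPI_Datatype member_types[2] = {MPI_INT, MPI_DOUBLE};
MPI_Type_create_struct(2, blens, displs, member_types, &ptype);
MPI_Type_commit(&ptype);

flat_repr f;               // helper struct from this header
f.cur_offset = 0;
unpack_envelope(ptype, f); // typically records MPI_INT at offset 0 and MPI_DOUBLE at offset 8
MPI_Type_free(&ptype);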
Code example #3
File: new_datatype.hpp Project: patflick/mxx
// returns the byte offset of member *m within the enclosing type U
// (U is the surrounding class template parameter; m is expected to point
// into a U object based at address 0, as in the classic offsetof trick)
template <typename M>
size_t offset_from_ptr(M* m) {
    size_t offset = reinterpret_cast<size_t>(m);
    // offset is unsigned, so only the upper bound needs checking
    MXX_ASSERT(offset + sizeof(M) <= sizeof(U));
    return offset;
}
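
The helper relies on the classic offsetof trick: the caller forms a pointer to a member of a fictitious U object placed at address 0, so the pointer's value equals the member's byte offset. A hypothetical call site, assuming the enclosing template was instantiated with U = my_struct (forming a member address through a null base is formally undefined behavior, which is why the assertion range-checks the result):

struct my_struct { int a; double b; };

// fictitious object at address 0; only used to form a member address, never dereferenced
my_struct* base = reinterpret_cast<my_struct*>(0);
size_t off = offset_from_ptr(&base->b); // same value as offsetof(my_struct, b)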
Code example #4
File: benchmark_a2a.cpp Project: patflick/mxx
int main(int argc, char* argv[]) {
    mxx::env e(argc, argv);
    mxx::comm comm;

    // print out node and rank distribution
    mxx::print_node_distribution(comm);

    // create shared-mem MPI+MPI hybrid communicator
    mxx::hybrid_comm hc(comm);

    // assert the same number of processes on every node
    int proc_per_node = hc.local.size();
    if (!mxx::all_same(proc_per_node, comm)) {
        std::cerr << "Error: this benchmark assumes the same number of processors per node" << std::endl;
        MPI_Abort(comm, -1);
    }

    // assert an even number of nodes (a single node is also allowed)
    int num_nodes = hc.num_nodes();
    if (num_nodes > 1 && num_nodes % 2 != 0) {
        std::cerr << "Error: this benchmark assumes an even number of nodes" << std::endl;
        MPI_Abort(comm, -1);
    }

    // default args
    size_t mem_per_node_gb = 32; // setting the max experiment at 32 GB per node
    std::string filename = "all2all_benchmark.csv";

    // parse input arguments: [-m <GB per node>] [output-file]
    exec_name = argv[0];
    argv++; argc--;
    if (argc >= 2) {
        std::string x(argv[0]);
        if (x == "-m") {
            mem_per_node_gb = atoi(argv[1]);
            argv+=2; argc-=2;
        }
        if (x != "-m" || mem_per_node_gb > 1024 || mem_per_node_gb == 0) {
            print_usage();
            MPI_Abort(comm, -1);
        }
    }
    if (argc > 0) {
        filename = argv[0];
        argv++; argc--;
    }
    if (argc > 0) {
        print_usage();
        MPI_Abort(comm, -1);
    }

    MXX_ASSERT(mxx::all_same(mem_per_node_gb, comm));

    // root rank opens the output CSV and writes its header
    std::ofstream of;
    if (hc.global.rank() == 0) {
        of.open(filename);
        of << "p,nnodes,q,m,n,min,avg,max" << std::endl;
    }

    // convert GB per node into bytes: mem_per_node_gb * 2^30
    size_t mempernode = mem_per_node_gb << 30;

    // run the benchmark over power-of-two subsets of nodes and processes per node
    mxx::forall_p2_nnodes_and_ppn(hc, [&](const mxx::hybrid_comm& hc){
        bm_all2all(hc, of, mempernode);
    });

    return 0;
}
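
main() references a print_usage() helper and an exec_name global defined elsewhere in benchmark_a2a.cpp. A minimal sketch consistent with the options parsed above (the exact wording is an assumption):

// hypothetical usage message; exec_name is the global set at the top of main()
void print_usage() {
    std::cerr << "Usage: " << exec_name << " [-m <memory per node in GB>] [output-file]" << std::endl;
    std::cerr << "  defaults: -m 32, output-file = all2all_benchmark.csv" << std::endl;
}

A typical invocation would then be, e.g., mpirun -np 64 ./benchmark_a2a -m 16 results.csv.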