Example #1
0
double logistic_regression::calc_loss() {
    double loss = 0.;
    for(size_t i = 0; i < samples.size(); ++i) {
        double j = lg_hypothesis(samples[i]);
        loss += j * j;
    }
    auto worker_comm = get_comm();
    worker_comm.allreduce(loss);
    int sz = samples.size();
    worker_comm.allreduce(sz);
    return loss / sz;
}
Example #2
0
// Rebuilds the mesh after 3D edge swaps have been selected.
//
// The edges to swap are read from the EDGE tag "key" and their chosen
// configurations from the EDGE tag "config"; both tags are removed from the
// input mesh. A new mesh is assembled dimension by dimension (EDGE up to
// mesh->dim()) and finally assigned over *mesh.
//
// mesh: mesh to modify in place.
// opts: adapt options; only opts.verbosity is consulted here.
static void swap3d_element_based(Mesh* mesh, AdaptOpts const& opts) {
  auto comm = mesh->comm();
  // Grab each tag's array before dropping the tag from the mesh.
  auto key_marks = mesh->get_array<I8>(EDGE, "key");
  mesh->remove_tag(EDGE, "key");
  auto swap_configs = mesh->get_array<I8>(EDGE, "config");
  mesh->remove_tag(EDGE, "config");
  auto keys2edges = collect_marked(key_marks);
  if (opts.verbosity >= EACH_REBUILD) {
    // Report the key count reduced over all ranks; only rank 0 prints.
    auto global_key_count =
        comm->allreduce(GO(keys2edges.size()), OMEGA_H_SUM);
    if (comm->rank() == 0) {
      std::cout << "swapping " << global_key_count << " 3D edges\n";
    }
  }
  // Vertices are carried over unchanged into the rebuilt mesh.
  auto rebuilt = mesh->copy_meta();
  rebuilt.set_verts(mesh->nverts());
  rebuilt.set_owners(VERT, mesh->ask_owners(VERT));
  transfer_copy(mesh, &rebuilt, VERT);
  auto keys2prods = swap3d_keys_to_prods(mesh, keys2edges);
  auto prod_verts2verts =
      swap3d_topology(mesh, keys2edges, swap_configs, keys2prods);
  // Old-to-new map of the next lower dimension; starts with the vertices.
  // NOTE(review): (nverts, 0, 1) presumably builds the identity map - confirm.
  LOs old_lows2new_lows(mesh->nverts(), 0, 1);
  for (Int dim = EDGE; dim <= mesh->dim(); ++dim) {
    LOs prods2new_ents;
    LOs same_ents2old_ents;
    LOs same_ents2new_ents;
    LOs old_ents2new_ents;
    modify_ents(mesh, &rebuilt, dim, EDGE, keys2edges, keys2prods[dim],
        prod_verts2verts[dim], old_lows2new_lows, &prods2new_ents,
        &same_ents2old_ents, &same_ents2new_ents, &old_ents2new_ents);
    transfer_swap(mesh, &rebuilt, dim, keys2edges, keys2prods[dim],
        prods2new_ents, same_ents2old_ents, same_ents2new_ents);
    // The map just produced feeds the next higher dimension.
    old_lows2new_lows = old_ents2new_ents;
  }
  *mesh = rebuilt;
}
Example #3
0
bool Comm::reduce_and(bool x) const {
  I8 y = x;
  y = allreduce(y, OMEGA_H_MIN);
  return static_cast<bool>(y);
}
Example #4
0
/*
 * Benchmarks DCMF_Get bandwidth while every rank fetches from a partner that
 * sits at a fixed torus displacement from it.
 *
 * For each message size and each (x, y, z) displacement in the tables below,
 * the destination is this rank's torus coordinate plus the displacement,
 * wrapped by the torus dimensions. Each rank then issues ITERATIONS gets,
 * advances the messager until done_count drops to zero, derives a bandwidth
 * figure, and the per-rank figures are combined with allreduce (DCMF_SUM).
 * Rank 0 divides by nranks and prints the result.
 *
 * The torus dimensions are taken from the coordinates of rank nranks - 1
 * plus one.
 *
 * Results are written to the file-level variables t_start, t_stop, t_sec,
 * bw and bw_avg.
 */
void get_contention()
{

    unsigned int iter, size, dst;
    unsigned int i, s;
    unsigned int xdim, ydim, zdim;
    unsigned int xdisp, ydisp, zdisp;
    DCMF_Request_t get_req[ITERATIONS];
    DCMF_Callback_t get_done;
    unsigned int done_count;
    DCMF_NetworkCoord_t myaddr, dstaddr;
    DCMF_Network ntwk;
    char buf[50];

    /* Completion callback for every get.
     * NOTE(review): presumably `done` decrements the counter passed as
     * clientdata - confirm against its definition. */
    get_done.function = done;
    get_done.clientdata = (void *) &done_count;

    DCMF_Messager_rank2network(nranks - 1, DCMF_TORUS_NETWORK, &dstaddr);
    xdim = dstaddr.torus.x + 1;
    ydim = dstaddr.torus.y + 1;
    zdim = dstaddr.torus.z + 1;

    if (myrank == 0)
    {
        /* %u: the dimensions are unsigned */
        printf("Dimensions of Torus : %u, %u, %u \n", xdim, ydim, zdim);
        fflush(stdout);
    }

    DCMF_Messager_rank2network(myrank, DCMF_TORUS_NETWORK, &myaddr);
    dstaddr.network = myaddr.network;
    dstaddr.torus.t = myaddr.torus.t;

    /* Message sizes (bytes) to sweep over. */
    const unsigned int size_array[] = { 8, 64, 512, 4096, 32768, 262144,
                                        1048576 };
    const unsigned int size_count = sizeof(size_array) / sizeof(size_array[0]);

    /* (x, y, z) displacements between a rank and its partner. */
    const unsigned int disp_array[][3] = { { 0, 0, 1 }, { 0, 0, 3 },
                                           { 0, 3, 3 }, { 3, 3, 3 },
                                           { 0, 1, 3 }, { 1, 1, 3 },
                                           { 0, 2, 3 }, { 1, 2, 3 },
                                           { 2, 2, 3 }, { 1, 3, 3 },
                                           { 2, 3, 3 } };
    const unsigned int disp_count = sizeof(disp_array) / sizeof(disp_array[0]);

    for (s = 0; s < size_count; s++)
    {
        size = size_array[s];

        if (myrank == 0)
        {
            printf("Message Size : %20u \n", size);
            printf("%30s  %20s \n",
                   "Displacement b/w Pairs",
                   "Avg Bandwidth (Mbps)");
            fflush(stdout);
        }

        /*Assumes all dimensions are equal*/
        for (i = 0; i < disp_count; i++)
        {
            xdisp = disp_array[i][0];
            ydisp = disp_array[i][1];
            zdisp = disp_array[i][2];

            /* Partner coordinate, wrapped around the torus. */
            dstaddr.torus.x = (myaddr.torus.x + xdisp) % xdim;
            dstaddr.torus.y = (myaddr.torus.y + ydisp) % ydim;
            dstaddr.torus.z = (myaddr.torus.z + zdisp) % zdim;

            DCMF_Messager_network2rank(&dstaddr, &dst, &ntwk);

            barrier();

            /***********************
             * start timer          *
             ***********************/
            t_start = DCMF_Timebase();

            done_count = ITERATIONS;
            for (iter = 0; iter < ITERATIONS; iter++)
            {
                DCMF_Get(&get_reg,
                         &get_req[iter],
                         get_done,
                         DCMF_SEQUENTIAL_CONSISTENCY,
                         dst,
                         size,
                         memregion[dst],
                         memregion[myrank],
                         MAX_MSG_SIZE * ITERATIONS + iter * size,
                         iter * size);
            }
            /* Drive progress until every get has completed. */
            while (done_count)
                DCMF_Messager_advance();

            t_stop = DCMF_Timebase();
            /***********************
             * stop timer          *
             ***********************/
            t_sec = (t_stop - t_start) / (clockMHz * 1000000);
            bw = (ITERATIONS * size) / (t_sec * 1024 * 1024);

            barrier();
            allreduce(-1,
                      (char *) &bw,
                      (char *) &bw_avg,
                      1,
                      DCMF_DOUBLE,
                      DCMF_SUM);

            if (myrank == 0)
            {
                bw_avg = bw_avg / nranks;
                /* Bounded write; %u because the displacements are unsigned. */
                snprintf(buf, sizeof(buf), "(%u)(%u)(%u)", xdisp, ydisp, zdisp);
                printf("%30s %20.0f \n", buf, bw_avg);
                fflush(stdout);
            }
        }

    }
}
Example #5
0
void send_localvsremote()
{

    DCMF_Request_t send_req[ITERATIONS];
    DCMF_Callback_t send_done, nocallback;
    int done_count;
    unsigned int msgsize, i, dst;
    DCMF_NetworkCoord_t myaddr, dstaddr;
    DCMF_Network ntwk;
    DCQuad msginfo[ITERATIONS];

    DCMF_Messager_rank2network(myrank, DCMF_TORUS_NETWORK, &myaddr);

    dstaddr.network = myaddr.network;
    dstaddr.torus.x = (myaddr.torus.x + 3) % 8;
    dstaddr.torus.y = (myaddr.torus.y + 3) % 8;
    dstaddr.torus.z = (myaddr.torus.z + 3) % 8;
    dstaddr.torus.t = myaddr.torus.t;

    DCMF_Messager_network2rank(&dstaddr, &dst, &ntwk);

    send_done.function = done;
    send_done.clientdata = (void *) &done_count;
    nocallback.function = NULL;
    nocallback.clientdata = NULL;

    if (myrank == 0)
    {
        printf("Send call overhead in usec\n");
        fflush(stdout);
    }

    if (myrank == 0)
    {
        char buffer[100];
        sprintf(buffer,
                "%20s  %20s %20s",
                "Msg Size",
                "Farthest pairs",
                "Closest pairs");
        printf("%s \n", buffer);
        fflush(stdout);
    }

    for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {

        /***********************
         * warmup               *
         ***********************/
        snd_rcv_active += SKIP;
        done_count += SKIP;
        for (i = 0; i < SKIP; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }
        while (done_count || snd_rcv_active)
            DCMF_Messager_advance();

        t_avg = 0;
        t_avg1 = 0, t_avg2 = 0;
        target_index = 0;
        barrier();

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            printf("%20d %20.2f %20.2f \n", msgsize, t_avg, t_avg1);
            fflush(stdout);
        }
    }

    if (myrank == 0)
    {
        printf("Send latency in usec with local vs remote completion \n");
        fflush(stdout);
    }

    if (myrank == 0)
    {
        char buffer[100];
        sprintf(buffer,
                "%20s  %20s  %20s  %20s  %20s %20s  %20s",
                "Msg Size",
                "Farthest pairs-local",
                "Farthest pairs-remote",
                "Farthest pairs-both",
                "Closest pairs-local",
                "Closest pairs-remote",
                "Closest pairs-both");
        printf("%s \n", buffer);
        fflush(stdout);
    }

    barrier();

    for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {

        /***********************
         * start timer          *
         ***********************/

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      dst,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count || ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec2 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        /***********************
         * stop timer          *
         ***********************/

        barrier();
        allreduce(-1,
                  (char *) &t_usec2,
                  (char *) &t_avg2,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            t_avg2 = t_avg2 / nranks;
            printf("%20d %20.2f %20.2f %20.2f", msgsize, t_avg, t_avg1, t_avg2);
            fflush(stdout);
        }

        t_avg = 0;
        t_avg1 = 0, t_avg2 = 0;
        target_index = 0;

        barrier();

        /***********************
         * start timer          *
         ***********************/

        snd_rcv_active += ITERATIONS;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            DCMF_Send(&snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS);

        while (snd_rcv_active)
            DCMF_Messager_advance();

        barrier();
        allreduce(-1,
                  (char *) &t_usec,
                  (char *) &t_avg,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      nocallback,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        barrier();
        allreduce(-1,
                  (char *) &t_usec1,
                  (char *) &t_avg1,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();
        target_index = 0;

        t_start = DCMF_Timebase();

        for (i = 0; i < ITERATIONS; i++)
        {
            done_count = 1;
            ack_rcv_active = 1;
            DCMF_Send(&rcb_snd_reg,
                      &send_req[i],
                      send_done,
                      DCMF_SEQUENTIAL_CONSISTENCY,
                      (myrank + 1) % nranks,
                      msgsize,
                      source + i * msgsize,
                      &msginfo[i],
                      1);
            while (done_count || ack_rcv_active)
                DCMF_Messager_advance();
        }

        t_stop = DCMF_Timebase();
        t_usec2 = (t_stop - t_start) / (clockMHz * ITERATIONS);

        /***********************
         * stop timer          *
         ***********************/

        allreduce(-1,
                  (char *) &t_usec2,
                  (char *) &t_avg2,
                  1,
                  DCMF_DOUBLE,
                  DCMF_SUM);
        barrier();

        if (myrank == 0)
        {
            t_avg = t_avg / nranks;
            t_avg1 = t_avg1 / nranks;
            t_avg2 = t_avg2 / nranks;
            printf("%20.2f %20.2f %20.2f \n", t_avg, t_avg1, t_avg2);
            fflush(stdout);
        }

    }
}
Example #6
0
  void init_paras() {
    auto local_parser = [] (const std::string & line) {
      return paracel::str_split(line, ',');
    };
    auto f_parser = paracel::gen_parser(local_parser);
    paracel_load_as_graph(local_graph, input, f_parser, "fmap");
    if(get_worker_id() == 0) std::cout << "load done" << std::endl;

    auto cnt_lambda = [&] (const node_t & a,
                           const node_t & b,
                           double c) {
      if(!kvmap.count(a)) {
        kvmap[a] = 1.;
      } else {
        kvmap[a] += 1.;
      }
    };
    local_graph.traverse(cnt_lambda);
    
    // make sure there are no same pieces
    // generate kv + local combine
    auto kvinit_lambda = [&] (const node_t & a,
                              const node_t & b,
                              double c) {
      klstmap[b].push_back(std::make_pair(a, kvmap[a]));
    };
    local_graph.traverse(kvinit_lambda);
    if(get_worker_id() == 0) std::cout << "stat done" << std::endl;

    // init push to construct global connect info
    std::unordered_map<std::string,
        std::vector<std::pair<node_t, double> > > klstmap_tmp;
    for(auto & kv : klstmap) {
      if(kv.first == SENTINEL) continue; // little tricky here
      klstmap_tmp[paracel::cvt(kv.first) + "_links"] = kv.second;
    }
    paracel_bupdate_multi(klstmap_tmp,
                          handle_file,
                          update_function);
    if(get_worker_id() == 0) std::cout << "first bupdate done" << std::endl;
    paracel_sync();

    // read connect info only once
    klstmap.clear();
    for(auto & kv : kvmap) {
      // notice: limit memory here
      paracel_read<std::vector<std::pair<node_t, double> > >
          (paracel::cvt(kv.first) + "_links",
           klstmap[kv.first]);
    }
    if(get_worker_id() == 0) std::cout << "first read done" << std::endl;

    // reuse kvmap to store pr
    // init pr with 1. / total_node_sz
    auto worker_comm = get_comm();
    long node_sz = kvmap.size();
    worker_comm.allreduce(node_sz);
    double init_val = 1. / node_sz;
    std::unordered_map<std::string, double> tmp;
    for(auto & kv : kvmap) {
      kvmap[kv.first] = init_val; 
      tmp[paracel::cvt(kv.first) + "_pr"] = init_val;
    }
    paracel_write_multi(tmp);
    paracel_sync();
  }