double logistic_regression::calc_loss() { double loss = 0.; for(size_t i = 0; i < samples.size(); ++i) { double j = lg_hypothesis(samples[i]); loss += j * j; } auto worker_comm = get_comm(); worker_comm.allreduce(loss); int sz = samples.size(); worker_comm.allreduce(sz); return loss / sz; }
/* Rebuilds `mesh` after the marked ("key") edges have been swapped.
 *
 * Steps, in order:
 *   1. Read the per-edge "key" and "config" I8 arrays and remove both tags.
 *   2. Optionally report the global number of key edges (rank 0 only).
 *   3. Create a new mesh from the old meta data, carrying vertices over
 *      unchanged (count, owners, transferred data).
 *   4. For every entity dimension from EDGE up to the mesh dimension, call
 *      modify_ents() and transfer_swap(); the old->new entity map produced
 *      for one dimension is the "lower entity" map fed to the next one.
 *   5. Overwrite `*mesh` with the new mesh.
 *
 * mesh: mesh to modify in place; must carry EDGE tags "key" and "config".
 * opts: only `opts.verbosity` is consulted here.
 */
static void swap3d_element_based(Mesh* mesh, AdaptOpts const& opts) {
  auto comm = mesh->comm();

  // Each tag is dropped right after its array has been fetched.
  auto edge_is_key = mesh->get_array<I8>(EDGE, "key");
  mesh->remove_tag(EDGE, "key");
  auto edge_configs = mesh->get_array<I8>(EDGE, "config");
  mesh->remove_tag(EDGE, "config");

  auto keys2edges = collect_marked(edge_is_key);

  if (opts.verbosity >= EACH_REBUILD) {
    // The allreduce is issued by every rank; only rank 0 prints the total.
    auto global_key_count =
        comm->allreduce(GO(keys2edges.size()), OMEGA_H_SUM);
    if (comm->rank() == 0) {
      std::cout << "swapping " << global_key_count << " 3D edges\n";
    }
  }

  // Vertices are carried over as they are.
  auto rebuilt = mesh->copy_meta();
  rebuilt.set_verts(mesh->nverts());
  rebuilt.set_owners(VERT, mesh->ask_owners(VERT));
  transfer_copy(mesh, &rebuilt, VERT);

  auto keys2prods = swap3d_keys_to_prods(mesh, keys2edges);
  auto prod_verts2verts =
      swap3d_topology(mesh, keys2edges, edge_configs, keys2prods);

  // Old-to-new map of the next-lower dimension; for vertices it is built as
  // LOs(nverts, 0, 1).
  auto lower_old2new = LOs(mesh->nverts(), 0, 1);

  for (Int dim = EDGE; dim <= mesh->dim(); ++dim) {
    // Outputs filled in by modify_ents().
    LOs prods2new{};
    LOs same2old{};
    LOs same2new{};
    LOs old2new{};

    modify_ents(mesh, &rebuilt, dim, EDGE, keys2edges, keys2prods[dim],
        prod_verts2verts[dim], lower_old2new, &prods2new, &same2old,
        &same2new, &old2new);
    transfer_swap(mesh, &rebuilt, dim, keys2edges, keys2prods[dim],
        prods2new, same2old, same2new);

    lower_old2new = old2new;
  }

  *mesh = rebuilt;
}
bool Comm::reduce_and(bool x) const { I8 y = x; y = allreduce(y, OMEGA_H_MIN); return static_cast<bool>(y); }
void get_contention() { unsigned int iter, size, dst; unsigned int i, j, k, s; unsigned int xdim, ydim, zdim; unsigned int xdisp, ydisp, zdisp; DCMF_Request_t get_req[ITERATIONS]; DCMF_Callback_t get_done; unsigned int done_count; DCMF_NetworkCoord_t myaddr, dstaddr; DCMF_Network ntwk; char buf[50]; get_done.function = done; get_done.clientdata = (void *) &done_count; DCMF_Messager_rank2network(nranks - 1, DCMF_TORUS_NETWORK, &dstaddr); xdim = dstaddr.torus.x + 1; ydim = dstaddr.torus.y + 1; zdim = dstaddr.torus.z + 1; if (myrank == 0) { printf("Dimensions of Torus : %d, %d, %d \n", xdim, ydim, zdim); fflush(stdout); } DCMF_Messager_rank2network(myrank, DCMF_TORUS_NETWORK, &myaddr); dstaddr.network = myaddr.network; dstaddr.torus.t = myaddr.torus.t; int size_array[] = { 8, 64, 512, 4096, 32768, 262144, 1048576 }; int size_count = sizeof(size_array) / sizeof(int); int disp_array[][3] = { { 0, 0, 1 }, { 0, 0, 3 }, { 0, 3, 3 }, { 3, 3, 3 }, { 0, 1, 3 }, { 1, 1, 3 }, { 0, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 1, 3, 3 }, { 2, 3, 3 } }; int disp_count = sizeof(disp_array) / (sizeof(int) * 3); for (s = 0; s < size_count; s++) { size = size_array[s]; if (myrank == 0) { printf("Message Size : %20d \n", size); printf("%30s %20s \n", "Displacement b/w Pairs", "Avg Bandwidth (Mbps)"); fflush(stdout); } /*Assumes all dimensions are equal*/ for (i = 0; i < disp_count; i++) { xdisp = disp_array[i][0]; ydisp = disp_array[i][1]; zdisp = disp_array[i][2]; dstaddr.torus.x = (myaddr.torus.x + xdisp) % xdim; dstaddr.torus.y = (myaddr.torus.y + ydisp) % ydim; dstaddr.torus.z = (myaddr.torus.z + zdisp) % zdim; DCMF_Messager_network2rank(&dstaddr, &dst, &ntwk); barrier(); /*********************** * start timer * ***********************/ t_start = DCMF_Timebase(); done_count = ITERATIONS; for (iter = 0; iter < ITERATIONS; iter++) { DCMF_Get(&get_reg, &get_req[iter], get_done, DCMF_SEQUENTIAL_CONSISTENCY, dst, size, memregion[dst], memregion[myrank], MAX_MSG_SIZE * ITERATIONS + iter * size, 
iter * size); } while (done_count) DCMF_Messager_advance(); t_stop = DCMF_Timebase(); /*********************** * stop timer * ***********************/ t_sec = (t_stop - t_start) / (clockMHz * 1000000); bw = (ITERATIONS * size) / (t_sec * 1024 * 1024); barrier(); allreduce(-1, (char *) &bw, (char *) &bw_avg, 1, DCMF_DOUBLE, DCMF_SUM); if (myrank == 0) { bw_avg = bw_avg / nranks; sprintf(buf, "(%d)(%d)(%d)", xdisp, ydisp, zdisp); printf("%30s %20.0f \n", buf, bw_avg); fflush(stdout); } } } }
void send_localvsremote() { DCMF_Request_t send_req[ITERATIONS]; DCMF_Callback_t send_done, nocallback; int done_count; unsigned int msgsize, i, dst; DCMF_NetworkCoord_t myaddr, dstaddr; DCMF_Network ntwk; DCQuad msginfo[ITERATIONS]; DCMF_Messager_rank2network(myrank, DCMF_TORUS_NETWORK, &myaddr); dstaddr.network = myaddr.network; dstaddr.torus.x = (myaddr.torus.x + 3) % 8; dstaddr.torus.y = (myaddr.torus.y + 3) % 8; dstaddr.torus.z = (myaddr.torus.z + 3) % 8; dstaddr.torus.t = myaddr.torus.t; DCMF_Messager_network2rank(&dstaddr, &dst, &ntwk); send_done.function = done; send_done.clientdata = (void *) &done_count; nocallback.function = NULL; nocallback.clientdata = NULL; if (myrank == 0) { printf("Send call overhead in usec\n"); fflush(stdout); } if (myrank == 0) { char buffer[100]; sprintf(buffer, "%20s %20s %20s", "Msg Size", "Farthest pairs", "Closest pairs"); printf("%s \n", buffer); fflush(stdout); } for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2) { /*********************** * warmup * ***********************/ snd_rcv_active += SKIP; done_count += SKIP; for (i = 0; i < SKIP; i++) { DCMF_Send(&snd_reg, &send_req[i], send_done, DCMF_SEQUENTIAL_CONSISTENCY, dst, msgsize, source + i * msgsize, &msginfo[i], 1); } while (done_count || snd_rcv_active) DCMF_Messager_advance(); t_avg = 0; t_avg1 = 0, t_avg2 = 0; target_index = 0; barrier(); snd_rcv_active += ITERATIONS; t_start = DCMF_Timebase(); for (i = 0; i < ITERATIONS; i++) { DCMF_Send(&snd_reg, &send_req[i], nocallback, DCMF_SEQUENTIAL_CONSISTENCY, dst, msgsize, source + i * msgsize, &msginfo[i], 1); } t_stop = DCMF_Timebase(); t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS); while (snd_rcv_active) DCMF_Messager_advance(); barrier(); allreduce(-1, (char *) &t_usec, (char *) &t_avg, 1, DCMF_DOUBLE, DCMF_SUM); barrier(); target_index = 0; snd_rcv_active += ITERATIONS; t_start = DCMF_Timebase(); for (i = 0; i < ITERATIONS; i++) { DCMF_Send(&snd_reg, &send_req[i], nocallback, DCMF_SEQUENTIAL_CONSISTENCY, 
(myrank + 1) % nranks, msgsize, source + i * msgsize, &msginfo[i], 1); } t_stop = DCMF_Timebase(); t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS); while (snd_rcv_active) DCMF_Messager_advance(); barrier(); allreduce(-1, (char *) &t_usec1, (char *) &t_avg1, 1, DCMF_DOUBLE, DCMF_SUM); barrier(); if (myrank == 0) { t_avg = t_avg / nranks; t_avg1 = t_avg1 / nranks; printf("%20d %20.2f %20.2f \n", msgsize, t_avg, t_avg1); fflush(stdout); } } if (myrank == 0) { printf("Send latency in usec with local vs remote completion \n"); fflush(stdout); } if (myrank == 0) { char buffer[100]; sprintf(buffer, "%20s %20s %20s %20s %20s %20s %20s", "Msg Size", "Farthest pairs-local", "Farthest pairs-remote", "Farthest pairs-both", "Closest pairs-local", "Closest pairs-remote", "Closest pairs-both"); printf("%s \n", buffer); fflush(stdout); } barrier(); for (msgsize = 1; msgsize < MAX_MSG_SIZE; msgsize *= 2) { /*********************** * start timer * ***********************/ snd_rcv_active += ITERATIONS; t_start = DCMF_Timebase(); for (i = 0; i < ITERATIONS; i++) { done_count = 1; DCMF_Send(&snd_reg, &send_req[i], send_done, DCMF_SEQUENTIAL_CONSISTENCY, dst, msgsize, source + i * msgsize, &msginfo[i], 1); while (done_count) DCMF_Messager_advance(); } t_stop = DCMF_Timebase(); t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS); while (snd_rcv_active) DCMF_Messager_advance(); barrier(); allreduce(-1, (char *) &t_usec, (char *) &t_avg, 1, DCMF_DOUBLE, DCMF_SUM); barrier(); target_index = 0; t_start = DCMF_Timebase(); for (i = 0; i < ITERATIONS; i++) { ack_rcv_active = 1; DCMF_Send(&rcb_snd_reg, &send_req[i], nocallback, DCMF_SEQUENTIAL_CONSISTENCY, dst, msgsize, source + i * msgsize, &msginfo[i], 1); while (ack_rcv_active) DCMF_Messager_advance(); } t_stop = DCMF_Timebase(); t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS); barrier(); allreduce(-1, (char *) &t_usec1, (char *) &t_avg1, 1, DCMF_DOUBLE, DCMF_SUM); barrier(); target_index = 0; t_start = DCMF_Timebase(); for (i 
= 0; i < ITERATIONS; i++) { done_count = 1; ack_rcv_active = 1; DCMF_Send(&rcb_snd_reg, &send_req[i], send_done, DCMF_SEQUENTIAL_CONSISTENCY, dst, msgsize, source + i * msgsize, &msginfo[i], 1); while (done_count || ack_rcv_active) DCMF_Messager_advance(); } t_stop = DCMF_Timebase(); t_usec2 = (t_stop - t_start) / (clockMHz * ITERATIONS); /*********************** * stop timer * ***********************/ barrier(); allreduce(-1, (char *) &t_usec2, (char *) &t_avg2, 1, DCMF_DOUBLE, DCMF_SUM); barrier(); if (myrank == 0) { t_avg = t_avg / nranks; t_avg1 = t_avg1 / nranks; t_avg2 = t_avg2 / nranks; printf("%20d %20.2f %20.2f %20.2f", msgsize, t_avg, t_avg1, t_avg2); fflush(stdout); } t_avg = 0; t_avg1 = 0, t_avg2 = 0; target_index = 0; barrier(); /*********************** * start timer * ***********************/ snd_rcv_active += ITERATIONS; t_start = DCMF_Timebase(); for (i = 0; i < ITERATIONS; i++) { done_count = 1; DCMF_Send(&snd_reg, &send_req[i], send_done, DCMF_SEQUENTIAL_CONSISTENCY, (myrank + 1) % nranks, msgsize, source + i * msgsize, &msginfo[i], 1); while (done_count) DCMF_Messager_advance(); } t_stop = DCMF_Timebase(); t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS); while (snd_rcv_active) DCMF_Messager_advance(); barrier(); allreduce(-1, (char *) &t_usec, (char *) &t_avg, 1, DCMF_DOUBLE, DCMF_SUM); barrier(); target_index = 0; t_start = DCMF_Timebase(); for (i = 0; i < ITERATIONS; i++) { ack_rcv_active = 1; DCMF_Send(&rcb_snd_reg, &send_req[i], nocallback, DCMF_SEQUENTIAL_CONSISTENCY, (myrank + 1) % nranks, msgsize, source + i * msgsize, &msginfo[i], 1); while (ack_rcv_active) DCMF_Messager_advance(); } t_stop = DCMF_Timebase(); t_usec1 = (t_stop - t_start) / (clockMHz * ITERATIONS); barrier(); allreduce(-1, (char *) &t_usec1, (char *) &t_avg1, 1, DCMF_DOUBLE, DCMF_SUM); barrier(); target_index = 0; t_start = DCMF_Timebase(); for (i = 0; i < ITERATIONS; i++) { done_count = 1; ack_rcv_active = 1; DCMF_Send(&rcb_snd_reg, &send_req[i], send_done, 
DCMF_SEQUENTIAL_CONSISTENCY, (myrank + 1) % nranks, msgsize, source + i * msgsize, &msginfo[i], 1); while (done_count || ack_rcv_active) DCMF_Messager_advance(); } t_stop = DCMF_Timebase(); t_usec2 = (t_stop - t_start) / (clockMHz * ITERATIONS); /*********************** * stop timer * ***********************/ allreduce(-1, (char *) &t_usec2, (char *) &t_avg2, 1, DCMF_DOUBLE, DCMF_SUM); barrier(); if (myrank == 0) { t_avg = t_avg / nranks; t_avg1 = t_avg1 / nranks; t_avg2 = t_avg2 / nranks; printf("%20.2f %20.2f %20.2f \n", t_avg, t_avg1, t_avg2); fflush(stdout); } } }
void init_paras() { auto local_parser = [] (const std::string & line) { return paracel::str_split(line, ','); }; auto f_parser = paracel::gen_parser(local_parser); paracel_load_as_graph(local_graph, input, f_parser, "fmap"); if(get_worker_id() == 0) std::cout << "load done" << std::endl; auto cnt_lambda = [&] (const node_t & a, const node_t & b, double c) { if(!kvmap.count(a)) { kvmap[a] = 1.; } else { kvmap[a] += 1.; } }; local_graph.traverse(cnt_lambda); // make sure there are no same pieces // generate kv + local combine auto kvinit_lambda = [&] (const node_t & a, const node_t & b, double c) { klstmap[b].push_back(std::make_pair(a, kvmap[a])); }; local_graph.traverse(kvinit_lambda); if(get_worker_id() == 0) std::cout << "stat done" << std::endl; // init push to construct global connect info std::unordered_map<std::string, std::vector<std::pair<node_t, double> > > klstmap_tmp; for(auto & kv : klstmap) { if(kv.first == SENTINEL) continue; // little tricky here klstmap_tmp[paracel::cvt(kv.first) + "_links"] = kv.second; } paracel_bupdate_multi(klstmap_tmp, handle_file, update_function); if(get_worker_id() == 0) std::cout << "first bupdate done" << std::endl; paracel_sync(); // read connect info only once klstmap.clear(); for(auto & kv : kvmap) { // notice: limit memory here paracel_read<std::vector<std::pair<node_t, double> > > (paracel::cvt(kv.first) + "_links", klstmap[kv.first]); } if(get_worker_id() == 0) std::cout << "first read done" << std::endl; // reuse kvmap to store pr // init pr with 1. / total_node_sz auto worker_comm = get_comm(); long node_sz = kvmap.size(); worker_comm.allreduce(node_sz); double init_val = 1. / node_sz; std::unordered_map<std::string, double> tmp; for(auto & kv : kvmap) { kvmap[kv.first] = init_val; tmp[paracel::cvt(kv.first) + "_pr"] = init_val; } paracel_write_multi(tmp); paracel_sync(); }