/**
 * @brief  Answer a data request, sending either a chunk or intermediate pairs.
 * @param  msg  The request message received by the data node.
 */
static void send_data (msg_task_t msg)
{
    char         mailbox[MAILBOX_ALIAS_SIZE];
    double       data_size;
    size_t       my_id;
    task_info_t  ti;

    my_id = get_worker_id (MSG_host_self ());
    sprintf (mailbox, TASK_MAILBOX,
             get_worker_id (MSG_task_get_source (msg)),
             MSG_process_get_PID (MSG_task_get_sender (msg)));

    if (message_is (msg, SMS_GET_CHUNK))
    {
        MSG_task_dsend (MSG_task_create ("DATA-C", 0.0, config.chunk_size, NULL), mailbox, NULL);
    }
    else if (message_is (msg, SMS_GET_INTER_PAIRS))
    {
        ti = (task_info_t) MSG_task_get_data (msg);
        data_size = job.map_output[my_id][ti->id] - ti->map_output_copied[my_id];
        MSG_task_dsend (MSG_task_create ("DATA-IP", 0.0, data_size, NULL), mailbox, NULL);
    }

    MSG_task_destroy (msg);
}
void learning() {
  // first read
  std::unordered_map<node_t, double> kvmap_stale;
  for(auto & kv : klstmap) {
    for(auto & kkv : kv.second) {
      if(!kvmap_stale.count(kkv.first)) {
        //if(klstmap[kkv.first].size() == 0) continue;
        kvmap_stale[kkv.first] = paracel_read<double>(paracel::cvt(kkv.first) + "_pr");
      }
    }
  }
  paracel_sync();

  for(int rd = 0; rd < rounds; ++rd) {
    if(get_worker_id() == 0) std::cout << rd << std::endl;

    // pull
    paracel::list_type<paracel::str_type> keys;
    for(auto & kv : kvmap_stale) {
      keys.push_back(paracel::cvt(kv.first) + "_pr");
    }
    auto result_tmp = paracel_read_multi<double>(keys);
    keys.resize(0);
    int cnt = 0;
    for(auto & kv : kvmap_stale) {
      kvmap_stale[kv.first] = result_tmp[cnt];
      cnt++;
    }
    result_tmp.resize(0);

    // map
    for(auto & kv : klstmap) {
      double sigma = 0.;
      for(auto & item : kv.second) {
        sigma += (kvmap_stale[item.first] / item.second);
      }
      kvmap[kv.first] = (1. - damping_factor) + damping_factor * sigma;
    }
    paracel_sync();

    // reduce
    std::unordered_map<std::string, double> kvmap_dct;
    for(auto & kv : kvmap) {
      kvmap_dct[paracel::cvt(kv.first) + "_pr"] = kv.second;
    }
    paracel_write_multi(kvmap_dct);
    paracel_sync();
  } // rounds

  // last pull all
  auto kvmap_tmp = paracel_read_special<double>(handle_file, filter_function);
  auto tear_lambda = [] (const std::string & str) {
    auto pos = str.find('_');
    return str.substr(0, pos);
  };
  for(auto & kv : kvmap_tmp) {
    std::string tmp = tear_lambda(kv.first);
    kvmap[paracel::cvt(tmp)] = kv.second;
  }
}
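The map step above is the standard damped PageRank update: each node's new rank is (1 - d) plus d times the sum of rank(u) / out_degree(u) over its in-links. A minimal standalone sketch of that update follows; the `incoming` and `rank` containers are hypothetical stand-ins for the paracel-backed klstmap and kvmap_stale, not part of any API here.

// Sketch of the damped PageRank update used in the map step above.
// incoming[v] holds (u, out_degree(u)) for every edge u -> v.
#include <unordered_map>
#include <utility>
#include <vector>

using node_t = long;

std::unordered_map<node_t, double> pagerank_step(
    const std::unordered_map<node_t, std::vector<std::pair<node_t, double>>>& incoming,
    const std::unordered_map<node_t, double>& rank,
    double damping_factor) {
  std::unordered_map<node_t, double> next;
  for (const auto& kv : incoming) {
    double sigma = 0.;
    for (const auto& link : kv.second) {
      // contribution of in-neighbor u: rank(u) / out_degree(u)
      sigma += rank.at(link.first) / link.second;
    }
    next[kv.first] = (1. - damping_factor) + damping_factor * sigma;
  }
  return next;
}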
void mls_learning() {
  // learn with local item vectors
  local_learning(item_vects);
  sync();

  // calc similarity with items held by other processes
  for(int node_id = 0; node_id < (int)get_worker_size(); ++node_id) {
    if(node_id == (int)get_worker_id()) continue;
    auto id_bag = paracel_read<std::vector<std::string> >("item_bag_" + std::to_string(node_id));
    for(auto & iv : item_vects) {
      for(auto & iid : id_bag) {
        auto jv = paracel_read<std::vector<double> >(iid);
        double sim = paracel::dot_product(iv.second, jv);
        result[iv.first].push_back(std::make_pair(iid, sim));
      } // id_bag
    } // for iv
  } // bcast_ring
  sync();

  // select the top-k most similar items
  select_top();
  sync();
}
/** @brief  Data node main loop: serve data requests until the job finishes. */
int data_node (int argc, char* argv[])
{
    char         mailbox[MAILBOX_ALIAS_SIZE];
    msg_error_t  status;
    msg_task_t   msg = NULL;

    sprintf (mailbox, DATANODE_MAILBOX, get_worker_id (MSG_host_self ()));

    while (!job.finished)
    {
        msg = NULL;
        status = receive (&msg, mailbox);
        if (status == MSG_OK)
        {
            if (message_is (msg, SMS_FINISH))
            {
                MSG_task_destroy (msg);
                break;
            }
            else
            {
                send_data (msg);
            }
        }
    }

    return 0;
}
void init_paras() {
  for(auto & kv : item_vects) {
    paracel_write(kv.first, kv.second); // push vector
    item_bag.push_back(kv.first);
  }
  std::string key = "item_bag_" + std::to_string(get_worker_id());
  paracel_write(key, item_bag); // push iids
}
void * master_worker_computation_routine(void * arg) {
    launchArg_t * launchArg = (launchArg_t *) arg;
    ocrPolicyDomain_t * pd = launchArg->PD;
    ocrWorker_t * worker = (ocrWorker_t *) launchArg->arg;
    DPRINTF(DEBUG_LVL_INFO, "Starting scheduler routine of master worker %d\n", get_worker_id(worker));
    worker_loop(pd, worker);
    return NULL;
}
void dump_result() {
  std::unordered_map<std::string, double> kvmap_dump;
  for(auto & kv : kvmap) {
    kvmap_dump[paracel::cvt(kv.first)] = kv.second;
  }
  kvmap.clear();
  if(get_worker_id() == 0) {
    paracel_dump_dict(kvmap_dump, "pagerank_");
  }
}
void logistic_regression::agd_learning() {
  int data_sz = samples.size(), data_dim = samples[0].size();
  int cnt = 0, read_batch = data_sz / 100, update_batch = data_sz / 100;
  if(read_batch == 0) { read_batch = 10; }
  if(update_batch == 0) { update_batch = 10; }

  theta = paracel::random_double_list(data_dim);
  paracel_write("theta", theta); // init push

  vector<int> idx;
  for(int i = 0; i < data_sz; ++i) {
    idx.push_back(i);
  }
  paracel_register_bupdate("/mfs/user/wuhong/paracel/build/lib/liblg_update.so",
                           "lg_theta_update");
  double coff2 = 2. * beta * alpha;
  vector<double> delta(data_dim);

  // main loop
  for(int rd = 0; rd < rounds; ++rd) {
    std::random_shuffle(idx.begin(), idx.end());
    theta = paracel_read<vector<double> >("theta");
    vector<double> theta_old(theta);

    // traverse data
    cnt = 0;
    for(auto sample_id : idx) {
      if( (cnt % read_batch == 0) || (cnt == (int)idx.size() - 1) ) {
        theta = paracel_read<vector<double> >("theta");
        theta_old = theta;
      }
      for(int i = 0; i < data_dim; ++i) {
        double coff1 = alpha * (labels[sample_id] - lg_hypothesis(samples[sample_id]));
        double t = coff1 * samples[sample_id][i] - coff2 * theta[i];
        theta[i] += t;
      }
      if(debug) {
        loss_error.push_back(calc_loss());
      }
      if( (cnt % update_batch == 0) || (cnt == (int)idx.size() - 1) ) {
        for(int i = 0; i < data_dim; ++i) {
          delta[i] = theta[i] - theta_old[i];
        }
        paracel_bupdate("theta", delta);
      }
      cnt += 1;
    } // traverse

    sync();
    std::cout << "worker" << get_worker_id() << " at the end of rd" << rd << std::endl;
  } // rounds

  theta = paracel_read<vector<double> >("theta"); // last pull
}
void * worker_computation_routine(void * arg) {
    // Need to pass down a data-structure
    launchArg_t * launchArg = (launchArg_t *) arg;
    ocrPolicyDomain_t * pd = launchArg->PD;
    ocrWorker_t * worker = (ocrWorker_t *) launchArg->arg;
    // associate current thread with the worker
    associate_comp_platform_and_worker(pd, worker);
    // Set up this worker context to take EDTs.
    // This assumes workers are not relocatable.
    DPRINTF(DEBUG_LVL_INFO, "Starting scheduler routine of worker %d\n", get_worker_id(worker));
    worker_loop(pd, worker);
    return NULL;
}
size_t fiber_control::pick_fiber_worker(fiber* fib) {
  // first try to use the original worker if possible
  size_t choice = get_worker_id();
  if (choice == (size_t)(-1) || fib->affinity.get(choice) == 0) {
    // choice rejected, pick randomly from the available choices
    size_t ra = graphlab::random::fast_uniform<size_t>(0, fib->affinity_array.size() - 1);
    size_t rb = graphlab::random::fast_uniform<size_t>(0, fib->affinity_array.size() - 1);
    ra = fib->affinity_array[ra];
    rb = fib->affinity_array[rb];
    choice = (schedule[ra].nactive <= schedule[rb].nactive) ? ra : rb;
  }
  return choice;
}
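The fallback branch above is the "power of two choices" heuristic: sample two eligible workers uniformly at random and keep the less loaded one. A self-contained sketch of that heuristic follows; std::mt19937 stands in for graphlab::random, and the hypothetical `eligible` and `loads` vectors stand in for fib->affinity_array and schedule[].nactive.

// Sketch of the "power of two choices" fallback: two random samples are
// enough to balance load nearly as well as inspecting every worker.
// Assumes `eligible` is non-empty.
#include <cstddef>
#include <random>
#include <vector>

std::size_t pick_less_loaded(const std::vector<std::size_t>& eligible,
                             const std::vector<std::size_t>& loads,
                             std::mt19937& rng) {
  std::uniform_int_distribution<std::size_t> dist(0, eligible.size() - 1);
  std::size_t a = eligible[dist(rng)];
  std::size_t b = eligible[dist(rng)];
  // keep whichever candidate currently has fewer active fibers
  return (loads[a] <= loads[b]) ? a : b;
}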
/**
 * @brief  Checks if a worker is a straggler.
 * @param  worker  The worker to be probed.
 * @return 1 if true, 0 if false.
 */
static int is_straggler (msg_host_t worker)
{
    int     task_count;
    size_t  wid;

    wid = get_worker_id (worker);
    task_count = (config.slots[MAP] + config.slots[REDUCE])
        - (job.heartbeats[wid].slots_av[MAP] + job.heartbeats[wid].slots_av[REDUCE]);

    if (MSG_get_host_speed (worker) < config.grid_average_speed && task_count > 0)
        return 1;

    return 0;
}
void logistic_regression::ipm_learning() {
  int data_sz = samples.size(), data_dim = samples[0].size();
  theta = paracel::random_double_list(data_dim);
  paracel_write("theta", theta); // init push

  vector<int> idx;
  for(int i = 0; i < data_sz; ++i) {
    idx.push_back(i);
  }
  paracel_register_bupdate("/mfs/user/wuhong/paracel/build/lib/liblg_update.so",
                           "lg_theta_update");
  double coff2 = 2. * beta * alpha;
  double wgt = 1. / get_worker_size();
  vector<double> delta(data_dim);

  // main loop
  for(int rd = 0; rd < rounds; ++rd) {
    std::random_shuffle(idx.begin(), idx.end());
    theta = paracel_read<vector<double> >("theta");
    vector<double> theta_old(theta);

    // traverse data
    for(auto sample_id : idx) {
      for(int i = 0; i < data_dim; ++i) {
        double coff1 = alpha * (labels[sample_id] - lg_hypothesis(samples[sample_id]));
        double t = coff1 * samples[sample_id][i] - coff2 * theta[i];
        theta[i] += t;
      }
      if(debug) {
        loss_error.push_back(calc_loss());
      }
    } // traverse

    for(int i = 0; i < data_dim; ++i) {
      delta[i] = wgt * (theta[i] - theta_old[i]);
    }
    sync(); // sync for map
    paracel_bupdate("theta", delta); // update with delta
    sync(); // sync for reduce
    std::cout << "worker" << get_worker_id() << " at the end of rd" << rd << std::endl;
  } // rounds

  theta = paracel_read<vector<double> >("theta"); // last pull
}
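ipm_learning pushes, once per round, 1/nworkers of each worker's local displacement of theta, so summing the workers' deltas on the parameter server averages their locally trained models (iterative parameter mixing). A minimal sketch of that per-round delta computation, in plain C++ with no paracel calls:

// Sketch of the per-round delta pushed by ipm_learning above: each worker
// contributes 1/nworkers of its local displacement, so adding all deltas
// to the shared model averages the workers' local models.
#include <cstddef>
#include <vector>

std::vector<double> ipm_delta(const std::vector<double>& theta_old,
                              const std::vector<double>& theta_new,
                              std::size_t nworkers) {
  double wgt = 1.0 / static_cast<double>(nworkers);
  std::vector<double> delta(theta_new.size());
  for (std::size_t i = 0; i < delta.size(); ++i) {
    delta[i] = wgt * (theta_new[i] - theta_old[i]);
  }
  return delta;
}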
/**
 * @brief  Mark the tasks of a straggler as possible speculative tasks.
 * @param  worker  The straggler worker.
 */
static void set_speculative_tasks (msg_host_t worker)
{
    size_t       tid;
    size_t       wid;
    task_info_t  ti;

    wid = get_worker_id (worker);

    if (job.heartbeats[wid].slots_av[MAP] < config.slots[MAP])
    {
        for (tid = 0; tid < config.amount_of_tasks[MAP]; tid++)
        {
            if (job.task_list[MAP][tid][0] != NULL)
            {
                ti = (task_info_t) MSG_task_get_data (job.task_list[MAP][tid][0]);
                if (ti->wid == wid && task_time_elapsed (job.task_list[MAP][tid][0]) > 60)
                {
                    job.task_status[MAP][tid] = T_STATUS_TIP_SLOW;
                }
            }
        }
    }

    if (job.heartbeats[wid].slots_av[REDUCE] < config.slots[REDUCE])
    {
        for (tid = 0; tid < config.amount_of_tasks[REDUCE]; tid++)
        {
            if (job.task_list[REDUCE][tid][0] != NULL)
            {
                ti = (task_info_t) MSG_task_get_data (job.task_list[REDUCE][tid][0]);
                if (ti->wid == wid && task_time_elapsed (job.task_list[REDUCE][tid][0]) > 60)
                {
                    job.task_status[REDUCE][tid] = T_STATUS_TIP_SLOW;
                }
            }
        }
    }
}
/** @brief  Main master function. */
int master (int argc, char* argv[])
{
    heartbeat_t  heartbeat;
    msg_error_t  status;
    msg_host_t   worker;
    msg_task_t   msg = NULL;
    size_t       wid;
    task_info_t  ti;
    double       total_cpu_time = 0.0;
    double       total_task_time = 0.0;

    print_config ();
    XBT_INFO ("JOB BEGIN");
    XBT_INFO (" ");

    tasks_log = fopen ("tasks.csv", "w");
    fprintf (tasks_log, "task_id,phase,worker_id,time,action,shuffle_end\n");

    while (job.tasks_pending[MAP] + job.tasks_pending[REDUCE] > 0)
    {
        msg = NULL;
        status = receive (&msg, MASTER_MAILBOX);
        if (status == MSG_OK)
        {
            worker = MSG_task_get_source (msg);
            wid = get_worker_id (worker);

            if (message_is (msg, SMS_HEARTBEAT))
            {
                heartbeat = &job.heartbeats[wid];

                if (is_straggler (worker))
                {
                    set_speculative_tasks (worker);
                }
                else
                {
                    if (heartbeat->slots_av[MAP] > 0)
                        send_scheduler_task(MAP, wid);

                    if (heartbeat->slots_av[REDUCE] > 0)
                        send_scheduler_task(REDUCE, wid);
                }
            }
            else if (message_is (msg, SMS_TASK_DONE))
            {
                ti = (task_info_t) MSG_task_get_data (msg);

                if (job.task_status[ti->phase][ti->id] != T_STATUS_DONE)
                {
                    job.task_status[ti->phase][ti->id] = T_STATUS_DONE;
                    finish_all_task_copies (ti);
                    job.tasks_pending[ti->phase]--;

                    if (job.tasks_pending[ti->phase] <= 0)
                    {
                        XBT_INFO (" ");
                        XBT_INFO ("%s PHASE DONE", (ti->phase==MAP?"MAP":"REDUCE"));
                        XBT_INFO (" ");
                    }

                    ti->finished_time = MSG_get_clock();
                    ti->elapsed_time = ti->finished_time - ti->start_time;
                    total_task_time += ti->elapsed_time;
                    total_cpu_time += ti->cpu_time;
                }
                xbt_free_ref (&ti);
            }
            MSG_task_destroy (msg);
        }
    }

    fclose (tasks_log);

    job.finished = 1;

    print_config ();
    print_stats ();

    XBT_INFO ("JOB END");
    XBT_INFO ("\tclock_time: %f", MSG_get_clock());
    XBT_INFO ("\ttotal_task_time: %f(%f)", total_task_time, total_task_time / MSG_get_clock());
    XBT_INFO ("\ttotal_cpu_time: %f(%f)", total_cpu_time, total_cpu_time / MSG_get_clock());

    return 0;
}
void init(const string & pattern) {
  // load miu
  auto lines = paracel_loadall(input_miu);
  auto temp = paracel::str_split(lines[1], '\t');
  miu = std::stod(temp[1]);

  // load item bias
  lines = paracel_loadall(input_ibias);
  auto local_ibias_parser = [&] (const vector<string> & linelst,
                                 const char sep = '\t') {
    for(auto & line : linelst) {
      auto v = paracel::str_split(line, sep);
      ibias[v[0]] = std::stod(v[1]);
    }
  };
  local_ibias_parser(lines, '\t');
  lines.resize(0);
  std::cout << "print: " << ibias.size() << std::endl;

  /*
  // load some of ifactor
  lines = paracel_load(input_ifac);
  auto local_ifac_parser = [&] (const vector<string> & linelst,
                                const char sep1 = '\t',
                                const char sep2 = '|') {
    auto tmp1 = paracel::str_split(linelst[0], sep1);
    auto tmp2 = paracel::str_split(tmp1[1], sep2);
    // init fac_dim
    fac_dim = tmp2.size();
    for(auto & line : linelst) {
      vector<double> tmp;
      auto v = paracel::str_split(line, sep1);
      auto vv = paracel::str_split(v[1], sep2);
      for(size_t i = 0; i < vv.size(); ++i) {
        tmp.push_back(std::stod(vv[i]));
      }
      ifactor[v[0]] = tmp;
    }
  };
  local_ifac_parser(lines, '\t', '|');
  lines.resize(0);
  */

  // init global ifactor
  if(get_worker_id() == 0) {
    auto handler_lambda = [&] (const vector<string> & linelst) {
      for(auto & line : linelst) {
        vector<double> tmp;
        auto v = paracel::str_split(line, '\t');
        auto vv = paracel::str_split(v[1], '|');
        for(size_t i = 0; i < vv.size(); ++i) {
          tmp.push_back(std::stod(vv[i]));
        }
        ifactor[v[0]] = tmp;
        paracel_write(v[0] + "_ifactor", tmp); // key: "iid_ifactor"
      }
    };
    paracel_sequential_loadall(input_ifac, handler_lambda);
  }
  sync();
  ifactor.clear();

  // load bigraph
  auto local_rating_parser = [] (const std::string & line) {
    return paracel::str_split(line, ',');
  };
  auto rating_parser = paracel::gen_parser(local_rating_parser);
  paracel_load_as_graph(rating_graph, input_rating, rating_parser, pattern);

  // split bigraph into user rating list
  auto split_lambda = [&] (const std::string & a,
                           const std::string & b,
                           double c) {
    // default fmap: first dim is uid
    usr_rating_lst[a].push_back(std::make_pair(b, c));
  };
  rating_graph.traverse(split_lambda);
  std::cout << "traverse done" << std::endl;

  // init ufactor with specified ufac
  auto select_lambda = [&] (const vector<string> & linelst) {
    auto tmp1 = paracel::str_split(linelst[0], '\t');
    auto tmp2 = paracel::str_split(tmp1[1], '|');
    // init fac_dim
    fac_dim = tmp2.size();
    for(auto & line : linelst) {
      vector<double> tmp;
      auto v = paracel::str_split(line, '\t');
      if(usr_rating_lst.count(v[0]) == 0) { continue; }
      auto vv = paracel::str_split(v[1], '|');
      for(size_t i = 0; i < vv.size(); ++i) {
        tmp.push_back(std::stod(vv[i]));
      }
      ufactor[v[0]] = tmp;
    }
  }; // select_lambda

  // load initial user factors
  paracel_sequential_loadall(input_ufac, select_lambda);
  std::cout << "load ufactor done" << ufactor.size() << "|" << std::endl;

  // init ubias with specified ubias
  auto filter_lambda = [&] (const vector<string> & linelst) {
    for(auto & line : linelst) {
      auto v = paracel::str_split(line, '\t');
      string uid = v[0];
      if(usr_rating_lst.count(uid) == 0) { continue; }
      ubias[uid] = std::stod(v[1]);
    }
  };

  // load initial user bias
  paracel_sequential_loadall(input_ubias, filter_lambda);
  std::cout << "load ubias done" << ubias.size() << std::endl;

  // fill in ufactor/ubias for users with no specified ufac
  for(auto & kv : usr_rating_lst) {
    if(ufactor.count(kv.first) == 0) {
      ufactor[kv.first] = paracel::random_double_list(fac_dim, 0.001);
    }
    if(ubias.count(kv.first) == 0) {
      ubias[kv.first] = 0.001 * paracel::random_double();
    }
  }
}
void init_paras() {
  auto local_parser = [] (const std::string & line) {
    return paracel::str_split(line, ',');
  };
  auto f_parser = paracel::gen_parser(local_parser);
  paracel_load_as_graph(local_graph, input, f_parser, "fmap");
  if(get_worker_id() == 0) std::cout << "load done" << std::endl;

  // count out-links of every local source node
  auto cnt_lambda = [&] (const node_t & a, const node_t & b, double c) {
    if(!kvmap.count(a)) {
      kvmap[a] = 1.;
    } else {
      kvmap[a] += 1.;
    }
  };
  local_graph.traverse(cnt_lambda); // assumes the data pieces do not overlap

  // generate kv + local combine
  auto kvinit_lambda = [&] (const node_t & a, const node_t & b, double c) {
    klstmap[b].push_back(std::make_pair(a, kvmap[a]));
  };
  local_graph.traverse(kvinit_lambda);
  if(get_worker_id() == 0) std::cout << "stat done" << std::endl;

  // init push to construct global connect info
  std::unordered_map<std::string, std::vector<std::pair<node_t, double> > > klstmap_tmp;
  for(auto & kv : klstmap) {
    if(kv.first == SENTINEL) continue; // a little tricky here
    klstmap_tmp[paracel::cvt(kv.first) + "_links"] = kv.second;
  }
  paracel_bupdate_multi(klstmap_tmp, handle_file, update_function);
  if(get_worker_id() == 0) std::cout << "first bupdate done" << std::endl;
  paracel_sync();

  // read connect info only once
  klstmap.clear();
  for(auto & kv : kvmap) {
    // notice: limit memory here
    paracel_read<std::vector<std::pair<node_t, double> > >(paracel::cvt(kv.first) + "_links", klstmap[kv.first]);
  }
  if(get_worker_id() == 0) std::cout << "first read done" << std::endl;

  // reuse kvmap to store pr
  // init pr with 1. / total_node_sz
  auto worker_comm = get_comm();
  long node_sz = kvmap.size();
  worker_comm.allreduce(node_sz);
  double init_val = 1. / node_sz;
  std::unordered_map<std::string, double> tmp;
  for(auto & kv : kvmap) {
    kvmap[kv.first] = init_val;
    tmp[paracel::cvt(kv.first) + "_pr"] = init_val;
  }
  paracel_write_multi(tmp);
  paracel_sync();
}
void logistic_regression::dump_result() {
  if(get_worker_id() == 0) {
    paracel_dump_vector(theta, "lg_theta_", "|");
    paracel_dump_vector(loss_error, "lg_loss_error_", "\n");
  }
}