void init() { auto f_parser = paracel::gen_parser(local_parser); paracel_load_as_graph(grp, input, f_parser, "fmap"); // init vertex_val_map auto lambda = [&] (const std::string & rid, const std::string & cid, double wgt) { vertex_val_map[rid] = std::stod(rid); }; grp.traverse(lambda); // init vertex_adj_edge_val_map & vertex_active_map for(auto & vertex : vertex_val_map) { vertex_adj_edge_val_map[vertex.first] = grp.adjacent(vertex.first); vertex_active_map[vertex.first] = true; } }
void init(const string & pattern) { // load miu auto lines = paracel_loadall(input_miu); auto temp = paracel::str_split(lines[1], '\t'); miu = std::stod(temp[1]); // load item bias lines = paracel_loadall(input_ibias); auto local_ibias_parser = [&] (const vector<string> & linelst, const char sep = '\t') { for(auto & line : linelst) { auto v = paracel::str_split(line, sep); ibias[v[0]] = std::stod(v[1]); } }; local_ibias_parser(lines, '\t'); lines.resize(0); std::cout << "print: " << ibias.size() << std::endl; /* // load some of ifactor lines = paracel_load(input_ifac); auto local_ifac_parser = [&] (const vector<string> & linelst, const char sep1 = '\t', const char sep2 = '|') { auto tmp1 = paracel::str_split(linelst[0], sep1); auto tmp2 = paracel::str_split(tmp1[1], sep2); // init fac_dim fac_dim = tmp2.size(); for(auto & line : linelst) { vector<double> tmp; auto v = paracel::str_split(line, sep1); auto vv = paracel::str_split(v[1], sep2); for(size_t i = 0; i < vv.size(); ++i) { tmp.push_back(std::stod(vv[i])); } ifactor[v[0]] = tmp; } }; local_ifac_parser(lines, '\t', '|'); lines.resize(0); */ // init global ifactor if(get_worker_id() == 0) { auto handler_lambda = [&] (const vector<string> & linelst) { for(auto & line : linelst) { vector<double> tmp; auto v = paracel::str_split(line, '\t'); auto vv = paracel::str_split(v[1], '|'); for(size_t i = 0;i < vv.size(); ++i) { tmp.push_back(std::stod(vv[i])); } ifactor[v[0]] = tmp; paracel_write(v[0] + "_ifactor", tmp); // key: "iid_ifactor" } }; paracel_sequential_loadall(input_ifac, handler_lambda); } sync(); ifactor.clear(); // load bigraph auto local_rating_parser = [] (const std::string & line) { return paracel::str_split(line, ','); }; auto rating_parser = paracel::gen_parser(local_rating_parser); paracel_load_as_graph(rating_graph, input_rating, rating_parser, pattern); // split bigraph into user rating list auto split_lambda = [&] (const std::string & a, const std::string & b, double c) { // default fmap: first dim is uid usr_rating_lst[a].push_back( std::make_pair(b, c) ); }; rating_graph.traverse(split_lambda); std::cout << "traverse done" << std::endl; // init ufactor with specified ufac auto select_lambda = [&] (const vector<string> & linelst) { auto tmp1 = paracel::str_split(linelst[0], '\t'); auto tmp2 = paracel::str_split(tmp1[1], '|'); // init fac_dim fac_dim = tmp2.size(); for(auto & line : linelst) { vector<double> tmp; auto v = paracel::str_split(line, '\t'); if(usr_rating_lst.count(v[0]) == 0) { continue; } auto vv = paracel::str_split(v[1], '|'); for(size_t i = 0; i < vv.size(); ++i) { tmp.push_back(std::stod(vv[i])); } ufactor[v[0]] = tmp; } }; // select_lambda // load started user factor paracel_sequential_loadall(input_ufac, select_lambda); std::cout << "load ufactor done" << ufactor.size() << "|" << std::endl; // init ubias with specified ubias auto filter_lambda = [&] (const vector<string> & linelst) { for(auto & line : linelst) { auto v = paracel::str_split(line, '\t'); string uid = v[0]; if(usr_rating_lst.count(uid) == 0) { continue; } ubias[uid] = std::stod(v[1]); } }; // load started user bias paracel_sequential_loadall(input_ubias, filter_lambda); std::cout << "load ubias done" << ubias.size() << std::endl; // resize ufactor/ubias here, with no ufac specified for(auto & kv : usr_rating_lst) { if(ufactor.count(kv.first) == 0) { ufactor[kv.first] = paracel::random_double_list(fac_dim, 0.001); } if(ubias.count(kv.first) == 0) { ubias[kv.first] = 0.001 * paracel::random_double(); } } }
void init_paras() { auto local_parser = [] (const std::string & line) { return paracel::str_split(line, ','); }; auto f_parser = paracel::gen_parser(local_parser); paracel_load_as_graph(local_graph, input, f_parser, "fmap"); if(get_worker_id() == 0) std::cout << "load done" << std::endl; auto cnt_lambda = [&] (const node_t & a, const node_t & b, double c) { if(!kvmap.count(a)) { kvmap[a] = 1.; } else { kvmap[a] += 1.; } }; local_graph.traverse(cnt_lambda); // make sure there are no same pieces // generate kv + local combine auto kvinit_lambda = [&] (const node_t & a, const node_t & b, double c) { klstmap[b].push_back(std::make_pair(a, kvmap[a])); }; local_graph.traverse(kvinit_lambda); if(get_worker_id() == 0) std::cout << "stat done" << std::endl; // init push to construct global connect info std::unordered_map<std::string, std::vector<std::pair<node_t, double> > > klstmap_tmp; for(auto & kv : klstmap) { if(kv.first == SENTINEL) continue; // little tricky here klstmap_tmp[paracel::cvt(kv.first) + "_links"] = kv.second; } paracel_bupdate_multi(klstmap_tmp, handle_file, update_function); if(get_worker_id() == 0) std::cout << "first bupdate done" << std::endl; paracel_sync(); // read connect info only once klstmap.clear(); for(auto & kv : kvmap) { // notice: limit memory here paracel_read<std::vector<std::pair<node_t, double> > > (paracel::cvt(kv.first) + "_links", klstmap[kv.first]); } if(get_worker_id() == 0) std::cout << "first read done" << std::endl; // reuse kvmap to store pr // init pr with 1. / total_node_sz auto worker_comm = get_comm(); long node_sz = kvmap.size(); worker_comm.allreduce(node_sz); double init_val = 1. / node_sz; std::unordered_map<std::string, double> tmp; for(auto & kv : kvmap) { kvmap[kv.first] = init_val; tmp[paracel::cvt(kv.first) + "_pr"] = init_val; } paracel_write_multi(tmp); paracel_sync(); }