void init() {
   auto f_parser = paracel::gen_parser(local_parser);
   paracel_load_as_graph(grp, input, f_parser, "fmap");
   // init vertex_val_map
   auto lambda = [&] (const std::string & rid,
                      const std::string & cid,
                      double wgt) {
     vertex_val_map[rid] = std::stod(rid);
   };
   grp.traverse(lambda);
   // init vertex_adj_edge_val_map & vertex_active_map
   for(auto & vertex : vertex_val_map) {
     vertex_adj_edge_val_map[vertex.first] = grp.adjacent(vertex.first);
     vertex_active_map[vertex.first] = true;
   }
 }
Example #2
0
  /**
   * Load every input this object needs and populate its local state, in
   * this order:
   *   1. miu            <- input_miu
   *   2. ibias          <- input_ibias
   *   3. item factors   <- input_ifac, read and published by worker 0 only
   *                        under the key "<iid>_ifactor"
   *   4. rating_graph   <- input_rating, then split into usr_rating_lst
   *   5. ufactor/ubias  <- input_ufac / input_ubias, restricted to the keys
   *                        present in usr_rating_lst; missing entries get
   *                        small random defaults
   *
   * @param pattern forwarded unchanged to paracel_load_as_graph when
   *                loading input_rating.
   *
   * NOTE(review): fields are indexed right after str_split without a size
   * check, so every input line is assumed to be well-formed
   * ("key<TAB>value") — confirm against whatever produces these files.
   */
  void init(const string & pattern) {
    // Turn a '|'-separated field into a list of doubles.
    auto parse_factor = [] (const string & field) -> vector<double> {
      auto pieces = paracel::str_split(field, '|');
      vector<double> values;
      values.reserve(pieces.size());
      for(size_t i = 0; i < pieces.size(); ++i) {
        values.push_back(std::stod(pieces[i]));
      }
      return values;
    };

    // load miu
    // NOTE(review): the value is taken from the SECOND line (index 1) of
    // input_miu — presumably the first line is a header; confirm.
    auto lines = paracel_loadall(input_miu);
    auto temp = paracel::str_split(lines[1], '\t');
    miu = std::stod(temp[1]);

    // load item bias: one "iid<TAB>bias" record per line
    lines = paracel_loadall(input_ibias);
    for(const auto & line : lines) {
      auto v = paracel::str_split(line, '\t');
      ibias[v[0]] = std::stod(v[1]);
    }
    lines.clear();
    std::cout << "print: " << ibias.size() << std::endl;

    // init global ifactor: only worker 0 reads input_ifac and writes each
    // item's factor out; every worker then waits at sync().
    if(get_worker_id() == 0) {
      auto handler_lambda = [&] (const vector<string> & linelst) {
        for(const auto & line : linelst) {
          auto v = paracel::str_split(line, '\t');
          auto fac = parse_factor(v[1]);
          ifactor[v[0]] = fac;
          paracel_write(v[0] + "_ifactor", fac); // key: "iid_ifactor"
        }
      };
      paracel_sequential_loadall(input_ifac, handler_lambda);
    }
    sync();
    // The local copy filled above is dropped again right after the sync.
    ifactor.clear();

    // load bigraph
    auto local_rating_parser = [] (const std::string & line) {
      return paracel::str_split(line, ',');
    };
    auto rating_parser = paracel::gen_parser(local_rating_parser);
    paracel_load_as_graph(rating_graph,
                          input_rating,
                          rating_parser,
                          pattern);
    // split bigraph into user rating list
    auto split_lambda = [&] (const std::string & a,
                             const std::string & b,
                             double c) {
      // default fmap: first dim is uid
      usr_rating_lst[a].push_back(std::make_pair(b, c));
    };
    rating_graph.traverse(split_lambda);
    std::cout << "traverse done" << std::endl;

    // init ufactor with specified ufac
    auto select_lambda = [&] (const vector<string> & linelst) {
      // Nothing to read from an empty chunk; indexing linelst[0] below
      // would be undefined behaviour.
      if(linelst.empty()) { return; }
      // init fac_dim from the first record of the chunk, whether or not
      // that user is kept below
      auto tmp1 = paracel::str_split(linelst[0], '\t');
      auto tmp2 = paracel::str_split(tmp1[1], '|');
      fac_dim = tmp2.size();
      for(const auto & line : linelst) {
        auto v = paracel::str_split(line, '\t');
        // keep only users that appear in the local rating lists
        if(usr_rating_lst.count(v[0]) == 0) { continue; }
        ufactor[v[0]] = parse_factor(v[1]);
      }
    }; // select_lambda
    // load started user factor
    paracel_sequential_loadall(input_ufac, select_lambda);
    std::cout << "load ufactor done" << ufactor.size() << "|" << std::endl;

    // init ubias with specified ubias
    auto filter_lambda = [&] (const vector<string> & linelst) {
      for(const auto & line : linelst) {
        auto v = paracel::str_split(line, '\t');
        string uid = v[0];
        if(usr_rating_lst.count(uid) == 0) { continue; }
        ubias[uid] = std::stod(v[1]);
      }
    };
    // load started user bias
    paracel_sequential_loadall(input_ubias, filter_lambda);
    std::cout << "load ubias done" << ubias.size() << std::endl;

    // resize ufactor/ubias here, with no ufac specified
    // NOTE(review): fac_dim is only assigned inside select_lambda; if
    // input_ufac yields no lines it keeps whatever value it had before
    // this call — confirm it is initialised elsewhere.
    for(auto & kv : usr_rating_lst) {
      if(ufactor.count(kv.first) == 0) {
        ufactor[kv.first] = paracel::random_double_list(fac_dim, 0.001);
      }
      if(ubias.count(kv.first) == 0) {
        ubias[kv.first] = 0.001 * paracel::random_double();
      }
    }

  }
Example #3
0
  void init_paras() {
    auto local_parser = [] (const std::string & line) {
      return paracel::str_split(line, ',');
    };
    auto f_parser = paracel::gen_parser(local_parser);
    paracel_load_as_graph(local_graph, input, f_parser, "fmap");
    if(get_worker_id() == 0) std::cout << "load done" << std::endl;

    auto cnt_lambda = [&] (const node_t & a,
                           const node_t & b,
                           double c) {
      if(!kvmap.count(a)) {
        kvmap[a] = 1.;
      } else {
        kvmap[a] += 1.;
      }
    };
    local_graph.traverse(cnt_lambda);
    
    // make sure there are no same pieces
    // generate kv + local combine
    auto kvinit_lambda = [&] (const node_t & a,
                              const node_t & b,
                              double c) {
      klstmap[b].push_back(std::make_pair(a, kvmap[a]));
    };
    local_graph.traverse(kvinit_lambda);
    if(get_worker_id() == 0) std::cout << "stat done" << std::endl;

    // init push to construct global connect info
    std::unordered_map<std::string,
        std::vector<std::pair<node_t, double> > > klstmap_tmp;
    for(auto & kv : klstmap) {
      if(kv.first == SENTINEL) continue; // little tricky here
      klstmap_tmp[paracel::cvt(kv.first) + "_links"] = kv.second;
    }
    paracel_bupdate_multi(klstmap_tmp,
                          handle_file,
                          update_function);
    if(get_worker_id() == 0) std::cout << "first bupdate done" << std::endl;
    paracel_sync();

    // read connect info only once
    klstmap.clear();
    for(auto & kv : kvmap) {
      // notice: limit memory here
      paracel_read<std::vector<std::pair<node_t, double> > >
          (paracel::cvt(kv.first) + "_links",
           klstmap[kv.first]);
    }
    if(get_worker_id() == 0) std::cout << "first read done" << std::endl;

    // reuse kvmap to store pr
    // init pr with 1. / total_node_sz
    auto worker_comm = get_comm();
    long node_sz = kvmap.size();
    worker_comm.allreduce(node_sz);
    double init_val = 1. / node_sz;
    std::unordered_map<std::string, double> tmp;
    for(auto & kv : kvmap) {
      kvmap[kv.first] = init_val; 
      tmp[paracel::cvt(kv.first) + "_pr"] = init_val;
    }
    paracel_write_multi(tmp);
    paracel_sync();
  }