std::vector<std::vector<T>> create_vertex_data_from_const(const sgraph& g, const T& init) { std::vector<std::vector<T>> ret(g.get_num_partitions()); for (size_t i = 0; i < g.get_num_partitions(); ++i) { ret[i] = std::vector<T>(g.vertex_partition(i).size(), init); } return ret; }
/**
 * Applies \p fn to the vertex data of every partition of \p g and collects
 * the results, one output sarray per partition.
 *
 * For each partition i a fresh sarray<flexible_type> is opened for writing,
 * given the type \p result_type, filled via transform(vdata[i], out, fn) and
 * closed. Partitions are dispatched through parallel_for.
 *
 * \param g            Graph supplying the per-partition vertex sframes.
 * \param result_type  Type set on every output sarray.
 * \param fn           Function handed to transform() for every partition.
 * \return             Vector of g.get_num_partitions() output sarrays.
 */
std::vector<std::shared_ptr<sarray<flexible_type>>> vertex_apply(sgraph& g, flex_type_enum result_type, Fn fn) {
  using column_ptr = std::shared_ptr<sarray<flexible_type>>;

  const size_t partition_count = static_cast<size_t>(g.get_num_partitions());
  std::vector<column_ptr> results(partition_count);

  // All vertex partitions of the graph.
  const std::vector<sframe>& vertex_frames = g.vertex_group();

  // Each invocation writes only its own slot of `results`.
  auto process_partition = [&](size_t part) {
    column_ptr out = std::make_shared<sarray<flexible_type>>();
    out->open_for_write(1);
    out->set_type(result_type);
    transform(vertex_frames[part], *out, fn);
    out->close();
    results[part] = out;
  };

  parallel_for(static_cast<size_t>(0), partition_count, process_partition);
  return results;
}
/**
 * Combines one vertex column of \p g with an external per-partition array,
 * producing one output sarray per partition.
 *
 * For each partition i the column \p column_name is selected from the
 * partition's vertex sframe and passed together with *other[i] to
 * binary_transform(column, *other[i], out, fn). The output sarray is opened
 * for writing, typed as \p result_type and closed afterwards. Partitions are
 * dispatched through parallel_for.
 *
 * \param g            Graph supplying the per-partition vertex sframes.
 * \param column_name  Name of the vertex column to read.
 * \param other        One sarray per partition; its length is asserted to
 *                     equal g.get_num_partitions().
 * \param result_type  Type set on every output sarray.
 * \param fn           Function handed to binary_transform().
 * \return             Vector of g.get_num_partitions() output sarrays.
 */
std::vector<std::shared_ptr<sarray<flexible_type>>> vertex_apply(sgraph& g, std::string column_name, std::vector<std::shared_ptr<sarray<T>>> & other, flex_type_enum result_type, Fn fn) {
  using column_ptr = std::shared_ptr<sarray<flexible_type>>;

  ASSERT_EQ(g.get_num_partitions(), other.size());

  const size_t partition_count = static_cast<size_t>(g.get_num_partitions());
  std::vector<column_ptr> results(partition_count);

  // All vertex partitions of the graph.
  const std::vector<sframe>& vertex_frames = g.vertex_group();

  // Each invocation writes only its own slot of `results`.
  auto process_partition = [&](size_t part) {
    column_ptr graph_column = vertex_frames[part].select_column(column_name);

    column_ptr out = std::make_shared<sarray<flexible_type>>();
    out->open_for_write(1);
    out->set_type(result_type);
    binary_transform(*graph_column, *other[part], *out, fn);
    out->close();
    results[part] = out;
  };

  parallel_for(static_cast<size_t>(0), partition_count, process_partition);
  return results;
}
/**
 * Reduces a single vertex column of \p g to one value.
 *
 * Every partition's column \p column_name is reduced with graphlab::reduce,
 * using \p fn as the per-value step and \p init as the starting value. The
 * partial results returned for a partition are then folded into the final
 * accumulator with \p combine while holding a mutex, because partitions are
 * dispatched through parallel_for.
 *
 * NOTE(review): \p init seeds both the per-partition reductions and the final
 * accumulator, so it is presumably expected to be the neutral element of the
 * reduction — confirm against callers.
 *
 * \param g            Graph supplying the per-partition vertex sframes.
 * \param column_name  Name of the vertex column to reduce.
 * \param fn           Called as fn(value, accumulator) for each column value.
 * \param combine      Called as combine(partial, accumulator) for each
 *                     partial result.
 * \param init         Starting value (defaults to ResultType()).
 * \return             The combined accumulator.
 */
ResultType vertex_reduce(sgraph& g, std::string column_name, Reducer fn, Combiner combine, ResultType init = ResultType()) {
  const std::vector<sframe>& vertex_frames = g.vertex_group();
  mutex merge_mutex;
  ResultType total = init;

  // Adapter handed to graphlab::reduce: runs the user step and returns true.
  auto fold_value = [&](const flexible_type& value, ResultType& acc) {
    fn(value, acc);
    return true;
  };

  auto reduce_partition = [&](size_t part) {
    std::shared_ptr<sarray<flexible_type>> column =
        vertex_frames[part].select_column(column_name);
    std::vector<ResultType> partials =
        graphlab::reduce(*column, fold_value, init);

    // `total` is shared between partitions; merge under the lock.
    std::unique_lock<mutex> guard(merge_mutex);
    for (ResultType& partial : partials) {
      combine(partial, total);
    }
  };

  parallel_for(static_cast<size_t>(0),
               static_cast<size_t>(g.get_num_partitions()),
               reduce_partition);
  return total;
}
typename std::enable_if<!std::is_convertible<Reducer, std::string>::value, ResultType>::type /*ResultType*/ vertex_reduce(sgraph& g, Reducer fn, Combiner combine, ResultType init = ResultType()) { const std::vector<sframe>& vdata = g.vertex_group(); mutex lock; ResultType ret = init; parallel_for((size_t)(0), (size_t)g.get_num_partitions(), [&](size_t i) { std::vector<ResultType> result = graphlab::reduce(vdata[i], [&](const std::vector<flexible_type>& left, ResultType& right) { fn(left, right); return true; }, init); std::unique_lock<mutex> result_lock(lock); for (ResultType& res: result) { combine(res, ret); } }); return ret; }
/**
 * Applies this operation to \p output by calling
 * output.copy_edge_field(field, new_field, groupa, groupb).
 *
 * \param output   Graph the edge field copy is performed on.
 * \param parents  Not read by this implementation.
 */
virtual void execute(sgraph& output,
                     const std::vector<sgraph*>& parents) {
  output.copy_edge_field(field, new_field, groupa, groupb);
}
/**
 * Applies this operation to \p output by calling
 * output.add_edges(*data, source_field_name, target_field_name, groupa,
 * groupb).
 *
 * \param output   Graph the edges are added to.
 * \param parents  Not read by this implementation.
 */
virtual void execute(sgraph& output,
                     const std::vector<sgraph*>& parents) {
  output.add_edges(*data,
                   source_field_name,
                   target_field_name,
                   groupa,
                   groupb);
}
/**
 * Applies this operation to \p output by calling
 * output.add_vertices(*data, id_field_name).
 *
 * \param output   Graph the vertices are added to.
 * \param parents  Not read by this implementation.
 */
virtual void execute(sgraph& output,
                     const std::vector<sgraph*>& parents) {
  output.add_vertices(*data, id_field_name);
}
/**
 * Applies this operation to \p output by calling
 * output.select_edge_fields(fields, groupa, groupb).
 *
 * \param output   Graph whose edge fields are selected.
 * \param parents  Not read by this implementation.
 */
virtual void execute(sgraph& output,
                     const std::vector<sgraph*>& parents) {
  output.select_edge_fields(fields, groupa, groupb);
}
/**
 * Applies this operation to \p output by calling
 * output.select_vertex_fields(fields, group).
 *
 * \param output   Graph whose vertex fields are selected.
 * \param parents  Not read by this implementation.
 */
virtual void execute(sgraph& output,
                     const std::vector<sgraph*>& parents) {
  output.select_vertex_fields(fields, group);
}
/**
 * Applies this operation to \p output by calling
 * output.remove_vertex_field(field, group).
 *
 * \param output   Graph the vertex field is removed from.
 * \param parents  Not read by this implementation.
 */
virtual void execute(sgraph& output,
                     const std::vector<sgraph*>& parents) {
  output.remove_vertex_field(field, group);
}
void triple_apply_pagerank(sgraph& g, size_t& num_iter, double& total_pagerank, double& total_delta) { typedef sgraph_compute::sgraph_engine<flexible_type>::graph_data_type graph_data_type; typedef sgraph::edge_direction edge_direction; // initialize every vertex with core id kmin g.init_vertex_field(PAGERANK_COLUMN, reset_probability); g.init_vertex_field(PREV_PAGERANK_COLUMN, 1.0); g.init_vertex_field(DELTA_COLUMN, 0.0); // Initialize degree count sgraph_compute::sgraph_engine<flexible_type> ga; auto degrees = ga.gather( g, [=](const graph_data_type& center, const graph_data_type& edge, const graph_data_type& other, edge_direction edgedir, flexible_type& combiner) { combiner += 1; }, flexible_type(0), edge_direction::OUT_EDGE); g.add_vertex_field(degrees, OUT_DEGREE_COLUMN); num_iter = 0; total_delta = 0.0; total_pagerank = 0.0; timer mytimer; // Triple apply double w = (1 - reset_probability); const size_t degree_idx = g.get_vertex_field_id(OUT_DEGREE_COLUMN); const size_t pr_idx = g.get_vertex_field_id(PAGERANK_COLUMN); const size_t old_pr_idx = g.get_vertex_field_id(PREV_PAGERANK_COLUMN); sgraph_compute::triple_apply_fn_type apply_fn = [&](sgraph_compute::edge_scope& scope) { auto& source = scope.source(); auto& target = scope.target(); scope.lock_vertices(); target[pr_idx] += w * source[old_pr_idx] / source[degree_idx]; scope.unlock_vertices(); }; table_printer table({{"Iteration", 0}, {"L1 change in pagerank", 0}}); table.print_header(); for (size_t iter = 0; iter < max_iterations; ++iter) { if(cppipc::must_cancel()) { log_and_throw(std::string("Toolkit cancelled by user.")); } mytimer.start(); ++num_iter; g.init_vertex_field(PAGERANK_COLUMN, reset_probability); sgraph_compute::triple_apply(g, apply_fn, {PAGERANK_COLUMN}); // compute the change in pagerank auto delta = sgraph_compute::vertex_apply( g, flex_type_enum::FLOAT, [&](const std::vector<flexible_type>& vdata) { return std::abs((double)(vdata[pr_idx]) - (double)(vdata[old_pr_idx])); }); // make the 
current pagerank the old pagerank g.copy_vertex_field(PAGERANK_COLUMN, PREV_PAGERANK_COLUMN); g.replace_vertex_field(delta, DELTA_COLUMN); total_delta = sgraph_compute::vertex_reduce<double>(g, DELTA_COLUMN, [](const flexible_type& v, double& acc) { acc += (flex_float)v; }, [](const double& v, double& acc) { acc += v; }); table.print_row(iter+1, total_delta); // check convergence if (total_delta < threshold) { break; } } // end of pagerank iterations table.print_footer(); // cleanup g.remove_vertex_field(PREV_PAGERANK_COLUMN); g.remove_vertex_field(OUT_DEGREE_COLUMN); total_pagerank = sgraph_compute::vertex_reduce<double>(g, PAGERANK_COLUMN, [](const flexible_type& v, double& acc) { acc += (flex_float)v; }, [](const double& v, double& acc) { acc += v; }); }
/**
 * Computes a core id for every vertex of \p g by iterative peeling and
 * stores it in CORE_ID_COLUMN.
 *
 * Every vertex starts with core id KMIN, a degree equal to its number of
 * incident edges (counted over edge_direction::ANY_EDGE) and a cleared
 * "deleted" flag; every edge starts with a cleared "deleted" flag as well.
 *
 * For CURRENT_K = KMIN, KMIN+1, ..., KMAX-1 the edge function below is run
 * repeatedly until a pass deletes no vertex:
 *   - an undeleted endpoint whose remaining degree is <= CURRENT_K is marked
 *     deleted and gets core id CURRENT_K;
 *   - an undeleted edge with a deleted endpoint is marked deleted and the
 *     remaining degree of both endpoints is decremented, after which the
 *     endpoints are checked once more.
 *
 * Final core ids:
 *   - vertices whose original degree is 0 get KMIN;
 *   - other vertices that were never deleted get KMAX;
 *   - deleted vertices keep the CURRENT_K at which they were deleted.
 *
 * The degree-0 case is tested first on purpose: such a vertex has no edge,
 * so the edge function never visits it and it is never flagged as deleted.
 *
 * The helper columns DEGREE_COLUMN and DELETED_COLUMN (vertex and edge) are
 * removed before returning. Before every pass cppipc::must_cancel() is
 * polled; if it is set, log_and_throw is invoked.
 *
 * \param g  Graph to operate on.
 */
void triple_apply_kcore(sgraph& g) {
  using graph_data_type =
      sgraph_compute::sgraph_engine<flexible_type>::graph_data_type;
  using edge_direction = sgraph::edge_direction;

  // Initialize every vertex with core id KMIN and clear the helper columns.
  g.init_vertex_field(CORE_ID_COLUMN, KMIN);
  g.init_vertex_field(DEGREE_COLUMN, 0);
  g.init_vertex_field(DELETED_COLUMN, 0);
  g.init_edge_field(DELETED_COLUMN, 0);

  // Initial degree of every vertex: gather adds 1 per incident edge.
  sgraph_compute::sgraph_engine<flexible_type> ga;
  auto degrees = ga.gather(
      g,
      [=](const graph_data_type& center,
          const graph_data_type& edge,
          const graph_data_type& other,
          edge_direction edgedir,
          flexible_type& combiner) {
        combiner += 1;
      },
      flexible_type(0),
      edge_direction::ANY_EDGE);
  g.replace_vertex_field(degrees, DEGREE_COLUMN);

  long vertices_left = g.num_vertices();
  // Incremented by the edge function; reset before every pass.
  std::atomic<long> num_vertices_changed(0);

  const size_t core_idx = g.get_vertex_field_id(CORE_ID_COLUMN);
  const size_t degree_idx = g.get_vertex_field_id(DEGREE_COLUMN);
  const size_t v_deleted_idx = g.get_vertex_field_id(DELETED_COLUMN);
  const size_t e_deleted_idx = g.get_edge_field_id(DELETED_COLUMN);

  // Edge function run by triple_apply.
  sgraph_compute::triple_apply_fn_type apply_fn =
      [&](sgraph_compute::edge_scope& scope) {
        auto& source = scope.source();
        auto& target = scope.target();
        auto& edge = scope.edge();
        scope.lock_vertices();
        // Only edges that are still present take part.
        if (!edge[e_deleted_idx]) {
          // Check source degree.
          if (!source[v_deleted_idx] && source[degree_idx] <= CURRENT_K) {
            source[core_idx] = CURRENT_K;
            source[v_deleted_idx] = 1;
            ++num_vertices_changed;
          }
          // Check target degree.
          if (!target[v_deleted_idx] && target[degree_idx] <= CURRENT_K) {
            target[core_idx] = CURRENT_K;
            target[v_deleted_idx] = 1;
            ++num_vertices_changed;
          }
          // Delete the edge if either side is deleted.
          if (source[v_deleted_idx] || target[v_deleted_idx]) {
            edge[e_deleted_idx] = 1;
            --source[degree_idx];
            --target[degree_idx];
            // Removing this edge may push the other endpoint under the
            // threshold as well, so both endpoints are checked again.
            if (!source[v_deleted_idx] && source[degree_idx] <= CURRENT_K) {
              source[core_idx] = CURRENT_K;
              source[v_deleted_idx] = 1;
              ++num_vertices_changed;
            }
            if (!target[v_deleted_idx] && target[degree_idx] <= CURRENT_K) {
              target[core_idx] = CURRENT_K;
              target[v_deleted_idx] = 1;
              ++num_vertices_changed;
            }
          }
        }
        scope.unlock_vertices();
      };

  for (CURRENT_K = KMIN; CURRENT_K < KMAX; ++CURRENT_K) {
    while (true) {
      if (cppipc::must_cancel()) {
        log_and_throw(std::string("Toolkit cancelled by user."));
      }
      num_vertices_changed = 0;
      sgraph_compute::triple_apply(
          g, apply_fn,
          {CORE_ID_COLUMN, DEGREE_COLUMN, DELETED_COLUMN},
          {DELETED_COLUMN});

      // Read the counter once; no pass is running at this point.
      const long changed = num_vertices_changed.load();
      if (changed == 0) break;
      vertices_left -= changed;
      if (CURRENT_K == 0 || vertices_left == 0) {
        // we are done with the current core.
        break;
      }
      ASSERT_GT(vertices_left, 0);
    }
    logprogress_stream << "Finish computing core " << CURRENT_K
                       << "\t Vertices left: " << vertices_left << std::endl;
    if (vertices_left == 0) {
      break;
    }
  }  // end of kcore iterations

  auto final_core_ids = sgraph_compute::vertex_apply(
      g,
      degrees,
      flex_type_enum::INTEGER,
      [&](const std::vector<flexible_type>& vdata,
          const flexible_type& actual_degree) -> flexible_type {
        if (actual_degree == 0) {
          // Singleton vertices get KMIN. This must come before the deleted
          // check: a vertex without edges is never visited by apply_fn and
          // therefore never flagged as deleted.
          return flexible_type(KMIN);
        } else if (!vdata[v_deleted_idx]) {
          // Vertices that survived every round get KMAX.
          return flexible_type(KMAX);
        } else {
          return vdata[core_idx];
        }
      });
  g.replace_vertex_field(final_core_ids, CORE_ID_COLUMN);

  // Cleanup of the helper columns.
  g.remove_vertex_field(DEGREE_COLUMN);
  g.remove_vertex_field(DELETED_COLUMN);
  g.remove_edge_field(DELETED_COLUMN);
}