/**
 * Build the initializer for the parallel SFrame iterator.
 *
 * For every sframe in \p data_sources, the starting column offset is
 * recorded in column_offsets and a reader for each of its columns is
 * appended to sources.  A final entry holding the total column count is
 * appended to column_offsets.  The row range is then installed through
 * set_global_block().
 *
 * \param data_sources  The sframes to iterate over; asserted non-empty in
 *                      debug builds.  Every sframe that has at least one
 *                      column is asserted to have the same number of rows as
 *                      the first one.
 * \param _row_start    Forwarded to set_global_block().
 * \param _row_end      Forwarded to set_global_block().
 *
 * \note This operation is more expensive than the SFrame iterator creation.
 */
parallel_sframe_iterator_initializer::parallel_sframe_iterator_initializer(
    const std::vector<sframe>& data_sources,
    const size_t& _row_start,
    const size_t& _row_end) {

  column_offsets.clear();
  sources.clear();

  DASSERT_FALSE(data_sources.empty());

  // The first sframe defines the row count every other one is compared to.
  sf_size = data_sources.front().size();

  size_t columns_seen = 0;

  for (const sframe& frame : data_sources) {
    const size_t n_columns = frame.num_columns();

    // This sframe's columns start at the running total.
    column_offsets.push_back(columns_seen);
    columns_seen += n_columns;

    for (size_t col = 0; col < n_columns; ++col) {
      sources.push_back(frame.select_column(col)->get_reader());
      ASSERT_EQ(frame.size(), sf_size);
    }
  }

  // Trailing sentinel: the total number of columns across all sframes.
  column_offsets.push_back(columns_seen);

  set_global_block(_row_start, _row_end);
}
/**
 * Deserialize a length-prefixed byte run into a fixed-size char array.
 *
 * Reads a size_t byte count from the archive, asserts that it does not
 * exceed len, then reads that many bytes into s.
 */
static void exec(ArcType& a, char s[len]) {
  size_t stored_length;
  deserialize_impl<ArcType, size_t, false>::exec(a, stored_length);

  // Refuse to read more bytes than the destination array can hold.
  ASSERT_LE(stored_length, len);

  a.i->read(s, stored_length);
  DASSERT_FALSE(a.i->fail());
}
/**
 * Take ownership of the vertex data for one partition.
 *
 * The contents of \p vertices are moved into the slot for \p partition_id
 * (leaving \p vertices in a moved-from state) and the partition is then
 * flagged as loaded.  Debug builds assert that the id is in range, that the
 * partition was not already loaded, and that is_loaded() reports it as
 * loaded afterwards.
 */
void pysgraph_synchronize::load_vertex_partition(
    size_t partition_id,
    std::vector<sgraph_vertex_data>& vertices) {
  DASSERT_LT(partition_id, m_num_partitions);
  DASSERT_FALSE(m_is_partition_loaded[partition_id]);

  // Store the data first, then raise the flag.
  auto& partition_slot = m_vertex_partitions[partition_id];
  partition_slot = std::move(vertices);
  m_is_partition_loaded[partition_id] = true;

  DASSERT_TRUE(is_loaded(partition_id));
}
std::shared_ptr<unity_sgraph_base> unity_sgraph::lambda_triple_apply(const std::string& lambda_str, const std::vector<std::string>& mutated_fields) { log_func_entry(); if (mutated_fields.empty()) { log_and_throw("mutated_fields cannot be empty"); } std::shared_ptr<sgraph> g = std::make_shared<sgraph>((*m_graph)()); std::vector<std::string> mutated_vertex_fields, mutated_edge_fields; const auto& all_vertex_fields = g->get_vertex_fields(); const auto& all_edge_fields = g->get_edge_fields(); std::set<std::string> all_vertex_field_set(all_vertex_fields.begin(), all_vertex_fields.end()); std::set<std::string> all_edge_field_set(all_edge_fields.begin(), all_edge_fields.end()); for (auto& f : mutated_fields) { if (f == sgraph::VID_COLUMN_NAME || f == sgraph::SRC_COLUMN_NAME || f == sgraph::DST_COLUMN_NAME) { log_and_throw("mutated fields cannot contain id field: " + f); } if (!all_vertex_field_set.count(f) && !all_edge_field_set.count(f)) { log_and_throw("mutated field \"" + f + "\" cannot be found in graph"); } if (all_vertex_field_set.count(f)) mutated_vertex_fields.push_back(f); if (all_edge_field_set.count(f)) mutated_edge_fields.push_back(f); } DASSERT_FALSE(mutated_fields.empty()); sgraph_compute::triple_apply(*g, lambda_str, mutated_vertex_fields, mutated_edge_fields); std::shared_ptr<unity_sgraph> ret(new unity_sgraph(g)); return ret; }
/**
 * Serialize a NUL-terminated C string.
 *
 * Writes the byte count (string length plus one for the terminating '\0')
 * followed by the characters including the terminator.
 */
static void exec(OutArcType& oarc, char* const& s) {
  // +1 so that the trailing '\0' is stored along with the text.
  size_t bytes_with_nul = strlen(s) + 1;

  oarc << bytes_with_nul;
  oarc.write(static_cast<const char*>(s), bytes_with_nul);

  DASSERT_FALSE(oarc.fail());
}
/**
 * Serialize a NUL-terminated C string through the archive's output stream.
 *
 * The byte count (string length plus one for the terminating '\0') is
 * serialized first, then the characters including the terminator are
 * written.
 */
static void exec(ArcType &a, const char* const &s) {
  // +1 so that the trailing '\0' is stored along with the text.
  size_t bytes_with_nul = strlen(s) + 1;

  serialize_impl<ArcType, size_t, false>::exec(a, bytes_with_nul);
  a.o->write(s, bytes_with_nul);

  DASSERT_FALSE(a.o->fail());
}
/**
 * Deserialize a length-prefixed byte run into a newly allocated buffer.
 *
 * Reads a size_t byte count, allocates that many chars with new[], stores
 * the pointer in s (the previous value of s is overwritten, not freed), and
 * reads the bytes into the buffer.  The buffer is not released here.
 */
static void exec(InArcType& iarc, char*& s) {
  // The stored prefix is the number of bytes that follow.
  size_t n_bytes;
  iarc >> n_bytes;

  s = new char[n_bytes];

  // Pull the payload straight into the fresh buffer.
  iarc.read(s, n_bytes);
  DASSERT_FALSE(iarc.fail());
}
/**
 * Deserialize a length-prefixed std::string from the archive's input stream.
 *
 * Reads a size_t character count, resizes s to that length, then reads the
 * characters directly into the string's storage.
 */
static void exec(ArcType &a, std::string &s) {
  // Read the length.
  size_t length;
  deserialize_impl<ArcType, size_t, false>::exec(a, length);

  // Resize the string and read the characters in place.
  s.resize(length);

  // Write through the mutable accessor: modifying the array returned by
  // c_str() (even after a const_cast) is undefined behaviour.  &s[0] is
  // valid for an empty string too, in which case zero bytes are read.
  a.i->read(&s[0], static_cast<std::streamsize>(length));
  DASSERT_FALSE(a.i->fail());
}
/**
 * Deserialize a length-prefixed byte run into a newly allocated buffer.
 *
 * Deserializes a size_t byte count, allocates that many chars with new[],
 * stores the pointer in s (the previous value of s is overwritten, not
 * freed), and reads the bytes from the archive's input stream into the
 * buffer.  The buffer is not released here.
 */
static void exec(ArcType& a, char*& s) {
  // The stored prefix is the number of bytes that follow.
  size_t n_bytes;
  deserialize_impl<ArcType, size_t, false>::exec(a, n_bytes);

  s = new char[n_bytes];

  // Pull the payload straight into the fresh buffer.
  a.i->read(s, n_bytes);
  DASSERT_FALSE(a.i->fail());
}
/**
 * Serialize a fixed-size char array.
 *
 * Writes len as a size_t prefix, followed by exactly len bytes taken from s.
 */
static void exec(OutArcType& oarc, const char s[len]) {
  // The array extent is the byte count; all len bytes are always written.
  size_t byte_count = len;

  oarc << byte_count;
  oarc.write(s, byte_count);

  DASSERT_FALSE(oarc.fail());
}
/**
 * Register the vertex data for one partition without copying it.
 *
 * Only the address of \p vertices is stored in the slot for
 * \p partition_id, so the vector must remain alive for as long as that
 * pointer is used.  The partition is then flagged as loaded.  Debug builds
 * assert that the id is in range and that the partition was not already
 * loaded.
 */
inline void load_vertex_partition(
    size_t partition_id,
    std::vector<sgraph_vertex_data>& vertices) {
  DASSERT_LT(partition_id, m_num_partitions);
  DASSERT_FALSE(m_is_partition_loaded[partition_id]);

  // Store the pointer first, then raise the flag.
  m_vertex_partitions[partition_id] = &vertices;
  m_is_partition_loaded[partition_id] = true;
}
/**
 * Serialize a fixed-size char array through the archive's output stream.
 *
 * Serializes len as a size_t prefix, then writes exactly len bytes from s.
 */
static void exec(ArcType& a, const char s[len]) {
  // The array extent is the byte count; all len bytes are always written.
  size_t byte_count = len;

  serialize_impl<ArcType, size_t, false>::exec(a, byte_count);
  a.o->write(s, byte_count);

  DASSERT_FALSE(a.o->fail());
}
/**
 * Serialize a std::string through the archive's output stream.
 *
 * The character count is serialized as a size_t prefix, followed by the
 * characters themselves; no terminating '\0' is written.
 */
static void exec(ArcType &a, const std::string& s) {
  size_t n_chars = s.size();
  serialize_impl<ArcType, size_t, false>::exec(a, n_chars);

  const char* payload = s.data();
  a.o->write(payload, static_cast<std::streamsize>(n_chars));

  DASSERT_FALSE(a.o->fail());
}
std::shared_ptr<unity_sgraph_base> unity_sgraph::lambda_triple_apply_native(const lambda_triple_apply_fn& lambda, const std::vector<std::string>& mutated_fields) { log_func_entry(); if (mutated_fields.empty()) { log_and_throw("mutated_fields cannot be empty"); } std::shared_ptr<sgraph> g = std::make_shared<sgraph>((*m_graph)()); std::vector<std::string> mutated_vertex_fields, mutated_edge_fields; const auto& all_vertex_fields = g->get_vertex_fields(); const auto& all_edge_fields = g->get_edge_fields(); std::vector<size_t> mutated_vertex_field_ids; std::vector<size_t> mutated_edge_field_ids; std::set<std::string> all_vertex_field_set(all_vertex_fields.begin(), all_vertex_fields.end()); std::set<std::string> all_edge_field_set(all_edge_fields.begin(), all_edge_fields.end()); for (auto& f : mutated_fields) { if (f == sgraph::VID_COLUMN_NAME || f == sgraph::SRC_COLUMN_NAME || f == sgraph::DST_COLUMN_NAME) { log_and_throw("mutated fields cannot contain id field: " + f); } if (!all_vertex_field_set.count(f) && !all_edge_field_set.count(f)) { log_and_throw("mutated field \"" + f + "\" cannot be found in graph"); } if (all_vertex_field_set.count(f)) { mutated_vertex_fields.push_back(f); mutated_vertex_field_ids.push_back(std::find(all_vertex_fields.begin(), all_vertex_fields.end(), f) - all_vertex_fields.begin()); } if (all_edge_field_set.count(f)) { mutated_edge_fields.push_back(f); mutated_edge_field_ids.push_back(std::find(all_edge_fields.begin(), all_edge_fields.end(), f) - all_edge_fields.begin()); } } DASSERT_FALSE(mutated_fields.empty()); // get all the field names in flexible_type form since we can do CoW on it std::vector<flexible_type> flex_vertex_fields(all_vertex_fields.begin(), all_vertex_fields.end()); std::vector<flexible_type> flex_edge_fields(all_edge_fields.begin(), all_edge_fields.end()); auto new_lambda = [=](sgraph_compute::edge_scope& e)->void { e.lock_vertices(); edge_triple triple; for (size_t i = 0;i < e.source().size(); ++i) { 
triple.source[flex_vertex_fields[i]] = e.source()[i]; triple.target[flex_vertex_fields[i]] = e.target()[i]; } for (size_t i = 0;i < e.edge().size(); ++i) { triple.edge[flex_edge_fields[i]] = e.edge()[i]; } lambda(triple); // update just the potentially changed fields for (size_t vtxfield : mutated_vertex_field_ids) { e.source()[vtxfield] = std::move(triple.source[flex_vertex_fields[vtxfield]]); e.target()[vtxfield] = std::move(triple.target[flex_vertex_fields[vtxfield]]); } for (size_t edgefield : mutated_edge_field_ids) { e.edge()[edgefield] = std::move(triple.edge[flex_edge_fields[edgefield]]); } e.unlock_vertices(); }; sgraph_compute::triple_apply(*g, new_lambda, mutated_vertex_fields, mutated_edge_fields); std::shared_ptr<unity_sgraph> ret(new unity_sgraph(g)); return ret; }