/**
 * Finishes writing and returns the written data as a gl_sarray.
 *
 * The output iterators are cleared before the underlying sarray is closed;
 * the closed sarray is then wrapped in a freshly created unity_sarray, which
 * is what the returned gl_sarray is built from.
 *
 * \return A gl_sarray constructed from the closed output sarray.
 */
gl_sarray gl_sarray_writer_impl::close() {
  // Release the iterators first, then close the sarray they were writing to.
  m_output_iterators.clear();
  m_out_sarray->close();

  // Wrap the closed sarray in a unity_sarray for the caller.
  std::shared_ptr<unity_sarray> wrapped = std::make_shared<unity_sarray>();
  wrapped->construct_from_sarray(m_out_sarray);
  return wrapped;
}
std::shared_ptr<unity_sframe_base> unity_sgraph::get_edges_lazy(size_t groupa, size_t groupb) { std::shared_ptr<unity_sframe> ret(new unity_sframe()); auto& g = (*m_graph)(); if (g.num_edges(groupa, groupb) == 0) { sframe sf; sf.open_for_write(get_edge_fields(), get_edge_field_types()); sf.close(); ret->construct_from_sframe(sf); return ret; } size_t num_partitions = g.get_num_partitions(); std::vector<sframe>& egroup = g.edge_group(groupa, groupb); std::vector<sframe>& vdata_groupa = g.vertex_group(groupa); std::vector<sframe>& vdata_groupb = g.vertex_group(groupb); std::vector<std::string> edge_column_names = g.get_edge_fields(); std::vector<flex_type_enum> edge_column_types = g.get_edge_field_types(); flex_type_enum id_type = g.vertex_id_type(); std::map<std::pair<size_t, size_t>, lazy_id_translation_functor> id_column_translators; std::vector<std::shared_ptr<unity_sframe_base>> edge_partition_sframes; for (size_t i = 0; i < num_partitions; ++i) { for (size_t j = 0; j < num_partitions; ++j) { auto& eframe = egroup[i * num_partitions + j]; // make the id transform functor for source and target id columns if (!id_column_translators.count({groupa, i})) { std::shared_ptr<sarray<flexible_type>> source_id_column = vdata_groupa[i].select_column(sgraph::VID_COLUMN_NAME); std::vector<flexible_type> id_vec; source_id_column->get_reader()->read_rows(0, source_id_column->size(), id_vec); std::shared_ptr<const std::vector<flexible_type>> id_vec_ptr = std::make_shared<const std::vector<flexible_type>>(id_vec); id_column_translators[{groupa, i}] = lazy_id_translation_functor(id_vec_ptr); } if (!id_column_translators.count({groupb, j})) { std::shared_ptr<sarray<flexible_type>> target_id_column = vdata_groupb[j].select_column(sgraph::VID_COLUMN_NAME); std::vector<flexible_type> id_vec; target_id_column->get_reader()->read_rows(0, target_id_column->size(), id_vec); std::shared_ptr<const std::vector<flexible_type>> id_vec_ptr = std::make_shared<const 
std::vector<flexible_type>>(id_vec); id_column_translators[{groupb, j}] = lazy_id_translation_functor(id_vec_ptr); } // construct the lazy source and target sarrays. std::shared_ptr<unity_sarray> source_array, target_array; source_array = std::make_shared<unity_sarray>(); source_array->construct_from_sarray(eframe.select_column(sgraph::SRC_COLUMN_NAME)); target_array = std::make_shared<unity_sarray>(); target_array->construct_from_sarray(eframe.select_column(sgraph::DST_COLUMN_NAME)); auto source_translator = id_column_translators[{groupa, i}]; auto target_translator = id_column_translators[{groupb, j}]; auto lazy_source_array = source_array->transform_lambda( [=](const flexible_type& local_vid) { return source_translator(local_vid); }, id_type, false, 0); auto lazy_target_array = target_array->transform_lambda( [=](const flexible_type& local_vid) { return target_translator(local_vid); }, id_type, false, 0); // Construct new sframe for the edge segment, replacing the raw ids with new ids. 
auto new_eframe = std::make_shared<unity_sframe>(); size_t src_column_idx = eframe.column_index(sgraph::SRC_COLUMN_NAME); size_t dst_column_idx = eframe.column_index(sgraph::DST_COLUMN_NAME); for (size_t i = 0; i < eframe.num_columns(); ++i) { if (i == src_column_idx) { new_eframe->add_column(lazy_source_array, sgraph::SRC_COLUMN_NAME); } else if (i == dst_column_idx) { new_eframe->add_column(lazy_target_array, sgraph::DST_COLUMN_NAME); } else { auto column_data = std::make_shared<unity_sarray>(); column_data->construct_from_sarray(eframe.select_column(i)); auto column_name = eframe.column_names()[i]; new_eframe->add_column(column_data, column_name); } } edge_partition_sframes.push_back(std::static_pointer_cast<unity_sframe_base>(new_eframe)); } } // now we want to append N = num_partitions * num_partiions lazy sframes into one final sframe and // with append tree depth log(N) auto lazy_append_edges = binary_sframe_append(edge_partition_sframes, 0, edge_partition_sframes.size()); return lazy_append_edges; }