Пример #1
0
/**
 * Closes the writer's output sarray and returns it wrapped in a unity_sarray.
 *
 * The stored output iterators are cleared before the output sarray is closed.
 */
gl_sarray gl_sarray_writer_impl::close() {
  // Drop the held output iterators, then close the output sarray.
  m_output_iterators.clear();
  m_out_sarray->close();

  // Wrap the closed sarray; the shared_ptr converts to gl_sarray on return.
  std::shared_ptr<unity_sarray> wrapped_result = std::make_shared<unity_sarray>();
  wrapped_result->construct_from_sarray(m_out_sarray);
  return wrapped_result;
}
Пример #2
0
std::shared_ptr<unity_sframe_base> unity_sgraph::get_edges_lazy(size_t groupa, size_t groupb) {
  std::shared_ptr<unity_sframe> ret(new unity_sframe());
  auto& g = (*m_graph)();

  if (g.num_edges(groupa, groupb) == 0) {
    sframe sf;
    sf.open_for_write(get_edge_fields(), get_edge_field_types());
    sf.close();
    ret->construct_from_sframe(sf);
    return ret;
  }

  size_t num_partitions = g.get_num_partitions();
  std::vector<sframe>& egroup = g.edge_group(groupa, groupb);
  std::vector<sframe>& vdata_groupa = g.vertex_group(groupa);
  std::vector<sframe>& vdata_groupb = g.vertex_group(groupb);

  std::vector<std::string> edge_column_names = g.get_edge_fields();
  std::vector<flex_type_enum> edge_column_types = g.get_edge_field_types();
  flex_type_enum id_type = g.vertex_id_type();

  std::map<std::pair<size_t, size_t>, lazy_id_translation_functor> id_column_translators;

  std::vector<std::shared_ptr<unity_sframe_base>> edge_partition_sframes;

  for (size_t i = 0; i < num_partitions; ++i) {
    for (size_t j = 0; j < num_partitions; ++j) {

      auto& eframe = egroup[i * num_partitions + j];

      // make the id transform functor for source and target id columns
      if (!id_column_translators.count({groupa, i})) {
        std::shared_ptr<sarray<flexible_type>> source_id_column = vdata_groupa[i].select_column(sgraph::VID_COLUMN_NAME);
        std::vector<flexible_type> id_vec;
        source_id_column->get_reader()->read_rows(0, source_id_column->size(), id_vec);
        std::shared_ptr<const std::vector<flexible_type>> id_vec_ptr = 
          std::make_shared<const std::vector<flexible_type>>(id_vec);
        id_column_translators[{groupa, i}] = lazy_id_translation_functor(id_vec_ptr);
      }
      if (!id_column_translators.count({groupb, j})) {
        std::shared_ptr<sarray<flexible_type>> target_id_column = vdata_groupb[j].select_column(sgraph::VID_COLUMN_NAME);
        std::vector<flexible_type> id_vec;
        target_id_column->get_reader()->read_rows(0, target_id_column->size(), id_vec);
        std::shared_ptr<const std::vector<flexible_type>> id_vec_ptr = 
          std::make_shared<const std::vector<flexible_type>>(id_vec);
        id_column_translators[{groupb, j}] = lazy_id_translation_functor(id_vec_ptr);
      }

      // construct the lazy source and target sarrays.
      std::shared_ptr<unity_sarray> source_array, target_array;
      source_array = std::make_shared<unity_sarray>();
      source_array->construct_from_sarray(eframe.select_column(sgraph::SRC_COLUMN_NAME));
      target_array = std::make_shared<unity_sarray>();
      target_array->construct_from_sarray(eframe.select_column(sgraph::DST_COLUMN_NAME));
      auto source_translator = id_column_translators[{groupa, i}];
      auto target_translator = id_column_translators[{groupb, j}];
      auto lazy_source_array = source_array->transform_lambda(
        [=](const flexible_type& local_vid) { return source_translator(local_vid); },
        id_type, false, 0);
      auto lazy_target_array = target_array->transform_lambda(
        [=](const flexible_type& local_vid) { return target_translator(local_vid); },
        id_type, false, 0);

      // Construct new sframe for the edge segment, replacing the raw ids with new ids.
      auto new_eframe = std::make_shared<unity_sframe>();
      size_t src_column_idx = eframe.column_index(sgraph::SRC_COLUMN_NAME);
      size_t dst_column_idx = eframe.column_index(sgraph::DST_COLUMN_NAME);
      for (size_t i = 0; i < eframe.num_columns(); ++i) {
        if (i == src_column_idx) {
          new_eframe->add_column(lazy_source_array, sgraph::SRC_COLUMN_NAME);
        } else if (i == dst_column_idx) {
          new_eframe->add_column(lazy_target_array, sgraph::DST_COLUMN_NAME);
        } else {
          auto column_data = std::make_shared<unity_sarray>();
          column_data->construct_from_sarray(eframe.select_column(i));
          auto column_name = eframe.column_names()[i];
          new_eframe->add_column(column_data, column_name);
        }
      }
      edge_partition_sframes.push_back(std::static_pointer_cast<unity_sframe_base>(new_eframe));

    }
  }

  // now we want to append N = num_partitions * num_partiions lazy sframes into one final sframe and
  // with append tree depth log(N)
  auto lazy_append_edges = binary_sframe_append(edge_partition_sframes, 0, edge_partition_sframes.size());
  return lazy_append_edges;
}