/**
 * Builds the shared state used for parallel iteration over several SFrames.
 *
 * A reader is opened for every column of every SFrame in data_sources, in
 * order, and appended to `sources`. `column_offsets` records the index in
 * that flattened list at which each SFrame's columns begin, followed by one
 * trailing entry holding the total column count. Finally the row range is
 * handed to set_global_block().
 *
 * Every SFrame that contributes at least one column must have the same
 * number of rows as the first SFrame; this is enforced with ASSERT_EQ.
 *
 * \param data_sources  The SFrames to iterate over; must not be empty.
 * \param _row_start    Forwarded to set_global_block().
 * \param _row_end      Forwarded to set_global_block().
 *
 * \note This operation is more expensive than the SFrame iterator creation.
 */
parallel_sframe_iterator_initializer::parallel_sframe_iterator_initializer(
    const std::vector<sframe>& data_sources,
    const size_t& _row_start,
    const size_t& _row_end) {

  column_offsets.clear();
  sources.clear();

  DASSERT_FALSE(data_sources.empty());

  // The first SFrame defines the row count all others are compared against.
  sf_size = data_sources.front().size();

  size_t next_offset = 0;

  for (const sframe& frame : data_sources) {
    // Columns of this SFrame start where the previous SFrame's ended.
    column_offsets.push_back(next_offset);
    next_offset += frame.num_columns();

    for (size_t col = 0; col < frame.num_columns(); ++col) {
      sources.push_back(frame.select_column(col)->get_reader());
      ASSERT_EQ(frame.size(), sf_size);
    }
  }

  // Trailing sentinel: the total number of columns across all SFrames.
  column_offsets.push_back(next_offset);

  set_global_block(_row_start, _row_end);
}
Beispiel #2
0
 /**
  * Reads a length-prefixed byte sequence from the archive into the buffer s.
  *
  * The size_t length prefix is read first and must not exceed `len`
  * (declared outside this block; presumably the array-size template
  * parameter), otherwise ASSERT_LE fires. Exactly that many bytes are then
  * read into s.
  */
 static void exec(ArcType& a, char s[len]) {
   size_t n_bytes;
   deserialize_impl<ArcType, size_t, false>::exec(a, n_bytes);

   // Refuse a payload that would overrun the destination buffer.
   ASSERT_LE(n_bytes, len);

   char* dest = s;
   a.i->read(dest, n_bytes);
   DASSERT_FALSE(a.i->fail());
 }
/**
 * Installs the vertex data of one partition and marks that partition loaded.
 *
 * The contents of `vertices` are moved into the partition slot, so the
 * caller's vector is left in a moved-from state afterwards.
 *
 * The DASSERT_* checks require partition_id < m_num_partitions and that the
 * partition was not already marked as loaded.
 *
 * \param partition_id  Index of the partition to fill.
 * \param vertices      Vertex data to take over (moved from).
 */
void pysgraph_synchronize::load_vertex_partition(size_t partition_id, std::vector<sgraph_vertex_data>& vertices) {
  DASSERT_LT(partition_id, m_num_partitions);
  DASSERT_FALSE(m_is_partition_loaded[partition_id]);

  auto& slot = m_vertex_partitions[partition_id];
  slot = std::move(vertices);
  m_is_partition_loaded[partition_id] = true;

  DASSERT_TRUE(is_loaded(partition_id));
}
Beispiel #4
0
/**
 * Runs a triple-apply over a copy of the graph using a lambda given as a
 * string, and returns the result wrapped in a new unity_sgraph.
 *
 * Each name in mutated_fields is validated and sorted into the vertex and/or
 * edge field list passed on to sgraph_compute::triple_apply(). A name that
 * exists as both a vertex field and an edge field is added to both lists.
 *
 * Throws (via log_and_throw) when:
 *  - mutated_fields is empty,
 *  - a name equals one of the id columns (VID / SRC / DST),
 *  - a name is neither a vertex field nor an edge field of the graph.
 *
 * \param lambda_str      The lambda, forwarded unchanged to triple_apply().
 * \param mutated_fields  Names of the fields the lambda may modify.
 */
std::shared_ptr<unity_sgraph_base>
unity_sgraph::lambda_triple_apply(const std::string& lambda_str,
                                  const std::vector<std::string>& mutated_fields) {
  log_func_entry();
  if (mutated_fields.empty()) {
    log_and_throw("mutated_fields cannot be empty");
  }

  // Work on a fresh sgraph obtained from m_graph; this object is not modified.
  std::shared_ptr<sgraph> g = std::make_shared<sgraph>((*m_graph)());

  const auto& vertex_field_names = g->get_vertex_fields();
  const auto& edge_field_names = g->get_edge_fields();
  const std::set<std::string> known_vertex_fields(vertex_field_names.begin(),
                                                  vertex_field_names.end());
  const std::set<std::string> known_edge_fields(edge_field_names.begin(),
                                                edge_field_names.end());

  std::vector<std::string> mutated_vertex_fields;
  std::vector<std::string> mutated_edge_fields;

  for (const std::string& field : mutated_fields) {
    const bool is_id_field = field == sgraph::VID_COLUMN_NAME ||
                             field == sgraph::SRC_COLUMN_NAME ||
                             field == sgraph::DST_COLUMN_NAME;
    if (is_id_field) {
      log_and_throw("mutated fields cannot contain id field: " + field);
    }

    const bool on_vertex = known_vertex_fields.count(field) != 0;
    const bool on_edge = known_edge_fields.count(field) != 0;
    if (!on_vertex && !on_edge) {
      log_and_throw("mutated field \"" + field + "\" cannot be found in graph");
    }

    if (on_vertex) {
      mutated_vertex_fields.push_back(field);
    }
    if (on_edge) {
      mutated_edge_fields.push_back(field);
    }
  }

  DASSERT_FALSE(mutated_fields.empty());
  sgraph_compute::triple_apply(*g, lambda_str, mutated_vertex_fields, mutated_edge_fields);

  std::shared_ptr<unity_sgraph> ret(new unity_sgraph(g));
  return ret;
}
Beispiel #5
0
 /**
  * Writes the C string s to the archive as a size_t length prefix followed by
  * the characters. The stored length and payload both include the
  * terminating '\0'.
  */
 static void exec(OutArcType& oarc, char* const& s) {
   // +1 so the terminating '\0' is written together with the text.
   size_t n_bytes = strlen(s) + 1;
   oarc << n_bytes;

   const char* bytes = s;
   oarc.write(bytes, n_bytes);
   DASSERT_FALSE(oarc.fail());
 }
Beispiel #6
0
 /**
  * Writes the C string s to the archive as a size_t length prefix followed by
  * the characters. The stored length and payload both include the
  * terminating '\0'.
  */
 static void exec(ArcType &a, const char* const &s) {
   // +1 so the terminating '\0' is written together with the text.
   size_t n_bytes = strlen(s) + 1;
   serialize_impl<ArcType, size_t, false>::exec(a, n_bytes);

   const char* bytes = s;
   a.o->write(bytes, n_bytes);
   DASSERT_FALSE(a.o->fail());
 }
Beispiel #7
0
 /**
  * Reads a length-prefixed byte sequence from the archive into a newly
  * allocated buffer.
  *
  * The size_t length prefix is read first; s is then pointed at a fresh
  * `new char[length]` array which receives the payload. Whatever s pointed
  * to before is overwritten without being freed, and this function never
  * frees the new buffer: releasing it with delete[] is left to the caller.
  *
  * No check is made that the stored length matches any expected size.
  */
 static void exec(InArcType& iarc, char*& s) {
   // Zero-initialize so that a failed extraction cannot leave an
   // indeterminate value to be used as the allocation size below.
   size_t length = 0;
   iarc >> length;

   s = new char[length];

   // Read the payload itself.
   iarc.read(s, length);
   DASSERT_FALSE(iarc.fail());
 }
Beispiel #8
0
 /**
  * Reads a length-prefixed string from the archive into s.
  *
  * The size_t length prefix is read first, s is resized to that many
  * characters, and the characters are then read directly into the string's
  * storage.
  */
 static void exec(ArcType &a, std::string &s) {
   // Zero-initialize so that a failed length read cannot leave an
   // indeterminate value to be used as the resize amount below.
   size_t length = 0;
   deserialize_impl<ArcType, size_t, false>::exec(a, length);

   // Resize the string and read the characters.
   s.resize(length);
   // Write through the mutable element reference rather than a const_cast of
   // c_str(): modifying the array returned by c_str() is not permitted.
   // &s[0] is valid even when length == 0.
   a.i->read(&s[0], static_cast<std::streamsize>(length));
   DASSERT_FALSE(a.i->fail());
 }
Beispiel #9
0
 /**
  * Reads a length-prefixed byte sequence from the archive into a newly
  * allocated buffer.
  *
  * The size_t length prefix is read first; s is then pointed at a fresh
  * `new char[length]` array which receives the payload. Whatever s pointed
  * to before is overwritten without being freed, and this function never
  * frees the new buffer: releasing it with delete[] is left to the caller.
  *
  * No check is made that the stored length matches any expected size.
  */
 static void exec(ArcType& a, char*& s) {
   // Zero-initialize so that a failed length read cannot leave an
   // indeterminate value to be used as the allocation size below.
   size_t length = 0;
   deserialize_impl<ArcType, size_t, false>::exec(a, length);

   s = new char[length];

   // Read the payload itself.
   a.i->read(s, length);
   DASSERT_FALSE(a.i->fail());
 }
Beispiel #10
0
 /**
  * Writes a fixed-size character array to the archive as a size_t length
  * prefix followed by the array's bytes.
  *
  * The length written is always `len` (declared outside this block;
  * presumably the array-size template parameter), regardless of the array's
  * contents.
  */
 static void exec(OutArcType& oarc, const char s[len] ) {
   size_t n_bytes = len;
   oarc << n_bytes;

   const char* bytes = s;
   oarc.write(bytes, n_bytes);
   DASSERT_FALSE(oarc.fail());
 }
 /**
  * Registers the vertex data of one partition and marks that partition
  * loaded.
  *
  * Only the address of `vertices` is stored; nothing is copied. The caller's
  * vector must therefore stay alive for as long as the stored pointer may
  * be dereferenced.
  *
  * The DASSERT_* checks require partition_id < m_num_partitions and that the
  * partition was not already marked as loaded.
  */
 inline void load_vertex_partition(size_t partition_id, std::vector<sgraph_vertex_data>& vertices) {
   DASSERT_LT(partition_id, m_num_partitions);
   DASSERT_FALSE(m_is_partition_loaded[partition_id]);

   std::vector<sgraph_vertex_data>* const borrowed = &vertices;
   m_vertex_partitions[partition_id] = borrowed;
   m_is_partition_loaded[partition_id] = true;
 }
Beispiel #12
0
 /**
  * Writes a fixed-size character array to the archive as a size_t length
  * prefix followed by the array's bytes.
  *
  * The length written is always `len` (declared outside this block;
  * presumably the array-size template parameter), regardless of the array's
  * contents.
  */
 static void exec(ArcType& a, const char s[len] ) {
   size_t n_bytes = len;
   serialize_impl<ArcType, size_t, false>::exec(a, n_bytes);

   const char* bytes = s;
   a.o->write(bytes, n_bytes);
   DASSERT_FALSE(a.o->fail());
 }
Beispiel #13
0
 /**
  * Writes a std::string to the archive as a size_t length prefix followed by
  * the string's characters.
  *
  * The length is s.length(), so no terminating '\0' is written.
  */
 static void exec(ArcType &a, const std::string& s) {
   size_t n_chars = s.length();
   serialize_impl<ArcType, size_t, false>::exec(a, n_chars);

   const char* bytes = s.c_str();
   a.o->write(bytes, static_cast<std::streamsize>(n_chars));
   DASSERT_FALSE(a.o->fail());
 }
Beispiel #14
0
std::shared_ptr<unity_sgraph_base>
unity_sgraph::lambda_triple_apply_native(const lambda_triple_apply_fn& lambda,
                                         const std::vector<std::string>& mutated_fields) {
  log_func_entry();
  if (mutated_fields.empty()) {
    log_and_throw("mutated_fields cannot be empty");
  }
  std::shared_ptr<sgraph> g = std::make_shared<sgraph>((*m_graph)());
  std::vector<std::string> mutated_vertex_fields, mutated_edge_fields;
  const auto& all_vertex_fields = g->get_vertex_fields();
  const auto& all_edge_fields = g->get_edge_fields();
  std::vector<size_t> mutated_vertex_field_ids;
  std::vector<size_t> mutated_edge_field_ids;
  std::set<std::string> all_vertex_field_set(all_vertex_fields.begin(), all_vertex_fields.end());
  std::set<std::string> all_edge_field_set(all_edge_fields.begin(), all_edge_fields.end());
  for (auto& f : mutated_fields) {
    if (f == sgraph::VID_COLUMN_NAME || f == sgraph::SRC_COLUMN_NAME || f == sgraph::DST_COLUMN_NAME) {
      log_and_throw("mutated fields cannot contain id field: " + f);
    }
    if (!all_vertex_field_set.count(f) && !all_edge_field_set.count(f)) {
      log_and_throw("mutated field \"" + f + "\" cannot be found in graph");
    }
    if (all_vertex_field_set.count(f)) {
      mutated_vertex_fields.push_back(f);
      mutated_vertex_field_ids.push_back(std::find(all_vertex_fields.begin(), 
                                                   all_vertex_fields.end(), f) -
                                          all_vertex_fields.begin());
    }
    if (all_edge_field_set.count(f)) {
      mutated_edge_fields.push_back(f);
      mutated_edge_field_ids.push_back(std::find(all_edge_fields.begin(),
                                                 all_edge_fields.end(), f) -
                                       all_edge_fields.begin());
    }
  }
  DASSERT_FALSE(mutated_fields.empty());
  // get all the field names in flexible_type form since we can do CoW on it
  std::vector<flexible_type> flex_vertex_fields(all_vertex_fields.begin(),
                                                all_vertex_fields.end());
  std::vector<flexible_type> flex_edge_fields(all_edge_fields.begin(),
                                              all_edge_fields.end());
  auto new_lambda = 
      [=](sgraph_compute::edge_scope& e)->void {
        e.lock_vertices();
        edge_triple triple;
        for (size_t i = 0;i < e.source().size(); ++i) {
          triple.source[flex_vertex_fields[i]] = e.source()[i];
          triple.target[flex_vertex_fields[i]] = e.target()[i];
        }
        for (size_t i = 0;i < e.edge().size(); ++i) {
          triple.edge[flex_edge_fields[i]] = e.edge()[i];
        }

        lambda(triple);

        // update just the potentially changed fields
        for (size_t vtxfield : mutated_vertex_field_ids) {
          e.source()[vtxfield] = std::move(triple.source[flex_vertex_fields[vtxfield]]);
          e.target()[vtxfield] = std::move(triple.target[flex_vertex_fields[vtxfield]]);
        }
        for (size_t edgefield : mutated_edge_field_ids) {
          e.edge()[edgefield] = std::move(triple.edge[flex_edge_fields[edgefield]]);
        }
        e.unlock_vertices();
      };
  sgraph_compute::triple_apply(*g, new_lambda, mutated_vertex_fields, mutated_edge_fields);
  std::shared_ptr<unity_sgraph> ret(new unity_sgraph(g));
  return ret;
}