예제 #1
0
std::shared_ptr<unity_sgraph_base>
unity_sgraph::lambda_triple_apply_native(const function_closure_info& toolkit_fn_name,
                                  const std::vector<std::string>& mutated_fields) {
  auto native_execute_function = 
                  get_unity_global_singleton()
                  ->get_toolkit_function_registry()
                  ->get_native_function(toolkit_fn_name);

  log_func_entry();

  auto lambda = [=](edge_triple& args)->void {
    std::vector<variant_type> var(3);
    var[0] = to_variant(_map_to_flex_dict(std::move(args.source)));
    var[1] = to_variant(_map_to_flex_dict(std::move(args.edge)));
    var[2] = to_variant(_map_to_flex_dict(std::move(args.target)));

    variant_type ret = native_execute_function(var);
    var = variant_get_value<std::vector<variant_type>>(ret);

    args.source = _map_from_flex_dict(variant_get_value<flexible_type>(var[0]));
    args.edge = _map_from_flex_dict(variant_get_value<flexible_type>(var[1]));
    args.target = _map_from_flex_dict(variant_get_value<flexible_type>(var[2]));
  };

  return lambda_triple_apply_native(lambda, mutated_fields);
}
예제 #2
0
// Runs the lambda given as lambda_str over the triples of a copy of the graph.
//
// mutated_fields must be non-empty, must not name any id column, and every
// entry must exist as a vertex field, an edge field, or both. Each entry is
// routed to the vertex list and/or the edge list accordingly before
// sgraph_compute::triple_apply is invoked on the copy. The copy is returned
// wrapped in a new unity_sgraph.
std::shared_ptr<unity_sgraph_base>
unity_sgraph::lambda_triple_apply(const std::string& lambda_str,
                                  const std::vector<std::string>& mutated_fields) {
  log_func_entry();
  if (mutated_fields.empty()) {
    log_and_throw("mutated_fields cannot be empty");
  }

  std::shared_ptr<sgraph> g = std::make_shared<sgraph>((*m_graph)());

  const auto& all_vertex_fields = g->get_vertex_fields();
  const auto& all_edge_fields = g->get_edge_fields();
  const std::set<std::string> vertex_field_names(all_vertex_fields.begin(),
                                                 all_vertex_fields.end());
  const std::set<std::string> edge_field_names(all_edge_fields.begin(),
                                               all_edge_fields.end());

  std::vector<std::string> vertex_targets;
  std::vector<std::string> edge_targets;
  for (const auto& name : mutated_fields) {
    const bool is_id_field = name == sgraph::VID_COLUMN_NAME ||
                             name == sgraph::SRC_COLUMN_NAME ||
                             name == sgraph::DST_COLUMN_NAME;
    if (is_id_field) {
      log_and_throw("mutated fields cannot contain id field: " + name);
    }

    const bool on_vertices = vertex_field_names.count(name) != 0;
    const bool on_edges = edge_field_names.count(name) != 0;
    if (!on_vertices && !on_edges) {
      log_and_throw("mutated field \"" + name + "\" cannot be found in graph");
    }

    // A name present on both sides is mutated on both.
    if (on_vertices) {
      vertex_targets.push_back(name);
    }
    if (on_edges) {
      edge_targets.push_back(name);
    }
  }
  DASSERT_FALSE(mutated_fields.empty());

  sgraph_compute::triple_apply(*g, lambda_str, vertex_targets, edge_targets);
  return std::shared_ptr<unity_sgraph>(new unity_sgraph(g));
}
예제 #3
0
bool unity_sgraph::load_graph(std::string target_dir) {
  log_func_entry();
  try {
    dir_archive dir;
    dir.open_directory_for_read(target_dir);
    std::string contents;
    if (dir.get_metadata("contents", contents) == false ||
        contents != "graph") {
      log_and_throw(std::string("Archive does not contain a graph."));
    }
    iarchive iarc(dir);
    load(iarc);
    dir.close();
  } catch (std::ios_base::failure& e) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir)
      + ": " + e.what();
    log_and_throw_io_failure(message);
  } catch (std::string& e) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir)
      + ": " + e;
    log_and_throw(message);
  } catch (...) {
    std::string message = "Unable to load graph from " + sanitize_url(target_dir)
      + ": Unknown Error.";
    log_and_throw(message);
  }
  return true;
}
예제 #4
0
// Starts the poll thread of the pollset the first time it is called; later
// calls while `started` is set do nothing beyond logging the function entry.
void comm_server::start() {
  log_func_entry();
  if (started) {
    return;
  }
  pollset->start_poll_thread();
  started = true;
}
예제 #5
0
bool unity_sgraph::save_graph(std::string target, std::string format) {
  log_func_entry();
  try {
    if (format == "binary") {
      dir_archive dir;
      dir.open_directory_for_write(target);
      dir.set_metadata("contents", "graph");
      oarchive oarc(dir);
      if (dir.get_output_stream()->fail()) {
        log_and_throw_io_failure("Fail to write");
      }
      save(oarc);
      dir.close();
    } else if (format == "json") {
      save_sgraph_to_json(get_graph(), target);
    } else if (format == "csv") {
      save_sgraph_to_csv(get_graph(), target);
    } else {
      log_and_throw("Unable to save to format : " + format);
    }
  } catch (std::ios_base::failure& e) {
    std::string message =
        "Unable to save graph to " + sanitize_url(target) + ": " + e.what();
    log_and_throw_io_failure(message);
  } catch (std::string& e) {
    std::string message =
        "Unable to save graph to " + sanitize_url(target) + ": " + e;
    log_and_throw(message);
  } catch (...) {
    std::string message =
        "Unable to save graph to " + sanitize_url(target) + ": Unknown Error.";
    log_and_throw(message);
  }
  return true;
}
예제 #6
0
// Returns an options map with two entries, "num_vertices" and "num_edges",
// taken from the underlying graph.
options_map_t unity_sgraph::summary() {
  log_func_entry();
  auto& graph = (*m_graph)();
  options_map_t stats;
  stats["num_vertices"] = graph.num_vertices();
  stats["num_edges"] = graph.num_edges();
  return stats;
}
예제 #7
0
// Starts polling on the control socket and then on the object socket the first
// time it is called; later calls while `started` is set do nothing beyond
// logging the function entry.
void comm_server::start() {
  log_func_entry();
  if (started) {
    return;
  }
  control_socket->start_polling();
  object_socket->start_polling();
  started = true;
}
예제 #8
0
  // Inserts the rows of `sf` into the table `table_name` through the database
  // connector. `append_if_exists` is logged and forwarded unchanged to
  // insert_data.
  void _insert_sframe(std::shared_ptr<unity_sframe_base> sf, const std::string &table_name, bool append_if_exists) {
    log_func_entry();
    logstream(LOG_INFO) << "append: " << append_if_exists << std::endl;

    // NOTE(review): the result of the dynamic cast is dereferenced without a
    // null check, so `sf` is assumed to really be a unity_sframe -- confirm
    // against callers.
    auto underlying =
        std::dynamic_pointer_cast<unity_sframe>(sf)->get_underlying_sframe();
    m_db_connector.insert_data(*underlying, table_name, append_if_exists);
  }
예제 #9
0
 // Evaluates the lambda given as `string` on the single argument `arg` and
 // returns the first element of the result vector.
 //
 // The lambda is created on the pylambda master for the duration of the call
 // and released again afterwards, including when the evaluation throws.
 flexible_type unity_global::eval_lambda(const std::string& string, const flexible_type& arg) {
   log_func_entry();
   lambda::pylambda_master& evaluator = lambda::pylambda_master::get_instance();
   auto lambda_hash = evaluator.make_lambda(string);

   std::vector<flexible_type> return_val;
   try {
     return_val = evaluator.bulk_eval(lambda_hash, {arg}, false, 0);
   } catch (...) {
     // Release the lambda on the failure path as well, so that a failing
     // evaluation does not leave it registered. A failure of the release
     // itself is deliberately ignored here so that it cannot replace the
     // original evaluation error, which is rethrown below.
     try {
       evaluator.release_lambda(lambda_hash);
     } catch (...) {
     }
     throw;
   }

   evaluator.release_lambda(lambda_hash);
   return return_val[0];
 }
예제 #10
0
// Clears the `started` flag if it is set and stores the all-ones 64-bit value
// into the running-command slot.
void comm_server::stop() {
  log_func_entry();
  if (started) {
    started = false;
  }

  // Attempt to cancel any currently running command
  get_srv_running_command().store(
      static_cast<unsigned long long>(uint64_t(-1)));
}
예제 #11
0
 // Creates a new unity_sgraph, loads it from `fname` and returns it.
 //
 // Exceptions raised by unity_sgraph::load_graph propagate to the caller
 // unchanged; the former catch-all handler only rethrew and has been removed.
 std::shared_ptr<unity_sgraph_base> unity_global::load_graph(std::string fname) {
   log_func_entry();
   std::shared_ptr<unity_sgraph> g(new unity_sgraph());
   g->load_graph(fname);
   return g;
 }
예제 #12
0
 // Evaluates the lambda given as `pylambda_string` on one row described by
 // `keys` and `values`, and returns the first element of the result vector.
 //
 // The lambda is created on the pylambda master for the duration of the call
 // and released again afterwards, including when the evaluation throws.
 flexible_type unity_global::eval_dict_lambda(const std::string& pylambda_string,
                           const std::vector<std::string>& keys,
                           const std::vector<flexible_type>& values) {
   log_func_entry();
   lambda::pylambda_master& evaluator = lambda::pylambda_master::get_instance();
   auto lambda_hash = evaluator.make_lambda(pylambda_string);

   std::vector<flexible_type> return_val;
   try {
     // `values` is wrapped in braces so that it is passed as a single row.
     return_val = evaluator.bulk_eval(lambda_hash, keys, {values}, false, 0);
   } catch (...) {
     // Release the lambda on the failure path as well, so that a failing
     // evaluation does not leave it registered. A failure of the release
     // itself is deliberately ignored here so that it cannot replace the
     // original evaluation error, which is rethrown below.
     try {
       evaluator.release_lambda(lambda_hash);
     } catch (...) {
     }
     throw;
   }

   evaluator.release_lambda(lambda_hash);
   return return_val[0];
 }
예제 #13
0
std::shared_ptr<unity_sgraph_base> 
unity_sgraph::rename_edge_fields(const std::vector<std::string>& oldnames,
                                 const std::vector<std::string>& newnames) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);
  sgraph* new_graph = new sgraph((*m_graph)());
  new_graph->rename_edge_fields(oldnames, newnames);
  std::shared_ptr<unity_sgraph> g(new unity_sgraph(*this));
  g->m_graph.reset(unity_sgraph::get_dag()->add_value(new_graph));
  return g;
}
예제 #14
0
std::shared_ptr<unity_sgraph_base> unity_sgraph::select_vertex_fields(
    const std::vector<std::string>& fields, size_t group) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);
  std::vector<std::string> fields_with_id({sgraph::VID_COLUMN_NAME});
  fields_with_id.insert(fields_with_id.end(), fields.begin(), fields.end());
  std::shared_ptr<unity_sgraph> g(new unity_sgraph(*this));
  g->m_graph.reset(unity_sgraph::get_dag()->add_operation(
        new select_vertex_fields_op(fields_with_id, group), {m_graph.get()}));

  return g;
}
// Registers a single toolkit function specification.
//
// When `prefix` is non-empty the registered name becomes "<prefix>.<name>".
// Returns false, leaving the registry untouched, if that name is already
// registered; returns true after storing the specification otherwise.
bool toolkit_function_registry::register_toolkit_function(
    toolkit_function_specification spec,
    std::string prefix) {
  log_func_entry();
  if (!prefix.empty()) {
    spec.name = prefix + "." + spec.name;
  }
  // if there is something in the registry with this name, fail
  if (registry.count(spec.name) != 0) {
    return false;
  }
  registry[spec.name] = spec;
  return true;
}
예제 #16
0
std::shared_ptr<unity_sgraph_base> 
unity_sgraph::swap_vertex_fields(const std::string& field1, const std::string& field2) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);
  if (field1 == sgraph::VID_COLUMN_NAME || field2 == sgraph::VID_COLUMN_NAME) {
    log_and_throw("Cannot swap id fields " + field1 + " , " + field2);
  }
  sgraph* new_graph = new sgraph((*m_graph)());
  new_graph->swap_vertex_fields(field1, field2);
  std::shared_ptr<unity_sgraph> g(new unity_sgraph(*this));
  g->m_graph.reset(unity_sgraph::get_dag()->add_value(new_graph));
  return g;
}
// Removes the toolkit function registered under `name`.
// Returns true if an entry was found and erased, false if there was none.
bool toolkit_function_registry::unregister_toolkit_function(std::string name) {
  log_func_entry();
  // erase-by-key reports how many entries were removed
  return registry.erase(name) > 0;
}
// Registers a toolkit class under `class_name`.
//
// Returns false, leaving both tables untouched, if the name is already
// registered. Otherwise stores the constructor, stores `description` with its
// "name" entry set to `class_name`, and returns true.
bool toolkit_class_registry::register_toolkit_class(
    const std::string& class_name,
    std::function<model_base*()> constructor,
    std::map<std::string, flexible_type> description) {
  log_func_entry();
  if (registry.count(class_name) != 0) {
    return false;
  }

  registry[class_name] = constructor;
  // The stored description always carries the class name under "name".
  description["name"] = class_name;
  descriptions[class_name] = description;
  return true;
}
예제 #19
0
  // Evaluates the lambda given as `string` on every element of `arg` through
  // parallel_for and returns the results in input order.
  //
  // The lambda is created on the pylambda master for the duration of the call
  // and released again afterwards, including when an exception from the
  // evaluation reaches this function.
  std::vector<flexible_type> unity_global::parallel_eval_lambda(const std::string& string, const std::vector<flexible_type>& arg) {
    log_func_entry();
    lambda::pylambda_master& evaluator = lambda::pylambda_master::get_instance();
    auto lambda_hash = evaluator.make_lambda(string);

    // Constructed with arg.size() elements, so each iteration writes to its
    // own slot and no separate reserve() is needed.
    std::vector<flexible_type> ret(arg.size());
    try {
      parallel_for (0, arg.size(), [&](size_t i) {
        ret[i] = evaluator.bulk_eval(lambda_hash, {arg[i]}, false, 0)[0];
      });
    } catch (...) {
      // Release the lambda on the failure path as well. A failure of the
      // release itself is deliberately ignored here so that it cannot replace
      // the original error, which is rethrown below.
      try {
        evaluator.release_lambda(lambda_hash);
      } catch (...) {
      }
      throw;
    }

    evaluator.release_lambda(lambda_hash);
    return ret;
  }
예제 #20
0
// Returns a new unity_sgraph with a delete_vertex_field_op for `field` in
// `group` added to the DAG on top of the current graph. The vertex id column
// cannot be deleted.
std::shared_ptr<unity_sgraph_base> 
unity_sgraph::delete_vertex_field(const std::string field, size_t group) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);

  const bool is_id_column = (field == sgraph::VID_COLUMN_NAME);
  if (is_id_column) {
    log_and_throw("Cannot delete required field " + field);
  }

  std::shared_ptr<unity_sgraph> result(new unity_sgraph(*this));
  result->m_graph.reset(unity_sgraph::get_dag()->add_operation(
        new delete_vertex_field_op(field, group),
        {m_graph.get()}));
  return result;
}
예제 #21
0
// Returns a new unity_sgraph with a select_edge_fields_op for the edge group
// (groupa, groupb) added to the DAG on top of the current graph. The source
// and destination id columns are always placed first in the selection,
// followed by `fields` in the given order.
std::shared_ptr<unity_sgraph_base> 
unity_sgraph::select_edge_fields(const std::vector<std::string>& fields,
                                 size_t groupa, size_t groupb) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);
  std::vector<std::string> fields_with_id({sgraph::SRC_COLUMN_NAME, sgraph::DST_COLUMN_NAME});
  fields_with_id.insert(fields_with_id.end(), fields.begin(), fields.end());
  std::shared_ptr<unity_sgraph> g(new unity_sgraph(*this));
  g->m_graph.reset(unity_sgraph::get_dag()->add_operation(
        new select_edge_fields_op(fields_with_id, groupa, groupb),
        {m_graph.get()}));
  return g;
}
예제 #22
0
std::shared_ptr<unity_sgraph_base> unity_sgraph::add_vertex_field(
    std::shared_ptr<unity_sarray_base> in_column_data, std::string field) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);
  if (field == sgraph::VID_COLUMN_NAME) {
    log_and_throw("Cannot add id field " + field);
  }
  sgraph* new_graph = new sgraph((*m_graph)());
  std::shared_ptr<unity_sarray> column_data = 
      std::static_pointer_cast<unity_sarray>(in_column_data);
  new_graph->add_vertex_field(column_data->get_underlying_sarray(), field);
  std::shared_ptr<unity_sgraph> g(new unity_sgraph(*this));
  g->m_graph.reset(unity_sgraph::get_dag()->add_value(new_graph));
  return g;
}
예제 #23
0
// Tears the server down: calls stop(), closes the object, control and publish
// sockets, clears the registered objects, then deletes the three sockets and
// every dispatcher stored in dispatch_map.
comm_server::~comm_server() {
  log_func_entry();
  stop();
  // Close all sockets before any of them is deleted.
  object_socket->close();
  control_socket->close();
  publishsock->close();
  registered_objects.clear();
  delete object_socket;
  delete control_socket;
  delete publishsock;
  // The mapped values of dispatch_map are deleted here.
  for (auto& dispatcher: dispatch_map) {
    delete dispatcher.second;
  }
  // NOTE(review): registered_objects was already cleared above; this second
  // clear looks redundant -- confirm nothing repopulates it in between.
  registered_objects.clear();
}
예제 #24
0
// Deserializes a graph from `iarc` and installs it as this object's graph.
//
// The stream must start with GRAPH_MAGIC_HEADER, followed by the number of
// partitions and the graph itself. A header mismatch is reported through
// log_and_throw.
void unity_sgraph::load(iarchive& iarc) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);
  // Zero-initialized, so the bytes read below stay NUL-terminated for strcmp.
  char buf[256] = "";
  size_t magic_header_size = strlen(GRAPH_MAGIC_HEADER);
  iarc.read(buf, magic_header_size);
  if (strcmp(buf, GRAPH_MAGIC_HEADER)) {
    log_and_throw(std::string("Invalid graph file."));
  }
  size_t num_partitions = 0;
  iarc >> num_partitions;
  // Held in a unique_ptr so the graph is freed if deserialization throws.
  std::unique_ptr<sgraph> g(new sgraph(num_partitions));
  iarc >> *g;
  // add_value receives the raw pointer exactly as before.
  m_graph.reset(unity_sgraph::get_dag()->add_value(g.release()));
}
예제 #25
0
std::shared_ptr<unity_sgraph_base> unity_sgraph::add_vertices(
    std::shared_ptr<unity_sframe_base> vertices,
    const std::string& id_field_name,
    size_t group) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);
  std::shared_ptr<unity_sframe> unity_sf = std::static_pointer_cast<unity_sframe>(vertices);
  ASSERT_TRUE(unity_sf != nullptr);
  std::shared_ptr<sframe> sf = unity_sf->get_underlying_sframe();

  fast_validate_add_vertices(*sf, id_field_name, group);
  std::shared_ptr<unity_sgraph> g(new unity_sgraph(*this));
  g->m_graph.reset(unity_sgraph::get_dag()->
      add_operation(new add_vertices_op<sframe>(sf, id_field_name, group), {m_graph.get()}));
  return g;
}
// Registers a batch of toolkit function specifications.
//
// When `prefix` is non-empty every name becomes "<prefix>.<name>". If any of
// the resulting names is already registered, nothing is registered and false
// is returned; otherwise all specifications are stored and true is returned.
bool toolkit_function_registry::register_toolkit_function(
    std::vector<toolkit_function_specification> specvec,
    std::string prefix) {
  log_func_entry();
  const bool has_prefix = !prefix.empty();

  // First pass: qualify the names and fail on the first existing one.
  for (auto& candidate : specvec) {
    if (has_prefix) {
      candidate.name = prefix + "." + candidate.name;
    }
    if (registry.count(candidate.name) != 0) {
      return false;
    }
  }

  // Second pass: now register
  for (const auto& candidate : specvec) {
    registry[candidate.name] = candidate;
  }
  return true;
}
예제 #27
0
// Tears the server down: calls stop(), closes the object, control and publish
// sockets, clears the registered objects, deletes the sockets, the pollset,
// every dispatcher stored in dispatch_map and keyval (if set), and finally
// destroys the ZeroMQ context.
comm_server::~comm_server() {
  log_func_entry();
  stop();
  // Close all sockets before any of them is deleted.
  object_socket->close();
  control_socket->close();
  publishsock->close();
  registered_objects.clear();
  delete object_socket;
  delete control_socket;
  delete publishsock;
  delete pollset;
  // The mapped values of dispatch_map are deleted here.
  for (auto& dispatcher: dispatch_map) {
    delete dispatcher.second;
  }
  if (keyval != NULL) delete keyval;
  // NOTE(review): registered_objects was already cleared above; this second
  // clear looks redundant -- confirm nothing repopulates it in between.
  registered_objects.clear();
  // The context is destroyed last, after every socket has been closed and
  // deleted.
  zmq_ctx_destroy(zmq_ctx);
}
예제 #28
0
// Returns a new unity_sgraph with a copy_edge_field_op added to the DAG on top
// of the current graph, copying the edge field `field` to `newfield` within
// the edge group (groupa, groupb). The destination must differ from the
// source and must not be the source or destination id column.
std::shared_ptr<unity_sgraph_base> 
unity_sgraph::copy_edge_field(const std::string field,
                              const std::string newfield,
                              size_t groupa, size_t groupb) {
  log_func_entry();
  std::lock_guard<mutex> lock(dag_access_mutex);

  if (field == newfield) {
    log_and_throw("Cannot copy to the same field");
  }

  const bool targets_id_column = newfield == sgraph::SRC_COLUMN_NAME ||
                                 newfield == sgraph::DST_COLUMN_NAME;
  if (targets_id_column) {
    log_and_throw("Cannot copy to required field " + newfield);
  }

  std::shared_ptr<unity_sgraph> result(new unity_sgraph(*this));
  result->m_graph.reset(unity_sgraph::get_dag()->add_operation(
        new copy_edge_field_op(field, newfield, groupa, groupb),
        {m_graph.get()}));
  return result;
}
예제 #29
0
  std::shared_ptr<sframe> sort(
    std::shared_ptr<lazy_sframe> sframe_ptr,
    const std::vector<std::string>& sort_column_names,
    const std::vector<bool>& sort_orders) {

    log_func_entry();

    // get sort column indexes from column names and also check column types
    std::vector<size_t> sort_column_indexes(sort_column_names.size());
    std::vector<flex_type_enum> supported_types =
        {flex_type_enum::STRING, flex_type_enum::INTEGER, flex_type_enum::FLOAT,flex_type_enum::DATETIME};
    std::set<flex_type_enum> supported_type_set(supported_types.begin(), supported_types.end());

    for(size_t i = 0; i < sort_column_names.size(); i++) {
      sort_column_indexes[i] = sframe_ptr->column_index(sort_column_names[i]);
      auto col_type = sframe_ptr->column_type(sort_column_indexes[i]);

      if (supported_type_set.count(col_type) == 0) {
        log_and_throw("Only column with type 'int', 'float', 'string', and 'datetime' can be sorted. Column '" +
            sort_column_names[i] + "'' is type: " + flex_type_enum_to_name(col_type));
      }
    }

    // Estimate the size of the sframe so that we could decide number of
    // chunks.  To account for strings, we estimate each cell is 64 bytes.
    // I'd love to estimate better.
    size_t estimated_sframe_size = sframe_num_cells(sframe_ptr) * 64.0;
    size_t num_partitions = std::ceil((1.0 * estimated_sframe_size) / sframe_config::SFRAME_SORT_BUFFER_SIZE);

    // Make partitions small enough for each thread to (theoretically) sort at once
    num_partitions = num_partitions * thread::cpu_count();

    // If we have more partitions than this, we could run into open file
    // descriptor limits
    num_partitions = std::min<size_t>(num_partitions, SFRAME_SORT_MAX_SEGMENTS);
    DASSERT_TRUE(num_partitions > 0);

    // Shortcut -- if only one partition, do a in memory sort and we are done
    if (num_partitions <= thread::cpu_count()) {
      logstream(LOG_INFO) << "Sorting SFrame in memory" << std::endl;
      return sframe_sort_impl::sort_sframe_in_memory(sframe_ptr, sort_column_indexes, sort_orders);
    }

    // This is a collection of partition keys sorted in the required order.
    // Each key is a flex_list value that contains the spliting value for
    // each sort column. Together they defines the "cut line" for all rows in
    // the SFrame.
    std::vector<flexible_type> partition_keys;


    // Do a quantile sketch on the sort columns to figure out the "splitting" points
    // for the SFrame
    timer ti;
    bool all_sorted = sframe_sort_impl::get_partition_keys(
      sframe_ptr->select_columns(sort_column_names),
      sort_orders, num_partitions, // in parameters
      partition_keys);  // out parameters
    logstream(LOG_INFO) << "Pivot estimation step: " << ti.current_time() << std::endl;

    // In rare case all values in the SFrame are the same, so no need to sort
    if (all_sorted) return sframe_ptr->get_sframe_ptr();

    // scatter partition the sframe into multiple chunks, chunks are relatively
    // sorted, but each chunk is not sorted. The sorting of each chunk is delayed
    // until it is consumed. Each chunk is stored as one segment for a sarray.
    // The chunk stores a serailized version of key and value
    std::vector<size_t> partition_sizes;

    // In the case where all sort keys in a given partition are the same, then
    // there is no need to sort the partition. This information is derived from
    // scattering
    std::vector<bool> partition_sorted(num_partitions, true);
    ti.start();
    auto partition_array = sframe_sort_impl::scatter_partition(
      sframe_ptr, sort_column_indexes, sort_orders, partition_keys, partition_sizes, partition_sorted);
    logstream(LOG_INFO) << "Scatter step: " << ti.current_time() << std::endl;

    // return a lazy sframe_ptr that would emit the sorted data lazily
    auto lazy_sort = std::make_shared<le_sort>(
      partition_array, partition_sorted, partition_sizes, sort_column_indexes,
      sort_orders, sframe_ptr->column_names(), sframe_ptr->column_types());

    return lazy_sort->eager_sort();
  }
예제 #30
0
 // Stores the toolkit function registry, the toolkit class registry and the
 // comm server pointer handed in by the caller, then logs the function entry.
 unity_global::unity_global(toolkit_function_registry* _toolkit_functions,
                            toolkit_class_registry* _classes,
                            cppipc::comm_server* server)
     : toolkit_functions(_toolkit_functions),
       classes(_classes),
       server(server) {
   log_func_entry();
 }