示例#1
0
/**
 * Starts the unity server.
 *
 * In order: sets up the log destination, configures the global environment
 * and runs the registered startup hooks, constructs the cppipc comm server,
 * registers toolkits / models / base classes through \p server_initializer,
 * loads extensions, and finally starts the comm server.
 *
 * \param server_initializer Supplies the toolkit, model, base-class and
 *                           extension registration callbacks used below.
 */
void unity_server::start(const unity_server_initializer& server_initializer) {

  // Logging destination: a rotating log when a rotation interval is set,
  // a plain log file otherwise. Nothing is done when no log file is given.
  const bool log_to_file = !options.log_file.empty();
  if (log_to_file && options.log_rotation_interval) {
    graphlab::begin_log_rotation(options.log_file,
                                 options.log_rotation_interval,
                                 options.log_rotation_truncate);
  } else if (log_to_file) {
    global_logger().set_log_file(options.log_file);
  }

  // Global configuration followed by the registered startup hooks.
  graphlab::configure_global_environment(options.root_path);
  graphlab::global_startup::get_instance().perform_startup();

  // Replace the configured server address with its parsed form.
  options.server_address = parse_server_address(options.server_address);

  // Build the comm server (empty host list, empty name).
  server = new cppipc::comm_server(std::vector<std::string>(),
                                   "",
                                   options.server_address,
                                   options.control_address,
                                   options.publish_address,
                                   options.secret_key);

  set_log_progress(true);

  // Built-in data structures, toolkits and models
  // (defined in unity_server_init.cpp).
  server_initializer.init_toolkits(*toolkit_functions);
  server_initializer.init_models(*toolkit_classes);
  create_unity_global_singleton(toolkit_functions, toolkit_classes, server);
  auto global_singleton = get_unity_global_singleton();
  server_initializer.register_base_classes(server, global_singleton);

  // Extension modules and lambda workers.
  server_initializer.init_extensions(options.root_path, global_singleton);
  lambda::set_pylambda_worker_binary_from_environment_variables();

  // Everything is registered; start the cppipc server and report status.
  server->start();
  logstream(LOG_EMPH) << "Unity server listening on: "
                      << options.server_address << std::endl;
  logstream(LOG_EMPH) << "Total System Memory Detected: "
                      << total_mem() << std::endl;
}
示例#2
0
/**
 * Configures process-wide runtime constants: the file handle limit and the
 * SFrame file handle pool, segment / cache counts derived from the CPU
 * count, and memory-derived buffer and cache sizes.
 *
 * Environment variables read here:
 *  - DISABLE_MEMORY_AUTOTUNE: when set to exactly "1", the memory-derived
 *    constants are left untouched.
 *  - GRAPHLAB_MEMORY_LIMIT_IN_MB: when set to a positive integer, replaces
 *    the default memory budget (half of total_mem()). Values that are zero,
 *    negative, unparseable, or too large to express in bytes are rejected
 *    with a warning and the default budget is kept.
 *
 * \param argv0 Forwarded to
 *              graphlab::globals::initialize_globals_from_environment().
 */
void configure_global_environment(std::string argv0) {
  // file limit upgrade has to be the very first thing that happens. The
  // reason is that on Mac, once a file descriptor has been used (even STDOUT),
  // the file handle limit increase will appear to work, but will in fact fail
  // silently.
  upgrade_file_handle_limit(4096);
  int file_handle_limit = get_file_handle_limit();
  if (file_handle_limit < 4096) {
    logstream(LOG_WARNING)
        << "Unable to raise the file handle limit to 4096. "
        << "Current file handle limit = " << file_handle_limit << ". "
        << "You may be limited to frames with about " << file_handle_limit / 16
        << " columns" << std::endl;
  }
  // if file handle limit is >= 512,
  //    the pool gets 3/4 of the limit
  // otherwise
  //    the pool is kept at 128
  if (file_handle_limit >= 512) {
    size_t practical_limit = file_handle_limit / 4 * 3;
    graphlab::SFRAME_FILE_HANDLE_POOL_SIZE = practical_limit;
  } else {
    graphlab::SFRAME_FILE_HANDLE_POOL_SIZE = 128;
  }

  graphlab::SFRAME_DEFAULT_NUM_SEGMENTS = graphlab::thread::cpu_count();
  graphlab::SFRAME_MAX_BLOCKS_IN_CACHE = 16 * graphlab::thread::cpu_count();
  graphlab::SFRAME_SORT_MAX_SEGMENTS =
      std::max(graphlab::SFRAME_SORT_MAX_SEGMENTS, graphlab::SFRAME_FILE_HANDLE_POOL_SIZE / 4);

  // configure all memory constants
  // use up at most half of system memory.
  size_t total_system_memory = total_mem();
  total_system_memory /= 2;
  boost::optional<std::string> envval = getenv_str("DISABLE_MEMORY_AUTOTUNE");
  const bool disable_memory_autotune = envval && (*envval == "1");

  // memory limit override
  envval = getenv_str("GRAPHLAB_MEMORY_LIMIT_IN_MB");
  if (envval) {
    const size_t BYTES_PER_MB = 1024 * 1024;
    // Largest MB count whose byte size still fits in size_t.
    const size_t max_limit_in_mb = static_cast<size_t>(-1) / BYTES_PER_MB;
    // atoll yields 0 when nothing can be parsed. Validate the signed value
    // BEFORE converting: a negative number converted to size_t would
    // otherwise become an enormous "limit", and a huge one would overflow
    // the multiplication.
    const long long limit_in_mb = atoll((*envval).c_str());
    const bool limit_is_valid =
        limit_in_mb > 0 &&
        static_cast<unsigned long long>(limit_in_mb) <= max_limit_in_mb;
    if (!limit_is_valid) {
      logstream(LOG_WARNING) << "GRAPHLAB_MEMORY_LIMIT_IN_MB environment "
                                "variable cannot be parsed" << std::endl;
    } else {
      total_system_memory = static_cast<size_t>(limit_in_mb) * BYTES_PER_MB;
    }
  }

  if (total_system_memory > 0 && !disable_memory_autotune) {
    // TODO: MANY MANY HEURISTICS
    // assume we have 1/2 of working memory to do things like sort, join, etc.
    // and the other 1/2 of working memory goes to file caching
    // HEURISTIC 1: Cell size estimate is 64
    // HEURISTIC 2: Row size estimate is Cell size estimate * 5
    //
    // Also, we only allow upgrades on the existing conservative values when
    // doing these estimates to prevent us from having impractically small
    // values.
    // NOTE(review): the assignments below are unconditional; the
    // "upgrades only" behaviour described above is not enforced here.
    size_t CELL_SIZE_ESTIMATE = 64;
    size_t ROW_SIZE_ESTIMATE = CELL_SIZE_ESTIMATE * 5;
    size_t max_cell_estimate = total_system_memory / 4 / CELL_SIZE_ESTIMATE;
    size_t max_row_estimate = total_system_memory / 4 / ROW_SIZE_ESTIMATE;

    graphlab::SFRAME_GROUPBY_BUFFER_NUM_ROWS = max_row_estimate;
    graphlab::SFRAME_JOIN_BUFFER_NUM_CELLS = max_cell_estimate;
    graphlab::sframe_config::SFRAME_SORT_BUFFER_SIZE = total_system_memory / 4;
    graphlab::fileio::FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE = total_system_memory / 2;
    graphlab::fileio::FILEIO_MAXIMUM_CACHE_CAPACITY = total_system_memory / 2;
  }
  graphlab::globals::initialize_globals_from_environment(argv0);


  // force initialize rng
  graphlab::random::get_source();
}