Ejemplo n.º 1
0
/* Reset *opts to the built-in defaults (dr_options_default_values), then let
   environment variables override individual fields.

   Every option is looked up under two names: a long DAG_RECORDER_* name and
   a short DR_* alias.  The long name is tried first; the alias is consulted
   only when the getter for the long name returns a false value (presumably
   meaning "not set" -- confirm against the getenv_* helpers). */
void dr_options_default_(dr_options * opts) {
  *opts = dr_options_default_values;

  /* master switch */
  if (!getenv_bool("DAG_RECORDER", &opts->on)) {
    (void)getenv_bool("DR", &opts->on);
  }

  /* output files */
  if (!getenv_str("DAG_RECORDER_FILE_PREFIX", &opts->dag_file_prefix)) {
    (void)getenv_str("DR_PREFIX", &opts->dag_file_prefix);
  }
  if (!getenv_bool("DAG_RECORDER_DAG_FILE", &opts->dag_file_yes)) {
    (void)getenv_bool("DR_DAG", &opts->dag_file_yes);
  }
  if (!getenv_bool("DAG_RECORDER_STAT_FILE", &opts->stat_file_yes)) {
    (void)getenv_bool("DR_STAT", &opts->stat_file_yes);
  }
  if (!getenv_bool("DAG_RECORDER_GPL_FILE", &opts->gpl_file_yes)) {
    (void)getenv_bool("DR_GPL", &opts->gpl_file_yes);
  }
  if (!getenv_bool("DAG_RECORDER_DOT_FILE", &opts->dot_file_yes)) {
    (void)getenv_bool("DR_DOT", &opts->dot_file_yes);
  }
  if (!getenv_bool("DAG_RECORDER_TEXT_FILE", &opts->text_file_yes)) {
    (void)getenv_bool("DR_TEXT", &opts->text_file_yes);
  }
  /* NOTE: sqlite_file is deliberately not settable via the environment */
  if (!getenv_int("DAG_RECORDER_GPL_SIZE", &opts->gpl_sz)) {
    (void)getenv_int("DR_GPL_SZ", &opts->gpl_sz);
  }
  if (!getenv_str("DAG_RECORDER_TEXT_FILE_SEP", &opts->text_file_sep)) {
    (void)getenv_str("DR_TEXT_SEP", &opts->text_file_sep);
  }

  /* diagnostic levels */
  if (!getenv_byte("DAG_RECORDER_DBG_LEVEL", &opts->dbg_level)) {
    (void)getenv_byte("DR_DBG", &opts->dbg_level);
  }
  if (!getenv_byte("DAG_RECORDER_VERBOSE_LEVEL", &opts->verbose_level)) {
    (void)getenv_byte("DR_VERBOSE", &opts->verbose_level);
  }
  if (!getenv_byte("DAG_RECORDER_CHK_LEVEL", &opts->chk_level)) {
    (void)getenv_byte("DR_CHK", &opts->chk_level);
  }

  /* collapse / prune parameters */
  if (!getenv_ull("DAG_RECORDER_UNCOLLAPSE_MIN", &opts->uncollapse_min)) {
    (void)getenv_ull("DR_UNCOLLAPSE_MIN", &opts->uncollapse_min);
  }
  if (!getenv_ull("DAG_RECORDER_COLLAPSE_MAX", &opts->collapse_max)) {
    (void)getenv_ull("DR_COLLAPSE_MAX", &opts->collapse_max);
  }
  if (!getenv_long("DAG_RECORDER_NODE_COUNT", &opts->node_count_target)) {
    (void)getenv_long("DR_NC", &opts->node_count_target);
  }
  if (!getenv_long("DAG_RECORDER_PRUNE_THRESHOLD", &opts->prune_threshold)) {
    (void)getenv_long("DR_PRUNE", &opts->prune_threshold);
  }
  if (!getenv_long("DAG_RECORDER_COLLAPSE_MAX_COUNT",
                   &opts->collapse_max_count)) {
    (void)getenv_long("DR_COLLAPSE_MAX_COUNT", &opts->collapse_max_count);
  }

  /* allocation parameters */
  if (!getenv_long("DAG_RECORDER_ALLOC_UNIT_MB", &opts->alloc_unit_mb)) {
    (void)getenv_long("DR_ALLOC_UNIT_MB", &opts->alloc_unit_mb);
  }
  if (!getenv_long("DAG_RECORDER_PRE_ALLOC_PER_WORKER",
                   &opts->pre_alloc_per_worker)) {
    (void)getenv_long("DR_PRE_ALLOC_PER_WORKER", &opts->pre_alloc_per_worker);
  }
  if (!getenv_long("DAG_RECORDER_PRE_ALLOC", &opts->pre_alloc)) {
    (void)getenv_long("DR_PRE_ALLOC", &opts->pre_alloc);
  }
}
Ejemplo n.º 2
0
/**
 * Sets up process-wide tunables: the file handle limit, the SFrame file
 * handle pool / segment / cache sizes, and (unless disabled) memory-derived
 * buffer and cache capacities. Then it loads globals from the environment and
 * forces initialization of the random number source.
 *
 * Environment variables read here:
 *  - DISABLE_MEMORY_AUTOTUNE: when exactly "1", the memory-derived settings
 *    are left untouched.
 *  - GRAPHLAB_MEMORY_LIMIT_IN_MB: positive integer number of megabytes that
 *    replaces the default budget (half of total_mem()). Values that parse
 *    to zero or a negative number are ignored with a warning.
 *
 * \param argv0 Passed through unchanged to
 *              graphlab::globals::initialize_globals_from_environment.
 */
void configure_global_environment(std::string argv0) {
  // file limit upgrade has to be the very first thing that happens. The
  // reason is that on Mac, once a file descriptor has been used (even STDOUT),
  // the file handle limit increase will appear to work, but will in fact fail
  // silently.
  upgrade_file_handle_limit(4096);
  int file_handle_limit = get_file_handle_limit();
  if (file_handle_limit < 4096) {
    logstream(LOG_WARNING) 
        << "Unable to raise the file handle limit to 4096. "
        << "Current file handle limit = " << file_handle_limit << ". "
        << "You may be limited to frames with about " << file_handle_limit / 16 
        << " columns" << std::endl;
  }
  // If the file handle limit is >= 512, the pool gets 3/4 of the limit;
  // otherwise the pool size is fixed at 128.
  if (file_handle_limit >= 512) {
    size_t practical_limit = file_handle_limit / 4 * 3;
    graphlab::SFRAME_FILE_HANDLE_POOL_SIZE = practical_limit;
  } else {
    graphlab::SFRAME_FILE_HANDLE_POOL_SIZE = 128;
  }

  graphlab::SFRAME_DEFAULT_NUM_SEGMENTS = graphlab::thread::cpu_count();
  graphlab::SFRAME_MAX_BLOCKS_IN_CACHE = 16 * graphlab::thread::cpu_count();
  // Only ever raise the sort segment count, never lower it.
  graphlab::SFRAME_SORT_MAX_SEGMENTS = 
      std::max(graphlab::SFRAME_SORT_MAX_SEGMENTS, graphlab::SFRAME_FILE_HANDLE_POOL_SIZE / 4);

  // configure all memory constants
  // use up at most half of system memory.
  size_t total_system_memory = total_mem();
  total_system_memory /= 2;
  boost::optional<std::string> envval = getenv_str("DISABLE_MEMORY_AUTOTUNE");
  const bool disable_memory_autotune =
      static_cast<bool>(envval) && (*envval == "1");

  // memory limit override
  envval = getenv_str("GRAPHLAB_MEMORY_LIMIT_IN_MB");
  if (envval) {
    // Validate the parsed value while it is still signed: a negative input
    // converted straight to size_t would wrap to an enormous "limit" and be
    // accepted as the memory budget.
    const long long limit_in_mb = atoll(envval->c_str());
    if (limit_in_mb <= 0) {
      logstream(LOG_WARNING) << "GRAPHLAB_MEMORY_LIMIT_IN_MB environment "
                                "variable cannot be parsed" << std::endl;
    } else {
      // Multiply in size_t so an oversized value cannot trigger signed
      // overflow.
      total_system_memory =
          static_cast<size_t>(limit_in_mb) * 1024 * 1024; /* MB -> bytes */
    }
  }

  if (total_system_memory > 0 && !disable_memory_autotune) {
    // TODO: MANY MANY HEURISTICS 
    // assume we have 1/2 of working memory to do things like sort, join, etc.
    // and the other 1/2 of working memory goes to file caching
    // HEURISTIC 1: Cell size estimate is 64
    // HEURISTIC 2: Row size estimate is Cell size estimate * 5
    size_t CELL_SIZE_ESTIMATE = 64;
    size_t ROW_SIZE_ESTIMATE = CELL_SIZE_ESTIMATE * 5;
    size_t max_cell_estimate = total_system_memory / 4 / CELL_SIZE_ESTIMATE;
    size_t max_row_estimate = total_system_memory / 4 / ROW_SIZE_ESTIMATE;

    graphlab::SFRAME_GROUPBY_BUFFER_NUM_ROWS = max_row_estimate;
    graphlab::SFRAME_JOIN_BUFFER_NUM_CELLS = max_cell_estimate;
    graphlab::sframe_config::SFRAME_SORT_BUFFER_SIZE = total_system_memory / 4;
    graphlab::fileio::FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE = total_system_memory / 2;
    graphlab::fileio::FILEIO_MAXIMUM_CACHE_CAPACITY = total_system_memory / 2;
  }
  graphlab::globals::initialize_globals_from_environment(argv0);


  // force initialize rng
  graphlab::random::get_source();
}