/* Fill *opts with the default value of every option, then let environment
   variables override individual fields.

   Every option can be given under a long name (DAG_RECORDER_...) or a short
   alias (DR_...).  The long name is looked up first; the alias is looked up
   only when the long-name lookup yields zero.
   NOTE(review): presumably the getenv_* helpers return non-zero when they
   found a value and stored it through the pointer -- confirm against their
   definitions. */
void dr_options_default_(dr_options * opts) {
  *opts = dr_options_default_values;

  /* master switch */
  (void)(getenv_bool("DAG_RECORDER", &opts->on)
         || getenv_bool("DR", &opts->on));

  /* output file prefix and which output files to produce */
  (void)(getenv_str("DAG_RECORDER_FILE_PREFIX", &opts->dag_file_prefix)
         || getenv_str("DR_PREFIX", &opts->dag_file_prefix));
  (void)(getenv_bool("DAG_RECORDER_DAG_FILE", &opts->dag_file_yes)
         || getenv_bool("DR_DAG", &opts->dag_file_yes));
  (void)(getenv_bool("DAG_RECORDER_STAT_FILE", &opts->stat_file_yes)
         || getenv_bool("DR_STAT", &opts->stat_file_yes));
  (void)(getenv_bool("DAG_RECORDER_GPL_FILE", &opts->gpl_file_yes)
         || getenv_bool("DR_GPL", &opts->gpl_file_yes));
  (void)(getenv_bool("DAG_RECORDER_DOT_FILE", &opts->dot_file_yes)
         || getenv_bool("DR_DOT", &opts->dot_file_yes));
  (void)(getenv_bool("DAG_RECORDER_TEXT_FILE", &opts->text_file_yes)
         || getenv_bool("DR_TEXT", &opts->text_file_yes));

  /* NOTE: we do not set sqlite_file via environment variables */

  /* output formatting */
  (void)(getenv_int("DAG_RECORDER_GPL_SIZE", &opts->gpl_sz)
         || getenv_int("DR_GPL_SZ", &opts->gpl_sz));
  (void)(getenv_str("DAG_RECORDER_TEXT_FILE_SEP", &opts->text_file_sep)
         || getenv_str("DR_TEXT_SEP", &opts->text_file_sep));

  /* diagnostic levels */
  (void)(getenv_byte("DAG_RECORDER_DBG_LEVEL", &opts->dbg_level)
         || getenv_byte("DR_DBG", &opts->dbg_level));
  (void)(getenv_byte("DAG_RECORDER_VERBOSE_LEVEL", &opts->verbose_level)
         || getenv_byte("DR_VERBOSE", &opts->verbose_level));
  (void)(getenv_byte("DAG_RECORDER_CHK_LEVEL", &opts->chk_level)
         || getenv_byte("DR_CHK", &opts->chk_level));

  /* collapse / prune parameters */
  (void)(getenv_ull("DAG_RECORDER_UNCOLLAPSE_MIN", &opts->uncollapse_min)
         || getenv_ull("DR_UNCOLLAPSE_MIN", &opts->uncollapse_min));
  (void)(getenv_ull("DAG_RECORDER_COLLAPSE_MAX", &opts->collapse_max)
         || getenv_ull("DR_COLLAPSE_MAX", &opts->collapse_max));
  (void)(getenv_long("DAG_RECORDER_NODE_COUNT", &opts->node_count_target)
         || getenv_long("DR_NC", &opts->node_count_target));
  (void)(getenv_long("DAG_RECORDER_PRUNE_THRESHOLD", &opts->prune_threshold)
         || getenv_long("DR_PRUNE", &opts->prune_threshold));
  (void)(getenv_long("DAG_RECORDER_COLLAPSE_MAX_COUNT",
                     &opts->collapse_max_count)
         || getenv_long("DR_COLLAPSE_MAX_COUNT", &opts->collapse_max_count));

  /* allocation parameters */
  (void)(getenv_long("DAG_RECORDER_ALLOC_UNIT_MB", &opts->alloc_unit_mb)
         || getenv_long("DR_ALLOC_UNIT_MB", &opts->alloc_unit_mb));
  (void)(getenv_long("DAG_RECORDER_PRE_ALLOC_PER_WORKER",
                     &opts->pre_alloc_per_worker)
         || getenv_long("DR_PRE_ALLOC_PER_WORKER",
                        &opts->pre_alloc_per_worker));
  (void)(getenv_long("DAG_RECORDER_PRE_ALLOC", &opts->pre_alloc)
         || getenv_long("DR_PRE_ALLOC", &opts->pre_alloc));
}
void configure_global_environment(std::string argv0) { // file limit upgrade has to be the very first thing that happens. The // reason is that on Mac, once a file descriptor has been used (even STDOUT), // the file handle limit increase will appear to work, but will in fact fail // silently. upgrade_file_handle_limit(4096); int file_handle_limit = get_file_handle_limit(); if (file_handle_limit < 4096) { logstream(LOG_WARNING) << "Unable to raise the file handle limit to 4096. " << "Current file handle limit = " << file_handle_limit << ". " << "You may be limited to frames with about " << file_handle_limit / 16 << " columns" << std::endl; } // if file handle limit is >= 512, // we take either 3/4 of the limit // otherwise // we keep it to 128 if (file_handle_limit >= 512) { size_t practical_limit = file_handle_limit / 4 * 3; graphlab::SFRAME_FILE_HANDLE_POOL_SIZE = practical_limit; } else { graphlab::SFRAME_FILE_HANDLE_POOL_SIZE = 128; } graphlab::SFRAME_DEFAULT_NUM_SEGMENTS = graphlab::thread::cpu_count(); graphlab::SFRAME_MAX_BLOCKS_IN_CACHE = 16 * graphlab::thread::cpu_count(); graphlab::SFRAME_SORT_MAX_SEGMENTS = std::max(graphlab::SFRAME_SORT_MAX_SEGMENTS, graphlab::SFRAME_FILE_HANDLE_POOL_SIZE / 4); // configure all memory constants // use up at most half of system memory. 
size_t total_system_memory = total_mem(); total_system_memory /= 2; boost::optional<std::string> envval = getenv_str("DISABLE_MEMORY_AUTOTUNE"); bool disable_memory_autotune = ((bool)envval) && (std::string(*envval) == "1"); // memory limit envval = getenv_str("GRAPHLAB_MEMORY_LIMIT_IN_MB"); if (envval) { size_t limit = atoll((*envval).c_str()) * 1024 * 1024; /* MB */ if (limit == 0) { logstream(LOG_WARNING) << "GRAPHLAB_MEMORY_LIMIT_IN_MB environment " "variable cannot be parsed" << std::endl; } else { total_system_memory = limit; } } if (total_system_memory > 0 && !disable_memory_autotune) { // TODO: MANY MANY HEURISTICS // assume we have 1/2 of working memory to do things like sort, join, etc. // and the other 1/2 of working memory goes to file caching // HUERISTIC 1: Cell size estimate is 64 // HUERISTIC 2: Row size estimate is Cell size estimate * 5 // // Also, we only allow upgrades on the existing conservative values when // duing these estimates to prevent us from having impractically small // values. size_t CELL_SIZE_ESTIMATE = 64; size_t ROW_SIZE_ESTIMATE = CELL_SIZE_ESTIMATE * 5; size_t max_cell_estimate = total_system_memory / 4 / CELL_SIZE_ESTIMATE; size_t max_row_estimate = total_system_memory / 4 / ROW_SIZE_ESTIMATE; graphlab::SFRAME_GROUPBY_BUFFER_NUM_ROWS = max_row_estimate; graphlab::SFRAME_JOIN_BUFFER_NUM_CELLS = max_cell_estimate; graphlab::sframe_config::SFRAME_SORT_BUFFER_SIZE = total_system_memory / 4; graphlab::fileio::FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE = total_system_memory / 2; graphlab::fileio::FILEIO_MAXIMUM_CACHE_CAPACITY = total_system_memory / 2; } graphlab::globals::initialize_globals_from_environment(argv0); // force initialize rng graphlab::random::get_source(); }