struct category *category_lookup_or_create(struct hash_table *categories, const char *name)
{
	struct category *c;

	if(!name)
		name = "default";

	c = hash_table_lookup(categories, name);
	if(c)
		return c;

	c = calloc(1, sizeof(struct category));

	c->name       = xxstrdup(name);
	c->fast_abort = -1;

	c->total_tasks = 0;

	c->first_allocation   = NULL;
	c->max_allocation     = rmsummary_create(-1);
	c->autolabel_resource = rmsummary_create(0);

	c->max_resources_seen = rmsummary_create(-1);

	c->cores_histogram           = histogram_create(1);
	c->wall_time_histogram       = histogram_create(time_bucket_size);
	c->cpu_time_histogram        = histogram_create(time_bucket_size);
	c->memory_histogram          = histogram_create(memory_bucket_size);
	c->swap_memory_histogram     = histogram_create(memory_bucket_size);
	c->virtual_memory_histogram  = histogram_create(memory_bucket_size);
	c->bytes_read_histogram      = histogram_create(bytes_bucket_size);
	c->bytes_written_histogram   = histogram_create(bytes_bucket_size);
	c->bytes_received_histogram  = histogram_create(bytes_bucket_size);
	c->bytes_sent_histogram      = histogram_create(bytes_bucket_size);
	c->bandwidth_histogram       = histogram_create(bandwidth_bucket_size);
	c->total_files_histogram     = histogram_create(1);
	c->disk_histogram            = histogram_create(disk_bucket_size);
	c->total_processes_histogram = histogram_create(1);
	c->max_concurrent_processes_histogram = histogram_create(1);

	c->time_peak_independece = 0;

	c->steady_state = 0;
	c->completions_since_last_reset = 0;

	c->allocation_mode = CATEGORY_ALLOCATION_MODE_FIXED;

	hash_table_insert(categories, name, c);

	return c;
}
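/* A minimal usage sketch (not part of the original source); it assumes the
 * CCTools hash_table API as used elsewhere in this file, and the category
 * name "default" behavior shown above. */
void category_lookup_example(void)
{
	struct hash_table *categories = hash_table_create(0, 0);

	/* a NULL name falls back to the "default" category */
	struct category *c1 = category_lookup_or_create(categories, NULL);

	/* repeated lookups return the same object rather than allocating again,
	 * so c1 and c2 point to the same struct category */
	struct category *c2 = category_lookup_or_create(categories, "default");
	(void) c1;
	(void) c2;
}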
struct dag_node *dag_node_create(struct dag *d, int linenum)
{
	struct dag_node *n;

	n = malloc(sizeof(struct dag_node));
	memset(n, 0, sizeof(struct dag_node));
	n->d = d;
	n->linenum = linenum;
	n->state = DAG_NODE_STATE_WAITING;
	n->nodeid = d->nodeid_counter++;
	n->variables = hash_table_create(0, 0);

	n->source_files = list_create(0);
	n->target_files = list_create(0);

	n->remote_names     = itable_create(0);
	n->remote_names_inv = hash_table_create(0, 0);

	n->descendants = set_create(0);
	n->ancestors   = set_create(0);

	n->ancestor_depth = -1;

	n->resources_requested = rmsummary_create(-1);
	n->resources_measured  = NULL;

	n->resource_request = CATEGORY_ALLOCATION_FIRST;

	return n;
}
struct rmsummary *json_to_rmsummary(struct jx *j)
{
	if(!j || !jx_istype(j, JX_OBJECT))
		return NULL;

	struct rmsummary *s = rmsummary_create(-1);

	struct jx_pair *head = j->u.pairs;
	while(head) {
		if(!jx_istype(head->key, JX_STRING)) {
			/* skip non-string keys, advancing so we do not loop forever */
			head = head->next;
			continue;
		}

		char *key        = head->key->u.string_value;
		struct jx *value = head->value;

		if(jx_istype(value, JX_STRING)) {
			rmsummary_assign_char_field(s, key, value->u.string_value);
		} else if(jx_istype(value, JX_INTEGER)) {
			rmsummary_assign_int_field(s, key, value->u.integer_value);
		} else if(jx_istype(value, JX_ARRAY)) {
			int64_t number;
			int status = json_number_of_array(value, key, &number);
			if(status) {
				rmsummary_assign_int_field(s, key, number);
			}
		}

		head = head->next;
	}

	return s;
}
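/* A minimal usage sketch (not part of the original source). It assumes the
 * CCTools jx parser is available via jx_parse_string(); the JSON keys and
 * values shown are illustrative. */
void json_to_rmsummary_example(void)
{
	struct jx *j = jx_parse_string("{\"cores\":4, \"memory\":[512, \"MB\"]}");

	struct rmsummary *s = json_to_rmsummary(j);
	if(s) {
		/* on success, s carries the parsed fields (e.g. cores, memory) */
		rmsummary_delete(s);
	}

	jx_delete(j);
}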
const struct rmsummary *category_dynamic_task_min_resources(struct category *c, struct rmsummary *user, category_allocation_t request)
{
	static struct rmsummary *internal = NULL;

	const struct rmsummary *max = category_dynamic_task_max_resources(c, user, request);

	if(internal) {
		rmsummary_delete(internal);
	}

	internal = rmsummary_create(-1);

	/* load seen values */
	struct rmsummary *seen = c->max_resources_seen;
	if(c->allocation_mode != CATEGORY_ALLOCATION_MODE_FIXED) {
		internal->cores  = seen->cores;
		internal->memory = seen->memory;
		internal->disk   = seen->disk;
	}

	rmsummary_merge_override(internal, max);

	return internal;
}
const struct rmsummary *category_dynamic_task_max_resources(struct category *c, struct rmsummary *user, category_allocation_t request)
{
	/* we keep an internal label so that the caller does not have to worry
	 * about memory leaks. */
	static struct rmsummary *internal = NULL;

	if(internal) {
		rmsummary_delete(internal);
	}

	internal = rmsummary_create(-1);

	struct rmsummary *max   = c->max_allocation;
	struct rmsummary *first = c->first_allocation;
	struct rmsummary *seen  = c->max_resources_seen;

	if(c->steady_state && c->allocation_mode != CATEGORY_ALLOCATION_MODE_FIXED) {
		internal->cores  = seen->cores;
		internal->memory = seen->memory;
		internal->disk   = seen->disk;
	}

	/* load max values */
	rmsummary_merge_override(internal, max);

	if(c->allocation_mode != CATEGORY_ALLOCATION_MODE_FIXED
			&& request == CATEGORY_ALLOCATION_FIRST) {
		rmsummary_merge_override(internal, first);
	}

	/* chip in user values */
	rmsummary_merge_override(internal, user);

	return internal;
}
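/* A minimal usage sketch (not part of the original source) showing the
 * ownership contract implied by the static buffer above: the caller must
 * not free the returned summary, and the next call invalidates it, so
 * anything that must outlive the call has to be copied. */
void dynamic_task_max_resources_example(struct category *c)
{
	struct rmsummary *user = rmsummary_create(-1);
	user->cores = 2;	/* illustrative user-requested override */

	const struct rmsummary *limits =
		category_dynamic_task_max_resources(c, user, CATEGORY_ALLOCATION_FIRST);

	/* keep a private copy before calling the function again */
	struct rmsummary *copy = rmsummary_create(-1);
	rmsummary_merge_override(copy, limits);

	rmsummary_delete(user);
	rmsummary_delete(copy);
	/* do NOT delete 'limits': it is owned by the callee */
}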
void categories_initialize(struct hash_table *categories, struct rmsummary *top, const char *summaries_file)
{
	struct list *summaries = rmsummary_parse_file_multiple(summaries_file);

	if(!summaries) {
		fatal("Could not read '%s' file: %s\n", summaries_file, strerror(errno));
	}

	char *name;
	struct category *c;
	hash_table_firstkey(categories);
	while(hash_table_nextkey(categories, &name, (void **) &c)) {
		category_clear_histograms(c);
		if(c->first_allocation) {
			rmsummary_delete(c->first_allocation);
			c->first_allocation = rmsummary_create(-1);
		}
	}

	struct rmsummary *s;
	list_first_item(summaries);
	while((s = list_pop_head(summaries))) {
		if(s->category) {
			c = category_lookup_or_create(categories, s->category);
			category_accumulate_summary(c, s, NULL);
		}
		rmsummary_delete(s);
	}

	hash_table_firstkey(categories);
	while(hash_table_nextkey(categories, &name, (void **) &c)) {
		category_update_first_allocation(c, NULL);
		category_clear_histograms(c);
	}
}
void rmDsummary_print(FILE *output, struct rmDsummary *so)
{
	struct rmsummary *s = rmsummary_create(-1);

	s->command = xxstrdup(so->command);

	if(so->category) {
		s->category = xxstrdup(so->category);
	} else if(so->command) {
		s->category = xxstrdup(so->command);
	} else {
		s->category = xxstrdup(DEFAULT_CATEGORY);
		s->command  = xxstrdup(DEFAULT_CATEGORY);
	}

	if(so->task_id) {
		s->task_id = xxstrdup(so->task_id);
	}

	s->start     = so->start;
	s->end       = so->end;
	s->wall_time = so->wall_time;

	to_internal(so, s, start, "us");
	to_internal(so, s, end,   "us");

	to_internal(so, s, wall_time, "s");
	to_internal(so, s, cpu_time,  "s");

	to_internal(so, s, cores, "cores");
	to_internal(so, s, total_processes,          "procs");
	to_internal(so, s, max_concurrent_processes, "procs");

	to_internal(so, s, memory,         "MB");
	to_internal(so, s, virtual_memory, "MB");
	to_internal(so, s, swap_memory,    "MB");

	to_internal(so, s, bytes_read,     "MB");
	to_internal(so, s, bytes_written,  "MB");
	to_internal(so, s, bytes_received, "MB");
	to_internal(so, s, bytes_sent,     "MB");
	to_internal(so, s, bandwidth,      "Mbps");

	to_internal(so, s, total_files, "files");
	to_internal(so, s, disk,        "MB");

	rmsummary_print(output, s, /* pprint */ 1, /* extra fields */ 0);

	rmsummary_delete(s);

	return;
}
void category_specify_first_allocation_guess(struct category *c, const struct rmsummary *s)
{
	/* assume user knows what they are doing. */
	c->steady_state = 1;
	rmsummary_merge_max(c->max_resources_seen, s);

	rmsummary_delete(c->first_allocation);

	c->first_allocation = rmsummary_create(-1);
	rmsummary_merge_max(c->first_allocation, s);
}
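/* A minimal usage sketch (not part of the original source); the category
 * name and resource values are illustrative. */
void first_allocation_guess_example(struct hash_table *categories)
{
	struct category *c = category_lookup_or_create(categories, "analysis");

	struct rmsummary *guess = rmsummary_create(-1);
	guess->cores  = 1;
	guess->memory = 1024;	/* MB */
	guess->disk   = 2048;	/* MB */

	/* seed the first allocation instead of waiting for measured tasks */
	category_specify_first_allocation_guess(c, guess);

	rmsummary_delete(guess);
}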
int main(int argc, char *argv[])
{
	batch_queue_type_t batch_queue_type = BATCH_QUEUE_TYPE_UNKNOWN;

	catalog_host = CATALOG_HOST;
	catalog_port = CATALOG_PORT;

	batch_submit_options = getenv("BATCH_OPTIONS");

	debug_config(argv[0]);

	resources = rmsummary_create(-1);

	int c;

	while((c = getopt_long(argc, argv, "B:C:F:N:M:T:t:w:W:E:P:S:cd:o:O:vh", long_options, NULL)) > -1) {
		switch (c) {
			case 'B':
				batch_submit_options = xxstrdup(optarg);
				break;
			case 'C':
				config_file = xxstrdup(optarg);
				break;
			case 'F':
				foremen_regex = xxstrdup(optarg);
				break;
			case 'N':
			case 'M':
				project_regex = xxstrdup(optarg);
				break;
			case 'T':
				batch_queue_type = batch_queue_type_from_string(optarg);
				if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) {
					fprintf(stderr, "unknown batch queue type: %s\n", optarg);
					return EXIT_FAILURE;
				}
				break;
			case 't':
				worker_timeout = atoi(optarg);
				break;
			case 'w':
				workers_min = atoi(optarg);
				break;
			case 'W':
				workers_max = atoi(optarg);
				break;
			case LONG_OPT_WORKERS_PER_CYCLE:
				workers_per_cycle = atoi(optarg);
				break;
			case LONG_OPT_TASKS_PER_WORKER:
				tasks_per_worker = atof(optarg);
				break;
			case 'E':
				extra_worker_args = xxstrdup(optarg);
				break;
			case LONG_OPT_CORES:
				resources->cores = atoi(optarg);
				break;
			case LONG_OPT_AMAZON_CREDENTIALS:
				amazon_credentials = xxstrdup(optarg);
				break;
			case LONG_OPT_AMAZON_AMI:
				amazon_ami = xxstrdup(optarg);
				break;
			case LONG_OPT_MEMORY:
				resources->memory = atoi(optarg);
				break;
			case LONG_OPT_DISK:
				resources->disk = atoi(optarg);
				break;
			case LONG_OPT_GPUS:
				resources->gpus = atoi(optarg);
				break;
			case LONG_OPT_AUTOSIZE:
				autosize = 1;
				break;
			case LONG_OPT_FACTORY_TIMEOUT:
				factory_timeout = MAX(0, atoi(optarg));
				break;
			case LONG_OPT_CONDOR_REQUIREMENTS:
				if(condor_requirements) {
					char *tmp = condor_requirements;
					condor_requirements = string_format("(%s && (%s))", tmp, optarg);
					free(tmp);
				} else {
					condor_requirements = string_format("(%s)", optarg);
				}
				break;
			case LONG_OPT_WRAPPER:
				wrapper_command = optarg;
				break;
			case LONG_OPT_WRAPPER_INPUT:
				if(!wrapper_input) {
					wrapper_input = strdup(optarg);
				} else {
					wrapper_input = string_format("%s,%s", wrapper_input, optarg);
				}
				break;
			case 'P':
				password_file = optarg;
				break;
			case 'S':
				scratch_dir = optarg;
				break;
			case 'c':
				consider_capacity = 1;
				break;
			case 'd':
				debug_flags_set(optarg);
				break;
			case 'o':
				debug_config_file(optarg);
				break;
			case 'O':
				debug_config_file_size(string_metric_parse(optarg));
				break;
			case 'v':
				cctools_version_print(stdout, argv[0]);
				exit(EXIT_SUCCESS);
			case 'h':
				show_help(argv[0]);
				exit(EXIT_SUCCESS);
			default:
				show_help(argv[0]);
				return EXIT_FAILURE;
		}
	}

	if(project_regex) {
		using_catalog = 1;
	} else if((argc - optind) == 2) {
		master_host = argv[optind];
		master_port = atoi(argv[optind+1]);
	} else {
		fprintf(stderr, "work_queue_factory: You must either give a project name with the -M option, a master-name option with a configuration file, or the master's host and port.\n");
		show_help(argv[0]);
		exit(1);
	}

	cctools_version_debug(D_DEBUG, argv[0]);

	if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) {
		fprintf(stderr, "work_queue_factory: You must specify a batch type with the -T option.\n");
		fprintf(stderr, "valid options:\n");
		fprintf(stderr, "%s\n", batch_queue_type_string());
		return 1;
	}

	if(config_file) {
		char abs_path_name[PATH_MAX];

		if(!realpath(config_file, abs_path_name)) {
			fprintf(stderr, "work_queue_factory: could not resolve configuration file path: '%s'.\n", config_file);
			exit(EXIT_FAILURE);
		}

		free(config_file);

		/* From now on, read config_file from absolute path */
		config_file = xxstrdup(abs_path_name);

		if(!read_config_file(config_file)) {
			fprintf(stderr, "work_queue_factory: There were errors in the configuration file: %s\n", config_file);
			return 1;
		}
	}

	if(workers_min > workers_max) {
		fprintf(stderr, "work_queue_factory: min workers (%d) is greater than max workers (%d)\n", workers_min, workers_max);
		return 1;
	}

	/* Careful here: most of the supported batch systems expect that jobs are
	 * submitted from a single shared filesystem. Changing to /tmp only works
	 * in the case of Condor. */
	if(!scratch_dir) {
		if(batch_queue_type == BATCH_QUEUE_TYPE_CONDOR) {
			scratch_dir = string_format("/tmp/wq-pool-%d", getuid());
		} else {
			scratch_dir = string_format("wq-pool-%d", getuid());
		}
	}

	if(!create_dir(scratch_dir, 0777)) {
		fprintf(stderr, "work_queue_factory: couldn't create %s: %s\n", scratch_dir, strerror(errno));
		return 1;
	}

	char cmd[1024];
	sprintf(cmd, "cp \"$(which work_queue_worker)\" '%s'", scratch_dir);
	if(system(cmd)) {
		fprintf(stderr, "work_queue_factory: please add work_queue_worker to your PATH.\n");
		exit(EXIT_FAILURE);
	}

	if(password_file) {
		sprintf(cmd, "cp %s %s/pwfile", password_file, scratch_dir);
		system(cmd);
	}

	if(chdir(scratch_dir) != 0) {
		fprintf(stderr, "work_queue_factory: couldn't chdir to %s: %s\n", scratch_dir, strerror(errno));
		return 1;
	}

	signal(SIGINT,  handle_abort);
	signal(SIGQUIT, handle_abort);
	signal(SIGTERM, handle_abort);
	signal(SIGHUP,  ignore_signal);

	queue = batch_queue_create(batch_queue_type);
	if(!queue) {
		fprintf(stderr, "work_queue_factory: couldn't establish queue type %s\n", batch_queue_type_to_string(batch_queue_type));
		return 1;
	}

	batch_queue_set_option(queue, "batch-options", batch_submit_options);
	batch_queue_set_option(queue, "autosize", autosize ? "yes" : NULL);
	set_worker_resources_options(queue);

	if(amazon_credentials != NULL) {
		batch_queue_set_option(queue, "amazon-credentials", amazon_credentials);
	}
	if(amazon_ami != NULL) {
		batch_queue_set_option(queue, "amazon-ami", amazon_ami);
	}

	if(condor_requirements != NULL && batch_queue_type != BATCH_QUEUE_TYPE_CONDOR) {
		debug(D_NOTICE, "condor_requirements will be ignored as workers will not be running in condor.");
	} else {
		batch_queue_set_option(queue, "condor-requirements", condor_requirements);
	}

	mainloop(queue);

	batch_queue_delete(queue);

	return 0;
}
void category_specify_max_allocation(struct category *c, const struct rmsummary *s)
{
	rmsummary_delete(c->max_allocation);
	c->max_allocation = rmsummary_create(-1);
	rmsummary_merge_max(c->max_allocation, s);
}
int category_update_first_allocation(struct category *c, const struct rmsummary *max_worker)
{
	/* buffer used only for debug output. */
	static buffer_t *b = NULL;
	if(!b) {
		b = malloc(sizeof(buffer_t));
		buffer_init(b);
	}

	if(c->allocation_mode == CATEGORY_ALLOCATION_MODE_FIXED)
		return 0;

	if(c->total_tasks < 1)
		return 0;

	struct rmsummary *top = rmsummary_create(-1);
	rmsummary_merge_override(top, max_worker);
	rmsummary_merge_override(top, c->max_resources_seen);
	rmsummary_merge_override(top, c->max_allocation);

	if(!c->first_allocation) {
		c->first_allocation = rmsummary_create(-1);
	}

	update_first_allocation_field(c, top, 1, cpu_time);
	update_first_allocation_field(c, top, 1, wall_time);
	update_first_allocation_field(c, top, c->time_peak_independece, cores);
	update_first_allocation_field(c, top, c->time_peak_independece, virtual_memory);
	update_first_allocation_field(c, top, c->time_peak_independece, memory);
	update_first_allocation_field(c, top, c->time_peak_independece, swap_memory);
	update_first_allocation_field(c, top, c->time_peak_independece, bytes_read);
	update_first_allocation_field(c, top, c->time_peak_independece, bytes_written);
	update_first_allocation_field(c, top, c->time_peak_independece, bytes_received);
	update_first_allocation_field(c, top, c->time_peak_independece, bytes_sent);
	update_first_allocation_field(c, top, c->time_peak_independece, bandwidth);
	update_first_allocation_field(c, top, c->time_peak_independece, total_files);
	update_first_allocation_field(c, top, c->time_peak_independece, disk);
	update_first_allocation_field(c, top, c->time_peak_independece, max_concurrent_processes);
	update_first_allocation_field(c, top, c->time_peak_independece, total_processes);

	/* From here on we only print debugging info. */
	struct jx *jsum = rmsummary_to_json(c->first_allocation, 1);
	if(jsum) {
		char *str = jx_print_string(jsum);
		debug(D_DEBUG, "Updating first allocation '%s':", c->name);
		debug(D_DEBUG, "%s", str);
		jx_delete(jsum);
		free(str);
	}

	jsum = rmsummary_to_json(top, 1);
	if(jsum) {
		char *str = jx_print_string(jsum);
		debug(D_DEBUG, "From max resources '%s':", c->name);
		debug(D_DEBUG, "%s", str);
		jx_delete(jsum);
		free(str);
	}

	rmsummary_delete(top);

	return 1;
}