Пример #1
0
struct category *category_lookup_or_create(struct hash_table *categories, const char *name) {
	struct category *c;

	if(!name)
		name = "default";

	c = hash_table_lookup(categories, name);
	if(c) return c;

	c = calloc(1, sizeof(struct category));

	c->name       = xxstrdup(name);
	c->fast_abort = -1;

	c->total_tasks = 0;

	c->first_allocation    = NULL;
	c->max_allocation      = rmsummary_create(-1);
	c->autolabel_resource  = rmsummary_create(0);

	c->max_resources_seen      = rmsummary_create(-1);

	c->cores_histogram           = histogram_create(1);
	c->wall_time_histogram       = histogram_create(time_bucket_size);
	c->cpu_time_histogram        = histogram_create(time_bucket_size);
	c->memory_histogram          = histogram_create(memory_bucket_size);
	c->swap_memory_histogram     = histogram_create(memory_bucket_size);
	c->virtual_memory_histogram  = histogram_create(memory_bucket_size);
	c->bytes_read_histogram      = histogram_create(bytes_bucket_size);
	c->bytes_written_histogram   = histogram_create(bytes_bucket_size);
	c->bytes_received_histogram  = histogram_create(bytes_bucket_size);
	c->bytes_sent_histogram      = histogram_create(bytes_bucket_size);
	c->bandwidth_histogram       = histogram_create(bandwidth_bucket_size);
	c->total_files_histogram     = histogram_create(1);
	c->disk_histogram            = histogram_create(disk_bucket_size);
	c->total_processes_histogram = histogram_create(1);
	c->max_concurrent_processes_histogram = histogram_create(1);

	c->time_peak_independece = 0;

	c->steady_state = 0;
	c->completions_since_last_reset = 0;

	c->allocation_mode = CATEGORY_ALLOCATION_MODE_FIXED;

	hash_table_insert(categories, name, c);

	return c;
}
Пример #2
0
struct dag_node *dag_node_create(struct dag *d, int linenum)
{
	struct dag_node *n;

	n = malloc(sizeof(struct dag_node));
	memset(n, 0, sizeof(struct dag_node));
	n->d = d;
	n->linenum = linenum;
	n->state = DAG_NODE_STATE_WAITING;
	n->nodeid = d->nodeid_counter++;
	n->variables = hash_table_create(0, 0);

	n->source_files = list_create(0);
	n->target_files = list_create(0);

	n->remote_names = itable_create(0);
	n->remote_names_inv = hash_table_create(0, 0);

	n->descendants = set_create(0);
	n->ancestors = set_create(0);

	n->ancestor_depth = -1;

	n->resources_requested = rmsummary_create(-1);
	n->resources_measured  = NULL;

	n->resource_request = CATEGORY_ALLOCATION_FIRST;

	return n;
}
Пример #3
0
struct rmsummary *json_to_rmsummary(struct jx *j) {
	if(!j || !jx_istype(j, JX_OBJECT))
		return NULL;

	struct rmsummary *s = rmsummary_create(-1);

	struct jx_pair *head = j->u.pairs;
	while(head) {
		if(!jx_istype(head->key, JX_STRING))
			continue;

		char *key = head->key->u.string_value;
		struct jx *value = head->value;

		if(jx_istype(value, JX_STRING)) {
			rmsummary_assign_char_field(s, key, value->u.string_value);
		} else if(jx_istype(value, JX_INTEGER)) {
			rmsummary_assign_int_field(s, key, value->u.integer_value);
		} else if(jx_istype(value, JX_ARRAY)) {
			int64_t number;
			int status = json_number_of_array(value, key, &number);
			if(status) {
				rmsummary_assign_int_field(s, key, number);
			}
		}

		head = head->next;
	}

	return s;
}
Пример #4
0
const struct rmsummary *category_dynamic_task_min_resources(struct category *c, struct rmsummary *user, category_allocation_t request) {

	static struct rmsummary *internal = NULL;

	const struct rmsummary *max = category_dynamic_task_max_resources(c, user, request);

	if(internal) {
		rmsummary_delete(internal);
	}

	internal = rmsummary_create(-1);

	/* load seen values */
	struct rmsummary *seen = c->max_resources_seen;

	if(c->allocation_mode != CATEGORY_ALLOCATION_MODE_FIXED) {
			internal->cores  = seen->cores;
			internal->memory = seen->memory;
			internal->disk   = seen->disk;
	}

	rmsummary_merge_override(internal, max);

	return internal;
}
Пример #5
0
const struct rmsummary *category_dynamic_task_max_resources(struct category *c, struct rmsummary *user, category_allocation_t request) {
	/* we keep an internal label so that the caller does not have to worry
	 * about memory leaks. */
	static struct rmsummary *internal = NULL;

	if(internal) {
		rmsummary_delete(internal);
	}

	internal = rmsummary_create(-1);

	struct rmsummary *max   = c->max_allocation;
	struct rmsummary *first = c->first_allocation;
	struct rmsummary *seen  = c->max_resources_seen;

	if(c->steady_state && c->allocation_mode != CATEGORY_ALLOCATION_MODE_FIXED) {
		internal->cores  = seen->cores;
		internal->memory = seen->memory;
		internal->disk   = seen->disk;
	}

	/* load max values */
	rmsummary_merge_override(internal, max);

	if(c->allocation_mode != CATEGORY_ALLOCATION_MODE_FIXED
			&& request == CATEGORY_ALLOCATION_FIRST) {
		rmsummary_merge_override(internal, first);
	}

	/* chip in user values */
	rmsummary_merge_override(internal, user);

	return internal;
}
Пример #6
0
void categories_initialize(struct hash_table *categories, struct rmsummary *top, const char *summaries_file) {
	struct list *summaries = rmsummary_parse_file_multiple(summaries_file);

	if(!summaries) {
		fatal("Could not read '%s' file: %s\n", strerror(errno));
	}

	char *name;
	struct category *c;
	hash_table_firstkey(categories);
	while(hash_table_nextkey(categories, &name, (void **) &c)) {
		category_clear_histograms(c);
		if(c->first_allocation) {
			rmsummary_delete(c->first_allocation);
			c->first_allocation = rmsummary_create(-1);
		}
	}

	struct rmsummary *s;
	list_first_item(summaries);
	while((s = list_pop_head(summaries))) {
		if(s->category) {
			c = category_lookup_or_create(categories, s->category);
			category_accumulate_summary(c, s, NULL);
		}
		rmsummary_delete(s);
	}

	hash_table_firstkey(categories);
	while(hash_table_nextkey(categories, &name, (void **) &c)) {
		category_update_first_allocation(c, NULL);
		category_clear_histograms(c);
	}
}
Пример #7
0
void rmDsummary_print(FILE *output, struct rmDsummary *so) {
	struct rmsummary *s = rmsummary_create(-1);

	s->command    = xxstrdup(so->command);

	if(so->category)
	{
		s->category   = xxstrdup(so->category);
	}
	else if(so->command)
	{
		s->category   = xxstrdup(so->command);
	}
	else
	{
		s->category   = xxstrdup(DEFAULT_CATEGORY);
		s->command    = xxstrdup(DEFAULT_CATEGORY);
	}

	if(so->task_id) {
		s->task_id = xxstrdup(so->task_id);
	}

	s->start     = so->start;
	s->end       = so->end;
	s->wall_time = so->wall_time;

	to_internal(so, s, start,     "us");
	to_internal(so, s, end,       "us");
	to_internal(so, s, wall_time, "s");
	to_internal(so, s, cpu_time,  "s");

	to_internal(so, s, cores,                   "cores");
	to_internal(so, s, total_processes,         "procs");
	to_internal(so, s, max_concurrent_processes,"procs");

	to_internal(so, s, memory,         "MB");
	to_internal(so, s, virtual_memory, "MB");
	to_internal(so, s, swap_memory,    "MB");

	to_internal(so, s, bytes_read,    "MB");
	to_internal(so, s, bytes_written, "MB");

	to_internal(so, s, bytes_received, "MB");
	to_internal(so, s, bytes_sent,     "MB");
	to_internal(so, s, bandwidth,      "Mbps");

	to_internal(so, s, total_files, "files");
	to_internal(so, s, disk, "MB");

	rmsummary_print(output, s, /* pprint */ 1, /* extra fields */ 0);
	rmsummary_delete(s);

	return;
}
Пример #8
0
void category_specify_first_allocation_guess(struct category *c, const struct rmsummary *s) {

	/* assume user knows what they are doing. */
	c->steady_state = 1;
	rmsummary_merge_max(c->max_resources_seen, s);

	rmsummary_delete(c->first_allocation);

	c->first_allocation = rmsummary_create(-1);

	rmsummary_merge_max(c->first_allocation, s);
}
Пример #9
0
int main(int argc, char *argv[])
{
	batch_queue_type_t batch_queue_type = BATCH_QUEUE_TYPE_UNKNOWN;

	catalog_host = CATALOG_HOST;
	catalog_port = CATALOG_PORT;

	batch_submit_options = getenv("BATCH_OPTIONS");

	debug_config(argv[0]);

	resources = rmsummary_create(-1);

	int c;

	while((c = getopt_long(argc, argv, "B:C:F:N:M:T:t:w:W:E:P:S:cd:o:O:vh", long_options, NULL)) > -1) {
		switch (c) {
			case 'B':
				batch_submit_options = xxstrdup(optarg);
				break;
			case 'C':
				config_file = xxstrdup(optarg);
				break;
			case 'F':
				foremen_regex = xxstrdup(optarg);
				break;
			case 'N':
			case 'M':
				project_regex = xxstrdup(optarg);
				break;
			case 'T':
				batch_queue_type = batch_queue_type_from_string(optarg);
				if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) {
					fprintf(stderr, "unknown batch queue type: %s\n", optarg);
					return EXIT_FAILURE;
				}
				break;
			case 't':
				worker_timeout = atoi(optarg);
				break;
			case 'w':
				workers_min = atoi(optarg);
				break;
			case 'W':
				workers_max = atoi(optarg);
				break;
			case LONG_OPT_WORKERS_PER_CYCLE:
				workers_per_cycle = atoi(optarg);
				break;
			case LONG_OPT_TASKS_PER_WORKER:
				tasks_per_worker = atof(optarg);
				break;
			case 'E':
				extra_worker_args = xxstrdup(optarg);
				break;
			case LONG_OPT_CORES:
				resources->cores = atoi(optarg);
				break;
			case LONG_OPT_AMAZON_CREDENTIALS:
				amazon_credentials = xxstrdup(optarg);
				break;
			case LONG_OPT_AMAZON_AMI:
				amazon_ami = xxstrdup(optarg);
				break;
			case LONG_OPT_MEMORY:
				resources->memory = atoi(optarg);
				break;
			case LONG_OPT_DISK:
				resources->disk = atoi(optarg);
				break;
			case LONG_OPT_GPUS:
				resources->gpus = atoi(optarg);
				break;
			case LONG_OPT_AUTOSIZE:
				autosize = 1;
				break;
			case LONG_OPT_FACTORY_TIMEOUT:
				factory_timeout = MAX(0, atoi(optarg));
				break;
			case LONG_OPT_CONDOR_REQUIREMENTS:
				if(condor_requirements) {
					char *tmp = condor_requirements;
					condor_requirements = string_format("(%s && (%s))", tmp, optarg);
					free(tmp);
				} else {
					condor_requirements = string_format("(%s)", optarg);
				}
				break;
			case LONG_OPT_WRAPPER:
				wrapper_command = optarg;
				break;
			case LONG_OPT_WRAPPER_INPUT:
				if(!wrapper_input) {
					wrapper_input = strdup(optarg);
				} else {
					wrapper_input = string_format("%s,%s",wrapper_input,optarg);
				}
				break;
			case 'P':
				password_file = optarg;
				break;
			case 'S':
				scratch_dir = optarg;
				break;
			case 'c':
				consider_capacity = 1;
				break;
			case 'd':
				debug_flags_set(optarg);
				break;
			case 'o':
				debug_config_file(optarg);
				break;
			case 'O':
				debug_config_file_size(string_metric_parse(optarg));
				break;
			case 'v':
				cctools_version_print(stdout, argv[0]);
				exit(EXIT_SUCCESS);
			case 'h':
				show_help(argv[0]);
				exit(EXIT_SUCCESS);
			default:
				show_help(argv[0]);
				return EXIT_FAILURE;
		}
	}

	if(project_regex) {
		using_catalog = 1;
	}
	else if((argc - optind) == 2) {
		master_host = argv[optind];
		master_port = atoi(argv[optind+1]);
	}
	else {
		fprintf(stderr,"work_queue_factory: You must either give a project name with the -M option or master-name option with a configuration file, or give the master's host and port.\n");
		show_help(argv[0]);
		exit(1);
	}
	

	cctools_version_debug(D_DEBUG, argv[0]);

	if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) {
		fprintf(stderr,"work_queue_factory: You must specify a batch type with the -T option.\n");
		fprintf(stderr, "valid options:\n");
		fprintf(stderr, "%s\n", batch_queue_type_string());
		return 1;
	}

	if(config_file) {
		char abs_path_name[PATH_MAX];

		if(!realpath(config_file, abs_path_name)) {
			fprintf(stderr, "work_queue_factory: could not resolve configuration file path: '%s'.\n", config_file);
			exit(EXIT_FAILURE);
		}

		free(config_file);

		/* From now on, read config_file from absolute path */
		config_file = xxstrdup(abs_path_name);

		if(!read_config_file(config_file)) {
			fprintf(stderr,"work_queue_factory: There were errors in the configuration file: %s\n", config_file);
			return 1;
		}
	}	

	if(workers_min>workers_max) {
		fprintf(stderr,"work_queue_factory: min workers (%d) is greater than max workers (%d)\n",workers_min, workers_max);
		return 1;
	}

	/*
	Careful here: most of the supported batch systems expect
	that jobs are submitting from a single shared filesystem.
	Changing to /tmp only works in the case of Condor.
	*/

	if(!scratch_dir) {
		if(batch_queue_type==BATCH_QUEUE_TYPE_CONDOR) {
			scratch_dir = string_format("/tmp/wq-pool-%d",getuid());
		} else {
			scratch_dir = string_format("wq-pool-%d",getuid());
		}
	}

	if(!create_dir(scratch_dir,0777)) {
		fprintf(stderr,"work_queue_factory: couldn't create %s: %s",scratch_dir,strerror(errno));
		return 1;
	}

	char cmd[1024];
	sprintf(cmd,"cp \"$(which work_queue_worker)\" '%s'",scratch_dir);
	if (system(cmd)) {
		fprintf(stderr, "work_queue_factory: please add work_queue_worker to your PATH.\n");
		exit(EXIT_FAILURE);
	}

	if(password_file) {
		sprintf(cmd,"cp %s %s/pwfile",password_file,scratch_dir);
		system(cmd);
	}

	if(chdir(scratch_dir)!=0) {
		fprintf(stderr,"work_queue_factory: couldn't chdir to %s: %s",scratch_dir,strerror(errno));
		return 1;
	}

	signal(SIGINT, handle_abort);
	signal(SIGQUIT, handle_abort);
	signal(SIGTERM, handle_abort);
	signal(SIGHUP, ignore_signal);

	queue = batch_queue_create(batch_queue_type);
	if(!queue) {
		fprintf(stderr,"work_queue_factory: couldn't establish queue type %s",batch_queue_type_to_string(batch_queue_type));
		return 1;
	}

	batch_queue_set_option(queue, "batch-options", batch_submit_options);
	batch_queue_set_option(queue, "autosize", autosize ? "yes" : NULL);
	set_worker_resources_options( queue );

	if (amazon_credentials != NULL) {
		batch_queue_set_option(queue, "amazon-credentials", amazon_credentials);
	}
	if (amazon_ami != NULL) {
		batch_queue_set_option(queue, "amazon-ami", amazon_ami);
	}

	if(condor_requirements != NULL && batch_queue_type != BATCH_QUEUE_TYPE_CONDOR) {
		debug(D_NOTICE, "condor_requirements will be ignored as workers will not be running in condor.");
	} else {
		batch_queue_set_option(queue, "condor-requirements", condor_requirements);
	}

	mainloop( queue );

	batch_queue_delete(queue);

	return 0;
}
Пример #10
0
void category_specify_max_allocation(struct category *c, const struct rmsummary *s) {
	rmsummary_delete(c->max_allocation);
	c->max_allocation = rmsummary_create(-1);

	rmsummary_merge_max(c->max_allocation, s);
}
Пример #11
0
int category_update_first_allocation(struct category *c, const struct rmsummary *max_worker) {
	/* buffer used only for debug output. */
	static buffer_t *b = NULL;
	if(!b) {
		b = malloc(sizeof(buffer_t));
		buffer_init(b);
	}

	if(c->allocation_mode == CATEGORY_ALLOCATION_MODE_FIXED)
		return 0;

	if(c->total_tasks < 1)
		return 0;

	struct rmsummary *top = rmsummary_create(-1);
	rmsummary_merge_override(top, max_worker);
	rmsummary_merge_override(top, c->max_resources_seen);
	rmsummary_merge_override(top, c->max_allocation);

	if(!c->first_allocation) {
		c->first_allocation = rmsummary_create(-1);
	}

	update_first_allocation_field(c, top, 1, cpu_time);
	update_first_allocation_field(c, top, 1, wall_time);
	update_first_allocation_field(c, top, c->time_peak_independece, cores);
	update_first_allocation_field(c, top, c->time_peak_independece, virtual_memory);
	update_first_allocation_field(c, top, c->time_peak_independece, memory);
	update_first_allocation_field(c, top, c->time_peak_independece, swap_memory);
	update_first_allocation_field(c, top, c->time_peak_independece, bytes_read);
	update_first_allocation_field(c, top, c->time_peak_independece, bytes_written);
	update_first_allocation_field(c, top, c->time_peak_independece, bytes_received);
	update_first_allocation_field(c, top, c->time_peak_independece, bytes_sent);
	update_first_allocation_field(c, top, c->time_peak_independece, bandwidth);
	update_first_allocation_field(c, top, c->time_peak_independece, total_files);
	update_first_allocation_field(c, top, c->time_peak_independece, disk);
	update_first_allocation_field(c, top, c->time_peak_independece, max_concurrent_processes);
	update_first_allocation_field(c, top, c->time_peak_independece, total_processes);

	/* From here on we only print debugging info. */
	struct jx *jsum = rmsummary_to_json(c->first_allocation, 1);
	if(jsum) {
		char *str = jx_print_string(jsum);
		debug(D_DEBUG, "Updating first allocation '%s':", c->name);
		debug(D_DEBUG, "%s", str);
		jx_delete(jsum);
		free(str);
	}

	jsum = rmsummary_to_json(top, 1);
	if(jsum) {
		char *str = jx_print_string(jsum);
		debug(D_DEBUG, "From max resources '%s':", c->name);
		debug(D_DEBUG, "%s", str);
		jx_delete(jsum);
		free(str);
	}

	rmsummary_delete(top);

	return 1;
}