Example no. 1
0
static batch_job_id_t batch_job_wq_submit (struct batch_queue * q, const char *cmd, const char *extra_input_files, const char *extra_output_files, struct jx *envlist, const struct rmsummary *resources)
{
	struct work_queue_task *t;

	int caching_flag = WORK_QUEUE_CACHE;

	if(string_istrue(hash_table_lookup(q->options, "caching"))) {
		caching_flag = WORK_QUEUE_CACHE;
	} else {
		caching_flag = WORK_QUEUE_NOCACHE;
	}

	t = work_queue_task_create(cmd);

	specify_files(t, extra_input_files, extra_output_files, caching_flag);
	specify_envlist(t, envlist);

	if(envlist) {
		const char *category = jx_lookup_string(envlist, "CATEGORY");
		if(category) {
			work_queue_task_specify_category(t, category);
		}
	}

	if(resources) {
		work_queue_task_specify_resources(t, resources);
	}

	work_queue_submit(q->data, t);

	return t->taskid;
}
static batch_job_id_t batch_job_wq_submit (struct batch_queue * q, const char *cmd, const char *extra_input_files, const char *extra_output_files, struct nvpair *envlist )
{
	struct work_queue_task *t;

	int caching_flag = WORK_QUEUE_CACHE;

	if(string_istrue(hash_table_lookup(q->options, "caching"))) {
		caching_flag = WORK_QUEUE_CACHE;
	} else {
		caching_flag = WORK_QUEUE_NOCACHE;
	}

	t = work_queue_task_create(cmd);

	specify_files(t, extra_input_files, extra_output_files, caching_flag);
	specify_envlist(t, envlist);

	struct rmsummary *resources = parse_batch_options_resources(hash_table_lookup(q->options, "batch-options"));
	if(resources) {
		work_queue_task_specify_resources(t, resources);
		/* rmsummary owns internal strings; rmsummary_delete() frees them,
		   where a plain free() would leak. */
		rmsummary_delete(resources);
	}

	work_queue_submit(q->data, t);

	return t->taskid;
}
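A minimal sketch, not from the source: the "caching" lookup that opens both versions above can be collapsed into a single expression. The helper name caching_flag_from_option is hypothetical.

static int caching_flag_from_option(struct batch_queue *q)
{
	/* Hypothetical helper: map the "caching" option string (possibly
	   NULL) to a Work Queue caching flag; equivalent to the if/else
	   at the top of both versions above. */
	return string_istrue(hash_table_lookup(q->options, "caching"))
		? WORK_QUEUE_CACHE
		: WORK_QUEUE_NOCACHE;
}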
Example no. 3
0
static int dag_parse_node_command(struct lexer *bk, struct dag_node *n)
{
	struct token *t;

	// Skip over the COMMAND token.
	t = lexer_next_token(bk);
	lexer_free_token(t);

	char *local = dag_variable_lookup_string("BATCH_LOCAL", bk->environment);
	if(local) {
		if(string_istrue(local))
			n->local_job = 1;
		free(local);
	}

	/* Read command modifiers. */
	while((t = lexer_peek_next_token(bk)) && t->type != TOKEN_COMMAND_MOD_END)
	{
		t = lexer_next_token(bk);

		if(strcmp(t->lexeme, "LOCAL") == 0)
		{
			n->local_job = 1;
		}
		else if(strcmp(t->lexeme, "MAKEFLOW") == 0)
		{
			n->nested_job = 1;
		}
		else
		{
			lexer_report_error(bk, "Parser does not know about modifier: %s.\n", t->lexeme);
		}

		lexer_free_token(t);
	}

	if(!t)
	{
		lexer_report_error(bk, "Malformed command.");
	}

	// Consume and free the COMMAND_MOD_END token.
	t = lexer_next_token(bk);
	lexer_free_token(t);

	if(n->nested_job)
	{
		return dag_parse_node_nested_makeflow(bk, n);
	}
	else
	{
		return dag_parse_node_regular_command(bk, n);
	}
}
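For context, a hedged sketch of the rule syntax this parser consumes, assuming the usual Makeflow conventions: a modifier such as LOCAL or MAKEFLOW prefixes the rule's command. The file names here are hypothetical.

out.dat: in.dat process.sh
	LOCAL ./process.sh in.dat > out.dat

sub.out: sub.makeflow
	MAKEFLOW sub.makeflow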
Example no. 4
0
void dag_parse_node_set_command(struct lexer_book *bk, struct dag_node *n, char *command)
{
	struct dag_lookup_set s = { bk->d, bk->category, n, NULL };
	char *local = dag_lookup_str("BATCH_LOCAL", &s);

	if(local) {
		if(string_istrue(local))
			n->local_job = 1;
		free(local);
	}

	n->original_command = xxstrdup(command);
	n->command = translate_command(n, command, n->local_job);
	debug(D_DEBUG, "node command=%s", n->command);
}
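Both BATCH_LOCAL checks (here and in Example no. 3) hinge on string_istrue(). A hypothetical stand-in, not the cctools implementation, illustrating the semantics the call sites assume: NULL and non-affirmative values yield 0, so an unset option never marks a job local.

#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for string_istrue(); the call sites above only
   rely on nonzero for affirmative values and 0 for NULL or anything else. */
static int my_string_istrue(const char *s)
{
	if(!s)
		return 0;
	return strcmp(s, "true") == 0 || strcmp(s, "yes") == 0 || atoi(s) > 0;
}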
Example no. 5
0
static void batch_queue_wq_option_update (struct batch_queue *q, const char *what, const char *value)
{
	if(strcmp(what, "password") == 0) {
		if(value)
			work_queue_specify_password(q->data, value);
	} else if(strcmp(what, "master-mode") == 0) {
		if(strcmp(value, "catalog") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_CATALOG);
		else if(strcmp(value, "standalone") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_STANDALONE);
	} else if(strcmp(what, "name") == 0) {
		if(value)
			work_queue_specify_name(q->data, value);
	} else if(strcmp(what, "priority") == 0) {
		if(value)
			work_queue_specify_priority(q->data, atoi(value));
		else
			work_queue_specify_priority(q->data, 0);
	} else if(strcmp(what, "fast-abort") == 0) {
		if(value)
			work_queue_activate_fast_abort(q->data, atof(value));
	} else if(strcmp(what, "estimate-capacity") == 0) {
		work_queue_specify_estimate_capacity_on(q->data, string_istrue(value));
	} else if(strcmp(what, "keepalive-interval") == 0) {
		if(value)
			work_queue_specify_keepalive_interval(q->data, atoi(value));
		else
			work_queue_specify_keepalive_interval(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL);
	} else if(strcmp(what, "keepalive-timeout") == 0) {
		if(value)
			work_queue_specify_keepalive_timeout(q->data, atoi(value));
		else
			work_queue_specify_keepalive_timeout(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT);
	} else if(strcmp(what, "master-preferred-connection") == 0) {
		if(value)
			work_queue_master_preferred_connection(q->data, value);
		else
			work_queue_master_preferred_connection(q->data, "by_ip");
	} else if(strcmp(what, "category-limits") == 0) {
		struct rmsummary *s = rmsummary_parse_string(value);
		if(s) {
			work_queue_specify_category_max_resources(q->data, s->category, s);
			rmsummary_delete(s);
		} else {
			debug(D_NOTICE, "Could no parse '%s' as a summary of resorces encoded in JSON\n", value);
		}
	}
}
static void batch_queue_wq_option_update (struct batch_queue *q, const char *what, const char *value)
{
	if(strcmp(what, "password") == 0) {
		if(value)
			work_queue_specify_password(q->data, value);
	} else if(strcmp(what, "master-mode") == 0) {
		if(strcmp(value, "catalog") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_CATALOG);
		else if(strcmp(value, "standalone") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_STANDALONE);
	} else if(strcmp(what, "name") == 0) {
		if(value)
			work_queue_specify_name(q->data, value);
	} else if(strcmp(what, "priority") == 0) {
		if(value)
			work_queue_specify_priority(q->data, atoi(value));
		else
			work_queue_specify_priority(q->data, 0);
	} else if(strcmp(what, "estimate-capacity") == 0) {
		work_queue_specify_estimate_capacity_on(q->data, string_istrue(value));
	} else if(strcmp(what, "keepalive-interval") == 0) {
		if(value)
			work_queue_specify_keepalive_interval(q->data, atoi(value));
		else
			work_queue_specify_keepalive_interval(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL);
	} else if(strcmp(what, "keepalive-timeout") == 0) {
		if(value)
			work_queue_specify_keepalive_timeout(q->data, atoi(value));
		else
			work_queue_specify_keepalive_timeout(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT);
	} else if(strcmp(what, "wait-queue-size") == 0) {
		if(value)
			work_queue_activate_worker_waiting(q->data, atoi(value));
		else
			work_queue_activate_worker_waiting(q->data, 0);
	} else if(strcmp(what, "master-preferred-connection") == 0) {
		if(value)
			work_queue_master_preferred_connection(q->data, value);
		else
			work_queue_master_preferred_connection(q->data, "by_ip");
	}
}
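A hedged usage sketch: these per-driver hooks are normally reached through a generic setter rather than called directly. batch_queue_set_option() is assumed here to store the value in q->options and invoke the update hook above; treat the exact entry point as an assumption.

/* Hypothetical usage: each call ends up in batch_queue_wq_option_update(). */
batch_queue_set_option(q, "name", "my-makeflow-project");
batch_queue_set_option(q, "priority", "10");            /* parsed with atoi() */
batch_queue_set_option(q, "keepalive-interval", "120"); /* seconds */
batch_queue_set_option(q, "estimate-capacity", "yes");  /* via string_istrue() */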
Example no. 7
0
static batch_job_id_t batch_job_condor_submit (struct batch_queue *q, const char *cmd, const char *extra_input_files, const char *extra_output_files, struct jx *envlist, const struct rmsummary *resources )
{
	FILE *file;
	int njobs;
	int jobid;
	const char *options = hash_table_lookup(q->options, "batch-options");

	if(setup_condor_wrapper("condor.sh") < 0) {
		debug(D_BATCH, "could not create condor.sh: %s", strerror(errno));
		return -1;
	}

	if(!string_istrue(hash_table_lookup(q->options, "skip-afs-check"))) {
		char *cwd = path_getcwd();
		if(!strncmp(cwd, "/afs", 4)) {
			debug(D_NOTICE|D_BATCH, "The working directory is '%s':", cwd);
			debug(D_NOTICE|D_BATCH, "This won't work because Condor is not able to write to files in AFS.");
			debug(D_NOTICE|D_BATCH, "Instead, run makeflow from a local disk like /tmp.");
			debug(D_NOTICE|D_BATCH, "Or, use the Work Queue with -T wq and condor_submit_workers.");
			free(cwd);
			exit(EXIT_FAILURE);
		}
		free(cwd);
	}

	file = fopen("condor.submit", "w");
	if(!file) {
		debug(D_BATCH, "could not create condor.submit: %s", strerror(errno));
		return -1;
	}

	fprintf(file, "universe = vanilla\n");
	fprintf(file, "executable = condor.sh\n");
	char *escaped = string_escape_condor(cmd);
	fprintf(file, "arguments = %s\n", escaped);
	free(escaped);
	if(extra_input_files)
		fprintf(file, "transfer_input_files = %s\n", extra_input_files);
	// Note that we do not use transfer_output_files, because that causes the job
	// to get stuck in a system hold if the files are not created.
	fprintf(file, "should_transfer_files = yes\n");
	fprintf(file, "when_to_transfer_output = on_exit\n");
	fprintf(file, "notification = never\n");
	fprintf(file, "copy_to_spool = true\n");
	fprintf(file, "transfer_executable = true\n");
	fprintf(file, "keep_claim_idle = 30\n");
	fprintf(file, "log = %s\n", q->logfile);

	const char *c_req = batch_queue_get_option(q, "condor-requirements");
	char *bexp = blacklisted_expression(q);

	if(c_req && bexp) {
		fprintf(file, "requirements = %s && %s\n", c_req, bexp);
	} else if(c_req) {
		fprintf(file, "requirements = %s\n", c_req);
	} else if(bexp) {
		fprintf(file, "requirements = %s\n", bexp);
	}

	if(bexp)
		free(bexp);

	/*
	Getting environment variables formatted for a condor submit
	file is very hairy, due to some strange quoting rules.
	To avoid problems, we simply export vars to the environment,
	and then tell condor getenv=true, which pulls in the environment.
	*/
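	/* Illustrative example, values not from the source: an envlist of
	   {"CORES":"4"} is exported by jx_export() below as CORES=4 in this
	   process, and getenv=true then carries the whole environment into
	   the job. */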

	fprintf(file, "getenv = true\n");

	if(envlist) {
		jx_export(envlist);
	}

	if(options)
		fprintf(file, "%s\n", options);

	/* Set the same defaults as condor_submit_workers. */
	int64_t cores  = 1;
	int64_t memory = 1024;
	int64_t disk   = 1024;

	if(resources) {
		cores  = resources->cores  > -1 ? resources->cores  : cores;
		memory = resources->memory > -1 ? resources->memory : memory;
		disk   = resources->disk   > -1 ? resources->disk   : disk;
	}

	/* convert disk from MB to KB; condor's request_disk is expressed in KB */
	disk *= 1024;

	if(batch_queue_get_option(q, "autosize")) {
		fprintf(file, "request_cpus   = ifThenElse(%" PRId64 " > TotalSlotCpus, %" PRId64 ", TotalSlotCpus)\n", cores, cores);
		fprintf(file, "request_memory = ifThenElse(%" PRId64 " > TotalSlotMemory, %" PRId64 ", TotalSlotMemory)\n", memory, memory);
		fprintf(file, "request_disk   = ifThenElse((%" PRId64 ") > TotalSlotDisk, (%" PRId64 "), TotalSlotDisk)\n", disk, disk);
	}
	else {
		fprintf(file, "request_cpus = %" PRId64 "\n", cores);
		fprintf(file, "request_memory = %" PRId64 "\n", memory);
		fprintf(file, "request_disk = %" PRId64 "\n", disk);
	}

	fprintf(file, "queue\n");
	fclose(file);

	file = popen("condor_submit condor.submit", "r");
	if(!file)
		return -1;

	char line[BATCH_JOB_LINE_MAX];
	while(fgets(line, sizeof(line), file)) {
		if(sscanf(line, "%d job(s) submitted to cluster %d", &njobs, &jobid) == 2) {
			pclose(file);
			debug(D_BATCH, "job %d submitted to condor", jobid);
			struct batch_job_info *info;
			info = malloc(sizeof(*info));
			memset(info, 0, sizeof(*info));
			info->submitted = time(0);
			itable_insert(q->job_table, jobid, info);
			return jobid;
		}
	}

	pclose(file);
	debug(D_BATCH, "failed to submit job to condor!");
	return -1;
}
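For reference, a hedged reconstruction of the condor.submit file the code above emits for cmd "./task" with no extra input files, no custom requirements or batch options, autosize off, and the default resources (1 core, 1024 MB memory, 1024 MB disk, so request_disk becomes 1048576 KB). The log file name is hypothetical.

universe = vanilla
executable = condor.sh
arguments = ./task
should_transfer_files = yes
when_to_transfer_output = on_exit
notification = never
copy_to_spool = true
transfer_executable = true
keep_claim_idle = 30
log = makeflow.condorlog
getenv = true
request_cpus = 1
request_memory = 1024
request_disk = 1048576
queue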