Beispiel #1
0
/*
Build the environment description for node n of dag d with
dag_node_env_create, export it with jx_export, and then release
the description.  Does nothing if no description is produced.
*/
static void makeflow_node_export_variables( struct dag *d, struct dag_node *n )
{
	struct jx *env = dag_node_env_create(d,n);

	/* Nothing to export for this node. */
	if(!env)
		return;

	jx_export(env);
	jx_delete(env);
}
Beispiel #2
0
static batch_job_id_t batch_job_condor_submit (struct batch_queue *q, const char *cmd, const char *extra_input_files, const char *extra_output_files, struct jx *envlist, const struct rmsummary *resources )
{
	FILE *file;
	int njobs;
	int jobid;
	const char *options = hash_table_lookup(q->options, "batch-options");

	if(setup_condor_wrapper("condor.sh") < 0) {
		debug(D_BATCH, "could not create condor.sh: %s", strerror(errno));
		return -1;
	}

	if(!string_istrue(hash_table_lookup(q->options, "skip-afs-check"))) {
		char *cwd = path_getcwd();
		if(!strncmp(cwd, "/afs", 4)) {
			debug(D_NOTICE|D_BATCH, "The working directory is '%s':", cwd);
			debug(D_NOTICE|D_BATCH, "This won't work because Condor is not able to write to files in AFS.");
			debug(D_NOTICE|D_BATCH, "Instead, run makeflow from a local disk like /tmp.");
			debug(D_NOTICE|D_BATCH, "Or, use the Work Queue with -T wq and condor_submit_workers.");
			free(cwd);
			exit(EXIT_FAILURE);
		}
		free(cwd);
	}

	file = fopen("condor.submit", "w");
	if(!file) {
		debug(D_BATCH, "could not create condor.submit: %s", strerror(errno));
		return -1;
	}

	fprintf(file, "universe = vanilla\n");
	fprintf(file, "executable = condor.sh\n");
	char *escaped = string_escape_condor(cmd);
	fprintf(file, "arguments = %s\n", escaped);
	free(escaped);
	if(extra_input_files)
		fprintf(file, "transfer_input_files = %s\n", extra_input_files);
	// Note that we do not use transfer_output_files, because that causes the job
	// to get stuck in a system hold if the files are not created.
	fprintf(file, "should_transfer_files = yes\n");
	fprintf(file, "when_to_transfer_output = on_exit\n");
	fprintf(file, "notification = never\n");
	fprintf(file, "copy_to_spool = true\n");
	fprintf(file, "transfer_executable = true\n");
	fprintf(file, "keep_claim_idle = 30\n");
	fprintf(file, "log = %s\n", q->logfile);

	const char *c_req = batch_queue_get_option(q, "condor-requirements");
	char *bexp = blacklisted_expression(q);

	if(c_req && bexp) {
		fprintf(file, "requirements = %s && %s\n", c_req, bexp);
	} else if(c_req) {
		fprintf(file, "requirements = %s\n", c_req);
	} else if(bexp) {
		fprintf(file, "requirements = %s\n", bexp);
	}

	if(bexp)
		free(bexp);

	/*
	Getting environment variables formatted for a condor submit
	file is very hairy, due to some strange quoting rules.
	To avoid problems, we simply export vars to the environment,
	and then tell condor getenv=true, which pulls in the environment.
	*/

	fprintf(file, "getenv = true\n");

	if(envlist) {
		jx_export(envlist);
	}

	if(options)
		fprintf(file, "%s\n", options);

	/* set same deafults as condor_submit_workers */
	int64_t cores  = 1;
	int64_t memory = 1024;
	int64_t disk   = 1024;

	if(resources) {
		cores  = resources->cores  > -1 ? resources->cores  : cores;
		memory = resources->memory > -1 ? resources->memory : memory;
		disk   = resources->disk   > -1 ? resources->disk   : disk;
	}

	/* convert disk to KB */
	disk *= 1024;

	if(batch_queue_get_option(q, "autosize")) {
		fprintf(file, "request_cpus   = ifThenElse(%" PRId64 " > TotalSlotCpus, %" PRId64 ", TotalSlotCpus)\n", cores, cores);
		fprintf(file, "request_memory = ifThenElse(%" PRId64 " > TotalSlotMemory, %" PRId64 ", TotalSlotMemory)\n", memory, memory);
		fprintf(file, "request_disk   = ifThenElse((%" PRId64 ") > TotalSlotDisk, (%" PRId64 "), TotalSlotDisk)\n", disk, disk);
	}
	else {
			fprintf(file, "request_cpus = %" PRId64 "\n", cores);
			fprintf(file, "request_memory = %" PRId64 "\n", memory);
			fprintf(file, "request_disk = %" PRId64 "\n", disk);
	}

	fprintf(file, "queue\n");
	fclose(file);

	file = popen("condor_submit condor.submit", "r");
	if(!file)
		return -1;

	char line[BATCH_JOB_LINE_MAX];
	while(fgets(line, sizeof(line), file)) {
		if(sscanf(line, "%d job(s) submitted to cluster %d", &njobs, &jobid) == 2) {
			pclose(file);
			debug(D_BATCH, "job %d submitted to condor", jobid);
			struct batch_job_info *info;
			info = malloc(sizeof(*info));
			memset(info, 0, sizeof(*info));
			info->submitted = time(0);
			itable_insert(q->job_table, jobid, info);
			return jobid;
		}
	}

	pclose(file);
	debug(D_BATCH, "failed to submit job to condor!");
	return -1;
}
/*
Submit a single job to the cluster batch system.

Builds a submit command line from the file-level settings
cluster_submit_cmd, cluster_options, cluster_jobname_var and
cluster_name, runs it, and parses the job id out of its output.

q                  - queue whose options and job_table are used.
cmd                - command to run; passed to the job through the
                     BATCH_JOB_COMMAND environment variable.
extra_input_files  - not used by this function.
extra_output_files - not used by this function.
envlist            - if non-null, exported into this process's environment.
resources          - not used by this function.

Returns the job id on success, or -1 on failure.
*/
static batch_job_id_t batch_job_cluster_submit (struct batch_queue * q, const char *cmd, const char *extra_input_files, const char *extra_output_files, struct jx *envlist, const struct rmsummary *resources )
{
	batch_job_id_t jobid;
	struct batch_job_info *info;
	const char *options = hash_table_lookup(q->options, "batch-options");

	if(!setup_batch_wrapper(q, cluster_name)) {
		debug(D_NOTICE|D_BATCH,"couldn't setup wrapper file: %s",strerror(errno));
		/* Report failure the same way as every other error path below. */
		return -1;
	}

	/* Use the first word in the command line as a name for the job. */

	char *name = xxstrdup(cmd);
	{
		char *s = strchr(name, ' ');
		if(s)
			*s = 0;
	}

	/*
	Experiment shows that passing environment variables
	through the command-line doesn't work, due to multiple
	levels of quote interpretation.  So, we export all
	variables into the environment, and rely upon the -V
	option to load the environment into the job.
	*/

	if(envlist) {
		jx_export(envlist);
	}

	/*
	Pass the command to run through the environment as well.
	*/
	setenv("BATCH_JOB_COMMAND", cmd, 1);

	char *command = string_format("%s %s %s '%s' %s %s.wrapper",
		cluster_submit_cmd,
		cluster_options,
		cluster_jobname_var,
		path_basename(name),
		options ? options : "",
		cluster_name);

	free(name);

	debug(D_BATCH, "%s", command);

	/*
	Allocate the job record before submitting, so that an allocation
	failure cannot leave a submitted job that is never recorded in
	the job table.
	*/
	info = calloc(1, sizeof(*info));
	if(!info) {
		debug(D_NOTICE|D_BATCH, "couldn't allocate job info: %s", strerror(errno));
		free(command);
		return -1;
	}

	FILE *file = popen(command, "r");
	free(command);
	if(!file) {
		debug(D_BATCH, "couldn't submit job: %s", strerror(errno));
		free(info);
		return -1;
	}

	/*
	Accept any of the three recognized output shapes.  The buffer
	starts empty so that "no output at all" can be told apart from
	"output that did not contain a job id" after the loop.
	*/
	char line[BATCH_JOB_LINE_MAX] = "";
	while(fgets(line, sizeof(line), file)) {
		if(sscanf(line, "Your job %" SCNbjid, &jobid) == 1
		|| sscanf(line, "Submitted batch job %" SCNbjid, &jobid) == 1
		|| sscanf(line, "%" SCNbjid, &jobid) == 1 ) {
			debug(D_BATCH, "job %" PRIbjid " submitted", jobid);
			pclose(file);
			info->submitted = time(0);
			itable_insert(q->job_table, jobid, info);
			return jobid;
		}
	}

	/* line still holds the last line read, if any. */
	if(strlen(line)) {
		debug(D_NOTICE, "job submission failed: %s", line);
	} else {
		debug(D_NOTICE, "job submission failed: no output from %s", cluster_name);
	}
	pclose(file);
	free(info);
	return -1;
}