Example #1
File: batch_job.c  Project: pbui/cctools
struct batch_queue *batch_queue_create(batch_queue_type_t type)
{
    int i;
    struct batch_queue *q;

    q = xxmalloc(sizeof(*q));
    q->type = type;
    strncpy(q->logfile, "", sizeof(q->logfile));
    q->options = hash_table_create(0, NULL);
    q->features = hash_table_create(0, NULL);
    q->job_table = itable_create(0);
    q->output_table = itable_create(0);
    q->data = NULL;

    batch_queue_set_feature(q, "local_job_queue", "yes");
    batch_queue_set_feature(q, "batch_log_name", "%s.batchlog");
    batch_queue_set_feature(q, "gc_size", "yes");

    q->module = NULL;
    for (i = 0; batch_queue_modules[i]->type != BATCH_QUEUE_TYPE_UNKNOWN; i++)
        if (batch_queue_modules[i]->type == type)
            q->module = batch_queue_modules[i];
    if (q->module == NULL) {
        batch_queue_delete(q);
        return NULL;
    }

    if(q->module->create(q) == -1) {
        batch_queue_delete(q);
        return NULL;
    }

    debug(D_BATCH, "created queue %p (%s)", q, q->module->typestr);

    return q;
}
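
A minimal usage sketch (not part of the scraped examples; the "batch_job.h" header name is an assumption): parse a batch type from a string, create the queue, set one option, and tear it down. Only functions that already appear in the examples on this page are used.

#include <stdio.h>
#include "batch_job.h"   /* assumed header for the batch_queue API */

/* Sketch: create a queue for a named batch type, configure it, and release it. */
int make_queue_example(const char *type_string)
{
    batch_queue_type_t type = batch_queue_type_from_string(type_string);
    if(type == BATCH_QUEUE_TYPE_UNKNOWN) {
        fprintf(stderr, "unknown batch queue type: %s\n", type_string);
        return -1;
    }

    struct batch_queue *q = batch_queue_create(type);
    if(!q) {
        fprintf(stderr, "couldn't establish queue type %s\n", batch_queue_type_to_string(type));
        return -1;
    }

    /* Options are free-form key/value strings, as in example #2; the value here is a placeholder. */
    batch_queue_set_option(q, "batch-options", "placeholder-submit-options");

    batch_queue_delete(q);
    return 0;
}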
Example #2
int main(int argc, char *argv[])
{
	batch_queue_type_t batch_queue_type = BATCH_QUEUE_TYPE_UNKNOWN;

	catalog_host = CATALOG_HOST;
	catalog_port = CATALOG_PORT;

	batch_submit_options = getenv("BATCH_OPTIONS");

	debug_config(argv[0]);

	resources = rmsummary_create(-1);

	int c;

	while((c = getopt_long(argc, argv, "B:C:F:N:M:T:t:w:W:E:P:S:cd:o:O:vh", long_options, NULL)) > -1) {
		switch (c) {
			case 'B':
				batch_submit_options = xxstrdup(optarg);
				break;
			case 'C':
				config_file = xxstrdup(optarg);
				break;
			case 'F':
				foremen_regex = xxstrdup(optarg);
				break;
			case 'N':
			case 'M':
				project_regex = xxstrdup(optarg);
				break;
			case 'T':
				batch_queue_type = batch_queue_type_from_string(optarg);
				if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) {
					fprintf(stderr, "unknown batch queue type: %s\n", optarg);
					return EXIT_FAILURE;
				}
				break;
			case 't':
				worker_timeout = atoi(optarg);
				break;
			case 'w':
				workers_min = atoi(optarg);
				break;
			case 'W':
				workers_max = atoi(optarg);
				break;
			case LONG_OPT_WORKERS_PER_CYCLE:
				workers_per_cycle = atoi(optarg);
				break;
			case LONG_OPT_TASKS_PER_WORKER:
				tasks_per_worker = atof(optarg);
				break;
			case 'E':
				extra_worker_args = xxstrdup(optarg);
				break;
			case LONG_OPT_CORES:
				resources->cores = atoi(optarg);
				break;
			case LONG_OPT_AMAZON_CREDENTIALS:
				amazon_credentials = xxstrdup(optarg);
				break;
			case LONG_OPT_AMAZON_AMI:
				amazon_ami = xxstrdup(optarg);
				break;
			case LONG_OPT_MEMORY:
				resources->memory = atoi(optarg);
				break;
			case LONG_OPT_DISK:
				resources->disk = atoi(optarg);
				break;
			case LONG_OPT_GPUS:
				resources->gpus = atoi(optarg);
				break;
			case LONG_OPT_AUTOSIZE:
				autosize = 1;
				break;
			case LONG_OPT_FACTORY_TIMEOUT:
				factory_timeout = MAX(0, atoi(optarg));
				break;
			case LONG_OPT_CONDOR_REQUIREMENTS:
				if(condor_requirements) {
					char *tmp = condor_requirements;
					condor_requirements = string_format("(%s && (%s))", tmp, optarg);
					free(tmp);
				} else {
					condor_requirements = string_format("(%s)", optarg);
				}
				break;
			case LONG_OPT_WRAPPER:
				wrapper_command = optarg;
				break;
			case LONG_OPT_WRAPPER_INPUT:
				if(!wrapper_input) {
					wrapper_input = strdup(optarg);
				} else {
					wrapper_input = string_format("%s,%s",wrapper_input,optarg);
				}
				break;
			case 'P':
				password_file = optarg;
				break;
			case 'S':
				scratch_dir = optarg;
				break;
			case 'c':
				consider_capacity = 1;
				break;
			case 'd':
				debug_flags_set(optarg);
				break;
			case 'o':
				debug_config_file(optarg);
				break;
			case 'O':
				debug_config_file_size(string_metric_parse(optarg));
				break;
			case 'v':
				cctools_version_print(stdout, argv[0]);
				exit(EXIT_SUCCESS);
			case 'h':
				show_help(argv[0]);
				exit(EXIT_SUCCESS);
			default:
				show_help(argv[0]);
				return EXIT_FAILURE;
		}
	}

	if(project_regex) {
		using_catalog = 1;
	}
	else if((argc - optind) == 2) {
		master_host = argv[optind];
		master_port = atoi(argv[optind+1]);
	}
	else {
		fprintf(stderr,"work_queue_factory: You must either give a project name with the -M option or master-name option with a configuration file, or give the master's host and port.\n");
		show_help(argv[0]);
		exit(1);
	}
	

	cctools_version_debug(D_DEBUG, argv[0]);

	if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) {
		fprintf(stderr,"work_queue_factory: You must specify a batch type with the -T option.\n");
		fprintf(stderr, "valid options:\n");
		fprintf(stderr, "%s\n", batch_queue_type_string());
		return 1;
	}

	if(config_file) {
		char abs_path_name[PATH_MAX];

		if(!realpath(config_file, abs_path_name)) {
			fprintf(stderr, "work_queue_factory: could not resolve configuration file path: '%s'.\n", config_file);
			exit(EXIT_FAILURE);
		}

		free(config_file);

		/* From now on, read config_file from absolute path */
		config_file = xxstrdup(abs_path_name);

		if(!read_config_file(config_file)) {
			fprintf(stderr,"work_queue_factory: There were errors in the configuration file: %s\n", config_file);
			return 1;
		}
	}	

	if(workers_min>workers_max) {
		fprintf(stderr,"work_queue_factory: min workers (%d) is greater than max workers (%d)\n",workers_min, workers_max);
		return 1;
	}

	/*
	Careful here: most of the supported batch systems expect
	that jobs are submitting from a single shared filesystem.
	Changing to /tmp only works in the case of Condor.
	*/

	if(!scratch_dir) {
		if(batch_queue_type==BATCH_QUEUE_TYPE_CONDOR) {
			scratch_dir = string_format("/tmp/wq-pool-%d",getuid());
		} else {
			scratch_dir = string_format("wq-pool-%d",getuid());
		}
	}

	if(!create_dir(scratch_dir,0777)) {
		fprintf(stderr,"work_queue_factory: couldn't create %s: %s",scratch_dir,strerror(errno));
		return 1;
	}

	char cmd[1024];
	sprintf(cmd,"cp \"$(which work_queue_worker)\" '%s'",scratch_dir);
	if (system(cmd)) {
		fprintf(stderr, "work_queue_factory: please add work_queue_worker to your PATH.\n");
		exit(EXIT_FAILURE);
	}

	if(password_file) {
		sprintf(cmd,"cp %s %s/pwfile",password_file,scratch_dir);
		system(cmd);
	}

	if(chdir(scratch_dir)!=0) {
		fprintf(stderr,"work_queue_factory: couldn't chdir to %s: %s",scratch_dir,strerror(errno));
		return 1;
	}

	signal(SIGINT, handle_abort);
	signal(SIGQUIT, handle_abort);
	signal(SIGTERM, handle_abort);
	signal(SIGHUP, ignore_signal);

	queue = batch_queue_create(batch_queue_type);
	if(!queue) {
		fprintf(stderr,"work_queue_factory: couldn't establish queue type %s",batch_queue_type_to_string(batch_queue_type));
		return 1;
	}

	batch_queue_set_option(queue, "batch-options", batch_submit_options);
	batch_queue_set_option(queue, "autosize", autosize ? "yes" : NULL);
	set_worker_resources_options( queue );

	if (amazon_credentials != NULL) {
		batch_queue_set_option(queue, "amazon-credentials", amazon_credentials);
	}
	if (amazon_ami != NULL) {
		batch_queue_set_option(queue, "amazon-ami", amazon_ami);
	}

	if(condor_requirements != NULL && batch_queue_type != BATCH_QUEUE_TYPE_CONDOR) {
		debug(D_NOTICE, "condor_requirements will be ignored as workers will not be running in condor.");
	} else {
		batch_queue_set_option(queue, "condor-requirements", condor_requirements);
	}

	mainloop( queue );

	batch_queue_delete(queue);

	return 0;
}
Example #3
int main(int argc, char *argv[])
{
	batch_queue_type_t batch_queue_type = BATCH_QUEUE_TYPE_UNKNOWN;

	catalog_host = CATALOG_HOST;
	catalog_port = CATALOG_PORT;

	debug_config(argv[0]);

	int c;

	while((c = getopt_long(argc, argv, "F:N:M:T:t:w:W:E:P:S:cd:o:O:vh", long_options, NULL)) > -1) {
		switch (c) {
			case 'F':
				foremen_regex = optarg;
				break;
			case 'N':
			case 'M':
				project_regex = optarg;
				break;
			case 'T':
				batch_queue_type = batch_queue_type_from_string(optarg);
				if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) {
					fprintf(stderr, "unknown batch queue type: %s\n", optarg);
					return EXIT_FAILURE;
				}
				break;
			case 't':
				worker_timeout = atoi(optarg);
				break;
			case 'w':
				workers_min = atoi(optarg);
				break;
			case 'W':
				workers_max = atoi(optarg);
				break;
			case LONG_OPT_TASKS_PER_WORKER:
				tasks_per_worker = atof(optarg);
				break;
			case 'E':
				extra_worker_args = optarg;
				break;
			case LONG_OPT_CORES:
				num_cores_option = xxstrdup(optarg);
				break;
			case LONG_OPT_MEMORY:
				num_memory_option = xxstrdup(optarg);
				break;
			case LONG_OPT_DISK:
				num_disk_option = xxstrdup(optarg);
				break;
			case LONG_OPT_GPUS:
				num_gpus_option = xxstrdup(optarg);
				break;
			case 'P':
				password_file = optarg;
				break;
			case 'S':
				scratch_dir = optarg;
				break;
			case 'c':
				consider_capacity = 1;
				break;
			case 'd':
				debug_flags_set(optarg);
				break;
			case 'o':
				debug_config_file(optarg);
				break;
			case 'O':
				debug_config_file_size(string_metric_parse(optarg));
				break;
			case 'v':
				cctools_version_print(stdout, argv[0]);
				exit(EXIT_SUCCESS);
			case 'h':
				show_help(argv[0]);
				exit(EXIT_SUCCESS);
			default:
				show_help(argv[0]);
				return EXIT_FAILURE;
		}
	}

	cctools_version_debug(D_DEBUG, argv[0]);

	if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) {
		fprintf(stderr,"work_queue_pool: You must specify a batch type with the -T option.\n");
		fprintf(stderr, "valid options:\n");
		fprintf(stderr, "%s\n", batch_queue_type_string());
		return 1;
	}

	if(!project_regex) {
		fprintf(stderr,"work_queue_pool: You must give a project name with the -M option.\n");
		return 1;
	}

	if(workers_min>workers_max) {
		fprintf(stderr,"work_queue_pool: --min-workers (%d) is greater than --max-workers (%d)\n",workers_min,workers_max);
		return 1;
	}

	if(tasks_per_worker < 1)
	{
		tasks_per_worker = num_cores_option ? atof(num_cores_option) : 1;
	}

	if(!scratch_dir) {
		scratch_dir = string_format("/tmp/wq-pool-%d",getuid());
	}

	if(!create_dir(scratch_dir,0777)) {
		fprintf(stderr,"work_queue_pool: couldn't create %s: %s",scratch_dir,strerror(errno));
		return 1;
	}

	char cmd[1024];
	sprintf(cmd,"cp \"$(which work_queue_worker)\" '%s'",scratch_dir);
	if (system(cmd)) {
		fprintf(stderr, "work_queue_pool: please add work_queue_worker to your PATH.\n");
		exit(EXIT_FAILURE);
	}

	if(password_file) {
		sprintf(cmd,"cp %s %s/pwfile",password_file,scratch_dir);
		system(cmd);
	}

	if(chdir(scratch_dir)!=0) {
		fprintf(stderr,"work_queue_pool: couldn't chdir to %s: %s",scratch_dir,strerror(errno));
		return 1;
	}

	signal(SIGINT, handle_abort);
	signal(SIGQUIT, handle_abort);
	signal(SIGTERM, handle_abort);
	signal(SIGHUP, ignore_signal);

	struct batch_queue * queue = batch_queue_create(batch_queue_type);
	if(!queue) {
		fprintf(stderr,"work_queue_pool: couldn't establish queue type %s",batch_queue_type_to_string(batch_queue_type));
		return 1;
	}

	set_worker_resources( queue );

	mainloop( queue, project_regex, foremen_regex );

	batch_queue_delete(queue);

	return 0;
}
Example #4
struct batch_queue *batch_queue_create(batch_queue_type_t type)
{
	struct batch_queue *q;

	if(type == BATCH_QUEUE_TYPE_UNKNOWN)
		return 0;

	q = malloc(sizeof(*q));
	q->type = type;
	q->options_text = 0;
	q->job_table = itable_create(0);
	q->output_table = itable_create(0);

	if(type == BATCH_QUEUE_TYPE_CONDOR)
		q->logfile = strdup("condor.logfile");
	else if(type == BATCH_QUEUE_TYPE_WORK_QUEUE || type == BATCH_QUEUE_TYPE_WORK_QUEUE_SHAREDFS)
		q->logfile = strdup("wq.log");
	else
		q->logfile = NULL;

	if(type == BATCH_QUEUE_TYPE_WORK_QUEUE || type == BATCH_QUEUE_TYPE_WORK_QUEUE_SHAREDFS) {
		q->work_queue = work_queue_create(0);
		if(!q->work_queue) {
			batch_queue_delete(q);
			return 0;
		}
	} else {
		q->work_queue = 0;
	}
	
	if(type == BATCH_QUEUE_TYPE_MPI_QUEUE) {
		q->mpi_queue = mpi_queue_create(0);
		if(!q->mpi_queue) {
			batch_queue_delete(q);
			return 0;
		}
	} else {
		q->mpi_queue = 0;
	}
	
	if(type == BATCH_QUEUE_TYPE_SGE || type == BATCH_QUEUE_TYPE_MOAB || type == BATCH_QUEUE_TYPE_TORQUE || type == BATCH_QUEUE_TYPE_CLUSTER) {
		batch_job_setup_cluster(q);
	}

	if(type == BATCH_QUEUE_TYPE_HADOOP) {
		int fail = 0;

		if(!getenv("HADOOP_HOME")) {
			debug(D_NOTICE, "error: environment variable HADOOP_HOME not set\n");
			fail = 1;
		}
		if(!getenv("HDFS_ROOT_DIR")) {
			debug(D_NOTICE, "error: environment variable HDFS_ROOT_DIR not set\n");
			fail = 1;
		}
		if(!getenv("HADOOP_USER_TMP")) {
			debug(D_NOTICE, "error: environment variable HADOOP_USER_TMP not set\n");
			fail = 1;
		}
		if(!getenv("HADOOP_PARROT_PATH")) {
			/* Note: HADOOP_PARROT_PATH is the path to Parrot on the remote node, not on the local machine. */
			debug(D_NOTICE, "error: environment variable HADOOP_PARROT_PATH not set\n");
			fail = 1;
		}
	
		if(fail) {
			batch_queue_delete(q);
			return 0;
		}
	}

	return q;
}
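
The Hadoop branch in this older version only verifies that four environment variables are present before the queue is created. A hedged sketch of what a caller might export beforehand follows; the values are placeholders, not real defaults, and the "batch_job.h" header name is an assumption.

#include <stdlib.h>
#include "batch_job.h"   /* assumed header name */

/* Sketch: export the variables checked above, then create a Hadoop queue. */
struct batch_queue *create_hadoop_queue_example(void)
{
	setenv("HADOOP_HOME",        "/opt/hadoop",           0);  /* placeholder install path */
	setenv("HDFS_ROOT_DIR",      "hdfs://namenode:9000/", 0);  /* placeholder HDFS URI */
	setenv("HADOOP_USER_TMP",    "/tmp/hadoop-scratch",   0);  /* placeholder scratch dir */
	setenv("HADOOP_PARROT_PATH", "/path/to/parrot_run",   0);  /* path to Parrot on the remote node */

	return batch_queue_create(BATCH_QUEUE_TYPE_HADOOP);
}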