/*
 * Create a batch queue of the given type.
 *
 * Allocates the queue (xxmalloc aborts on OOM, so no NULL check is needed),
 * initializes its tables and default features, then looks up the driver
 * module matching @type in the module table (terminated by an entry of type
 * BATCH_QUEUE_TYPE_UNKNOWN) and runs its create hook.
 *
 * Returns the new queue, or NULL if no module matches @type or the module's
 * create hook fails; on failure the partially built queue is freed.
 */
struct batch_queue *batch_queue_create(batch_queue_type_t type)
{
	int i;
	struct batch_queue *q;

	q = xxmalloc(sizeof(*q));
	q->type = type;
	q->logfile[0] = '\0';	/* no log file configured yet */
	q->options = hash_table_create(0, NULL);
	q->features = hash_table_create(0, NULL);
	q->job_table = itable_create(0);
	q->output_table = itable_create(0);
	q->data = NULL;

	batch_queue_set_feature(q, "local_job_queue", "yes");
	batch_queue_set_feature(q, "batch_log_name", "%s.batchlog");
	batch_queue_set_feature(q, "gc_size", "yes");

	q->module = NULL;
	for (i = 0; batch_queue_modules[i]->type != BATCH_QUEUE_TYPE_UNKNOWN; i++) {
		if (batch_queue_modules[i]->type == type)
			q->module = batch_queue_modules[i];
	}

	if (q->module == NULL) {
		batch_queue_delete(q);
		return NULL;
	}

	if (q->module->create(q) == -1) {
		batch_queue_delete(q);
		return NULL;
	}

	debug(D_BATCH, "created queue %p (%s)", q, q->module->typestr);

	return q;
}
int main(int argc, char *argv[]) { batch_queue_type_t batch_queue_type = BATCH_QUEUE_TYPE_UNKNOWN; catalog_host = CATALOG_HOST; catalog_port = CATALOG_PORT; batch_submit_options = getenv("BATCH_OPTIONS"); debug_config(argv[0]); resources = rmsummary_create(-1); int c; while((c = getopt_long(argc, argv, "B:C:F:N:M:T:t:w:W:E:P:S:cd:o:O:vh", long_options, NULL)) > -1) { switch (c) { case 'B': batch_submit_options = xxstrdup(optarg); break; case 'C': config_file = xxstrdup(optarg); break; case 'F': foremen_regex = xxstrdup(optarg); break; case 'N': case 'M': project_regex = xxstrdup(optarg); break; case 'T': batch_queue_type = batch_queue_type_from_string(optarg); if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr, "unknown batch queue type: %s\n", optarg); return EXIT_FAILURE; } break; case 't': worker_timeout = atoi(optarg); break; case 'w': workers_min = atoi(optarg); break; case 'W': workers_max = atoi(optarg); break; case LONG_OPT_WORKERS_PER_CYCLE: workers_per_cycle = atoi(optarg); break; case LONG_OPT_TASKS_PER_WORKER: tasks_per_worker = atof(optarg); break; case 'E': extra_worker_args = xxstrdup(optarg); break; case LONG_OPT_CORES: resources->cores = atoi(optarg); break; case LONG_OPT_AMAZON_CREDENTIALS: amazon_credentials = xxstrdup(optarg); break; case LONG_OPT_AMAZON_AMI: amazon_ami = xxstrdup(optarg); break; case LONG_OPT_MEMORY: resources->memory = atoi(optarg); break; case LONG_OPT_DISK: resources->disk = atoi(optarg); break; case LONG_OPT_GPUS: resources->gpus = atoi(optarg); break; case LONG_OPT_AUTOSIZE: autosize = 1; break; case LONG_OPT_FACTORY_TIMEOUT: factory_timeout = MAX(0, atoi(optarg)); break; case LONG_OPT_CONDOR_REQUIREMENTS: if(condor_requirements) { char *tmp = condor_requirements; condor_requirements = string_format("(%s && (%s))", tmp, optarg); free(tmp); } else { condor_requirements = string_format("(%s)", optarg); } break; case LONG_OPT_WRAPPER: wrapper_command = optarg; break; case LONG_OPT_WRAPPER_INPUT: 
if(!wrapper_input) { wrapper_input = strdup(optarg); } else { wrapper_input = string_format("%s,%s",wrapper_input,optarg); } break; case 'P': password_file = optarg; break; case 'S': scratch_dir = optarg; break; case 'c': consider_capacity = 1; break; case 'd': debug_flags_set(optarg); break; case 'o': debug_config_file(optarg); break; case 'O': debug_config_file_size(string_metric_parse(optarg)); break; case 'v': cctools_version_print(stdout, argv[0]); exit(EXIT_SUCCESS); case 'h': show_help(argv[0]); exit(EXIT_SUCCESS); default: show_help(argv[0]); return EXIT_FAILURE; } } if(project_regex) { using_catalog = 1; } else if((argc - optind) == 2) { master_host = argv[optind]; master_port = atoi(argv[optind+1]); } else { fprintf(stderr,"work_queue_factory: You must either give a project name with the -M option or master-name option with a configuration file, or give the master's host and port.\n"); show_help(argv[0]); exit(1); } cctools_version_debug(D_DEBUG, argv[0]); if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr,"work_queue_factory: You must specify a batch type with the -T option.\n"); fprintf(stderr, "valid options:\n"); fprintf(stderr, "%s\n", batch_queue_type_string()); return 1; } if(config_file) { char abs_path_name[PATH_MAX]; if(!realpath(config_file, abs_path_name)) { fprintf(stderr, "work_queue_factory: could not resolve configuration file path: '%s'.\n", config_file); exit(EXIT_FAILURE); } free(config_file); /* From now on, read config_file from absolute path */ config_file = xxstrdup(abs_path_name); if(!read_config_file(config_file)) { fprintf(stderr,"work_queue_factory: There were errors in the configuration file: %s\n", config_file); return 1; } } if(workers_min>workers_max) { fprintf(stderr,"work_queue_factory: min workers (%d) is greater than max workers (%d)\n",workers_min, workers_max); return 1; } /* Careful here: most of the supported batch systems expect that jobs are submitting from a single shared filesystem. 
Changing to /tmp only works in the case of Condor. */ if(!scratch_dir) { if(batch_queue_type==BATCH_QUEUE_TYPE_CONDOR) { scratch_dir = string_format("/tmp/wq-pool-%d",getuid()); } else { scratch_dir = string_format("wq-pool-%d",getuid()); } } if(!create_dir(scratch_dir,0777)) { fprintf(stderr,"work_queue_factory: couldn't create %s: %s",scratch_dir,strerror(errno)); return 1; } char cmd[1024]; sprintf(cmd,"cp \"$(which work_queue_worker)\" '%s'",scratch_dir); if (system(cmd)) { fprintf(stderr, "work_queue_factory: please add work_queue_worker to your PATH.\n"); exit(EXIT_FAILURE); } if(password_file) { sprintf(cmd,"cp %s %s/pwfile",password_file,scratch_dir); system(cmd); } if(chdir(scratch_dir)!=0) { fprintf(stderr,"work_queue_factory: couldn't chdir to %s: %s",scratch_dir,strerror(errno)); return 1; } signal(SIGINT, handle_abort); signal(SIGQUIT, handle_abort); signal(SIGTERM, handle_abort); signal(SIGHUP, ignore_signal); queue = batch_queue_create(batch_queue_type); if(!queue) { fprintf(stderr,"work_queue_factory: couldn't establish queue type %s",batch_queue_type_to_string(batch_queue_type)); return 1; } batch_queue_set_option(queue, "batch-options", batch_submit_options); batch_queue_set_option(queue, "autosize", autosize ? "yes" : NULL); set_worker_resources_options( queue ); if (amazon_credentials != NULL) { batch_queue_set_option(queue, "amazon-credentials", amazon_credentials); } if (amazon_ami != NULL) { batch_queue_set_option(queue, "amazon-ami", amazon_ami); } if(condor_requirements != NULL && batch_queue_type != BATCH_QUEUE_TYPE_CONDOR) { debug(D_NOTICE, "condor_requirements will be ignored as workers will not be running in condor."); } else { batch_queue_set_option(queue, "condor-requirements", condor_requirements); } mainloop( queue ); batch_queue_delete(queue); return 0; }
int main(int argc, char *argv[]) { batch_queue_type_t batch_queue_type = BATCH_QUEUE_TYPE_UNKNOWN; catalog_host = CATALOG_HOST; catalog_port = CATALOG_PORT; debug_config(argv[0]); int c; while((c = getopt_long(argc, argv, "F:N:M:T:t:w:W:E:P:S:cd:o:O:vh", long_options, NULL)) > -1) { switch (c) { case 'F': foremen_regex = optarg; break; case 'N': case 'M': project_regex = optarg; break; case 'T': batch_queue_type = batch_queue_type_from_string(optarg); if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr, "unknown batch queue type: %s\n", optarg); return EXIT_FAILURE; } break; case 't': worker_timeout = atoi(optarg); break; case 'w': workers_min = atoi(optarg); break; case 'W': workers_max = atoi(optarg); break; case LONG_OPT_TASKS_PER_WORKER: tasks_per_worker = atof(optarg); break; case 'E': extra_worker_args = optarg; break; case LONG_OPT_CORES: num_cores_option = xxstrdup(optarg); break; case LONG_OPT_MEMORY: num_memory_option = xxstrdup(optarg); break; case LONG_OPT_DISK: num_disk_option = xxstrdup(optarg); break; case LONG_OPT_GPUS: num_gpus_option = xxstrdup(optarg); break; case 'P': password_file = optarg; break; case 'S': scratch_dir = optarg; break; case 'c': consider_capacity = 1; break; case 'd': debug_flags_set(optarg); break; case 'o': debug_config_file(optarg); break; case 'O': debug_config_file_size(string_metric_parse(optarg)); break; case 'v': cctools_version_print(stdout, argv[0]); exit(EXIT_SUCCESS); case 'h': show_help(argv[0]); exit(EXIT_SUCCESS); default: show_help(argv[0]); return EXIT_FAILURE; } } cctools_version_debug(D_DEBUG, argv[0]); if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr,"work_queue_pool: You must specify a batch type with the -T option.\n"); fprintf(stderr, "valid options:\n"); fprintf(stderr, "%s\n", batch_queue_type_string()); return 1; } if(!project_regex) { fprintf(stderr,"work_queue_pool: You must give a project name with the -M option.\n"); return 1; } if(workers_min>workers_max) { 
fprintf(stderr,"work_queue_pool: --min-workers (%d) is greater than --max-workers (%d)\n",workers_min,workers_max); return 1; } if(tasks_per_worker < 1) { tasks_per_worker = num_cores_option ? atof(num_cores_option) : 1; } if(!scratch_dir) { scratch_dir = string_format("/tmp/wq-pool-%d",getuid()); } if(!create_dir(scratch_dir,0777)) { fprintf(stderr,"work_queue_pool: couldn't create %s: %s",scratch_dir,strerror(errno)); return 1; } char cmd[1024]; sprintf(cmd,"cp \"$(which work_queue_worker)\" '%s'",scratch_dir); if (system(cmd)) { fprintf(stderr, "work_queue_pool: please add work_queue_worker to your PATH.\n"); exit(EXIT_FAILURE); } if(password_file) { sprintf(cmd,"cp %s %s/pwfile",password_file,scratch_dir); system(cmd); } if(chdir(scratch_dir)!=0) { fprintf(stderr,"work_queue_pool: couldn't chdir to %s: %s",scratch_dir,strerror(errno)); return 1; } signal(SIGINT, handle_abort); signal(SIGQUIT, handle_abort); signal(SIGTERM, handle_abort); signal(SIGHUP, ignore_signal); struct batch_queue * queue = batch_queue_create(batch_queue_type); if(!queue) { fprintf(stderr,"work_queue_pool: couldn't establish queue type %s",batch_queue_type_to_string(batch_queue_type)); return 1; } set_worker_resources( queue ); mainloop( queue, project_regex, foremen_regex ); batch_queue_delete(queue); return 0; }
/*
 * Create a batch queue of the given type (legacy driver-switch variant).
 *
 * Selects a default log file name for condor and work queue types, creates
 * the embedded work_queue/mpi_queue where required, runs cluster setup for
 * SGE/Moab/Torque/generic-cluster types, and verifies the Hadoop environment
 * variables for the hadoop type.
 *
 * Returns the new queue, or 0 on failure (allocation failure, unknown type,
 * sub-queue creation failure, or missing Hadoop environment); on failure any
 * partially built queue is freed.
 */
struct batch_queue *batch_queue_create(batch_queue_type_t type)
{
	struct batch_queue *q;

	if(type == BATCH_QUEUE_TYPE_UNKNOWN)
		return 0;

	q = malloc(sizeof(*q));
	if(!q)	/* malloc was previously unchecked: NULL deref on OOM */
		return 0;

	q->type = type;
	q->options_text = 0;
	q->job_table = itable_create(0);
	q->output_table = itable_create(0);

	/* Initialize both sub-queue pointers up front so that every early
	   batch_queue_delete(q) below sees defined values. */
	q->work_queue = 0;
	q->mpi_queue = 0;

	if(type == BATCH_QUEUE_TYPE_CONDOR)
		q->logfile = strdup("condor.logfile");
	else if(type == BATCH_QUEUE_TYPE_WORK_QUEUE || type == BATCH_QUEUE_TYPE_WORK_QUEUE_SHAREDFS)
		q->logfile = strdup("wq.log");
	else
		q->logfile = NULL;

	if(type == BATCH_QUEUE_TYPE_WORK_QUEUE || type == BATCH_QUEUE_TYPE_WORK_QUEUE_SHAREDFS) {
		q->work_queue = work_queue_create(0);
		if(!q->work_queue) {
			batch_queue_delete(q);
			return 0;
		}
	}

	if(type == BATCH_QUEUE_TYPE_MPI_QUEUE) {
		q->mpi_queue = mpi_queue_create(0);
		if(!q->mpi_queue) {
			batch_queue_delete(q);
			return 0;
		}
	}

	if(type == BATCH_QUEUE_TYPE_SGE || type == BATCH_QUEUE_TYPE_MOAB || type == BATCH_QUEUE_TYPE_TORQUE || type == BATCH_QUEUE_TYPE_CLUSTER) {
		batch_job_setup_cluster(q);
	}

	if(type == BATCH_QUEUE_TYPE_HADOOP) {
		int fail = 0;

		if(!getenv("HADOOP_HOME")) {
			debug(D_NOTICE, "error: environment variable HADOOP_HOME not set\n");
			fail = 1;
		}
		if(!getenv("HDFS_ROOT_DIR")) {
			debug(D_NOTICE, "error: environment variable HDFS_ROOT_DIR not set\n");
			fail = 1;
		}
		if(!getenv("HADOOP_USER_TMP")) {
			debug(D_NOTICE, "error: environment variable HADOOP_USER_TMP not set\n");
			fail = 1;
		}
		if(!getenv("HADOOP_PARROT_PATH")) {
			/* Note: HADOOP_PARROT_PATH is the path to Parrot on the remote node, not on the local machine. */
			debug(D_NOTICE, "error: environment variable HADOOP_PARROT_PATH not set\n");
			fail = 1;
		}

		if(fail) {
			batch_queue_delete(q);
			return 0;
		}
	}

	return q;
}