/*
 * Push a one-shot status summary for the given DAG to the catalog server.
 * Tallies nodes by state, packages the totals plus identifying metadata
 * as a JX object, and sends it to CATALOG_HOST.
 * Returns the result of catalog_query_send_update().
 */
int makeflow_catalog_summary(struct dag* d, char* name, batch_queue_type_t type, timestamp_t start)
{
	int waiting = 0, running = 0, completed = 0, failed = 0, aborted = 0;

	/* Walk every node once and bucket it by its current state. */
	struct dag_node *node;
	for(node = d->nodes; node; node = node->next) {
		switch(node->state) {
		case DAG_NODE_STATE_FAILED:
			failed++;
			break;
		case DAG_NODE_STATE_ABORTED:
			aborted++;
			break;
		case DAG_NODE_STATE_COMPLETE:
			completed++;
			break;
		case DAG_NODE_STATE_RUNNING:
			running++;
			break;
		case DAG_NODE_STATE_WAITING:
			waiting++;
			break;
		default:
			/* other states are not reported */
			break;
		}
	}

	/* Assemble the catalog update record. */
	char username[USERNAME_MAX];
	username_get(username);

	struct jx *record = jx_object(0);
	jx_insert_string(record, "type", "makeflow");
	jx_insert_integer(record, "total", itable_size(d->node_table));
	jx_insert_integer(record, "running", running);
	jx_insert_integer(record, "waiting", waiting);
	jx_insert_integer(record, "aborted", aborted);
	jx_insert_integer(record, "completed", completed);
	jx_insert_integer(record, "failed", failed);
	jx_insert_string(record, "project", name);
	jx_insert_string(record, "owner", username);

	char *started = string_format("%" PRIu64 "", start);
	jx_insert_string(record, "time_started", started);
	jx_insert_string(record, "batch_type", batch_queue_type_to_string(type));

	/* jx_print_string() allocates; release everything after the send. */
	char *payload = jx_print_string(record);
	int result = catalog_query_send_update(CATALOG_HOST, payload);

	free(payload);
	free(started);
	jx_delete(record);

	return result;
}
static int batch_queue_cluster_create (struct batch_queue *q) { if(cluster_name) free(cluster_name); if(cluster_submit_cmd) free(cluster_submit_cmd); if(cluster_remove_cmd) free(cluster_remove_cmd); if(cluster_options) free(cluster_options); cluster_name = cluster_submit_cmd = cluster_remove_cmd = cluster_options = NULL; switch(q->type) { case BATCH_QUEUE_TYPE_SGE: cluster_name = strdup("sge"); cluster_submit_cmd = strdup("qsub"); cluster_remove_cmd = strdup("qdel"); cluster_options = strdup("-cwd -o /dev/null -j y -V"); break; case BATCH_QUEUE_TYPE_MOAB: cluster_name = strdup("moab"); cluster_submit_cmd = strdup("msub"); cluster_remove_cmd = strdup("mdel"); cluster_options = strdup("-d . -o /dev/null -j oe -V"); break; case BATCH_QUEUE_TYPE_TORQUE: cluster_name = strdup("torque"); cluster_submit_cmd = strdup("qsub"); cluster_remove_cmd = strdup("qdel"); cluster_options = strdup("-d . -o /dev/null -j oe -V"); break; case BATCH_QUEUE_TYPE_CLUSTER: cluster_name = getenv("BATCH_QUEUE_CLUSTER_NAME"); cluster_submit_cmd = getenv("BATCH_QUEUE_CLUSTER_SUBMIT_COMMAND"); cluster_remove_cmd = getenv("BATCH_QUEUE_CLUSTER_REMOVE_COMMAND"); cluster_options = getenv("BATCH_QUEUE_CLUSTER_SUBMIT_OPTIONS"); break; default: debug(D_BATCH, "Invalid cluster type: %s\n", batch_queue_type_to_string(q->type)); return -1; } if(cluster_name && cluster_submit_cmd && cluster_remove_cmd && cluster_options) return 0; if(!cluster_name) debug(D_NOTICE, "Environment variable BATCH_QUEUE_CLUSTER_NAME unset\n"); if(!cluster_submit_cmd) debug(D_NOTICE, "Environment variable BATCH_QUEUE_CLUSTER_SUBMIT_COMMAND unset\n"); if(!cluster_remove_cmd) debug(D_NOTICE, "Environment variable BATCH_QUEUE_CLUSTER_REMOVE_COMMAND unset\n"); if(!cluster_options) debug(D_NOTICE, "Environment variable BATCH_QUEUE_CLUSTER_SUBMIT_OPTIONS unset\n"); return -1; }
int main(int argc, char *argv[]) { batch_queue_type_t batch_queue_type = BATCH_QUEUE_TYPE_UNKNOWN; catalog_host = CATALOG_HOST; catalog_port = CATALOG_PORT; batch_submit_options = getenv("BATCH_OPTIONS"); debug_config(argv[0]); resources = rmsummary_create(-1); int c; while((c = getopt_long(argc, argv, "B:C:F:N:M:T:t:w:W:E:P:S:cd:o:O:vh", long_options, NULL)) > -1) { switch (c) { case 'B': batch_submit_options = xxstrdup(optarg); break; case 'C': config_file = xxstrdup(optarg); break; case 'F': foremen_regex = xxstrdup(optarg); break; case 'N': case 'M': project_regex = xxstrdup(optarg); break; case 'T': batch_queue_type = batch_queue_type_from_string(optarg); if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr, "unknown batch queue type: %s\n", optarg); return EXIT_FAILURE; } break; case 't': worker_timeout = atoi(optarg); break; case 'w': workers_min = atoi(optarg); break; case 'W': workers_max = atoi(optarg); break; case LONG_OPT_WORKERS_PER_CYCLE: workers_per_cycle = atoi(optarg); break; case LONG_OPT_TASKS_PER_WORKER: tasks_per_worker = atof(optarg); break; case 'E': extra_worker_args = xxstrdup(optarg); break; case LONG_OPT_CORES: resources->cores = atoi(optarg); break; case LONG_OPT_AMAZON_CREDENTIALS: amazon_credentials = xxstrdup(optarg); break; case LONG_OPT_AMAZON_AMI: amazon_ami = xxstrdup(optarg); break; case LONG_OPT_MEMORY: resources->memory = atoi(optarg); break; case LONG_OPT_DISK: resources->disk = atoi(optarg); break; case LONG_OPT_GPUS: resources->gpus = atoi(optarg); break; case LONG_OPT_AUTOSIZE: autosize = 1; break; case LONG_OPT_FACTORY_TIMEOUT: factory_timeout = MAX(0, atoi(optarg)); break; case LONG_OPT_CONDOR_REQUIREMENTS: if(condor_requirements) { char *tmp = condor_requirements; condor_requirements = string_format("(%s && (%s))", tmp, optarg); free(tmp); } else { condor_requirements = string_format("(%s)", optarg); } break; case LONG_OPT_WRAPPER: wrapper_command = optarg; break; case LONG_OPT_WRAPPER_INPUT: 
if(!wrapper_input) { wrapper_input = strdup(optarg); } else { wrapper_input = string_format("%s,%s",wrapper_input,optarg); } break; case 'P': password_file = optarg; break; case 'S': scratch_dir = optarg; break; case 'c': consider_capacity = 1; break; case 'd': debug_flags_set(optarg); break; case 'o': debug_config_file(optarg); break; case 'O': debug_config_file_size(string_metric_parse(optarg)); break; case 'v': cctools_version_print(stdout, argv[0]); exit(EXIT_SUCCESS); case 'h': show_help(argv[0]); exit(EXIT_SUCCESS); default: show_help(argv[0]); return EXIT_FAILURE; } } if(project_regex) { using_catalog = 1; } else if((argc - optind) == 2) { master_host = argv[optind]; master_port = atoi(argv[optind+1]); } else { fprintf(stderr,"work_queue_factory: You must either give a project name with the -M option or master-name option with a configuration file, or give the master's host and port.\n"); show_help(argv[0]); exit(1); } cctools_version_debug(D_DEBUG, argv[0]); if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr,"work_queue_factory: You must specify a batch type with the -T option.\n"); fprintf(stderr, "valid options:\n"); fprintf(stderr, "%s\n", batch_queue_type_string()); return 1; } if(config_file) { char abs_path_name[PATH_MAX]; if(!realpath(config_file, abs_path_name)) { fprintf(stderr, "work_queue_factory: could not resolve configuration file path: '%s'.\n", config_file); exit(EXIT_FAILURE); } free(config_file); /* From now on, read config_file from absolute path */ config_file = xxstrdup(abs_path_name); if(!read_config_file(config_file)) { fprintf(stderr,"work_queue_factory: There were errors in the configuration file: %s\n", config_file); return 1; } } if(workers_min>workers_max) { fprintf(stderr,"work_queue_factory: min workers (%d) is greater than max workers (%d)\n",workers_min, workers_max); return 1; } /* Careful here: most of the supported batch systems expect that jobs are submitting from a single shared filesystem. 
Changing to /tmp only works in the case of Condor. */ if(!scratch_dir) { if(batch_queue_type==BATCH_QUEUE_TYPE_CONDOR) { scratch_dir = string_format("/tmp/wq-pool-%d",getuid()); } else { scratch_dir = string_format("wq-pool-%d",getuid()); } } if(!create_dir(scratch_dir,0777)) { fprintf(stderr,"work_queue_factory: couldn't create %s: %s",scratch_dir,strerror(errno)); return 1; } char cmd[1024]; sprintf(cmd,"cp \"$(which work_queue_worker)\" '%s'",scratch_dir); if (system(cmd)) { fprintf(stderr, "work_queue_factory: please add work_queue_worker to your PATH.\n"); exit(EXIT_FAILURE); } if(password_file) { sprintf(cmd,"cp %s %s/pwfile",password_file,scratch_dir); system(cmd); } if(chdir(scratch_dir)!=0) { fprintf(stderr,"work_queue_factory: couldn't chdir to %s: %s",scratch_dir,strerror(errno)); return 1; } signal(SIGINT, handle_abort); signal(SIGQUIT, handle_abort); signal(SIGTERM, handle_abort); signal(SIGHUP, ignore_signal); queue = batch_queue_create(batch_queue_type); if(!queue) { fprintf(stderr,"work_queue_factory: couldn't establish queue type %s",batch_queue_type_to_string(batch_queue_type)); return 1; } batch_queue_set_option(queue, "batch-options", batch_submit_options); batch_queue_set_option(queue, "autosize", autosize ? "yes" : NULL); set_worker_resources_options( queue ); if (amazon_credentials != NULL) { batch_queue_set_option(queue, "amazon-credentials", amazon_credentials); } if (amazon_ami != NULL) { batch_queue_set_option(queue, "amazon-ami", amazon_ami); } if(condor_requirements != NULL && batch_queue_type != BATCH_QUEUE_TYPE_CONDOR) { debug(D_NOTICE, "condor_requirements will be ignored as workers will not be running in condor."); } else { batch_queue_set_option(queue, "condor-requirements", condor_requirements); } mainloop( queue ); batch_queue_delete(queue); return 0; }
int main(int argc, char *argv[]) { batch_queue_type_t batch_queue_type = BATCH_QUEUE_TYPE_UNKNOWN; catalog_host = CATALOG_HOST; catalog_port = CATALOG_PORT; debug_config(argv[0]); int c; while((c = getopt_long(argc, argv, "F:N:M:T:t:w:W:E:P:S:cd:o:O:vh", long_options, NULL)) > -1) { switch (c) { case 'F': foremen_regex = optarg; break; case 'N': case 'M': project_regex = optarg; break; case 'T': batch_queue_type = batch_queue_type_from_string(optarg); if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr, "unknown batch queue type: %s\n", optarg); return EXIT_FAILURE; } break; case 't': worker_timeout = atoi(optarg); break; case 'w': workers_min = atoi(optarg); break; case 'W': workers_max = atoi(optarg); break; case LONG_OPT_TASKS_PER_WORKER: tasks_per_worker = atof(optarg); break; case 'E': extra_worker_args = optarg; break; case LONG_OPT_CORES: num_cores_option = xxstrdup(optarg); break; case LONG_OPT_MEMORY: num_memory_option = xxstrdup(optarg); break; case LONG_OPT_DISK: num_disk_option = xxstrdup(optarg); break; case LONG_OPT_GPUS: num_gpus_option = xxstrdup(optarg); break; case 'P': password_file = optarg; break; case 'S': scratch_dir = optarg; break; case 'c': consider_capacity = 1; break; case 'd': debug_flags_set(optarg); break; case 'o': debug_config_file(optarg); break; case 'O': debug_config_file_size(string_metric_parse(optarg)); break; case 'v': cctools_version_print(stdout, argv[0]); exit(EXIT_SUCCESS); case 'h': show_help(argv[0]); exit(EXIT_SUCCESS); default: show_help(argv[0]); return EXIT_FAILURE; } } cctools_version_debug(D_DEBUG, argv[0]); if(batch_queue_type == BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr,"work_queue_pool: You must specify a batch type with the -T option.\n"); fprintf(stderr, "valid options:\n"); fprintf(stderr, "%s\n", batch_queue_type_string()); return 1; } if(!project_regex) { fprintf(stderr,"work_queue_pool: You must give a project name with the -M option.\n"); return 1; } if(workers_min>workers_max) { 
fprintf(stderr,"work_queue_pool: --min-workers (%d) is greater than --max-workers (%d)\n",workers_min,workers_max); return 1; } if(tasks_per_worker < 1) { tasks_per_worker = num_cores_option ? atof(num_cores_option) : 1; } if(!scratch_dir) { scratch_dir = string_format("/tmp/wq-pool-%d",getuid()); } if(!create_dir(scratch_dir,0777)) { fprintf(stderr,"work_queue_pool: couldn't create %s: %s",scratch_dir,strerror(errno)); return 1; } char cmd[1024]; sprintf(cmd,"cp \"$(which work_queue_worker)\" '%s'",scratch_dir); if (system(cmd)) { fprintf(stderr, "work_queue_pool: please add work_queue_worker to your PATH.\n"); exit(EXIT_FAILURE); } if(password_file) { sprintf(cmd,"cp %s %s/pwfile",password_file,scratch_dir); system(cmd); } if(chdir(scratch_dir)!=0) { fprintf(stderr,"work_queue_pool: couldn't chdir to %s: %s",scratch_dir,strerror(errno)); return 1; } signal(SIGINT, handle_abort); signal(SIGQUIT, handle_abort); signal(SIGTERM, handle_abort); signal(SIGHUP, ignore_signal); struct batch_queue * queue = batch_queue_create(batch_queue_type); if(!queue) { fprintf(stderr,"work_queue_pool: couldn't establish queue type %s",batch_queue_type_to_string(batch_queue_type)); return 1; } set_worker_resources( queue ); mainloop( queue, project_regex, foremen_regex ); batch_queue_delete(queue); return 0; }
/*
 * Print the command-line usage summary to stdout.
 * Output is byte-identical to the previous version; the constant
 * flag/description pairs are now table-driven, and the lines that
 * interpolate runtime defaults are printed individually afterwards.
 */
static void show_help(const char *cmd)
{
	/* Options whose help text has no runtime parameters, in display order. */
	static const struct {
		const char *flag;
		const char *text;
	} fixed_opts[] = {
		{"-h,--help",                "Show this help screen"},
		{"-v,--version",             "Show version string"},
		{"-d,--debug=<subsystem>",   "Enable debugging for this subsystem. (Try -d all to start.)"},
		{"-A,--auto",                "Automatically choose between multicore and batch mode."},
		{"-i,--bitmap=<file.bmp>",   "Save progress image to this file."},
		{"-l,--log-file=<file>",     "Save progress log to this file."},
		{"-b,--block-size=<size>",   "Manually set the block size for batch mode."},
		{"-L,--multicore",           "Run the whole problem locally in multicore mode. (default)"},
		{"-n,--jobs=<njobs>",        "Manually set the number of process to run at once."},
		{"-o,--debug-file=<file>",   "Send debugging to this file. (can also be :stderr, :stdout, :syslog, or :journal)"},
	};

	fprintf(stdout, "Use: %s [options] <command> <xsize> <ysize>\n", cmd);
	fprintf(stdout, "where options are:\n");

	size_t i;
	for(i = 0; i < sizeof(fixed_opts) / sizeof(fixed_opts[0]); i++) {
		fprintf(stdout, " %-30s %s\n", fixed_opts[i].flag, fixed_opts[i].text);
	}

	/* These lines interpolate current defaults, so they are printed directly. */
	fprintf(stdout, " %-30s Interval between image writes, in seconds. (default=%d)\n", "-t,--bitmap-interval=<secs>", progress_bitmap_interval);
	fprintf(stdout, " %-30s Run in distributed mode with <type> batch system: (default=%s)\n", "-T,--batch-type=<type>", batch_queue_type_to_string(batch_system_type));
	fprintf(stdout, " %-30s %s\n", "", batch_queue_type_string());
	fprintf(stdout, " %-30s Verify mode: check the configuration and then exit.\n", "-V,--verify");
}