static batch_job_id_t batch_job_wq_submit(struct batch_queue *q, const char *cmd, const char *extra_input_files, const char *extra_output_files, struct jx *envlist, const struct rmsummary *resources)
{
	struct work_queue_task *t;

	int caching_flag = WORK_QUEUE_CACHE;

	if(string_istrue(hash_table_lookup(q->options, "caching"))) {
		caching_flag = WORK_QUEUE_CACHE;
	} else {
		caching_flag = WORK_QUEUE_NOCACHE;
	}

	t = work_queue_task_create(cmd);

	specify_files(t, extra_input_files, extra_output_files, caching_flag);
	specify_envlist(t, envlist);

	if(envlist) {
		const char *category = jx_lookup_string(envlist, "CATEGORY");
		if(category) {
			work_queue_task_specify_category(t, category);
		}
	}

	if(resources) {
		work_queue_task_specify_resources(t, resources);
	}

	work_queue_submit(q->data, t);

	return t->taskid;
}
static batch_job_id_t batch_job_wq_submit(struct batch_queue *q, const char *cmd, const char *extra_input_files, const char *extra_output_files, struct nvpair *envlist)
{
	struct work_queue_task *t;

	int caching_flag = WORK_QUEUE_CACHE;

	if(string_istrue(hash_table_lookup(q->options, "caching"))) {
		caching_flag = WORK_QUEUE_CACHE;
	} else {
		caching_flag = WORK_QUEUE_NOCACHE;
	}

	t = work_queue_task_create(cmd);

	specify_files(t, extra_input_files, extra_output_files, caching_flag);
	specify_envlist(t, envlist);

	struct rmsummary *resources = parse_batch_options_resources(hash_table_lookup(q->options, "batch-options"));
	if(resources) {
		work_queue_task_specify_resources(t, resources);
		free(resources);
	}

	work_queue_submit(q->data, t);

	return t->taskid;
}
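/*
 * A minimal usage sketch, not from the source, matching the first
 * (jx-based) version of batch_job_wq_submit above. It assumes the public
 * dispatch wrappers batch_queue_create() and batch_job_submit() from
 * batch_job.h, which route to the module's submit function; the command,
 * file lists, and resource values are illustrative.
 */
#include "batch_job.h"
#include "rmsummary.h"

static batch_job_id_t submit_example(void)
{
	struct batch_queue *q = batch_queue_create(BATCH_QUEUE_TYPE_WORK_QUEUE);
	if(!q)
		return -1;

	struct rmsummary *r = rmsummary_create(-1);	/* -1 leaves unspecified fields unset */
	r->cores  = 2;
	r->memory = 1024;	/* MB */

	/* envlist is optional; pass NULL when the task needs no extra environment */
	batch_job_id_t id = batch_job_submit(q, "./sim input.dat output.dat", "sim,input.dat", "output.dat", NULL, r);

	rmsummary_delete(r);
	return id;
}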
static int dag_parse_node_command(struct lexer *bk, struct dag_node *n)
{
	struct token *t;

	//Jump COMMAND token.
	t = lexer_next_token(bk);
	lexer_free_token(t);

	char *local = dag_variable_lookup_string("BATCH_LOCAL", bk->environment);
	if(local) {
		if(string_istrue(local))
			n->local_job = 1;
		free(local);
	}

	/* Read command modifiers. */
	while((t = lexer_peek_next_token(bk)) && t->type != TOKEN_COMMAND_MOD_END) {
		t = lexer_next_token(bk);

		if(strcmp(t->lexeme, "LOCAL") == 0) {
			n->local_job = 1;
		} else if(strcmp(t->lexeme, "MAKEFLOW") == 0) {
			n->nested_job = 1;
		} else {
			lexer_report_error(bk, "Parser does not know about modifier: %s.\n", t->lexeme);
		}

		lexer_free_token(t);
	}

	if(!t) {
		lexer_report_error(bk, "Malformed command.");
	}

	//Free COMMAND_MOD_END token.
	t = lexer_next_token(bk);
	lexer_free_token(t);

	if(n->nested_job) {
		return dag_parse_node_nested_makeflow(bk, n);
	} else {
		return dag_parse_node_regular_command(bk, n);
	}
}
void dag_parse_node_set_command(struct lexer_book *bk, struct dag_node *n, char *command)
{
	struct dag_lookup_set s = { bk->d, bk->category, n, NULL };
	char *local = dag_lookup_str("BATCH_LOCAL", &s);

	if(local) {
		if(string_istrue(local))
			n->local_job = 1;
		free(local);
	}

	n->original_command = xxstrdup(command);
	n->command = translate_command(n, command, n->local_job);

	debug(D_DEBUG, "node command=%s", n->command);
}
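/*
 * For reference, the LOCAL and MAKEFLOW modifiers parsed above, and the
 * BATCH_LOCAL variable consulted by both functions, appear in Makeflow
 * rules like the following (illustrative snippet, not from the source):
 *
 *     BATCH_LOCAL=1                  # force local execution of later rules
 *
 *     output.txt: input.txt
 *         LOCAL ./process input.txt > output.txt
 *
 *     sub.out: sub.makeflow
 *         MAKEFLOW sub.makeflow
 */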
static void batch_queue_wq_option_update(struct batch_queue *q, const char *what, const char *value)
{
	if(strcmp(what, "password") == 0) {
		if(value)
			work_queue_specify_password(q->data, value);
	} else if(strcmp(what, "master-mode") == 0) {
		if(strcmp(value, "catalog") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_CATALOG);
		else if(strcmp(value, "standalone") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_STANDALONE);
	} else if(strcmp(what, "name") == 0) {
		if(value)
			work_queue_specify_name(q->data, value);
	} else if(strcmp(what, "priority") == 0) {
		if(value)
			work_queue_specify_priority(q->data, atoi(value));
		else
			work_queue_specify_priority(q->data, 0);
	} else if(strcmp(what, "fast-abort") == 0) {
		if(value)
			work_queue_activate_fast_abort(q->data, atof(value));
	} else if(strcmp(what, "estimate-capacity") == 0) {
		work_queue_specify_estimate_capacity_on(q->data, string_istrue(value));
	} else if(strcmp(what, "keepalive-interval") == 0) {
		if(value)
			work_queue_specify_keepalive_interval(q->data, atoi(value));
		else
			work_queue_specify_keepalive_interval(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL);
	} else if(strcmp(what, "keepalive-timeout") == 0) {
		if(value)
			work_queue_specify_keepalive_timeout(q->data, atoi(value));
		else
			work_queue_specify_keepalive_timeout(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT);
	} else if(strcmp(what, "master-preferred-connection") == 0) {
		if(value)
			work_queue_master_preferred_connection(q->data, value);
		else
			work_queue_master_preferred_connection(q->data, "by_ip");
	} else if(strcmp(what, "category-limits") == 0) {
		struct rmsummary *s = rmsummary_parse_string(value);
		if(s) {
			work_queue_specify_category_max_resources(q->data, s->category, s);
			rmsummary_delete(s);
		} else {
			debug(D_NOTICE, "Could not parse '%s' as a summary of resources encoded in JSON\n", value);
		}
	}
}
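/*
 * Example of a "category-limits" value accepted above: a JSON-encoded
 * rmsummary giving the category name and its per-task maximums. The
 * field names are an assumption about what rmsummary_parse_string()
 * consumes, and the values are illustrative:
 *
 *     { "category" : "analysis", "cores" : 4, "memory" : 2048, "disk" : 4096 }
 */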
static void batch_queue_wq_option_update(struct batch_queue *q, const char *what, const char *value)
{
	if(strcmp(what, "password") == 0) {
		if(value)
			work_queue_specify_password(q->data, value);
	} else if(strcmp(what, "master-mode") == 0) {
		if(strcmp(value, "catalog") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_CATALOG);
		else if(strcmp(value, "standalone") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_STANDALONE);
	} else if(strcmp(what, "name") == 0) {
		if(value)
			work_queue_specify_name(q->data, value);
	} else if(strcmp(what, "priority") == 0) {
		if(value)
			work_queue_specify_priority(q->data, atoi(value));
		else
			work_queue_specify_priority(q->data, 0);
	} else if(strcmp(what, "estimate-capacity") == 0) {
		work_queue_specify_estimate_capacity_on(q->data, string_istrue(value));
	} else if(strcmp(what, "keepalive-interval") == 0) {
		if(value)
			work_queue_specify_keepalive_interval(q->data, atoi(value));
		else
			work_queue_specify_keepalive_interval(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL);
	} else if(strcmp(what, "keepalive-timeout") == 0) {
		if(value)
			work_queue_specify_keepalive_timeout(q->data, atoi(value));
		else
			work_queue_specify_keepalive_timeout(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT);
	} else if(strcmp(what, "wait-queue-size") == 0) {
		if(value)
			work_queue_activate_worker_waiting(q->data, atoi(value));
		else
			work_queue_activate_worker_waiting(q->data, 0);
	} else if(strcmp(what, "master-preferred-connection") == 0) {
		if(value)
			work_queue_master_preferred_connection(q->data, value);
		else
			work_queue_master_preferred_connection(q->data, "by_ip");
	}
}
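/*
 * A hedged usage sketch, not from the source: options normally reach the
 * option_update hook above through the generic setter in batch_job.h,
 * which records the value in q->options and then invokes the module hook.
 * The option names are the strings matched above; the values are
 * illustrative.
 */
#include "batch_job.h"

static void configure_queue(struct batch_queue *q)
{
	batch_queue_set_option(q, "name", "my-makeflow-project");
	batch_queue_set_option(q, "priority", "10");
	batch_queue_set_option(q, "keepalive-interval", "120");	/* seconds */
	batch_queue_set_option(q, "wait-queue-size", "5");
}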
static batch_job_id_t batch_job_condor_submit(struct batch_queue *q, const char *cmd, const char *extra_input_files, const char *extra_output_files, struct jx *envlist, const struct rmsummary *resources)
{
	FILE *file;
	int njobs;
	int jobid;
	const char *options = hash_table_lookup(q->options, "batch-options");

	if(setup_condor_wrapper("condor.sh") < 0) {
		debug(D_BATCH, "could not create condor.sh: %s", strerror(errno));
		return -1;
	}

	if(!string_istrue(hash_table_lookup(q->options, "skip-afs-check"))) {
		char *cwd = path_getcwd();
		if(!strncmp(cwd, "/afs", 4)) {
			debug(D_NOTICE|D_BATCH, "The working directory is '%s':", cwd);
			debug(D_NOTICE|D_BATCH, "This won't work because Condor is not able to write to files in AFS.");
			debug(D_NOTICE|D_BATCH, "Instead, run makeflow from a local disk like /tmp.");
			debug(D_NOTICE|D_BATCH, "Or, use the Work Queue with -T wq and condor_submit_workers.");
			free(cwd);
			exit(EXIT_FAILURE);
		}
		free(cwd);
	}

	file = fopen("condor.submit", "w");
	if(!file) {
		debug(D_BATCH, "could not create condor.submit: %s", strerror(errno));
		return -1;
	}

	fprintf(file, "universe = vanilla\n");
	fprintf(file, "executable = condor.sh\n");

	char *escaped = string_escape_condor(cmd);
	fprintf(file, "arguments = %s\n", escaped);
	free(escaped);

	if(extra_input_files)
		fprintf(file, "transfer_input_files = %s\n", extra_input_files);
	// Note that we do not use transfer_output_files, because that causes the job
	// to get stuck in a system hold if the files are not created.
	fprintf(file, "should_transfer_files = yes\n");
	fprintf(file, "when_to_transfer_output = on_exit\n");
	fprintf(file, "notification = never\n");
	fprintf(file, "copy_to_spool = true\n");
	fprintf(file, "transfer_executable = true\n");
	fprintf(file, "keep_claim_idle = 30\n");
	fprintf(file, "log = %s\n", q->logfile);

	const char *c_req = batch_queue_get_option(q, "condor-requirements");
	char *bexp = blacklisted_expression(q);

	if(c_req && bexp) {
		fprintf(file, "requirements = %s && %s\n", c_req, bexp);
	} else if(c_req) {
		fprintf(file, "requirements = %s\n", c_req);
	} else if(bexp) {
		fprintf(file, "requirements = %s\n", bexp);
	}

	if(bexp)
		free(bexp);

	/*
	Getting environment variables formatted for a condor submit
	file is very hairy, due to some strange quoting rules.
	To avoid problems, we simply export vars to the environment,
	and then tell condor getenv=true, which pulls in the environment.
	*/
	fprintf(file, "getenv = true\n");

	if(envlist) {
		jx_export(envlist);
	}

	if(options)
		fprintf(file, "%s\n", options);

	/* set same defaults as condor_submit_workers */
	int64_t cores  = 1;
	int64_t memory = 1024;
	int64_t disk   = 1024;
	if(resources) {
		cores  = resources->cores  > -1 ? resources->cores  : cores;
		memory = resources->memory > -1 ? resources->memory : memory;
		disk   = resources->disk   > -1 ? resources->disk   : disk;
	}

	/* convert disk to KB */
	disk *= 1024;

	if(batch_queue_get_option(q, "autosize")) {
		fprintf(file, "request_cpus = ifThenElse(%" PRId64 " > TotalSlotCpus, %" PRId64 ", TotalSlotCpus)\n", cores, cores);
		fprintf(file, "request_memory = ifThenElse(%" PRId64 " > TotalSlotMemory, %" PRId64 ", TotalSlotMemory)\n", memory, memory);
		fprintf(file, "request_disk = ifThenElse((%" PRId64 ") > TotalSlotDisk, (%" PRId64 "), TotalSlotDisk)\n", disk, disk);
	} else {
		fprintf(file, "request_cpus = %" PRId64 "\n", cores);
		fprintf(file, "request_memory = %" PRId64 "\n", memory);
		fprintf(file, "request_disk = %" PRId64 "\n", disk);
	}

	fprintf(file, "queue\n");
	fclose(file);

	file = popen("condor_submit condor.submit", "r");
	if(!file)
		return -1;

	char line[BATCH_JOB_LINE_MAX];
	while(fgets(line, sizeof(line), file)) {
		if(sscanf(line, "%d job(s) submitted to cluster %d", &njobs, &jobid) == 2) {
			pclose(file);
			debug(D_BATCH, "job %d submitted to condor", jobid);
			struct batch_job_info *info;
			info = malloc(sizeof(*info));
			memset(info, 0, sizeof(*info));
			info->submitted = time(0);
			itable_insert(q->job_table, jobid, info);
			return jobid;
		}
	}

	pclose(file);
	debug(D_BATCH, "failed to submit job to condor!");
	return -1;
}
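/*
 * For reference, the non-autosize path above emits a submit file shaped
 * like the following. The command, file list, and log name are
 * illustrative; the request_* values are the defaults computed above
 * (1 core, 1024 MB memory, and 1024 MB disk expressed in KB):
 *
 *     universe = vanilla
 *     executable = condor.sh
 *     arguments = ./sim input.dat output.dat
 *     transfer_input_files = sim,input.dat
 *     should_transfer_files = yes
 *     when_to_transfer_output = on_exit
 *     notification = never
 *     copy_to_spool = true
 *     transfer_executable = true
 *     keep_claim_idle = 30
 *     log = makeflow.condorlog
 *     getenv = true
 *     request_cpus = 1
 *     request_memory = 1024
 *     request_disk = 1048576
 *     queue
 */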