static void mainloop( struct batch_queue *queue ) { int workers_submitted = 0; struct itable *job_table = itable_create(0); struct list *masters_list = NULL; struct list *foremen_list = NULL; int64_t factory_timeout_start = time(0); while(!abort_flag) { if(config_file && !read_config_file(config_file)) { debug(D_NOTICE, "Error re-reading '%s'. Using previous values.", config_file); } else { set_worker_resources_options( queue ); batch_queue_set_option(queue, "autosize", autosize ? "yes" : NULL); } submission_regex = foremen_regex ? foremen_regex : project_regex; if(using_catalog) { masters_list = work_queue_catalog_query(catalog_host,catalog_port,project_regex); } else { masters_list = do_direct_query(master_host,master_port); } if(masters_list && list_size(masters_list) > 0) { factory_timeout_start = time(0); } else { // check to see if factory timeout is triggered, factory timeout will be 0 if flag isn't set if(factory_timeout > 0) { if(time(0) - factory_timeout_start > factory_timeout) { fprintf(stderr, "There have been no masters for longer then the factory timeout, exiting\n"); abort_flag=1; break; } } } debug(D_WQ,"evaluating master list..."); int workers_needed = count_workers_needed(masters_list, 0); int workers_connected = count_workers_connected(masters_list); debug(D_WQ,"%d total workers needed across %d masters", workers_needed, masters_list ? list_size(masters_list) : 0); if(foremen_regex) { debug(D_WQ,"evaluating foremen list..."); foremen_list = work_queue_catalog_query(catalog_host,catalog_port,foremen_regex); /* add workers on foremen. Also, subtract foremen from workers * connected, as they were not deployed by the pool. 
*/ workers_needed += count_workers_needed(foremen_list, 1); workers_connected += MAX(count_workers_connected(foremen_list) - list_size(foremen_list), 0); debug(D_WQ,"%d total workers needed across %d foremen",workers_needed,list_size(foremen_list)); } debug(D_WQ,"raw workers needed: %d", workers_needed); if(workers_needed > workers_max) { debug(D_WQ,"applying maximum of %d workers",workers_max); workers_needed = workers_max; } if(workers_needed < workers_min) { debug(D_WQ,"applying minimum of %d workers",workers_min); workers_needed = workers_min; } int new_workers_needed = workers_needed - workers_submitted; if(workers_per_cycle > 0 && new_workers_needed > workers_per_cycle) { debug(D_WQ,"applying maximum workers per cycle of %d",workers_per_cycle); new_workers_needed = workers_per_cycle; } if(workers_per_cycle > 0 && workers_submitted > new_workers_needed + workers_connected) { debug(D_WQ,"waiting for %d previously submitted workers to connect", workers_submitted - workers_connected); new_workers_needed = 0; } debug(D_WQ,"workers needed: %d", workers_needed); debug(D_WQ,"workers submitted: %d", workers_submitted); debug(D_WQ,"workers requested: %d", new_workers_needed); print_stats(masters_list, foremen_list, workers_submitted, workers_needed, new_workers_needed, workers_connected); update_blacklisted_workers(queue, masters_list); if(new_workers_needed>0) { debug(D_WQ,"submitting %d new workers to reach target",new_workers_needed); workers_submitted += submit_workers(queue,job_table,new_workers_needed); } else if(new_workers_needed<0) { debug(D_WQ,"too many workers, will wait for some to exit"); } else { debug(D_WQ,"target number of workers is reached."); } debug(D_WQ,"checking for exited workers..."); time_t stoptime = time(0)+5; while(1) { struct batch_job_info info; batch_job_id_t jobid; jobid = batch_job_wait_timeout(queue,&info,stoptime); if(jobid>0) { if(itable_lookup(job_table,jobid)) { itable_remove(job_table,jobid); debug(D_WQ,"worker job %"PRId64" 
exited",jobid); workers_submitted--; } else { // it may have been a job from a previous run. } } else { break; } } delete_projects_list(masters_list); delete_projects_list(foremen_list); sleep(factory_period); } remove_all_workers(queue,job_table); itable_delete(job_table); }
struct cluster *nearest_neighbor_clustering(struct list *initial_clusters, double (*cmp)(struct cluster *, struct cluster *)) { struct cluster *top, *closest, *subtop; struct list *stack; struct itable *active_clusters; double dclosest, dsubtop; int merge = 0; list_first_item(initial_clusters); top = list_next_item(initial_clusters); /* Return immediately if top is NULL, or there is a unique * initial cluster */ if(list_size(initial_clusters) < 2) return top; stack = list_create(0); list_push_head(stack, top); /* Add all of the initial clusters as active clusters. */ active_clusters = itable_create(0); while( (top = list_next_item(initial_clusters)) ) itable_insert(active_clusters, (uintptr_t) top, (void *) top); do { /* closest might be NULL if all of the clusters are in * the stack now. subtop might be NULL if top was the * only cluster in the stack */ top = list_pop_head( stack ); closest = cluster_nearest_neighbor(active_clusters, top, cmp); subtop = list_peek_head( stack ); dclosest = -1; dsubtop = -1; if(closest) dclosest = cluster_ward_distance(top, closest); if(subtop) dsubtop = cluster_ward_distance(top, subtop); /* The nearest neighbor of top is either one of the * remaining active clusters, or the second topmost * cluster in the stack */ if( closest && subtop ) { /* Use pointer address to systematically break ties. */ if(dclosest < dsubtop || ((dclosest == dsubtop) && (uintptr_t)closest < (uintptr_t)subtop)) merge = 0; else merge = 1; } else if( subtop ) merge = 1; else if( closest ) merge = 0; else fatal("Zero clusters?\n"); //We should never reach here. 
if(merge) { /* If the two topmost clusters in the stack are * mutual nearest neighbors, merge them into a single * cluster */ subtop = list_pop_head( stack ); list_push_head(stack, cluster_merge(top, subtop)); } else { /* Otherwise, push the nearest neighbor of top to the * stack */ itable_remove(active_clusters, (uintptr_t) closest); list_push_head(stack, top); list_push_head(stack, closest); } debug(D_DEBUG, "stack: %d active: %d closest: %lf subtop: %lf\n", list_size(stack), itable_size(active_clusters), dclosest, dsubtop); /* If there are no more active_clusters, but there is not * a single cluster in the stack, we try again, * converting the clusters in the stack into new active * clusters. */ if(itable_size(active_clusters) == 0 && list_size(stack) > 3) { itable_delete(active_clusters); return nearest_neighbor_clustering(stack, cmp); } }while( !(itable_size(active_clusters) == 0 && list_size(stack) == 1) ); /* top is now the root of a cluster hierarchy, of * cluster->right, cluster->left. */ top = list_pop_head(stack); list_delete(stack); itable_delete(active_clusters); return top; }
static void mainloop( struct batch_queue *queue, const char *project_regex, const char *foremen_regex ) { int workers_submitted = 0; struct itable *job_table = itable_create(0); struct list *masters_list = NULL; struct list *foremen_list = NULL; const char *submission_regex = foremen_regex ? foremen_regex : project_regex; while(!abort_flag) { masters_list = work_queue_catalog_query(catalog_host,catalog_port,project_regex); debug(D_WQ,"evaluating master list..."); int workers_needed = count_workers_needed(masters_list, 0); debug(D_WQ,"%d total workers needed across %d masters", workers_needed, masters_list ? list_size(masters_list) : 0); if(foremen_regex) { debug(D_WQ,"evaluating foremen list..."); foremen_list = work_queue_catalog_query(catalog_host,catalog_port,foremen_regex); workers_needed += count_workers_needed(foremen_list, 1); debug(D_WQ,"%d total workers needed across %d foremen",workers_needed,list_size(foremen_list)); } debug(D_WQ,"raw workers needed: %d", workers_needed); if(workers_needed > workers_max) { debug(D_WQ,"applying maximum of %d workers",workers_max); workers_needed = workers_max; } if(workers_needed < workers_min) { debug(D_WQ,"applying minimum of %d workers",workers_min); workers_needed = workers_min; } int new_workers_needed = workers_needed - workers_submitted; debug(D_WQ,"workers needed: %d",workers_needed); debug(D_WQ,"workers in queue: %d",workers_submitted); print_stats(masters_list, foremen_list, workers_submitted, workers_needed, new_workers_needed); if(new_workers_needed>0) { debug(D_WQ,"submitting %d new workers to reach target",new_workers_needed); workers_submitted += submit_workers(queue,job_table,new_workers_needed,submission_regex); } else if(new_workers_needed<0) { debug(D_WQ,"too many workers, will wait for some to exit"); } else { debug(D_WQ,"target number of workers is reached."); } debug(D_WQ,"checking for exited workers..."); time_t stoptime = time(0)+5; while(1) { struct batch_job_info info; batch_job_id_t jobid; jobid 
= batch_job_wait_timeout(queue,&info,stoptime); if(jobid>0) { if(itable_lookup(job_table,jobid)) { itable_remove(job_table,jobid); debug(D_WQ,"worker job %"PRId64" exited",jobid); workers_submitted--; } else { // it may have been a job from a previous run. } } else { break; } } delete_projects_list(masters_list); delete_projects_list(foremen_list); sleep(30); } remove_all_workers(queue,job_table); itable_delete(job_table); }
static struct internal_amazon_batch_amazon_ids initialize(struct batch_queue* q){ if(initialized){ return initialized_data; } char* config_file = hash_table_lookup(q->options,"amazon-batch-config"); if(!config_file) { fatal("No amazon config file passed!"); } struct jx* config = jx_parse_file(config_file); initialized = 1; instID = time(NULL); queue_name = string_format("%i_ccl_amazon_batch_queue",instID);//should be unique done_jobs = itable_create(0);//default size amazon_job_ids = itable_create(0); done_files = itable_create(0); submitted_files = hash_table_create(0,0); char* amazon_ami = hash_table_lookup(q->options,"amazon-batch-img"); if(amazon_ami == NULL) { fatal("No image id passed. Please pass file containing ami image id using --amazon-batch-img flag"); } char* aws_access_key_id = (char*)jx_lookup_string(config, "aws_id"); char* aws_secret_access_key = (char*)jx_lookup_string(config, "aws_key"); char* aws_region = (char*)jx_lookup_string(config,"aws_reg"); bucket_name = (char*)jx_lookup_string(config,"bucket"); vpc = (char*)jx_lookup_string(config,"vpc"); sec_group = (char*)jx_lookup_string(config,"sec_group"); queue_name = (char*)jx_lookup_string(config,"queue_name"); compute_env_name = (char*)jx_lookup_string(config,"env_name"); subnet = (char*)jx_lookup_string(config,"subnet"); if(!aws_access_key_id) fatal("credentials file %s does not contain aws_id",config_file); if(!aws_secret_access_key) fatal("credentials file %s does not contain aws_key",config_file); if(!aws_region) fatal("credentials file %s does not contain aws_reg",config_file); if(!bucket_name) fatal("credentials file %s does not contain bucket",config_file); if(!queue_name) fatal("credentials file %s does not contain queue_name",config_file); if(!compute_env_name) fatal("credentials file %s does not contain env_name",config_file); if(!vpc) fatal("credentials file %s does not contain vpc",config_file); if(!subnet) fatal("credentials file %s does not contain subnet",config_file); char* 
env_var = string_format("AWS_ACCESS_KEY_ID=%s AWS_SECRET_ACCESS_KEY=%s AWS_DEFAULT_REGION=%s ",aws_access_key_id,aws_secret_access_key,aws_region); initialized_data.aws_access_key_id = aws_access_key_id; initialized_data.aws_secret_access_key = aws_secret_access_key; initialized_data.aws_region=aws_region; initialized_data.master_env_prefix = env_var; return initialized_data; }
int main( int argc, char *argv[] ) { signed char c; const char *progname = "wavefront"; debug_config(progname); progress_log_file = stdout; struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'v'}, {"debug", required_argument, 0, 'd'}, {"jobs", required_argument, 0, 'n'}, {"block-size", required_argument, 0, 'b'}, {"debug-file", required_argument, 0, 'o'}, {"log-file", required_argument, 0, 'l'}, {"bitmap", required_argument, 0, 'B'}, {"bitmap-interval", required_argument, 0, 'i'}, {"auto", no_argument, 0, 'A'}, {"local", no_argument, 0, 'L'}, {"batch-type", required_argument, 0, 'T'}, {"verify", no_argument, 0, 'V'}, {0,0,0,0} }; while((c=getopt_long(argc,argv,"n:b:d:o:l:B:i:qALDT:VX:Y:vh", long_options, NULL)) > -1) { switch(c) { case 'n': manual_max_jobs_running = atoi(optarg); break; case 'b': manual_block_size = atoi(optarg); break; case 'd': debug_flags_set(optarg); break; case 'o': debug_config_file(optarg); break; case 'B': progress_bitmap_file = optarg; break; case 'i': progress_bitmap_interval = atoi(optarg); break; case 'l': progress_log_file = fopen(optarg,"w"); if(!progress_log_file) { fprintf(stderr,"couldn't open %s: %s\n",optarg,strerror(errno)); return 1; } break; case 'A': wavefront_mode = WAVEFRONT_MODE_AUTO; break; case 'L': wavefront_mode = WAVEFRONT_MODE_MULTICORE; break; case 'T': wavefront_mode = WAVEFRONT_MODE_DISTRIBUTED; batch_system_type = batch_queue_type_from_string(optarg); if(batch_system_type==BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr,"unknown batch system type: %s\n",optarg); exit(1); } break; case 'V': verify_mode = 1; break; case 'X': xstart = atoi(optarg); break; case 'Y': ystart = atoi(optarg); break; case 'v': cctools_version_print(stdout, progname); exit(0); break; case 'h': show_help(progname); exit(0); break; } } cctools_version_debug(D_DEBUG, argv[0]); if( (argc-optind<3) ) { show_help(progname); exit(1); } function = argv[optind]; xsize=atoi(argv[optind+1]); 
ysize=atoi(argv[optind+2]); total_cells = xsize*ysize; if(!verify_mode && !check_configuration(function,xsize,ysize)) exit(1); int ncpus = load_average_get_cpus(); if(wavefront_mode!=WAVEFRONT_MODE_MULTICORE) { double task_time = measure_task_time(); printf("Each function takes %.02lfs to run.\n",task_time); block_size = find_best_block_size(xsize,1000,2,task_time,average_dispatch_time); double distributed_time = wavefront_distributed_model(xsize,1000,2,task_time,block_size,average_dispatch_time); double multicore_time = wavefront_multicore_model(xsize,ncpus,task_time); double ideal_multicore_time = wavefront_multicore_model(xsize,xsize,task_time); double sequential_time = wavefront_multicore_model(xsize,1,task_time); printf("---------------------------------\n"); printf("This workload would take:\n"); printf("%.02lfs sequentially\n",sequential_time); printf("%.02lfs on this %d-core machine\n",multicore_time,ncpus); printf("%.02lfs on a %d-core machine\n",ideal_multicore_time,xsize); printf("%.02lfs on a 1000-node distributed system with block size %d\n",distributed_time,block_size); printf("---------------------------------\n"); if(wavefront_mode==WAVEFRONT_MODE_AUTO) { if(multicore_time < distributed_time*2) { wavefront_mode = WAVEFRONT_MODE_MULTICORE; } else { wavefront_mode = WAVEFRONT_MODE_DISTRIBUTED; } } } if(wavefront_mode==WAVEFRONT_MODE_MULTICORE) { batch_system_type = BATCH_QUEUE_TYPE_LOCAL; max_jobs_running = ncpus; } else { max_jobs_running = 1000; } if(manual_block_size!=0) { block_size = manual_block_size; } if(manual_max_jobs_running!=0) { max_jobs_running = manual_max_jobs_running; } if(wavefront_mode==WAVEFRONT_MODE_MULTICORE) { printf("Running in multicore mode with %d CPUs.\n",max_jobs_running); } else { printf("Running in distributed mode with block size %d on up to %d CPUs\n",block_size,max_jobs_running); } batch_q = batch_queue_create(batch_system_type); if(verify_mode) exit(0); struct bitmap * b = bitmap_create(xsize+1,ysize+1); struct list 
*ready_list = list_create(); struct itable *running_table = itable_create(0); struct batch_job_info info; UINT64_T jobid; struct wavefront_task *task; wavefront_task_initialize(b,ready_list); printf("Starting workload...\n"); fprintf(progress_log_file,"# elapsed time : waiting jobs / running jobs / cells complete (percent complete)\n"); while(1) { if(abort_mode) { while((task=list_pop_tail(ready_list))) { wavefront_task_delete(task); } itable_firstkey(running_table); while(itable_nextkey(running_table,&jobid,(void**)&task)) { batch_job_remove(batch_q,jobid); } } if(list_size(ready_list)==0 && itable_size(running_table)==0) break; while(1) { if(itable_size(running_table)>=max_jobs_running) break; task = list_pop_tail(ready_list); if(!task) break; jobid = wavefront_task_submit(task); if(jobid>0) { itable_insert(running_table,jobid,task); wavefront_task_mark_range(task,b,WAVEFRONT_TASK_STATE_RUNNING); } else { abort(); sleep(1); list_push_head(ready_list,task); } } save_status(b,ready_list,running_table); jobid = batch_job_wait(batch_q,&info); if(jobid>0) { task = itable_remove(running_table,jobid); if(task) { if(info.exited_normally && info.exit_code==0) { total_dispatch_time += info.started-info.submitted; total_execute_time += MAX(info.finished-info.started,1); total_cells_complete+=task->width*task->height; total_jobs_complete++; average_dispatch_time = 1.0*total_dispatch_time / total_jobs_complete; average_task_time = 1.0*total_execute_time / total_cells_complete; wavefront_task_complete(b,ready_list,task); } else { printf("job %" PRIu64 " failed, aborting this workload\n",jobid); abort_mode = 1; } } } } save_status(b,ready_list,running_table); if(abort_mode) { printf("Workload was aborted.\n"); } else { printf("Workload complete.\n"); } return 0; }
int main(int argc, char *argv[]) { char c; int did_explicit_auth = 0; char *tickets = NULL; struct fuse_args fa; fa.argc = 0; fa.argv = string_array_new(); fa.allocated = 1; debug_config(argv[0]); while((c = getopt(argc, argv, "a:b:d:Dfhi:m:o:t:v")) != -1) { switch (c) { case 'd': debug_flags_set(optarg); break; case 'D': enable_small_file_optimizations = 0; break; case 'b': chirp_reli_blocksize_set(atoi(optarg)); break; case 'i': tickets = xxstrdup(optarg); break; case 'm': fa.argc += 1; fa.argv = string_array_append(fa.argv, optarg); break; case 'o': debug_config_file(optarg); break; case 'a': auth_register_byname(optarg); did_explicit_auth = 1; break; case 't': chirp_fuse_timeout = string_time_parse(optarg); break; case 'f': run_in_foreground = 1; break; case 'v': cctools_version_print(stdout, argv[0]); return 0; break; case 'h': default: show_help(argv[0]); return 1; break; } } cctools_version_debug(D_DEBUG, argv[0]); if((argc - optind) != 1) { show_help(argv[0]); return 1; } fuse_mountpoint = argv[optind]; if(!did_explicit_auth) auth_register_all(); if(tickets) { auth_ticket_load(tickets); free(tickets); } else if(getenv(CHIRP_CLIENT_TICKETS)) { auth_ticket_load(getenv(CHIRP_CLIENT_TICKETS)); } else { auth_ticket_load(NULL); } file_table = itable_create(0); signal(SIGHUP, exit_handler); signal(SIGINT, exit_handler); signal(SIGTERM, exit_handler); fuse_chan = fuse_mount(fuse_mountpoint, &fa); if(!fuse_chan) { fprintf(stderr, "chirp_fuse: couldn't access %s\n", fuse_mountpoint); return 1; } fuse_instance = fuse_new(fuse_chan, &fa, &chirp_fuse_operations, sizeof(chirp_fuse_operations), 0); if(!fuse_instance) { fuse_unmount(fuse_mountpoint, fuse_chan); fprintf(stderr, "chirp_fuse: couldn't access %s\n", fuse_mountpoint); return 1; } printf("chirp_fuse: mounted chirp on %s\n", fuse_mountpoint); #ifdef CCTOOLS_OPSYS_DARWIN printf("chirp_fuse: to unmount: umount %s\n", fuse_mountpoint); #else printf("chirp_fuse: to unmount: fusermount -u %s\n", fuse_mountpoint); 
#endif fflush(0); if(!run_in_foreground) daemon(0, 0); fuse_loop(fuse_instance); fuse_unmount(fuse_mountpoint, fuse_chan); fuse_destroy(fuse_instance); free(fa.argv); return 0; }
/**
 * Recover workflow state from an existing makeflow log, then reopen the log
 * for appending.
 *
 * If filename can be opened for reading, every line is replayed: lines of the
 * form "# <state> <file> <timestamp>" restore file states, other '#' lines
 * are ignored, and "<timestamp> <nodeid> <state> <jobid>" lines restore node
 * states.  Any other line is treated as corruption and terminates the
 * program.  If the log cannot be opened this is treated as a first run; with
 * verbose_mode set, a description of every node is then written to the new
 * log.
 *
 * After a recovery, files recorded as existing are checked against the file
 * system, nodes are examined for rerun, and the reference counts of the
 * inputs of completed nodes are decremented.
 *
 * @param d            DAG whose nodes and files are updated; d->logfile is
 *                     left open for appending, line buffered.
 * @param filename     Path of the log file.
 * @param verbose_mode Non-zero to record node details on a first run.
 * @param queue        Batch queue used to stat and clean files.
 * @param clean_mode   Any value other than MAKEFLOW_CLEAN_NONE silences the
 *                     node rerun checking (it is only used for that here).
 */
void makeflow_log_recover(struct dag *d, const char *filename, int verbose_mode, struct batch_queue *queue, makeflow_clean_depth clean_mode)
{
	char *line, *name, file[MAX_BUFFER_SIZE];
	int nodeid, state, jobid, file_state;
	int first_run = 1;
	struct dag_node *n;
	struct dag_file *f;
	struct stat buf;
	timestamp_t previous_completion_time;

	d->logfile = fopen(filename, "r");
	if(d->logfile) {
		int linenum = 0;
		first_run = 0;

		printf("recovering from log file %s...\n",filename);

		/* NOTE(review): this assumes get_line() returns a heap-allocated
		 * string owned by the caller, hence the free() on every path --
		 * confirm against get_line's contract. */
		while((line = get_line(d->logfile))) {
			linenum++;

			/* File state record. */
			if(sscanf(line, "# %d %s %" SCNu64 "", &file_state, file, &previous_completion_time) == 3) {
				f = dag_file_lookup_or_create(d, file);
				f->state = file_state;
				if(file_state == DAG_FILE_STATE_EXISTS){
					d->completed_files += 1;
					/* Log timestamp is in microseconds, we need seconds for diff. */
					f->creation_logged = (time_t) (previous_completion_time / 1000000);
				} else if(file_state == DAG_FILE_STATE_DELETE){
					d->deleted_files += 1;
				}
				free(line);
				continue;
			}

			/* Any other comment line carries no state. */
			if(line[0] == '#') {
				free(line);
				continue;
			}

			/* Node state record. */
			if(sscanf(line, "%" SCNu64 " %d %d %d", &previous_completion_time, &nodeid, &state, &jobid) == 4) {
				n = itable_lookup(d->node_table, nodeid);
				if(n) {
					n->state = state;
					n->jobid = jobid;
					/* Log timestamp is in microseconds, we need seconds for diff. */
					n->previous_completion = (time_t) (previous_completion_time / 1000000);
					free(line);
					continue;
				}
			}

			fprintf(stderr, "makeflow: %s appears to be corrupted on line %d\n", filename, linenum);
			exit(1);
		}
		fclose(d->logfile);
	}

	d->logfile = fopen(filename, "a");
	if(!d->logfile) {
		fprintf(stderr, "makeflow: couldn't open logfile %s: %s\n", filename, strerror(errno));
		exit(1);
	}
	if(setvbuf(d->logfile, NULL, _IOLBF, BUFSIZ) != 0) {
		fprintf(stderr, "makeflow: couldn't set line buffer on logfile %s: %s\n", filename, strerror(errno));
		exit(1);
	}

	if(first_run && verbose_mode) {
		struct dag_node *p;
		for(n = d->nodes; n; n = n->next) {
			/* Record node information to log */
			fprintf(d->logfile, "# NODE\t%d\t%s\n", n->nodeid, n->command);

			/* Record the node category to the log */
			fprintf(d->logfile, "# CATEGORY\t%d\t%s\n", n->nodeid, n->category->label);
			fprintf(d->logfile, "# SYMBOL\t%d\t%s\n", n->nodeid, n->category->label);   /* also write the SYMBOL as alias of CATEGORY, deprecated. */

			/* Record node parents to log */
			fprintf(d->logfile, "# PARENTS\t%d", n->nodeid);
			list_first_item(n->source_files);
			while( (f = list_next_item(n->source_files)) ) {
				p = f->created_by;
				if(p)
					fprintf(d->logfile, "\t%d", p->nodeid);
			}
			fputc('\n', d->logfile);

			/* Record node inputs to log */
			fprintf(d->logfile, "# SOURCES\t%d", n->nodeid);
			list_first_item(n->source_files);
			while( (f = list_next_item(n->source_files)) ) {
				fprintf(d->logfile, "\t%s", f->filename);
			}
			fputc('\n', d->logfile);

			/* Record node outputs to log */
			fprintf(d->logfile, "# TARGETS\t%d", n->nodeid);
			list_first_item(n->target_files);
			while( (f = list_next_item(n->target_files)) ) {
				fprintf(d->logfile, "\t%s", f->filename);
			}
			fputc('\n', d->logfile);

			/* Record translated command to log */
			fprintf(d->logfile, "# COMMAND\t%d\t%s\n", n->nodeid, n->command);
		}
	}

	dag_count_states(d);

	// Check for log consistency
	if(!first_run) {
		hash_table_firstkey(d->files);
		while(hash_table_nextkey(d->files, &name, (void **) &f)) {
			/* Only non-source files that the log says should exist are
			 * checked.  Skipping the others first also guarantees that buf
			 * is never inspected without a successful stat. */
			if(!dag_file_should_exist(f) || dag_file_is_source(f))
				continue;

			if(batch_fs_stat(queue, f->filename, &buf) < 0) {
				fprintf(stderr, "makeflow: %s is reported as existing, but does not exist.\n", f->filename);
				makeflow_log_file_state_change(d, f, DAG_FILE_STATE_UNKNOWN);
				continue;
			}

			/* Directories are exempt from the modification-time check. */
			if(S_ISDIR(buf.st_mode))
				continue;

			if(difftime(buf.st_mtime, f->creation_logged) > 0) {
				fprintf(stderr, "makeflow: %s is reported as existing, but has been modified (%" PRIu64 " ,%" PRIu64 ").\n", f->filename, (uint64_t)buf.st_mtime, (uint64_t)f->creation_logged);
				makeflow_clean_file(d, queue, f, 0);
				makeflow_log_file_state_change(d, f, DAG_FILE_STATE_UNKNOWN);
			}
		}
	}

	int silent = 0;
	if(clean_mode != MAKEFLOW_CLEAN_NONE)
		silent = 1;

	// Decide rerun tasks
	if(!first_run) {
		struct itable *rerun_table = itable_create(0);
		for(n = d->nodes; n; n = n->next) {
			makeflow_node_decide_rerun(rerun_table, d, n, silent);
		}
		itable_delete(rerun_table);
	}

	//Update file reference counts from nodes in log
	for(n = d->nodes; n; n = n->next) {
		if(n->state == DAG_NODE_STATE_COMPLETE) {
			list_first_item(n->source_files);
			while((f = list_next_item(n->source_files)))
				f->ref_count += -1;
		}
	}
}
static batch_job_id_t batch_job_mesos_wait (struct batch_queue * q, struct batch_job_info * info_out, time_t stoptime) { char line[MAX_BUF_SIZE]; FILE *task_state_fp; int last_pos = 0; int curr_pos = 0; int read_len = 0; if(!finished_tasks) { finished_tasks = itable_create(0); } while(access(FILE_TASK_STATE, F_OK) == -1) {} task_state_fp = fopen(FILE_TASK_STATE, "r"); while(1) { char *task_id_str; char *task_stat_str; const char *task_exit_code; int task_id; while(fgets(line, MAX_BUF_SIZE, task_state_fp) != NULL) { curr_pos = ftell(task_state_fp); read_len = curr_pos - last_pos; last_pos = curr_pos; // trim the newline character if (line[read_len-1] == '\n') { line[read_len-1] = '\0'; --read_len; } task_id_str = strtok(line, ","); task_id = atoi(task_id_str); // There is a new task finished if(itable_lookup(finished_tasks, task_id) == NULL) { struct batch_job_info *info = itable_remove(q->job_table, task_id); info->finished = time(0); task_stat_str = strtok(NULL, ","); if (strcmp(task_stat_str, "finished") == 0) { info->exited_normally = 1; } else if (strcmp(task_stat_str, "failed") == 0) { info->exited_normally = 0; task_exit_code = strtok(NULL, ","); // 444 is an arbitrary exit code set in mf_mesos_scheduler, // which means the task failed to retrieve the outpus if(atoi(task_exit_code) == 444) { info->exit_code = 444; debug(D_BATCH, "Task %s failed to retrieve the output.", task_id_str); } info->exit_code = atoi(task_exit_code); } else { info->exited_normally = 0; } memcpy(info_out, info, sizeof(*info)); free(info); fclose(task_state_fp); int itable_val = 1; itable_insert(finished_tasks, task_id, &itable_val); return task_id; } } sleep(1); if(stoptime != 0 && time(0) >= stoptime) { fclose(task_state_fp); return -1; } } }
int master_main(const char *host, int port, const char *addr) { time_t idle_stoptime; struct link *master = NULL; int num_workers, i; struct mpi_queue_job **workers; struct itable *active_jobs = itable_create(0); struct itable *waiting_jobs = itable_create(0); struct list *complete_jobs = list_create(); MPI_Comm_size(MPI_COMM_WORLD, &num_workers); workers = malloc(num_workers * sizeof(*workers)); memset(workers, 0, num_workers * sizeof(*workers)); idle_stoptime = time(0) + idle_timeout; while(!abort_flag) { char line[MPI_QUEUE_LINE_MAX]; if(time(0) > idle_stoptime) { if(master) { printf("mpi master: gave up after waiting %ds to receive a task.\n", idle_timeout); } else { printf("mpi master: gave up after waiting %ds to connect to %s port %d.\n", idle_timeout, host, port); } break; } if(!master) { char working_dir[MPI_QUEUE_LINE_MAX]; master = link_connect(addr, port, idle_stoptime); if(!master) { sleep(5); continue; } link_tune(master, LINK_TUNE_INTERACTIVE); link_readline(master, line, sizeof(line), time(0) + active_timeout); memset(working_dir, 0, MPI_QUEUE_LINE_MAX); if(sscanf(line, "workdir %s", working_dir) == 1) { MPI_Bcast(working_dir, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD); } else { link_close(master); master = NULL; continue; } } if(link_readline(master, line, sizeof(line), time(0) + short_timeout)) { struct mpi_queue_operation *op; int jobid, mode; INT64_T length; char path[MPI_QUEUE_LINE_MAX]; op = NULL; debug(D_MPI, "received: %s\n", line); if(!strcmp(line, "get results")) { struct mpi_queue_job *job; debug(D_MPI, "results requested: %d available\n", list_size(complete_jobs)); link_putfstring(master, "num results %d\n", time(0) + active_timeout, list_size(complete_jobs)); while(list_size(complete_jobs)) { job = list_pop_head(complete_jobs); link_putfstring(master, "result %d %d %d %lld\n", time(0) + active_timeout, job->jobid, job->status, job->result, job->output_length); if(job->output_length) { link_write(master, job->output, 
job->output_length, time(0)+active_timeout); } mpi_queue_job_delete(job); } } else if(sscanf(line, "work %d %lld", &jobid, &length)) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_WORK; op->buffer_length = length+1; op->buffer = malloc(length+1); op->buffer[op->buffer_length] = 0; link_read(master, op->buffer, length, time(0) + active_timeout); op->result = -1; } else if(sscanf(line, "stat %d %s", &jobid, path) == 2) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_STAT; sprintf(op->args, "%s", path); op->result = -1; } else if(sscanf(line, "unlink %d %s", &jobid, path) == 2) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_UNLINK; sprintf(op->args, "%s", path); op->result = -1; } else if(sscanf(line, "mkdir %d %s %o", &jobid, path, &mode) == 3) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_MKDIR; sprintf(op->args, "%s %o", path, mode); op->result = -1; } else if(sscanf(line, "close %d", &jobid) == 1) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_CLOSE; op->result = -1; // } else if(sscanf(line, "symlink %d %s %s", &jobid, path, filename) == 3) { // } else if(sscanf(line, "put %d %s %lld %o", &jobid, filename, &length, &mode) == 4) { // } else if(sscanf(line, "rget %d %s", &jobid, filename) == 2) { // } else if(sscanf(line, "get %d %s", &jobid, filename) == 2) { // } else if(sscanf(line, "thirdget %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) { // } else if(sscanf(line, "thirdput %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) { } else if(!strcmp(line, "exit")) { break; } else { abort_flag = 1; continue; } if(op) { struct mpi_queue_job *job; job = itable_lookup(active_jobs, jobid); if(!job) { job = itable_lookup(waiting_jobs, jobid); } if(!job) { job = malloc(sizeof(*job)); memset(job, 0, sizeof(*job)); job->jobid = jobid; job->operations = list_create(); job->status = 
MPI_QUEUE_JOB_WAITING; job->worker_rank = -1; itable_insert(waiting_jobs, jobid, job); } list_push_tail(job->operations, op); } idle_stoptime = time(0) + idle_timeout; } else { link_close(master); master = 0; sleep(5); } int num_waiting_jobs = itable_size(waiting_jobs); int num_unvisited_jobs = itable_size(active_jobs); for(i = 1; i < num_workers && (num_unvisited_jobs > 0 || num_waiting_jobs > 0); i++) { struct mpi_queue_job *job; struct mpi_queue_operation *op; int flag = 0; UINT64_T jobid; if(!workers[i]) { if(num_waiting_jobs) { itable_firstkey(waiting_jobs); itable_nextkey(waiting_jobs, &jobid, (void **)&job); itable_remove(waiting_jobs, jobid); itable_insert(active_jobs, jobid, job); workers[i] = job; num_waiting_jobs--; job->worker_rank = i; job->status = MPI_QUEUE_JOB_READY; } else { continue; } } else { num_unvisited_jobs--; if(workers[i]->status == MPI_QUEUE_JOB_BUSY) { MPI_Test(&workers[i]->request, &flag, &workers[i]->mpi_status); if(flag) { op = list_pop_head(workers[i]->operations); if(op->output_length) { op->output_buffer = malloc(op->output_length); MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status); } workers[i]->status = MPI_QUEUE_JOB_READY; if(op->type == MPI_QUEUE_OP_WORK || op->result < 0) { if(workers[i]->output) free(workers[i]->output); workers[i]->output = op->output_buffer; op->output_buffer = NULL; workers[i]->output_length = op->output_length; workers[i]->result = op->result; if(op->result < 0) { workers[i]->status = MPI_QUEUE_JOB_FAILED | op->type; op->type = MPI_QUEUE_OP_CLOSE; list_push_head(workers[i]->operations, op); op = NULL; } } if(op) { if(op->buffer) free(op->buffer); if(op->output_buffer) free(op->output_buffer); free(op); } } } } if( workers[i]->status != MPI_QUEUE_JOB_BUSY && list_size(workers[i]->operations)) { op = list_peek_head(workers[i]->operations); if(op->type == MPI_QUEUE_OP_CLOSE) { itable_remove(active_jobs, workers[i]->jobid); 
list_push_tail(complete_jobs, workers[i]); if(!(workers[i]->status & MPI_QUEUE_JOB_FAILED)) workers[i]->status = MPI_QUEUE_JOB_COMPLETE; workers[i] = NULL; i--; continue; } MPI_Send(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD); if(op->buffer_length) { MPI_Send(op->buffer, op->buffer_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD); free(op->buffer); op->buffer_length = 0; op->buffer = NULL; } MPI_Irecv(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->request); workers[i]->status = MPI_QUEUE_JOB_BUSY; } } } /** Clean up waiting & complete jobs, send Exit commands to each worker */ if(!master) { // If the master link hasn't been set up yet // the workers will be waiting for the working directory char line[MPI_QUEUE_LINE_MAX]; memset(line, 0, MPI_QUEUE_LINE_MAX); MPI_Bcast(line, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD); } else { link_close(master); } for(i = 1; i < num_workers; i++) { struct mpi_queue_operation *op, close; memset(&close, 0, sizeof(close)); close.type = MPI_QUEUE_OP_EXIT; if(workers[i]) { if(workers[i]->status == MPI_QUEUE_JOB_BUSY) { MPI_Wait(&workers[i]->request, &workers[i]->mpi_status); op = list_peek_head(workers[i]->operations); if(op->output_length) { op->output_buffer = malloc(op->output_length); MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status); } } itable_remove(active_jobs, workers[i]->jobid); list_push_tail(complete_jobs, workers[i]); } MPI_Send(&close, sizeof(close), MPI_BYTE, i, 0, MPI_COMM_WORLD); } itable_firstkey(waiting_jobs); while(itable_size(waiting_jobs)) { struct mpi_queue_job *job; UINT64_T jobid; itable_nextkey(waiting_jobs, &jobid, (void **)&job); itable_remove(waiting_jobs, jobid); list_push_tail(complete_jobs, job); } while(list_size(complete_jobs)) { mpi_queue_job_delete(list_pop_head(complete_jobs)); } MPI_Finalize(); return abort_flag; }
/*
 * Create a batch queue of the given type.
 *
 * Sets up the job and output tables, picks the default log file name for
 * the type ("condor.logfile" for Condor, "wq.log" for the Work Queue types,
 * none otherwise), creates the Work Queue or MPI queue object when the type
 * needs one, runs the cluster setup for SGE/Moab/Torque/Cluster, and checks
 * the environment variables required by Hadoop.
 *
 * Returns the new queue, or 0 when type is BATCH_QUEUE_TYPE_UNKNOWN, memory
 * cannot be allocated, the underlying work/mpi queue cannot be created, or
 * a required Hadoop environment variable is not set.
 */
struct batch_queue *batch_queue_create(batch_queue_type_t type)
{
	struct batch_queue *q;

	if(type == BATCH_QUEUE_TYPE_UNKNOWN)
		return 0;

	q = malloc(sizeof(*q));
	if(!q)
		return 0;

	q->type = type;
	q->options_text = 0;
	q->job_table = itable_create(0);
	q->output_table = itable_create(0);

	/* Give every pointer a defined value before any failure path can hand
	 * the queue to batch_queue_delete().  The original reached that call on
	 * a work_queue_create() failure while q->mpi_queue was still
	 * uninitialized. */
	q->logfile = NULL;
	q->work_queue = 0;
	q->mpi_queue = 0;

	if(type == BATCH_QUEUE_TYPE_CONDOR)
		q->logfile = strdup("condor.logfile");
	else if(type == BATCH_QUEUE_TYPE_WORK_QUEUE || type == BATCH_QUEUE_TYPE_WORK_QUEUE_SHAREDFS)
		q->logfile = strdup("wq.log");

	if(type == BATCH_QUEUE_TYPE_WORK_QUEUE || type == BATCH_QUEUE_TYPE_WORK_QUEUE_SHAREDFS) {
		q->work_queue = work_queue_create(0);
		if(!q->work_queue) {
			batch_queue_delete(q);
			return 0;
		}
	}

	if(type == BATCH_QUEUE_TYPE_MPI_QUEUE) {
		q->mpi_queue = mpi_queue_create(0);
		if(!q->mpi_queue) {
			batch_queue_delete(q);
			return 0;
		}
	}

	if(type == BATCH_QUEUE_TYPE_SGE || type == BATCH_QUEUE_TYPE_MOAB || type == BATCH_QUEUE_TYPE_TORQUE || type == BATCH_QUEUE_TYPE_CLUSTER) {
		batch_job_setup_cluster(q);
	}

	if(type == BATCH_QUEUE_TYPE_HADOOP) {
		/* Report every missing variable before giving up, not just the first. */
		int fail = 0;

		if(!getenv("HADOOP_HOME")) {
			debug(D_NOTICE, "error: environment variable HADOOP_HOME not set\n");
			fail = 1;
		}
		if(!getenv("HDFS_ROOT_DIR")) {
			debug(D_NOTICE, "error: environment variable HDFS_ROOT_DIR not set\n");
			fail = 1;
		}
		if(!getenv("HADOOP_USER_TMP")) {
			debug(D_NOTICE, "error: environment variable HADOOP_USER_TMP not set\n");
			fail = 1;
		}
		if(!getenv("HADOOP_PARROT_PATH")) {
			/* Note: HADOOP_PARROT_PATH is the path to Parrot on the remote node, not on the local machine. */
			debug(D_NOTICE, "error: environment variable HADOOP_PARROT_PATH not set\n");
			fail = 1;
		}

		if(fail) {
			batch_queue_delete(q);
			return 0;
		}
	}

	return q;
}