void save_status( struct bitmap *b, struct list *ready_list, struct itable *running_table ) { static time_t last_saved = 0; static time_t start_time = 0; time_t current = time(0); if(!start_time) start_time = current; if(progress_bitmap_file) { if((current-last_saved) >= progress_bitmap_interval) { bitmap_save_bmp(b,progress_bitmap_file); } } fprintf(progress_log_file, "%.2lf %% %d s %d %d %d %.02lf %.02lf\n", 100.0*total_cells_complete/total_cells, (int)(current-start_time), list_size(ready_list), itable_size(running_table), total_cells_complete, average_dispatch_time, average_task_time); fflush(0); }
/*
 * Send a one-shot status report for the given DAG to the catalog server.
 * Tallies nodes by state, packages the summary as a JX object, and
 * transmits it with catalog_query_send_update().
 *
 * d     - the workflow whose node states are summarized.
 * name  - project name reported to the catalog.
 * type  - batch system in use, reported as a string.
 * start - workflow start time, reported as a decimal string.
 *
 * Returns the result of catalog_query_send_update().
 */
int makeflow_catalog_summary(struct dag* d, char* name, batch_queue_type_t type, timestamp_t start)
{
	int completed = 0, aborted = 0, waiting = 0, running = 0, failed = 0;

	/* Count nodes in each state of interest; other states are ignored. */
	struct dag_node *node;
	for(node = d->nodes; node; node = node->next) {
		switch(node->state) {
		case DAG_NODE_STATE_FAILED:   failed++;    break;
		case DAG_NODE_STATE_ABORTED:  aborted++;   break;
		case DAG_NODE_STATE_COMPLETE: completed++; break;
		case DAG_NODE_STATE_RUNNING:  running++;   break;
		case DAG_NODE_STATE_WAITING:  waiting++;   break;
		default: break;
		}
	}

	/* Build the report object and transmit it. */
	char* host = CATALOG_HOST;

	char username[USERNAME_MAX];
	username_get(username);

	const char* batch_type = batch_queue_type_to_string(type);

	struct jx *report = jx_object(0);
	jx_insert_string(report,"type","makeflow");
	jx_insert_integer(report,"total",itable_size(d->node_table));
	jx_insert_integer(report,"running",running);
	jx_insert_integer(report,"waiting",waiting);
	jx_insert_integer(report,"aborted",aborted);
	jx_insert_integer(report,"completed",completed);
	jx_insert_integer(report,"failed",failed);
	jx_insert_string(report,"project",name);
	jx_insert_string(report,"owner",username);

	char* timestring = string_format("%" PRIu64 "", start);
	jx_insert_string(report,"time_started",timestring);
	jx_insert_string(report,"batch_type",batch_type);

	char* text = jx_print_string(report);	/* allocates; freed below */
	int resp = catalog_query_send_update(host, text);

	free(text);
	free(timestring);
	jx_delete(report);

	return resp;
}
/*
 * Poll the per-job status files written by cluster jobs until one job
 * finishes, then report it.
 *
 * q        - queue whose job_table maps job ids to batch_job_info.
 * info_out - filled in with the finished job's info (copied by value).
 * stoptime - absolute deadline; 0 means wait indefinitely.
 *
 * Returns the finished job's id, 0 when no jobs remain, or -1 on
 * timeout or when a pending process event interrupts the wait.
 */
static batch_job_id_t batch_job_cluster_wait (struct batch_queue * q, struct batch_job_info * info_out, time_t stoptime)
{
	struct batch_job_info *info;
	batch_job_id_t jobid;
	int t, c;

	while(1) {
		UINT64_T ujobid;
		itable_firstkey(q->job_table);
		/* Scan every known job for an updated status file. */
		while(itable_nextkey(q->job_table, &ujobid, (void **) &info)) {
			jobid = ujobid;
			char *statusfile = string_format("%s.status.%" PRIbjid, cluster_name, jobid);
			FILE *file = fopen(statusfile, "r");
			if(file) {
				char line[BATCH_JOB_LINE_MAX];
				while(fgets(line, sizeof(line), file)) {
					/* "start <time>" marks when the job began running. */
					if(sscanf(line, "start %d", &t)) {
						info->started = t;
					} else if(sscanf(line, "stop %d %d", &c, &t) == 2) {
						/* "stop <exitcode> <time>" marks completion. */
						debug(D_BATCH, "job %" PRIbjid " complete", jobid);
						if(!info->started)
							info->started = t;
						info->finished = t;
						info->exited_normally = 1;
						info->exit_code = c;
					}
				}
				fclose(file);

				if(info->finished != 0) {
					/* Job is done: remove its status file and table
					   entry, copy the info out, and report it. */
					unlink(statusfile);
					info = itable_remove(q->job_table, jobid);
					*info_out = *info;
					free(info);
					free(statusfile);
					return jobid;
				}
			} else {
				debug(D_BATCH, "could not open status file \"%s\"", statusfile);
			}
			free(statusfile);
		}

		/* Nothing finished this pass: stop if no jobs remain, the
		   caller's deadline has passed, or a process event is pending;
		   otherwise sleep briefly and poll again. */
		if(itable_size(q->job_table) <= 0)
			return 0;

		if(stoptime != 0 && time(0) >= stoptime)
			return -1;

		if(process_pending())
			return -1;

		sleep(1);
	}
	return -1;
}
// Compute the itable size for this class: sum the number of methods
// declared by each interface in the itable, then delegate to the
// two-argument itable_size(length, method_count) overload.
jint ClassInfo::itable_size() {
  int total_methods = 0;
  for (int i = 0; i < itable_length(); i++) {
    InstanceClass::Raw iface = itable_interface_at(i);
    ObjArray::Raw iface_methods = iface().methods();
    total_methods += iface_methods().length();
  }
  return itable_size(itable_length(), total_methods);
}
/*
 * Return the number of occupied buckets in the histogram, or zero when
 * the bucket table has not been allocated.
 */
int histogram_size(struct histogram *h)
{
	if(!h->buckets)
		return 0;

	return itable_size(h->buckets);
}
/*
 * Ask the batch system to remove every worker job listed in job_table.
 * Logs each removal and the final count.  The table itself is left
 * unmodified; only the batch jobs are removed.
 */
void remove_all_workers( struct batch_queue *queue, struct itable *job_table )
{
	uint64_t jobid;
	void *job;

	debug(D_WQ,"removing all remaining worker jobs...");

	int njobs = itable_size(job_table);

	itable_firstkey(job_table);
	while(itable_nextkey(job_table,&jobid,&job)) {
		debug(D_WQ,"removing job %"PRId64,jobid);
		batch_job_remove(queue,jobid);
	}

	debug(D_WQ,"%d workers removed.",njobs);
}
/*
 * Agglomerative clustering of initial_clusters via the nearest-neighbor
 * chain approach: a stack holds the current chain, and whenever the top
 * two chain members are mutual nearest neighbors they are merged with
 * cluster_merge.  Ward distance (cluster_ward_distance) drives the
 * neighbor choice; cmp is passed through to cluster_nearest_neighbor.
 *
 * Returns the root of the resulting cluster hierarchy (linked through
 * the clusters' child pointers), or the single/NULL input cluster when
 * there are fewer than two initial clusters.
 */
struct cluster *nearest_neighbor_clustering(struct list *initial_clusters, double (*cmp)(struct cluster *, struct cluster *))
{
	struct cluster *top, *closest, *subtop;
	struct list *stack;
	struct itable *active_clusters;
	double dclosest, dsubtop;

	int merge = 0;

	list_first_item(initial_clusters);
	top = list_next_item(initial_clusters);

	/* Return immediately if top is NULL, or there is a unique
	 * initial cluster */
	if(list_size(initial_clusters) < 2)
		return top;

	stack = list_create(0);
	list_push_head(stack, top);

	/* Add all of the initial clusters as active clusters. */
	active_clusters = itable_create(0);
	while( (top = list_next_item(initial_clusters)) )
		itable_insert(active_clusters, (uintptr_t) top, (void *) top);

	do {
		/* closest might be NULL if all of the clusters are in
		 * the stack now. subtop might be NULL if top was the
		 * only cluster in the stack */
		top = list_pop_head( stack );
		closest = cluster_nearest_neighbor(active_clusters, top, cmp);
		subtop = list_peek_head( stack );

		dclosest = -1;
		dsubtop = -1;

		if(closest)
			dclosest = cluster_ward_distance(top, closest);

		if(subtop)
			dsubtop = cluster_ward_distance(top, subtop);

		/* The nearest neighbor of top is either one of the
		 * remaining active clusters, or the second topmost
		 * cluster in the stack */
		if( closest && subtop ) {
			/* Use pointer address to systematically break ties. */
			if(dclosest < dsubtop || ((dclosest == dsubtop) && (uintptr_t)closest < (uintptr_t)subtop))
				merge = 0;
			else
				merge = 1;
		} else if( subtop )
			merge = 1;
		else if( closest )
			merge = 0;
		else
			fatal("Zero clusters?\n"); //We should never reach here.

		if(merge) {
			/* If the two topmost clusters in the stack are
			 * mutual nearest neighbors, merge them into a single
			 * cluster */
			subtop = list_pop_head( stack );
			list_push_head(stack, cluster_merge(top, subtop));
		} else {
			/* Otherwise, push the nearest neighbor of top to the
			 * stack */
			itable_remove(active_clusters, (uintptr_t) closest);
			list_push_head(stack, top);
			list_push_head(stack, closest);
		}

		debug(D_DEBUG, "stack: %d active: %d closest: %lf subtop: %lf\n", list_size(stack), itable_size(active_clusters), dclosest, dsubtop);

		/* If there are no more active_clusters, but there is not
		 * a single cluster in the stack, we try again,
		 * converting the clusters in the stack into new active
		 * clusters. */
		/* NOTE(review): the restart threshold is stack > 3, not > 1;
		 * stacks of 2 or 3 are instead collapsed by the merge branch
		 * above on subsequent iterations — confirm this is intended. */
		if(itable_size(active_clusters) == 0 && list_size(stack) > 3) {
			itable_delete(active_clusters);
			return nearest_neighbor_clustering(stack, cmp);
		}

	} while( !(itable_size(active_clusters) == 0 && list_size(stack) == 1) );

	/* top is now the root of a cluster hierarchy, of
	 * cluster->right, cluster->left. */
	top = list_pop_head(stack);

	list_delete(stack);
	itable_delete(active_clusters);

	return top;
}
/*
 * Wait for a Condor job to finish by tailing the shared Condor user
 * log (q->logfile) and parsing its numbered events.
 *
 * q        - queue whose job_table maps job ids to batch_job_info.
 * info_out - filled in (memcpy) with the finished job's info.
 * stoptime - absolute deadline; 0 means wait indefinitely.
 *
 * Returns the finished job's id, 0 when no jobs remain, or -1 on
 * timeout, pending process event, or failure to open the log.
 */
static batch_job_id_t batch_job_condor_wait (struct batch_queue * q, struct batch_job_info * info_out, time_t stoptime)
{
	/* The log stays open across calls; read position is preserved. */
	static FILE *logfile = 0;

	if(!logfile) {
		logfile = fopen(q->logfile, "r");
		if(!logfile) {
			debug(D_NOTICE, "couldn't open logfile %s: %s\n", q->logfile, strerror(errno));
			return -1;
		}
	}

	while(1) {
		/*
		Note: clearerr is necessary to clear any cached end-of-file condition,
		otherwise some implementations of fgets (i.e. darwin) will read to end
		of file once and then never look for any more data.
		*/
		clearerr(logfile);

		char line[BATCH_JOB_LINE_MAX];
		while(fgets(line, sizeof(line), logfile)) {
			int type, proc, subproc;
			batch_job_id_t jobid;
			time_t current;
			struct tm tm;
			struct batch_job_info *info;
			int logcode, exitcode;

			/* Event header: "<type> (<cluster>.<proc>.<subproc>) MM/DD HH:MM:SS" */
			if(sscanf(line, "%d (%" SCNbjid ".%d.%d) %d/%d %d:%d:%d", &type, &jobid, &proc, &subproc, &tm.tm_mon, &tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec) == 9) {
				/* NOTE(review): the log line carries no year, so 2008 is
				   hard-coded; derived timestamps are only meaningful
				   relative to each other — confirm this is intended. */
				tm.tm_year = 2008 - 1900;
				tm.tm_isdst = 0;
				current = mktime(&tm);

				/* Create a tracking record on first sight of this job. */
				info = itable_lookup(q->job_table, jobid);
				if(!info) {
					info = malloc(sizeof(*info));
					memset(info, 0, sizeof(*info));
					itable_insert(q->job_table, jobid, info);
				}

				debug(D_BATCH, "line: %s", line);

				if(type == 0) {
					/* event 0: job submitted */
					info->submitted = current;
				} else if(type == 1) {
					/* event 1: job began executing */
					info->started = current;
					debug(D_BATCH, "job %" PRIbjid " running now", jobid);
				} else if(type == 9) {
					/* event 9: job was removed/aborted */
					itable_remove(q->job_table, jobid);

					info->finished = current;
					info->exited_normally = 0;
					info->exit_signal = SIGKILL;

					debug(D_BATCH, "job %" PRIbjid " was removed", jobid);

					memcpy(info_out, info, sizeof(*info));
					free(info);
					return jobid;
				} else if(type == 5) {
					/* event 5: job terminated; the next line says how. */
					itable_remove(q->job_table, jobid);

					info->finished = current;

					/* NOTE(review): this fgets return value is unchecked;
					   at EOF the stale event line is re-parsed below. */
					fgets(line, sizeof(line), logfile);
					if(sscanf(line, " (%d) Normal termination (return value %d)", &logcode, &exitcode) == 2) {
						debug(D_BATCH, "job %" PRIbjid " completed normally with status %d.", jobid, exitcode);
						info->exited_normally = 1;
						info->exit_code = exitcode;
					} else if(sscanf(line, " (%d) Abnormal termination (signal %d)", &logcode, &exitcode) == 2) {
						debug(D_BATCH, "job %" PRIbjid " completed abnormally with signal %d.", jobid, exitcode);
						info->exited_normally = 0;
						info->exit_signal = exitcode;
					} else {
						debug(D_BATCH, "job %" PRIbjid " completed with unknown status.", jobid);
						info->exited_normally = 0;
						info->exit_signal = 0;
					}

					memcpy(info_out, info, sizeof(*info));
					free(info);
					return jobid;
				}
			}
		}

		/* No new events: stop if no jobs remain, the deadline passed,
		   or a process event is pending; otherwise poll again. */
		if(itable_size(q->job_table) <= 0)
			return 0;

		if(stoptime != 0 && time(0) >= stoptime)
			return -1;

		if(process_pending())
			return -1;

		sleep(1);
	}
	return -1;
}
int main( int argc, char *argv[] ) { signed char c; const char *progname = "wavefront"; debug_config(progname); progress_log_file = stdout; struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'v'}, {"debug", required_argument, 0, 'd'}, {"jobs", required_argument, 0, 'n'}, {"block-size", required_argument, 0, 'b'}, {"debug-file", required_argument, 0, 'o'}, {"log-file", required_argument, 0, 'l'}, {"bitmap", required_argument, 0, 'B'}, {"bitmap-interval", required_argument, 0, 'i'}, {"auto", no_argument, 0, 'A'}, {"local", no_argument, 0, 'L'}, {"batch-type", required_argument, 0, 'T'}, {"verify", no_argument, 0, 'V'}, {0,0,0,0} }; while((c=getopt_long(argc,argv,"n:b:d:o:l:B:i:qALDT:VX:Y:vh", long_options, NULL)) > -1) { switch(c) { case 'n': manual_max_jobs_running = atoi(optarg); break; case 'b': manual_block_size = atoi(optarg); break; case 'd': debug_flags_set(optarg); break; case 'o': debug_config_file(optarg); break; case 'B': progress_bitmap_file = optarg; break; case 'i': progress_bitmap_interval = atoi(optarg); break; case 'l': progress_log_file = fopen(optarg,"w"); if(!progress_log_file) { fprintf(stderr,"couldn't open %s: %s\n",optarg,strerror(errno)); return 1; } break; case 'A': wavefront_mode = WAVEFRONT_MODE_AUTO; break; case 'L': wavefront_mode = WAVEFRONT_MODE_MULTICORE; break; case 'T': wavefront_mode = WAVEFRONT_MODE_DISTRIBUTED; batch_system_type = batch_queue_type_from_string(optarg); if(batch_system_type==BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr,"unknown batch system type: %s\n",optarg); exit(1); } break; case 'V': verify_mode = 1; break; case 'X': xstart = atoi(optarg); break; case 'Y': ystart = atoi(optarg); break; case 'v': cctools_version_print(stdout, progname); exit(0); break; case 'h': show_help(progname); exit(0); break; } } cctools_version_debug(D_DEBUG, argv[0]); if( (argc-optind<3) ) { show_help(progname); exit(1); } function = argv[optind]; xsize=atoi(argv[optind+1]); 
ysize=atoi(argv[optind+2]); total_cells = xsize*ysize; if(!verify_mode && !check_configuration(function,xsize,ysize)) exit(1); int ncpus = load_average_get_cpus(); if(wavefront_mode!=WAVEFRONT_MODE_MULTICORE) { double task_time = measure_task_time(); printf("Each function takes %.02lfs to run.\n",task_time); block_size = find_best_block_size(xsize,1000,2,task_time,average_dispatch_time); double distributed_time = wavefront_distributed_model(xsize,1000,2,task_time,block_size,average_dispatch_time); double multicore_time = wavefront_multicore_model(xsize,ncpus,task_time); double ideal_multicore_time = wavefront_multicore_model(xsize,xsize,task_time); double sequential_time = wavefront_multicore_model(xsize,1,task_time); printf("---------------------------------\n"); printf("This workload would take:\n"); printf("%.02lfs sequentially\n",sequential_time); printf("%.02lfs on this %d-core machine\n",multicore_time,ncpus); printf("%.02lfs on a %d-core machine\n",ideal_multicore_time,xsize); printf("%.02lfs on a 1000-node distributed system with block size %d\n",distributed_time,block_size); printf("---------------------------------\n"); if(wavefront_mode==WAVEFRONT_MODE_AUTO) { if(multicore_time < distributed_time*2) { wavefront_mode = WAVEFRONT_MODE_MULTICORE; } else { wavefront_mode = WAVEFRONT_MODE_DISTRIBUTED; } } } if(wavefront_mode==WAVEFRONT_MODE_MULTICORE) { batch_system_type = BATCH_QUEUE_TYPE_LOCAL; max_jobs_running = ncpus; } else { max_jobs_running = 1000; } if(manual_block_size!=0) { block_size = manual_block_size; } if(manual_max_jobs_running!=0) { max_jobs_running = manual_max_jobs_running; } if(wavefront_mode==WAVEFRONT_MODE_MULTICORE) { printf("Running in multicore mode with %d CPUs.\n",max_jobs_running); } else { printf("Running in distributed mode with block size %d on up to %d CPUs\n",block_size,max_jobs_running); } batch_q = batch_queue_create(batch_system_type); if(verify_mode) exit(0); struct bitmap * b = bitmap_create(xsize+1,ysize+1); struct list 
*ready_list = list_create(); struct itable *running_table = itable_create(0); struct batch_job_info info; UINT64_T jobid; struct wavefront_task *task; wavefront_task_initialize(b,ready_list); printf("Starting workload...\n"); fprintf(progress_log_file,"# elapsed time : waiting jobs / running jobs / cells complete (percent complete)\n"); while(1) { if(abort_mode) { while((task=list_pop_tail(ready_list))) { wavefront_task_delete(task); } itable_firstkey(running_table); while(itable_nextkey(running_table,&jobid,(void**)&task)) { batch_job_remove(batch_q,jobid); } } if(list_size(ready_list)==0 && itable_size(running_table)==0) break; while(1) { if(itable_size(running_table)>=max_jobs_running) break; task = list_pop_tail(ready_list); if(!task) break; jobid = wavefront_task_submit(task); if(jobid>0) { itable_insert(running_table,jobid,task); wavefront_task_mark_range(task,b,WAVEFRONT_TASK_STATE_RUNNING); } else { abort(); sleep(1); list_push_head(ready_list,task); } } save_status(b,ready_list,running_table); jobid = batch_job_wait(batch_q,&info); if(jobid>0) { task = itable_remove(running_table,jobid); if(task) { if(info.exited_normally && info.exit_code==0) { total_dispatch_time += info.started-info.submitted; total_execute_time += MAX(info.finished-info.started,1); total_cells_complete+=task->width*task->height; total_jobs_complete++; average_dispatch_time = 1.0*total_dispatch_time / total_jobs_complete; average_task_time = 1.0*total_execute_time / total_cells_complete; wavefront_task_complete(b,ready_list,task); } else { printf("job %" PRIu64 " failed, aborting this workload\n",jobid); abort_mode = 1; } } } } save_status(b,ready_list,running_table); if(abort_mode) { printf("Workload was aborted.\n"); } else { printf("Workload complete.\n"); } return 0; }
/* Number of jobs currently running on the local queue: one table
   entry exists per running local job. */
int dag_local_jobs_running( struct dag *d )
{
	int count = itable_size(d->local_job_table);
	return count;
}
/* Number of jobs currently running on the remote queue: one table
   entry exists per running remote job. */
int dag_remote_jobs_running( struct dag *d )
{
	int count = itable_size(d->remote_job_table);
	return count;
}
/*
 * Wait up to timeout seconds for a task to complete, driving the link
 * to the MPI master process as needed: establish the link on first
 * use, dispatch all ready tasks, and collect any finished results.
 *
 * q       - queue holding ready/active/complete task collections.
 * timeout - seconds to wait, or MPI_QUEUE_WAITFORTASK to wait forever.
 *
 * Returns a completed task, or NULL when the queue is empty, the
 * timeout expires, or the link fails.
 */
struct mpi_queue_task *mpi_queue_wait(struct mpi_queue *q, int timeout)
{
	struct mpi_queue_task *t;
	time_t stoptime;
	int result;

	if(timeout == MPI_QUEUE_WAITFORTASK) {
		stoptime = 0;	/* no deadline */
	} else {
		stoptime = time(0) + timeout;
	}

	while(1) {
		// If a task is already complete, return it
		t = list_pop_head(q->complete_list);
		if(t)
			return t;

		/* Nothing in flight and nothing to send: the queue is drained. */
		if(list_size(q->ready_list) == 0 && itable_size(q->active_list) == 0)
			break;

		// Wait no longer than the caller's patience.
		int msec;
		int sec;

		if(stoptime) {
			sec = MAX(0, stoptime - time(0));
			msec = sec * 1000;
		} else {
			sec = 5;
			msec = 5000;
		}

		/* On first use, accept the link from the MPI master process
		   and send it our working directory. */
		if(!q->mpi_link) {
			q->mpi_link = link_accept(q->master_link, stoptime);
			if(q->mpi_link) {
				char working_dir[MPI_QUEUE_LINE_MAX];
				link_tune(q->mpi_link, LINK_TUNE_INTERACTIVE);
				link_usleep(q->mpi_link, msec, 0, 1);
				getcwd(working_dir, MPI_QUEUE_LINE_MAX);
				link_putfstring(q->mpi_link, "workdir %s\n", stoptime, working_dir);
				result = link_usleep(q->mpi_link, msec, 1, 1);
			} else {
				result = 0;
			}
		} else {
			debug(D_MPI, "Waiting for link to be ready\n");
			result = link_usleep(q->mpi_link, msec, 1, 1);
		}

		// If nothing was awake, restart the loop or return without a task.
		if(result <= 0) {
			if(stoptime && time(0) >= stoptime) {
				return 0;
			} else {
				continue;
			}
		}

		debug(D_MPI, "sending %d tasks to the MPI master process\n", list_size(q->ready_list));
		// Send all ready tasks to the MPI master process
		while(list_size(q->ready_list)) {
			struct mpi_queue_task *t = list_pop_head(q->ready_list);
			result = dispatch_task(q->mpi_link, t, msec/1000);
			if(result <= 0)
				return 0;
			itable_insert(q->active_list, t->taskid, t);
		}

		// Receive any results back
		result = get_results(q->mpi_link, q->active_list, q->complete_list, msec/1000);
		if(result < 0) {
			return 0;
		}
	}

	return 0;
}
/* True (1) only when the queue holds no tasks at all: none ready,
   none active, and none complete. */
int mpi_queue_empty(struct mpi_queue *q)
{
	if(list_size(q->ready_list))
		return 0;
	if(itable_size(q->active_list))
		return 0;
	if(list_size(q->complete_list))
		return 0;
	return 1;
}
/*
 * Write a human-readable summary of a finished makeflow run to an
 * optional file and/or email it via "sendmail -t".
 *
 * d                - the workflow to summarize.
 * filename         - summary file path, or NULL for no file.
 * email_summary_to - recipient address, or NULL for no email.
 * runtime          - total run time in microseconds.
 * time_completed   - completion timestamp used in the headers.
 * argc/argv        - original command line, echoed into the report.
 * dagfile          - path of the workflow file.
 * remote_queue     - queue used to stat output files for their sizes.
 * abort_flag/failed_flag - final disposition of the workflow.
 */
void makeflow_summary_create(struct dag *d, const char *filename, const char *email_summary_to, timestamp_t runtime, timestamp_t time_completed, int argc, char *argv[], const char *dagfile, struct batch_queue *remote_queue, int abort_flag, int failed_flag )
{
	char buffer[50];

	FILE *summary_file = NULL;
	FILE *summary_email = NULL;

	if(filename)
		summary_file = fopen(filename, "w");

	if(email_summary_to) {
		summary_email = popen("sendmail -t", "w");
		/* BUGFIX: a failed popen() used to be dereferenced immediately. */
		if(summary_email) {
			fprintf(summary_email, "To: %s\n", email_summary_to);
			timestamp_fmt(buffer, 50, "%c", time_completed);
			fprintf(summary_email, "Subject: Makeflow Run Summary - %s \n", buffer);
		} else {
			fprintf(stderr, "couldn't run sendmail: %s\n", strerror(errno));
		}
	}

	/* Echo the original command line. */
	int i;
	for(i = 0; i < argc; i++)
		summarize(summary_file, summary_email, "%s ", argv[i]);
	summarize(summary_file, summary_email, "\n");

	if(abort_flag)
		summarize(summary_file, summary_email, "Workflow aborted:\t ");
	else if(failed_flag)
		summarize(summary_file, summary_email, "Workflow failed:\t ");
	else
		summarize(summary_file, summary_email, "Workflow completed:\t ");
	timestamp_fmt(buffer, 50, "%c\n", time_completed);
	summarize(summary_file, summary_email, "%s", buffer);

	/* runtime is in microseconds; render it as H:MM:SS. */
	int seconds = runtime / 1000000;
	int hours = seconds / 3600;
	int minutes = (seconds - hours * 3600) / 60;
	seconds = seconds - hours * 3600 - minutes * 60;
	summarize(summary_file, summary_email, "Total runtime:\t\t %d:%02d:%02d\n", hours, minutes, seconds);

	summarize(summary_file, summary_email, "Workflow file:\t\t %s\n", dagfile);

	struct dag_node *n;
	struct dag_file *f;
	const char *fn;
	dag_node_state_t state;

	struct list *output_files;
	output_files = list_create();
	struct list *failed_tasks;
	failed_tasks = list_create();

	int total_tasks = itable_size(d->node_table);
	int tasks_completed = 0;
	int tasks_aborted = 0;
	int tasks_unrun = 0;

	for(n = d->nodes; n; n = n->next) {
		state = n->state;
		/* Record each failed command once.  BUGFIX: the duplicate
		   check previously compared against the uninitialized pointer
		   `fn` (undefined behavior) instead of the command itself. */
		if(state == DAG_NODE_STATE_FAILED && !list_find(failed_tasks, (int (*)(void *, const void *)) string_equal, (void *) n->command))
			list_push_tail(failed_tasks, (void *) n->command);
		else if(state == DAG_NODE_STATE_ABORTED)
			tasks_aborted++;
		else if(state == DAG_NODE_STATE_COMPLETE) {
			tasks_completed++;
			list_first_item(n->source_files);
			while((f = list_next_item(n->source_files))) {
				fn = f->filename;
				if(!list_find(output_files, (int (*)(void *, const void *)) string_equal, (void *) fn))
					list_push_tail(output_files, (void *) fn);
			}
		} else
			tasks_unrun++;
	}

	summarize(summary_file, summary_email, "Number of tasks:\t %d\n", total_tasks);
	summarize(summary_file, summary_email, "Completed tasks:\t %d/%d\n", tasks_completed, total_tasks);
	if(tasks_aborted != 0)
		summarize(summary_file, summary_email, "Aborted tasks:\t %d/%d\n", tasks_aborted, total_tasks);
	if(tasks_unrun != 0)
		summarize(summary_file, summary_email, "Tasks not run:\t\t %d/%d\n", tasks_unrun, total_tasks);
	if(list_size(failed_tasks) > 0)
		summarize(summary_file, summary_email, "Failed tasks:\t\t %d/%d\n", list_size(failed_tasks), total_tasks);
	for(list_first_item(failed_tasks); (fn = list_next_item(failed_tasks)) != NULL;)
		summarize(summary_file, summary_email, "\t%s\n", fn);

	if(list_size(output_files) > 0) {
		summarize(summary_file, summary_email, "Output files:\n");
		for(list_first_item(output_files); (fn = list_next_item(output_files)) != NULL;) {
			const char *size;
			struct stat buf;
			batch_fs_stat(remote_queue, fn, &buf);
			size = string_metric(buf.st_size, -1, NULL);
			summarize(summary_file, summary_email, "\t%s\t%s\n", fn, size);
		}
	}

	list_free(output_files);
	list_delete(output_files);
	list_free(failed_tasks);
	list_delete(failed_tasks);

	if(filename) {
		fprintf(stderr, "writing summary to %s.\n", filename);
		if(summary_file)
			fclose(summary_file);
	}

	if(email_summary_to) {
		fprintf(stderr, "emailing summary to %s.\n", email_summary_to);
		/* BUGFIX: streams opened with popen() must be closed with
		   pclose(), not fclose(). */
		if(summary_email)
			pclose(summary_email);
	}
}
/*
 * Main loop of MPI rank 0: bridges a remote queue master (reached over
 * a network link) to the MPI worker ranks 1..num_workers-1.  Commands
 * read from the link become per-job operation lists; operations are
 * dispatched to workers over MPI, and completed jobs are reported back
 * over the link on "get results".
 *
 * host/port/addr - where the remote master listens.
 * Returns abort_flag after shutting down all workers and MPI.
 */
int master_main(const char *host, int port, const char *addr)
{
	time_t idle_stoptime;
	struct link *master = NULL;
	int num_workers, i;
	struct mpi_queue_job **workers;

	struct itable *active_jobs = itable_create(0);
	struct itable *waiting_jobs = itable_create(0);
	struct list *complete_jobs = list_create();

	MPI_Comm_size(MPI_COMM_WORLD, &num_workers);

	/* One job slot per MPI rank; slot 0 (this rank) stays NULL. */
	workers = malloc(num_workers * sizeof(*workers));
	memset(workers, 0, num_workers * sizeof(*workers));

	idle_stoptime = time(0) + idle_timeout;

	while(!abort_flag) {
		char line[MPI_QUEUE_LINE_MAX];

		/* Give up entirely after idle_timeout with no activity. */
		if(time(0) > idle_stoptime) {
			if(master) {
				printf("mpi master: gave up after waiting %ds to receive a task.\n", idle_timeout);
			} else {
				printf("mpi master: gave up after waiting %ds to connect to %s port %d.\n", idle_timeout, host, port);
			}
			break;
		}

		/* Connect to the remote master and broadcast its working
		   directory to every worker rank. */
		if(!master) {
			char working_dir[MPI_QUEUE_LINE_MAX];
			master = link_connect(addr, port, idle_stoptime);
			if(!master) {
				sleep(5);
				continue;
			}
			link_tune(master, LINK_TUNE_INTERACTIVE);
			link_readline(master, line, sizeof(line), time(0) + active_timeout);
			memset(working_dir, 0, MPI_QUEUE_LINE_MAX);
			if(sscanf(line, "workdir %s", working_dir) == 1) {
				MPI_Bcast(working_dir, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD);
			} else {
				link_close(master);
				master = NULL;
				continue;
			}
		}

		/* Read and parse one command from the remote master. */
		if(link_readline(master, line, sizeof(line), time(0) + short_timeout)) {
			struct mpi_queue_operation *op;
			int jobid, mode;
			INT64_T length;
			char path[MPI_QUEUE_LINE_MAX];
			op = NULL;

			debug(D_MPI, "received: %s\n", line);

			if(!strcmp(line, "get results")) {
				/* Drain the completed-jobs list back over the link. */
				struct mpi_queue_job *job;
				debug(D_MPI, "results requested: %d available\n", list_size(complete_jobs));
				link_putfstring(master, "num results %d\n", time(0) + active_timeout, list_size(complete_jobs));
				while(list_size(complete_jobs)) {
					job = list_pop_head(complete_jobs);
					link_putfstring(master, "result %d %d %d %lld\n", time(0) + active_timeout, job->jobid, job->status, job->result, job->output_length);
					if(job->output_length) {
						link_write(master, job->output, job->output_length, time(0)+active_timeout);
					}
					mpi_queue_job_delete(job);
				}
			} else if(sscanf(line, "work %d %lld", &jobid, &length)) {
				/* FIXME(review): two defects here.
				   1. sscanf is tested for truthiness, not == 2, so a line
				      matching only "work %d" leaves `length` uninitialized.
				   2. op->buffer[op->buffer_length] writes index length+1 of
				      a malloc(length+1) buffer — a one-byte heap overflow;
				      the terminator should be op->buffer[length] = 0. */
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_WORK;
				op->buffer_length = length+1;
				op->buffer = malloc(length+1);
				op->buffer[op->buffer_length] = 0;
				link_read(master, op->buffer, length, time(0) + active_timeout);
				op->result = -1;
			} else if(sscanf(line, "stat %d %s", &jobid, path) == 2) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_STAT;
				sprintf(op->args, "%s", path);
				op->result = -1;
			} else if(sscanf(line, "unlink %d %s", &jobid, path) == 2) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_UNLINK;
				sprintf(op->args, "%s", path);
				op->result = -1;
			} else if(sscanf(line, "mkdir %d %s %o", &jobid, path, &mode) == 3) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_MKDIR;
				sprintf(op->args, "%s %o", path, mode);
				op->result = -1;
			} else if(sscanf(line, "close %d", &jobid) == 1) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_CLOSE;
				op->result = -1;
//			} else if(sscanf(line, "symlink %d %s %s", &jobid, path, filename) == 3) {
//			} else if(sscanf(line, "put %d %s %lld %o", &jobid, filename, &length, &mode) == 4) {
//			} else if(sscanf(line, "rget %d %s", &jobid, filename) == 2) {
//			} else if(sscanf(line, "get %d %s", &jobid, filename) == 2) {
//			} else if(sscanf(line, "thirdget %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) {
//			} else if(sscanf(line, "thirdput %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) {
			} else if(!strcmp(line, "exit")) {
				break;
			} else {
				/* Unrecognized command: abort the whole run. */
				abort_flag = 1;
				continue;
			}
			if(op) {
				/* Attach the operation to its job, creating the job
				   record on first reference. */
				struct mpi_queue_job *job;
				job = itable_lookup(active_jobs, jobid);
				if(!job) {
					job = itable_lookup(waiting_jobs, jobid);
				}
				if(!job) {
					job = malloc(sizeof(*job));
					memset(job, 0, sizeof(*job));
					job->jobid = jobid;
					job->operations = list_create();
					job->status = MPI_QUEUE_JOB_WAITING;
					job->worker_rank = -1;
					itable_insert(waiting_jobs, jobid, job);
				}
				list_push_tail(job->operations, op);
			}
			idle_stoptime = time(0) + idle_timeout;
		} else {
			/* Link failed or timed out: reconnect later. */
			link_close(master);
			master = 0;
			sleep(5);
		}

		/* Dispatch: assign waiting jobs to idle workers and service
		   progress on busy ones. */
		int num_waiting_jobs = itable_size(waiting_jobs);
		int num_unvisited_jobs = itable_size(active_jobs);
		for(i = 1; i < num_workers && (num_unvisited_jobs > 0 || num_waiting_jobs > 0); i++) {
			struct mpi_queue_job *job;
			struct mpi_queue_operation *op;
			int flag = 0;
			UINT64_T jobid;

			if(!workers[i]) {
				/* Idle worker: hand it the next waiting job, if any. */
				if(num_waiting_jobs) {
					itable_firstkey(waiting_jobs);
					itable_nextkey(waiting_jobs, &jobid, (void **)&job);
					itable_remove(waiting_jobs, jobid);
					itable_insert(active_jobs, jobid, job);
					workers[i] = job;
					num_waiting_jobs--;
					job->worker_rank = i;
					job->status = MPI_QUEUE_JOB_READY;
				} else {
					continue;
				}
			} else {
				num_unvisited_jobs--;
				if(workers[i]->status == MPI_QUEUE_JOB_BUSY) {
					/* Poll the outstanding MPI_Irecv for completion. */
					MPI_Test(&workers[i]->request, &flag, &workers[i]->mpi_status);
					if(flag) {
						op = list_pop_head(workers[i]->operations);
						if(op->output_length) {
							op->output_buffer = malloc(op->output_length);
							MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status);
						}

						workers[i]->status = MPI_QUEUE_JOB_READY;

						/* Work output (or any failed op) becomes the job's
						   result; a failure forces the job to close. */
						if(op->type == MPI_QUEUE_OP_WORK || op->result < 0) {
							if(workers[i]->output)
								free(workers[i]->output);
							workers[i]->output = op->output_buffer;
							op->output_buffer = NULL;
							workers[i]->output_length = op->output_length;
							workers[i]->result = op->result;
							if(op->result < 0) {
								workers[i]->status = MPI_QUEUE_JOB_FAILED | op->type;
								op->type = MPI_QUEUE_OP_CLOSE;
								list_push_head(workers[i]->operations, op);
								op = NULL;
							}
						}

						if(op) {
							if(op->buffer)
								free(op->buffer);
							if(op->output_buffer)
								free(op->output_buffer);
							free(op);
						}
					}
				}
			}

			/* Start this worker's next queued operation if it is idle. */
			if( workers[i]->status != MPI_QUEUE_JOB_BUSY && list_size(workers[i]->operations)) {
				op = list_peek_head(workers[i]->operations);

				if(op->type == MPI_QUEUE_OP_CLOSE) {
					/* Job finished: retire it to the complete list and
					   revisit this worker slot on the next pass (i--). */
					itable_remove(active_jobs, workers[i]->jobid);
					list_push_tail(complete_jobs, workers[i]);
					if(!(workers[i]->status & MPI_QUEUE_JOB_FAILED))
						workers[i]->status = MPI_QUEUE_JOB_COMPLETE;
					workers[i] = NULL;
					i--;
					continue;
				}

				MPI_Send(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD);
				if(op->buffer_length) {
					MPI_Send(op->buffer, op->buffer_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD);
					free(op->buffer);
					op->buffer_length = 0;
					op->buffer = NULL;
				}
				MPI_Irecv(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->request);
				workers[i]->status = MPI_QUEUE_JOB_BUSY;
			}
		}
	}

	/** Clean up waiting & complete jobs, send Exit commands to each worker */
	if(!master) {
		// If the master link hasn't been set up yet
		// the workers will be waiting for the working directory
		char line[MPI_QUEUE_LINE_MAX];
		memset(line, 0, MPI_QUEUE_LINE_MAX);
		MPI_Bcast(line, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD);
	} else {
		link_close(master);
	}

	for(i = 1; i < num_workers; i++) {
		struct mpi_queue_operation *op, close;
		memset(&close, 0, sizeof(close));
		close.type = MPI_QUEUE_OP_EXIT;

		if(workers[i]) {
			/* Let any in-flight receive finish before retiring the job. */
			if(workers[i]->status == MPI_QUEUE_JOB_BUSY) {
				MPI_Wait(&workers[i]->request, &workers[i]->mpi_status);
				op = list_peek_head(workers[i]->operations);
				if(op->output_length) {
					op->output_buffer = malloc(op->output_length);
					MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status);
				}
			}
			itable_remove(active_jobs, workers[i]->jobid);
			list_push_tail(complete_jobs, workers[i]);
		}
		MPI_Send(&close, sizeof(close), MPI_BYTE, i, 0, MPI_COMM_WORLD);
	}

	/* Move never-started jobs to the complete list, then free everything. */
	itable_firstkey(waiting_jobs);
	while(itable_size(waiting_jobs)) {
		struct mpi_queue_job *job;
		UINT64_T jobid;

		itable_nextkey(waiting_jobs, &jobid, (void **)&job);
		itable_remove(waiting_jobs, jobid);
		list_push_tail(complete_jobs, job);
	}

	while(list_size(complete_jobs)) {
		mpi_queue_job_delete(list_pop_head(complete_jobs));
	}

	MPI_Finalize();
	return abort_flag;
}