/*
 * Tear down a makeflow wrapper.
 *
 * Releases the command string, both file lists, the remote-name tables and
 * finally the wrapper structure itself.  The values stored in remote_names
 * are freed only when the wrapper was flagged with uses_remote_rename.
 *
 * w must be a valid wrapper; it must not be used after this call.
 */
void makeflow_wrapper_delete(struct makeflow_wrapper *w)
{
	/* free(NULL) is a no-op, so no guard is needed for an unset command. */
	free(w->command);

	list_free(w->input_files);
	list_delete(w->input_files);

	list_free(w->output_files);
	list_delete(w->output_files);

	if(w->uses_remote_rename) {
		uint64_t key;
		char *name;

		/* The table stores heap-allocated strings as values; release each one. */
		for(itable_firstkey(w->remote_names); itable_nextkey(w->remote_names, &key, (void **) &name);) {
			free(name);
		}
	}

	itable_delete(w->remote_names);
	hash_table_delete(w->remote_names_inv);
	free(w);
}
static batch_job_id_t batch_job_cluster_wait (struct batch_queue * q, struct batch_job_info * info_out, time_t stoptime) { struct batch_job_info *info; batch_job_id_t jobid; int t, c; while(1) { UINT64_T ujobid; itable_firstkey(q->job_table); while(itable_nextkey(q->job_table, &ujobid, (void **) &info)) { jobid = ujobid; char *statusfile = string_format("%s.status.%" PRIbjid, cluster_name, jobid); FILE *file = fopen(statusfile, "r"); if(file) { char line[BATCH_JOB_LINE_MAX]; while(fgets(line, sizeof(line), file)) { if(sscanf(line, "start %d", &t)) { info->started = t; } else if(sscanf(line, "stop %d %d", &c, &t) == 2) { debug(D_BATCH, "job %" PRIbjid " complete", jobid); if(!info->started) info->started = t; info->finished = t; info->exited_normally = 1; info->exit_code = c; } } fclose(file); if(info->finished != 0) { unlink(statusfile); info = itable_remove(q->job_table, jobid); *info_out = *info; free(info); free(statusfile); return jobid; } } else { debug(D_BATCH, "could not open status file \"%s\"", statusfile); } free(statusfile); } if(itable_size(q->job_table) <= 0) return 0; if(stoptime != 0 && time(0) >= stoptime) return -1; if(process_pending()) return -1; sleep(1); } return -1; }
/*
 * Visit every node stored in d->node_table and call get_ancestor_depth()
 * on it.  The return value of get_ancestor_depth() is not used here.
 */
void dag_find_ancestor_depth(struct dag *d)
{
	struct dag_node *node;
	UINT64_T node_key;

	for(itable_firstkey(d->node_table); itable_nextkey(d->node_table, &node_key, (void **) &node);) {
		get_ancestor_depth(node);
	}
}
/*
 * Ask the batch queue to remove every job whose id is a key of job_table.
 *
 * queue     - batch queue the worker jobs were submitted to.
 * job_table - table keyed by batch job id; the values are not used.
 *
 * The table itself is only iterated, not modified.  The count reported in
 * the final debug message is the table size taken before the removals start.
 */
void remove_all_workers(struct batch_queue *queue, struct itable *job_table)
{
	uint64_t jobid;
	void *value;

	debug(D_WQ, "removing all remaining worker jobs...");
	int count = itable_size(job_table);

	itable_firstkey(job_table);
	while(itable_nextkey(job_table, &jobid, &value)) {
		/* jobid is unsigned, so it must be printed with PRIu64, not PRId64. */
		debug(D_WQ, "removing job %" PRIu64, jobid);
		batch_job_remove(queue, jobid);
	}

	debug(D_WQ, "%d workers removed.", count);
}
/*
 * Empty a histogram without destroying it.
 *
 * Frees every box_count stored in h->buckets, clears the bucket table and
 * resets the summary fields (total_count, max_value, min_value, mode) to 0.
 */
void histogram_clear(struct histogram *h)
{
	struct box_count *bucket;
	uint64_t bucket_key;

	/* The table owns its box_count values; release them before clearing it. */
	for(itable_firstkey(h->buckets); itable_nextkey(h->buckets, &bucket_key, (void **) &bucket);) {
		free(bucket);
	}

	h->total_count = 0;
	h->max_value = 0;
	h->min_value = 0;
	h->mode = 0;

	itable_clear(h->buckets);
}
static batch_job_id_t batch_job_dryrun_wait (struct batch_queue * q, struct batch_job_info * info_out, time_t stoptime) { struct batch_job_info *info; UINT64_T jobid; itable_firstkey(q->job_table); if (itable_nextkey(q->job_table, &jobid, NULL)) { info = itable_remove(q->job_table, jobid); info->finished = time(0); info->exited_normally = 1; info->exit_code = 0; memcpy(info_out, info, sizeof(*info)); free(info); return jobid; } else { return 0; } }
static int itable_double_buckets(struct itable *h) { struct itable *hn = itable_create(2 * h->bucket_count); if(!hn) return 0; /* Move pairs to new hash */ uint64_t key; void *value; itable_firstkey(h); while(itable_nextkey(h, &key, &value)) if(!itable_insert(hn, key, value)) { itable_delete(hn); return 0; } /* Delete all old pairs */ struct entry *e, *f; int i; for(i = 0; i < h->bucket_count; i++) { e = h->buckets[i]; while(e) { f = e->next; free(e); e = f; } } /* Make the old point to the new */ free(h->buckets); h->buckets = hn->buckets; h->bucket_count = hn->bucket_count; h->size = hn->size; /* Delete reference to new, so old is safe */ free(hn); return 1; }
/*
 * Find the cluster in active_clusters that is closest to c.
 *
 * active_clusters - table whose values are struct cluster pointers.
 * c               - reference cluster, passed as first argument to cmp.
 * cmp             - distance function; smaller results mean "nearer".
 *
 * Returns the value with the smallest cmp(c, value), keeping the first one
 * encountered on ties, or NULL if the table is empty.  No entry is skipped,
 * so if c itself is stored in the table it is compared like any other.
 */
struct cluster *cluster_nearest_neighbor(struct itable *active_clusters, struct cluster *c, double (*cmp)(struct cluster *, struct cluster *))
{
	struct cluster *best = NULL;
	struct cluster *candidate;
	double best_distance = 0.0;	/* only meaningful once best is set */
	uint64_t key;

	itable_firstkey(active_clusters);
	while(itable_nextkey(active_clusters, &key, (void **) &candidate)) {
		double distance = cmp(c, candidate);

		/* Keep the current best unless this candidate is strictly nearer. */
		if(best && !(distance < best_distance)) {
			continue;
		}

		best_distance = distance;
		best = candidate;
	}

	return best;
}
/*
 * Destroy an MPI queue.  A NULL queue is ignored.
 *
 * Releases the ready and complete lists, frees every value still held in
 * the active table (removing each entry as it goes), deletes the table,
 * closes the master link and finally frees the queue structure.
 */
void mpi_queue_delete(struct mpi_queue *q)
{
	if(!q) {
		return;
	}

	list_free(q->ready_list);
	list_delete(q->ready_list);

	list_free(q->complete_list);
	list_delete(q->complete_list);

	UINT64_T id;
	void *entry;

	for(itable_firstkey(q->active_list); itable_nextkey(q->active_list, &id, &entry);) {
		free(entry);
		itable_remove(q->active_list, id);
	}
	itable_delete(q->active_list);

	link_close(q->master_link);
	free(q);
}
/*
 * Return the sorted upper boundaries of all buckets of a histogram.
 *
 * For every key in h->buckets the value end_of(h, key) is collected; the
 * resulting array is sorted with cmp_double.
 *
 * Returns a newly allocated array of histogram_size(h) doubles that the
 * caller must free(), or NULL when the histogram has no buckets or the
 * allocation fails.
 */
double *histogram_buckets(struct histogram *h)
{
	int n = histogram_size(h);
	if(n < 1) {
		return NULL;
	}

	double *values = calloc(n, sizeof(*values));
	if(!values) {
		/* Out of memory: report "no buckets" rather than dereferencing NULL. */
		return NULL;
	}

	int i = 0;
	uint64_t key;
	struct box_count *box;	/* required by itable_nextkey; the value itself is not used */

	itable_firstkey(h->buckets);
	/* Never write past the n slots allocated above. */
	while(i < n && itable_nextkey(h->buckets, &key, (void **) &box)) {
		values[i] = end_of(h, key);
		i++;
	}

	qsort(values, n, sizeof(double), cmp_double);

	return values;
}
/*
 * Entry point of the "wavefront" program.
 *
 * Command line: [options] <function> <xsize> <ysize>
 * Fewer than three positional arguments prints the help text and exits 1.
 *
 * Stages, in order:
 *  1. Option parsing with getopt_long.  -n and -b set manual overrides for
 *     the number of running jobs and the block size; -A, -L and -T select
 *     auto, multicore and distributed mode (-T also picks the batch system);
 *     -V enables verify mode; -l redirects the progress log (default stdout);
 *     -X/-Y set xstart/ystart.  The option string also accepts 'q' and 'D',
 *     but there is no case for them in the switch, so they are ignored.
 *  2. Unless in verify mode, check_configuration() must succeed or the
 *     program exits 1.
 *  3. Unless multicore mode was forced, the task time is measured, a block
 *     size is chosen and predicted run times are printed.  In auto mode
 *     multicore is picked when its predicted time is less than twice the
 *     distributed prediction.
 *  4. max_jobs_running defaults to the CPU count (multicore) or 1000;
 *     manual overrides are applied afterwards.  The batch queue is created;
 *     verify mode exits 0 right after that.
 *  5. Main loop: submit tasks from ready_list until max_jobs_running jobs are
 *     in running_table, save status, wait for a job, and on success update
 *     the dispatch/execute statistics and call wavefront_task_complete().
 *     A job that did not exit normally with code 0 sets abort_mode; while
 *     abort_mode is set, all ready tasks are deleted and batch_job_remove()
 *     is called for every running job at the top of each iteration.
 *     The loop ends when both ready_list and running_table are empty.
 *
 * Returns 0 after the loop, whether the workload completed or was aborted.
 *
 * NOTE(review): when wavefront_task_submit() does not return a positive job
 * id the code calls abort(), so the sleep(1)/list_push_head() retry that
 * follows it is never reached - confirm whether the abort() is intentional.
 */
int main( int argc, char *argv[] ) { signed char c; const char *progname = "wavefront"; debug_config(progname); progress_log_file = stdout; struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'v'}, {"debug", required_argument, 0, 'd'}, {"jobs", required_argument, 0, 'n'}, {"block-size", required_argument, 0, 'b'}, {"debug-file", required_argument, 0, 'o'}, {"log-file", required_argument, 0, 'l'}, {"bitmap", required_argument, 0, 'B'}, {"bitmap-interval", required_argument, 0, 'i'}, {"auto", no_argument, 0, 'A'}, {"local", no_argument, 0, 'L'}, {"batch-type", required_argument, 0, 'T'}, {"verify", no_argument, 0, 'V'}, {0,0,0,0} }; while((c=getopt_long(argc,argv,"n:b:d:o:l:B:i:qALDT:VX:Y:vh", long_options, NULL)) > -1) { switch(c) { case 'n': manual_max_jobs_running = atoi(optarg); break; case 'b': manual_block_size = atoi(optarg); break; case 'd': debug_flags_set(optarg); break; case 'o': debug_config_file(optarg); break; case 'B': progress_bitmap_file = optarg; break; case 'i': progress_bitmap_interval = atoi(optarg); break; case 'l': progress_log_file = fopen(optarg,"w"); if(!progress_log_file) { fprintf(stderr,"couldn't open %s: %s\n",optarg,strerror(errno)); return 1; } break; case 'A': wavefront_mode = WAVEFRONT_MODE_AUTO; break; case 'L': wavefront_mode = WAVEFRONT_MODE_MULTICORE; break; case 'T': wavefront_mode = WAVEFRONT_MODE_DISTRIBUTED; batch_system_type = batch_queue_type_from_string(optarg); if(batch_system_type==BATCH_QUEUE_TYPE_UNKNOWN) { fprintf(stderr,"unknown batch system type: %s\n",optarg); exit(1); } break; case 'V': verify_mode = 1; break; case 'X': xstart = atoi(optarg); break; case 'Y': ystart = atoi(optarg); break; case 'v': cctools_version_print(stdout, progname); exit(0); break; case 'h': show_help(progname); exit(0); break; } } cctools_version_debug(D_DEBUG, argv[0]); if( (argc-optind<3) ) { show_help(progname); exit(1); } function = argv[optind]; xsize=atoi(argv[optind+1]); 
/* Remaining positional argument, configuration check, run-time model, mode selection and queue setup. */
ysize=atoi(argv[optind+2]); total_cells = xsize*ysize; if(!verify_mode && !check_configuration(function,xsize,ysize)) exit(1); int ncpus = load_average_get_cpus(); if(wavefront_mode!=WAVEFRONT_MODE_MULTICORE) { double task_time = measure_task_time(); printf("Each function takes %.02lfs to run.\n",task_time); block_size = find_best_block_size(xsize,1000,2,task_time,average_dispatch_time); double distributed_time = wavefront_distributed_model(xsize,1000,2,task_time,block_size,average_dispatch_time); double multicore_time = wavefront_multicore_model(xsize,ncpus,task_time); double ideal_multicore_time = wavefront_multicore_model(xsize,xsize,task_time); double sequential_time = wavefront_multicore_model(xsize,1,task_time); printf("---------------------------------\n"); printf("This workload would take:\n"); printf("%.02lfs sequentially\n",sequential_time); printf("%.02lfs on this %d-core machine\n",multicore_time,ncpus); printf("%.02lfs on a %d-core machine\n",ideal_multicore_time,xsize); printf("%.02lfs on a 1000-node distributed system with block size %d\n",distributed_time,block_size); printf("---------------------------------\n"); if(wavefront_mode==WAVEFRONT_MODE_AUTO) { if(multicore_time < distributed_time*2) { wavefront_mode = WAVEFRONT_MODE_MULTICORE; } else { wavefront_mode = WAVEFRONT_MODE_DISTRIBUTED; } } } if(wavefront_mode==WAVEFRONT_MODE_MULTICORE) { batch_system_type = BATCH_QUEUE_TYPE_LOCAL; max_jobs_running = ncpus; } else { max_jobs_running = 1000; } if(manual_block_size!=0) { block_size = manual_block_size; } if(manual_max_jobs_running!=0) { max_jobs_running = manual_max_jobs_running; } if(wavefront_mode==WAVEFRONT_MODE_MULTICORE) { printf("Running in multicore mode with %d CPUs.\n",max_jobs_running); } else { printf("Running in distributed mode with block size %d on up to %d CPUs\n",block_size,max_jobs_running); } batch_q = batch_queue_create(batch_system_type); if(verify_mode) exit(0); struct bitmap * b = bitmap_create(xsize+1,ysize+1); struct list 
*ready_list = list_create(); struct itable *running_table = itable_create(0); struct batch_job_info info; UINT64_T jobid; struct wavefront_task *task; wavefront_task_initialize(b,ready_list); printf("Starting workload...\n"); fprintf(progress_log_file,"# elapsed time : waiting jobs / running jobs / cells complete (percent complete)\n"); while(1) { if(abort_mode) { while((task=list_pop_tail(ready_list))) { wavefront_task_delete(task); } itable_firstkey(running_table); while(itable_nextkey(running_table,&jobid,(void**)&task)) { batch_job_remove(batch_q,jobid); } } if(list_size(ready_list)==0 && itable_size(running_table)==0) break; while(1) { if(itable_size(running_table)>=max_jobs_running) break; task = list_pop_tail(ready_list); if(!task) break; jobid = wavefront_task_submit(task); if(jobid>0) { itable_insert(running_table,jobid,task); wavefront_task_mark_range(task,b,WAVEFRONT_TASK_STATE_RUNNING); } else { abort(); sleep(1); list_push_head(ready_list,task); } } save_status(b,ready_list,running_table); jobid = batch_job_wait(batch_q,&info); if(jobid>0) { task = itable_remove(running_table,jobid); if(task) { if(info.exited_normally && info.exit_code==0) { total_dispatch_time += info.started-info.submitted; total_execute_time += MAX(info.finished-info.started,1); total_cells_complete+=task->width*task->height; total_jobs_complete++; average_dispatch_time = 1.0*total_dispatch_time / total_jobs_complete; average_task_time = 1.0*total_execute_time / total_cells_complete; wavefront_task_complete(b,ready_list,task); } else { printf("job %" PRIu64 " failed, aborting this workload\n",jobid); abort_mode = 1; } } } } save_status(b,ready_list,running_table); if(abort_mode) { printf("Workload was aborted.\n"); } else { printf("Workload complete.\n"); } return 0; }
static batch_job_id_t batch_job_amazon_batch_wait(struct batch_queue *q, struct batch_job_info *info_out, time_t stoptime){ struct internal_amazon_batch_amazon_ids amazon_ids = initialize(q); //succeeded check int done = 0; char* env_var = amazon_ids.master_env_prefix; itable_firstkey(amazon_job_ids); char* jaid; UINT64_T jobid; while(itable_nextkey(amazon_job_ids,&jobid,(void**)&jaid)){ done = describe_aws_job(jaid,env_var); char* jobname = string_format("%s_%u",queue_name,(unsigned int)jobid); unsigned int id = (unsigned int)jobid; if(done == DESCRIBE_AWS_JOB_SUCCESS){ if(itable_lookup(done_jobs,id+1) == NULL){ //id is done, returning here debug(D_BATCH,"Inserting id: %u into done_jobs",id); itable_insert(done_jobs,id+1,jobname); itable_remove(amazon_job_ids,jobid); //pull files from s3 char* output_files = itable_lookup(done_files,id); struct list* file_list = extract_file_names_from_list(output_files); if(list_size(file_list)> 0){ list_first_item(file_list); char* cur_file = NULL; while((cur_file=list_next_item(file_list)) != NULL){ debug(D_BATCH,"Copying over %s",cur_file); char* get_from_s3_cmd = string_format("%s aws s3 cp s3://%s/%s.txz ./%s.txz && tar -xvf %s.txz && rm %s.txz",env_var,bucket_name,cur_file,cur_file, cur_file, cur_file); int outputcode = sh_system(get_from_s3_cmd); debug(D_BATCH,"output code from calling S3 to pull file %s: %i",cur_file,outputcode); FILE* tmpOut = fopen(cur_file,"r"); if(tmpOut){ debug(D_BATCH,"File does indeed exist: %s",cur_file); fclose(tmpOut); }else{ debug(D_BATCH,"File doesn't exist: %s",cur_file); } free(get_from_s3_cmd); } } list_free(file_list); list_delete(file_list); //Let Makeflow know we're all done! 
debug(D_BATCH,"Removing the job from the job_table"); struct batch_job_info* info = itable_remove(q->job_table, id);//got from batch_job_amazon.c info->finished = time(0);//get now info->exited_normally=1; info->exit_code=finished_aws_job_exit_code(jaid,env_var); debug(D_BATCH,"copying over the data to info_out"); memcpy(info_out, info, sizeof(struct batch_job_info)); free(info); char* jobdef = aws_job_def(jaid); del_job_def(jobdef); free(jobdef); return id; } }else if(done == DESCRIBE_AWS_JOB_FAILED || done == DESCRIBE_AWS_JOB_NON_EXIST){ if(itable_lookup(done_jobs,id+1)==NULL){ //id is done, returning here itable_insert(done_jobs,id+1,jobname); itable_remove(amazon_job_ids,jobid); debug(D_BATCH,"Failed job: %i",id); struct batch_job_info* info = itable_remove(q->job_table, id);//got from batch_job_amazon.c info->finished = time(0); //get now info->exited_normally=0; int exc = finished_aws_job_exit_code(jaid,env_var); info->exit_code= exc == 0 ? -1 : exc; memcpy(info_out, info, sizeof(*info)); free(info); char* jobdef = aws_job_def(jaid); del_job_def(jobdef); free(jobdef); return id; } }else{ continue; } } return -1; }
int master_main(const char *host, int port, const char *addr) { time_t idle_stoptime; struct link *master = NULL; int num_workers, i; struct mpi_queue_job **workers; struct itable *active_jobs = itable_create(0); struct itable *waiting_jobs = itable_create(0); struct list *complete_jobs = list_create(); MPI_Comm_size(MPI_COMM_WORLD, &num_workers); workers = malloc(num_workers * sizeof(*workers)); memset(workers, 0, num_workers * sizeof(*workers)); idle_stoptime = time(0) + idle_timeout; while(!abort_flag) { char line[MPI_QUEUE_LINE_MAX]; if(time(0) > idle_stoptime) { if(master) { printf("mpi master: gave up after waiting %ds to receive a task.\n", idle_timeout); } else { printf("mpi master: gave up after waiting %ds to connect to %s port %d.\n", idle_timeout, host, port); } break; } if(!master) { char working_dir[MPI_QUEUE_LINE_MAX]; master = link_connect(addr, port, idle_stoptime); if(!master) { sleep(5); continue; } link_tune(master, LINK_TUNE_INTERACTIVE); link_readline(master, line, sizeof(line), time(0) + active_timeout); memset(working_dir, 0, MPI_QUEUE_LINE_MAX); if(sscanf(line, "workdir %s", working_dir) == 1) { MPI_Bcast(working_dir, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD); } else { link_close(master); master = NULL; continue; } } if(link_readline(master, line, sizeof(line), time(0) + short_timeout)) { struct mpi_queue_operation *op; int jobid, mode; INT64_T length; char path[MPI_QUEUE_LINE_MAX]; op = NULL; debug(D_MPI, "received: %s\n", line); if(!strcmp(line, "get results")) { struct mpi_queue_job *job; debug(D_MPI, "results requested: %d available\n", list_size(complete_jobs)); link_putfstring(master, "num results %d\n", time(0) + active_timeout, list_size(complete_jobs)); while(list_size(complete_jobs)) { job = list_pop_head(complete_jobs); link_putfstring(master, "result %d %d %d %lld\n", time(0) + active_timeout, job->jobid, job->status, job->result, job->output_length); if(job->output_length) { link_write(master, job->output, 
job->output_length, time(0)+active_timeout); } mpi_queue_job_delete(job); } } else if(sscanf(line, "work %d %lld", &jobid, &length)) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_WORK; op->buffer_length = length+1; op->buffer = malloc(length+1); op->buffer[op->buffer_length] = 0; link_read(master, op->buffer, length, time(0) + active_timeout); op->result = -1; } else if(sscanf(line, "stat %d %s", &jobid, path) == 2) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_STAT; sprintf(op->args, "%s", path); op->result = -1; } else if(sscanf(line, "unlink %d %s", &jobid, path) == 2) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_UNLINK; sprintf(op->args, "%s", path); op->result = -1; } else if(sscanf(line, "mkdir %d %s %o", &jobid, path, &mode) == 3) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_MKDIR; sprintf(op->args, "%s %o", path, mode); op->result = -1; } else if(sscanf(line, "close %d", &jobid) == 1) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_CLOSE; op->result = -1; // } else if(sscanf(line, "symlink %d %s %s", &jobid, path, filename) == 3) { // } else if(sscanf(line, "put %d %s %lld %o", &jobid, filename, &length, &mode) == 4) { // } else if(sscanf(line, "rget %d %s", &jobid, filename) == 2) { // } else if(sscanf(line, "get %d %s", &jobid, filename) == 2) { // } else if(sscanf(line, "thirdget %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) { // } else if(sscanf(line, "thirdput %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) { } else if(!strcmp(line, "exit")) { break; } else { abort_flag = 1; continue; } if(op) { struct mpi_queue_job *job; job = itable_lookup(active_jobs, jobid); if(!job) { job = itable_lookup(waiting_jobs, jobid); } if(!job) { job = malloc(sizeof(*job)); memset(job, 0, sizeof(*job)); job->jobid = jobid; job->operations = list_create(); job->status = 
MPI_QUEUE_JOB_WAITING; job->worker_rank = -1; itable_insert(waiting_jobs, jobid, job); } list_push_tail(job->operations, op); } idle_stoptime = time(0) + idle_timeout; } else { link_close(master); master = 0; sleep(5); } int num_waiting_jobs = itable_size(waiting_jobs); int num_unvisited_jobs = itable_size(active_jobs); for(i = 1; i < num_workers && (num_unvisited_jobs > 0 || num_waiting_jobs > 0); i++) { struct mpi_queue_job *job; struct mpi_queue_operation *op; int flag = 0; UINT64_T jobid; if(!workers[i]) { if(num_waiting_jobs) { itable_firstkey(waiting_jobs); itable_nextkey(waiting_jobs, &jobid, (void **)&job); itable_remove(waiting_jobs, jobid); itable_insert(active_jobs, jobid, job); workers[i] = job; num_waiting_jobs--; job->worker_rank = i; job->status = MPI_QUEUE_JOB_READY; } else { continue; } } else { num_unvisited_jobs--; if(workers[i]->status == MPI_QUEUE_JOB_BUSY) { MPI_Test(&workers[i]->request, &flag, &workers[i]->mpi_status); if(flag) { op = list_pop_head(workers[i]->operations); if(op->output_length) { op->output_buffer = malloc(op->output_length); MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status); } workers[i]->status = MPI_QUEUE_JOB_READY; if(op->type == MPI_QUEUE_OP_WORK || op->result < 0) { if(workers[i]->output) free(workers[i]->output); workers[i]->output = op->output_buffer; op->output_buffer = NULL; workers[i]->output_length = op->output_length; workers[i]->result = op->result; if(op->result < 0) { workers[i]->status = MPI_QUEUE_JOB_FAILED | op->type; op->type = MPI_QUEUE_OP_CLOSE; list_push_head(workers[i]->operations, op); op = NULL; } } if(op) { if(op->buffer) free(op->buffer); if(op->output_buffer) free(op->output_buffer); free(op); } } } } if( workers[i]->status != MPI_QUEUE_JOB_BUSY && list_size(workers[i]->operations)) { op = list_peek_head(workers[i]->operations); if(op->type == MPI_QUEUE_OP_CLOSE) { itable_remove(active_jobs, workers[i]->jobid); 
list_push_tail(complete_jobs, workers[i]); if(!(workers[i]->status & MPI_QUEUE_JOB_FAILED)) workers[i]->status = MPI_QUEUE_JOB_COMPLETE; workers[i] = NULL; i--; continue; } MPI_Send(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD); if(op->buffer_length) { MPI_Send(op->buffer, op->buffer_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD); free(op->buffer); op->buffer_length = 0; op->buffer = NULL; } MPI_Irecv(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->request); workers[i]->status = MPI_QUEUE_JOB_BUSY; } } } /** Clean up waiting & complete jobs, send Exit commands to each worker */ if(!master) { // If the master link hasn't been set up yet // the workers will be waiting for the working directory char line[MPI_QUEUE_LINE_MAX]; memset(line, 0, MPI_QUEUE_LINE_MAX); MPI_Bcast(line, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD); } else { link_close(master); } for(i = 1; i < num_workers; i++) { struct mpi_queue_operation *op, close; memset(&close, 0, sizeof(close)); close.type = MPI_QUEUE_OP_EXIT; if(workers[i]) { if(workers[i]->status == MPI_QUEUE_JOB_BUSY) { MPI_Wait(&workers[i]->request, &workers[i]->mpi_status); op = list_peek_head(workers[i]->operations); if(op->output_length) { op->output_buffer = malloc(op->output_length); MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status); } } itable_remove(active_jobs, workers[i]->jobid); list_push_tail(complete_jobs, workers[i]); } MPI_Send(&close, sizeof(close), MPI_BYTE, i, 0, MPI_COMM_WORLD); } itable_firstkey(waiting_jobs); while(itable_size(waiting_jobs)) { struct mpi_queue_job *job; UINT64_T jobid; itable_nextkey(waiting_jobs, &jobid, (void **)&job); itable_remove(waiting_jobs, jobid); list_push_tail(complete_jobs, job); } while(list_size(complete_jobs)) { mpi_queue_job_delete(list_pop_head(complete_jobs)); } MPI_Finalize(); return abort_flag; }