Example #1
void makeflow_wrapper_delete(struct makeflow_wrapper *w)
{
	if(w->command)
		free(w->command);

	list_free(w->input_files);
	list_delete(w->input_files);

	list_free(w->output_files);
	list_delete(w->output_files);

	if(w->uses_remote_rename){
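		/* Free each stored remote name; the table itself is deleted below. */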
		uint64_t f;
		char *remote;
		itable_firstkey(w->remote_names);
		while(itable_nextkey(w->remote_names, &f, (void **) &remote)){
			free(remote);
		}
	}
	itable_delete(w->remote_names);

	hash_table_delete(w->remote_names_inv);

	free(w);
}
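All of the examples below share the same iteration idiom: itable_firstkey() resets the table's internal cursor, and itable_nextkey() stores the next key/value pair through its out-parameters, returning 0 once the table is exhausted. A minimal sketch of that pattern follows; the struct item type, the visit_all function, and the my_table parameter are hypothetical and introduced only for illustration.

#include <stdio.h>
#include <inttypes.h>
#include "itable.h"	/* CCTools integer-keyed hash table */

/* Hypothetical value type, used only for this sketch. */
struct item {
	int id;
};

static void visit_all(struct itable *my_table)
{
	uint64_t key;
	struct item *value;

	itable_firstkey(my_table);	/* reset the table's iteration cursor */
	while(itable_nextkey(my_table, &key, (void **) &value)) {
		printf("key %" PRIu64 " -> item %d\n", key, value->id);
	}
}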
Example #2
static batch_job_id_t batch_job_cluster_wait (struct batch_queue * q, struct batch_job_info * info_out, time_t stoptime)
{
	struct batch_job_info *info;
	batch_job_id_t jobid;
	int t, c;

	while(1) {
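		/* Check the status file of every submitted job for start/stop records. */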
		UINT64_T ujobid;
		itable_firstkey(q->job_table);
		while(itable_nextkey(q->job_table, &ujobid, (void **) &info)) {
			jobid = ujobid;
			char *statusfile = string_format("%s.status.%" PRIbjid, cluster_name, jobid);
			FILE *file = fopen(statusfile, "r");
			if(file) {
				char line[BATCH_JOB_LINE_MAX];
				while(fgets(line, sizeof(line), file)) {
					if(sscanf(line, "start %d", &t)) {
						info->started = t;
					} else if(sscanf(line, "stop %d %d", &c, &t) == 2) {
						debug(D_BATCH, "job %" PRIbjid " complete", jobid);
						if(!info->started)
							info->started = t;
						info->finished = t;
						info->exited_normally = 1;
						info->exit_code = c;
					}
				}
				fclose(file);

				if(info->finished != 0) {
					unlink(statusfile);
					info = itable_remove(q->job_table, jobid);
					*info_out = *info;
					free(info);
					free(statusfile);
					return jobid;
				}
			} else {
				debug(D_BATCH, "could not open status file \"%s\"", statusfile);
			}

			free(statusfile);
		}

		if(itable_size(q->job_table) <= 0)
			return 0;

		if(stoptime != 0 && time(0) >= stoptime)
			return -1;

		if(process_pending())
			return -1;

		sleep(1);
	}

	return -1;
}
Example #3
void dag_find_ancestor_depth(struct dag *d)
{
	UINT64_T key;
	struct dag_node *n;

	itable_firstkey(d->node_table);
	while(itable_nextkey(d->node_table, &key, (void **) &n)) {
		get_ancestor_depth(n);
	}
}
Example #4
void remove_all_workers( struct batch_queue *queue, struct itable *job_table )
{
	uint64_t jobid;
	void *value;

	debug(D_WQ,"removing all remaining worker jobs...");
	int count = itable_size(job_table);
	itable_firstkey(job_table);
	while(itable_nextkey(job_table,&jobid,&value)) {
		debug(D_WQ,"removing job %"PRIu64,jobid);
		batch_job_remove(queue,jobid);
	}
	debug(D_WQ,"%d workers removed.",count);

}
Example #5
void histogram_clear(struct histogram *h) {

	uint64_t key;
	struct box_count *box;

	itable_firstkey(h->buckets);
	while(itable_nextkey(h->buckets, &key, (void **) &box)) {
		free(box);
	}

	h->total_count = 0;
	h->max_value   = 0;
	h->min_value   = 0;
	h->mode        = 0;

	itable_clear(h->buckets);
}
Example #6
static batch_job_id_t batch_job_dryrun_wait (struct batch_queue * q, struct batch_job_info * info_out, time_t stoptime)
{
	struct batch_job_info *info;
	UINT64_T jobid;

	itable_firstkey(q->job_table);
	if (itable_nextkey(q->job_table, &jobid, NULL)) {
		info = itable_remove(q->job_table, jobid);
		info->finished = time(0);
		info->exited_normally = 1;
		info->exit_code = 0;
		memcpy(info_out, info, sizeof(*info));
		free(info);
		return jobid;
	} else {
		return 0;
	}
}
Example #7
static int itable_double_buckets(struct itable *h)
{
	struct itable *hn = itable_create(2 * h->bucket_count);

	if(!hn)
		return 0;

	/* Move pairs to new hash */
	uint64_t key;
	void *value;
	itable_firstkey(h);
	while(itable_nextkey(h, &key, &value))
		if(!itable_insert(hn, key, value))
		{
			itable_delete(hn);
			return 0;
		}

	/* Delete all old pairs */
	struct entry *e, *f;
	int i;
	for(i = 0; i < h->bucket_count; i++) {
		e = h->buckets[i];
		while(e) {
			f = e->next;
			free(e);
			e = f;
		}
	}

	/* Make the old point to the new */
	free(h->buckets);
	h->buckets      = hn->buckets;
	h->bucket_count = hn->bucket_count;
	h->size         = hn->size;
	
	/* Delete reference to new, so old is safe */
	free(hn);

	return 1;
}
Example #8
struct cluster *cluster_nearest_neighbor(struct itable *active_clusters, struct cluster *c, double (*cmp)(struct cluster *, struct cluster *))
{
	uint64_t        ptr;
	struct cluster *nearest = NULL;
	struct cluster *other;
	double          dmin, dtest;

	itable_firstkey(active_clusters);
	while( itable_nextkey( active_clusters, &ptr, (void **) &other ) )
	{
		dtest = cmp(c, other);

		if( !nearest || dtest < dmin )
		{
			dmin    = dtest;
			nearest = other;
		}
	}

	return nearest; 
}
Example #9
void mpi_queue_delete(struct mpi_queue *q)
{
	if(q) {
		UINT64_T key;
		void *value;
		
		list_free(q->ready_list);
		list_delete(q->ready_list);
		list_free(q->complete_list);
		list_delete(q->complete_list);
		
		itable_firstkey(q->active_list);
		while(itable_nextkey(q->active_list, &key, &value)) {
			free(value);
			itable_remove(q->active_list, key);
		}
		itable_delete(q->active_list);
		
		link_close(q->master_link);
		free(q);
	}
}
Example #10
double *histogram_buckets(struct histogram *h) {

	int n = histogram_size(h);

	if(n < 1) {
		return NULL;
	}

	double *values = calloc(histogram_size(h), sizeof(double));

	int i = 0;
	uint64_t key;
	struct box_count *box;

	itable_firstkey(h->buckets);
	while(itable_nextkey(h->buckets, &key, (void **) &box)) {
		values[i] = end_of(h, key);
		i++;
	}

	qsort(values, n, sizeof(double), cmp_double);

	return values;
}
Example #11
int main( int argc, char *argv[] )
{
	signed char c;

	const char *progname = "wavefront";

	debug_config(progname);

	progress_log_file = stdout;

	struct option long_options[] = {
		{"help",  no_argument, 0, 'h'},
		{"version", no_argument, 0, 'v'},
		{"debug", required_argument, 0, 'd'},
		{"jobs", required_argument, 0, 'n'},
		{"block-size", required_argument, 0, 'b'},
		{"debug-file", required_argument, 0, 'o'},
		{"log-file", required_argument, 0, 'l'},
		{"bitmap", required_argument, 0, 'B'},
		{"bitmap-interval", required_argument, 0, 'i'},
		{"auto", no_argument, 0, 'A'},
		{"local", no_argument, 0, 'L'},
		{"batch-type", required_argument, 0, 'T'},
		{"verify", no_argument, 0, 'V'},
		{0,0,0,0}
	};

	while((c=getopt_long(argc,argv,"n:b:d:o:l:B:i:qALDT:VX:Y:vh", long_options, NULL)) > -1) {
		switch(c) {
			case 'n':
				manual_max_jobs_running = atoi(optarg);
				break;
			case 'b':
				manual_block_size = atoi(optarg);
				break;
			case 'd':
				debug_flags_set(optarg);
				break;
			case 'o':
				debug_config_file(optarg);
				break;
			case 'B':
				progress_bitmap_file = optarg;
				break;
			case 'i':
				progress_bitmap_interval = atoi(optarg);
				break;
			case 'l':
				progress_log_file = fopen(optarg,"w");
				if(!progress_log_file) {
					fprintf(stderr,"couldn't open %s: %s\n",optarg,strerror(errno));
					return 1;
				}
				break;
			case 'A':
				wavefront_mode = WAVEFRONT_MODE_AUTO;
				break;
			case 'L':
				wavefront_mode = WAVEFRONT_MODE_MULTICORE;
				break;
			case 'T':
				wavefront_mode = WAVEFRONT_MODE_DISTRIBUTED;
				batch_system_type = batch_queue_type_from_string(optarg);
				if(batch_system_type==BATCH_QUEUE_TYPE_UNKNOWN) {
					fprintf(stderr,"unknown batch system type: %s\n",optarg);
					exit(1);
				}
				break;
			case 'V':
				verify_mode = 1;
				break;
			case 'X':
				xstart = atoi(optarg);
				break;
			case 'Y':
				ystart = atoi(optarg);
				break;
			case 'v':
				cctools_version_print(stdout, progname);
				exit(0);
				break;
			case 'h':
				show_help(progname);
				exit(0);
				break;
		}
	}

	cctools_version_debug(D_DEBUG, argv[0]);

	if( (argc-optind<3) ) {
		show_help(progname);
		exit(1);
	}

	function = argv[optind];
	xsize=atoi(argv[optind+1]);
	ysize=atoi(argv[optind+2]);
	total_cells = xsize*ysize;

	if(!verify_mode && !check_configuration(function,xsize,ysize)) exit(1);

	int ncpus = load_average_get_cpus();

	if(wavefront_mode!=WAVEFRONT_MODE_MULTICORE) {
		double task_time = measure_task_time();
		printf("Each function takes %.02lfs to run.\n",task_time);

		block_size = find_best_block_size(xsize,1000,2,task_time,average_dispatch_time);
		double distributed_time = wavefront_distributed_model(xsize,1000,2,task_time,block_size,average_dispatch_time);
		double multicore_time = wavefront_multicore_model(xsize,ncpus,task_time);
		double ideal_multicore_time = wavefront_multicore_model(xsize,xsize,task_time);
		double sequential_time = wavefront_multicore_model(xsize,1,task_time);

		printf("---------------------------------\n");
		printf("This workload would take:\n");
		printf("%.02lfs sequentially\n",sequential_time);
		printf("%.02lfs on this %d-core machine\n",multicore_time,ncpus);
		printf("%.02lfs on a %d-core machine\n",ideal_multicore_time,xsize);
		printf("%.02lfs on a 1000-node distributed system with block size %d\n",distributed_time,block_size);
		printf("---------------------------------\n");

		if(wavefront_mode==WAVEFRONT_MODE_AUTO) {
			if(multicore_time < distributed_time*2) {
				wavefront_mode = WAVEFRONT_MODE_MULTICORE;
			} else {
				wavefront_mode = WAVEFRONT_MODE_DISTRIBUTED;
			}
		}
	}

	if(wavefront_mode==WAVEFRONT_MODE_MULTICORE) {
		batch_system_type = BATCH_QUEUE_TYPE_LOCAL;
		max_jobs_running = ncpus;
	} else {
		max_jobs_running = 1000;
	}

	if(manual_block_size!=0) {
		block_size = manual_block_size;
	}

	if(manual_max_jobs_running!=0) {
		max_jobs_running = manual_max_jobs_running;
	}

	if(wavefront_mode==WAVEFRONT_MODE_MULTICORE) {
		printf("Running in multicore mode with %d CPUs.\n",max_jobs_running);
	} else {
		printf("Running in distributed mode with block size %d on up to %d CPUs\n",block_size,max_jobs_running);
	}

	batch_q = batch_queue_create(batch_system_type);

	if(verify_mode) exit(0);

	struct bitmap * b = bitmap_create(xsize+1,ysize+1);
	struct list *ready_list = list_create();
	struct itable *running_table = itable_create(0);

	struct batch_job_info info;
	UINT64_T jobid;
	struct wavefront_task *task;

	wavefront_task_initialize(b,ready_list);

	printf("Starting workload...\n");

	fprintf(progress_log_file,"# elapsed time : waiting jobs / running jobs / cells complete (percent complete)\n");

	while(1) {

		if(abort_mode) {
			while((task=list_pop_tail(ready_list))) {
				wavefront_task_delete(task);
			}

			itable_firstkey(running_table);
			while(itable_nextkey(running_table,&jobid,(void**)&task)) {
				batch_job_remove(batch_q,jobid);
			}
		}

		if(list_size(ready_list)==0 && itable_size(running_table)==0) break;

		while(1) {
			if(itable_size(running_table)>=max_jobs_running) break;

			task = list_pop_tail(ready_list);
			if(!task) break;
			
			jobid = wavefront_task_submit(task);
			if(jobid>0) {
				itable_insert(running_table,jobid,task);
				wavefront_task_mark_range(task,b,WAVEFRONT_TASK_STATE_RUNNING);
			} else {
				abort();
				sleep(1);
				list_push_head(ready_list,task);
			}
		}


		save_status(b,ready_list,running_table);

		jobid = batch_job_wait(batch_q,&info);
		if(jobid>0) {
			task = itable_remove(running_table,jobid);
			if(task) {
				if(info.exited_normally && info.exit_code==0) {
					total_dispatch_time += info.started-info.submitted;
					total_execute_time += MAX(info.finished-info.started,1);
					total_cells_complete+=task->width*task->height;
					total_jobs_complete++;

					average_dispatch_time = 1.0*total_dispatch_time / total_jobs_complete;
					average_task_time = 1.0*total_execute_time / total_cells_complete;

					wavefront_task_complete(b,ready_list,task);
				} else {
					printf("job %" PRIu64 " failed, aborting this workload\n",jobid);
					abort_mode = 1;
				}
			}
		}
	}

	save_status(b,ready_list,running_table);

	if(abort_mode) {
		printf("Workload was aborted.\n");
	} else {
		printf("Workload complete.\n");
	}

	return 0;
}
Example #12
static batch_job_id_t batch_job_amazon_batch_wait(struct batch_queue *q, struct batch_job_info *info_out, time_t stoptime){
	struct internal_amazon_batch_amazon_ids amazon_ids = initialize(q);
	//succeeded check
	int done  = 0;
	char* env_var = amazon_ids.master_env_prefix;
	itable_firstkey(amazon_job_ids);
	char* jaid;
	UINT64_T jobid;
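	/* Poll every outstanding AWS Batch job and return the first one that has finished or failed. */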
	while(itable_nextkey(amazon_job_ids,&jobid,(void**)&jaid)){
		done = describe_aws_job(jaid,env_var);
		char* jobname = string_format("%s_%u",queue_name,(unsigned int)jobid);
		unsigned int id = (unsigned int)jobid;
		if(done == DESCRIBE_AWS_JOB_SUCCESS){
			if(itable_lookup(done_jobs,id+1) == NULL){
				//id is done, returning here
				debug(D_BATCH,"Inserting id: %u into done_jobs",id);
				itable_insert(done_jobs,id+1,jobname);
				itable_remove(amazon_job_ids,jobid);
				
				//pull files from s3
				char* output_files = itable_lookup(done_files,id);
				struct list* file_list = extract_file_names_from_list(output_files);
				if(list_size(file_list)> 0){
					list_first_item(file_list);
					char* cur_file = NULL;
					while((cur_file=list_next_item(file_list)) != NULL){
						debug(D_BATCH,"Copying over %s",cur_file);
						char* get_from_s3_cmd = string_format("%s aws s3 cp s3://%s/%s.txz ./%s.txz && tar -xvf %s.txz && rm %s.txz",env_var,bucket_name,cur_file,cur_file, cur_file, cur_file);
						int outputcode = sh_system(get_from_s3_cmd);
						debug(D_BATCH,"output code from calling S3 to pull file %s: %i",cur_file,outputcode);
						FILE* tmpOut = fopen(cur_file,"r");
						if(tmpOut){
							debug(D_BATCH,"File does indeed exist: %s",cur_file);
							fclose(tmpOut);
						}else{
							debug(D_BATCH,"File doesn't exist: %s",cur_file);
						}
						free(get_from_s3_cmd);
					}
				}
				list_free(file_list);
				list_delete(file_list);
				
				//Let Makeflow know we're all done!
				debug(D_BATCH,"Removing the job from the job_table");
				struct batch_job_info* info = itable_remove(q->job_table, id);//got from batch_job_amazon.c
				info->finished = time(0);//get now
				info->exited_normally=1;
				info->exit_code=finished_aws_job_exit_code(jaid,env_var);
				debug(D_BATCH,"copying over the data to info_out");
				memcpy(info_out, info, sizeof(struct batch_job_info));
				free(info);
                                
				char* jobdef = aws_job_def(jaid);
				del_job_def(jobdef);
				free(jobdef);
                                
				return id;
			}
		}else if(done == DESCRIBE_AWS_JOB_FAILED || done == DESCRIBE_AWS_JOB_NON_EXIST){
			if(itable_lookup(done_jobs,id+1)==NULL){
				//id is done, returning here
				itable_insert(done_jobs,id+1,jobname);
				itable_remove(amazon_job_ids,jobid);
				
				debug(D_BATCH,"Failed job: %i",id);
				
				struct batch_job_info* info = itable_remove(q->job_table, id);//got from batch_job_amazon.c
				info->finished = time(0); //get now
				info->exited_normally=0;
				int exc = finished_aws_job_exit_code(jaid,env_var);
				info->exit_code= exc == 0 ? -1 : exc;
				memcpy(info_out, info, sizeof(*info));
				free(info);
                                
				char* jobdef = aws_job_def(jaid);
				del_job_def(jobdef);
				free(jobdef);
                                
				return id;
			}
		}else{
			continue;
		}
	}
	return -1;
}
Example #13
int master_main(const char *host, int port, const char *addr) {
	time_t idle_stoptime;
	struct link *master = NULL;
	int num_workers, i;
	struct mpi_queue_job **workers;

	struct itable *active_jobs = itable_create(0);
	struct itable *waiting_jobs = itable_create(0);
	struct list   *complete_jobs = list_create();

	MPI_Comm_size(MPI_COMM_WORLD, &num_workers);

	workers = malloc(num_workers * sizeof(*workers));
	memset(workers, 0, num_workers * sizeof(*workers));	
	
	idle_stoptime = time(0) + idle_timeout;

	while(!abort_flag) {
		char line[MPI_QUEUE_LINE_MAX];

		if(time(0) > idle_stoptime) {
			if(master) {
				printf("mpi master: gave up after waiting %ds to receive a task.\n", idle_timeout);
			} else {
				printf("mpi master: gave up after waiting %ds to connect to %s port %d.\n", idle_timeout, host, port);
			}
			break;
		}


		if(!master) {
			char working_dir[MPI_QUEUE_LINE_MAX];
			master = link_connect(addr, port, idle_stoptime);
			if(!master) {
				sleep(5);
				continue;
			}

			link_tune(master, LINK_TUNE_INTERACTIVE);
			
			link_readline(master, line, sizeof(line), time(0) + active_timeout);

			memset(working_dir, 0, MPI_QUEUE_LINE_MAX);
			if(sscanf(line, "workdir %s", working_dir) == 1) {
				MPI_Bcast(working_dir, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD);
			} else {
				link_close(master);
				master = NULL;
				continue;
			}
		}
		
		if(link_readline(master, line, sizeof(line), time(0) + short_timeout)) {
			struct mpi_queue_operation *op;
			int jobid, mode;
			INT64_T length;
			char path[MPI_QUEUE_LINE_MAX];
			op = NULL;
			
			debug(D_MPI, "received: %s\n", line);

			if(!strcmp(line, "get results")) {
				struct mpi_queue_job *job;
				debug(D_MPI, "results requested: %d available\n", list_size(complete_jobs));
				link_putfstring(master, "num results %d\n", time(0) + active_timeout, list_size(complete_jobs));
				while(list_size(complete_jobs)) {
					job = list_pop_head(complete_jobs);
					link_putfstring(master, "result %d %d %d %lld\n", time(0) + active_timeout, job->jobid, job->status, job->result, job->output_length);
					if(job->output_length) {
						link_write(master, job->output, job->output_length, time(0)+active_timeout);
					}
					mpi_queue_job_delete(job);
				}

			} else if(sscanf(line, "work %d %lld", &jobid, &length) == 2) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_WORK;
				op->buffer_length = length+1;
				op->buffer = malloc(length+1);
				op->buffer[length] = 0;
				link_read(master, op->buffer, length, time(0) + active_timeout);
				op->result = -1;
				
			} else if(sscanf(line, "stat %d %s", &jobid, path) == 2) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_STAT;
				sprintf(op->args, "%s", path);
				op->result = -1;
				
			} else if(sscanf(line, "unlink %d %s", &jobid, path) == 2) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_UNLINK;
				sprintf(op->args, "%s", path);
				op->result = -1;
				
			} else if(sscanf(line, "mkdir %d %s %o", &jobid, path, &mode) == 3) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_MKDIR;
				sprintf(op->args, "%s %o", path, mode);
				op->result = -1;
				
			} else if(sscanf(line, "close %d", &jobid) == 1) {
				op = malloc(sizeof(*op));
				memset(op, 0, sizeof(*op));
				op->type = MPI_QUEUE_OP_CLOSE;
				op->result = -1;
				
//			} else if(sscanf(line, "symlink %d %s %s", &jobid, path, filename) == 3) {
//			} else if(sscanf(line, "put %d %s %lld %o", &jobid, filename, &length, &mode) == 4) {
//			} else if(sscanf(line, "rget %d %s", &jobid, filename) == 2) {
//			} else if(sscanf(line, "get %d %s", &jobid, filename) == 2) {
//			} else if(sscanf(line, "thirdget %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) {
//			} else if(sscanf(line, "thirdput %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) {
			} else if(!strcmp(line, "exit")) {
				break;
			} else {
				abort_flag = 1;
				continue;
			}
			if(op) {
				struct mpi_queue_job *job;
				job = itable_lookup(active_jobs, jobid);
				if(!job) {
					job = itable_lookup(waiting_jobs, jobid);
				}
				if(!job) {
					job = malloc(sizeof(*job));
					memset(job, 0, sizeof(*job));
					job->jobid = jobid;
					job->operations = list_create();
					job->status = MPI_QUEUE_JOB_WAITING;
					job->worker_rank = -1;
					itable_insert(waiting_jobs, jobid, job);
				}
				list_push_tail(job->operations, op);
			}
			idle_stoptime = time(0) + idle_timeout;
		} else {
			link_close(master);
			master = 0;
			sleep(5);
		}
		
		int num_waiting_jobs = itable_size(waiting_jobs);
		int num_unvisited_jobs = itable_size(active_jobs);
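		/* Hand waiting jobs to idle worker ranks and poll busy workers for completed operations. */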
		for(i = 1; i < num_workers && (num_unvisited_jobs > 0 || num_waiting_jobs > 0); i++) {
			struct mpi_queue_job *job;
			struct mpi_queue_operation *op;
			int flag = 0;
			UINT64_T jobid;

			if(!workers[i]) {
				if(num_waiting_jobs) {
					itable_firstkey(waiting_jobs);
					itable_nextkey(waiting_jobs, &jobid, (void **)&job);
					itable_remove(waiting_jobs, jobid);
					itable_insert(active_jobs, jobid, job);
					workers[i] = job;
					num_waiting_jobs--;
					job->worker_rank = i;
					job->status = MPI_QUEUE_JOB_READY;
				} else {
					continue;
				}
			} else {
				num_unvisited_jobs--;
				if(workers[i]->status == MPI_QUEUE_JOB_BUSY) {
					MPI_Test(&workers[i]->request, &flag, &workers[i]->mpi_status);
					if(flag) {
						op = list_pop_head(workers[i]->operations);
						if(op->output_length) {
							op->output_buffer = malloc(op->output_length);
							MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status);
						}
						
						workers[i]->status = MPI_QUEUE_JOB_READY;

						if(op->type == MPI_QUEUE_OP_WORK || op->result < 0) {
							if(workers[i]->output)
								free(workers[i]->output);
							workers[i]->output = op->output_buffer;
							op->output_buffer = NULL;
							workers[i]->output_length = op->output_length;
							workers[i]->result = op->result;
							if(op->result < 0) {
								workers[i]->status = MPI_QUEUE_JOB_FAILED | op->type;
								op->type = MPI_QUEUE_OP_CLOSE;
								list_push_head(workers[i]->operations, op);
								op = NULL;
							}
						}
						if(op) {
							if(op->buffer)
								free(op->buffer);
							if(op->output_buffer)
								free(op->output_buffer);
							free(op);
						}
					}
				}
			}
			
			if( workers[i]->status != MPI_QUEUE_JOB_BUSY && list_size(workers[i]->operations)) {
				op = list_peek_head(workers[i]->operations);
				
				if(op->type == MPI_QUEUE_OP_CLOSE) {
					itable_remove(active_jobs, workers[i]->jobid);
					list_push_tail(complete_jobs, workers[i]);
					if(!(workers[i]->status & MPI_QUEUE_JOB_FAILED))
						workers[i]->status = MPI_QUEUE_JOB_COMPLETE;
					workers[i] = NULL;
					i--;
					continue;
				}
				
				MPI_Send(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD);
				if(op->buffer_length) {
					MPI_Send(op->buffer, op->buffer_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD);
					free(op->buffer);
					op->buffer_length = 0;
					op->buffer = NULL;
				}
				MPI_Irecv(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->request);
				workers[i]->status = MPI_QUEUE_JOB_BUSY;
			}
		}
	}


	/** Clean up waiting & complete jobs, send Exit commands to each worker */
	if(!master) {
		// If the master link hasn't been set up yet
		// the workers will be waiting for the working directory
		char line[MPI_QUEUE_LINE_MAX];
		memset(line, 0, MPI_QUEUE_LINE_MAX);
		MPI_Bcast(line, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD);
	} else {
		link_close(master);
	}

	for(i = 1; i < num_workers; i++) {
		struct mpi_queue_operation *op, close;
		memset(&close, 0, sizeof(close));
		close.type = MPI_QUEUE_OP_EXIT;
		
		if(workers[i]) {
			if(workers[i]->status == MPI_QUEUE_JOB_BUSY) {
				MPI_Wait(&workers[i]->request, &workers[i]->mpi_status);
				op = list_peek_head(workers[i]->operations);
				
				if(op->output_length) {
					op->output_buffer = malloc(op->output_length);
					MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status);
				}
			}
			itable_remove(active_jobs, workers[i]->jobid);
			list_push_tail(complete_jobs, workers[i]);
		}
		MPI_Send(&close, sizeof(close), MPI_BYTE, i, 0, MPI_COMM_WORLD);
	}

	itable_firstkey(waiting_jobs);
	while(itable_size(waiting_jobs)) {
		struct mpi_queue_job *job;
		UINT64_T jobid;

		itable_nextkey(waiting_jobs, &jobid, (void **)&job);
		itable_remove(waiting_jobs, jobid);
		list_push_tail(complete_jobs, job);
	}

	while(list_size(complete_jobs)) {
		mpi_queue_job_delete(list_pop_head(complete_jobs));
	}

	MPI_Finalize();
	return abort_flag;
}