Пример #1
0
void makeflow_wrapper_delete(struct makeflow_wrapper *w)
{
	if(w->command)
		free(w->command);

	list_free(w->input_files);
	list_delete(w->input_files);

	list_free(w->output_files);
	list_delete(w->output_files);

	if(w->uses_remote_rename){
		uint64_t f;
		char *remote;
		itable_firstkey(w->remote_names);
		while(itable_nextkey(w->remote_names, &f, (void **) &remote)){
			free(remote);
		}
	}
	itable_delete(w->remote_names);

	hash_table_delete(w->remote_names_inv);

	free(w);
}
Пример #2
0
void batch_queue_delete(struct batch_queue *q)
{
	if(q) {
		if(q->options_text)
			free(q->options_text);
		if(q->job_table)
			itable_delete(q->job_table);
		if(q->output_table)
			itable_delete(q->output_table);
		if(q->logfile)
			free(q->logfile);
		if(q->work_queue)
			work_queue_delete(q->work_queue);
		free(q);
	}
}
Пример #3
0
void histogram_delete(struct histogram *h) {
	histogram_clear(h);

	if(h->buckets) {
		itable_delete(h->buckets);
	}

	free(h);
}
Пример #4
0
void batch_queue_delete(struct batch_queue *q)
{
    if(q) {
        char *key;
        char *value;

        debug(D_BATCH, "deleting queue %p", q);

        q->module->free(q);

        for (hash_table_firstkey(q->options); hash_table_nextkey(q->options, &key, (void **) &value); free(value))
            ;
        hash_table_delete(q->options);
        for (hash_table_firstkey(q->features); hash_table_nextkey(q->features, &key, (void **) &value); free(value))
            ;
        hash_table_delete(q->features);
        itable_delete(q->job_table);
        itable_delete(q->output_table);
        free(q);
    }
}
Пример #5
0
static int itable_double_buckets(struct itable *h)
{
	struct itable *hn = itable_create(2 * h->bucket_count);

	if(!hn)
		return 0;

	/* Move pairs to new hash */
	uint64_t key;
	void *value;
	itable_firstkey(h);
	while(itable_nextkey(h, &key, &value))
		if(!itable_insert(hn, key, value))
		{
			itable_delete(hn);
			return 0;
		}

	/* Delete all old pairs */
	struct entry *e, *f;
	int i;
	for(i = 0; i < h->bucket_count; i++) {
		e = h->buckets[i];
		while(e) {
			f = e->next;
			free(e);
			e = f;
		}
	}

	/* Make the old point to the new */
	free(h->buckets);
	h->buckets      = hn->buckets;
	h->bucket_count = hn->bucket_count;
	h->size         = hn->size;
	
	/* Delete reference to new, so old is safe */
	free(hn);

	return 1;
}
Пример #6
0
void mpi_queue_delete(struct mpi_queue *q)
{
	if(q) {
		UINT64_T key;
		void *value;
		
		list_free(q->ready_list);
		list_delete(q->ready_list);
		list_free(q->complete_list);
		list_delete(q->complete_list);
		
		itable_firstkey(q->active_list);
		while(itable_nextkey(q->active_list, &key, &value)) {
			free(value);
			itable_remove(q->active_list, key);
		}
		itable_delete(q->active_list);
		
		link_close(q->master_link);
		free(q);
	}
}
Пример #7
0
static void mainloop( struct batch_queue *queue )
{
	int workers_submitted = 0;
	struct itable *job_table = itable_create(0);

	struct list *masters_list = NULL;
	struct list *foremen_list = NULL;

	int64_t factory_timeout_start = time(0);

	while(!abort_flag) {

		if(config_file && !read_config_file(config_file)) {
			debug(D_NOTICE, "Error re-reading '%s'. Using previous values.", config_file);
		} else {
			set_worker_resources_options( queue );
			batch_queue_set_option(queue, "autosize", autosize ? "yes" : NULL);
		}

		submission_regex = foremen_regex ? foremen_regex : project_regex;

		if(using_catalog) {
			masters_list = work_queue_catalog_query(catalog_host,catalog_port,project_regex);
		}
		else {
			masters_list = do_direct_query(master_host,master_port);
		}

		if(masters_list && list_size(masters_list) > 0)
		{
			factory_timeout_start = time(0);
		} else {
			// check to see if factory timeout is triggered, factory timeout will be 0 if flag isn't set
			if(factory_timeout > 0)
			{
				if(time(0) - factory_timeout_start > factory_timeout) {
					fprintf(stderr, "There have been no masters for longer then the factory timeout, exiting\n");
					abort_flag=1;
					break;
				}
			}
		}
	
		debug(D_WQ,"evaluating master list...");
		int workers_needed    = count_workers_needed(masters_list, 0);
		int workers_connected = count_workers_connected(masters_list);

		debug(D_WQ,"%d total workers needed across %d masters",
				workers_needed,
				masters_list ? list_size(masters_list) : 0);

		if(foremen_regex)
		{
			debug(D_WQ,"evaluating foremen list...");
			foremen_list    = work_queue_catalog_query(catalog_host,catalog_port,foremen_regex);

			/* add workers on foremen. Also, subtract foremen from workers
			 * connected, as they were not deployed by the pool. */

			workers_needed    += count_workers_needed(foremen_list, 1);
			workers_connected += MAX(count_workers_connected(foremen_list) - list_size(foremen_list), 0);

			debug(D_WQ,"%d total workers needed across %d foremen",workers_needed,list_size(foremen_list));
		}

		debug(D_WQ,"raw workers needed: %d", workers_needed);

		if(workers_needed > workers_max) {
			debug(D_WQ,"applying maximum of %d workers",workers_max);
			workers_needed = workers_max;
		}

		if(workers_needed < workers_min) {
			debug(D_WQ,"applying minimum of %d workers",workers_min);
			workers_needed = workers_min;
		}

		int new_workers_needed = workers_needed - workers_submitted;

		if(workers_per_cycle > 0 && new_workers_needed > workers_per_cycle) {
			debug(D_WQ,"applying maximum workers per cycle of %d",workers_per_cycle);
			new_workers_needed = workers_per_cycle;
		}

		if(workers_per_cycle > 0 && workers_submitted > new_workers_needed + workers_connected) {
			debug(D_WQ,"waiting for %d previously submitted workers to connect", workers_submitted - workers_connected);
			new_workers_needed = 0;
		}

		debug(D_WQ,"workers needed: %d",    workers_needed);
		debug(D_WQ,"workers submitted: %d", workers_submitted);
		debug(D_WQ,"workers requested: %d", new_workers_needed);

		print_stats(masters_list, foremen_list, workers_submitted, workers_needed, new_workers_needed, workers_connected);

		update_blacklisted_workers(queue, masters_list);

		if(new_workers_needed>0) {
			debug(D_WQ,"submitting %d new workers to reach target",new_workers_needed);
			workers_submitted += submit_workers(queue,job_table,new_workers_needed);
		} else if(new_workers_needed<0) {
			debug(D_WQ,"too many workers, will wait for some to exit");
		} else {
			debug(D_WQ,"target number of workers is reached.");
		}

		debug(D_WQ,"checking for exited workers...");
		time_t stoptime = time(0)+5;

		while(1) {
			struct batch_job_info info;
			batch_job_id_t jobid;
			jobid = batch_job_wait_timeout(queue,&info,stoptime);
			if(jobid>0) {
				if(itable_lookup(job_table,jobid)) {
					itable_remove(job_table,jobid);
					debug(D_WQ,"worker job %"PRId64" exited",jobid);
					workers_submitted--;
				} else {
					// it may have been a job from a previous run.
				}
			} else {
				break;
			}
		}

		delete_projects_list(masters_list);
		delete_projects_list(foremen_list);

		sleep(factory_period);
	}

	remove_all_workers(queue,job_table);
	itable_delete(job_table);
}
Пример #8
0
static void mainloop( struct batch_queue *queue, const char *project_regex, const char *foremen_regex )
{
	int workers_submitted = 0;
	struct itable *job_table = itable_create(0);

	struct list *masters_list = NULL;
	struct list *foremen_list = NULL;

	const char *submission_regex = foremen_regex ? foremen_regex : project_regex;

	while(!abort_flag) {
		masters_list = work_queue_catalog_query(catalog_host,catalog_port,project_regex);

		debug(D_WQ,"evaluating master list...");
		int workers_needed = count_workers_needed(masters_list, 0);

		debug(D_WQ,"%d total workers needed across %d masters",
				workers_needed,
				masters_list ? list_size(masters_list) : 0);

		if(foremen_regex)
		{
			debug(D_WQ,"evaluating foremen list...");
			foremen_list    = work_queue_catalog_query(catalog_host,catalog_port,foremen_regex);
			workers_needed += count_workers_needed(foremen_list, 1);
			debug(D_WQ,"%d total workers needed across %d foremen",workers_needed,list_size(foremen_list));
		}

		debug(D_WQ,"raw workers needed: %d", workers_needed);

		if(workers_needed > workers_max) {
			debug(D_WQ,"applying maximum of %d workers",workers_max);
			workers_needed = workers_max;
		}

		if(workers_needed < workers_min) {
			debug(D_WQ,"applying minimum of %d workers",workers_min);
			workers_needed = workers_min;
		}

		int new_workers_needed = workers_needed - workers_submitted;

		debug(D_WQ,"workers needed: %d",workers_needed);
		debug(D_WQ,"workers in queue: %d",workers_submitted);

		print_stats(masters_list, foremen_list, workers_submitted, workers_needed, new_workers_needed);

		if(new_workers_needed>0) {
			debug(D_WQ,"submitting %d new workers to reach target",new_workers_needed);
			workers_submitted += submit_workers(queue,job_table,new_workers_needed,submission_regex);
		} else if(new_workers_needed<0) {
			debug(D_WQ,"too many workers, will wait for some to exit");
		} else {
			debug(D_WQ,"target number of workers is reached.");
		}

		debug(D_WQ,"checking for exited workers...");
		time_t stoptime = time(0)+5;

		while(1) {
			struct batch_job_info info;
			batch_job_id_t jobid;
			jobid = batch_job_wait_timeout(queue,&info,stoptime);
			if(jobid>0) {
				if(itable_lookup(job_table,jobid)) {
					itable_remove(job_table,jobid);
					debug(D_WQ,"worker job %"PRId64" exited",jobid);
					workers_submitted--;
				} else {
					// it may have been a job from a previous run.
				}
			} else {
				break;
			}
		}

		delete_projects_list(masters_list);
		delete_projects_list(foremen_list);

		sleep(30);
	}

	remove_all_workers(queue,job_table);
	itable_delete(job_table);
}
Пример #9
0
struct cluster *nearest_neighbor_clustering(struct list *initial_clusters, double (*cmp)(struct cluster *, struct cluster *))
{
	struct cluster *top, *closest, *subtop;
	struct list   *stack;
	struct itable *active_clusters;
	double dclosest, dsubtop;

	int merge = 0;

	list_first_item(initial_clusters);
	top = list_next_item(initial_clusters);

	/* Return immediately if top is NULL, or there is a unique
	 * initial cluster */
	if(list_size(initial_clusters) < 2)
		return top;

	stack = list_create(0);
	list_push_head(stack, top);

	/* Add all of the initial clusters as active clusters. */
	active_clusters = itable_create(0);
	while( (top = list_next_item(initial_clusters)) ) 
		itable_insert(active_clusters, (uintptr_t) top, (void *) top);

	do
	{
		/* closest might be NULL if all of the clusters are in
		 * the stack now. subtop might be NULL if top was the
		 * only cluster in the stack */
		top     = list_pop_head( stack );
		closest = cluster_nearest_neighbor(active_clusters, top, cmp);
		subtop  = list_peek_head( stack );

		dclosest = -1;
		dsubtop  = -1;

		if(closest)
			dclosest = cluster_ward_distance(top, closest);

		if(subtop)
			dsubtop = cluster_ward_distance(top, subtop);

		/* The nearest neighbor of top is either one of the
		 * remaining active clusters, or the second topmost
		 * cluster in the stack */
		if( closest && subtop )
		{
			/* Use pointer address to systematically break ties. */
			if(dclosest < dsubtop || ((dclosest == dsubtop) && (uintptr_t)closest < (uintptr_t)subtop)) 
				merge = 0;
			else 
				merge = 1;
		}
		else if( subtop )
			merge = 1;
		else if( closest )
			merge = 0;
		else
			fatal("Zero clusters?\n"); //We should never reach here.

		if(merge)
		{
			/* If the two topmost clusters in the stack are
			 * mutual nearest neighbors, merge them into a single
			 * cluster */
			subtop = list_pop_head( stack );
			list_push_head(stack, cluster_merge(top, subtop));
		}
		else
		{
			/* Otherwise, push the nearest neighbor of top to the
			 * stack */
			itable_remove(active_clusters, (uintptr_t) closest);
			list_push_head(stack, top);
			list_push_head(stack, closest);
		}

		debug(D_DEBUG, "stack: %d  active: %d  closest: %lf subtop: %lf\n", 
				list_size(stack), itable_size(active_clusters), dclosest, dsubtop);

		/* If there are no more active_clusters, but there is not
		 * a single cluster in the stack, we try again,
		 * converting the clusters in the stack into new active
		 * clusters. */
		if(itable_size(active_clusters) == 0 && list_size(stack) > 3)
		{
			itable_delete(active_clusters);
			return nearest_neighbor_clustering(stack, cmp);
		}

	}while( !(itable_size(active_clusters) == 0 && list_size(stack) == 1) );

	/* top is now the root of a cluster hierarchy, of
	 * cluster->right, cluster->left. */
	top = list_pop_head(stack);

	list_delete(stack);
	itable_delete(active_clusters);

	return top;
}
Пример #10
0
/** The clean_mode variable was added so that we could better print out error messages
 * apply in the situation. Currently only used to silence node rerun checking.
 */
void makeflow_log_recover(struct dag *d, const char *filename, int verbose_mode, struct batch_queue *queue, makeflow_clean_depth clean_mode)
{
	char *line, *name, file[MAX_BUFFER_SIZE];
	int nodeid, state, jobid, file_state;
	int first_run = 1;
	struct dag_node *n;
	struct dag_file *f;
	struct stat buf;
	timestamp_t previous_completion_time;

	d->logfile = fopen(filename, "r");
	if(d->logfile) {
		int linenum = 0;
		first_run = 0;

		printf("recovering from log file %s...\n",filename);

		while((line = get_line(d->logfile))) {
			linenum++;

			if(sscanf(line, "# %d %s %" SCNu64 "", &file_state, file, &previous_completion_time) == 3) {

				f = dag_file_lookup_or_create(d, file);
				f->state = file_state;
				if(file_state == DAG_FILE_STATE_EXISTS){
					d->completed_files += 1;
					f->creation_logged = (time_t) (previous_completion_time / 1000000);
				} else if(file_state == DAG_FILE_STATE_DELETE){
					d->deleted_files += 1;
				}
				continue;
			}
			if(line[0] == '#')
				continue;
			if(sscanf(line, "%" SCNu64 " %d %d %d", &previous_completion_time, &nodeid, &state, &jobid) == 4) {
				n = itable_lookup(d->node_table, nodeid);
				if(n) {
					n->state = state;
					n->jobid = jobid;
					/* Log timestamp is in microseconds, we need seconds for diff. */
					n->previous_completion = (time_t) (previous_completion_time / 1000000);
					continue;
				}
			}

			fprintf(stderr, "makeflow: %s appears to be corrupted on line %d\n", filename, linenum);
			exit(1);
		}
		fclose(d->logfile);
	}

	d->logfile = fopen(filename, "a");
	if(!d->logfile) {
		fprintf(stderr, "makeflow: couldn't open logfile %s: %s\n", filename, strerror(errno));
		exit(1);
	}
	if(setvbuf(d->logfile, NULL, _IOLBF, BUFSIZ) != 0) {
		fprintf(stderr, "makeflow: couldn't set line buffer on logfile %s: %s\n", filename, strerror(errno));
		exit(1);
	}

	if(first_run && verbose_mode) {
		struct dag_file *f;
		struct dag_node *p;
		for(n = d->nodes; n; n = n->next) {
			/* Record node information to log */
			fprintf(d->logfile, "# NODE\t%d\t%s\n", n->nodeid, n->command);

			/* Record the node category to the log */
			fprintf(d->logfile, "# CATEGORY\t%d\t%s\n", n->nodeid, n->category->label);
			fprintf(d->logfile, "# SYMBOL\t%d\t%s\n", n->nodeid, n->category->label);   /* also write the SYMBOL as alias of CATEGORY, deprecated. */

			/* Record node parents to log */
			fprintf(d->logfile, "# PARENTS\t%d", n->nodeid);
			list_first_item(n->source_files);
			while( (f = list_next_item(n->source_files)) ) {
				p = f->created_by;
				if(p)
					fprintf(d->logfile, "\t%d", p->nodeid);
			}
			fputc('\n', d->logfile);

			/* Record node inputs to log */
			fprintf(d->logfile, "# SOURCES\t%d", n->nodeid);
			list_first_item(n->source_files);
			while( (f = list_next_item(n->source_files)) ) {
				fprintf(d->logfile, "\t%s", f->filename);
			}
			fputc('\n', d->logfile);

			/* Record node outputs to log */
			fprintf(d->logfile, "# TARGETS\t%d", n->nodeid);
			list_first_item(n->target_files);
			while( (f = list_next_item(n->target_files)) ) {
				fprintf(d->logfile, "\t%s", f->filename);
			}
			fputc('\n', d->logfile);

			/* Record translated command to log */
			fprintf(d->logfile, "# COMMAND\t%d\t%s\n", n->nodeid, n->command);
		}
	}


	dag_count_states(d);

	// Check for log consistency
	if(!first_run) {
		hash_table_firstkey(d->files);
		while(hash_table_nextkey(d->files, &name, (void **) &f)) {
			if(dag_file_should_exist(f) && !dag_file_is_source(f) && !(batch_fs_stat(queue, f->filename, &buf) >= 0)){
				fprintf(stderr, "makeflow: %s is reported as existing, but does not exist.\n", f->filename);
				makeflow_log_file_state_change(d, f, DAG_FILE_STATE_UNKNOWN);
				continue;
			}
			if(S_ISDIR(buf.st_mode))
				continue;
			if(dag_file_should_exist(f) && !dag_file_is_source(f) && difftime(buf.st_mtime, f->creation_logged) > 0) {
				fprintf(stderr, "makeflow: %s is reported as existing, but has been modified (%" SCNu64 " ,%" SCNu64 ").\n", f->filename, (uint64_t)buf.st_mtime, (uint64_t)f->creation_logged);
				makeflow_clean_file(d, queue, f, 0);
				makeflow_log_file_state_change(d, f, DAG_FILE_STATE_UNKNOWN);
			}
		}
	}

	int silent = 0;
	if(clean_mode != MAKEFLOW_CLEAN_NONE)
		silent = 1;
	// Decide rerun tasks
	if(!first_run) {
		struct itable *rerun_table = itable_create(0);
		for(n = d->nodes; n; n = n->next) {
			makeflow_node_decide_rerun(rerun_table, d, n, silent);
		}
		itable_delete(rerun_table);
	}

	//Update file reference counts from nodes in log
	for(n = d->nodes; n; n = n->next) {
		if(n->state == DAG_NODE_STATE_COMPLETE)
		{
			struct dag_file *f;
			list_first_item(n->source_files);
			while((f = list_next_item(n->source_files)))
				f->ref_count += -1;
		}
	}
}