Beispiel #1
0
/*
 * Apply one batch-queue option to the underlying Work Queue master stored
 * in q->data.
 *
 * q     - batch queue whose q->data is handed to the work_queue_* calls.
 * what  - option name; unrecognized names are silently ignored.
 * value - option value as a string. It may be NULL: options that have a
 *         default below fall back to it, the others are left unchanged.
 */
static void batch_queue_wq_option_update (struct batch_queue *q, const char *what, const char *value)
{
	if(strcmp(what, "password") == 0) {
		if(value)
			work_queue_specify_password(q->data, value);
	} else if(strcmp(what, "master-mode") == 0) {
		/* value is optional everywhere else in this function, so guard it
		 * here too: strcmp() on a NULL pointer is undefined behaviour. */
		if(value) {
			if(strcmp(value, "catalog") == 0)
				work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_CATALOG);
			else if(strcmp(value, "standalone") == 0)
				work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_STANDALONE);
		}
	} else if(strcmp(what, "name") == 0) {
		if(value)
			work_queue_specify_name(q->data, value);
	} else if(strcmp(what, "priority") == 0) {
		/* A missing value resets the priority to 0. */
		if(value)
			work_queue_specify_priority(q->data, atoi(value));
		else
			work_queue_specify_priority(q->data, 0);
	} else if(strcmp(what, "fast-abort") == 0) {
		if(value)
			work_queue_activate_fast_abort(q->data, atof(value));
	} else if(strcmp(what, "estimate-capacity") == 0) {
		/* NOTE(review): string_istrue() receives value unchecked, as in the
		 * original code; confirm that it tolerates NULL. */
		work_queue_specify_estimate_capacity_on(q->data, string_istrue(value));
	} else if(strcmp(what, "keepalive-interval") == 0) {
		if(value)
			work_queue_specify_keepalive_interval(q->data, atoi(value));
		else
			work_queue_specify_keepalive_interval(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL);
	} else if(strcmp(what, "keepalive-timeout") == 0) {
		if(value)
			work_queue_specify_keepalive_timeout(q->data, atoi(value));
		else
			work_queue_specify_keepalive_timeout(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT);
	} else if(strcmp(what, "master-preferred-connection") == 0) {
		if(value)
			work_queue_master_preferred_connection(q->data, value);
		else
			work_queue_master_preferred_connection(q->data, "by_ip");
	} else if(strcmp(what, "category-limits") == 0) {
		/* Do not hand a NULL string to the parser, and never pass NULL to a
		 * "%s" conversion in the failure message. */
		struct rmsummary *s = value ? rmsummary_parse_string(value) : NULL;
		if(s) {
			work_queue_specify_category_max_resources(q->data, s->category, s);
			rmsummary_delete(s);
		} else {
			debug(D_NOTICE, "Could no parse '%s' as a summary of resorces encoded in JSON\n", value ? value : "(null)");
		}
	}
}
static void batch_queue_wq_option_update (struct batch_queue *q, const char *what, const char *value)
{
	if(strcmp(what, "password") == 0) {
		if(value)
			work_queue_specify_password(q->data, value);
	} else if(strcmp(what, "master-mode") == 0) {
		if(strcmp(value, "catalog") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_CATALOG);
		else if(strcmp(value, "standalone") == 0)
			work_queue_specify_master_mode(q->data, WORK_QUEUE_MASTER_MODE_STANDALONE);
	} else if(strcmp(what, "name") == 0) {
		if(value)
			work_queue_specify_name(q->data, value);
	} else if(strcmp(what, "priority") == 0) {
		if(value)
			work_queue_specify_priority(q->data, atoi(value));
		else
			work_queue_specify_priority(q->data, 0);
	} else if(strcmp(what, "estimate-capacity") == 0) {
		work_queue_specify_estimate_capacity_on(q->data, string_istrue(value));
	} else if(strcmp(what, "keepalive-interval") == 0) {
		if(value)
			work_queue_specify_keepalive_interval(q->data, atoi(value));
		else
			work_queue_specify_keepalive_interval(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL);
	} else if(strcmp(what, "keepalive-timeout") == 0) {
		if(value)
			work_queue_specify_keepalive_timeout(q->data, atoi(value));
		else
			work_queue_specify_keepalive_timeout(q->data, WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT);
	} else if(strcmp(what, "wait-queue-size") == 0) {
		if(value)
			work_queue_activate_worker_waiting(q->data, atoi(value));
		else
			work_queue_activate_worker_waiting(q->data, 0);
	} else if(strcmp(what, "master-preferred-connection") == 0) {
		if(value)
			work_queue_master_preferred_connection(q->data, value);
		else
			work_queue_master_preferred_connection(q->data, "by_ip");
	}
}
Beispiel #3
0
/*
 * Driver for the Work Queue based sort.
 *
 * Usage (as parsed below): [options] <sort executable> <input file>
 *
 * Steps, in order:
 *   1. parse the command line options;
 *   2. optionally (-R / -M) print runtime estimates and exit;
 *   3. create the work queue and configure name and keepalive settings;
 *   4. optionally (-S) run a sample over 5% of the records;
 *   5. optionally (-A) pick the partition count with the best estimate;
 *   6. partition the input into tasks, wait for them, merge the outputs;
 *   7. print the timings and also write them to "wq_sort.times".
 *
 * All timings are in microseconds, derived from gettimeofday().
 * The exit codes of the original program are preserved.
 */
int main(int argc, char *argv[])
{
	struct work_queue *q;
	int port = 0; //pick an arbitrary port
	int c;

	char *sort_arguments = NULL;
	const char *proj_name = NULL;
	char *outfile= NULL;
	int auto_partition = 0;
	int sample_env = 0;
	int print_runtime_estimates = 0;
	int estimate_partition= 0;
	struct timeval current;
	long long unsigned int execn_start_time, execn_time, workload_runtime;
	int keepalive_interval = 300;
	int keepalive_timeout = 30;

	unsigned long long records = 0;
	int partitions = PARTITION_DEFAULT;
	int sample_size = SAMPLE_SIZE_DEFAULT;

	gettimeofday(&current, 0);
	execn_start_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;

	debug_flags_set("all");
	if(argc < 3) {
		show_help(argv[0]);
		return 0;
	}

	/* getopt() returns an int and signals the end of the options with -1.
	 * Comparing against (char) -1 never matches where char is unsigned,
	 * which would turn this loop into an endless one. */
	while((c = getopt(argc, argv, "N:k:o:ASs:p:MR:L:I:T:B:h")) != -1) {
		switch (c) {
		case 'N':
			proj_name = strdup(optarg);
			break;
		case 'k':
			partitions = atoi(optarg);
			break;
		case 'o':
			outfile = strdup(optarg);
			break;
		case 'A':
			auto_partition = 1;
			break;
		case 's':
			sample_size = atoi(optarg);
			break;
		case 'S':
			sample_env = 1;
			break;
		case 'p':
			sort_arguments = strdup(optarg);
			break;
		case 'M':
			print_runtime_estimates = 1;
			break;
		case 'R':
			estimate_partition = atoi(optarg);
			break;
		case 'L':
			records = atoll(optarg);
			break;
		case 'I':
			keepalive_interval = atoi(optarg);
			break;
		case 'T':
			keepalive_timeout = atoi(optarg);
			break;
		case 'B':
			bandwidth_bytes_per_sec = atoi(optarg) * 1000000;
			break;
		case 'h':
			show_help(argv[0]);
			return 0;
		default:
			show_help(argv[0]);
			return -1;
		}
	}

	/* Both positional arguments must still be present once the options
	 * are consumed; otherwise argv[optind] or argv[optind+1] is NULL. */
	if(optind + 1 >= argc) {
		show_help(argv[0]);
		return -1;
	}

	char sort_executable[256], infile[256];
	off_t last_partition_offset_end = 0;
	/* Defaults so that nothing indeterminate is printed or used when no
	 * estimate beats the initial DBL_MAX (or a loop below runs zero times). */
	int optimal_partitions = partitions;
	int optimal_resources = 0;
	int current_optimal_partitions;
	double current_optimal_time = DBL_MAX;
	double optimal_times[5] = { 0 };
	int sample_partition_offset_end = 0;
	int i, j;
	int written;

	/* Bounded copies: reject paths that do not fit instead of overflowing
	 * the fixed-size stack buffers. */
	written = snprintf(sort_executable, sizeof(sort_executable), "%s", argv[optind]);
	if(written < 0 || (size_t) written >= sizeof(sort_executable)) {
		fprintf(stderr, "Path of the sort executable is too long. Quitting...\n");
		return -1;
	}
	written = snprintf(infile, sizeof(infile), "%s", argv[optind+1]);
	if(written < 0 || (size_t) written >= sizeof(infile)) {
		fprintf(stderr, "Path of the input file is too long. Quitting...\n");
		return -1;
	}

	if(!outfile){
		/* Default output name: "<basename of input>.sorted".
		 * basename() may modify its argument, so it works on a copy. */
		char *infile_dup = strdup(infile);
		size_t outfile_size = strlen(infile) + 8; /* ".sorted" plus the terminating NUL */
		outfile = (char *) malloc(outfile_size * sizeof(char));
		snprintf(outfile, outfile_size, "%s.sorted", basename(infile_dup));
		free(infile_dup);
	}

	if(records == 0) {
		records = get_total_lines(infile);
		fprintf(stdout, "Input file %s has %llu records to sort\n", infile, records);
		if(records == 0) {
			fprintf(stderr, "Error in reading records. Quitting...\n");
			return 0;
		}
	}

	if(estimate_partition) {
		for (i = 1; i <= 2*estimate_partition; i++) {
			/* The original code passed the returned array to free(), so it
			 * is treated as owned by the caller and released after every
			 * call. The buffer that used to be malloc'd up front was
			 * overwritten immediately and therefore only leaked. */
			double *estimated_runtimes = sort_estimate_runtime(infile, sort_executable, records, i, estimate_partition);
			if(!estimated_runtimes)
				continue;
			if(estimated_runtimes[0] < current_optimal_time) {
				current_optimal_time = estimated_runtimes[0];
				for (j = 0; j < 5; j++)
					optimal_times[j] = estimated_runtimes[j];
				optimal_resources = i;
			}
			free(estimated_runtimes);
		}
		fprintf(stdout, "For partition %d: %d %f %f %f %f %f\n", estimate_partition, optimal_resources, optimal_times[0], optimal_times[1], optimal_times[2], optimal_times[3], optimal_times[4]);
		return 1;
	}

	if(print_runtime_estimates) {
		fprintf(stdout, "Resources \t Partitions \t Runtime \t Part time \t Merge time \t Task time \t Transfer time\n");
		for (i = 1; i <= 100; i++) {
			optimal_partitions = get_optimal_runtimes(infile, sort_executable, i, records, optimal_times);
			fprintf(stdout, "%d \t \t %d \t %f \t %f \t %f \t %f \t %f\n", i, optimal_partitions, optimal_times[0], optimal_times[1], optimal_times[2], optimal_times[3], optimal_times[4]);
		}
		return 1;
	}

	q = work_queue_create(port);
	if(!q) {
		fprintf(stderr, "couldn't listen on port %d: %s\n", port, strerror(errno));
		return 1;
	}

	fprintf(stdout, "listening on port %d...\n", work_queue_port(q));

	if(proj_name){
		work_queue_specify_master_mode(q, WORK_QUEUE_MASTER_MODE_CATALOG);
		work_queue_specify_name(q, proj_name);
	}
	work_queue_specify_keepalive_interval(q, keepalive_interval);
	work_queue_specify_keepalive_timeout(q, keepalive_timeout);

	free((void *)proj_name);

	fprintf(stdout, "%s will be run to sort contents of %s\n", sort_executable, infile);

	long long unsigned int sample_start_time, sample_end_time, sample_time = 0;
	if(sample_env) {
		gettimeofday(&current, 0);
		sample_start_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;
		int sample_record_size = (5*records)/100; //sample size is 5% of the total records

		/* ".sample" plus the terminating NUL needs 8 extra bytes. */
		char *sample_partition_file_prefix = (char *) malloc((strlen(outfile)+8) * sizeof(char));
		sprintf(sample_partition_file_prefix, "%s.sample", outfile);

		/* ".0" plus the terminating NUL needs 3 extra bytes. */
		char *sample_outfile = (char *) malloc((strlen(outfile)+3) * sizeof(char));
		sprintf(sample_outfile, "%s.0", outfile);

		sample_partition_offset_end = sample_run(q, sort_executable, sort_arguments, infile, 0, sample_partition_file_prefix, sample_outfile, sample_size, sample_record_size);

		/* The sampled records are not partitioned again below. */
		records -= sample_record_size;

		free(sample_partition_file_prefix);
		free(sample_outfile);
		gettimeofday(&current, 0);
		sample_end_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;
		sample_time = sample_end_time - sample_start_time;
		fprintf(stdout, "Sampling time is %llu\n", sample_time);
	}

	if(auto_partition) {
		fprintf(stdout, "Determining optimal partition size for %s\n", infile);
		/* Try 1..100 resources and keep the configuration with the lowest
		 * estimated runtime (optimal_times[0]). */
		for (i = 1; i <= 100; i++) {
			current_optimal_partitions = get_optimal_runtimes(infile, sort_executable, i, records, optimal_times);
			if (optimal_times[0] < current_optimal_time) {
				current_optimal_time = optimal_times[0];
				optimal_partitions = current_optimal_partitions;
				optimal_resources = i;
			}
		}
		fprintf(stdout, "Optimal partition size is %d that runs the workload in %f\n", optimal_partitions, current_optimal_time);
		fprintf(stdout, "--> Please allocate %d resources for running this workload in a cost-efficient manner.\n", optimal_resources);
		partitions = optimal_partitions;
	}

	long long unsigned int part_start_time, part_end_time, part_time;
	gettimeofday(&current, 0);
	part_start_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;

	/* Partitioning starts where the sample run (if any) stopped. */
	last_partition_offset_end = partition_tasks(q, sort_executable, sort_arguments, infile, 0+sample_partition_offset_end, outfile, partitions, records);
	if(last_partition_offset_end <= 0) {
		fprintf(stderr, "Partitioning failed. Quitting...\n");
		return 0;
	}

	gettimeofday(&current, 0);
	part_end_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;
	part_time = part_end_time - part_start_time;
	fprintf(stdout, "Partition time is %llu\n", part_time);

	free(sort_arguments);

	fprintf(stdout, "Waiting for tasks to complete...\n");
	long long unsigned int parallel_start_time, parallel_end_time, parallel_time;
	gettimeofday(&current, 0);
	parallel_start_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;

	/* ".tasktimes" plus the terminating NUL needs 11 extra bytes. */
	char *record_task_times_file = (char *)malloc((strlen(outfile)+11) * sizeof(char));
	sprintf(record_task_times_file, "%s.tasktimes", outfile);
	wait_partition_tasks(q, 5, record_task_times_file);
	free(record_task_times_file);

	gettimeofday(&current, 0);
	parallel_end_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;
	parallel_time = parallel_end_time - parallel_start_time;
	fprintf(stdout, "Parallel execution time is %llu\n", parallel_time);

	long long unsigned int merge_start_time, merge_end_time, merge_time;
	gettimeofday(&current, 0);
	merge_start_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;

	merge_sorted_outputs(outfile, outfile, created_partitions);

	gettimeofday(&current, 0);
	merge_end_time = ((long long unsigned int) current.tv_sec) * 1000000 + current.tv_usec;
	merge_time = merge_end_time - merge_start_time;
	fprintf(stdout, "Merge time is %llu\n", merge_time);

	fprintf(stdout, "Sorting complete. Output is at: %s!\n", outfile);

	/* Total time includes option parsing and sampling; workload time only
	 * covers partitioning through merging. */
	execn_time = merge_end_time - execn_start_time;
	workload_runtime = merge_end_time - part_start_time;
	fprintf(stdout, "Workload execn time is %llu\n", workload_runtime);
	fprintf(stdout, "Total execn time is %llu\n", execn_time);

	FILE *time_file = fopen("wq_sort.times", "w");
	if (time_file) {
		fprintf(time_file, "Partition time: %llu\n", part_time);
		fprintf(time_file, "Parallel time: %llu\n", parallel_time);
		fprintf(time_file, "Merge time: %llu\n", merge_time);
		if(sample_env)
			fprintf(time_file, "Sampling time: %llu\n", sample_time);
		fprintf(time_file, "Workload execution time: %llu\n", workload_runtime);
		fprintf(time_file, "Total execution time: %llu\n", execn_time);
		/* Close only a stream that was actually opened: fclose(NULL) is
		 * undefined behaviour. */
		fclose(time_file);
	} else {
		fprintf(stderr, "couldn't write wq_sort.times: %s\n", strerror(errno));
	}

	work_queue_delete(q);

	free(outfile);
	return 0;
}