int main(int argc, char** argv) {
	ParseCommandLineFlags(argc, argv);
	MapReduceSpecification spec;
	// 把输入文件列表存入"spec"
	for (int i = 1; i < argc; i++) {
		MapReduceInput* input = spec.add_input();
		input->set_format("text");
		input->set_filepattern(argv[i]);
		input->set_mapper_class("WordCounter");
	}
	//指定输出文件:
	// /gfs/test/freq-00000-of-00100
	// /gfs/test/freq-00001-of-00100
	// ...
	MapReduceOutput* out = spec.output();
	out->set_filebase("/gfs/test/freq");
	out->set_num_tasks(100);
	out->set_format("text");
	out->set_reducer_class("Adder");
	// 可选操作:在map任务中做部分累加工作,以便节省带宽
	out->set_combiner_class("Adder");
	// 调整参数: 使用2000台机器,每个任务100MB内存
	spec.set_machines(2000);
	spec.set_map_megabytes(100);
	spec.set_reduce_megabytes(100);
	// 运行它
	MapReduceResult result;
	if (!MapReduce(spec, &result)) abort();
	// 完成: 'result'结构包含计数,花费时间,和使用机器的信息
	return 0;
}
Example no. 2
0
int main(int argc, char** argv) {
 ParseCommandLineFlags(argc, argv);
 
 MapReduceSpecification spec;
 
 // Store list of input files into "spec"
 for (int i = 1; i < argc; i++) {
  MapReduceInput* input = spec.add_input();
  input->set_format("text");
  input->set_filepattern(argv[i]);
  input->set_mapper_class("WordCounter");
 }
 // Specify the output files:
 // /gfs/test/freq-00000-of-00100
 // /gfs/test/freq-00001-of-00100
 // …
 MapReduceOutput* out = spec.output();
 out->set_filebase("/gfs/test/freq");
 out->set_num_tasks(100);
 out->set_format("text");
 out->set_reducer_class("Adder");
 
 // Optional: do partial sums within map
 // tasks to save network bandwidth
 out->set_combiner_class("Adder");
 // Tuning parameters: use at most 2000
 // machines and 100 MB of memory per task
 spec.set_machines(2000);
 spec.set_map_megabytes(100);
 spec.set_reduce_megabytes(100);
 
 // Now run it
 MapReduceResult result;
 if (!MapReduce(spec, &result)) abort();
 
 // Done: ‘result’ structure contains info
 // about counters, time taken, number of
 // machines used, etc.
 return 0;
}