int main(int argc, char** argv) {
	ParseCommandLineFlags(argc, argv);
	MapReduceSpecification spec;
	// 把输入文件列表存入"spec"
	for (int i = 1; i < argc; i++) {
		MapReduceInput* input = spec.add_input();
		input->set_format("text");
		input->set_filepattern(argv[i]);
		input->set_mapper_class("WordCounter");
	}
	//指定输出文件:
	// /gfs/test/freq-00000-of-00100
	// /gfs/test/freq-00001-of-00100
	// ...
	MapReduceOutput* out = spec.output();
	out->set_filebase("/gfs/test/freq");
	out->set_num_tasks(100);
	out->set_format("text");
	out->set_reducer_class("Adder");
	// 可选操作:在map任务中做部分累加工作,以便节省带宽
	out->set_combiner_class("Adder");
	// 调整参数: 使用2000台机器,每个任务100MB内存
	spec.set_machines(2000);
	spec.set_map_megabytes(100);
	spec.set_reduce_megabytes(100);
	// 运行它
	MapReduceResult result;
	if (!MapReduce(spec, &result)) abort();
	// 完成: 'result'结构包含计数,花费时间,和使用机器的信息
	return 0;
}
Ejemplo n.º 2
0
int main(int argc, char** argv) {
 ParseCommandLineFlags(argc, argv);
 
 MapReduceSpecification spec;
 
 // Store list of input files into "spec"
 for (int i = 1; i < argc; i++) {
  MapReduceInput* input = spec.add_input();
  input->set_format("text");
  input->set_filepattern(argv[i]);
  input->set_mapper_class("WordCounter");
 }
 // Specify the output files:
 // /gfs/test/freq-00000-of-00100
 // /gfs/test/freq-00001-of-00100
 // …
 MapReduceOutput* out = spec.output();
 out->set_filebase("/gfs/test/freq");
 out->set_num_tasks(100);
 out->set_format("text");
 out->set_reducer_class("Adder");
 
 // Optional: do partial sums within map
 // tasks to save network bandwidth
 out->set_combiner_class("Adder");
 // Tuning parameters: use at most 2000
 // machines and 100 MB of memory per task
 spec.set_machines(2000);
 spec.set_map_megabytes(100);
 spec.set_reduce_megabytes(100);
 
 // Now run it
 MapReduceResult result;
 if (!MapReduce(spec, &result)) abort();
 
 // Done: ‘result’ structure contains info
 // about counters, time taken, number of
 // machines used, etc.
 return 0;
}
Ejemplo n.º 3
0
/**
 * Process start-up routine: sets up logging, parses command-line flags,
 * configures floating-point denormal handling, seeds the random number
 * generators, optionally starts the GPU environment, and finally runs the
 * registered init functions.
 *
 * @param argc number of entries in argv.
 * @param argv command-line arguments; all of them are logged before parsing.
 */
void initMain(int argc, char** argv) {
  initializeLogging(argc, argv);
  installLayerStackTracer();

  // Log the command line (every argument followed by a single space) before
  // the flag parser, which receives a pointer to argc, is invoked.
  std::string commandLine;
  for (int idx = 0; idx < argc; ++idx) {
    commandLine.append(argv[idx]);
    commandLine.push_back(' ');
  }
  LOG(INFO) << "commandline: " << commandLine;

  ParseCommandLineFlags(&argc, argv, true);
  // After parsing, argc is required to be exactly 1; otherwise argv[1] is
  // reported as an unknown argument.
  CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1];

  installProfilerSwitch();

  // Turn on flush-to-zero and denormals-are-zero floating-point modes.
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

  if (FLAGS_seed != 0) {
    // An explicit seed was requested: use it as-is.
    srand(FLAGS_seed);
    ThreadLocalRand::initSeed(FLAGS_seed);
  } else {
    // A seed flag of 0 means "seed from the current time"; the chosen
    // value is logged.
    unsigned int timeSeed = time(NULL);
    srand(timeSeed);
    ThreadLocalRand::initSeed(timeSeed);
    LOG(INFO) << "random number seed=" << timeSeed;
  }

  if (FLAGS_use_gpu) {
    // Initialize the CUDA environment; this is needed before
    // runInitFunctions.
    // TODO(hedaoyuan) Could be moved into runInitFunctions, provided it is
    // guaranteed to be the first thing initialized.
    hl_start();
    hl_init(FLAGS_gpu_id);
  }

  version::printVersion();
  runInitFunctions();
}