int main(int argc, char** argv) { ParseCommandLineFlags(argc, argv); MapReduceSpecification spec; // 把输入文件列表存入"spec" for (int i = 1; i < argc; i++) { MapReduceInput* input = spec.add_input(); input->set_format("text"); input->set_filepattern(argv[i]); input->set_mapper_class("WordCounter"); } //指定输出文件: // /gfs/test/freq-00000-of-00100 // /gfs/test/freq-00001-of-00100 // ... MapReduceOutput* out = spec.output(); out->set_filebase("/gfs/test/freq"); out->set_num_tasks(100); out->set_format("text"); out->set_reducer_class("Adder"); // 可选操作:在map任务中做部分累加工作,以便节省带宽 out->set_combiner_class("Adder"); // 调整参数: 使用2000台机器,每个任务100MB内存 spec.set_machines(2000); spec.set_map_megabytes(100); spec.set_reduce_megabytes(100); // 运行它 MapReduceResult result; if (!MapReduce(spec, &result)) abort(); // 完成: 'result'结构包含计数,花费时间,和使用机器的信息 return 0; }
int main(int argc, char** argv) { ParseCommandLineFlags(argc, argv); MapReduceSpecification spec; // Store list of input files into "spec" for (int i = 1; i < argc; i++) { MapReduceInput* input = spec.add_input(); input->set_format("text"); input->set_filepattern(argv[i]); input->set_mapper_class("WordCounter"); } // Specify the output files: // /gfs/test/freq-00000-of-00100 // /gfs/test/freq-00001-of-00100 // … MapReduceOutput* out = spec.output(); out->set_filebase("/gfs/test/freq"); out->set_num_tasks(100); out->set_format("text"); out->set_reducer_class("Adder"); // Optional: do partial sums within map // tasks to save network bandwidth out->set_combiner_class("Adder"); // Tuning parameters: use at most 2000 // machines and 100 MB of memory per task spec.set_machines(2000); spec.set_map_megabytes(100); spec.set_reduce_megabytes(100); // Now run it MapReduceResult result; if (!MapReduce(spec, &result)) abort(); // Done: ‘result’ structure contains info // about counters, time taken, number of // machines used, etc. return 0; }
void initMain(int argc, char** argv) { initializeLogging(argc, argv); installLayerStackTracer(); std::string line; for (int i = 0; i < argc; ++i) { line += argv[i]; line += ' '; } LOG(INFO) << "commandline: " << line; ParseCommandLineFlags(&argc, argv, true); CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1]; installProfilerSwitch(); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); if (FLAGS_seed == 0) { unsigned int t = time(NULL); srand(t); ThreadLocalRand::initSeed(t); LOG(INFO) << "random number seed=" << t; } else { srand(FLAGS_seed); ThreadLocalRand::initSeed(FLAGS_seed); } if (FLAGS_use_gpu) { // This is the initialization of the CUDA environment, // need before runInitFunctions. // TODO(hedaoyuan) Can be considered in the runInitFunctions, // but to ensure that it is the first to initialize. hl_start(); hl_init(FLAGS_gpu_id); } version::printVersion(); runInitFunctions(); }