void reduce(HadoopPipes::ReduceContext& context) {
  int maxValue = INT_MIN;
  while (context.nextValue()) {
    maxValue = std::max(maxValue, HadoopUtils::toInt(context.getInputValue()));
  }
  context.emit(context.getInputKey(), HadoopUtils::toString(maxValue));
}
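// A minimal sketch, assuming the standard Pipes headers (Pipes.hh,
// TemplateFactory.hh, StringUtils.hh), of how a reduce function like the one
// above is typically wrapped in a Reducer class and registered with the Pipes
// runtime. The names ExampleMapper and MaxValueReducer are illustrative
// assumptions, not taken from any of the snippets in this collection.
#include <algorithm>
#include <climits>
#include <string>

#include "hadoop/Pipes.hh"
#include "hadoop/TemplateFactory.hh"
#include "hadoop/StringUtils.hh"

// Pass-through mapper, included only to make the sketch self-contained.
class ExampleMapper : public HadoopPipes::Mapper {
public:
  ExampleMapper(HadoopPipes::TaskContext& /*context*/) {}
  void map(HadoopPipes::MapContext& context) {
    context.emit(context.getInputKey(), context.getInputValue());
  }
};

class MaxValueReducer : public HadoopPipes::Reducer {
public:
  MaxValueReducer(HadoopPipes::TaskContext& /*context*/) {}
  void reduce(HadoopPipes::ReduceContext& context) {
    // Same pattern as above: fold all values for the key into a running maximum.
    int maxValue = INT_MIN;
    while (context.nextValue()) {
      maxValue = std::max(maxValue, HadoopUtils::toInt(context.getInputValue()));
    }
    context.emit(context.getInputKey(), HadoopUtils::toString(maxValue));
  }
};

int main(int argc, char* argv[]) {
  // runTask drives the map/shuffle/reduce protocol over the Pipes connection.
  return HadoopPipes::runTask(
      HadoopPipes::TemplateFactory<ExampleMapper, MaxValueReducer>());
}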
void reduce(HadoopPipes::ReduceContext& context) {
  // Start from the smallest representable int so negative values are handled correctly.
  int maxValue = INT_MIN;
  while (context.nextValue()) {
    maxValue = std::max(maxValue, HadoopUtils::toInt(context.getInputValue()));
  }
  context.emit(context.getInputKey(), HadoopUtils::toString(maxValue));
}
// reduce function
void reduce(HadoopPipes::ReduceContext& context) {
  int count = 0;
  //--- sum the counts of all tuples that share this key ---
  while (context.nextValue()) {
    count += HadoopUtils::toInt(context.getInputValue());
  }
  //--- emit (word, count) ---
  context.emit(context.getInputKey(), HadoopUtils::toString(count));
}
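// A minimal sketch of the map side that would pair with the word-count style
// reducer above, assuming the standard Pipes headers; the class name
// WordCountMapper is an illustrative assumption. It emits (word, "1") for each
// whitespace-separated token, which the reducer then sums per key.
#include <string>
#include <vector>

#include "hadoop/Pipes.hh"
#include "hadoop/StringUtils.hh"

class WordCountMapper : public HadoopPipes::Mapper {
public:
  WordCountMapper(HadoopPipes::TaskContext& /*context*/) {}
  void map(HadoopPipes::MapContext& context) {
    // Split the input line on spaces and emit a count of 1 per word.
    std::vector<std::string> words =
        HadoopUtils::splitString(context.getInputValue(), " ");
    for (size_t i = 0; i < words.size(); ++i) {
      context.emit(words[i], "1");
    }
  }
};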
void reduce(HadoopPipes::ReduceContext& context) {
  std::string key = context.getInputKey();
  // Collect all IDs seen for this key.
  std::vector<uint32_t> ids;
  while (context.nextValue()) {
    ids.push_back(nise::ParseUint32(context.getInputValue()));
  }
  // Sort and drop duplicates.
  std::sort(ids.begin(), ids.end());
  ids.resize(std::unique(ids.begin(), ids.end()) - ids.begin());
  // Serialize the deduplicated ID list and emit it.
  std::ostringstream ss(std::ios::binary);
  nise::WriteVector<uint32_t>(ss, ids);
  std::string value(ss.str());
  context.emit(key, value);
}
void WriteOsmSqlStatementsReducer::reduce(HadoopPipes::ReduceContext& context)
{
  if (_context == NULL)
  {
    _context = &context;
  }

  boost::shared_ptr<Configuration> config(
    HadoopPipesUtils::toConfiguration(_context->getJobConf()));
  //LOG_VARD(config->getInt("mapred.reduce.tasks"));
  _localJobTracker = config->get("mapred.job.tracker") == "local";
  const long writeBufferSize = config->getLong("writeBufferSize");
  LOG_VART(writeBufferSize);
  if (config->hasKey("dbConnUrl"))
  {
    _dbConnStr = QString::fromStdString(config->get("dbConnUrl"));
    LOG_VART(_dbConnStr);
  }
  _retainSqlFile = config->get("retainSqlFile") == "1";
  LOG_VART(_retainSqlFile);

  //key syntax: <element id>;<table header> OR <element id>;<member id>;<table header>
  const QStringList keyParts = QString::fromStdString(context.getInputKey()).split(";");
  const QString tableHeader = keyParts[keyParts.size() - 2] + ";\n";
  LOG_VART(tableHeader);

  while (context.nextValue())
  {
    _updateElementCounts(tableHeader);

    const QString newValue = QString::fromStdString(context.getInputValue());
    LOG_VART(newValue);
    _sqlStatementBuffer[tableHeader] = _sqlStatementBuffer[tableHeader] % newValue;
    LOG_VART(_sqlStatementBuffer[tableHeader]);
    _sqlStatementBufferSize++;

    if (!_localJobTracker)
    {
      _context->incrementCounter(
        _context->getCounter("WriteOsmSqlStatements", "SQL statements"), 1);
    }

    //this flush will cause the same table to be written to the file twice, each time with a
    //different set of sql records...but that's ok...SQL is still valid
    if (_sqlStatementBufferSize >= writeBufferSize)
    {
      _flush();
    }
  }
}
void OsmMapReducer::reduce(HadoopPipes::ReduceContext& context)
{
  _context = &context;
  const string& k = context.getInputKey();
  OsmMapIterator it(_context, _map, _reader);
  reduce(k, it, context);
}
void reduce(HadoopPipes::ReduceContext& context) {
  // buffer[0] holds the running maximum; the remaining slots buffer incoming values.
  buffer[0] = MaxValueSimple::MAX_FAILURE;
  i = 1;
  while (context.nextValue()) {
    if (i < MaxTemp_Reducer::MAX_VALUES)
      buffer[i++] = atoi(context.getInputValue().c_str());
    if (i >= MaxTemp_Reducer::MAX_VALUES) {
      // Buffer is full: fold it into the running maximum and start refilling.
      buffer[0] = maxVal.maxValue(buffer, MaxTemp_Reducer::MAX_VALUES);
      i = 1;
    }
  }
  // work-around for context/iterable.hasNext()
  if (i > 1)
    buffer[0] = maxVal.maxValue(buffer, MaxTemp_Reducer::MAX_VALUES);
  valStr.str("");
  valStr << buffer[0];
  context.emit(context.getInputKey(), valStr.str());
}
SegUsageCalcWriter(HadoopPipes::ReduceContext& context) {
  printf("DBG:--enter func %s\n", __func__);
  const HadoopPipes::JobConf* job = context.getJobConf();
  int part = job->getInt("mapred.task.partition");
  std::string outDir = job->get("mapred.work.output.dir");
  std::string inputDir = job->get("mapred.input.dir");
  printf("DBG:--inputDir :%s\n", inputDir.c_str());
  const char* fs_name = g_basename(inputDir.c_str());
  const char* uri = inputDir.c_str();
  printf("DBG:--fs_name:%s,uri:%s\n", fs_name, uri);
  m_storage = init_storage_handler(uri);
  HADOOP_ASSERT(m_storage != NULL, "failed to init storage handler");
}
void reduce(HadoopPipes::ReduceContext& context) {
  printf("DBG:--enter reduce func ...\n");
  while (context.nextValue()) {
    printf("DBG:--key:%s value:%s\n",
           context.getInputKey().c_str(), context.getInputValue().c_str());
  }
  context.emit(context.getInputKey(), context.getInputValue());
  printf("DBG:--exit reduce func ...\n");
}
void reduce(HadoopPipes::ReduceContext& context) {
  bool first = true;
  nise::Record record;
  std::string key = context.getInputKey();
  while (context.nextValue()) {
    std::stringstream ss(context.getInputValue());
    if (first) {
      // The first value that parses cleanly becomes the base record.
      nise::Signature::RECORD.check(ss);
      record.readFields(ss);
      if (ss) {
        first = false;
        continue;
      }
    }
    else {
      if (record.sources.size() > nise::MAX_SOURCES) {
        context.incrementCounter(trimmed, 1);
        break;
      }
      // Merge the first source of each subsequent record into the base record.
      nise::Record cur;
      nise::Signature::RECORD.check(ss);
      cur.readFields(ss);
      if (ss && (cur.sources.size() > 0)) {
        record.sources.push_back(cur.sources[0]);
        continue;
      }
    }
    // Reaching this point means the value failed to parse.
    context.incrementCounter(bad, 1);
  }
  if (!first) {
    std::ostringstream ss(std::ios::binary);
    nise::Signature::RECORD.write(ss);
    record.write(ss);
    context.emit(key, ss.str());
  }
}
void SyntopReducer::reduce(HadoopPipes::ReduceContext& context) {  // NOLINT
  boost::split(temp_string_components_, context.getInputKey(), boost::is_any_of("_"));

  float sum = 0;
  while (context.nextValue()) {
    sum += HadoopUtils::toFloat(context.getInputValue());
  }

  if (boost::starts_with(temp_string_components_[0], "gamma")) {
    context.emit(context.getInputKey(), boost::lexical_cast<string>(sum));
  } else if (boost::starts_with(temp_string_components_[0], "lhood")) {
    // sum += GlobalLikelihoodTerm();
    sum += GlobalWeightTerm(vars_->nu_.get(), vars_->beta_.get(),
                            params_->alpha_trans(), params_->alpha_top(),
                            params_->finite());
    context.emit(context.getInputKey(), boost::lexical_cast<string>(sum));
  } else {
    if (boost::starts_with(temp_string_components_[2], "~")) {
      // cout << "optimizing" << endl;
      Optimize();
      Emit(&output);
      StringMap::const_iterator last = (output).end();
      for (StringMap::const_iterator itr = (output).begin(); itr != last; itr++) {
        // cout << itr->first << "\t" << itr->second << endl;
        context.emit(itr->first, boost::lexical_cast<string>(itr->second));
      }
      output.clear();

      last = (output).end();
      for (StringMap::const_iterator itr = (output).begin(); itr != last; itr++) {
        // cout << "output is\t" << itr->first << "\t" << itr->second << endl;
        // context.emit(itr->first, boost::lexical_cast<string>(itr->second));
      }

      index = boost::lexical_cast<int>(temp_string_components_[1]);
      vars_ = new VariationalParameters(*params_);

      display_matrix(vars_->tau_est_top_.get(), "tau_est_top is\n");
      display_vector(vars_->tau_est_bottom_.get(), "tau_est_bottom is\n");

      tau_coordinate_ = -1;
      nu_coordinate_ = -1;
    } else {
      ProcessKey(context.getInputKey(), sum);
      // cout << "processing\t" << context.getInputKey() << "\t" << sum << endl;
    }

    /*
    if (index == -1) {
      index = boost::lexical_cast<double>(temp_string_components_[1]);
      // reduceContext = context;
    } else {
      if (index != boost::lexical_cast<int>(temp_string_components_[1])) {
        Optimize();
        Emit(&output);

        StringMap::const_iterator last = (output).end();
        for (StringMap::const_iterator itr = (output).begin(); itr != last; itr++) {
          // cout << itr->first << "\t" << itr->second << endl;
          context.emit(itr->first, boost::lexical_cast<string>(itr->second));
        }
        output.clear();

        index = boost::lexical_cast<int>(temp_string_components_[1]);
        vars_ = new VariationalParameters(*params_);

        tau_coordinate_ = -1;
        nu_coordinate_ = -1;
      }
    }
    */
  }
  // }
}